Best Python code snippet using localstack_python
iea_parsing_Final.py
Source:iea_parsing_Final.py  
1import os2import re3import csv4import pymysql5import PyPDF2, re6import datetime7from tabula import read_pdf8from Common.log import logging9from Common.config import MYSQL_ALETHEIA, PDF_PATH, CSV_PATH10class IEABase:11    def __init__(self):12        try:13            self.sqlconn = pymysql.connect(host=MYSQL_ALETHEIA["HOST"], port=MYSQL_ALETHEIA["PORT"], user=MYSQL_ALETHEIA["USER"],14                                           passwd=MYSQL_ALETHEIA["PASSWORD"], db=MYSQL_ALETHEIA["DB_NAME"], autocommit=True)15            self.cursor = self.sqlconn.cursor()16        except Exception as e:17            logging.error(e)18    def __del__(self):19        try:20            self.cursor.close()21        except Exception as e:22            raise e23        try:24            self.sqlconn.close()25        except Exception as e:26            raise e27    def get_pdf_list(self):28        """29        :return: PDF files from folder30        """31        try:32            pdf_files = [f for f in os.listdir(PDF_PATH+'.') if os.path.isfile(PDF_PATH+f) and f.endswith('.pdf')]33            return pdf_files34        except Exception as e:35            logging.error(e)36            raise e37    def find_table_contents(self,pdf_list,table_name):38        page_numbers = {}39        pdf_value = {}40        flag = 041        for pdf in pdf_list:42            pdf = pdf.replace(' ', '')43            Xfile = open((PDF_PATH + pdf), 'rb')44            pdfDoc = PyPDF2.PdfFileReader(Xfile, "rb")45            if flag == 1:46                try:47                    for i in range(0, 10):48                        content = ""49                        content += (pdfDoc.getPage(i).extractText() + "\n")50                        content1 = content.encode('ascii', 'ignore').lower()51                        content1 = content1.decode('utf-8')52                        ResSearch = re.search(table_name, content1)53                        if ResSearch is not None:54                            PageFound = i55                            content += (pdfDoc.getPage(PageFound).extractText() + "\n")56                            content1 = content.encode('ascii', 'ignore').lower()57                            content1 = content1.decode('utf-8')58                            ResSearch = re.search('tables', content1)59                            if ResSearch is not None:60                                pageObj1 = pdfDoc.getPage(PageFound)61                                pagecontent = pageObj1.extractText()62                                pagecontent.replace('.', '')63                                f = open("error page.txt", "w")64                                f.write(pagecontent)65                                try:66                                    f = open("error page.txt", "r")67                                    read = f.read()68                                    l = (read.replace('.', ' ').split())69                                    for i in l:70                                        if 'TABLES' in i:71                                            index = int((l.index(i)) + 1)72                                            last = len(l) + 173                                            for page in range(index, last):74                                                page_number = l[page]75                                                if page_number.isdigit():76                                                    page_numbers[pdf] = page_number77                                                    break78                                except Exception as e:79                                    print(e)80                except Exception as e:81                    print(e)82            else:83                page_numbers[pdf] = '1'84        return page_numbers85    def find_page_number_1(self,pdf_list, table_name):86        page_numbers = {}87        for pdf, value in pdf_list.items():88            value = int(value)89            try:90                pdf = pdf.replace(' ', '')91                Xfile = open((PDF_PATH + pdf), 'rb')92                pdfDoc = PyPDF2.PdfFileReader(Xfile, "rb")93                for i in range(value, pdfDoc.getNumPages()):94                    content = ""95                    content += (pdfDoc.getPage(i).extractText() + "\n")96                    content1 = content.encode('ascii', 'ignore').lower()97                    content1 = content1.decode('utf-8')98                    if table_name == 'table2b':99                        name = 'OIL DEMAND IN SELECTED OECD COUNTRIES'.lower()100                        name1 = 'OIL DEMAND AND % GROWTH IN'.lower()101                        ResSearch = re.search(name, content1)102                        if ResSearch is not None:103                            PageFound = i + 1104                            page_numbers[pdf] = PageFound105                            break106                        else:107                            ResSearch = re.search(name1, content1)108                            if ResSearch is not None:109                                PageFound = i + 1110                                page_numbers[pdf] = PageFound111                                break112                    if (table_name == 'table1'):113                        name = "WORLD OIL SUPPLY AND DEMAND".lower()114                        ResSearch = re.search(name, content1)115                        if ResSearch is not None:116                            PageFound = i117                            page_numbers[pdf] = PageFound118                            break119                    if (table_name == 'table3'):120                        name = "WORLD OIL PRODUCTION".lower()121                        ResSearch = re.search(name, content1)122                        if ResSearch is not None:123                            PageFound = i + 1124                            page_numbers[pdf] = PageFound125                            break126                    if (table_name == 'table4'):127                        name = "OECD INDUSTRY STOCKS".lower()128                        ResSearch = re.search(name, content1)129                        if ResSearch is not None:130                            PageFound = i + 1131                            page_numbers[pdf] = PageFound132                            break133                    if (table_name == 'table2'):134                        name = "Summary of Global Oil Demand".lower()135                        ResSearch = re.search(name, content1)136                        if ResSearch is not None:137                            PageFound = i + 1138                            page_numbers[pdf] = PageFound139                            break140                    if (table_name == 'table2a'):141                        name = "OECD REGIONAL OIL DEMAND".lower()142                        ResSearch = re.search(name, content1)143                        if ResSearch is not None:144                            PageFound = i + 1145                            page_numbers[pdf] = PageFound146                            # print(page_numbers)147                            break148            except Exception as e:149                f = open("pdf.txt", "a")150                f.write(pdf + '\n')151                f.write(str(e))152                pass153        return page_numbers154    def conert_pdf_to_csv(self, pdf_name, csv_name, page_number):155        """156        Converts pdf page to csv file157        :param pdf_name: Existing PDF name158        :param csv_name: Expected CSV name159        :param page_number: PDF page number to be converted into CSV file(mostly table page)160        :return:161        """162        try:163            with open(csv_name, "w") as f:164                data = read_pdf(pdf_name, output_format="csv", pages=page_number)165                data.to_csv(f)166            return 1167        except Exception as e:168            logging.error(e)169            raise e170    def get_month_id(self,month_name):171        """172        get month number in integer173        :param monthName:174        :return:175        """176        try:177            month_list = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]178            count = 1179            for month in month_list:180                if month == month_name:181                    return count182                count += 1183        except Exception as e:184            logging.error(e)185            raise e186    def clean_string(self, string):187        """188        remove numeric and symbols from string189        :param string:190        :return:191        """192        try:193            new_string = re.sub('[0-9.,:*]+','',string)194            return new_string.strip()195        except Exception as e:196            logging.error(e)197            raise e198    def get_period_year(self, number):199        try:200            if number > 80:201                return 1900+number202            else:203                return 2000+number204        except Exception as e:205            logging.error(e)206            raise e207    def convert_to_period_data(self, row, allow_all=False):208        """209        convert csv rows into period_id, period_idx, mode format210        :param row:211        :param allow_all:212        :return:213        """214        try:215            year_row = {}216            inner_count = 0217            for r in row:218                if inner_count == 12:219                    if allow_all == True:220                        if "Q" in r:221                            v = str(r).split("Q")222                            year_row[inner_count] = {223                                "PeriodID": self.get_period_year(number=int(v[1].split(".")[0])),224                                "PeriodIdx": int(v[0]),225                                "Mode": "Q"226                            }227                        else:228                            v1 = str(r).split(" ")229                            if len(v1) > 1:230                                year_row[inner_count] = {231                                    "PeriodID": self.get_period_year(number=int(v1[1].split(".")[0])),232                                    "PeriodIdx": self.get_month_id(v1[0]),233                                    "Mode": "M"234                                }235                            else:236                                year_row[inner_count] = {237                                    "PeriodID": v1[0].split(".")[0],238                                    "PeriodIdx": 1,239                                    "Mode": "Y"240                                }241                else:242                    if "Q" in r:243                        v = str(r).split("Q")244                        year_row[inner_count] = {245                            "PeriodID": self.get_period_year(number=int(v[1].split(".")[0])),246                            "PeriodIdx": int(v[0]),247                            "Mode": "Q"248                        }249                    else:250                        v1 = str(r).split(" ")251                        if len(v1) > 1:252                            year_row[inner_count] = {253                                "PeriodID": self.get_period_year(number=int(v1[1].split(".")[0])),254                                "PeriodIdx": self.get_month_id(v1[0]),255                                "Mode": "M"256                            }257                        else:258                            year_row[inner_count] = {259                                "PeriodID": v1[0].split(".")[0],260                                "PeriodIdx": 1,261                                "Mode": "Y"262                            }263                inner_count += 1264            return year_row265        except Exception as e:266            logging.error(e)267            raise e268    def format_changer_for_table1(self, rows):269        try:270            final_result = []271            for row_id in range(len(rows)):272                inner_row = []273                if row_id == 0:274                    col_count = 0275                    for column in rows[row_id]:276                        if col_count >= 1:277                            if "Unna" not in column:278                                col_list = column.split(" ")279                                for col in col_list:280                                    if col:281                                        inner_row.append(col)282                        else:283                            inner_row.append(column)284                        col_count += 1285                    final_result.append(inner_row)286                else:287                    for column in rows[row_id][2:]:288                        col_split = column.split(" ")289                        for cols in col_split:290                            if cols:291                                inner_row.append(cols)292                    inner_row = rows[row_id][:2] + inner_row293                    final_result.append(inner_row)294            return final_result295        except Exception as e:296            logging.error(e)297            f = open("error files.txt", "a")298            f.write(str(e))299            raise e300    def convert_to_period_data_table_4(self, row):301        try:302            result = []303            for col in row:304                if "Q" in col:305                    seprate = col.split("Q")306                    result.append({307                        "PeriodID": seprate[-1],308                        "PeriodIdx": seprate[0],309                        "Mode": 'Q'310                    })311                else:312                    result.append({313                        "PeriodID": col[3:7],314                        "PeriodIdx": self.get_month_id(col[:3]),315                        "Mode": 'M'316                    })317            return result318        except Exception as e:319            logging.error(e)320            raise e321    def format_changer_table2a(self, list_of_list):322        """323        pdf format chaged before 2010/06324        :return:325        """326        try:327            raw_data = []328            r1 = []329            r2 = []330            rows = []331            quartrs = {332                'First Quarter': '1Q',333                'Second Quarter': '2Q',334                'Third Quarter': '3Q',335                'Fourth Quarter': '4Q'336            }337            for li in list_of_list:338                raw_data.append(li)339            list_of_list = raw_data340            for i in range(len(list_of_list)):341                if i == 0:342                    for lol in list_of_list[i]:343                        if lol and "Unnamed" not in lol:344                            r1.append(lol)345                    rows.append(r1)346                if i == 1:347                    count1 = 0348                    for data in list_of_list[i]:349                        x = data.split(" ")350                        for x1 in x:351                            if count1 > 1:352                                if x1:353                                    r2.append(x1.replace(".0", ""))354                            else:355                                r2.append(x1.replace(".0", ""))356                        count1 += 1357                    rows.append(r2)358                if i > 1:359                    var = []360                    counts = 0361                    for data in list_of_list[i]:362                        if counts == 1:363                            var.append(data)364                        else:365                            x = data.split(" ")366                            for x1 in x:367                                if x1:368                                    var.append(x1)369                        counts += 1370                    rows.append(var)371            second_index = 2372            for month_id in range(len(r1)):373                counter = 0374                for year_id in range(5):375                    month = r1[month_id]376                    year = r2[second_index]377                    if year == '%':378                        year = '2050'379                    get_quarter = quartrs.get(month)380                    if get_quarter:381                        all_year = get_quarter + year[2:]382                    else:383                        all_year = month[:3] + " " + year[2:]384                    r2[second_index] = all_year385                    second_index += 1386                    counter += 1387                    if counter == 3:388                        break389            rows[1] = r2390            return rows391        except Exception as e:392            logging.error(e)393            print(e)394    def string_seperator(self, string, decimal_places=1):395        try:396            numbers = []397            space_count = str(string).count(" ")398            if space_count == 0:399                digit_count = str(string).count(".")400                if digit_count > 0:401                    for i in range(digit_count):402                        data = str(string).index(".")403                        numbers.append(string[:data + decimal_places + 1])404                        string = string[data + decimal_places + 1:]405                else:406                    numbers.append(string)407            else:408                space_sep = str(string).split(" ")409                for sp in space_sep:410                    if sp:411                        numbers.append(sp)412            return numbers413        except Exception as e:414            logging.error(e)415            raise e416    def read_csv_file(self, file_name):417        try:418            result = []419            with open(file_name) as file:420                reader = csv.reader(file)421                for row in reader:422                    result.append(row)423            return result424        except Exception as e:425            logging.error(e)426            raise e427    def format_changer_for_table2(self, rows):428        try:429            final_result = []430            for row_id in range(len(rows)):431                inner_row = []432                if row_id == 0:433                    col_count = 0434                    for column in rows[row_id]:435                        if col_count > 1:436                            if "Unna" not in column:437                                col_list = column.split(" ")438                                for col in col_list:439                                    if col:440                                        inner_row.append(col)441                        else:442                            inner_row.append(column)443                        col_count += 1444                    final_result .append(inner_row)445                else:446                    for column in rows[row_id][2:]:447                        col_split = column.split(" ")448                        for cols in col_split:449                            if cols:450                                inner_row.append(cols)451                    inner_row = rows[row_id][:2]+inner_row452                    final_result.append(inner_row)453            return final_result454        except Exception as e:455            logging.error(e)456            raise e457    def table_3_prior_formatting(self, row):458        try:459            new_row = []460            count = 0461            for col in row:462                col = col.replace("*","")463                if "f" in col and "Feb" not in col:464                    col = col.replace("f","").strip()465                if count > 1:466                    if "Q" in col:467                        if len(col) == 6:468                            col = col[:2]+col[-2:]469                        elif len(col) == 4:470                            if col[0] == "Q":471                                col = col[1:2]+"Q"+col[-2]472                    if str(col[2]).isalpha() and str(col[3]).isdigit():473                        col = col[:3]+' '+col[-2:]474                count += 1475                new_row.append(col)476            return new_row477        except Exception as e:478            logging.error(e)479            raise e480class IEADataProcessing(IEABase):481    def __init__(self):482        try:483            IEABase.__init__(self)484        except Exception as e:485            logging.error(e)486    def insert_table2_data(self, file_name, sub_table):487        """488        insert table 2a and table 2b data489        :param file_name:490        :param sub_table: table2a_ or table2b_491        :return:492        """493        try:494            year_row = {}495            start_col = 2496            country = ""497            is_total = "null"498            table_type = "2a"499            if sub_table != "table2a_":500                table_type = "2b"501            rows = self.read_csv_file(file_name=file_name)502            check = 1503            version_id = str(file_name.split("/")[-1]).replace(".csv", "").replace(sub_table, "").replace("-", "")504            if int(version_id) < 20100713:505                rows = self.format_changer_table2a(rows)506            provided_date = str(file_name.split("/")[-1]).replace(".csv", "").replace(sub_table, "")507            reported_date = provided_date + " 00:00:00"508            print(reported_date)509            country_id = 'null'510            region_id = 'null'511            query = "delete from IEA_HistoricMacroElementsData where ReportedDate = %s and TableType = %s"512            try:513                result = self.cursor.execute(query, (reported_date, table_type,))514            except Exception as e:515                logging.error(e)516            for n in range(len(rows)):517                if n == check:518                    if rows[n][2] and rows[n][3]:519                        year_row = self.convert_to_period_data(row=rows[n])520                    else:521                        check += 1522                if n > check:523                    if (int(version_id) < 20100713 and len(rows[n]) > 1) or (rows[n][2] and rows[n][3]):524                        element = self.clean_string(rows[n][1])525                        country = self.clean_string(element)526                        if sub_table == "table2a_":527                            if "OECD" in country:528                                region_id = "null"529                                country_id = "null"530                            else:531                                region_id_query = "select RegionID from Regions where RegionName='%s' and ProviderID = '1' " % (532                                    country)533                                self.cursor.execute(region_id_query)534                                country_res = self.cursor.fetchall()535                                if country_res and len(country_res) > 0 and country_res[0][0] != None:536                                    region_id = country_res[0][0]537                                    country_id = 'null'538                                else:539                                    if (element == 'Jet/Kerosene'):540                                        element = 'Jet and kerosene'541                                    element = element.replace(' ', '')542                                    get_ph_heirarchy = "select PH1ID, PH2ID, PHID from ProductHR3 where upper(replace(Description,' ','')) = upper('%s')" % element543                                    self.cursor.execute(get_ph_heirarchy)544                                    ph_result = self.cursor.fetchall()545                                    if ph_result and len(ph_result) > 0:546                                        ph1 = ph_result[0][0]547                                        ph2 = ph_result[0][1]548                                        ph3 = ph_result[0][2]549                                        is_total = "null"550                                    else:551                                        ph1 = "null"552                                        is_total = "null"553                                        if element == "Total":554                                            ph1 = 1555                                            is_total = 1556                                        ph2 = "null"557                                        ph3 = "null"558                        else:559                            if (element == 'Jet/Kerosene'):560                                element = 'Jet and kerosene'561                            element = element.replace(' ', '')562                            get_ph_heirarchy = "select PH1ID, PH2ID, PHID from ProductHR3 where upper(replace(Description,' ','')) = upper('%s')" % element563                            self.cursor.execute(get_ph_heirarchy)564                            ph_result = self.cursor.fetchall()565                            if ph_result and len(ph_result) > 0:566                                ph1 = ph_result[0][0]567                                ph2 = ph_result[0][1]568                                ph3 = ph_result[0][2]569                                is_total = "null"570                            else:571                                ph1 = "null"572                                is_total = "null"573                                if element == "Total":574                                    ph1 = 1575                                    is_total = 1576                                else:577                                    pattern = country.replace(' ', '') + "%"578                                    country_id_query = "select CountryID from Country where upper(replace(CountryName,' ','')) LIKE  '%s'" % (579                                        pattern)580                                    self.cursor.execute(country_id_query)581                                    res = self.cursor.fetchall()582                                    if res and len(res) > 0:583                                        country_id = str(res[0][0])584                                    else:585                                        country_id = "null"586                                    region_id_query = "select RegionID from RegionCountryMapping where CountryID='%s'" % (587                                        country_id)588                                    self.cursor.execute(region_id_query)589                                    res = self.cursor.fetchall()590                                    if res and len(res) > 0:591                                        region_id = res[0][0]592                                    else:593                                        region_id = "null"594                                ph2 = "null"595                                ph3 = "null"596                        elemCount = start_col597                        for fr in rows[n][start_col:]:598                            value = fr599                            if value and elemCount != 12:600                                res = year_row.get(elemCount)601                                periodID = res.get("PeriodID")602                                periodIdx = res.get("PeriodIdx")603                                mode = res.get("Mode")604                                insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \605                                              "%s, %s, '%s', 'D', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', %s, %s, 1, null, null, 1.0, current_timestamp(), %s, '%s', 1, '%s')" % (606                                                  periodID, periodIdx, mode, ph1, ph2, ph3, value, country_id,607                                                  region_id, is_total, provided_date, table_type608                                              )609                                try:610                                    if int(periodID) != 2050:611                                        self.cursor.execute(insertQuery)612                                except Exception as e:613                                    logging.error(e)614                            elemCount += 1615                    else:616                        element = (rows[n][1])617                        # print(element)618                        country = self.clean_string(element)619                        if sub_table == "table2a_":620                            region_id_query = "select RegionID from Regions where RegionName='%s' and ProviderID = '1' " % (621                                country)622                            self.cursor.execute(region_id_query)623                            country_res = self.cursor.fetchall()624                            if country_res and len(country_res) > 0 and country_res[0][0] != None:625                                region_id = country_res[0][0]626                                country_id = 'null'627                            else:628                                country_id = 'null'629                                region_id = 'null'630                        else:631                            pattern = country.replace(' ', '') + "%"632                            country_id_query = "select CountryID from Country where upper(replace(CountryName,' ','')) LIKE  '%s'" % (633                                pattern)634                            self.cursor.execute(country_id_query)635                            res = self.cursor.fetchall()636                            if res and len(res) > 0:637                                country_id = str(res[0][0])638                            else:639                                country_id = "null"640                            region_id_query = "select RegionID from RegionCountryMapping where CountryID='%s'" % (641                                country_id)642                            self.cursor.execute(region_id_query)643                            res = self.cursor.fetchall()644                            if res and len(res) > 0:645                                region_id = res[0][0]646                            else:647                                region_id = "null"648        except Exception as e:649            logging.error(e)650            query = "update RefTables set parsing = 13 where ReportedDate = %s and TableType = %s"651            try:652                result = self.cursor.execute(query, (reported_date, table_type))653            except Exception as e:654                logging.error(e)655        os.remove(file_name)656        print("file removed", file_name)657    def insert_table4_data(self, file_name, sub_table=None):658        version_id = str(file_name.split("/")[-1]).replace(".csv", "").replace("table4_", "").replace("-", "")659        provided_date = str(file_name.split("/")[-1]).replace(".csv", "").replace(sub_table, "")660        reported_date = provided_date + " 00:00:00"661      #  print(reported_date)662        663        query = "delete from IEA_HistoricMacroElementsData_2_issue where ReportedDate = %s and TableType = '4'"664        try:665            result = self.cursor.execute(query, (reported_date,))666        except Exception as e:667            logging.error(e)668        query = "delete from RefTable where ReportedDate = %s and TableType = '4'"669        try:670            result = self.cursor.execute(query, (reported_date,))671            # print(result)672        except Exception as e:673            logging.error(e)674            # print(e, file_name)675     #   676        query = "insert into RefTable(ReportedDate,TableType,Flag) values(" \677                "'%s','4','1')" % (reported_date,)678        try:679            result = self.cursor.execute(query)680            # print(result)681        except Exception as e:682            logging.error(e)683        query = "select max(ReportedDate) from IEA_HistoricMacroElementsData_2_issue where TableType = 4"684        try:685            result = self.cursor.execute(query,)686            if result > 1:687                max_date = str(self.cursor.fetchone())688                max_date = (max_date.strip("(datetime.datetime(,)").replace('', '')[0:12])689                max_date = max_date.split(',')690                y = int(max_date[0])691                m = int(max_date[1])692                d = int(max_date[2])693                dates = str(datetime.date(y, m, d))694                dates = dates.replace('-', '')695      #          print((dates))696            else:697                dates = 000698        except Exception as e:699            logging.error(e)700        if int(dates) != int(version_id):701            try:702                period_data = []703                is_total = "null"704                govenrment_controlled = "S"705                region = ""706                region_id = "null"707                rows = self.read_csv_file(file_name=file_name)708                count = 0709                for index in range(len(rows)):710                    if index > 1:711                        temp_row = []712                        for col_index in range(len(rows[index])):713                            if col_index > 1:714                                d = self.string_seperator(string=rows[index][col_index])715                                temp_row.extend(d)716                            else:717                                temp_row.append(rows[index][col_index])718                        rows[index] = temp_row719                get_period_data = True720                for row_count in range(len(rows)):721                    count = count + 1722                    if row_count == 2 or get_period_data == True:723                        period_data = self.convert_to_period_data_table_4(rows[row_count])724                        get_period_data = False725                    if row_count > 2:726                        if is_total == 1:727                            region_id = "null"728                        if "Million" in rows[row_count]:729                            get_period_data = True730                            govenrment_controlled = "SG"731                        if rows[row_count][1]:732                            if rows[row_count][1] and "OECD" in rows[row_count][1]:733                                # print(rows[row_count][1])734                                region = self.clean_string(rows[row_count][1])735                                #  region =736                                region = region.strip("OECD").strip()737                                check_region_id = "select RegionID from Regions where upper(RegionName)=upper('%s')" % region738                                self.cursor.execute(check_region_id)739                                region_res = self.cursor.fetchall()740                                if region_res and len(region_res) > 0:741                                    region_id = region_res[0][0]742                                else:743                                    region_id = "null"744                            else:745                                element = self.clean_string(rows[row_count][1])746                                if "TOTAL" in element.upper() and "PRODUCTS" not in element.upper():747                                    is_total = 1748                                element = self.clean_string(rows[row_count][1])749                                get_ph_heirarchy = "select PH1ID, PH2ID, PHID from ProductHR3 where upper(Description) = upper('%s')" % element750                                self.cursor.execute(get_ph_heirarchy)751                                ph_result = self.cursor.fetchall()752                                if ph_result and len(ph_result) > 0:753                                    ph1 = ph_result[0][0]754                                    ph2 = ph_result[0][1]755                                    ph3 = ph_result[0][2]756                                else:757                                    if "TOTAL" in element.upper():758                                        is_total = 1759                                        if str(region_id).upper() == "Null".upper():760                                            region = self.clean_string(rows[row_count][1])761                                            is_total = 1762                                            check_region_id = "select RegionID from Regions where upper(RegionName)=upper('%s')" % region763                                            self.cursor.execute(check_region_id)764                                            region_res = self.cursor.fetchall()765                                            if region_res and len(region_res) > 0:766                                                region_id = region_res[0][0]767                                            else:768                                                region_id = "null"769                                    else:770                                        is_total = "null"771                                        region = self.clean_string(rows[row_count][1])772                                        check_region_id = "select RegionID from Regions where upper(RegionName)=upper('%s')" % region773                                        self.cursor.execute(check_region_id)774                                        region_res = self.cursor.fetchall()775                                        if region_res and len(region_res) > 0:776                                            region_id = region_res[0][0]777                                        else:778                                            region_id = "null"779                                    ph1 = 1780                                    ph2 = "null"781                                    ph3 = "null"782                                check_count = 0783                                for col_count in range(len(rows[row_count])):784                                    if rows[row_count][col_count] and col_count > 1:785                                        period_id = period_data[col_count].get("PeriodID")786                                        period_idx = period_data[col_count].get("PeriodIdx")787                                        mode = period_data[col_count].get("Mode")788                                        if mode != "Q":789                                            insert_query = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal,  ReportedDate, ProviderID, TableType) values(" \790                                                           "%s, %s, '%s', '%s', %s, %s, %s, null, %s, 'V', 'MBPM', 'USD', null, %s, 1, null, null, 1.0, current_timestamp(), %s, '%s', 1, '4')" % (791                                                               period_id, period_idx, mode, govenrment_controlled, ph1,792                                                               ph2, ph3, rows[row_count][col_count], region_id,793                                                               is_total, reported_date794                                                           )795                                            try:796                                                self.cursor.execute(insert_query)797                                                check_count += 1798                                                if check_count == 5:799                                                    break800                                            except Exception as e:801                                                logging.error(e)802            except Exception as e:803                logging.error(e)804              #  print(e)805                query = "update RefTables set flag = 2 where ReportedDate = %s and TableType = '4'"806                try:807                    result = self.cursor.execute(query, (reported_date,))808                except Exception as e:809                    logging.error(e)810                    print(e)811    def insert_table1_data(self, file_name, sub_table=None):812        flag = 0813        total_nonopec = 0814        provided_date = str(file_name.split("/")[-1]).replace("table1_", "").replace(".csv", "")815        reported_date = provided_date + " 00:00:00"816        query = "delete from IEA_HistoricMacroElementsData where ReportedDate = %s and TableType = '1'"817        try:818            result = self.cursor.execute(query, (reported_date,))819        except Exception as e:820            logging.error(e)821        try:822            period_data = {}823            start = False824            ph1 = 1825            ph2 = "null"826            ph3 = "null"827            gr = ''828            gr2 = ''829            provided_date = str(file_name.split("/")[-1]).replace("table1_", "").replace(".csv", "")830            data = self.read_csv_file(file_name=file_name)831            rows = self.format_changer_for_table1(rows=data)832            for row_id in range(len(rows)):833                row = rows[row_id]834                if row_id == 0:835                    period_data = self.convert_to_period_data(row=row, allow_all=True)836                # print("period data",period_data)837                else:838                    if start == True:839                        get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % gr840                        self.cursor.execute(get_group_query)841                        group_res = self.cursor.fetchall()842                        if group_res and len(group_res) > 0:843                            group_id = group_res[0][0]844                        else:845                            group_id = "null"846                        is_total = "null"847                        region_id_query = "select RegionID from Regions where RegionName='%s' and ProviderID=1" % self.clean_string(848                            row[1])849                        self.cursor.execute(region_id_query)850                        res = self.cursor.fetchall()851                        if res and len(res) > 0:852                            region_id = res[0][0]853                            country_id = "null"854                        else:855                            region_id = "null"856                            pattern = row[1].replace(' ', '') + "%"857                            country_id_query = "select CountryID from Country where upper(replace(CountryName,' ','')) LIKE  '%s'" % (858                                pattern)859                            self.cursor.execute(country_id_query)860                            res = self.cursor.fetchall()861                            if res and len(res) > 0:862                                country_id = str(res[0][0])863                                region_id_query = "select RegionID from RegionCountryMapping where CountryID='%s'" % (864                                    country_id)865                                self.cursor.execute(region_id_query)866                                res = self.cursor.fetchall()867                                if res and len(res) > 0:868                                    region_id = res[0][0]869                                else:870                                    region_id = "null"871                            else:872                                country_id = "null"873                        if "TOTAL" in row[1].upper():874                            is_total = 1875                        for val_id in range(2, len(row)):876                            value = row[val_id]877                            # print("value", value)878                            dict_id = (val_id - 1)879                            # print(dict_id)880                            peri_data = period_data.get(dict_id)881                            period_id = peri_data.get("PeriodID")882                            period_idx = peri_data.get("PeriodIdx")883                            mode = peri_data.get("Mode")884                            insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \885                                          "%s, %s, '%s', 'D', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', %s, %s, %s, null, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (886                                              period_id, period_idx, mode, ph1, ph2, ph3, value, country_id, region_id,887                                              group_id,888                                              is_total, provided_date889                                          )890                            try:891                                self.cursor.execute(insertQuery)892                            except Exception as e:893                                logging.error(e)894                        if self.clean_string(row[1]).upper() == "TOTAL NON-OECD":895                            total_demand = row_id896                            start = False897                    elif start == False:898                        if self.clean_string(row[1]).upper() == "OECD DEMAND".upper() or self.clean_string(899                                row[1]).upper() == "DEMAND".upper():900                            start = True901                            gr = 'OECD'902                    if "Total OECD".upper() == self.clean_string(row[1]).upper():903                        gr = "Non-OECD"904                    if "TOTAL DEMAND".upper() == self.clean_string(row[1]).upper():905                        oecd_supply = row_id906                        print("total demand")907                        for val_id in range(2, len(row)):908                            value = row[val_id]909                            print("value", value)910                            dict_id = (val_id - 1)911                            peri_data = period_data.get(dict_id)912                            period_id = peri_data.get("PeriodID")913                            period_idx = peri_data.get("PeriodIdx")914                            mode = peri_data.get("Mode")915                            is_total = 1916                            ph1 = 1917                            ph2 = "null"918                            ph3 = "null"919                            region_id = "null"920                            group_id = "null"921                            group_id_2 = "null"922                            insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \923                                          "%s, %s, '%s', 'D', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', null, %s, %s, null, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (924                                              period_id, period_idx, mode, ph1, ph2, ph3, value, region_id, group_id,925                                              is_total, provided_date926                                          )927                            try:928                                self.cursor.execute(insertQuery)929                            except Exception as e:930                                logging.error(e)931                        break932            #     # for supply933            supply_start = False934            for row_id in range(oecd_supply, len(rows)):935                row = rows[row_id]936                if supply_start == True:937                    if self.clean_string(row[1]).upper() == "PROCESSING GAINS":938                        supply_start = False939                        flag = 1940                    else:941                        get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % gr942                        self.cursor.execute(get_group_query)943                        group_res = self.cursor.fetchall()944                        if group_res and len(group_res) > 0:945                            group_id = group_res[0][0]946                        else:947                            group_id = "null"948                        get_group_query_2 = "select GroupID from Groups where upper(Description) = upper('%s')" % gr2949                        self.cursor.execute(get_group_query_2)950                        group_res = self.cursor.fetchall()951                        if group_res and len(group_res) > 0:952                            group_id_2 = group_res[0][0]953                        else:954                            group_id_2 = "null"955                        is_total = "null"956                        region_id_query = "select RegionID from Regions where RegionName='%s' and ProviderID=1" % self.clean_string(957                            row[1])958                        self.cursor.execute(region_id_query)959                        res = self.cursor.fetchall()960                        if res and len(res) > 0:961                            region_id = res[0][0]962                            country_id = "null"963                        else:964                            region_id = "null"965                            pattern = row[1].replace(' ', '') + "%"966                            country_id_query = "select CountryID from Country where upper(replace(CountryName,' ','')) LIKE  '%s'" % (967                                pattern)968                            self.cursor.execute(country_id_query)969                            res = self.cursor.fetchall()970                            if res and len(res) > 0:971                                country_id = str(res[0][0])972                                region_id_query = "select RegionID from RegionCountryMapping where CountryID='%s'" % (973                                    country_id)974                                self.cursor.execute(region_id_query)975                                res = self.cursor.fetchall()976                                if res and len(res) > 0:977                                    region_id = res[0][0]978                                else:979                                    region_id = "null"980                            else:981                                country_id = "null"982                        if "TOTAL" in row[1].upper():983                            is_total = 1984                        for val_id in range(2, len(row)):985                            value = row[val_id]986                            dict_id = (val_id - 1)987                            peri_data = period_data.get(dict_id)988                            period_id = peri_data.get("PeriodID")989                            period_idx = peri_data.get("PeriodIdx")990                            mode = peri_data.get("Mode")991                            ph1 = 1992                            ph2 = "null"993                            ph3 = "null"994                            if (995                                    ph1 == "null" and ph2 == "null" and region_id == "null" and is_total == "null" and group_id == "null" and group_id_2 == "null"):996                                break997                            else:998                                insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \999                                              "%s, %s, '%s', 'S', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', %s, %s, %s, %s, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (1000                                                  period_id, period_idx, mode, ph1, ph2, ph3, value, country_id,1001                                                  region_id, group_id, group_id_2, is_total, provided_date1002                                              )1003                                try:1004                                    self.cursor.execute(insertQuery)1005                                except Exception as e:1006                                    logging.error(e)1007                        if "Total OECD".upper() == self.clean_string(row[1]).upper():1008                            gr = "Non-OPEC"1009                            gr2 = 'Non-OECD'1010                        if self.clean_string(row[1]).upper() == "TOTAL NON-OECD":1011                            supply_start = False1012                        if self.clean_string(row[1]).upper() == "TOTAL NON-OPEC":1013                            supply_start = False1014                            total_nonopec = 11015                if "PROCESSING GAINS".upper() == self.clean_string(row[1]).upper() or flag == 1:1016                    supply_start = False1017                    total_nonopec == 11018                    get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % self.clean_string(1019                        row[1]).upper()1020                    self.cursor.execute(get_group_query)1021                    group_res = self.cursor.fetchall()1022                    if group_res and len(group_res) > 0:1023                        group_id = group_res[0][0]1024                    else:1025                        group_id = "null"1026                    region_id = "null"1027                    is_total = "null"1028                    #1029                    for val_id in range(2, len(row)):1030                        value = row[val_id]1031                        print("value processing gains", value)1032                        dict_id = (val_id - 1)1033                        peri_data = period_data.get(dict_id)1034                        period_id = peri_data.get("PeriodID")1035                        period_idx = peri_data.get("PeriodIdx")1036                        mode = peri_data.get("Mode")1037                        ph1 = 11038                        ph2 = "null"1039                        ph3 = "null"1040                        if (1041                                ph1 == "null" and ph2 == "null" and region_id == "null" and is_total == "null" and group_id == "null" and group_id_2 == "null"):1042                            break1043                        else:1044                            insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \1045                                          "%s, %s, '%s', 'S', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', null, %s, %s, null, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (1046                                              period_id, period_idx, mode, ph1, ph2, ph3, value, region_id,1047                                              group_id,1048                                              is_total, provided_date1049                                          )1050                            try:1051                                self.cursor.execute(insertQuery)1052                            except Exception as e:1053                                logging.error(e)1054                if "BIOFUELS" in self.clean_string(row[1]).upper():1055                    global_bio = "global biofuels"1056                    get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % self.clean_string(1057                        global_bio).upper()1058                    self.cursor.execute(get_group_query)1059                    group_res = self.cursor.fetchall()1060                    if group_res and len(group_res) > 0:1061                        group_id = group_res[0][0]1062                    else:1063                        group_id = "null"1064                    # print("Group_id",group_id)1065                    region_id = "null"1066                    is_total = "null"1067                    #1068                    for val_id in range(2, len(row)):1069                        value = row[val_id]1070                        dict_id = (val_id - 1)1071                        peri_data = period_data.get(dict_id)1072                        period_id = peri_data.get("PeriodID")1073                        period_idx = peri_data.get("PeriodIdx")1074                        mode = peri_data.get("Mode")1075                        ph1 = 11076                        ph2 = "null"1077                        ph3 = "null"1078                        insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \1079                                      "%s, %s, '%s', 'S', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', null, %s, %s, null, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (1080                                          period_id, period_idx, mode, ph1, ph2, ph3, value, region_id,1081                                          group_id,1082                                          is_total, provided_date1083                                      )1084                        try:1085                            self.cursor.execute(insertQuery)1086                        except Exception as e:1087                            logging.error(e)1088                if "TOTAL NON-OPEC".upper() in self.clean_string(row[1]).upper() or total_nonopec == 1:1089                    non_opec = 'NON-OPEC'1090                    get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % self.clean_string(1091                        non_opec).upper()1092                    self.cursor.execute(get_group_query)1093                    group_res = self.cursor.fetchall()1094                    if group_res and len(group_res) > 0:1095                        group_id = group_res[0][0]1096                    else:1097                        group_id = "null"1098                    region_id = "null"1099                    is_total = 11100                    for val_id in range(2, len(row)):1101                        value = row[val_id]1102                        print("value non opec", value)1103                        dict_id = (val_id - 1)1104                        # print(dict_id)1105                        peri_data = period_data.get(dict_id)1106                        period_id = peri_data.get("PeriodID")1107                        period_idx = peri_data.get("PeriodIdx")1108                        mode = peri_data.get("Mode")1109                        ph1 = 11110                        ph2 = "null"1111                        ph3 = "null"1112                        if (1113                                ph1 == "null" and ph2 == "null" and region_id == "null" and is_total == "null" and group_id == "null" and group_id_2 == "null"):1114                            break1115                        else:1116                            insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \1117                                          "%s, %s, '%s', 'S', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', null, %s, %s, null, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (1118                                              period_id, period_idx, mode, ph1, ph2, ph3, value, region_id,1119                                              group_id,1120                                              is_total, provided_date1121                                          )1122                            try:1123                                self.cursor.execute(insertQuery)1124                            except Exception as e:1125                                logging.error(e)1126                    total_nonopec = 01127                if "OPEC" == self.clean_string(row[1]).upper():1128                    pass1129                if "CRUDE" in self.clean_string(row[1]).upper():1130                    opec = "OPEC"1131                    get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % self.clean_string(1132                        opec).upper()1133                    self.cursor.execute(get_group_query)1134                    group_res = self.cursor.fetchall()1135                    if group_res and len(group_res) > 0:1136                        group_id = group_res[0][0]1137                    else:1138                        group_id = "null"1139                    # print("Group_id",group_id)1140                    region_id = "null"1141                    is_total = 11142                    get_group_query = "select * from ProductHR2 where upper(Description) = upper('%s')" % self.clean_string(1143                        'Oil').upper()1144                    self.cursor.execute(get_group_query)1145                    ph_res = self.cursor.fetchall()1146                    # print(ph_res)1147                    if ph_res and len(ph_res) > 0:1148                        ph1 = ph_res[0][2]1149                        ph2 = ph_res[0][0]1150                    else:1151                        ph_id = "null"1152                    for val_id in range(2, len(row)):1153                        value = row[val_id]1154                        print("value crude", value)1155                        dict_id = (val_id - 1)1156                        # print(dict_id)1157                        peri_data = period_data.get(dict_id)1158                        period_id = peri_data.get("PeriodID")1159                        period_idx = peri_data.get("PeriodIdx")1160                        mode = peri_data.get("Mode")1161                        insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \1162                                      "%s, %s, '%s', 'S', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', null, %s, %s, null, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (1163                                          period_id, period_idx, mode, ph1, ph2, ph3, value, region_id,1164                                          group_id,1165                                          is_total, provided_date1166                                      )1167                        try:1168                            self.cursor.execute(insertQuery)1169                        except Exception as e:1170                            logging.error(e)1171                    ph1 = 11172                    ph2 = "null"1173                    ph3 = "null"1174                if "NGL" in self.clean_string(row[1]).upper():1175                    print("ngl")1176                    opec = "OPEC"1177                    get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % self.clean_string(1178                        opec).upper()1179                    self.cursor.execute(get_group_query)1180                    group_res = self.cursor.fetchall()1181                    if group_res and len(group_res) > 0:1182                        group_id = group_res[0][0]1183                    else:1184                        group_id = "null"1185                    region_id = "null"1186                    is_total = 11187                    get_group_query = "select * from ProductHR2 where upper(Description) = upper('%s')" % self.clean_string(1188                        'Gas').upper()1189                    self.cursor.execute(get_group_query)1190                    ph_res = self.cursor.fetchall()1191                    # print(ph_res)1192                    if ph_res and len(ph_res) > 0:1193                        ph1 = ph_res[0][2]1194                        ph2 = ph_res[0][0]1195                    else:1196                        ph_id = "null"1197                    for val_id in range(2, len(row)):1198                        if (1199                                ph1 == "null" and ph2 == "null" and region_id == "null" and is_total == "null" and group_id == "null" and group_id_2 == "null"):1200                            break1201                        else:1202                            value = row[val_id]1203                            # print("value ngls", value)1204                            dict_id = (val_id - 1)1205                            # print(dict_id)1206                            peri_data = period_data.get(dict_id)1207                            period_id = peri_data.get("PeriodID")1208                            period_idx = peri_data.get("PeriodIdx")1209                            mode = peri_data.get("Mode")1210                            insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \1211                                          "%s, %s, '%s', 'S', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', null, %s, %s, null, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (1212                                              period_id, period_idx, mode, ph1, ph2, ph3, value, region_id,1213                                              group_id,1214                                              is_total, provided_date1215                                          )1216                            try:1217                                self.cursor.execute(insertQuery)1218                            except Exception as e:1219                                logging.error(e)1220                    ph1 = 11221                    ph2 = "null"1222                    ph3 = "null"1223                if 'TOTAL OPEC' == self.clean_string(row[1]).upper():1224                    # print("total opec")1225                    opec = "OPEC"1226                    get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % self.clean_string(1227                        opec).upper()1228                    self.cursor.execute(get_group_query)1229                    group_res = self.cursor.fetchall()1230                    if group_res and len(group_res) > 0:1231                        group_id = group_res[0][0]1232                    else:1233                        group_id = "null"1234                    for val_id in range(2, len(row)):1235                        value = row[val_id]1236                        # print("value", value)1237                        dict_id = (val_id - 1)1238                        # print(dict_id)1239                        peri_data = period_data.get(dict_id)1240                        period_id = peri_data.get("PeriodID")1241                        period_idx = peri_data.get("PeriodIdx")1242                        mode = peri_data.get("Mode")1243                        is_total = 11244                        ph1 = 11245                        ph2 = "null"1246                        ph3 = "null"1247                        region_id = "null"1248                        insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \1249                                      "%s, %s, '%s', 'S', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', null, %s, %s, null, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (1250                                          period_id, period_idx, mode, ph1, ph2, ph3, value, region_id,1251                                          group_id,1252                                          is_total, provided_date1253                                      )1254                        try:1255                            self.cursor.execute(insertQuery)1256                        except Exception as e:1257                            logging.error(e)1258                if "NON-OPEC EXCL ANGOLA" == self.clean_string(1259                        row[1]).upper() or "OPEC INCL ANGOLA" in self.clean_string(row[1]).upper():1260                    print("exclusddding angola")1261                    get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % self.clean_string(1262                        'NON - OPEC EXCL ANGOLA').upper()1263                    self.cursor.execute(get_group_query)1264                    group_res = self.cursor.fetchall()1265                    if group_res and len(group_res) > 0:1266                        group_id = group_res[0][0]1267                    else:1268                        group_id = "null"1269                    for val_id in range(2, len(row)):1270                        value = row[val_id]1271                        # print("value", value)1272                        dict_id = (val_id - 1)1273                        # print(dict_id)1274                        peri_data = period_data.get(dict_id)1275                        period_id = peri_data.get("PeriodID")1276                        period_idx = peri_data.get("PeriodIdx")1277                        mode = peri_data.get("Mode")1278                        is_total = "null"1279                        ph1 = "null"1280                        ph2 = "null"1281                        ph3 = "null"1282                        region_id = "null"1283                        try:1284                            if (1285                                    ph2 == "null" and region_id == "null" and is_total == "null" and group_id == "null"):1286                                f = open("error files.txt", "a")1287                                f.write(str(row[1] + '\t'))1288                                f.write(str(value + '\t'))1289                                f.write(file_name + '\t')1290                                f.write('\n')1291                                break1292                        except Exception as e:1293                            logging.error(e)1294                            f = open("error files.txt", "a")1295                            f.write(file_name)1296                            f.write(str(e))1297                            print(e, file_name)1298                if 'HISTORICAL COMPOSITION' in self.clean_string(row[1]).upper():1299                    ph1 = 11300                    ph2 = "null"1301                    ph3 = "null"1302                    print("HISTORIACALLLLLLL")1303                    if 'NON-OPEC HISTORICAL COMPOSITION' in self.clean_string(row[1]).upper():1304                        get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % self.clean_string(1305                            'Non-OPEC').upper()1306                        self.cursor.execute(get_group_query)1307                        group_res = self.cursor.fetchall()1308                        if group_res and len(group_res) > 0:1309                            group_id = group_res[0][0]1310                        else:1311                            group_id = "null"1312                    else:1313                        get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % self.clean_string(1314                            'OPEC').upper()1315                        self.cursor.execute(get_group_query)1316                        group_res = self.cursor.fetchall()1317                        if group_res and len(group_res) > 0:1318                            group_id = group_res[0][0]1319                        else:1320                            group_id = "null"1321                    # print("group id",group_id)1322                    get_group_query2 = "select GroupID from Groups where upper(Description) = upper('%s')" % self.clean_string(1323                        'Historical Composition').upper()1324                    self.cursor.execute(get_group_query2)1325                    group_res = self.cursor.fetchall()1326                    if group_res and len(group_res) > 0:1327                        group_id_2 = group_res[0][0]1328                    else:1329                        group_id_2 = "null"1330                    for val_id in range(2, len(row)):1331                        value = row[val_id]1332                        # print("value", value)1333                        dict_id = (val_id - 1)1334                        # print(dict_id)1335                        peri_data = period_data.get(dict_id)1336                        period_id = peri_data.get("PeriodID")1337                        period_idx = peri_data.get("PeriodIdx")1338                        mode = peri_data.get("Mode")1339                        is_total = 11340                        ph1 = 11341                        ph2 = "null"1342                        ph3 = "null"1343                        region_id = "null"1344                        insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \1345                                      "%s, %s, '%s', 'S', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', null, %s, %s, %s, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (1346                                          period_id, period_idx, mode, ph1, ph2, ph3, value, region_id,1347                                          group_id, group_id_2,1348                                          is_total, provided_date1349                                      )1350                        try:1351                            self.cursor.execute(insertQuery)1352                        except Exception as e:1353                            logging.error(e)1354                if 'TOTAL SUPPLY' == self.clean_string(row[1]).upper():1355                    ph1 = 11356                    ph2 = "null"1357                    ph3 = "null"1358                    region_id = "null"1359                    group_id = "null"1360                    group_id_2 = "null"1361                    for val_id in range(2, len(row)):1362                        value = row[val_id]1363                        dict_id = (val_id - 1)1364                        # print(dict_id)1365                        peri_data = period_data.get(dict_id)1366                        period_id = peri_data.get("PeriodID")1367                        period_idx = peri_data.get("PeriodIdx")1368                        mode = peri_data.get("Mode")1369                        is_total = 11370                        ph1 = 11371                        ph2 = "null"1372                        ph3 = "null"1373                        region_id = "null"1374                        group_id = "null"1375                        insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \1376                                      "%s, %s, '%s', 'S', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', null, %s, %s, null, null, 1.0, current_timestamp(), %s, '%s', 1, '1')" % (1377                                          period_id, period_idx, mode, ph1, ph2, ph3, value, region_id, group_id,1378                                          is_total, provided_date1379                                      )1380                        try:1381                            self.cursor.execute(insertQuery)1382                        except Exception as e:1383                            logging.error(e)1384                            f = open("error files.txt", "a")1385                            f.write(file_name)1386                            f.write(str(e))1387                    break1388                #1389                elif supply_start == False:1390                    if self.clean_string(row[1]).upper() == "SUPPLY" or self.clean_string(1391                            row[1]).upper() == "OECD SUPPLY":1392                        # print("cse truee")1393                        supply_start = True1394                        gr = 'Non-OPEC'1395                        gr2 = 'OECD'1396        except Exception as e:1397            logging.error(e)1398            print(e)1399            query = "update RefTables set start = 13 where ReportedDate = %s and TableType = '1'"1400            try:1401                result = self.cursor.execute(query, (reported_date,))1402            except Exception as e:1403                logging.error(e)1404        os.remove(file_name)1405        print("file removed", file_name)1406    def insert_table2(self, file_name, sub_table=None):1407        provided_date = str(file_name.split("/")[-1]).replace("table1_", "").replace(".csv", "")1408        reported_date = provided_date + " 00:00:00"1409        version_id = str(file_name.split("/")[-1]).replace(".csv", "").replace(sub_table, "").replace("-", "")1410        query = "delete from RefTable where ReportedDate = %s and TableType = '1'"1411        try:1412            result = self.cursor.execute(query, (reported_date))1413            # print(result)1414        except Exception as e:1415            logging.error(e)1416            # print(e, file_name)1417        # Mongo Query1418        query = "insert into RefTable(ReportedDate,TableType,Flag) values(" \1419                "'%s','2','1')" % (reported_date,)1420        try:1421            result = self.cursor.execute(query)1422            # print(result)1423        except Exception as e:1424            logging.error(e)1425        dates = 0001426        query = "select max(ReportedDate) from IEA_HistoricMacroElementsData_2_issue where TableType = 2"1427        try:1428            result = self.cursor.execute(query)1429            if result > 1:1430                max_date = str(self.cursor.fetchone())1431                max_date = (max_date.strip("(datetime.datetime(,)").replace('', '')[0:12])1432                max_date = max_date.split(',')1433                y = int(max_date[0])1434                m = int(max_date[1])1435                d = int(max_date[2])1436                dates = str(datetime.date(y, m, d))1437                dates = dates.replace('-', '')1438               # print((dates))1439            else:1440                dates = 0001441        except Exception as e:1442            logging.error(e)1443        if int(dates) != int(version_id):1444            try:1445                period_data = {}1446                start = False1447                ph1 = 11448                ph2 = "null"1449                ph3 = "null"1450                gr = ''1451                provided_date = str(file_name.split("/")[-1]).replace("table2_","").replace(".csv","")1452                data = self.read_csv_file(file_name=file_name)1453                rows = self.format_changer_for_table2(rows=data)1454                for row_id in range(len(rows)):1455                    row = rows[row_id]1456                    if row_id == 0:1457                        period_data = self.convert_to_period_data(row=row, allow_all=True)1458                    else:1459                        if start == True:1460                            get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % gr1461                            self.cursor.execute(get_group_query)1462                            group_res = self.cursor.fetchall()1463                            if group_res and len(group_res) > 0:1464                                group_id = group_res[0][0]1465                            else:1466                                group_id = "null"1467                            is_total = "null"1468                            region_id_query = "select RegionID from Regions where RegionName='%s' and ProviderID=1"%self.clean_string(row[1])1469                            self.cursor.execute(region_id_query)1470                            res = self.cursor.fetchall()1471                            if res and len(res) > 0:1472                                region_id = res[0][0]1473                            else:1474                                region_id = "null"1475                            if "TOTAL" in row[1].upper():1476                                is_total = 11477                            for val_id in range(2, len(row)):1478                                value = row[val_id]1479                                peri_data = period_data.get(val_id)1480                                period_id = peri_data.get("PeriodID")1481                                period_idx = peri_data.get("PeriodIdx")1482                                mode = peri_data.get("Mode")1483                                insertQuery = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \1484                                              "%s, %s, '%s', 'D', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', null, %s, %s, null, null, 1.0, current_timestamp(), %s, '%s', 1, '2')" % (1485                                                period_id, period_idx, mode, ph1, ph2, ph3, value, region_id, group_id,1486                                                  is_total, provided_date1487                                              )1488                                try:1489                                    self.cursor.execute(insertQuery)1490                                except Exception as e:1491                                    logging.error(e)1492                            if self.clean_string(row[1]).upper() == "TOTAL NON-OECD":1493                                break1494                        elif start == False:1495                            if self.clean_string(row[1]).upper() == "Demand (mb/d)".upper():1496                                start = True1497                                gr = 'OECD'1498                        if "Total OECD".upper() == self.clean_string(row[1]).upper():1499                            gr = "Non-OECD"1500            except Exception as e:1501                logging.error(e)1502                query = "delete from IEA_HistoricMacroElementsData_2_issue where ReportedDate = %s and TableType = '2'"1503                try:1504                    result = self.cursor.execute(query, (reported_date,))1505                except Exception as e:1506                    logging.error(e)1507                query = "update RefTable set flag = 2 where ReportedDate = %s and TableType = '2'"1508                try:1509                    result = self.cursor.execute(query, (reported_date,))1510                except Exception as e:1511                    logging.error(e)1512    def convert_pdf_to_csv(self, pdf_list,csv_folder_list):1513        """1514        converts all pdf from folder into csv1515        :param pdf_list:1516        :param csv_folder_list:1517        :return:1518          page"""1519        page_numbers = {}1520        try:1521            for pdf_file in pdf_list:1522                try:1523                    for folder in csv_folder_list:1524                        provided_date = pdf_file.replace(".pdf", "") + " 00:00:00"1525                        if folder == 'table1':1526                            version_id = pdf_file.replace(".pdf", "").replace("-", "")1527                            print(pdf_file)1528                            page_numbers [pdf_file] = '1'1529                            print(page_numbers)1530                            tabletype = '1'1531                        # elif folder == 'table2b':1532                        #     version_id = pdf_file.replace(".pdf", "").replace("-", "")1533                        #     page_numbers = iea_obj.find_page_number_1(table_contents, folder)1534                        #     # page_numbers[pdf_file] = '5'1535                        #     tabletype = '2b'1536                        #1537                        # elif folder == 'table3':1538                        #     version_id = pdf_file.replace(".pdf", "").replace("-", "")1539                        #     # page_numbers = iea_obj.find_page_number_1(table_contents, folder)1540                        #     tabletype = '3'1541                        #     page_numbers[pdf_file] = '6'1542                        #1543                        # elif folder == 'table4':1544                        #     version_id = pdf_file.replace(".pdf", "").replace("-", "")1545                        #     page_numbers = iea_obj.find_page_number_1(table_contents, folder)1546                        #     print(page_numbers)1547                        #     tabletype = '4'1548                        #1549                        # elif folder == 'table2':1550                        #     version_id = pdf_file.replace(".pdf", "").replace("-", "")1551                        #     # print(table_contents)1552                        #     page_numbers = iea_obj.find_page_number_1(table_contents, folder)1553                        #     tabletype = '2'1554                        #1555                        # elif folder == 'table2a':1556                        #     version_id = pdf_file.replace(".pdf", "").replace("-", "")1557                        #     # print(table_contents)1558                        #     page_numbers = iea_obj.find_page_number_1(table_contents, folder)1559                        #     tabletype = '2a'1560                        #1561                        # elif folder == 'table2b':1562                        #     version_id = pdf_file.replace(".pdf", "").replace("-", "")1563                        #     # print(table_contents)1564                        #     page_numbers = iea_obj.find_page_number_1(table_contents, folder)1565                        #     tabletype = '2b'1566                        if bool(page_numbers):1567                            for pdf, value in page_numbers.items():1568                                if pdf == pdf_file:1569                                    csv_name = folder + "_" + pdf_file.replace(".pdf", ".csv")1570                                    page_number = int(value)  #1571                                    con_tab_1 = iea_obj.conert_pdf_to_csv(1572                                        pdf_name=PDF_PATH + pdf_file,1573                                        csv_name=CSV_PATH + folder + "/" + csv_name,1574                                        page_number=page_number)1575                                    if con_tab_1 == 1:1576                                        os.remove(PDF_PATH + pdf_file)1577                                        iea_obj.insert_csv_data(csv_folder_list=csv_folder_list)1578                        else:1579                            query = "UPDATE RefTable SET flag = '2' where ReportedDate = %s and TableType = '%s"1580                            try:1581                                result = self.cursor.execute(query, (provided_date, tabletype,))1582                            except Exception as e:1583                                logging.error(e)1584                except Exception as e:1585                    logging.error(e)1586        except Exception as e:1587            logging.error(e)1588            raise e1589    def insert_csv_data(self, csv_folder_list):1590        """1591        call insert function of every table data1592        :param csv_folder_list:1593        :return:1594        """1595        try:1596            for csv_folder in csv_folder_list:1597                csv_list = [f for f in os.listdir(CSV_PATH + csv_folder + '/.') if1598                            os.path.isfile(CSV_PATH + csv_folder + '/' + f) and f.endswith('.csv')]1599                for csv_name in csv_list:1600                    try:1601                        if csv_folder[-1].isalpha():1602                            folder_name = csv_folder1603                            function_name = csv_folder[:-1]1604                        else:1605                            folder_name = csv_folder1606                            function_name = csv_folder1607                        if folder_name == "table2":1608                            data = ""1609                        else:1610                            data = "_data"1611                        eval(1612                            "iea_obj.insert_" + function_name + data+"(file_name='" + CSV_PATH + folder_name + "/" + csv_name + "', sub_table='" + folder_name + "_')")1613                    except Exception as e:1614                        logging.error(e)1615            #            print(e)1616        except Exception as e:1617            logging.error(e)1618            raise e1619    def insert_table3_data(self, file_name, sub_table=None):1620        version_id = str(file_name.split("/")[-1]).replace(".csv", "").replace("table3_", "").replace("-", "")1621        provided_date = str(file_name.split("/")[-1]).replace(".csv", "").replace(sub_table, "")1622        reported_date = provided_date + " 00:00:00"1623     #   print(reported_date)1624        query = "delete from RefTable where ReportedDate = %s and TableType = 3"1625        try:1626            result = self.cursor.execute(query, (reported_date,))1627      #      print(result)1628        except Exception as e:1629            logging.error(e)1630       #     print(e)1631        # Mongo Query1632        query = "insert into RefTable(ReportedDate,TableType,Flag) values(" \1633                "'%s','3','1')" % (reported_date,)1634        try:1635            result = self.cursor.execute(query)1636        #    print(result)1637        except Exception as e:1638            logging.error(e)1639        dates = 0001640        query = "select max(ReportedDate) from IEA_HistoricMacroElementsData where TableType = 3"1641        try:1642            result = self.cursor.execute(query, (reported_date,))1643            if result > 1:1644                max_date = str(self.cursor.fetchone())1645                max_date = (max_date.strip("(datetime.datetime(,)").replace('', '')[0:12])1646                max_date = max_date.split(',')1647                y = int(max_date[0])1648                m = int(max_date[1])1649                d = int(max_date[2])1650                dates = str(datetime.date(y, m, d))1651                dates = dates.replace('-', '')1652         #       print((dates))1653            else:1654                dates = 0001655        except Exception as e:1656            logging.error(e)1657        if int(dates) != int(version_id):1658          #  print(version_id)1659            try:1660                rows = self.read_csv_file(file_name=file_name)1661                category_list = ["OPEC", "NON-OPEC"]1662                exclude_list = ["OECD", "NON-OECD", "PROCESSING GAINS", "GLOBAL BIOFUELS"]1663                # exclude_list = ["PROCESSING GAINS","GLOBAL BIOFUELS"]1664                specific_country = {1665                    "UAE": 'United Arab Emirates',1666                    "Congo": 'Congo (Rep. of)',1667                    "UK": 'United Kingdom',1668                    "Russia": 'Russian Federation'1669                }1670                res = {}1671                country_id = "null"1672                region_id = "null"1673                group_id = "null"1674                group_2_id = "null"1675                reported_date = str(file_name.split("/")[-1]).replace(".csv", "").replace("table3_", "")1676                versionID = reported_date.replace(".csv", "").replace("-", "").replace("table3_", "")1677                for count in range(len(rows)):1678                    d = rows[count]1679                    if count == 0:1680                        if int(versionID) < 19990209:1681                            d = self.table_3_prior_formatting(d)1682                        periods = self.convert_to_period_data(d, allow_all=True)1683                    if count == 1:1684                        name = self.clean_string(d[1])1685                        if name.upper() in category_list:1686                            group_2_id = "null"1687                            get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % name1688                            self.cursor.execute(get_group_query)1689                            group_res = self.cursor.fetchall()1690                            if group_res and len(group_res) > 0:1691                                group_id = group_res[0][0]1692                            else:1693                                group_id = "null"1694                    if count > 2:1695                        # print(res)1696                        name = self.clean_string(d[1])1697                        name = name.replace(' ', '')1698           #             print(name)1699                        val1 = specific_country.get(name)1700                        if val1:1701                            name = val11702                        is_total = "null"1703                        ph1 = 11704                        ph2 = 11705                        ph3 = "null"1706                        ph4 = "null"1707                        if name.upper() == "NGLS":1708                            is_total = 11709                            ph1 = 11710                            ph2 = 21711                            ph3 = "null"1712                            is_total = "null"1713                        if name == 'ProcessingGains':1714                            # print("in processing bgainsssss")1715                            ph1 = 1;1716                            ph2 = "null"1717                            ph3 = "null"1718                            is_total = "null"1719                            group_2_id = "null"1720                            get_group_query = "select GroupID from Groups where  upper(Description) ='Processing Gains'"1721                            self.cursor.execute(get_group_query)1722                            group_res = self.cursor.fetchall()1723                            if group_res and len(group_res) > 0:1724                                group_id = group_res[0][0]1725                            else:1726                                group_id = "null"1727                        if "TOTAL" in name.upper():1728                            is_total = 11729                            ph1 = 11730                            ph2 = "null"1731                            ph3 = "null"1732                            ph4 = "null"1733                            country_id = "null"1734                            region_id = "null"1735                            name = name.replace("Total", "").strip()1736                            name = name.replace("TOTAL", "").strip()1737                            if name.upper() == "CRUDE OIL":1738                                ph1 = 11739                                ph2 = 11740                                ph3 = "null"1741                            elif name.upper() == "NGLS":1742                                ph1 = 11743                                ph2 = 21744                                ph3 = "null"1745                            elif name.upper() in category_list:1746                                ph1 = 11747                                ph2 = "null"1748                                ph3 = "null"1749                                group_2_id = "null"1750                        elif name.upper() in exclude_list:1751                            get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % name1752                            self.cursor.execute(get_group_query)1753                            group_res = self.cursor.fetchall()1754                            if group_res and len(group_res) > 0:1755                                group_2_id = group_res[0][0]1756                            else:1757                                group_2_id = "null"1758                        elif name.upper() in category_list:1759                            get_group_query = "select GroupID from Groups where upper(Description) = upper('%s')" % name1760                            self.cursor.execute(get_group_query)1761                            group_res = self.cursor.fetchall()1762                            if group_res and len(group_res) > 0:  # to make groupid2 null condition1763                                group_id = group_res[0][0]1764                            else:1765                                group_id = "null"1766                        else:1767                            name = name.replace(' ', '')1768                            check_region_name = "select RegionID from Regions where ProviderID = '1' and upper(replace(RegionName,' ',''))=upper('%s')" % name1769                            self.cursor.execute(check_region_name)1770                            region_res = self.cursor.fetchall()1771                            if region_res and len(region_res) > 0:1772                                region_id = region_res[0][0]1773                                country_id = "null"1774                            else:1775                                name = name.replace(' ', '')1776                                if (name == 'Others'):1777                                    region_id = region_id1778                                    country_id = "null"1779                                else:1780                                    name = name.replace(' ', '')1781                                    country_id_query = "select CountryID from Country where upper(replace(CountryName,' ','')) ='%s'" % self.clean_string(1782                                        name)1783                                    self.cursor.execute(country_id_query)1784                                    res = self.cursor.fetchall()1785                                    if res and len(res) > 0:1786                                        country_id = str(res[0][0])1787                                    else:1788                                        country_id = "null"1789                                    region_id_query = "select RegionID from RegionCountryMapping where CountryID = '%s'" % (1790                                        country_id)1791                                    self.cursor.execute(region_id_query)1792                                    res = self.cursor.fetchall()1793                                    if res and len(res) > 0:1794                                        region_id = str(res[0][0])1795                                    else:1796                                        region_id = "null"1797                        if name != "Neutral Zone" or "gains" in name or "Biofuels" in name:1798                            if name.upper() == "SUPPLY":1799                                group_id = "null"1800                                group_2_id = "null"1801                            if name.upper() == "Processing gains" and "Global Biofuels" in name:1802                                group_2_id = "null"1803                        elemCount = 21804                        for dvalue in d[2:]:1805                            if dvalue:1806                                p1 = periods.get(elemCount)1807                                periodID = p1.get("PeriodID")1808                                periodIdx = p1.get("PeriodIdx")1809                                mode = p1.get("Mode")1810                                ElementType = 'S'1811                                insert_query = "insert into IEA_HistoricMacroElementsData(PeriodID, PeriodIdx, Mode, ElementType, PH1, PH2, PH3, PH4, Value, ValueType, ValueUnit, Currency, CountryID, RegionID, GroupID1, GroupID2, GroupID3, VersionID, LastModifiedDate, IsTotal, ReportedDate, ProviderID, TableType) values(" \1812                                               "%s, %s, '%s', '%s', %s, %s, %s, null, %s, 'V', 'MBPD', 'USD', %s, %s, %s, %s, null, 1.0, current_timestamp(), %s, '%s', 1,'3')" % (1813                                                   periodID, periodIdx, mode, ElementType, ph1, ph2, ph3, dvalue,1814                                                   country_id, region_id, group_id, group_2_id, is_total, reported_date1815                                               )1816                                try:1817                                    self.cursor.execute(insert_query)1818                                except Exception as e:1819                                    logging.error(e)1820                            elemCount += 11821            except Exception as e:1822                logging.error(e)1823                query = "update RefTable set flag = 2 where ReportedDate = %s and TableType = '3'"1824                try:1825                    result = self.cursor.execute(query, (reported_date,))1826                except Exception as e:1827                    logging.error(e)1828if __name__ == '__main__':1829    try:1830        iea_obj = IEADataProcessing()1831        csv_folder_list = ["table1"]1832        pdf_list = iea_obj.get_pdf_list()1833        print(pdf_list)1834        # table_name = 'contents'1835        # table_contents = iea_obj.find_table_contents(pdf_list, table_name)1836        # print(table_contents)1837        iea_obj.convert_pdf_to_csv(pdf_list=pdf_list, csv_folder_list=csv_folder_list)1838    except Exception as e:1839        logging.error(e)...app_base.py
Source:app_base.py  
...33        self.add_item('globals', "<script>var origin='%s'</script>" % (self.item_id))34        self.content['page-title'] = ''35        36        dynamic = []37        sql = "SELECT * FROM `core.content` WHERE (`item_id`=%%s OR `item_id`='global') %s" % (get_group_query(self))3839        content = self.db.get_records(sql, (self.item_id,))40        for item in content:41            if item['dynamic'] == True:42                dynamic.append(item)43            else:44                self.content[item['block_name']] = item['content']4546            if self.page and self.page['title']:47                self.content['page-title'] = self.page['title']48                49        if len(dynamic) > 0:50            self.process_dynamic(dynamic)51            5253    def not_found(self):54        template = self.load_template('components/not-found')55        self.content['body'] = template.format_map({})56        self.context.status = '404 Not Found'57        self.render(collapse='hard')58        self.last_handler = True59        6061    def component_not_found(self):62        self.error = True63        self.context.status = '404 Not Found'64        self.context.write('Not Found')65        self.last_handler = True66        67        68    def get(self):69        sql = "SELECT * FROM `core.hierarchy` WHERE `path`=%%s %s" % (get_group_query(self))70        page = self.db.get_record(sql, (self.context.path,))7172        if page:73            if page['target']:74                sql = "SELECT * FROM `core.hierarchy` WHERE `item_id`=%%s %s" % (get_group_query(self))75                page = self.db.get_record(sql, (page['target'],))76                if not page:77                    return78                79                self.context.path = page['path']80            self.page = page81            self.item_id = page['item_id']82            self.parent_id = page['parent_id']83            if page['template'] != None:84                self.template_file = page['template']8586            self.render(collapse='hard')87        else:88            self.not_found()89            9091    def post(self):92        sql = "SELECT * FROM `core.hierarchy` WHERE `path`=%%s %s" % (get_group_query(self))93        page = self.db.get_record(sql, (self.context.path,))94        if page and page['processor']:95            if hasattr(self, page['processor']):96                method = getattr(self, page['processor'], None)97                if method != None:98                    method()
...groups_repository.py
Source:groups_repository.py  
...11    def find_by_id(id: str):12        result = db_session.query(GroupModel).get(id)13        return result.__dict__14    def list(params: ListGroupsQuery):15        query = get_group_query(params.dict())16        result = es.search(index=ES_INDEX_ITEM,17                           body=query,18                           request_timeout=20,19                           ignore=[400, 404],20                           size=500)21        if "hits" not in result:22            return []23        hits = result["hits"]["hits"]...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.
Get 100 minutes of automation test minutes FREE!!
