How to use list_tags method in tempest

Best Python code snippet using tempest_python

meop.py

Source: meop.py (GitHub)


from pathlib import Path
import os
import shutil
import xarray as xr
import pandas as pd
import numpy as np
import csv
import gsw
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from importlib import reload
import netCDF4 as nc

processdir = Path.home() / 'MEOP_process'

# list of functions

# EXP_from_SMRU_CODE(smru_platform_code)
# list_tag_EXP(EXP, qf='lr0')
# fname_prof(smru_name, depl='', qf='hr1')
# fname_plot_diags_matlab(smru_name, depl='', qf='hr1')
# N_PARAM(ds, PARAM)
# copy_file(file_name, src_dir, dst_dir)

# read_list_profiles(rebuild=False, verbose=False, public=False, Tdata=False, country=None, qf='lr0')
# read_list_deployment()

# label_regions(list_tags)
# filter_public_data(list_profiles, list_tags, list_deployments)
# filter_profiles_with_Tdata(list_profiles, list_tags, list_deployments)
# filter_country(country, list_profiles, list_tags, list_deployments)

# copy_netcdf_variable(nc_in, var_name_in, var_dims_in, nc_out, var_name_out, var_dims_out)
# read_ncfile(ncfile_name)

# ----------------------------------- utils -----------------------------------
def EXP_from_SMRU_CODE(smru_platform_code):
    return smru_platform_code.split("-")[0]


# return the ncARGO filename for a tag
def fname_prof(smru_name, depl='', qf='hr1'):
    if not depl:
        depl = EXP_from_SMRU_CODE(smru_name)
    return Path(processdir, 'final_dataset_prof', depl, smru_name + '_' + qf + '_prof.nc')


# return the list of ncARGO tag files for a deployment
def list_tag_EXP(EXP, qf='lr0'):
    dirEXP = Path(processdir, 'final_dataset_prof', EXP)
    list_tag = [ncfile for ncfile in dirEXP.glob(f'{EXP}-*_{qf}_prof.nc')]
    return list_tag


# return the filename of the matlab TS diagnostic plot
def fname_plot_diags_matlab(smru_name, depl='', qf='hr1'):
    if not depl:
        depl = EXP_from_SMRU_CODE(smru_name)
    return processdir / 'plots' / depl / (smru_name + '_' + qf + '_diags_TS_adj.png')


# return a DataArray with the number of valid data points per profile for the given PARAM
def N_PARAM(ds, PARAM):
    if PARAM + '_QC' in list(ds.variables):
        N_PARAM = np.sum((ds[PARAM + '_QC'] == b'1'), axis=1)
    else:
        N_PARAM = xr.DataArray(np.zeros(ds.dims['N_PROF']), dims=['N_PROF'])
    return N_PARAM


# copy a file
def copy_file(file_name, src_dir, dst_dir):
    shutil.copyfile(Path(src_dir) / file_name, Path(dst_dir) / file_name)


# read the list_deployment.csv file in processdir and return a pandas dataframe
def read_list_deployment(filename_list='list_deployment.csv'):

    if Path(processdir, filename_list).is_file():
        list_deployment = pd.read_csv(Path(processdir, filename_list))
    else:
        print('File', filename_list, 'not found')
        return None
    newnames = {}
    for var in list_deployment:
        newnames[var] = var.upper()

    return list_deployment.rename(columns=newnames).set_index('DEPLOYMENT_CODE')


# build the list of data in MEOP, store it in a pickle file and return the dataframe
def build_list_metadata(qf='lr0'):

    datadir = Path(processdir, 'final_dataset_prof')

    # walk through datadir subfolders
    list_df = []
    for dirpath in datadir.iterdir():
        dirname = dirpath.parts[-1]
        if qf in ['lr1', 'hr1', 'fr1', 'all', 'hr2']:
            data_suffix = '_ADJUSTED'
        else:
            data_suffix = ''
        for ncfile in dirpath.glob(f'{dirname}-*_{qf}_prof.nc'):
            with xr.open_dataset(ncfile) as ds:
                data = {'DEPLOYMENT_CODE': EXP_from_SMRU_CODE(ds.smru_platform_code),
                        'SMRU_PLATFORM_CODE': ds.smru_platform_code,
                        'CYCLE_NUMBER': ds['CYCLE_NUMBER'].astype(int),
                        'JULD': ds['JULD'],
                        'LATITUDE': ds['LATITUDE'],
                        'LONGITUDE': ds['LONGITUDE'],
                        'N_TEMP': N_PARAM(ds, 'TEMP' + data_suffix),
                        'N_PSAL': N_PARAM(ds, 'PSAL' + data_suffix),
                        'N_CHLA': N_PARAM(ds, 'CHLA' + data_suffix)}
                df = pd.DataFrame(data)
                list_df.append(df)

    # concatenate the list of dataframes into one dataframe
    df_all = pd.concat(list_df)
    df_all.to_pickle(processdir / f'list_meta_{qf}_prof.pkl')

    return df_all


# determine the region for each tag
def label_regions(list_tags):

    # set a new column called MASK with a regional label
    from scipy.interpolate import RegularGridInterpolator
    import regionmask

    basins = regionmask.defined_regions.ar6.all
    label = basins.names
    lon = np.arange(-179.5, 180)
    lat = np.arange(-89.5, 90)
    mask = basins.mask(lon, lat)
    f = RegularGridInterpolator((lon, lat), mask.transpose().values, method='nearest')
    list_tags["MASK"] = f(list_tags[['LONGITUDE', 'LATITUDE']].values)
    list_tags["MASK"] = list_tags.MASK.map(dict(enumerate(label)))

    map_regions = {
        'Southern-Ocean': 'Southern Ocean',
        'E.Antarctica': 'Southern Ocean',
        'W.Antarctica': 'Southern Ocean',
        'Arctic-Ocean': 'North Atlantic',
        'N.Pacific-Ocean': 'North Pacific',
        'C.North-America': 'North Pacific',
        'W.North-America': 'North Pacific',
        'N.E.North-America': 'North Atlantic',
        'E.North-America': 'North Atlantic',
        'Greenland/Iceland': 'North Atlantic',
        'N.Atlantic-Ocean': 'North Atlantic',
        'N.W.North-America': 'North Pacific',
        'N.Europe': 'North Atlantic',
        'S.Australia': 'Australia',
        'N.Central-America': 'North Pacific',
        'N.South-America': 'Tropical Atlantic',
        'S.South-America': 'South Atlantic',
        'S.Atlantic-Ocean': 'South Atlantic',
    }
    list_tags['MASK'] = list_tags.MASK.map(map_regions)

    return list_tags


# select only public data
def filter_public_data(list_profiles, list_tags, list_deployments):

    list_tags = list_tags[list_tags.PUBLIC == 1]
    list_deployments = list_deployments[list_deployments.PUBLIC == 1]
    list_profiles = list_profiles.merge(list_tags.SMRU_PLATFORM_CODE, on='SMRU_PLATFORM_CODE')

    return list_profiles, list_tags, list_deployments


# select only profiles with temperature data points
def filter_profiles_with_Tdata(list_profiles, list_tags, list_deployments):

    list_tags = list_tags[list_tags.N_PROF_TEMP != 0]
    list_deployments = list_deployments.merge(list_tags.DEPLOYMENT_CODE, on='DEPLOYMENT_CODE')
    list_profiles = list_profiles.loc[list_profiles.N_TEMP != 0]

    return list_profiles, list_tags, list_deployments


# select only data from deployments of the given country
def filter_country(country, list_profiles, list_tags, list_deployments):

    list_deployments = list_deployments.loc[list_deployments.COUNTRY == country]
    list_tags = list_tags[list_tags.DEPLOYMENT_CODE.isin(list_deployments.DEPLOYMENT_CODE)]
    list_profiles = list_profiles[list_profiles.SMRU_PLATFORM_CODE.isin(list_tags.SMRU_PLATFORM_CODE)]

    return list_profiles, list_tags, list_deployments


# read the MEOP data list from a pickle file and return the dataframes.
# If the pickle file is not found, the list file is generated.
def read_list_profiles(rebuild=False, verbose=False, public=False, Tdata=False, country=None, qf='lr0'):

    if (Path(processdir / f'list_meta_{qf}_prof.pkl').is_file()) and (not rebuild):
        list_profiles = pd.read_pickle(processdir / f'list_meta_{qf}_prof.pkl')
    else:
        print(f'Create metadata files in {processdir}: list_meta_{qf}_prof.pkl')
        list_profiles = build_list_metadata(qf=qf)

    # read the list of profiles
    for col in ['N_TEMP', 'N_PSAL', 'N_CHLA']:
        list_profiles[col] = list_profiles[col].where(list_profiles[col] != 0, np.nan)

    list_tags = list_profiles.groupby('SMRU_PLATFORM_CODE').first()\
        .drop(['N_TEMP', 'N_PSAL', 'N_CHLA', 'CYCLE_NUMBER'], axis='columns')
    list_tags['N_PROF_TEMP'] = list_profiles.groupby('SMRU_PLATFORM_CODE').N_TEMP.count()
    list_tags['N_PROF_PSAL'] = list_profiles.groupby('SMRU_PLATFORM_CODE').N_PSAL.count()
    list_tags['N_PROF_CHLA'] = list_profiles.groupby('SMRU_PLATFORM_CODE').N_CHLA.count()

    agg_ops = {'JULD': min, 'LATITUDE': np.mean, 'LONGITUDE': np.mean, 'N_PROF_TEMP': sum,
               'N_PROF_PSAL': sum, 'N_PROF_CHLA': sum}
    list_deployments = list_tags.groupby('DEPLOYMENT_CODE').agg(agg_ops)
    list_deployments['N_TAGS'] = list_tags.groupby('DEPLOYMENT_CODE').DEPLOYMENT_CODE.count()
    list_deployments = list_deployments.merge(read_list_deployment(), on='DEPLOYMENT_CODE', how='outer')
    drop_list = ['START_DATE', 'END_DATE', 'START_DATE_JUL']
    list_deployments = list_deployments.drop(drop_list, axis='columns')

    list_public = list_deployments.reset_index()[['DEPLOYMENT_CODE', 'PUBLIC']]
    list_tags = list_tags.reset_index().merge(list_public, on='DEPLOYMENT_CODE')

    # add correction coefficients in list_tags
    coeff = pd.read_csv(processdir / 'table_coeff.csv')
    list_tags = list_tags.merge(coeff, left_on='SMRU_PLATFORM_CODE', right_on='smru_platform_code', how='outer')
    list_tags['comment'] = list_tags['comment'].fillna('no comment')
    tag_problem = list_tags.loc[list_tags.SMRU_PLATFORM_CODE.isnull(), :]
    if len(tag_problem.smru_platform_code):
        if verbose:
            print('List of tags with correction coefficients not yet listed in list_deployment:')
            print(tag_problem.smru_platform_code)
        message = 'tag with correction coefficient, yet no netcdf file'
        for tag in list(tag_problem.smru_platform_code):
            comment = coeff.loc[coeff.smru_platform_code == tag, 'comment']
            if message not in comment:
                if 'no comment' in comment:
                    comment = message
                else:
                    comment = comment + ', ' + message
                coeff.loc[coeff.smru_platform_code == tag, 'comment'] = comment
                list_tags.loc[list_tags.smru_platform_code == tag, 'comment'] = comment
    list_tags = list_tags.drop('smru_platform_code', axis='columns')

    # add variable_offset in list_tags
    salinity_offsets = pd.read_csv(processdir / 'table_salinity_offsets.csv')
    salinity_offsets['variable_offset'] = 1
    variable_offset = salinity_offsets[['smru_platform_code', 'variable_offset']]
    list_tags = list_tags.merge(variable_offset, left_on='SMRU_PLATFORM_CODE', right_on='smru_platform_code', how='outer')\
        .drop('smru_platform_code', axis='columns')

    # add parameters from table_param.csv in list_deployments
    param = pd.read_csv(processdir / 'table_param.csv')
    list_deployments = list_deployments.reset_index().merge(param, left_on='DEPLOYMENT_CODE', right_on='deployment_code', how='outer')\
        .drop('deployment_code', axis='columns')

    list_deployment_hr = pd.read_csv(processdir / 'list_deployment_hr.csv', dtype={'prefix': str, 'instr_id': str, 'year': str})
    list_tags = list_tags.merge(list_deployment_hr, left_on='SMRU_PLATFORM_CODE', right_on='smru_platform_code', how='outer')
    list_tags['comment'] = list_tags['comment'].fillna('no comment')
    tag_problem = list_tags.loc[list_tags.SMRU_PLATFORM_CODE.isnull(), :]
    if len(tag_problem.instr_id):
        if verbose:
            print('List of instr ids for tags with hr datasets but no low resolution ones:')
            print(tag_problem.instr_id)
        for tag in list(tag_problem.index):
            list_tags = list_tags.drop(tag, axis=0)
    list_tags = list_tags.drop('smru_platform_code', axis='columns')

    list_tags = label_regions(list_tags)
    if 'MASK' not in list_profiles:
        list_profiles = list_profiles.merge(list_tags.set_index('SMRU_PLATFORM_CODE').MASK, on='SMRU_PLATFORM_CODE')

    if verbose:
        print(f'Update metadata files in {processdir}: list_profiles.pkl, list_tags.csv, list_deployments.csv')

    if public:
        list_profiles, list_tags, list_deployments = filter_public_data(list_profiles, list_tags, list_deployments)

    if Tdata:
        list_profiles, list_tags, list_deployments = filter_profiles_with_Tdata(list_profiles, list_tags, list_deployments)

    if country:
        list_profiles, list_tags, list_deployments = filter_country(country, list_profiles, list_tags, list_deployments)

    return list_profiles, list_tags, list_deployments


# copy the variable var_name_in from nc_in into nc_out
def copy_netcdf_variable(nc_in, var_name_in, var_dims_in, nc_out, var_name_out, var_dims_out):

    with nc.Dataset(nc_in) as src, nc.Dataset(nc_out, "a") as dst:
        # copy dimensions if not already existing
        for i, name in enumerate(var_dims_out):
            if name not in dst.dimensions:
                dst.createDimension(name, src.dimensions[var_dims_in[i]].size)
            if src.dimensions[var_dims_in[i]].size - dst.dimensions[var_dims_out[i]].size != 0:
                print(f"Dimension {name} has wrong size in {nc_out}")
                return 0
        # copy the variable
        if var_name_out not in dst.variables:
            dst.createVariable(var_name_out, src.variables[var_name_in].datatype, var_dims_out)
        # copy variable attributes all at once via dictionary
        dst[var_name_out].setncatts(src[var_name_in].__dict__)
        dst[var_name_out][:] = src[var_name_in][:]

    return 1


# read a netCDF ARGO file and return an xarray dataset structure
def read_ncfile(ncfile_name):

    if ncfile_name.is_file():
        ds = xr.open_dataset(ncfile_name)
        for dim in ds.dims:
            ds[dim] = ((dim), ds[dim])
            ds = ds.set_coords([dim])
        ds['N_TEMP'] = (('N_PROF'), N_PARAM(ds, 'TEMP'))
        ds['N_PSAL'] = (('N_PROF'), N_PARAM(ds, 'PSAL'))
        if 'CHLA' in ds.variables:
            ds['N_CHLA'] = (('N_PROF'), N_PARAM(ds, 'CHLA'))
        if 'DOXY' in ds.variables:
            ds['N_DOXY'] = (('N_PROF'), N_PARAM(ds, 'DOXY'))
    else:
        print('No file: ', ncfile_name)
        return None
    return ds
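
A quick usage sketch (not part of the original file): it assumes the ~/MEOP_process directory tree and CSV tables the module expects are present, and 'ct34' is a hypothetical deployment code.

# list the tag files of one deployment and count valid temperature points per tag
for ncfile in list_tag_EXP('ct34', qf='lr0'):   # 'ct34' is a hypothetical deployment code
    ds = read_ncfile(ncfile)
    if ds is not None:
        print(ncfile.name, int(ds['N_TEMP'].sum()))

# build the profile/tag/deployment tables, keeping public data only
list_profiles, list_tags, list_deployments = read_list_profiles(public=True)
print(list_tags[['SMRU_PLATFORM_CODE', 'MASK', 'N_PROF_TEMP']].head())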


pysica_classes.py

Source: pysica_classes.py (GitHub)


import pint
import numpy as np
import loaders
import odm
import pandas as pd
from pymongo import MongoClient
from constantes import *
import datetime


# DEPRECATED
class Curva(object):
    def __init__(self, curva_id, **kwargs):
        # basic variables
        self.curva_id = curva_id
        self.titulo = ""
        self.descricao = ""
        self.title = ""
        self.description = ""
        self.ndim = 0
        self.tags = []
        # self.x = np.array()
        # TODO: abstract this better
        self.ue = []  # vector of engineering units (ue) matching the tags
        ue = {'original': '', 'used': ''}
        self.val = None
        # self.tags_ = tag
        self.metadata = []
        metadata = {'x_tag': None, 'y_tag': None, 'z_tag': None,
                    'x_options': {}, 'y_options': {}, 'z_options': {}}
        self.x_values = np.array([])
        self.y_values = np.array([])
        self.z_values = np.array([])
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)

    def set_values(self, list_tags, list_values):
        pass


# DEPRECATED
class Tag(object):
    def __init__(self, tagname, **kwargs):
        self.tagname = tagname
        self.tag_id = ""
        self.titulo = ""
        self.descricao = ""
        self.title = ""
        self.description = ""
        self.origem_id = ""
        self.ue_original = ""
        self.metadata = {}
        self.related_to = {}  # related tags, tag_id: tag_obj
        self.belongs_to = {}  # tag of the parent component, tag_id: tag_obj
        self.same_as = {}     # tag_id: tag_obj
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)


class Dataset(object):
    def __init__(self, name, list_tags, **kwargs):
        self.name = name
        self.list_tags = list_tags
        """
        if self.list_tags:
            self.read_tags(list_tags)
        """
        self.schema = pd.DataFrame()
        self.data_par = pd.DataFrame()
        self.data_var = pd.DataFrame()

    def read_tags(self, list_tags=[]):
        client = MongoClient()
        db = client.get_database(MONGO_DATABASE)
        if list_tags:
            self.list_tags.append(list_tags)
        if not self.list_tags:
            print("No tags")

        coll_tag = db.get_collection('tag')
        coll_ue = db.get_collection('u_e')
        coll_origin = db.get_collection('data_origin')

        query_tags = {"$or": [{"name": {"$in": list_tags}}, {"_id": {"$in": list_tags}}]}
        dados_tags = coll_tag.find(query_tags)
        tags = []
        for tag in dados_tags:
            ue = coll_ue.find_one({"_id": tag['ue']})
            origin = coll_origin.find_one({"_id": tag['data_origin']})
            tag.update({'ue': ue['name'], 'origin_name': origin['name']})
            tags.append(tag)

        if self.schema.empty:
            self.schema = pd.DataFrame(tags)
        else:
            self.schema.update(pd.DataFrame(tags))
        self.list_tags = tags
        return self.schema

    def add_tag(self, tag_id):
        self.list_tags.append(coll_tag.find())

    def remove_tags(self, list_tag_removed=[]):
        busca_itens = self.schema["_id"].isin(list_tag_removed)
        self.schema = self.schema.loc[~busca_itens]

    @staticmethod
    def read_mongo_df(db, collection, query={}, no_id=False):
        """Read from Mongo and store into a DataFrame."""
        client = MongoClient()
        db = client.get_database(db)
        collection = db.get_collection(collection)
        # make a query to the specific DB and collection
        cursor = collection.find(query)
        # expand the cursor and construct the DataFrame
        df = pd.DataFrame(list(cursor))
        # delete the _id
        if no_id:
            del df['_id']
        return df

    def load_data_var(self, list_datas):
        client = MongoClient()
        db = client.get_database(MONGO_DATABASE)
        if not self.list_tags:
            print("No tags")
        list_ids = [x["_id"] for x in self.list_tags]
        print(list_ids)
        coll_tagval = db.get_collection('tag_val')
        coll_tag = db.get_collection('tag')
        coll_ue = db.get_collection('u_e')
        coll_origin = db.get_collection('data_origin')
        # TODO: list_datas must be checked for whether it holds 2 values
        # (start and end) or more than one value (specific dates)
        format_string = '%Y-%m-%d %H:%M:%S'
        dt_inicio = datetime.datetime.strptime(list_datas[0], format_string)
        dt_fim = datetime.datetime.strptime(list_datas[1], format_string)
        list_tag_values = []
        for t in self.list_tags:
            tag = t
            ue = tag['ue']
            origin = coll_origin.find_one({"_id": tag['data_origin']})
            tag.update({'ue': ue, 'origin_name': origin['name']})
            query_tagval = {
                "tag": tag["_id"],
                "$and": [{"date": {"$gte": dt_inicio}}, {"date": {"$lte": dt_fim}}]
            }
            dados_tagval = coll_tagval.find(query_tagval)
            valores = []
            count_none = []
            for dado in dados_tagval:
                try:
                    val = {"date": dado["date"], "val": dado["val"], "name": tag["name"],
                           "origin": tag["origin_name"], "tag_id": tag["_id"]}
                    val.update(dado["values"])
                except KeyError:
                    count_none.append(dado)
                    continue
                valores.append(val)
            list_tag_values = list_tag_values + valores
        self.data_var = pd.DataFrame(list_tag_values)
        return self.data_var


class Dataset2(object):
    def __init__(self, name, list_tags, **kwargs):
        self.titulo = ""
        self.dataset_id = name  # the original assigned an undefined `dataset_id`
        self.origem_id = None  # for when there is an origin
        # self.values = {}
        self.tags = {}  # tag_id : Tag
        self.data = {}  # tag_id : Curva
        self.timesheet = []
        self.loader = None
        self.df = None
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)

    def update(self, **kwargs):
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)

    def load_vali_mea_df(self, list_tags, start, end):
        loader = loaders.ValiLoader(database='SICA1_SQL')
        df_mea = loader.get_vali_mea(list_tags, start, end)
        self.df = df_mea

    def load_vali_dvr(self, list_tags, start, end):
        loader = loaders.ValiLoader(database='ANGRA1_DVR')
        df_mea = loader.get_vali_mea(list_tags, start, end)
        self.df = df_mea

    def load_sica_file(self, list_tags, **kwargs):
        pass

    def register_tag(self, tag):
        self.tags.update({tag.tag_id: tag})
        print(self.tags)

    def get_tag_list(self):
        return self.tags.items()

    def get_val_dict(self, tag, **kwargs):
        # to satisfy the tests for now.
        # TODO: think of a better function and standardize the returned data,
        # remembering that the dataframe layout belongs to the loader object.
        # The Dataset should work with already-standardized data.
        pass

    def get_timesheet(self):
        return

    def load_vali_mea(self, list_tags, start, end):  # TODO: whose responsibility is ...
        loader = loaders.ValiLoader()
        dados_mea = loader.get_vali_mea(list_tags, start, end)
        valores = []
        tag_obj = None

        for dado_mea in dados_mea:
            tag_id = dado_mea['PSC']

            if tag_id not in self.tags:
                # register the tag
                dados_tag = {
                    'titulo': dado_mea['Description'],
                    'origem_id': self.dataset_id,
                    'descricao': dado_mea['Description'],
                    'ue_original': dado_mea['UE']
                }
                print(dados_tag)
                colunas_mantidas_tag = ['PSC', 'Description', 'UE']
                for col in colunas_mantidas_tag:
                    dados_tag.update({col: dado_mea[col]})
                tag_obj = Tag(tag_id, **dados_tag)
                self.register_tag(tag_obj)
            '''
            if tag_id not in self.tags:
                # reset the curve data vector for the tag
                valores = []
                self.tags.update({tag_id: tag_obj})
            '''
            valores.append(dado_mea['Value_Average'])
        # TODO: register the tags in the dataset
        # TODO: register the data


def load_tag(list_tags):
    pass


# it will not be done this way
class Head(Curva):
    pass


# DEPRECATED
class Tagval(object):
    val = ''  # main value looked up, np array + pint
    tag = ''
    dados = ''

    def __init__(self, pysica_tag_obj, dados):
        self.tag = pysica_tag_obj
        self.dados = dados
        # (snippet truncated at the source)
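
A minimal usage sketch (not in the original file), assuming a local MongoDB with the collections used above ('tag', 'u_e', 'data_origin', 'tag_val') and a MONGO_DATABASE name defined in constantes; the tag names are hypothetical.

# resolve hypothetical tags, then load their values over a date range
ds = Dataset('angra_demo', ['TAG-001', 'TAG-002'])
schema = ds.read_tags(['TAG-001', 'TAG-002'])   # resolves tags, units and data origins
df = ds.load_data_var(['2020-01-01 00:00:00', '2020-01-02 00:00:00'])
print(schema)
print(df.head())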


pcfg.py

Source: pcfg.py (GitHub)


from utils import *
# explicit imports, assuming utils may not re-export them
import numpy as np
from copy import deepcopy


class PCFG():
    def __init__(self, corpus):
        self.grammar = {}
        self.lexicon = {}
        #########################################################################
        self.extract_from_corpus(corpus)
        # this function fills:
        # - self.grammar as a dictionary such that self.grammar[X] is a dictionary for each tag X
        #   with X1...Xn as keys and counts(X -> X1...Xn) as values
        # - self.lexicon as a dictionary such that self.lexicon[X] is a dictionary for each tag X
        #   with words as keys and counts(X -> word) as values

        # frequencies of each word/token
        self.freq_tokens = {}
        for tag in self.lexicon.keys():
            for word in self.lexicon[tag].keys():
                if word in self.freq_tokens.keys():
                    self.freq_tokens[word] += self.lexicon[tag][word]
                else:
                    self.freq_tokens[word] = self.lexicon[tag][word]
        total = np.sum(list(self.freq_tokens.values()))
        for word in self.freq_tokens:
            self.freq_tokens[word] /= total
        #########################################################################
        # this function introduces artificial symbols to put the grammar in Chomsky normal form
        self.binarize()
        # frequencies of each POS tag (i.e. a tag such that there exists a word s.t. tag -> word)
        self.freq_terminal_tags = {tag: np.sum(list(counts.values())) for (tag, counts) in self.lexicon.items()}
        total = np.sum(list(self.freq_terminal_tags.values()))
        for tag in self.freq_terminal_tags:
            self.freq_terminal_tags[tag] /= total
        #########################################################################
        # converting the counts in self.grammar and self.lexicon into probabilities
        self.normalize_counts()
        #########################################################################
        # storing the list of symbols (only tags, and with all artificial symbols)
        list_all_symbols = all_symbols(self.grammar)
        self.list_artificial_symbols = list(self.set_artificial_symbols)
        self.list_tags = list(set(list_all_symbols).difference(self.set_artificial_symbols))
        self.list_all_symbols = self.list_tags + self.list_artificial_symbols
        self.nb_tags = len(self.list_tags)
        self.nb_all_symbols = len(self.list_all_symbols)

    def extract_from_corpus(self, corpus):
        # extract the grammar and lexicon from the corpus
        for tagged_sent in corpus:
            sent = tagged_sent.split()  # into a list
            hierarchy = []  # index = number of opened brackets since the beginning of the sentence
            # hierarchy[index] = list of tags pointed to by root tag hierarchy[index-1]
            hierarchy.append([])  # list for level 0
            level = 0  # current difference between the number of opened brackets (minus the first one) and the number of closed brackets
            current_tag = None
            for bloc in sent:
                if bloc[0] == "(":  # the bloc introduces a new tag
                    tag = non_functional_tag(bloc[1:])  # we add it to the hierarchy
                    if level < len(hierarchy):  # there is already one tag at its level
                        hierarchy[level].append(tag)
                    else:  # first tag at its level
                        hierarchy.append([tag])
                    level += 1  # since we opened a new bracket
                    current_tag = tag  # saved in order to add the word to the lexicon
                else:  # the bloc introduces the word name and the number of closing brackets
                    word = ""
                    nb_closing_brackets = 0
                    for caract in bloc:
                        if caract == ")":
                            nb_closing_brackets += 1
                        else:
                            word += caract
                    add(self.lexicon, current_tag, word)  # adding the pair (word, tag) to the lexicon
                    level -= nb_closing_brackets  # since we closed brackets
                    for k in range(nb_closing_brackets - 1, 0, -1):  # at least 2 brackets closed -> new grammar rule defined
                        root = hierarchy[-2][-1]  # root tag
                        if root == '':  # if the root is the beginning of the sentence
                            break
                        tags = hierarchy[-1]  # child tags
                        add(self.grammar, root, tags)  # adding the rule to the grammar
                        hierarchy.pop()  # popping the child list from the hierarchy

    def normalize_counts(self):
        # convert counts into probabilities of grammar rules (for a given root) / words (for a given tag)
        self.grammar = normalize_counts(self.grammar)
        self.lexicon = normalize_counts(self.lexicon)

    def binarize(self):
        # convert into Chomsky normal form, applying the BIN and UNIT rules (the only ones really necessary here)
        self.set_artificial_symbols = set()  # set of artificial symbols introduced
        # apply the BIN rule (eliminate right-hand sides with more than 2 non-terminals)
        self.apply_BIN_rule()
        # apply the UNIT rule (eliminate unit rules)
        self.apply_UNIT_rule()

    def apply_BIN_rule(self):
        # apply the BIN rule (eliminate right-hand sides with more than 2 non-terminals)
        grammar0 = deepcopy(self.grammar)
        for (root_tag, rules) in grammar0.items():
            # root_tag is the left-hand symbol of the grammar rule
            for (list_tags, counts) in rules.items():  # list_tags is the right-hand side of the rule
                nb_consecutive_tags = len(list_tags)
                if nb_consecutive_tags > 2:
                    del self.grammar[root_tag][list_tags]
                    symbol = root_tag + "|" + '-'.join(list_tags[1:])
                    self.set_artificial_symbols.add(symbol)
                    add(self.grammar, root_tag, (list_tags[0], symbol), counts=counts)
                    for k in range(1, nb_consecutive_tags - 2):
                        new_symbol = root_tag + "|" + '-'.join(list_tags[k + 1:])
                        self.set_artificial_symbols.add(new_symbol)
                        add(self.grammar, symbol, (list_tags[k], new_symbol), counts=counts)
                        symbol = new_symbol
                    add(self.grammar, symbol, (list_tags[-2], list_tags[-1]), counts=counts)

    def apply_UNIT_rule(self):
        # apply the UNIT rule (eliminate unit rules)
        grammar0 = deepcopy(self.grammar)
        lexicon0 = deepcopy(self.lexicon)
        rules_to_remove = []
        for (root_tag, rules) in grammar0.items():
            # root_tag is the left-hand symbol of the grammar rule
            for (list_tags, counts) in rules.items():  # list_tags is the right-hand side of the rule
                if len(list_tags) == 1:  # unit rule A -> B
                    child_tag = list_tags[0]
                    rules_to_remove.append((root_tag, list_tags))
                    freq = counts / (np.sum(list(self.grammar[root_tag].values())))
                    if child_tag in lexicon0.keys():  # existing rule A -> B where B is a preterminal symbol
                        if root_tag != "SENT":
                            symbol = root_tag + "&" + child_tag
                            self.set_artificial_symbols.add(symbol)
                            for (word, counts2) in lexicon0[child_tag].items():  # existing rule B -> word
                                add(self.lexicon, symbol, word, counts=counts2 * freq)  # add A&B -> word, self.lexicon[A&B][word] = freq(A->B) * counts(B -> word)
                            for (root_tag2, rules2) in grammar0.items():
                                for (list_tags2, counts2) in rules2.items():
                                    if (len(list_tags2) == 2) and (list_tags2[1] == root_tag):  # existing rule X -> Y A
                                        add(self.grammar, root_tag2, (list_tags2[0], symbol), counts=counts2)  # add rule X -> Y A&B
                    else:  # existing rule A -> B where B is not a preterminal symbol
                        for (list_tags_child, counts2) in grammar0[child_tag].items():
                            if len(list_tags_child) == 2:  # existing rule B -> X1 X2
                                add(self.grammar, root_tag, list_tags_child, counts=counts2 * freq)  # add rule A -> X1 X2
        for (left, right) in rules_to_remove:
            # the snippet is truncated at the source; the collected unit rules are removed here
            del self.grammar[left][right]
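
A toy usage sketch (not in the original file), assuming utils provides the helpers used above (non_functional_tag, add, normalize_counts, all_symbols) and a corpus of bracketed parse trees in the expected format.

# one hypothetical bracketed sentence: "le chat dort"
corpus = [
    "( (SENT (NP (DET le) (NC chat)) (VN (V dort))))",
]
pcfg = PCFG(corpus)
print(pcfg.list_tags)        # corpus tags (POS and phrase symbols)
print(pcfg.nb_all_symbols)   # tags plus artificial symbols added by binarize()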


notes_main.py

Source: notes_main.py (GitHub)


# start creating the smart notes application here
from PyQt5.QtCore import Qt
from PyQt5.QtWidgets import (QApplication, QWidget, QPushButton, QHBoxLayout,
                             QInputDialog, QVBoxLayout, QLabel, QMessageBox,
                             QRadioButton, QGroupBox, QButtonGroup, QLineEdit,
                             QFormLayout, QListWidget, QTextEdit)
import json

app = QApplication([])

notes = {
    'Welcome!': {
        'text': 'This is the best app for Minecraft',
        'tags': ['welcome', 'instructions']
    }
}
with open('notes_data.json', 'w') as file:
    json.dump(notes, file)


notes_win = QWidget()
notes_win.resize(900, 600)

list_notes = QListWidget()
list_notes_label = QLabel('List of notes')

button_note_create = QPushButton('Create note')
button_note_del = QPushButton('Delete note')
button_note_save = QPushButton('Save note')

field_tag = QLineEdit('')
field_tag.setPlaceholderText('Enter a tag...')
field_text = QTextEdit()
button_tag_add = QPushButton('Attach to note')
button_tag_del = QPushButton('Detach from note')
button_tag_search = QPushButton('Search notes by tag')
list_tags = QListWidget()
list_tags_label = QLabel('List of tags')

layout_notes = QHBoxLayout()
col_1 = QVBoxLayout()
col_1.addWidget(field_text)

col_2 = QVBoxLayout()
col_2.addWidget(list_notes_label)
col_2.addWidget(list_notes)
row_1 = QHBoxLayout()
row_1.addWidget(button_note_create)
row_1.addWidget(button_note_del)
row_2 = QHBoxLayout()
row_2.addWidget(button_note_save)
col_2.addLayout(row_1)
col_2.addLayout(row_2)

col_2.addWidget(list_tags_label)
col_2.addWidget(list_tags)
col_2.addWidget(field_tag)
row_3 = QHBoxLayout()
row_3.addWidget(button_tag_add)
row_3.addWidget(button_tag_del)
row_4 = QHBoxLayout()
row_4.addWidget(button_tag_search)

col_2.addLayout(row_3)
col_2.addLayout(row_4)

layout_notes.addLayout(col_1, stretch=2)
layout_notes.addLayout(col_2, stretch=1)
notes_win.setLayout(layout_notes)


def show_note():
    key = list_notes.selectedItems()[0].text()
    field_text.setText(notes[key]['text'])
    list_tags.clear()
    list_tags.addItems(notes[key]['tags'])


def add_note():
    note_name, ok = QInputDialog.getText(notes_win, 'Add note', 'Note title:')
    if ok and note_name != '':
        notes[note_name] = {'text': '', 'tags': []}
        list_notes.addItem(note_name)
        list_tags.addItems(notes[note_name]['tags'])


def save_note():
    if list_notes.selectedItems():
        key = list_notes.selectedItems()[0].text()
        notes[key]['text'] = field_text.toPlainText()
        with open('notes_data.json', 'w') as file:
            json.dump(notes, file, sort_keys=True, ensure_ascii=False)


def del_note():
    if list_notes.selectedItems():
        key = list_notes.selectedItems()[0].text()
        del notes[key]
        list_notes.clear()
        list_tags.clear()
        field_text.clear()
        list_notes.addItems(notes)
        with open('notes_data.json', 'w') as file:
            json.dump(notes, file, sort_keys=True, ensure_ascii=False)


def add_tag():
    if list_notes.selectedItems():
        key = list_notes.selectedItems()[0].text()
        tag = field_tag.text()
        if tag not in notes[key]['tags']:
            notes[key]['tags'].append(tag)
            list_tags.addItem(tag)
            field_tag.clear()
        with open('notes_data.json', 'w') as file:
            json.dump(notes, file, sort_keys=True, ensure_ascii=False)


def del_tag():
    if list_notes.selectedItems() and list_tags.selectedItems():
        key = list_notes.selectedItems()[0].text()
        tag = list_tags.selectedItems()[0].text()
        notes[key]['tags'].remove(tag)
        list_tags.clear()
        list_tags.addItems(notes[key]['tags'])
        with open('notes_data.json', 'w') as file:
            json.dump(notes, file, sort_keys=True, ensure_ascii=False)


def search_tag():
    tag = field_tag.text()
    if button_tag_search.text() == 'Search notes by tag' and tag:
        # collect the notes carrying the tag
        # (the original wrote to a non-existent notes.filtered attribute)
        filtered = [note for note in notes if tag in notes[note]['tags']]
        list_notes.clear()
        list_tags.clear()
        list_notes.addItems(filtered)
        button_tag_search.setText('Reset search')
    else:
        list_notes.clear()
        list_tags.clear()
        list_notes.addItems(notes)
        button_tag_search.setText('Search notes by tag')


button_note_create.clicked.connect(add_note)
list_notes.itemClicked.connect(show_note)
button_note_save.clicked.connect(save_note)
button_note_del.clicked.connect(del_note)
button_tag_add.clicked.connect(add_tag)
button_tag_del.clicked.connect(del_tag)
button_tag_search.clicked.connect(search_tag)

notes_win.show()

with open('notes_data.json', 'r') as file:
    notes = json.load(file)  # the original discarded the loaded data
list_notes.addItems(notes)
app.exec_()  # start the event loop (the snippet is truncated at the source)
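
For reference, a standalone sketch (not in the original file) of the JSON round-trip the app relies on for persistence; notes_data.json is the same file name the app uses.

import json

notes = {'Welcome!': {'text': 'hello', 'tags': ['welcome']}}
with open('notes_data.json', 'w') as file:
    json.dump(notes, file, sort_keys=True, ensure_ascii=False)
with open('notes_data.json', 'r') as file:
    notes = json.load(file)
print(list(notes))   # the note titles that list_notes.addItems(notes) would display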

