How to use list_tags method in tempest

Best Python code snippet using tempest_python

meop.py

Source: meop.py (GitHub)


from pathlib import Path
import os
import shutil
import xarray as xr
import pandas as pd
import numpy as np
import csv
import gsw
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from importlib import reload
import netCDF4 as nc

processdir = Path.home() / 'MEOP_process'

# list of functions

# EXP_from_SMRU_CODE(smru_platform_code)
# list_tag_EXP(EXP, qf='lr0')
# fname_prof(smru_name, depl='', qf='hr1')
# fname_plot_diags_matlab(smru_name, depl='', qf='hr1')
# N_PARAM(ds, PARAM)
# copy_file(file_name, src_dir, dst_dir)

# read_list_profiles(rebuild=False, verbose=False, public=False, Tdata=False, country=None, qf='lr0')
# read_list_deployment()

# label_regions(list_tags)
# filter_public_data(list_profiles, list_tags, list_deployments)
# filter_profiles_with_Tdata(list_profiles, list_tags, list_deployments)
# filter_country(country, list_profiles, list_tags, list_deployments)

# copy_netcdf_variable(nc_in, var_name_in, var_dims_in, nc_out, var_name_out, var_dims_out)
# read_ncfile(ncfile_name)

# ----------------------------------- utils -----------------------------------
def EXP_from_SMRU_CODE(smru_platform_code):
    return smru_platform_code.split("-")[0]


# return the ncARGO filename for a tag
def fname_prof(smru_name, depl='', qf='hr1'):
    if not depl:
        depl = EXP_from_SMRU_CODE(smru_name)
    return Path(processdir, 'final_dataset_prof', depl, smru_name + '_' + qf + '_prof.nc')


# return the list of ncARGO tag files for a deployment
def list_tag_EXP(EXP, qf='lr0'):
    dirEXP = Path(processdir, 'final_dataset_prof', EXP)
    list_tag = [ncfile for ncfile in dirEXP.glob(f'{EXP}-*_{qf}_prof.nc')]
    return list_tag


# return the filename of the matlab TS diagnostic plot
def fname_plot_diags_matlab(smru_name, depl='', qf='hr1'):
    if not depl:
        depl = EXP_from_SMRU_CODE(smru_name)
    return processdir / 'plots' / depl / (smru_name + '_' + qf + '_diags_TS_adj.png')


# return a DataArray with the number of valid data points per profile for the given PARAM
def N_PARAM(ds, PARAM):
    if PARAM + '_QC' in list(ds.variables):
        N_PARAM = np.sum((ds[PARAM + '_QC'] == b'1'), axis=1)
    else:
        N_PARAM = xr.DataArray(np.zeros(ds.dims['N_PROF']), dims=['N_PROF'])
    return N_PARAM


# copy a file
def copy_file(file_name, src_dir, dst_dir):
    shutil.copyfile(Path(src_dir) / file_name, Path(dst_dir) / file_name)


# read the list_deployment.csv file in processdir and return a pandas dataframe
def read_list_deployment(filename_list='list_deployment.csv'):

    if Path(processdir, filename_list).is_file():
        list_deployment = pd.read_csv(Path(processdir, filename_list))
    else:
        print('File', filename_list, 'not found')
        return None
    newnames = {}
    for var in list_deployment:
        newnames[var] = var.upper()

    return list_deployment.rename(columns=newnames).set_index('DEPLOYMENT_CODE')


# build the list of data in MEOP, store it in a pickle file and return the dataframe
def build_list_metadata(qf='lr0'):

    datadir = Path(processdir, 'final_dataset_prof')

    # walk through datadir subfolders
    list_df = []
    for dirpath in datadir.iterdir():
        dirname = dirpath.parts[-1]
        if qf in ['lr1', 'hr1', 'fr1', 'all', 'hr2']:
            data_suffix = '_ADJUSTED'
        else:
            data_suffix = ''
        for ncfile in dirpath.glob(f'{dirname}-*_{qf}_prof.nc'):
            with xr.open_dataset(ncfile) as ds:
                data = {'DEPLOYMENT_CODE': EXP_from_SMRU_CODE(ds.smru_platform_code),
                        'SMRU_PLATFORM_CODE': ds.smru_platform_code,
                        'CYCLE_NUMBER': ds['CYCLE_NUMBER'].astype(int),
                        'JULD': ds['JULD'],
                        'LATITUDE': ds['LATITUDE'],
                        'LONGITUDE': ds['LONGITUDE'],
                        'N_TEMP': N_PARAM(ds, 'TEMP' + data_suffix),
                        'N_PSAL': N_PARAM(ds, 'PSAL' + data_suffix),
                        'N_CHLA': N_PARAM(ds, 'CHLA' + data_suffix)}
                df = pd.DataFrame(data)
                list_df.append(df)

    # concatenate the list of dataframes into one dataframe
    df_all = pd.concat(list_df)
    df_all.to_pickle(processdir / f'list_meta_{qf}_prof.pkl')

    return df_all


# determine the region for each tag
def label_regions(list_tags):

    # set a new column called MASK with a regional label
    from scipy.interpolate import RegularGridInterpolator
    import regionmask

    basins = regionmask.defined_regions.ar6.all
    label = basins.names
    lon = np.arange(-179.5, 180)
    lat = np.arange(-89.5, 90)
    mask = basins.mask(lon, lat)
    f = RegularGridInterpolator((lon, lat), mask.transpose().values, method='nearest')
    list_tags["MASK"] = f(list_tags[['LONGITUDE', 'LATITUDE']].values)
    list_tags["MASK"] = list_tags.MASK.map(dict(enumerate(label)))

    map_regions = {
        'Southern-Ocean': 'Southern Ocean',
        'E.Antarctica': 'Southern Ocean',
        'W.Antarctica': 'Southern Ocean',
        'Arctic-Ocean': 'North Atlantic',
        'N.Pacific-Ocean': 'North Pacific',
        'C.North-America': 'North Pacific',
        'W.North-America': 'North Pacific',
        'N.E.North-America': 'North Atlantic',
        'E.North-America': 'North Atlantic',
        'Greenland/Iceland': 'North Atlantic',
        'N.Atlantic-Ocean': 'North Atlantic',
        'N.W.North-America': 'North Pacific',
        'N.Europe': 'North Atlantic',
        'S.Australia': 'Australia',
        'N.Central-America': 'North Pacific',
        'N.South-America': 'Tropical Atlantic',
        'S.South-America': 'South Atlantic',
        'S.Atlantic-Ocean': 'South Atlantic',
    }
    list_tags['MASK'] = list_tags.MASK.map(map_regions)

    return list_tags


# select only public data
def filter_public_data(list_profiles, list_tags, list_deployments):

    list_tags = list_tags[list_tags.PUBLIC == 1]
    list_deployments = list_deployments[list_deployments.PUBLIC == 1]
    list_profiles = list_profiles.merge(list_tags.SMRU_PLATFORM_CODE, on='SMRU_PLATFORM_CODE')

    return list_profiles, list_tags, list_deployments


# select only profiles with temperature data points
def filter_profiles_with_Tdata(list_profiles, list_tags, list_deployments):

    list_tags = list_tags[list_tags.N_PROF_TEMP != 0]
    list_deployments = list_deployments.merge(list_tags.DEPLOYMENT_CODE, on='DEPLOYMENT_CODE')
    list_profiles = list_profiles.loc[list_profiles.N_TEMP != 0]

    return list_profiles, list_tags, list_deployments


# select only data from deployments of the given country
def filter_country(country, list_profiles, list_tags, list_deployments):

    list_deployments = list_deployments.loc[list_deployments.COUNTRY == country]
    list_tags = list_tags[list_tags.DEPLOYMENT_CODE.isin(list_deployments.DEPLOYMENT_CODE)]
    list_profiles = list_profiles[list_profiles.SMRU_PLATFORM_CODE.isin(list_tags.SMRU_PLATFORM_CODE)]

    return list_profiles, list_tags, list_deployments


# read the MEOP data list from a pickle file and return the dataframes.
# If the pickle file is not found, the list file is generated.
def read_list_profiles(rebuild=False, verbose=False, public=False, Tdata=False, country=None, qf='lr0'):

    if (Path(processdir / f'list_meta_{qf}_prof.pkl').is_file()) and (not rebuild):
        list_profiles = pd.read_pickle(processdir / f'list_meta_{qf}_prof.pkl')
    else:
        print(f'Create metadata files in {processdir}: list_meta_{qf}_prof.pkl')
        list_profiles = build_list_metadata(qf=qf)

    # read the list of profiles
    for col in ['N_TEMP', 'N_PSAL', 'N_CHLA']:
        list_profiles[col] = list_profiles[col].where(list_profiles[col] != 0, np.nan)

    list_tags = list_profiles.groupby('SMRU_PLATFORM_CODE').first()\
        .drop(['N_TEMP', 'N_PSAL', 'N_CHLA', 'CYCLE_NUMBER'], axis='columns')
    list_tags['N_PROF_TEMP'] = list_profiles.groupby('SMRU_PLATFORM_CODE').N_TEMP.count()
    list_tags['N_PROF_PSAL'] = list_profiles.groupby('SMRU_PLATFORM_CODE').N_PSAL.count()
    list_tags['N_PROF_CHLA'] = list_profiles.groupby('SMRU_PLATFORM_CODE').N_CHLA.count()

    agg_ops = {'JULD': min, 'LATITUDE': np.mean, 'LONGITUDE': np.mean, 'N_PROF_TEMP': sum,
               'N_PROF_PSAL': sum, 'N_PROF_CHLA': sum}
    list_deployments = list_tags.groupby('DEPLOYMENT_CODE').agg(agg_ops)
    list_deployments['N_TAGS'] = list_tags.groupby('DEPLOYMENT_CODE').DEPLOYMENT_CODE.count()
    list_deployments = list_deployments.merge(read_list_deployment(), on='DEPLOYMENT_CODE', how='outer')
    drop_list = ['START_DATE', 'END_DATE', 'START_DATE_JUL']
    list_deployments = list_deployments.drop(drop_list, axis='columns')

    list_public = list_deployments.reset_index()[['DEPLOYMENT_CODE', 'PUBLIC']]
    list_tags = list_tags.reset_index().merge(list_public, on='DEPLOYMENT_CODE')

    # add correction coefficients in list_tags
    coeff = pd.read_csv(processdir / 'table_coeff.csv')
    list_tags = list_tags.merge(coeff, left_on='SMRU_PLATFORM_CODE', right_on='smru_platform_code', how='outer')
    list_tags['comment'] = list_tags['comment'].fillna('no comment')
    tag_problem = list_tags.loc[list_tags.SMRU_PLATFORM_CODE.isnull(), :]
    if len(tag_problem.smru_platform_code):
        if verbose:
            print('List of tags with correction coefficients not yet listed in list_deployment:')
            print(tag_problem.smru_platform_code)
        message = 'tag with correction coefficient, yet no netcdf file'
        for tag in list(tag_problem.smru_platform_code):
            comment = coeff.loc[coeff.smru_platform_code == tag, 'comment']
            if message not in comment:
                if 'no comment' in comment:
                    comment = message
                else:
                    comment = comment + ', ' + message
                coeff.loc[coeff.smru_platform_code == tag, 'comment'] = comment
                list_tags.loc[list_tags.smru_platform_code == tag, 'comment'] = comment
    list_tags = list_tags.drop('smru_platform_code', axis='columns')

    # add variable_offset in list_tags
    salinity_offsets = pd.read_csv(processdir / 'table_salinity_offsets.csv')
    salinity_offsets['variable_offset'] = 1
    variable_offset = salinity_offsets[['smru_platform_code', 'variable_offset']]
    list_tags = list_tags.merge(variable_offset, left_on='SMRU_PLATFORM_CODE', right_on='smru_platform_code', how='outer')\
        .drop('smru_platform_code', axis='columns')

    # add parameters from table_param.csv in list_deployments
    param = pd.read_csv(processdir / 'table_param.csv')
    list_deployments = list_deployments.reset_index().merge(param, left_on='DEPLOYMENT_CODE', right_on='deployment_code', how='outer')\
        .drop('deployment_code', axis='columns')

    list_deployment_hr = pd.read_csv(processdir / 'list_deployment_hr.csv', dtype={'prefix': str, 'instr_id': str, 'year': str})
    list_tags = list_tags.merge(list_deployment_hr, left_on='SMRU_PLATFORM_CODE', right_on='smru_platform_code', how='outer')
    list_tags['comment'] = list_tags['comment'].fillna('no comment')
    tag_problem = list_tags.loc[list_tags.SMRU_PLATFORM_CODE.isnull(), :]
    if len(tag_problem.instr_id):
        if verbose:
            print('List of instr ids for tags with hr datasets but no low resolution ones:')
            print(tag_problem.instr_id)
        for tag in list(tag_problem.index):
            list_tags = list_tags.drop(tag, axis=0)
    list_tags = list_tags.drop('smru_platform_code', axis='columns')

    list_tags = label_regions(list_tags)
    if 'MASK' not in list_profiles:
        list_profiles = list_profiles.merge(list_tags.set_index('SMRU_PLATFORM_CODE').MASK, on='SMRU_PLATFORM_CODE')

    if verbose:
        print(f'Update metadata files in {processdir}: list_profiles.pkl, list_tags.csv, list_deployments.csv')

    if public:
        list_profiles, list_tags, list_deployments = filter_public_data(list_profiles, list_tags, list_deployments)

    if Tdata:
        list_profiles, list_tags, list_deployments = filter_profiles_with_Tdata(list_profiles, list_tags, list_deployments)

    if country:
        list_profiles, list_tags, list_deployments = filter_country(country, list_profiles, list_tags, list_deployments)

    return list_profiles, list_tags, list_deployments


# copy the variable var_name_in from nc_in into nc_out
def copy_netcdf_variable(nc_in, var_name_in, var_dims_in, nc_out, var_name_out, var_dims_out):

    with nc.Dataset(nc_in) as src, nc.Dataset(nc_out, "a") as dst:
        # copy dimensions if not already existing
        for i, name in enumerate(var_dims_out):
            if name not in dst.dimensions:
                dst.createDimension(name, src.dimensions[var_dims_in[i]].size)
            if src.dimensions[var_dims_in[i]].size - dst.dimensions[var_dims_out[i]].size != 0:
                print(f"Dimension {name} has wrong size in {nc_out}")
                return 0
        # copy the variable
        if var_name_out not in dst.variables:
            dst.createVariable(var_name_out, src.variables[var_name_in].datatype, var_dims_out)
        # copy variable attributes all at once via dictionary
        dst[var_name_out].setncatts(src[var_name_in].__dict__)
        dst[var_name_out][:] = src[var_name_in][:]

    return 1


# read a netCDF ARGO file and return an xarray dataset structure
def read_ncfile(ncfile_name):

    if ncfile_name.is_file():
        ds = xr.open_dataset(ncfile_name)
        for dim in ds.dims:
            ds[dim] = ((dim), ds[dim])
            ds = ds.set_coords([dim])
        ds['N_TEMP'] = (('N_PROF'), N_PARAM(ds, 'TEMP'))
        ds['N_PSAL'] = (('N_PROF'), N_PARAM(ds, 'PSAL'))
        if 'CHLA' in ds.variables:
            ds['N_CHLA'] = (('N_PROF'), N_PARAM(ds, 'CHLA'))
        if 'DOXY' in ds.variables:
            ds['N_DOXY'] = (('N_PROF'), N_PARAM(ds, 'DOXY'))
    else:
        print('No file: ', ncfile_name)
        return None
    return ds
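
A quick usage sketch (not part of the original file): it assumes the ~/MEOP_process directory tree and CSV tables the module expects are present, and 'ct34' is a hypothetical deployment code.

# list the tag files of one deployment and count valid temperature points per tag
for ncfile in list_tag_EXP('ct34', qf='lr0'):   # 'ct34' is a hypothetical deployment code
    ds = read_ncfile(ncfile)
    if ds is not None:
        print(ncfile.name, int(ds['N_TEMP'].sum()))

# build the profile/tag/deployment tables, keeping public data only
list_profiles, list_tags, list_deployments = read_list_profiles(public=True)
print(list_tags[['SMRU_PLATFORM_CODE', 'MASK', 'N_PROF_TEMP']].head())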


pysica_classes.py

Source: pysica_classes.py (GitHub)


import pint
import numpy as np
import loaders
import odm
import pandas as pd
from pymongo import MongoClient
from constantes import *
import datetime


# DEPRECATED
class Curva(object):
    def __init__(self, curva_id, **kwargs):
        # basic variables
        self.curva_id = curva_id
        self.titulo = ""
        self.descricao = ""
        self.title = ""
        self.description = ""
        self.ndim = 0
        self.tags = []
        # self.x = np.array()
        # TODO: abstract this better
        self.ue = []  # vector of engineering units (ue) matching the tags
        ue = {'original': '', 'used': ''}
        self.val = None
        # self.tags_ = tag
        self.metadata = []
        metadata = {'x_tag': None, 'y_tag': None, 'z_tag': None,
                    'x_options': {}, 'y_options': {}, 'z_options': {}}
        self.x_values = np.array([])
        self.y_values = np.array([])
        self.z_values = np.array([])
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)

    def set_values(self, list_tags, list_values):
        pass


# DEPRECATED
class Tag(object):
    def __init__(self, tagname, **kwargs):
        self.tagname = tagname
        self.tag_id = ""
        self.titulo = ""
        self.descricao = ""
        self.title = ""
        self.description = ""
        self.origem_id = ""
        self.ue_original = ""
        self.metadata = {}
        self.related_to = {}  # related tags, tag_id: tag_obj
        self.belongs_to = {}  # tag of the parent component, tag_id: tag_obj
        self.same_as = {}     # tag_id: tag_obj
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)


class Dataset(object):
    def __init__(self, name, list_tags, **kwargs):
        self.name = name
        self.list_tags = list_tags
        """
        if self.list_tags:
            self.read_tags(list_tags)
        """
        self.schema = pd.DataFrame()
        self.data_par = pd.DataFrame()
        self.data_var = pd.DataFrame()

    def read_tags(self, list_tags=[]):
        client = MongoClient()
        db = client.get_database(MONGO_DATABASE)
        if list_tags:
            self.list_tags.append(list_tags)
        if not self.list_tags:
            print("No tags")

        coll_tag = db.get_collection('tag')
        coll_ue = db.get_collection('u_e')
        coll_origin = db.get_collection('data_origin')

        query_tags = {"$or": [{"name": {"$in": list_tags}}, {"_id": {"$in": list_tags}}]}
        dados_tags = coll_tag.find(query_tags)
        tags = []
        for tag in dados_tags:
            ue = coll_ue.find_one({"_id": tag['ue']})
            origin = coll_origin.find_one({"_id": tag['data_origin']})
            tag.update({'ue': ue['name'], 'origin_name': origin['name']})
            tags.append(tag)

        if self.schema.empty:
            self.schema = pd.DataFrame(tags)
        else:
            self.schema.update(pd.DataFrame(tags))
        self.list_tags = tags
        return self.schema

    def add_tag(self, tag_id):
        self.list_tags.append(coll_tag.find())

    def remove_tags(self, list_tag_removed=[]):
        busca_itens = self.schema["_id"].isin(list_tag_removed)
        self.schema = self.schema.loc[~busca_itens]

    @staticmethod
    def read_mongo_df(db, collection, query={}, no_id=False):
        """Read from Mongo and store into a DataFrame."""
        client = MongoClient()
        db = client.get_database(db)
        collection = db.get_collection(collection)
        # make a query to the specific DB and collection
        cursor = collection.find(query)
        # expand the cursor and construct the DataFrame
        df = pd.DataFrame(list(cursor))
        # delete the _id
        if no_id:
            del df['_id']
        return df

    def load_data_var(self, list_datas):
        client = MongoClient()
        db = client.get_database(MONGO_DATABASE)
        if not self.list_tags:
            print("No tags")
        list_ids = [x["_id"] for x in self.list_tags]
        print(list_ids)
        coll_tagval = db.get_collection('tag_val')
        coll_tag = db.get_collection('tag')
        coll_ue = db.get_collection('u_e')
        coll_origin = db.get_collection('data_origin')
        # TODO: list_datas must be checked for whether it holds 2 values
        # (start and end) or more than one value (specific dates)
        format_string = '%Y-%m-%d %H:%M:%S'
        dt_inicio = datetime.datetime.strptime(list_datas[0], format_string)
        dt_fim = datetime.datetime.strptime(list_datas[1], format_string)
        list_tag_values = []
        for t in self.list_tags:
            tag = t
            ue = tag['ue']
            origin = coll_origin.find_one({"_id": tag['data_origin']})
            tag.update({'ue': ue, 'origin_name': origin['name']})
            query_tagval = {
                "tag": tag["_id"],
                "$and": [{"date": {"$gte": dt_inicio}}, {"date": {"$lte": dt_fim}}]
            }
            dados_tagval = coll_tagval.find(query_tagval)
            valores = []
            count_none = []
            for dado in dados_tagval:
                try:
                    val = {"date": dado["date"], "val": dado["val"], "name": tag["name"],
                           "origin": tag["origin_name"], "tag_id": tag["_id"]}
                    val.update(dado["values"])
                except KeyError:
                    count_none.append(dado)
                    continue
                valores.append(val)
            list_tag_values = list_tag_values + valores
        self.data_var = pd.DataFrame(list_tag_values)
        return self.data_var


class Dataset2(object):
    def __init__(self, name, list_tags, **kwargs):
        self.titulo = ""
        self.dataset_id = name  # the original assigned an undefined `dataset_id`
        self.origem_id = None  # for when there is an origin
        # self.values = {}
        self.tags = {}  # tag_id : Tag
        self.data = {}  # tag_id : Curva
        self.timesheet = []
        self.loader = None
        self.df = None
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)

    def update(self, **kwargs):
        if kwargs:
            for k, v in kwargs.items():
                setattr(self, k, v)

    def load_vali_mea_df(self, list_tags, start, end):
        loader = loaders.ValiLoader(database='SICA1_SQL')
        df_mea = loader.get_vali_mea(list_tags, start, end)
        self.df = df_mea

    def load_vali_dvr(self, list_tags, start, end):
        loader = loaders.ValiLoader(database='ANGRA1_DVR')
        df_mea = loader.get_vali_mea(list_tags, start, end)
        self.df = df_mea

    def load_sica_file(self, list_tags, **kwargs):
        pass

    def register_tag(self, tag):
        self.tags.update({tag.tag_id: tag})
        print(self.tags)

    def get_tag_list(self):
        return self.tags.items()

    def get_val_dict(self, tag, **kwargs):
        # to satisfy the tests for now.
        # TODO: think of a better function and standardize the returned data,
        # remembering that the dataframe layout belongs to the loader object.
        # The Dataset should work with already-standardized data.
        pass

    def get_timesheet(self):
        return

    def load_vali_mea(self, list_tags, start, end):  # TODO: whose responsibility is ...
        loader = loaders.ValiLoader()
        dados_mea = loader.get_vali_mea(list_tags, start, end)
        valores = []
        tag_obj = None

        for dado_mea in dados_mea:
            tag_id = dado_mea['PSC']

            if tag_id not in self.tags:
                # register the tag
                dados_tag = {
                    'titulo': dado_mea['Description'],
                    'origem_id': self.dataset_id,
                    'descricao': dado_mea['Description'],
                    'ue_original': dado_mea['UE']
                }
                print(dados_tag)
                colunas_mantidas_tag = ['PSC', 'Description', 'UE']
                for col in colunas_mantidas_tag:
                    dados_tag.update({col: dado_mea[col]})
                tag_obj = Tag(tag_id, **dados_tag)
                self.register_tag(tag_obj)
            '''
            if tag_id not in self.tags:
                # reset the curve data vector for the tag
                valores = []
                self.tags.update({tag_id: tag_obj})
            '''
            valores.append(dado_mea['Value_Average'])
        # TODO: register the tags in the dataset
        # TODO: register the data


def load_tag(list_tags):
    pass


# it will not be done this way
class Head(Curva):
    pass


# DEPRECATED
class Tagval(object):
    val = ''  # main value looked up, np array + pint
    tag = ''
    dados = ''

    def __init__(self, pysica_tag_obj, dados):
        self.tag = pysica_tag_obj
        self.dados = dados
        # (snippet truncated at the source)
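
A minimal usage sketch (not in the original file), assuming a local MongoDB with the collections used above ('tag', 'u_e', 'data_origin', 'tag_val') and a MONGO_DATABASE name defined in constantes; the tag names are hypothetical.

# resolve hypothetical tags, then load their values over a date range
ds = Dataset('angra_demo', ['TAG-001', 'TAG-002'])
schema = ds.read_tags(['TAG-001', 'TAG-002'])   # resolves tags, units and data origins
df = ds.load_data_var(['2020-01-01 00:00:00', '2020-01-02 00:00:00'])
print(schema)
print(df.head())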


pcfg.py

Source: pcfg.py (GitHub)


from utils import *
# explicit imports, assuming utils may not re-export them
import numpy as np
from copy import deepcopy


class PCFG():
    def __init__(self, corpus):
        self.grammar = {}
        self.lexicon = {}
        #########################################################################
        self.extract_from_corpus(corpus)
        # this function fills:
        # - self.grammar as a dictionary such that self.grammar[X] is a dictionary for each tag X
        #   with X1...Xn as keys and counts(X -> X1...Xn) as values
        # - self.lexicon as a dictionary such that self.lexicon[X] is a dictionary for each tag X
        #   with words as keys and counts(X -> word) as values

        # frequencies of each word/token
        self.freq_tokens = {}
        for tag in self.lexicon.keys():
            for word in self.lexicon[tag].keys():
                if word in self.freq_tokens.keys():
                    self.freq_tokens[word] += self.lexicon[tag][word]
                else:
                    self.freq_tokens[word] = self.lexicon[tag][word]
        total = np.sum(list(self.freq_tokens.values()))
        for word in self.freq_tokens:
            self.freq_tokens[word] /= total
        #########################################################################
        # this function introduces artificial symbols to put the grammar in Chomsky normal form
        self.binarize()
        # frequencies of each POS tag (i.e. a tag such that there exists a word s.t. tag -> word)
        self.freq_terminal_tags = {tag: np.sum(list(counts.values())) for (tag, counts) in self.lexicon.items()}
        total = np.sum(list(self.freq_terminal_tags.values()))
        for tag in self.freq_terminal_tags:
            self.freq_terminal_tags[tag] /= total
        #########################################################################
        # converting the counts in self.grammar and self.lexicon into probabilities
        self.normalize_counts()
        #########################################################################
        # storing the list of symbols (only tags, and with all artificial symbols)
        list_all_symbols = all_symbols(self.grammar)
        self.list_artificial_symbols = list(self.set_artificial_symbols)
        self.list_tags = list(set(list_all_symbols).difference(self.set_artificial_symbols))
        self.list_all_symbols = self.list_tags + self.list_artificial_symbols
        self.nb_tags = len(self.list_tags)
        self.nb_all_symbols = len(self.list_all_symbols)

    def extract_from_corpus(self, corpus):
        # extract the grammar and lexicon from the corpus
        for tagged_sent in corpus:
            sent = tagged_sent.split()  # into a list
            hierarchy = []  # index = number of opened brackets since the beginning of the sentence
            # hierarchy[index] = list of tags pointed to by root tag hierarchy[index-1]
            hierarchy.append([])  # list for level 0
            level = 0  # current difference between the number of opened brackets (minus the first one) and the number of closed brackets
            current_tag = None
            for bloc in sent:
                if bloc[0] == "(":  # the bloc introduces a new tag
                    tag = non_functional_tag(bloc[1:])  # we add it to the hierarchy
                    if level < len(hierarchy):  # there is already one tag at its level
                        hierarchy[level].append(tag)
                    else:  # first tag at its level
                        hierarchy.append([tag])
                    level += 1  # since we opened a new bracket
                    current_tag = tag  # saved in order to add the word to the lexicon
                else:  # the bloc introduces the word name and the number of closing brackets
                    word = ""
                    nb_closing_brackets = 0
                    for caract in bloc:
                        if caract == ")":
                            nb_closing_brackets += 1
                        else:
                            word += caract
                    add(self.lexicon, current_tag, word)  # adding the pair (word, tag) to the lexicon
                    level -= nb_closing_brackets  # since we closed brackets
                    for k in range(nb_closing_brackets - 1, 0, -1):  # at least 2 brackets closed -> new grammar rule defined
                        root = hierarchy[-2][-1]  # root tag
                        if root == '':  # if the root is the beginning of the sentence
                            break
                        tags = hierarchy[-1]  # child tags
                        add(self.grammar, root, tags)  # adding the rule to the grammar
                        hierarchy.pop()  # popping the child list from the hierarchy

    def normalize_counts(self):
        # convert counts into probabilities of grammar rules (for a given root) / words (for a given tag)
        self.grammar = normalize_counts(self.grammar)
        self.lexicon = normalize_counts(self.lexicon)

    def binarize(self):
        # convert into Chomsky normal form, applying the BIN and UNIT rules (the only ones really necessary here)
        self.set_artificial_symbols = set()  # set of artificial symbols introduced
        # apply the BIN rule (eliminate right-hand sides with more than 2 non-terminals)
        self.apply_BIN_rule()
        # apply the UNIT rule (eliminate unit rules)
        self.apply_UNIT_rule()

    def apply_BIN_rule(self):
        # apply the BIN rule (eliminate right-hand sides with more than 2 non-terminals)
        grammar0 = deepcopy(self.grammar)
        for (root_tag, rules) in grammar0.items():
            # root_tag is the left-hand symbol of the grammar rule
            for (list_tags, counts) in rules.items():  # list_tags is the right-hand side of the rule
                nb_consecutive_tags = len(list_tags)
                if nb_consecutive_tags > 2:
                    del self.grammar[root_tag][list_tags]
                    symbol = root_tag + "|" + '-'.join(list_tags[1:])
                    self.set_artificial_symbols.add(symbol)
                    add(self.grammar, root_tag, (list_tags[0], symbol), counts=counts)
                    for k in range(1, nb_consecutive_tags - 2):
                        new_symbol = root_tag + "|" + '-'.join(list_tags[k + 1:])
                        self.set_artificial_symbols.add(new_symbol)
                        add(self.grammar, symbol, (list_tags[k], new_symbol), counts=counts)
                        symbol = new_symbol
                    add(self.grammar, symbol, (list_tags[-2], list_tags[-1]), counts=counts)

    def apply_UNIT_rule(self):
        # apply the UNIT rule (eliminate unit rules)
        grammar0 = deepcopy(self.grammar)
        lexicon0 = deepcopy(self.lexicon)
        rules_to_remove = []
        for (root_tag, rules) in grammar0.items():
            # root_tag is the left-hand symbol of the grammar rule
            for (list_tags, counts) in rules.items():  # list_tags is the right-hand side of the rule
                if len(list_tags) == 1:  # unit rule A -> B
                    child_tag = list_tags[0]
                    rules_to_remove.append((root_tag, list_tags))
                    freq = counts / (np.sum(list(self.grammar[root_tag].values())))
                    if child_tag in lexicon0.keys():  # existing rule A -> B where B is a preterminal symbol
                        if root_tag != "SENT":
                            symbol = root_tag + "&" + child_tag
                            self.set_artificial_symbols.add(symbol)
                            for (word, counts2) in lexicon0[child_tag].items():  # existing rule B -> word
                                add(self.lexicon, symbol, word, counts=counts2 * freq)  # add A&B -> word, self.lexicon[A&B][word] = freq(A->B) * counts(B -> word)
                            for (root_tag2, rules2) in grammar0.items():
                                for (list_tags2, counts2) in rules2.items():
                                    if (len(list_tags2) == 2) and (list_tags2[1] == root_tag):  # existing rule X -> Y A
                                        add(self.grammar, root_tag2, (list_tags2[0], symbol), counts=counts2)  # add rule X -> Y A&B
                    else:  # existing rule A -> B where B is not a preterminal symbol
                        for (list_tags_child, counts2) in grammar0[child_tag].items():
                            if len(list_tags_child) == 2:  # existing rule B -> X1 X2
                                add(self.grammar, root_tag, list_tags_child, counts=counts2 * freq)  # add rule A -> X1 X2
        for (left, right) in rules_to_remove:
            # the snippet is truncated at the source; the collected unit rules are removed here
            del self.grammar[left][right]
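
A toy usage sketch (not in the original file), assuming utils provides the helpers used above (non_functional_tag, add, normalize_counts, all_symbols) and a corpus of bracketed parse trees in the expected format.

# one hypothetical bracketed sentence: "le chat dort"
corpus = [
    "( (SENT (NP (DET le) (NC chat)) (VN (V dort))))",
]
pcfg = PCFG(corpus)
print(pcfg.list_tags)        # corpus tags (POS and phrase symbols)
print(pcfg.nb_all_symbols)   # tags plus artificial symbols added by binarize()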


notes_main.py

Source: notes_main.py (GitHub)


# start creating the smart notes application here
from PyQt5.QtCore import Qt
from PyQt5.QtWidgets import (QApplication, QWidget, QPushButton, QHBoxLayout,
                             QInputDialog, QVBoxLayout, QLabel, QMessageBox,
                             QRadioButton, QGroupBox, QButtonGroup, QLineEdit,
                             QFormLayout, QListWidget, QTextEdit)
import json

app = QApplication([])

notes = {
    'Welcome!': {
        'text': 'This is the best app for Minecraft',
        'tags': ['welcome', 'instructions']
    }
}
with open('notes_data.json', 'w') as file:
    json.dump(notes, file)


notes_win = QWidget()
notes_win.resize(900, 600)

list_notes = QListWidget()
list_notes_label = QLabel('List of notes')

button_note_create = QPushButton('Create note')
button_note_del = QPushButton('Delete note')
button_note_save = QPushButton('Save note')

field_tag = QLineEdit('')
field_tag.setPlaceholderText('Enter a tag...')
field_text = QTextEdit()
button_tag_add = QPushButton('Attach to note')
button_tag_del = QPushButton('Detach from note')
button_tag_search = QPushButton('Search notes by tag')
list_tags = QListWidget()
list_tags_label = QLabel('List of tags')

layout_notes = QHBoxLayout()
col_1 = QVBoxLayout()
col_1.addWidget(field_text)

col_2 = QVBoxLayout()
col_2.addWidget(list_notes_label)
col_2.addWidget(list_notes)
row_1 = QHBoxLayout()
row_1.addWidget(button_note_create)
row_1.addWidget(button_note_del)
row_2 = QHBoxLayout()
row_2.addWidget(button_note_save)
col_2.addLayout(row_1)
col_2.addLayout(row_2)

col_2.addWidget(list_tags_label)
col_2.addWidget(list_tags)
col_2.addWidget(field_tag)
row_3 = QHBoxLayout()
row_3.addWidget(button_tag_add)
row_3.addWidget(button_tag_del)
row_4 = QHBoxLayout()
row_4.addWidget(button_tag_search)

col_2.addLayout(row_3)
col_2.addLayout(row_4)

layout_notes.addLayout(col_1, stretch=2)
layout_notes.addLayout(col_2, stretch=1)
notes_win.setLayout(layout_notes)


def show_note():
    key = list_notes.selectedItems()[0].text()
    field_text.setText(notes[key]['text'])
    list_tags.clear()
    list_tags.addItems(notes[key]['tags'])


def add_note():
    note_name, ok = QInputDialog.getText(notes_win, 'Add note', 'Note title:')
    if ok and note_name != '':
        notes[note_name] = {'text': '', 'tags': []}
        list_notes.addItem(note_name)
        list_tags.addItems(notes[note_name]['tags'])


def save_note():
    if list_notes.selectedItems():
        key = list_notes.selectedItems()[0].text()
        notes[key]['text'] = field_text.toPlainText()
        with open('notes_data.json', 'w') as file:
            json.dump(notes, file, sort_keys=True, ensure_ascii=False)


def del_note():
    if list_notes.selectedItems():
        key = list_notes.selectedItems()[0].text()
        del notes[key]
        list_notes.clear()
        list_tags.clear()
        field_text.clear()
        list_notes.addItems(notes)
        with open('notes_data.json', 'w') as file:
            json.dump(notes, file, sort_keys=True, ensure_ascii=False)


def add_tag():
    if list_notes.selectedItems():
        key = list_notes.selectedItems()[0].text()
        tag = field_tag.text()
        if tag not in notes[key]['tags']:
            notes[key]['tags'].append(tag)
            list_tags.addItem(tag)
            field_tag.clear()
        with open('notes_data.json', 'w') as file:
            json.dump(notes, file, sort_keys=True, ensure_ascii=False)


def del_tag():
    if list_notes.selectedItems() and list_tags.selectedItems():
        key = list_notes.selectedItems()[0].text()
        tag = list_tags.selectedItems()[0].text()
        notes[key]['tags'].remove(tag)
        list_tags.clear()
        list_tags.addItems(notes[key]['tags'])
        with open('notes_data.json', 'w') as file:
            json.dump(notes, file, sort_keys=True, ensure_ascii=False)


def search_tag():
    tag = field_tag.text()
    if button_tag_search.text() == 'Search notes by tag' and tag:
        # collect the notes carrying the tag
        # (the original wrote to a non-existent notes.filtered attribute)
        filtered = [note for note in notes if tag in notes[note]['tags']]
        list_notes.clear()
        list_tags.clear()
        list_notes.addItems(filtered)
        button_tag_search.setText('Reset search')
    else:
        list_notes.clear()
        list_tags.clear()
        list_notes.addItems(notes)
        button_tag_search.setText('Search notes by tag')


button_note_create.clicked.connect(add_note)
list_notes.itemClicked.connect(show_note)
button_note_save.clicked.connect(save_note)
button_note_del.clicked.connect(del_note)
button_tag_add.clicked.connect(add_tag)
button_tag_del.clicked.connect(del_tag)
button_tag_search.clicked.connect(search_tag)

notes_win.show()

with open('notes_data.json', 'r') as file:
    notes = json.load(file)  # the original discarded the loaded data
list_notes.addItems(notes)
app.exec_()  # start the event loop (the snippet is truncated at the source)
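
For reference, a standalone sketch (not in the original file) of the JSON round-trip the app relies on for persistence; notes_data.json is the same file name the app uses.

import json

notes = {'Welcome!': {'text': 'hello', 'tags': ['welcome']}}
with open('notes_data.json', 'w') as file:
    json.dump(notes, file, sort_keys=True, ensure_ascii=False)
with open('notes_data.json', 'r') as file:
    notes = json.load(file)
print(list(notes))   # the note titles that list_notes.addItems(notes) would display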

