How to use all_first method in Selene

Best Python code snippets using selene_python
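Note that in all three snippets below, all_first is an ordinary Python name defined by each project (a DataFrame, a list, and a Counter, respectively), not a method of the Selene library itself. The closest construct in Selene's own API is taking the first element of a lazy collection via browser.all(...).first. A minimal sketch, assuming Selene 2.x is installed and that the opened page actually contains the queried elements:

# Minimal Selene sketch. Assumptions: selene 2.x, and that the page at the
# example URL renders <li> elements whose first entry reads "A/B Testing"
# (swap in your own URL and selector).
from selene import browser, have

browser.open('https://the-internet.herokuapp.com/')
items = browser.all('li')    # lazy collection of all matching elements
first = items.first          # the "all ... first" idiom in Selene
first.should(have.text('A/B Testing'))
browser.quit()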

waze-real-time.py

Source: waze-real-time.py (GitHub)


# -*- coding: utf-8 -*-
"""
Created on Tue Jun 1 16:03:44 2021
@author: mayur
"""
import requests
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
from datetime import timedelta
import time
import warnings
from catboost import CatBoostClassifier


def waze_download(polygon_path):

    with open(polygon_path, 'r') as f:
        poly_data = json.load(f)

    responses = []
    for d in poly_data['polygons'][:-1]:
        if d['city_name'].startswith('Sao Paulo'):  # or d['city_name'] in ['Xalapa','Quito','Lima','Montivideo']:

            try:
                response = requests.get(d['url_base'] + d['polygon'])
                responses.append(response.json())
            except Exception as e:
                # responses[poly_id] = None
                # capture_exception(e)
                print(e)

    alerts = []
    jams = []
    for data in responses:
        time_pub = data['endTime'][:-4]
        if 'alerts' in data.keys():
            alerts = alerts + data['alerts']
        if 'jams' in data.keys():
            jams = jams + data['jams']

    return alerts, jams, time_pub


def process_alerts(alerts_list, pub):

    dfa = pd.DataFrame(alerts_list)
    dfa = dfa[dfa.city.isin(['São Paulo'])]  # ,'Xalapa','Quito','Montevideo','Miraflores'])]
    dfa['pub_utc_date'] = pub
    dfa = dfa[dfa.type == 'ACCIDENT']
    dfa['geometry'] = gpd.GeoSeries(dfa['location'].apply(
        lambda coord: Point((coord['x'], coord['y']))), crs='EPSG:4326').to_crs('EPSG:22523')
    dfa['longitude'] = dfa['location'].apply(lambda coord: coord['x'])
    dfa['latitude'] = dfa['location'].apply(lambda coord: coord['y'])

    dfa = dfa.drop(['location'], axis=1)
    dfa = dfa.drop_duplicates()
    dfa = dfa.drop_duplicates(subset=['pub_utc_date', 'uuid'], keep='first')

    dfa = dfa.reset_index().set_index(['uuid'])
    dfa = dfa.sort_index()
    alerts_uuids = dfa.index.unique()

    return dfa, alerts_uuids


def process_jams(jams_list, dfjtotal, pub):
    dfj = pd.DataFrame(jams_list)
    dfj = dfj[dfj.city.isin(['São Paulo'])]  # ,'Xalapa','Quito','Montevideo','Miraflores'])]
    dfj['pub_utc_date'] = pub

    dfj['geometry'] = gpd.GeoSeries(dfj.line.apply(
        lambda x: LineString([(coord['x'], coord['y']) for coord in x])), crs='EPSG:4326').to_crs('EPSG:22523')

    dfj = dfj.drop(['line', 'segments'], axis=1)
    dfj = dfj.drop_duplicates()
    dfj = dfj.drop_duplicates(subset=['pub_utc_date', 'uuid'], keep='first')
    dfj['street'] = dfj.street.apply(str)

    dfjtotal = pd.concat([dfjtotal, dfj])

    # keeps only last 60 minutes of jam data in memory
    if len(dfjtotal.pub_utc_date.unique()) > 60:
        older_date = sorted(dfjtotal.pub_utc_date.unique().tolist())[0]
        dfjtotal = dfjtotal[dfjtotal.pub_utc_date != older_date]

    return dfjtotal


def search_acc_jams(alerts_uuids, dfa, jams_ix):

    jam_acc = []
    # t = len(alerts_uuids)
    # i = 0
    for au in alerts_uuids:

        acid = dfa.loc[[au]]
        acid = acid.reset_index().set_index(['pub_utc_date', 'street'])
        # print(i, '/', t, au, len(acid), time.ctime())
        # i += 1
        try:
            jrows = jams_ix.loc[jams_ix.index.intersection(acid.index)]
            jrows['d'] = jrows.geometry.apply(lambda x: acid.iloc[0].geometry.distance(x))
            jrows = jrows[jrows.d <= 200]
        except:
            jrows = []
        if len(jrows) > 0:
            jrows = jrows[jrows.d == jrows.d.min()]

            jrows = jrows.reset_index().drop_duplicates(subset=['pub_utc_date']).set_index(['pub_utc_date', 'street'])

            acid.loc[jrows.index, 'juuid'] = jrows.uuid.apply(int)
        jam_acc.extend(acid.reset_index().loc[:, ['pub_utc_date', 'uuid', 'juuid']].values.tolist())
    return jam_acc


def merge_acc_jams(dfa, jams_ix, jam_acc):
    # print(' preprocessing merge', time.ctime())
    accids2 = dfa.reset_index().set_index(['pub_utc_date', 'uuid'])
    accids2 = accids2.sort_index()
    jams_ix2 = jams_ix.reset_index().set_index(['pub_utc_date', 'uuid'])
    jams_ix2 = jams_ix2.sort_index()
    jadf = pd.DataFrame(jam_acc)
    jadfnn = jadf.dropna()

    # print(' merging data', time.ctime())
    joined = []
    if len(jadfnn) > 0:
        for i, j in enumerate(jadfnn[1].unique()):
            # if i % 100 == 0:
            #     print(i, time.ctime())
            if len(jadf) > 0:
                keys = jadf[jadf[1] == j]
                aa = accids2.loc[list(keys[[0, 1]].itertuples(index=False, name=None))]

                keys2 = jams_ix2.index.intersection(list(keys[[0, 2]].itertuples(index=False, name=None)))
                ja = jams_ix2.loc[keys2]
                ja = ja.reset_index().drop_duplicates(subset=['pub_utc_date']).set_index(['pub_utc_date', 'uuid'])

                result = pd.merge(aa.reset_index(), ja.reset_index(), on=["pub_utc_date", "street"]).set_index('index')
                joined.append(result)

    if len(joined) > 0:
        dfjoined = pd.concat(joined)
        dfjoined = dfjoined[~dfjoined.index.duplicated(keep='first')]
        notfoundf = accids2[~accids2['index'].isin(dfjoined.index)]
        notfoundf = notfoundf.reset_index().set_index('index')
        notfoundf.rename(columns={'uuid': 'uuid_x', 'type': 'type_x', 'country': 'country_x', 'city': 'city_x', 'roadType': 'roadType_x', 'pubMillis': 'pubMillis_x', 'geometry': 'geometry_x'}, inplace=True)
        all_df = pd.concat([dfjoined, notfoundf])
        all_df = all_df.sort_values('pub_utc_date')
    else:
        notfoundf = accids2
        notfoundf = notfoundf.reset_index().set_index('index')
        notfoundf.rename(columns={'uuid': 'uuid_x', 'type': 'type_x', 'country': 'country_x', 'city': 'city_x', 'roadType': 'roadType_x', 'pubMillis': 'pubMillis_x', 'geometry': 'geometry_x'}, inplace=True)
        all_df = notfoundf
        all_df = all_df.sort_values('pub_utc_date')
        all_df['length'] = 0
        all_df['level'] = 0
        all_df['delay'] = 0
        all_df['type_y'] = 'NONE'

    return joined, all_df


def process_extra_features(all_df, jams_ix):

    all_first = all_df.loc[all_df.uuid_x.drop_duplicates(keep='first').index]
    all_first.length.fillna(0, inplace=True)
    all_first.level.fillna(0, inplace=True)
    all_first.delay.fillna(0, inplace=True)
    all_first['pub_utc_date0'] = pd.to_datetime(all_first['pub_utc_date'])
    all_first['pub_utc_date1'] = all_first.pub_utc_date0 - timedelta(minutes=1)
    all_first['pub_utc_date1'] = all_first['pub_utc_date1'].apply(lambda x: str(x))  # +'.000')

    all_first['pub_utc_date5'] = all_first.pub_utc_date0 - timedelta(minutes=5)
    all_first['pub_utc_date5'] = all_first['pub_utc_date5'].apply(lambda x: str(x))  # +'.000')

    all_first['pub_utc_date10'] = all_first.pub_utc_date0 - timedelta(minutes=10)
    all_first['pub_utc_date10'] = all_first['pub_utc_date10'].apply(lambda x: str(x))  # +'.000')

    all_first['pub_utc_date20'] = all_first.pub_utc_date0 - timedelta(minutes=20)
    all_first['pub_utc_date20'] = all_first['pub_utc_date20'].apply(lambda x: str(x))  # +'.000')

    all_first['pub_utc_date30'] = all_first.pub_utc_date0 - timedelta(minutes=30)
    all_first['pub_utc_date30'] = all_first['pub_utc_date30'].apply(lambda x: str(x))  # +'.000')

    for dt in ['1', '5', '10', '20', '30']:
        # ifo = 0
        # inofo = 0
        for row in all_first.iterrows():
            # break
            try:
                zzz = jams_ix.loc[(row[1]['pub_utc_date' + dt], row[1].street)]
                zzz['d'] = zzz.geometry.apply(lambda x: x.distance(row[1].geometry_x))
                zzz = zzz[zzz.d <= 200]
                zzz = zzz[zzz.d == zzz.d.min()]

                all_first.loc[row[0], 'len' + dt] = row[1].length - zzz.length[0]
                all_first.loc[row[0], 'lev' + dt] = row[1].level - zzz.level[0]
                all_first.loc[row[0], 'del' + dt] = row[1].delay - zzz.delay[0]
                # ifo += 1
            except:
                all_first.loc[row[0], 'len' + dt] = row[1].length - 0
                all_first.loc[row[0], 'lev' + dt] = row[1].level - 0
                all_first.loc[row[0], 'del' + dt] = row[1].delay - 0
    return all_first


def merge_extra_features(alerts_uuids, all_first, all_df, dfatotal):
    for au in alerts_uuids:

        first = all_first[all_first.uuid_x == au]

        cols = ['len1', 'lev1', 'del1', 'len5', 'lev5', 'del5', 'len10', 'lev10',
                'del10', 'len20', 'lev20', 'del20', 'len30', 'lev30', 'del30']
        zzz = all_df[all_df.uuid_x == first.uuid_x.iloc[0]]

        firstrep = first.loc[first.index.repeat(len(zzz)), cols]
        firstrep.index = zzz.index
        all_df.loc[firstrep.index, cols] = firstrep.loc[:, cols]

    for k in all_df.uuid_x.unique():
        # break
        temp = all_df[all_df.uuid_x == k]

        if temp.length.isnull().all():
            temp['length'] = 0
            temp['level'] = 0
            temp['delay'] = 0
            temp['type_y'] = 'NONE'
        elif temp.length.isnull().any():
            # if i > 5:
            #     break
            # i += 1
            temp['length'] = temp.length.mask(temp.length.ffill().notnull(), temp.length.interpolate(limit_area='inside'))
            temp['level'] = temp.level.mask(temp.level.ffill().notnull(), temp.level.interpolate(limit_area='inside'))
            temp['delay'] = temp.delay.mask(temp.delay.ffill().notnull(), temp.delay.interpolate(limit_area='inside'))
            temp['type_y'] = temp.type_y.mask(temp.type_y.ffill().notnull(), temp.type_y.bfill())

            temp['length'].fillna(0, inplace=True)
            temp['level'].fillna(0, inplace=True)
            temp['delay'].fillna(0, inplace=True)
            temp['type_y'].fillna('NONE', inplace=True)

        all_df.loc[temp.index, 'length'] = temp['length']
        all_df.loc[temp.index, 'level'] = temp['level']
        all_df.loc[temp.index, 'delay'] = temp['delay']
        all_df.loc[temp.index, 'type_y'] = temp['type_y']

    # shows alerts from last 10 minutes
    dfatotal = pd.concat([dfatotal, all_df])
    if len(dfatotal.pub_utc_date.unique()) > 10:
        older_date = sorted(dfatotal.pub_utc_date.unique().tolist())[0]
        dfatotal = dfatotal[dfatotal.pub_utc_date != older_date]

    all_df = dfatotal.drop_duplicates('uuid_x', keep='last').reset_index(drop=True)

    return all_df, dfatotal


def clean_and_predict_proba(all_df, model):
    # remove unwanted columns

    if len(joined) > 0:
        x_predict = all_df.drop(['pub_utc_date', 'uuid_x', 'country_x', 'city_x', 'type_x',
                                 'magvar', 'street', 'pubMillis_x', 'reportDescription',
                                 'geometry_x', 'longitude', 'latitude', 'uuid_y', 'country_y',
                                 'city_y', 'speedKMH', 'turnType', 'endNode', 'speed',
                                 'roadType_y', 'pubMillis_y', 'blockingAlertUuid', 'startNode',
                                 'geometry_y'], axis=1)
    else:
        x_predict = all_df.drop(['pub_utc_date', 'uuid_x', 'country_x', 'city_x', 'type_x',
                                 'magvar', 'street', 'pubMillis_x', 'reportDescription',
                                 'geometry_x', 'longitude', 'latitude'], axis=1)

    x_predict.rename(columns={'nThumbsUp': 'nthumbsup', 'reportRating': 'reportrating', 'roadType_x': 'roadtype'}, inplace=True)
    x_predict['subtype'] = x_predict['subtype'].fillna('not reported')
    x_predict['roadtype'] = x_predict.roadtype.fillna(0)
    x_predict['roadtype'] = x_predict.roadtype.apply(str)
    x_predict = x_predict[['subtype', 'roadtype', 'nthumbsup', 'reliability', 'reportrating',
                           'confidence', 'length', 'level', 'delay', 'type_y', 'len1', 'lev1',
                           'del1', 'len5', 'lev5', 'del5', 'len10', 'lev10', 'del10', 'len20',
                           'lev20', 'del20', 'len30', 'lev30', 'del30']]

    y_prob = model.predict_proba(x_predict)
    probdf = pd.DataFrame(y_prob)
    return probdf


if __name__ == '__main__':
    pd.options.mode.chained_assignment = None
    warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

    # Cities timezones
    # xalapa = 'America/Mexico_City'
    # quito = 'America/Guayaquil'
    # montevideo = 'America/Montevideo'
    # miraflores = 'America/Lima'
    saopaulo = 'America/Sao_Paulo'

    polygon_path = 'data/polygons.json'  # polygon and url file path
    path = 'data/real_time/'  # final csv save location
    model_path = "data/waze_prob_model.cbm"  # trained classification model path

    model = CatBoostClassifier()
    model.load_model(model_path)

    dfatotal = pd.DataFrame()

    try:
        dfjtotal = pd.read_pickle('data/dfjtotal')
    except:
        dfjtotal = pd.DataFrame()

    while True:
        net = False
        t0 = time.time()
        try:

            alerts, jams, pub = waze_download(polygon_path)

            dfa, alerts_uuids = process_alerts(alerts, pub)
            dfjtotal = process_jams(jams, dfjtotal, pub)

            jams_ix = dfjtotal.set_index(['pub_utc_date', 'street'])
            jams_ix = jams_ix.sort_index()

            jam_acc = search_acc_jams(alerts_uuids, dfa, jams_ix)
            joined, all_df = merge_acc_jams(dfa, jams_ix, jam_acc)

            all_first = process_extra_features(all_df, jams_ix)
            all_df, dfatotal = merge_extra_features(alerts_uuids, all_first, all_df, dfatotal)

            probdf = clean_and_predict_proba(dfatotal, model)

            # datas = pd.to_datetime(all_df['pub_utc_date'])
            # fix cities timezone
            datasp = pd.to_datetime(all_df[all_df.city_x == 'São Paulo']['pub_utc_date'], utc=True).dt.tz_convert(saopaulo)
            # dataxa = pd.to_datetime(all_df[all_df.city_x == 'Xalapa']['pub_utc_date'], utc=True).dt.tz_convert(xalapa)
            # dataqt = pd.to_datetime(all_df[all_df.city_x == 'Quito']['pub_utc_date'], utc=True).dt.tz_convert(quito)
            # datamv = pd.to_datetime(all_df[all_df.city_x == 'Montevideo']['pub_utc_date'], utc=True).dt.tz_convert(montevideo)
            # datamf = pd.to_datetime(all_df[all_df.city_x == 'Miraflores']['pub_utc_date'], utc=True).dt.tz_convert(miraflores)

            # datas = pd.concat([datasp, dataxa, dataqt, datamv, datamf]).sort_index()
            datas = datasp.sort_index()

            csvtodisk = all_df[['longitude', 'latitude', 'street', 'city_x']]
            csvtodisk['prob'] = probdf[1].round(2)
            csvtodisk['hour'] = datas.apply(lambda x: x.hour)
            csvtodisk['minute'] = datas.apply(lambda x: x.minute)
            csvtodisk['day'] = datas.apply(lambda x: x.day)
            csvtodisk['month'] = datas.apply(lambda x: x.month)
            csvtodisk['year'] = datas.apply(lambda x: x.year)
            csvtodisk['weekday'] = datas.apply(lambda x: x.weekday())
            csvtodisk.rename(columns={'city_x': 'city'}, inplace=True)

            csvtodisk.to_csv(path + 'acc_realtime.csv', index=False)
            dfjtotal.to_pickle('data/dfjtotal')
        except:
            net = True
            pass

        t1 = time.time()
        if net:
            print('download error', pub, ' call', (t1 - t0), 'secs. next try in', 60 - (t1 - t0), 'secs')
        else:
            print('completed', pub, ' call', (t1 - t0), 'secs. sleeping for', 60 - (t1 - t0), 'secs')
        time.sleep(60 - (t1 - t0))
...
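
In this script, all_first has nothing to do with Selene: process_extra_features builds it by keeping only the first observed row per accident uuid (all_df.loc[all_df.uuid_x.drop_duplicates(keep='first').index]), then derives jam length/level/delay deltas at 1, 5, 10, 20, and 30 minutes before that first observation. The row-selection idiom in isolation, as a minimal sketch on made-up data:

import pandas as pd

# Toy frame standing in for all_df (made-up values, for illustration only).
all_df = pd.DataFrame({
    'uuid_x': ['a', 'a', 'b', 'b', 'b'],
    'length': [10, 12, 7, 8, 9],
})

# drop_duplicates(keep='first') keeps the first occurrence of each uuid;
# its index then selects those same rows from the full frame.
all_first = all_df.loc[all_df.uuid_x.drop_duplicates(keep='first').index]
print(all_first)  # rows 0 and 2: the first observation per uuid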


neighbor_sep_scatter.py

Source: neighbor_sep_scatter.py (GitHub)


from astropy.table import Table
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm
import pandas as pd
import matplotlib.lines as mlines
import os.path
import scipy.stats

loc = '/Users/josh/projects/intro/sources/'
res = 150
sources = pd.read_csv('sources.csv')
sources = sources['circinus'].values.tolist()
all_first, all_second, all_third = [], [], []
for i in range(len(sources)):
    fp = loc + str(sources[i]) + '/' + str(sources[i]) + '_' + str(res) + 'pc_cloud_stats.csv'
    if os.path.isfile(fp):
        cat = pd.read_csv(fp)
        # ALL GALAXIES
        all_first = all_first + cat['min_dist'].tolist()
        all_second = all_second + cat['min_dist2nd'].tolist()
        all_third = all_third + cat['min_dist3rd'].tolist()
num_bins = 15
bins = scipy.stats.binned_statistic(all_first, all_second, statistic='median', bins=num_bins, range=(0, 1000))
medians = bins[0]
xvals = (bins[1][1:] + bins[1][:-1]) / 2
bins2 = scipy.stats.binned_statistic(all_first, all_third, statistic='median', bins=num_bins, range=(0, 1000))
medians2 = bins2[0]
xvals2 = (bins2[1][1:] + bins2[1][:-1]) / 2
# Get percentiles
err1 = np.zeros((2, num_bins))
err2 = np.zeros((2, num_bins))
for i in range(num_bins):
    err1[:, i] = [np.percentile(np.take(all_second, np.where(bins[2] == (i + 1))), 16), np.percentile(np.take(all_second, np.where(bins[2] == (i + 1))), 84)]
    err2[:, i] = [np.percentile(np.take(all_third, np.where(bins[2] == (i + 1))), 16), np.percentile(np.take(all_third, np.where(bins[2] == (i + 1))), 84)]
err1 = np.abs(err1 - medians)
err2 = np.abs(err2 - medians2)
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
axes[0].scatter(all_first, all_second, c='gray', s=3, alpha=0.2, label=r'$\rho$ = ' + str(np.around(scipy.stats.pearsonr(all_first, all_second)[0], 2)))
axes[0].set_xlim((0, 1000))
axes[0].set_ylim((0, 2000))
axes[0].scatter(xvals, medians, c='b')
axes[0].errorbar(xvals, medians, c='b', capsize=4, ls='none', xerr=None, yerr=err1)
axes[0].set_title(str(res) + 'pc Resolution')
axes[0].set_xlabel('Distance to 1st Nearest Neighbor (pc)')
axes[0].set_ylabel('Distance to 2nd Nearest Neighbor (pc)')
axes[1].scatter(all_first, all_third, c='gray', s=3, alpha=0.2, label=r'$\rho$ = ' + str(np.around(scipy.stats.pearsonr(all_first, all_third)[0], 2)))
axes[1].set_xlim((0, 1000))
axes[1].set_ylim((0, 2000))
axes[1].scatter(xvals2, medians2, c='b')
axes[1].errorbar(xvals2, medians2, c='b', capsize=4, ls='none', xerr=None, yerr=err2)
axes[1].set_title(str(res) + 'pc Resolution')
axes[1].set_xlabel('Distance to 1st Nearest Neighbor (pc)')
axes[1].set_ylabel('Distance to 3rd Nearest Neighbor (pc)')
# for i in range(num_bins):
#     axes[0].scatter(xvals[i], medians[i], c='b')
#     axes[0].errorbar(xvals[i], medians[i], c='b', capsize=4, ls='none', xerr=None, yerr=err1)
#     axes[1].scatter(xvals2[i], medians2[i], c='b')
#     axes[1].errorbar(xvals2[i], medians2[i], c='b', capsize=4, ls='none', xerr=None, yerr=[[np.percentile(np.take(all_third, np.where(bins2[2]==(i+1))),16)],[np.percentile(np.take(all_third, np.where(bins2[2]==(i+1))),84)]])
x1 = np.linspace(0, 2500, 2501)
axes[0].plot(x1, x1 * (np.median(all_second) / np.median(all_first)), c='r', linestyle='--', linewidth=3, label='Slope: Median 2nd / Median 1st')
axes[1].plot(x1, x1 * (np.median(all_third) / np.median(all_first)), c='r', linestyle='--', linewidth=3, label='Slope: Median 3rd / Median 1st')
axes[0].legend()
axes[1].legend()
plt.legend()
plt.show()
...
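
Here all_first is a plain list accumulating each cloud's distance to its first nearest neighbor across galaxies; the script then uses scipy.stats.binned_statistic to compute the median 2nd- and 3rd-neighbor distances per bin of all_first, with 16th/84th percentiles as error bars. The binning step on its own, as a minimal sketch with synthetic data:

import numpy as np
import scipy.stats

rng = np.random.default_rng(0)
all_first = rng.uniform(0, 1000, 500)                   # synthetic x values
all_second = all_first * 1.5 + rng.normal(0, 50, 500)   # synthetic y values

# Median of all_second within 15 equal-width bins of all_first.
stat = scipy.stats.binned_statistic(all_first, all_second,
                                    statistic='median', bins=15, range=(0, 1000))
medians = stat.statistic                                  # one median per bin
centers = (stat.bin_edges[1:] + stat.bin_edges[:-1]) / 2  # bin midpoints
print(centers, medians)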


stats.py

Source: stats.py (GitHub)


# %%
token = "df8f82c719ece3df8ff58dbc6cdebc21"
# %%
import httpx

query = {
    "module": "API",
    "token_auth": token,
    "method": "Live.getLastVisitsDetails",
    "idSite": "3",
    "period": "month",
    "date": "today",
    "format": "JSON",
    "filter_limit": "-1",
}
response = httpx.get("https://matomo.schuetze.link/index.php", params=query)
response.raise_for_status()
data = response.json()
# %%
from collections import Counter
import idna
import re

all_domains = list()
all_first = list()
for i in data:
    urls = [action["url"] for action in i["actionDetails"]]
    if any(
        not re.match(r"https://[a-z-]+\.meine-stadt-transparent.de", url) for url in urls
    ):
        continue
    domains = [idna.decode(url.split("/")[2]).split(".")[0] for url in urls]
    all_domains.extend(domains)
    all_first.append(domains[0])
all_domains = Counter(all_domains)
all_first = Counter(all_first)
print(all_domains)
print(all_first)
# %%
import numpy
import matplotlib.pyplot as plt

labels = list(set(all_domains.keys()) | set(all_first.keys()))
labels.sort(key=lambda x: all_first.get(x, 0))
visitor_values = [all_domains.get(label, 0) for label in labels]
actions_values = [all_first.get(label, 0) for label in labels]
x = numpy.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars
fig, ax = plt.subplots()
rects1 = ax.barh(x - width / 2, visitor_values, width, label="By action")
rects2 = ax.barh(x + width / 2, actions_values, width, label="By visitor")
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_yticks(x)
ax.set_yticklabels(labels)
ax.legend()
fig.tight_layout()
plt.title("Visitors and actions per city in the last month")
plt.show()
# %%
import pandas

existing = pandas.read_csv("existing.csv").fillna("")
print(sorted(list(existing["name"])))
print(sorted(all_domains.keys()))
...
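
In this Matomo analytics script, all_first is a collections.Counter over the first subdomain each visitor opened, while all_domains counts every page action; comparing the two contrasts visitors versus actions per city. The counting idiom on its own, as a minimal sketch with made-up visit data:

from collections import Counter

# Each inner list stands in for one visit's sequence of subdomains (made-up data).
visits = [
    ['munich', 'munich', 'berlin'],
    ['berlin'],
    ['munich', 'hamburg'],
]

all_domains = Counter()  # every page action, by subdomain
all_first = Counter()    # only the first action of each visit
for domains in visits:
    all_domains.update(domains)
    all_first[domains[0]] += 1

print(all_domains)  # Counter({'munich': 3, 'berlin': 2, 'hamburg': 1})
print(all_first)    # Counter({'munich': 2, 'berlin': 1})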


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.

Run Selene automation tests on the LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest now!

Get 100 free minutes of automation testing!

Next-Gen App & Browser Testing Cloud
