How to use all_first method in Selene

Best Python code snippets using selene_python
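Note that in all three snippets below, all_first is an ordinary Python name defined by each project (a DataFrame, a list, and a Counter, respectively), not a method of the Selene library itself. The closest construct in Selene's own API is taking the first element of a lazy collection via browser.all(...).first. A minimal sketch, assuming Selene 2.x is installed and that the opened page actually contains the queried elements:

# Minimal Selene sketch. Assumptions: selene 2.x, and that the page at the
# example URL renders <li> elements whose first entry reads "A/B Testing"
# (swap in your own URL and selector).
from selene import browser, have

browser.open('https://the-internet.herokuapp.com/')
items = browser.all('li')    # lazy collection of all matching elements
first = items.first          # the "all ... first" idiom in Selene
first.should(have.text('A/B Testing'))
browser.quit()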

waze-real-time.py

Source: waze-real-time.py (GitHub)


# -*- coding: utf-8 -*-
"""
Created on Tue Jun 1 16:03:44 2021
@author: mayur
"""
import requests
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
from datetime import timedelta
import time
import warnings
from catboost import CatBoostClassifier


def waze_download(polygon_path):

    with open(polygon_path, 'r') as f:
        poly_data = json.load(f)

    responses = []
    for d in poly_data['polygons'][:-1]:
        if d['city_name'].startswith('Sao Paulo'):  # or d['city_name'] in ['Xalapa','Quito','Lima','Montivideo']:

            try:
                response = requests.get(d['url_base'] + d['polygon'])
                responses.append(response.json())
            except Exception as e:
                # responses[poly_id] = None
                # capture_exception(e)
                print(e)

    alerts = []
    jams = []
    for data in responses:
        time_pub = data['endTime'][:-4]
        if 'alerts' in data.keys():
            alerts = alerts + data['alerts']
        if 'jams' in data.keys():
            jams = jams + data['jams']

    return alerts, jams, time_pub


def process_alerts(alerts_list, pub):

    dfa = pd.DataFrame(alerts_list)
    dfa = dfa[dfa.city.isin(['São Paulo'])]  # ,'Xalapa','Quito','Montevideo','Miraflores'])]
    dfa['pub_utc_date'] = pub
    dfa = dfa[dfa.type == 'ACCIDENT']
    dfa['geometry'] = gpd.GeoSeries(dfa['location'].apply(
        lambda coord: Point((coord['x'], coord['y']))), crs='EPSG:4326').to_crs('EPSG:22523')
    dfa['longitude'] = dfa['location'].apply(lambda coord: coord['x'])
    dfa['latitude'] = dfa['location'].apply(lambda coord: coord['y'])

    dfa = dfa.drop(['location'], axis=1)
    dfa = dfa.drop_duplicates()
    dfa = dfa.drop_duplicates(subset=['pub_utc_date', 'uuid'], keep='first')

    dfa = dfa.reset_index().set_index(['uuid'])
    dfa = dfa.sort_index()
    alerts_uuids = dfa.index.unique()

    return dfa, alerts_uuids


def process_jams(jams_list, dfjtotal, pub):
    dfj = pd.DataFrame(jams_list)
    dfj = dfj[dfj.city.isin(['São Paulo'])]  # ,'Xalapa','Quito','Montevideo','Miraflores'])]
    dfj['pub_utc_date'] = pub

    dfj['geometry'] = gpd.GeoSeries(dfj.line.apply(
        lambda x: LineString([(coord['x'], coord['y']) for coord in x])), crs='EPSG:4326').to_crs('EPSG:22523')

    dfj = dfj.drop(['line', 'segments'], axis=1)
    dfj = dfj.drop_duplicates()
    dfj = dfj.drop_duplicates(subset=['pub_utc_date', 'uuid'], keep='first')
    dfj['street'] = dfj.street.apply(str)

    dfjtotal = pd.concat([dfjtotal, dfj])

    # keeps only last 60 minutes of jam data in memory
    if len(dfjtotal.pub_utc_date.unique()) > 60:
        older_date = sorted(dfjtotal.pub_utc_date.unique().tolist())[0]
        dfjtotal = dfjtotal[dfjtotal.pub_utc_date != older_date]

    return dfjtotal


def search_acc_jams(alerts_uuids, dfa, jams_ix):

    jam_acc = []
    # t = len(alerts_uuids)
    # i = 0
    for au in alerts_uuids:

        acid = dfa.loc[[au]]
        acid = acid.reset_index().set_index(['pub_utc_date', 'street'])
        # print(i, '/', t, au, len(acid), time.ctime())
        # i += 1
        try:
            jrows = jams_ix.loc[jams_ix.index.intersection(acid.index)]
            jrows['d'] = jrows.geometry.apply(lambda x: acid.iloc[0].geometry.distance(x))
            jrows = jrows[jrows.d <= 200]
        except:
            jrows = []
        if len(jrows) > 0:
            jrows = jrows[jrows.d == jrows.d.min()]

            jrows = jrows.reset_index().drop_duplicates(subset=['pub_utc_date']).set_index(['pub_utc_date', 'street'])

            acid.loc[jrows.index, 'juuid'] = jrows.uuid.apply(int)
        jam_acc.extend(acid.reset_index().loc[:, ['pub_utc_date', 'uuid', 'juuid']].values.tolist())
    return jam_acc


def merge_acc_jams(dfa, jams_ix, jam_acc):
    # print(' preprocessing merge', time.ctime())
    accids2 = dfa.reset_index().set_index(['pub_utc_date', 'uuid'])
    accids2 = accids2.sort_index()
    jams_ix2 = jams_ix.reset_index().set_index(['pub_utc_date', 'uuid'])
    jams_ix2 = jams_ix2.sort_index()
    jadf = pd.DataFrame(jam_acc)
    jadfnn = jadf.dropna()

    # print(' merging data', time.ctime())
    joined = []
    if len(jadfnn) > 0:
        for i, j in enumerate(jadfnn[1].unique()):
            # if i % 100 == 0:
            #     print(i, time.ctime())
            if len(jadf) > 0:
                keys = jadf[jadf[1] == j]
                aa = accids2.loc[list(keys[[0, 1]].itertuples(index=False, name=None))]

                keys2 = jams_ix2.index.intersection(list(keys[[0, 2]].itertuples(index=False, name=None)))
                ja = jams_ix2.loc[keys2]
                ja = ja.reset_index().drop_duplicates(subset=['pub_utc_date']).set_index(['pub_utc_date', 'uuid'])

                result = pd.merge(aa.reset_index(), ja.reset_index(), on=["pub_utc_date", "street"]).set_index('index')
                joined.append(result)

    if len(joined) > 0:
        dfjoined = pd.concat(joined)
        dfjoined = dfjoined[~dfjoined.index.duplicated(keep='first')]
        notfoundf = accids2[~accids2['index'].isin(dfjoined.index)]
        notfoundf = notfoundf.reset_index().set_index('index')
        notfoundf.rename(columns={'uuid': 'uuid_x', 'type': 'type_x', 'country': 'country_x', 'city': 'city_x', 'roadType': 'roadType_x', 'pubMillis': 'pubMillis_x', 'geometry': 'geometry_x'}, inplace=True)
        all_df = pd.concat([dfjoined, notfoundf])
        all_df = all_df.sort_values('pub_utc_date')
    else:
        notfoundf = accids2
        notfoundf = notfoundf.reset_index().set_index('index')
        notfoundf.rename(columns={'uuid': 'uuid_x', 'type': 'type_x', 'country': 'country_x', 'city': 'city_x', 'roadType': 'roadType_x', 'pubMillis': 'pubMillis_x', 'geometry': 'geometry_x'}, inplace=True)
        all_df = notfoundf
        all_df = all_df.sort_values('pub_utc_date')
        all_df['length'] = 0
        all_df['level'] = 0
        all_df['delay'] = 0
        all_df['type_y'] = 'NONE'

    return joined, all_df


def process_extra_features(all_df, jams_ix):

    all_first = all_df.loc[all_df.uuid_x.drop_duplicates(keep='first').index]
    all_first.length.fillna(0, inplace=True)
    all_first.level.fillna(0, inplace=True)
    all_first.delay.fillna(0, inplace=True)
    all_first['pub_utc_date0'] = pd.to_datetime(all_first['pub_utc_date'])
    all_first['pub_utc_date1'] = all_first.pub_utc_date0 - timedelta(minutes=1)
    all_first['pub_utc_date1'] = all_first['pub_utc_date1'].apply(lambda x: str(x))  # +'.000')

    all_first['pub_utc_date5'] = all_first.pub_utc_date0 - timedelta(minutes=5)
    all_first['pub_utc_date5'] = all_first['pub_utc_date5'].apply(lambda x: str(x))  # +'.000')

    all_first['pub_utc_date10'] = all_first.pub_utc_date0 - timedelta(minutes=10)
    all_first['pub_utc_date10'] = all_first['pub_utc_date10'].apply(lambda x: str(x))  # +'.000')

    all_first['pub_utc_date20'] = all_first.pub_utc_date0 - timedelta(minutes=20)
    all_first['pub_utc_date20'] = all_first['pub_utc_date20'].apply(lambda x: str(x))  # +'.000')

    all_first['pub_utc_date30'] = all_first.pub_utc_date0 - timedelta(minutes=30)
    all_first['pub_utc_date30'] = all_first['pub_utc_date30'].apply(lambda x: str(x))  # +'.000')

    for dt in ['1', '5', '10', '20', '30']:
        # ifo = 0
        # inofo = 0
        for row in all_first.iterrows():
            # break
            try:
                zzz = jams_ix.loc[(row[1]['pub_utc_date' + dt], row[1].street)]
                zzz['d'] = zzz.geometry.apply(lambda x: x.distance(row[1].geometry_x))
                zzz = zzz[zzz.d <= 200]
                zzz = zzz[zzz.d == zzz.d.min()]

                all_first.loc[row[0], 'len' + dt] = row[1].length - zzz.length[0]
                all_first.loc[row[0], 'lev' + dt] = row[1].level - zzz.level[0]
                all_first.loc[row[0], 'del' + dt] = row[1].delay - zzz.delay[0]
                # ifo += 1
            except:
                all_first.loc[row[0], 'len' + dt] = row[1].length - 0
                all_first.loc[row[0], 'lev' + dt] = row[1].level - 0
                all_first.loc[row[0], 'del' + dt] = row[1].delay - 0
    return all_first


def merge_extra_features(alerts_uuids, all_first, all_df, dfatotal):
    for au in alerts_uuids:

        first = all_first[all_first.uuid_x == au]

        cols = ['len1', 'lev1', 'del1', 'len5', 'lev5', 'del5', 'len10', 'lev10',
                'del10', 'len20', 'lev20', 'del20', 'len30', 'lev30', 'del30']
        zzz = all_df[all_df.uuid_x == first.uuid_x.iloc[0]]

        firstrep = first.loc[first.index.repeat(len(zzz)), cols]
        firstrep.index = zzz.index
        all_df.loc[firstrep.index, cols] = firstrep.loc[:, cols]

    for k in all_df.uuid_x.unique():
        # break
        temp = all_df[all_df.uuid_x == k]

        if temp.length.isnull().all():
            temp['length'] = 0
            temp['level'] = 0
            temp['delay'] = 0
            temp['type_y'] = 'NONE'
        elif temp.length.isnull().any():
            # if i > 5:
            #     break
            # i += 1
            temp['length'] = temp.length.mask(temp.length.ffill().notnull(), temp.length.interpolate(limit_area='inside'))
            temp['level'] = temp.level.mask(temp.level.ffill().notnull(), temp.level.interpolate(limit_area='inside'))
            temp['delay'] = temp.delay.mask(temp.delay.ffill().notnull(), temp.delay.interpolate(limit_area='inside'))
            temp['type_y'] = temp.type_y.mask(temp.type_y.ffill().notnull(), temp.type_y.bfill())

            temp['length'].fillna(0, inplace=True)
            temp['level'].fillna(0, inplace=True)
            temp['delay'].fillna(0, inplace=True)
            temp['type_y'].fillna('NONE', inplace=True)

        all_df.loc[temp.index, 'length'] = temp['length']
        all_df.loc[temp.index, 'level'] = temp['level']
        all_df.loc[temp.index, 'delay'] = temp['delay']
        all_df.loc[temp.index, 'type_y'] = temp['type_y']

    # shows alerts from last 10 minutes
    dfatotal = pd.concat([dfatotal, all_df])
    if len(dfatotal.pub_utc_date.unique()) > 10:
        older_date = sorted(dfatotal.pub_utc_date.unique().tolist())[0]
        dfatotal = dfatotal[dfatotal.pub_utc_date != older_date]

    all_df = dfatotal.drop_duplicates('uuid_x', keep='last').reset_index(drop=True)

    return all_df, dfatotal


def clean_and_predict_proba(all_df, model):
    # remove unwanted columns

    if len(joined) > 0:
        x_predict = all_df.drop(['pub_utc_date', 'uuid_x', 'country_x', 'city_x', 'type_x',
                                 'magvar', 'street', 'pubMillis_x', 'reportDescription',
                                 'geometry_x', 'longitude', 'latitude', 'uuid_y', 'country_y',
                                 'city_y', 'speedKMH', 'turnType', 'endNode', 'speed',
                                 'roadType_y', 'pubMillis_y', 'blockingAlertUuid', 'startNode',
                                 'geometry_y'], axis=1)
    else:
        x_predict = all_df.drop(['pub_utc_date', 'uuid_x', 'country_x', 'city_x', 'type_x',
                                 'magvar', 'street', 'pubMillis_x', 'reportDescription',
                                 'geometry_x', 'longitude', 'latitude'], axis=1)

    x_predict.rename(columns={'nThumbsUp': 'nthumbsup', 'reportRating': 'reportrating', 'roadType_x': 'roadtype'}, inplace=True)
    x_predict['subtype'] = x_predict['subtype'].fillna('not reported')
    x_predict['roadtype'] = x_predict.roadtype.fillna(0)
    x_predict['roadtype'] = x_predict.roadtype.apply(str)
    x_predict = x_predict[['subtype', 'roadtype', 'nthumbsup', 'reliability', 'reportrating',
                           'confidence', 'length', 'level', 'delay', 'type_y', 'len1', 'lev1',
                           'del1', 'len5', 'lev5', 'del5', 'len10', 'lev10', 'del10', 'len20',
                           'lev20', 'del20', 'len30', 'lev30', 'del30']]

    y_prob = model.predict_proba(x_predict)
    probdf = pd.DataFrame(y_prob)
    return probdf


if __name__ == '__main__':
    pd.options.mode.chained_assignment = None
    warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

    # Cities timezones
    # xalapa = 'America/Mexico_City'
    # quito = 'America/Guayaquil'
    # montevideo = 'America/Montevideo'
    # miraflores = 'America/Lima'
    saopaulo = 'America/Sao_Paulo'

    polygon_path = 'data/polygons.json'  # polygon and url file path
    path = 'data/real_time/'  # final csv save location
    model_path = "data/waze_prob_model.cbm"  # trained classification model path

    model = CatBoostClassifier()
    model.load_model(model_path)

    dfatotal = pd.DataFrame()

    try:
        dfjtotal = pd.read_pickle('data/dfjtotal')
    except:
        dfjtotal = pd.DataFrame()

    while True:
        net = False
        t0 = time.time()
        try:

            alerts, jams, pub = waze_download(polygon_path)

            dfa, alerts_uuids = process_alerts(alerts, pub)
            dfjtotal = process_jams(jams, dfjtotal, pub)

            jams_ix = dfjtotal.set_index(['pub_utc_date', 'street'])
            jams_ix = jams_ix.sort_index()

            jam_acc = search_acc_jams(alerts_uuids, dfa, jams_ix)
            joined, all_df = merge_acc_jams(dfa, jams_ix, jam_acc)

            all_first = process_extra_features(all_df, jams_ix)
            all_df, dfatotal = merge_extra_features(alerts_uuids, all_first, all_df, dfatotal)

            probdf = clean_and_predict_proba(dfatotal, model)

            # datas = pd.to_datetime(all_df['pub_utc_date'])
            # fix cities timezone
            datasp = pd.to_datetime(all_df[all_df.city_x == 'São Paulo']['pub_utc_date'], utc=True).dt.tz_convert(saopaulo)
            # dataxa = pd.to_datetime(all_df[all_df.city_x == 'Xalapa']['pub_utc_date'], utc=True).dt.tz_convert(xalapa)
            # dataqt = pd.to_datetime(all_df[all_df.city_x == 'Quito']['pub_utc_date'], utc=True).dt.tz_convert(quito)
            # datamv = pd.to_datetime(all_df[all_df.city_x == 'Montevideo']['pub_utc_date'], utc=True).dt.tz_convert(montevideo)
            # datamf = pd.to_datetime(all_df[all_df.city_x == 'Miraflores']['pub_utc_date'], utc=True).dt.tz_convert(miraflores)

            # datas = pd.concat([datasp, dataxa, dataqt, datamv, datamf]).sort_index()
            datas = datasp.sort_index()

            csvtodisk = all_df[['longitude', 'latitude', 'street', 'city_x']]
            csvtodisk['prob'] = probdf[1].round(2)
            csvtodisk['hour'] = datas.apply(lambda x: x.hour)
            csvtodisk['minute'] = datas.apply(lambda x: x.minute)
            csvtodisk['day'] = datas.apply(lambda x: x.day)
            csvtodisk['month'] = datas.apply(lambda x: x.month)
            csvtodisk['year'] = datas.apply(lambda x: x.year)
            csvtodisk['weekday'] = datas.apply(lambda x: x.weekday())
            csvtodisk.rename(columns={'city_x': 'city'}, inplace=True)

            csvtodisk.to_csv(path + 'acc_realtime.csv', index=False)
            dfjtotal.to_pickle('data/dfjtotal')
        except:
            net = True
            pass

        t1 = time.time()
        if net:
            print('download error', pub, ' call', (t1 - t0), 'secs. next try in', 60 - (t1 - t0), 'secs')
        else:
            print('completed', pub, ' call', (t1 - t0), 'secs. sleeping for', 60 - (t1 - t0), 'secs')
        time.sleep(60 - (t1 - t0))
...
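
In this script, all_first has nothing to do with Selene: process_extra_features builds it by keeping only the first observed row per accident uuid (all_df.loc[all_df.uuid_x.drop_duplicates(keep='first').index]), then derives jam length/level/delay deltas at 1, 5, 10, 20, and 30 minutes before that first observation. The row-selection idiom in isolation, as a minimal sketch on made-up data:

import pandas as pd

# Toy frame standing in for all_df (made-up values, for illustration only).
all_df = pd.DataFrame({
    'uuid_x': ['a', 'a', 'b', 'b', 'b'],
    'length': [10, 12, 7, 8, 9],
})

# drop_duplicates(keep='first') keeps the first occurrence of each uuid;
# its index then selects those same rows from the full frame.
all_first = all_df.loc[all_df.uuid_x.drop_duplicates(keep='first').index]
print(all_first)  # rows 0 and 2: the first observation per uuid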


neighbor_sep_scatter.py

Source: neighbor_sep_scatter.py (GitHub)


from astropy.table import Table
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm
import pandas as pd
import matplotlib.lines as mlines
import os.path
import scipy.stats

loc = '/Users/josh/projects/intro/sources/'
res = 150
sources = pd.read_csv('sources.csv')
sources = sources['circinus'].values.tolist()
all_first, all_second, all_third = [], [], []
for i in range(len(sources)):
    fp = loc + str(sources[i]) + '/' + str(sources[i]) + '_' + str(res) + 'pc_cloud_stats.csv'
    if os.path.isfile(fp):
        cat = pd.read_csv(fp)
        # ALL GALAXIES
        all_first = all_first + cat['min_dist'].tolist()
        all_second = all_second + cat['min_dist2nd'].tolist()
        all_third = all_third + cat['min_dist3rd'].tolist()
num_bins = 15
bins = scipy.stats.binned_statistic(all_first, all_second, statistic='median', bins=num_bins, range=(0, 1000))
medians = bins[0]
xvals = (bins[1][1:] + bins[1][:-1]) / 2
bins2 = scipy.stats.binned_statistic(all_first, all_third, statistic='median', bins=num_bins, range=(0, 1000))
medians2 = bins2[0]
xvals2 = (bins2[1][1:] + bins2[1][:-1]) / 2
# Get percentiles
err1 = np.zeros((2, num_bins))
err2 = np.zeros((2, num_bins))
for i in range(num_bins):
    err1[:, i] = [np.percentile(np.take(all_second, np.where(bins[2] == (i + 1))), 16), np.percentile(np.take(all_second, np.where(bins[2] == (i + 1))), 84)]
    err2[:, i] = [np.percentile(np.take(all_third, np.where(bins[2] == (i + 1))), 16), np.percentile(np.take(all_third, np.where(bins[2] == (i + 1))), 84)]
err1 = np.abs(err1 - medians)
err2 = np.abs(err2 - medians2)
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
axes[0].scatter(all_first, all_second, c='gray', s=3, alpha=0.2, label=r'$\rho$ = ' + str(np.around(scipy.stats.pearsonr(all_first, all_second)[0], 2)))
axes[0].set_xlim((0, 1000))
axes[0].set_ylim((0, 2000))
axes[0].scatter(xvals, medians, c='b')
axes[0].errorbar(xvals, medians, c='b', capsize=4, ls='none', xerr=None, yerr=err1)
axes[0].set_title(str(res) + 'pc Resolution')
axes[0].set_xlabel('Distance to 1st Nearest Neighbor (pc)')
axes[0].set_ylabel('Distance to 2nd Nearest Neighbor (pc)')
axes[1].scatter(all_first, all_third, c='gray', s=3, alpha=0.2, label=r'$\rho$ = ' + str(np.around(scipy.stats.pearsonr(all_first, all_third)[0], 2)))
axes[1].set_xlim((0, 1000))
axes[1].set_ylim((0, 2000))
axes[1].scatter(xvals2, medians2, c='b')
axes[1].errorbar(xvals2, medians2, c='b', capsize=4, ls='none', xerr=None, yerr=err2)
axes[1].set_title(str(res) + 'pc Resolution')
axes[1].set_xlabel('Distance to 1st Nearest Neighbor (pc)')
axes[1].set_ylabel('Distance to 3rd Nearest Neighbor (pc)')
# for i in range(num_bins):
#     axes[0].scatter(xvals[i], medians[i], c='b')
#     axes[0].errorbar(xvals[i], medians[i], c='b', capsize=4, ls='none', xerr=None, yerr=err1)
#     axes[1].scatter(xvals2[i], medians2[i], c='b')
#     axes[1].errorbar(xvals2[i], medians2[i], c='b', capsize=4, ls='none', xerr=None, yerr=[[np.percentile(np.take(all_third, np.where(bins2[2]==(i+1))),16)],[np.percentile(np.take(all_third, np.where(bins2[2]==(i+1))),84)]])
x1 = np.linspace(0, 2500, 2501)
axes[0].plot(x1, x1 * (np.median(all_second) / np.median(all_first)), c='r', linestyle='--', linewidth=3, label='Slope: Median 2nd / Median 1st')
axes[1].plot(x1, x1 * (np.median(all_third) / np.median(all_first)), c='r', linestyle='--', linewidth=3, label='Slope: Median 3rd / Median 1st')
axes[0].legend()
axes[1].legend()
plt.legend()
plt.show()
...
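
Here all_first is a plain list accumulating each cloud's distance to its first nearest neighbor across galaxies; the script then uses scipy.stats.binned_statistic to compute the median 2nd- and 3rd-neighbor distances per bin of all_first, with 16th/84th percentiles as error bars. The binning step on its own, as a minimal sketch with synthetic data:

import numpy as np
import scipy.stats

rng = np.random.default_rng(0)
all_first = rng.uniform(0, 1000, 500)                   # synthetic x values
all_second = all_first * 1.5 + rng.normal(0, 50, 500)   # synthetic y values

# Median of all_second within 15 equal-width bins of all_first.
stat = scipy.stats.binned_statistic(all_first, all_second,
                                    statistic='median', bins=15, range=(0, 1000))
medians = stat.statistic                                  # one median per bin
centers = (stat.bin_edges[1:] + stat.bin_edges[:-1]) / 2  # bin midpoints
print(centers, medians)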


stats.py

Source: stats.py (GitHub)


# %%
token = "df8f82c719ece3df8ff58dbc6cdebc21"
# %%
import httpx

query = {
    "module": "API",
    "token_auth": token,
    "method": "Live.getLastVisitsDetails",
    "idSite": "3",
    "period": "month",
    "date": "today",
    "format": "JSON",
    "filter_limit": "-1",
}
response = httpx.get("https://matomo.schuetze.link/index.php", params=query)
response.raise_for_status()
data = response.json()
# %%
from collections import Counter
import idna
import re

all_domains = list()
all_first = list()
for i in data:
    urls = [action["url"] for action in i["actionDetails"]]
    if any(
        not re.match(r"https://[a-z-]+\.meine-stadt-transparent.de", url) for url in urls
    ):
        continue
    domains = [idna.decode(url.split("/")[2]).split(".")[0] for url in urls]
    all_domains.extend(domains)
    all_first.append(domains[0])
all_domains = Counter(all_domains)
all_first = Counter(all_first)
print(all_domains)
print(all_first)
# %%
import numpy
import matplotlib.pyplot as plt

labels = list(set(all_domains.keys()) | set(all_first.keys()))
labels.sort(key=lambda x: all_first.get(x, 0))
visitor_values = [all_domains.get(label, 0) for label in labels]
actions_values = [all_first.get(label, 0) for label in labels]
x = numpy.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars
fig, ax = plt.subplots()
rects1 = ax.barh(x - width / 2, visitor_values, width, label="By action")
rects2 = ax.barh(x + width / 2, actions_values, width, label="By visitor")
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_yticks(x)
ax.set_yticklabels(labels)
ax.legend()
fig.tight_layout()
plt.title("Visitors and actions per city in the last month")
plt.show()
# %%
import pandas

existing = pandas.read_csv("existing.csv").fillna("")
print(sorted(list(existing["name"])))
print(sorted(all_domains.keys()))
...
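
In this Matomo analytics script, all_first is a collections.Counter over the first subdomain each visitor opened, while all_domains counts every page action; comparing the two contrasts visitors versus actions per city. The counting idiom on its own, as a minimal sketch with made-up visit data:

from collections import Counter

# Each inner list stands in for one visit's sequence of subdomains (made-up data).
visits = [
    ['munich', 'munich', 'berlin'],
    ['berlin'],
    ['munich', 'hamburg'],
]

all_domains = Counter()  # every page action, by subdomain
all_first = Counter()    # only the first action of each visit
for domains in visits:
    all_domains.update(domains)
    all_first[domains[0]] += 1

print(all_domains)  # Counter({'munich': 3, 'berlin': 2, 'hamburg': 1})
print(all_first)    # Counter({'munich': 2, 'berlin': 1})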


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.

Run Selene automation tests on the LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest now!

Get 100 free minutes of automation testing!

Next-Gen App & Browser Testing Cloud
