Best Python code snippet using pytest-benchmark
train.py
Source:train.py  
import logging
import sys

import numpy as np
import pandas as pd

import symfit as sf
from symfit.core.minimizers import BFGS, DifferentialEvolution


logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logger = logging.getLogger(__name__)


# Load data
logger.info("Loading data")
df = pd.read_csv(
    "https://opendata.ecdc.europa.eu/covid19/casedistribution/csv/",
    parse_dates=["dateRep"],
    infer_datetime_format=True,
    dayfirst=True,
)

logger.info("Cleaning data")
df = df.rename(
    columns={"dateRep": "date", "countriesAndTerritories": "country"}
)  # Sane column names
df = df.drop(["day", "month", "year", "geoId"], axis=1)  # Not required

# Create DF with sorted index
sorted_data = df.set_index(df["date"]).sort_index()

# Remove all rows with zero deaths
sorted_data = sorted_data[sorted_data["deaths"] != 0]

sorted_data["cumulative_cases"] = sorted_data.groupby(by="country")["cases"].cumsum()
sorted_data["cumulative_deaths"] = sorted_data.groupby(by="country")["deaths"].cumsum()

# Filter out rows with fewer than 100 cumulative deaths; we probably can't get
# good estimates from these.
sorted_data = sorted_data[sorted_data["cumulative_deaths"] >= 100]

# Remove "Czechia"; its popData2019 is NaN
sorted_data = sorted_data[sorted_data["country"] != "Czechia"]

# Get final list of countries
countries = sorted_data["country"].unique()
n_countries = len(countries)

# Pull out population size per country
populations = {
    country: df[df["country"] == country].iloc[0]["popData2019"]
    for country in countries
}

# A map from country to integer index (for the model)
idx_country = pd.Index(countries).get_indexer(sorted_data.country)

# Create a new column with the number of days since each country's first date
# in the filtered data (the x-axis)
country_first_dates = {
    c: sorted_data[sorted_data["country"] == c].index.min() for c in countries
}
sorted_data["100_cases"] = sorted_data.apply(
    lambda x: country_first_dates[x.country], axis=1
)
sorted_data["days_since_100_cases"] = (
    sorted_data.index - sorted_data["100_cases"]
).apply(lambda x: x.days)

logger.info("Training...")
fit_result = {}
ode_model = {}
for country in countries:
    t, S, I, R, D = sf.variables("t, S, I, R, D")
    p_susceptible = 0.00085
    N_mu = populations[country] * p_susceptible
    β_0, γ_0, μ_0 = 0.35, 0.1, 0.03
    N_0 = N_mu
    β = sf.Parameter("β", value=β_0, min=0.1, max=0.5)
    γ = sf.Parameter("γ", value=γ_0, min=0.01, max=0.2)
    N = sf.Parameter("N", value=N_0, min=1e4, max=1e7)
    μ = sf.Parameter("μ", value=μ_0, min=0.0001, max=0.1)

    print(country, N_0)

    # SIRD compartmental model as a system of ODEs
    model_dict = {
        sf.D(S, t): -β * I * S / N,
        sf.D(I, t): β * I * S / N - γ * I - μ * I,
        sf.D(R, t): γ * I,
        sf.D(D, t): μ * I,
    }

    p_infected = 0.01
    I_0, R_0, D_0 = N_mu * p_infected, N_mu * p_infected - 100.0, 100.0
    S_0 = N_mu - I_0 - R_0 - D_0
    ode_model[country] = sf.ODEModel(
        model_dict, initial={t: 0.0, S: S_0, I: I_0, R: R_0, D: D_0}
    )

    idx = sorted_data["country"] == country
    x = sorted_data[idx]["days_since_100_cases"].values
    y = sorted_data[idx]["cumulative_deaths"].values

    # Fit only the death curve; the other compartments are unobserved
    fit = sf.Fit(
        ode_model[country],
        t=x,
        S=None,
        I=None,
        R=None,
        D=y,
        minimizer=[DifferentialEvolution, BFGS],
    )
    fit_result[country] = fit.execute(
        DifferentialEvolution={"seed": 0, "tol": 1e-2, "maxiter": 5},
        BFGS={"tol": 1e-6},
    )
    print(fit_result[country])

logger.info("Inferencing...")
n_days = 365  # Daily predictions

cumulative_prediction = {}
daily_prediction = {}
residuals_high = {}
residuals_low = {}
for country in countries:
    idx = sorted_data["country"] == country
    x = sorted_data[idx]["days_since_100_cases"].values
    y = sorted_data[idx]["cumulative_deaths"].values
    tvec = np.arange(x.max() + n_days)
    # Outputs are ordered alphabetically by variable name (D, I, R, S)
    d, i, r, s = ode_model[country](t=tvec, **fit_result[country].params)
    cumulative_prediction[country] = d
    y = sorted_data[idx]["deaths"].values
    daily_prediction[country] = np.diff(d)
    residual_std = np.std(y - daily_prediction[country][: len(y)])
    residuals_high[country] = daily_prediction[country] + residual_std
    residuals_low[country] = daily_prediction[country] - residual_std

# Remember this is one big vector that contains all countries at all times.
# To do inference we need to construct a new vector with new times.
# Create the time index
time_index = np.arange(0, n_days, 1)
time_index = np.repeat(time_index, n_countries)

# Create the country index
country_index = np.arange(n_countries)
country_index = np.tile(country_index, n_days)
dummy_y = np.zeros(len(time_index))

logger.info("Saving model")

# Calculate dates (must be in python datetime to work with pydantic)
country_start = [country_first_dates[x] for x in countries[country_index].tolist()]
country_offset = [pd.DateOffset(x) for x in time_index]
dates = list(
    map(lambda x: (x[0] + x[1]).to_pydatetime(), zip(country_start, country_offset))
)

# Create a big dataframe with all this info
predictions = pd.DataFrame(
    {
        "timestamp": dates,
        "country": countries[country_index],
        "deaths_prediction": [
            daily_prediction[c][t]
            for t, c in zip(time_index, countries[country_index].tolist())
        ],
        "cumulative_deaths_prediction": [
            cumulative_prediction[c][t]
            for t, c in zip(time_index, countries[country_index].tolist())
        ],
        "residuals_low": [
            residuals_low[c][t]
            for t, c in zip(time_index, countries[country_index].tolist())
        ],
        "residuals_high": [
            residuals_high[c][t]
            for t, c in zip(time_index, countries[country_index].tolist())
        ],
        "days_since_100_cases": time_index,
    },
    index=dates,
)

# Merge in the ground truth
predictions = pd.merge(
    predictions.rename_axis("index").reset_index(),
    sorted_data[["country", "deaths", "cumulative_deaths"]]
    .rename_axis("index")
    .reset_index(),
    on=["index", "country"],
    how="outer",
).set_index("index")

# Save to file
predictions.to_pickle("predictions.pkl")
finder.py
Source:finder.py  
import csv
from collections import Counter


def get_mean(total_weight, total_entries):
    # Calculate the mean
    mean = total_weight / total_entries
    print(f"Mean (average) is: {mean:.2f}")


def get_median(total_entries, sorted_data):
    # Calculate the median
    if total_entries % 2 == 0:
        median1 = float(sorted_data[total_entries // 2])
        median2 = float(sorted_data[total_entries // 2 - 1])
        median = (median1 + median2) / 2
    else:
        median = float(sorted_data[total_entries // 2])
    print(f"Median is: {median:.2f}")


def get_mode(sorted_data):
    # Calculate the modal 10-unit weight range, reported as its midpoint
    data = Counter(sorted_data)
    mode_data_for_range = {
        "75-85": 0,
        "85-95": 0,
        "95-105": 0,
        "105-115": 0,
        "115-125": 0,
        "125-135": 0,
        "135-145": 0,
        "145-155": 0,
        "155-165": 0,
        "165-175": 0,
    }
    # Half-open bins so boundary values fall into exactly one range
    for weight, occurrence in data.items():
        if 75 <= weight < 85:
            mode_data_for_range["75-85"] += occurrence
        elif 85 <= weight < 95:
            mode_data_for_range["85-95"] += occurrence
        elif 95 <= weight < 105:
            mode_data_for_range["95-105"] += occurrence
        elif 105 <= weight < 115:
            mode_data_for_range["105-115"] += occurrence
        elif 115 <= weight < 125:
            mode_data_for_range["115-125"] += occurrence
        elif 125 <= weight < 135:
            mode_data_for_range["125-135"] += occurrence
        elif 135 <= weight < 145:
            mode_data_for_range["135-145"] += occurrence
        elif 145 <= weight < 155:
            mode_data_for_range["145-155"] += occurrence
        elif 155 <= weight < 165:
            mode_data_for_range["155-165"] += occurrence
        elif 165 <= weight < 175:
            mode_data_for_range["165-175"] += occurrence
    mode_range, mode_occurrence = [0, 0], 0
    for bin_range, occurrence in mode_data_for_range.items():
        if occurrence > mode_occurrence:
            low, high = bin_range.split("-")
            mode_range, mode_occurrence = [int(low), int(high)], occurrence
    mode = float((mode_range[0] + mode_range[1]) / 2)
    print(f"Mode is: {mode:.2f}")


with open('SOCR-HeightWeight.csv', newline='') as f:
    reader = csv.reader(f)
    file_data = list(reader)

file_data.pop(0)  # Drop the header row

total_weight = 0
total_entries = len(file_data)
sorted_data = []

for person_data in file_data:
    total_weight += float(person_data[2])
    sorted_data.append(float(person_data[2]))

sorted_data.sort()

get_mean(total_weight, total_entries)
get_median(total_entries, sorted_data)
get_mode(sorted_data)
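The hand-rolled mean and median can be cross-checked against the standard-library statistics module. A minimal sketch, assuming the same SOCR-HeightWeight.csv layout with the weight in the third column:

import csv
import statistics

with open('SOCR-HeightWeight.csv', newline='') as f:
    rows = list(csv.reader(f))[1:]  # skip the header row

weights = [float(row[2]) for row in rows]

# These should match the output of get_mean and get_median above
print(f"Mean (average) is: {statistics.fmean(weights):.2f}")
print(f"Median is: {statistics.median(weights):.2f}")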
task1.py
Source:task1.py  
import math

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
from lab2.kernel_functions import box_kernel


def interquartile_range(down_quartile_val, up_quartile_val):
    return up_quartile_val - down_quartile_val


def down_quartile(sorted_data):
    # Lower quartile, averaged between the two bracketing order statistics
    l = len(sorted_data)
    down = sorted_data[int((2 + l) / 4)]
    up = sorted_data[math.ceil((2 + l) / 4)]
    return (down + up) / 2


def up_quartile(sorted_data):
    # Upper quartile, averaged between the two bracketing order statistics
    l = len(sorted_data)
    down = sorted_data[int((2 + 3 * l) / 4)]
    up = sorted_data[math.ceil((2 + 3 * l) / 4)]
    return (down + up) / 2


def silverman_bandwidth(data):
    # Silverman's rule of thumb: 0.9 * min(std, IQR / 1.34) * n^(-1/5)
    sorted_data = sorted(data)
    iqr = interquartile_range(down_quartile(sorted_data), up_quartile(sorted_data))
    return 0.9 * min(np.std(data), iqr / 1.34) * len(data) ** -0.2


def general_naive_density_estimator(x, data, bandwidth, kernel_function):
    # Kernel density estimate: f_hat(x) = (1 / (n * h)) * sum_i K((x - x_i) / h)
    n = len(data)
    kernel_sum = 0
    for i in range(n):
        kernel_sum += kernel_function((x - data[i]) / bandwidth)
    return 1 / (n * bandwidth) * kernel_sum


def task1(data):
    sorted_data = sorted(data)
    silverman_band = silverman_bandwidth(data)
    print(silverman_band)
    small_band = 0.1
    large_band = 3
    val = 0.
    ins = []
    silverman_outs = []
    small_outs = []
    large_outs = []
    # Evaluate the estimator at each data point for three bandwidths
    for x in sorted_data:
        ins.append(x)
        silverman_outs.append(general_naive_density_estimator(x, data, silverman_band, box_kernel))
        small_outs.append(general_naive_density_estimator(x, data, small_band, box_kernel))
        large_outs.append(general_naive_density_estimator(x, data, large_band, box_kernel))

    # Small bandwidth
    plt.figure(2)
    ax = plt.axes()
    ax.yaxis.set_minor_locator(AutoMinorLocator(2))
    ax.xaxis.set_minor_locator(AutoMinorLocator(2))
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.plot(ins, small_outs)
    plt.plot(sorted_data, np.zeros_like(sorted_data) + val, 'x', color='orange')
    plt.xlabel('x')
    plt.ylabel('f(x)')
    plt.show()

    # Silverman bandwidth
    plt.figure(3)
    ax = plt.axes()
    ax.yaxis.set_minor_locator(AutoMinorLocator(2))
    ax.xaxis.set_minor_locator(AutoMinorLocator(2))
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.plot(ins, silverman_outs)
    plt.plot(sorted_data, np.zeros_like(sorted_data) + val, 'x', color='orange')
    plt.xlabel('x')
    plt.ylabel('f(x)')
    plt.show()

    # Large bandwidth
    plt.figure(4)
    ax = plt.axes()
    ax.yaxis.set_minor_locator(AutoMinorLocator(2))
    ax.xaxis.set_minor_locator(AutoMinorLocator(2))
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.plot(ins, large_outs)
    plt.plot(sorted_data, np.zeros_like(sorted_data) + val, 'x', color='orange')
    plt.xlabel('x')
    plt.ylabel('f(x)')
    plt.show()
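task1 depends on box_kernel from lab2.kernel_functions, which this page does not show. A plausible stand-in, assuming the standard uniform (box) kernel; the real lab2 implementation may differ:

def box_kernel(u):
    # Uniform kernel: density 1/2 on [-1, 1] and zero outside,
    # so it integrates to one (assumed implementation)
    return 0.5 if abs(u) <= 1 else 0.0

With that in place, task1 can be exercised on any one-dimensional sample, e.g. task1(np.random.normal(size=200).tolist()).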