Python code snippets collected by the avocado_python snippet aggregator (three source files follow).
Snippet 1 of 3.
Source: Day_Ahead_Imbalance_Forecast_LEAR_test.py
"""Day-ahead imbalance-price forecasting with a weekly-recalibrated LEAR model.

Walks through the test period day by day; for each day it selects the data set
that would actually have been *published* by that date (publications happen
weekly, with a 4-day data lag), masks the target day's prices, recalibrates a
LEAR model and stores the 24 hourly forecasts.  Finally it evaluates sign
accuracy of the predicted imbalance against the realised prices.
"""
from urllib.parse import urljoin
import datetime as datetime
import os
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from sklearn.metrics import confusion_matrix

from epftoolbox.data import read_data
from epftoolbox.evaluation import MAE, sMAPE, RMSE
from epftoolbox.models import LEAR

warnings.filterwarnings("ignore")

#### Activate LEAR model (module-level instance kept for backward compatibility;
#### the recalibration below uses its own `model` instance).
instance = LEAR()


def plot_forecast_nc(forecast, train, test, mape=None, mase=None):
    """Plot the training series, the actuals and the forecast on one axis.

    `mape`/`mase` are accepted for interface compatibility but unused.
    """
    plt.figure(figsize=(10, 4), dpi=100)
    plt.plot(train, label='training', color='black')
    plt.plot(test, label='actual', color='black', ls='--')
    plt.plot(forecast, label='forecast', color='orange')
    plt.title('Forecast vs Actuals')
    plt.legend(loc='upper left', fontsize=8)
    plt.show()


def make_dt_index(df, timestamp_col, dt_offset=None):
    """Promote `timestamp_col` to a sorted DatetimeIndex (optionally shifted).

    Mutates and returns `df`: parses the column with an ISO-like format,
    applies `dt_offset` (a Timedelta/DateOffset) if given, drops the column
    and sorts by the new index.
    """
    df.index = pd.to_datetime(
        df.loc[:, timestamp_col], format='%Y-%m-%dT%H:%M:%S.%f'
    )
    if dt_offset:
        df.index = df.index + dt_offset
    df.drop(timestamp_col, axis=1, inplace=True)
    df.sort_index(inplace=True)
    return df


def check_duplicate_index(df, verbose=True):
    """Check for duplicates in the index of a dataframe; return all dupes."""
    dupes = df[df.index.duplicated()]
    num = dupes.shape[0]
    print('{} index duplicates'.format(num))
    if verbose:
        print('duplicates are:')
        print(dupes.head(3))
    return df[df.index.duplicated(keep=False)]


def drop_duplicate_index(df):
    """Return `df` with only the first row kept for each duplicated index."""
    return df[~df.index.duplicated(keep='first')]


def check_duplicate_rows(df, verbose=True):
    """Check for fully duplicated rows; return all of them (keep=False)."""
    duplicated_bools = df.duplicated()
    num = np.sum(duplicated_bools)
    print('{} row duplicates'.format(num))
    if verbose:
        # BUG FIX: the original computed head(3) and discarded it — print it.
        print(df[duplicated_bools].head(3))
    return df[df.duplicated(keep=False)]


def check_nans(df, verbose=True):
    """Check for NaNs in a dataframe; return the rows containing any NaN."""
    nans = df[df.isnull().any(axis=1)]
    num = nans.shape[0]
    print('{} nan rows'.format(num))
    if verbose:
        print('nan values are:')
        print(nans.head())
    return nans


def check_index_length(df, freq, verbose=True):
    """Compare a DatetimeIndex with the ideal index at frequency `freq`.

    Returns (missing_timestamps, ideal_index) when timestamps are missing,
    otherwise (None, df.index).
    """
    # BUG FIX: pd.DatetimeIndex(start=..., end=..., freq=...) was removed in
    # pandas 0.25 — pd.date_range is the supported constructor.
    ideal = pd.date_range(start=df.index[0], end=df.index[-1], freq=freq)
    ideal_len = ideal.shape[0]
    actual_len = df.shape[0]
    num_missing = ideal_len - actual_len
    print('ideal index len {} actual {} missing {}'.format(
        ideal_len, actual_len, num_missing))
    if ideal.shape[0] != df.shape[0]:
        missing = set(df.index).symmetric_difference(set(ideal))
        if verbose:
            print('missing are:')
            print(missing)
        return missing, ideal
    else:
        return None, df.index


def make_df_fill_dt_index(df, freq, method='ffill'):
    """Reindex `df` onto the ideal index at `freq` and fill the gaps."""
    missing, ideal = check_index_length(df, freq)
    ideal_idx_df = pd.DataFrame(index=ideal)
    df = pd.concat([ideal_idx_df, df], axis=1)
    return df.fillna(method=method)


def check_dataframe(df, freq, verbose=False):
    """Wrap together all the dataframe sanity checks; return their results."""
    duplicate_index = check_duplicate_index(df, verbose)
    duplicate_rows = check_duplicate_rows(df, verbose)
    nans = check_nans(df, verbose)
    missing_index, ideal_index = check_index_length(df, freq, verbose)
    return {
        'duplicate_index': duplicate_index,
        'duplicate_rows': duplicate_rows,
        'nans': nans,
        'missing_index': missing_index,
        'ideal_index': ideal_index
    }


# ---------------------------------------------------------------- data loading
df_may_recalibration = pd.read_csv('./datasets/All_data_may_final.csv')
df_may_recalibration['Unnamed: 0'] = pd.to_datetime(df_may_recalibration['Unnamed: 0'])
df_may_recalibration.set_index('Unnamed: 0', drop=True, inplace=True)
df_may_recalibration.index.name = None

path_datasets_folder = os.path.join('.', 'datasets')
path_recalibration_folder = os.path.join('.', 'experimental_files')
calibration_window = 364

##### Load dataset from ./datasets that contains all the data
dataset = 'All_data_may_final'
begin_test_date = datetime.datetime(2022, 3, 28)
end_test_date = datetime.datetime(2022, 5, 31)
years_test = None
forecast_file_name = ('fc_nl' + '_dat' + str(dataset) + '_YT' + str(years_test)
                      + '_CW' + str(calibration_window) + '.csv')
forecast_file_path = os.path.join(path_recalibration_folder, forecast_file_name)

df_train, df_test = read_data(dataset=dataset, years_test=None, path=path_datasets_folder,
                              begin_test_date=begin_test_date, end_test_date=end_test_date)

# Empty forecast frame (one row per day, 24 hourly columns) and the real
# values reshaped into the same day-by-hour layout.
forecast = pd.DataFrame(index=df_test.index[::24],
                        columns=['h' + str(k) for k in range(24)])
forecast_list = []
real_values = df_test.loc[:, ['Price']].values.reshape(-1, 24)
real_values = pd.DataFrame(real_values, index=forecast.index, columns=forecast.columns)
forecast_dates = forecast.index

model = LEAR(calibration_window=calibration_window)

RMSE_LIST = []
MAE_LIST = []
SMAPE_LIST = []
residual_list = []

datetime_published = forecast.resample('7D').last()
# Weekly publication dates; the first element (2022-03-31) is dropped below so
# the remaining eight are 2022-04-07 ... 2022-05-26.
datetime_published_index = pd.date_range('2022-03-31 00:00:00', '2022-05-29 23:00:00', freq='7D')
sdate = datetime.datetime(2022, 3, 28)
edate = datetime.datetime(2022, 5, 29)
list_datetime_changes = [datetime.datetime(2022, 4, 7), datetime.datetime(2022, 4, 14),
                         datetime.datetime(2022, 4, 21), datetime.datetime(2022, 4, 28),
                         datetime.datetime(2022, 5, 5), datetime.datetime(2022, 5, 12),
                         datetime.datetime(2022, 5, 19), datetime.datetime(2022, 5, 29)]
datetime_published_index = datetime_published_index[1:9]
print(datetime_published_index)

# ------------------------------------------------- per-publication data slices
# Index 0: everything published up to 28/03 (test data through the first day).
# Index k >= 1: everything published on datetime_published_index[k-1]; data are
# available up to 4 days before each weekly publication date.
available_cutoffs = [sdate + pd.Timedelta(hours=23)]
available_cutoffs += [pub - pd.Timedelta(days=4) + pd.Timedelta(hours=23)
                     for pub in datetime_published_index]
data_available_sets = [pd.concat([df_train, df_test.loc[sdate:cutoff, :]], axis=0)
                       for cutoff in available_cutoffs]
for _da in data_available_sets:
    print(_da)

# Anchor day of each publication window: the most recent day with observed
# data (publication date minus the 4-day lag; the very first window anchors on
# the start of the test period).  These reproduce the hard-coded dates
# 2022-03-28, 04-03, 04-10, ..., 05-22 of the original branches.
anchor_days = [sdate] + [pub - pd.Timedelta(days=4) for pub in datetime_published_index]


def _publication_segment(date):
    """Index (0..8) of the publication window that contains `date`."""
    for k, pub in enumerate(datetime_published_index):
        if date < pub:
            return k
    return len(datetime_published_index)


# The original code was nine copy-pasted branches, one per publication window;
# they differed only in the data slice, the anchor day used for reindexing and
# the hard-coded next_day_date.  This loop is the deduplicated equivalent.
for date in forecast_dates:
    seg = _publication_segment(date)
    data_available = data_available_sets[seg]
    anchor = anchor_days[seg]
    print(date)
    print(data_available)

    # Append the target day to the training data and mask its real prices.
    data_available_forecast = pd.concat(
        [df_train, df_test.loc[date:date + pd.Timedelta(hours=23), :]], axis=0)
    data_available_forecast.loc[date:date + pd.Timedelta(hours=23), 'Price'] = np.NaN
    data_available_last = data_available_forecast.tail(23)
    index_test = df_test.loc[forecast_dates[0]:forecast_dates[0] + pd.Timedelta(hours=23), :].index
    # Re-stamp the masked day onto the anchor day of this publication window
    # (bfill duplicates the first available hour into the missing slot).
    anchor_index = pd.date_range(anchor, anchor + pd.Timedelta(hours=23), freq='H')
    data_available_last = data_available_last.reindex(anchor_index, method="bfill")
    print(data_available_last)

    # Replace the last day of the available data with the masked anchor day.
    data_available = data_available.iloc[:-24]
    data_available = pd.concat([data_available, data_available_last])

    Yp = model.recalibrate_and_forecast_next_day(df=data_available, next_day_date=anchor,
                                                 calibration_window=calibration_window)
    # Save the current prediction.
    forecast_list.append(Yp)
    forecast.loc[date, :] = Yp
    residuals = data_available.Price - forecast
    residual_list.append(residuals)

    # Metrics computed over everything forecast up to the current date.
    mae = np.mean(MAE(forecast.loc[:date].values.squeeze(), real_values.loc[:date].values))
    MAE_LIST.append(mae)
    smape = np.mean(sMAPE(forecast.loc[:date].values.squeeze(), real_values.loc[:date].values)) * 100
    SMAPE_LIST.append(smape)
    rmse = np.mean(RMSE(forecast.loc[:date].values.squeeze(), real_values.loc[:date].values))
    RMSE_LIST.append(rmse)
    print('{} - sMAPE: {:.2f}%  |  MAE: {:.3f}  | RMSE: {:.4f}'.format(str(date)[:10], smape, mae, rmse))
    print(data_available.tail(48))

######## Transform forecast into dataframe and analyze
forecast.to_csv(forecast_file_path)
print(forecast.head(40))
forecast_df = forecast.copy()
forecast_df['Time'] = forecast.index

mask = (df_may_recalibration.index >= begin_test_date) & \
       (df_may_recalibration.index <= end_test_date + pd.Timedelta(hours=23))
april_may_data = df_may_recalibration.loc[mask]

# Flatten the day-by-hour forecast frame into one 'Pred' column.
forecast_df = (forecast_df.replace(0, np.nan)
               .set_index('Time', append=True)
               .stack()
               .reset_index()
               .rename(columns={0: 'Pred'})
               .drop('level_2', axis=1))  # positional axis arg is deprecated
print(forecast_df)
print(april_may_data)
forecast_df = forecast_df.set_index(april_may_data.index)
forecast_df = forecast_df.drop('level_0', axis=1)
forecast_df = forecast_df.drop('Time', axis=1)
print(forecast_df)

plot_forecast_nc(forecast_df["Pred"][forecast_df.index >= '2022-4-01 00:00:00'],
                 df_train["Price"][df_train.index > '2022-5-21 00:00:00'],
                 df_test["Price"][df_test.index >= '2022-4-01 00:00:00'],
                 )
plt.plot(forecast_df)
plt.savefig('./report/LEAR/aa.png', bbox_inches='tight', dpi=300)
plt.show()

# Sign accuracy: did we predict the direction of the imbalance correctly?
forecast_df['Real'] = april_may_data['Price']
forecast_df["Real_sign"] = [1 if int(x) >= 0 else 0 for x in np.sign(forecast_df["Real"])]
forecast_df["Prediction_sign"] = [1 if int(x) >= 0 else 0 for x in np.sign(forecast_df["Pred"])]
forecast_df.to_csv('Forecast_DA_Balancing_difference_correct.csv')
comparison_column = np.where(forecast_df["Prediction_sign"] == forecast_df["Real_sign"], True, False)
count = np.count_nonzero(comparison_column)
print("We have a total of True values : ", count)
print("Accuracy of True values compared to the total observations :", count / len(comparison_column))

cf_matrix = confusion_matrix(forecast_df["Real_sign"], forecast_df["Prediction_sign"])
ax = sns.heatmap(cf_matrix / np.sum(cf_matrix), annot=True,
                 fmt='.2%', cmap='Blues')
ax.set_title('Seaborn Confusion Matrix with labels\n\n')
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')
## Tick labels - list must be in alphabetical order
ax.xaxis.set_ticklabels(['False', 'True'])
ax.yaxis.set_ticklabels(['False', 'True'])
## Display the visualization of the confusion matrix.
# plt.savefig('./report/LEAR/Confusion_matrix.png', bbox_inches='tight', dpi=300)
# plt.show()
Snippet 2 of 3.
Source: funs.py
"""Dashboard query helpers for Oura-style sleep / activity / readiness data."""
from dashboard.models import sleep, activity, readiness
import datetime as dt
import pandas as pd
from django.db import connection


def _two_day_scores(model, table, attr, day_offset=0):
    """Return [latest_score, diff_to_previous_day] for `attr` on `model`.

    `day_offset` shifts both query dates back (readiness data lags one day).
    Returns ['NA', 'NA'] when either day has no row yet (not synchronized).
    SECURITY: the dates are passed as query parameters, not interpolated into
    the SQL string; `table` is a hard-coded literal at every call site.
    """
    day0 = (dt.datetime.now() - dt.timedelta(days=day_offset)).date().strftime('%Y-%m-%d %H:%M:%S')
    day1 = (dt.datetime.now() - dt.timedelta(days=day_offset + 1)).date().strftime('%Y-%m-%d %H:%M:%S')
    try:
        scores = [
            getattr(model.objects.raw(f"SELECT * FROM {table} WHERE timestamp=%s", [day0])[0], attr),
            getattr(model.objects.raw(f"SELECT * FROM {table} WHERE timestamp=%s", [day1])[0], attr),
        ]
    except IndexError:
        return ['NA', 'NA']
    # Second slot becomes the difference to yesterday.
    scores[1] = scores[0] - scores[1]
    return scores


def sleep_score():
    """[today's sleep score, difference to yesterday] or ['NA', 'NA']."""
    return _two_day_scores(sleep, 'dashboard_sleep', 'sleep_score')


def activity_score():
    """[today's activity score, difference to yesterday] or ['NA', 'NA']."""
    return _two_day_scores(activity, 'dashboard_activity', 'activity_score')


def readiness_score():
    """[yesterday's readiness score, difference to the day before] or ['NA', 'NA'].

    Readiness data is published with a one-day lag, hence day_offset=1.
    """
    return _two_day_scores(readiness, 'dashboard_readiness', 'readiness_score', day_offset=1)


def sleep_data_detailed(date: str):
    """List of {'name', 'value'} dicts for the sleep row at `date`, [] if absent.

    The duration-like fields (indices 1, 2, 5, 6, 7 of the row) are rendered
    as 'H:MM' strings via timedelta (the trailing ':SS' is sliced off).
    """
    # SECURITY: `date` comes from the caller — pass it as a bound parameter
    # instead of interpolating it into the SQL string.
    df = pd.read_sql_query("SELECT * FROM dashboard_sleep WHERE timestamp=%s",
                           connection, params=[date])
    # Exceptions for not-synchronized data (no row for that day).
    try:
        row = df.iloc[0, :].drop(index=['id', 'timestamp'])
        data = row.to_dict()
    except IndexError:
        return []
    context = [{'name': key, 'value': value} for key, value in data.items()]
    for i in [1, 2, 5, 6, 7]:
        context[i]['value'] = str(dt.timedelta(seconds=int(context[i]['value'])))[:-3]
    return context


def _last_10_as_records(table, filter_string):
    """Last 10 rows of `table` as [{'timestamp', 'data'}] records.

    `table` is a hard-coded literal at every call site; `filter_string` is a
    column name selected by the view layer.
    """
    df = pd.read_sql_query(f"SELECT * FROM {table} ORDER BY timestamp DESC LIMIT 10", connection)
    df = df.drop(columns=['id'])
    # BUG FIX: the original used rstrip('00:00:00'), which strips a *character
    # set* ({'0', ':'}) and corrupts dates whose day ends in '0'
    # (e.g. '2021-01-10 00:00:00' -> '2021-01-1 ').  Split off the time part.
    df['timestamp'] = df['timestamp'].apply(lambda x: str(x).split(' ')[0])
    df = df[['timestamp', filter_string]]
    df.columns = ['timestamp', 'data']
    return df.to_dict('records')


def get_last_7_sleep(filter_string: str):
    """Most recent 10 sleep rows, reduced to timestamp + `filter_string`."""
    return _last_10_as_records('dashboard_sleep', filter_string)


def activity_data_detailed(date: str):
    """List of {'name', 'value'} dicts for the activity row at `date`, [] if absent."""
    # SECURITY: parameterized query — `date` comes from the caller.
    df = pd.read_sql_query("SELECT * FROM dashboard_activity WHERE timestamp=%s",
                           connection, params=[date])
    # Exceptions for not-synchronized data (no row for that day).
    try:
        row = df.iloc[0, :].drop(index=['id', 'timestamp'])
        data = row.to_dict()
    except IndexError:
        print(df)
        return []
    print(row)
    return [{'name': key, 'value': value} for key, value in data.items()]


def get_last_7_activity(filter_string: str):
    """Most recent 10 activity rows, reduced to timestamp + `filter_string`."""
    return _last_10_as_records('dashboard_activity', filter_string)


def get_readiness_data():
    """First 10 readiness rows as a list of record dicts."""
    df = pd.read_sql_query("SELECT * FROM dashboard_readiness", connection)
    df = df.reset_index(drop=True)
    print(df)
    df = df.iloc[:10, :]
    data = df.to_dict('records')
    # NOTE(review): the scraped source was truncated right after the line
    # above; returning the records matches the sibling get_last_7_* helpers,
    # but confirm against the original file.
    return data
Source:synchronization.py  
import concurrent.futures
import queue
import random
import threading
import time
from collections import deque


class DoublerWithEvents:
    """Produce a random int and report its double, handshaking with two Events.

    NOTE(review): nothing here is mutually exclusive — with several producer
    or consumer threads, two generate() calls can both pass wait() before
    clear() runs.  DoublerWithEventsAndLocks below closes that gap.
    """

    def __init__(self):
        self.waiting_for_data = threading.Event()
        self.data_available = threading.Event()
        self.waiting_for_data.set()    # the producer may run immediately
        self.data_available.clear()    # nothing to report yet
        self.data = None

    def generate(self):
        # Block until the consumer has taken the previous value.
        if self.waiting_for_data.wait():
            self.waiting_for_data.clear()
            self.data = random.randint(0, 10)
            print(f"generated {self.data}")
            self.data_available.set()

    def report(self):
        # Block until a fresh value is available.
        if self.data_available.wait():
            self.data_available.clear()
            print(f"{self.data} -> {2 * self.data}")
            self.waiting_for_data.set()


class DoublerWithEventsAndLocks:
    """Same Event handshake, plus one Lock per side so at most one producer
    and one consumer are inside the critical sections at a time."""

    def __init__(self):
        self.waiting_for_data_lock = threading.Lock()
        self.waiting_for_data = threading.Event()
        self.data_available_lock = threading.Lock()
        self.data_available = threading.Event()
        self.waiting_for_data.set()
        self.data_available.clear()
        self.data = None

    def generate(self):
        # Grab lock first, to avoid multiple generate functions running simultaneously
        with self.waiting_for_data_lock:
            # Wait for waiting_for_data to be set
            if self.waiting_for_data.wait():
                self.data = random.randint(0, 10)
                print(f"Generated {self.data}")
                self.waiting_for_data.clear()  # Note: order is important
                self.data_available.set()

    def report(self):
        with self.data_available_lock:
            if self.data_available.wait():
                print(f"{self.data} -> {2 * self.data}")
                self.data = None
                self.data_available.clear()
                self.waiting_for_data.set()


class DoublerWithCondition:
    """One Condition guards `data`; the consumer waits until it is non-None.

    NOTE(review): generate() does not wait for the previous value to be
    consumed, so values can be overwritten — DoublerWithTwoConditions below
    adds the second handshake.
    """

    def __init__(self):
        self.data_available = threading.Condition()
        self.data = None

    def generate(self):
        with self.data_available:
            data = random.randint(0, 10)
            self.data = data
            print(f"Generated {data}.")
            self.data_available.notify()

    def report(self):
        with self.data_available:
            while self.data is None:   # loop guards against spurious wakeups
                self.data_available.wait()
            print(f"{self.data} -> {2 * self.data}\n")
            self.data = None


class DoublerWithTwoConditions:
    """Full rendezvous: the producer also waits (on data_consumed) until the
    consumer has cleared the previous value."""

    def __init__(self):
        self.data_available = threading.Condition()
        self.data_consumed = threading.Condition()
        self.data = None

    def generate(self):
        with self.data_consumed:
            while self.data is not None:
                self.data_consumed.wait()
            with self.data_available:
                data = random.randint(0, 10)
                self.data = data
                print(f"Generated {data}.")
                self.data_available.notify()

    def report(self):
        with self.data_available:
            while self.data is None:
                self.data_available.wait()
            print(f"{self.data} -> {2 * self.data}\n")
            with self.data_consumed:
                self.data = None
                self.data_consumed.notify()


class AndGate:
    """Two input threads each take one value from their queue, then meet at a
    two-party Barrier whose action prints the AND of the pair."""

    def __init__(self):
        self.barrier = threading.Barrier(2, action=self.report)
        self.data1 = queue.Queue()
        self.data2 = queue.Queue()
        self.d1 = None
        self.d2 = None
        self.lock1 = threading.Lock()
        self.lock2 = threading.Lock()

    def input1(self):
        with self.lock1:
            self.d1 = self.data1.get()
            print(f"input 1 got {self.d1}")
            self.barrier.wait()

    def input2(self):
        with self.lock2:
            self.d2 = self.data2.get()
            print(f"input 2 got {self.d2}")
            self.barrier.wait()

    def report(self):
        # Runs as the barrier action, so both inputs are guaranteed to be set.
        print(f"{self.d1} and {self.d2} -> {self.d1 and self.d2}")


class DoublerWithQueue:
    """The idiomatic version: a bounded Queue does all the synchronization."""

    def __init__(self):
        self.data_queue = queue.Queue(maxsize=1)

    def generate(self):
        data = random.randint(0, 10)
        self.data_queue.put(data)   # blocks while the previous value is unconsumed
        print(f"Generated {data}\n")

    def report(self):
        data = self.data_queue.get()
        print(f"{data} -> {2 * data}\n")


def main():
    """Run 20 interleaved generate/report pairs on a thread pool."""
    doubler = DoublerWithTwoConditions()
    tasks = [doubler.generate, doubler.report] * 20
    random.shuffle(tasks)
    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        for t in tasks:
            executor.submit(t)


def main_and_gate():
    """Feed 10 True/False pairs through the AndGate from a thread pool."""
    and_gate = AndGate()
    tasks = [and_gate.input1, and_gate.input2] * 10
    random.shuffle(tasks)
    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        for t in tasks:
            executor.submit(t)
        data = [True, False] * 10
        # Renamed from `queue`, which shadowed the imported queue module.
        queues = [and_gate.data1, and_gate.data2] * 10
        random.shuffle(data)
        random.shuffle(queues)
        for q, d in zip(queues, data):
            q.put(d)


if __name__ == "__main__":
    # NOTE(review): the original source was truncated after this guard;
    # main() is the most plausible entry point — confirm.
    main()
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes for FREE!
