How to use test_new method in pytest-django

Best Python code snippets using pytest-django
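Despite the title, test_new is not a method of the pytest-django API: in the snippets below it is simply the name of a test function or of a pandas DataFrame. For orientation, here is a minimal sketch of what a pytest-django test named test_new could look like; it uses the real django_db marker and django_user_model fixture from pytest-django, while the username and password values are purely illustrative.

import pytest

# The django_db marker (from pytest-django) unblocks database access for this test
@pytest.mark.django_db
def test_new(django_user_model):
    # django_user_model is a pytest-django fixture resolving to the project's user model
    user = django_user_model.objects.create_user(username="alice", password="s3cret")
    assert user.pk is not None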

test_jama_sync_gherkin.py

Source: test_jama_sync_gherkin.py (GitHub)


import jama_sync as jama_sync
import pytest
from pathlib import Path
from libraries.unit_test_helper import replace_section_in_file, standardize_file

testing = True
path_to_folder = str(Path(__file__).parent)
root_test_data_folder = path_to_folder + "/test_data/gherkin_jama_sync_test"
test_path = f"{root_test_data_folder}/test_docs.feature"
standard_file_path = path_to_folder + "/test_data/docs_standards/test_docs_standard.feature"


# Test that Jama Sync will accept correct documentation
def test_correct_documentation(capfd, username, password):
    arguments = jama_sync.parse_args(["-root_path", root_test_data_folder,
                                      "-test_path", test_path,
                                      "-jama_user", username, "-jama_pass", password,
                                      "-interpreter", "gherkin"])
    standardize_file(test_path, standard_file_path)
    jama_sync.update_jama(arguments, testing)
    output, err = capfd.readouterr()
    assert "the following 'gherkin' test cases were read:\n" \
           "test_NEW docs" in output


def test_add(capfd, username, password):
    arguments = jama_sync.parse_args(["-root_path", root_test_data_folder,
                                      "-test_path", test_path,
                                      "-jama_user", username, "-jama_pass", password,
                                      "-interpreter", "gherkin", "--add"])
    standardize_file(test_path, standard_file_path)
    jama_sync.update_jama(arguments, testing)
    output, err = capfd.readouterr()
    assert "the following 'gherkin' test cases were read:\n" \
           "test_NEW docs" in output
    assert "Successfully updated: test_gid_" in output


# WARNING: this test case depends on the last test case
@pytest.mark.dependency(depends=["test_add"])
def test_add_update(capfd, username, password):
    arguments = jama_sync.parse_args(["-root_path", root_test_data_folder,
                                      "-test_path", test_path,
                                      "-jama_user", username, "-jama_pass", password,
                                      "-interpreter", "gherkin", "--update"])
    jama_sync.update_jama(arguments, testing)
    output, err = capfd.readouterr()
    assert "the following 'gherkin' test cases were read:\ntest_" in output and "docs" in output


# WARNING: this test case depends on the last test case
@pytest.mark.dependency(depends=["test_add_update"])
@pytest.mark.parametrize("section, section_change",
                         [("Description:",
                           " Testing Jama Sync table writing in Description\n"
                           " *begin table*\n"
                           " testing | testing | testing |\n"
                           " testing | testing | testing |\n"
                           " *end table*"),
                          ("Prerequisites:",
                           " 1) Testing Jama Sync table writing in Prerequisites\n"
                           " *begin table*\n"
                           " testing | testing | testing\n"
                           " testing | testing | testing\n"
                           " *end table*"),
                          ("Test Data:",
                           " 1) Testing Jama Sync table writing in Test Data\n"
                           " *begin table*\n"
                           " testing | testing | testing\n"
                           " testing | testing | testing\n"
                           " *end table*"),
                          ])
def test_table_documentation_happy_path(capfd, username, password, section, section_change):
    arguments = jama_sync.parse_args(["-root_path", root_test_data_folder,
                                      "-test_path", test_path,
                                      "-jama_user", username, "-jama_pass", password,
                                      "-interpreter", "gherkin", "--update"])
    replace_section_in_file(test_path, section, section_change, "gherkin")
    jama_sync.update_jama(arguments, testing)
    output, err = capfd.readouterr()
    assert "the following 'gherkin' test cases were read:\ntest_" in output and "docs" in output


@pytest.mark.parametrize("section, delete_section, expected_string",
                         [("Description:", True, "Expecting 'Description' line in test case test_NEW docs "
                                                 "(if this section is present, check that it is the first line "
                                                 "in the doc-string)"),
                          ("Description:", False, "Expecting 'Prerequisites' line in test case test_NEW docs or the "
                                                  "previous section data (if this section is present, check that it "
                                                  "is directly 2 lines below the last section)"),
                          ("Prerequisites:", True, "Expecting 'Prerequisites' line in test case test_NEW docs or the "
                                                   "previous section data (if this section is present, check that it "
                                                   "is directly 2 lines below the last section)"),
                          ("Prerequisites:", False, "Failed to read test file: Expecting either '1)', another dash "
                                                    "'-', or '*begin table*' in the Prerequisites section in "
                                                    "test_NEW docs"),
                          ("Test Data:", True, "Expecting 'Test Data' line in test case test_NEW docs or the "
                                               "previous section data (if this section is present, check that it "
                                               "is directly 2 lines below the last section)"),
                          ("Test Data:", False, "Failed to read test file: Expecting either '1)', another dash '-', or"
                                                " '*begin table*' in the Test Data section in test_NEW docs"),
                          ("Steps:", True, "Expecting 'Steps' line in test case test_NEW docs or the previous section "
                                           "data (if this section is present, check that it is directly 2 lines below "
                                           "the last section)"),
                          ("Steps:", False, "Expecting '1)' line under 'Steps' in test case test_NEW docs"),
                          ])
def test_basic_incorrect_documentation(capfd, username, password, section, delete_section, expected_string):
    arguments = jama_sync.parse_args(["-root_path", root_test_data_folder,
                                      "-test_path", test_path,
                                      "-jama_user", username, "-jama_pass", password,
                                      "-interpreter", "gherkin", "--add"])
    standardize_file(test_path, standard_file_path)
    replace_section_in_file(test_path, section, "", "gherkin", delete_section=delete_section)
    jama_sync.update_jama(arguments, testing)
    output, err = capfd.readouterr()
    if expected_string not in output:
        print(output)
    assert expected_string in output


@pytest.mark.parametrize("section_change, expected_string",
                         [(" 1) Testing Jama Sync writing in Steps\n"
                           " Notes: Testing Jama Sync writing in Notes",
                           "Expecting 'ER:' line under 'Steps' in test case test_NEW docs"),
                          (" 1) Testing Jama Sync writing in Steps\n"
                           " ER: Testing Jama Sync writing in Expected Results",
                           "Expecting 'Notes:' line under 'Steps' in test case test_NEW docs"),
                          ])
def test_incorrect_steps_documentation(capfd, username, password, section_change, expected_string):
    arguments = jama_sync.parse_args(["-root_path", root_test_data_folder,
                                      "-test_path", test_path,
                                      "-jama_user", username, "-jama_pass", password,
                                      "-interpreter", "gherkin", "--add"])
    standardize_file(test_path, standard_file_path)
    replace_section_in_file(test_path, "Steps:", section_change, "gherkin")
    jama_sync.update_jama(arguments, testing)
    output, err = capfd.readouterr()
    if expected_string not in output:
        print(output)
    assert expected_string in output


@pytest.mark.parametrize("section, section_change, expected_string",
                         [("Prerequisites:",
                           " 1) Testing Jama Sync writing in Prerequisites\n"
                           " 3) Testing Jama Sync writing in Prerequisites\n",
                           "Expecting either '2)', another dash '-', or '*begin table*' in the "
                           "Prerequisites section in test_NEW docs"),
                          ("Test Data:",
                           " 1) Testing Jama Sync writing in Test Data\n"
                           " 3) Testing Jama Sync writing in Test Data\n",
                           "Expecting either '2)', another dash '-', or '*begin table*' in the "
                           "Test Data section in test_NEW docs"),
                          ("Steps:",
                           " 1) Testing Jama Sync writing in Steps\n"
                           " ER: Testing Jama Sync writing in Expected Results\n"
                           " Notes: Testing Jama Sync writing in Notes\n"
                           " 3) Testing Jama Sync writing in Steps\n",
                           "Expecting '2)' line under 'Steps' in test case test_NEW docs"),
                          ])
def test_incorrect_numbering_documentation(capfd, username, password, section, section_change, expected_string):
    arguments = jama_sync.parse_args(["-root_path", root_test_data_folder,
                                      "-test_path", test_path,
                                      "-jama_user", username, "-jama_pass", password,
                                      "-interpreter", "gherkin", "--add"])
    standardize_file(test_path, standard_file_path)
    replace_section_in_file(test_path, section, section_change, "gherkin")
    jama_sync.update_jama(arguments, testing)
    output, err = capfd.readouterr()
    if expected_string not in output:
        print(output)
    assert expected_string in output


@pytest.mark.parametrize("section, section_change, expected_string",
                         [("Description:",
                           " Testing Jama Sync table writing in Description\n"
                           " *begin table*\n"
                           " testing | testing | testing\n"
                           " testing | testing\n"
                           " *end table*",
                           "Table row: 'testing | testing' does not have the expected amount of columns:3 as the rest "
                           "of the table in test_NEW docs"),
                          ("Description:",
                           " Testing Jama Sync table writing in Description\n"
                           " *begin table*\n"
                           " testing | testing | testing\n"
                           " testing | testing\n",
                           "Expecting '*end table*' in the Description section in test_NEW docs"),
                          ("Prerequisites:",
                           " 1) Testing Jama Sync table writing in Prerequisites\n"
                           " *begin table*\n"
                           " testing | testing | testing\n"
                           " testing | testing\n"
                           " *end table*",
                           "Table row: 'testing | testing' does not have the expected amount of columns:3 as the rest "
                           "of the table in test_NEW docs"),
                          ("Prerequisites:",
                           " 1) Testing Jama Sync table writing in Prerequisites\n"
                           " *begin table*\n"
                           " testing | testing | testing\n"
                           " testing | testing\n",
                           "Expecting '*end table*' in the Prerequisites section in test_NEW docs"),
                          ("Test Data:",
                           " 1) Testing Jama Sync table writing in Test Data\n"
                           " *begin table*\n"
                           " testing | testing | testing\n"
                           " testing | testing\n"
                           " *end table*",
                           "Table row: 'testing | testing' does not have the expected amount of columns:3 as the rest "
                           "of the table in test_NEW docs"),
                          ("Test Data:",
                           " 1) Testing Jama Sync table writing in Test Data\n"
                           " *begin table*\n"
                           " testing | testing | testing\n"
                           " testing | testing\n",
                           "Expecting '*end table*' in the Test Data section in test_NEW docs"),
                          ])
def test_incorrect_table_documentation(capfd, username, password, section, section_change, expected_string):
    arguments = jama_sync.parse_args(["-root_path", root_test_data_folder,
                                      "-test_path", test_path,
                                      "-jama_user", username, "-jama_pass", password,
                                      "-interpreter", "gherkin", "--add"])
    standardize_file(test_path, standard_file_path)
    replace_section_in_file(test_path, section, section_change, "gherkin")
    jama_sync.update_jama(arguments, testing)
    output, err = capfd.readouterr()
    if expected_string not in output:
        print(output)
    ...
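A few notes on the fixtures this file leans on: capfd is pytest's built-in fixture that captures writes to the stdout/stderr file descriptors, username and password are presumably custom fixtures defined in the project's conftest.py, and pytest.mark.dependency comes from the pytest-dependency plugin (which normally also requires the depended-on test to carry its own @pytest.mark.dependency() marker). Below is a minimal sketch of the capfd pattern every test above relies on, with greet standing in for jama_sync.update_jama:

def greet():
    print("hello from jama")


def test_greet_output(capfd):
    greet()
    # readouterr() drains the stdout and stderr captured since the last call
    output, err = capfd.readouterr()
    assert "hello from jama" in output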


eda.py

Source: eda.py (GitHub)


import pandas as pd
from sklearn import preprocessing
import numpy as np
from eda_tools import *

PATH = "D:/Documents/Machine Learning/Walmart Trip Type"
DATA_IMPORT_PATH = PATH + "/data/"

CAT_DUMMIES = True
DAY_DUMMIES = True
DAY_SIN_COS = True
DPT_ENCODE = True
FAVOURITE_FINELINES = True
fav_fnls = 4
PERC_NAN = 0
export = True

if __name__ == '__main__':

    train, test, sample_submission = read_data()

    # enumerate days
    train = enum_days(train)
    test = enum_days(test)

    # and calculate day index
    train = day_index(train)
    test = day_index(test)

    # calculate week number
    train, test = week_number(train, test)
    test_correct = no_dup_upcs(test)
    train_correct = no_dup_upcs(train)

    # visitnumber scancount sum, total items purchased
    train_new = train.groupby("VisitNumber")["ScanCount"]\
        .agg("sum")\
        .reset_index()\
        .rename(columns={"ScanCount": "CartNetItems"})
    test_new = test.groupby("VisitNumber")["ScanCount"]\
        .agg("sum")\
        .reset_index()\
        .rename(columns={"ScanCount": "CartNetItems"})
    train_new = items_bought(train, train_new)
    test_new = items_bought(test, test_new)

    # add triptype to each visitnumber
    train_new = add_triptype(train, train_new)

    # rearrange columns
    train_new = train_new[["VisitNumber", "TripType", "CartNetItems", "ItemsBought"]]

    # fineline entropy
    train_new = fineline_entropy(train, train_new)
    test_new = fineline_entropy(test, test_new)
    # fineline chaos
    train_new = fineline_chaos(train, train_new)
    test_new = fineline_chaos(test, test_new)

    train_new = refunded_exact_items(train_correct, train_new)
    test_new = refunded_exact_items(test_correct, test_new)
    train_new = replaced_refunded_not_exact(train_correct, train_new)
    test_new = replaced_refunded_not_exact(test_correct, test_new)

    # bought exactly the number of items i returned (same upc)
    train_new = replaced_exact_items(train_correct, train_new)
    test_new = replaced_exact_items(test_correct, test_new)

    # total items returned
    train_new = items_returned(train, train_new)
    test_new = items_returned(test, test_new)

    # returned to bought ratio
    train_new["RtnTobtRatio"] = (train_new["ItemsReturned"] / train_new["ItemsBought"]).round(3)
    test_new["RtnTobtRatio"] = (test_new["ItemsReturned"] / test_new["ItemsBought"]).round(3)

    # total items
    train_new["TotalItems"] = train_new["ItemsBought"] + train_new["ItemsReturned"]
    test_new["TotalItems"] = test_new["ItemsBought"] + test_new["ItemsReturned"]

    # add day enum
    temp = train.groupby(["VisitNumber", "WeekdayEnum"]).first().reset_index()
    train_new = train_new.merge(temp[["WeekdayEnum", "VisitNumber"]], on="VisitNumber", how="left")
    temp = test.groupby(["VisitNumber", "WeekdayEnum"]).first().reset_index()
    test_new = test_new.merge(temp[["WeekdayEnum", "VisitNumber"]], on="VisitNumber", how="left")

    train["DepartmentDescription"] = train["DepartmentDescription"].fillna("None")
    test["DepartmentDescription"] = test["DepartmentDescription"].fillna("None")

    # nunique UPCs for bought and returned items
    temp = train[train["ScanCount"] > 0].groupby("VisitNumber")["Upc"].nunique().reset_index()
    train_new = train_new.merge(temp, how="left", on="VisitNumber").rename(columns={"Upc": "UniqueUpcsBought"})
    train_new["UniqueUpcsBought"] = train_new["UniqueUpcsBought"].fillna(0)

    temp = test[test["ScanCount"] > 0].groupby("VisitNumber")["Upc"].nunique().reset_index()
    test_new = test_new.merge(temp, how="left", on="VisitNumber").rename(columns={"Upc": "UniqueUpcsBought"})
    test_new["UniqueUpcsBought"] = test_new["UniqueUpcsBought"].fillna(0)

    temp = train[train["ScanCount"] < 0].groupby("VisitNumber")["Upc"].nunique().reset_index()
    train_new = train_new.merge(temp, how="left", on="VisitNumber").rename(columns={"Upc": "UniqueUpcsReturned"})
    train_new["UniqueUpcsReturned"] = train_new["UniqueUpcsReturned"].fillna(0)

    temp = test[test["ScanCount"] < 0].groupby("VisitNumber")["Upc"].nunique().reset_index()
    test_new = test_new.merge(temp, how="left", on="VisitNumber").rename(columns={"Upc": "UniqueUpcsReturned"})
    test_new["UniqueUpcsReturned"] = test_new["UniqueUpcsReturned"].fillna(0)

    # total unique UPCs
    train_new["TotalUniqueUpcs"] = train_new["UniqueUpcsBought"] + train_new["UniqueUpcsReturned"]
    test_new["TotalUniqueUpcs"] = test_new["UniqueUpcsBought"] + test_new["UniqueUpcsReturned"]

    # total unique UPCs to total items perc
    train_new["TotalUniqueUpcs_perc"] = (train_new["TotalUniqueUpcs"] / train_new["TotalItems"]).round(3)
    test_new["TotalUniqueUpcs_perc"] = (test_new["TotalUniqueUpcs"] / test_new["TotalItems"]).round(3)

    # unique UPCs bought / items bought
    train_new["UniqueUpcsBought_perc"] = (train_new["UniqueUpcsBought"] / train_new["ItemsBought"]).round(3)
    train_new["UniqueUpcsReturned_perc"] = (train_new["UniqueUpcsReturned"] / train_new["ItemsReturned"]).round(3)

    train_new["UniqueUpcsBought_perc"] = train_new["UniqueUpcsBought_perc"].fillna(PERC_NAN)
    train_new["UniqueUpcsReturned_perc"] = train_new["UniqueUpcsReturned_perc"].fillna(PERC_NAN)
    test_new["UniqueUpcsBought_perc"] = (test_new["UniqueUpcsBought"] / test_new["ItemsBought"]).round(3)
    test_new["UniqueUpcsReturned_perc"] = (test_new["UniqueUpcsReturned"] / test_new["ItemsReturned"]).round(3)

    test_new["UniqueUpcsBought_perc"] = test_new["UniqueUpcsBought_perc"].fillna(PERC_NAN)
    test_new["UniqueUpcsReturned_perc"] = test_new["UniqueUpcsReturned_perc"].fillna(PERC_NAN)

    # unique fineline numbers
    temp = train[train["ScanCount"] > 0].groupby("VisitNumber")["FinelineNumber"].nunique().reset_index()
    train_new = train_new.merge(temp, how="left", on="VisitNumber")\
        .rename(columns={"FinelineNumber": "UniqueFinelinesBought"})
    train_new["UniqueFinelinesBought"] = train_new["UniqueFinelinesBought"].fillna(0)

    temp = test[test["ScanCount"] > 0].groupby("VisitNumber")["FinelineNumber"].nunique().reset_index()
    test_new = test_new.merge(temp, how="left", on="VisitNumber")\
        .rename(columns={"FinelineNumber": "UniqueFinelinesBought"})
    test_new["UniqueFinelinesBought"] = test_new["UniqueFinelinesBought"].fillna(0)

    temp = train[train["ScanCount"] < 0].groupby("VisitNumber")["FinelineNumber"]\
        .nunique()\
        .reset_index()
    train_new = train_new.merge(temp, how="left", on="VisitNumber")\
        .rename(columns={"FinelineNumber": "UniqueFinelinesReturned"})
    train_new["UniqueFinelinesReturned"] = train_new["UniqueFinelinesReturned"].fillna(0)

    temp = test[test["ScanCount"] < 0].groupby("VisitNumber")["FinelineNumber"]\
        .nunique()\
        .reset_index()
    test_new = test_new.merge(temp, how="left", on="VisitNumber")\
        .rename(columns={"FinelineNumber": "UniqueFinelinesReturned"})
    test_new["UniqueFinelinesReturned"] = test_new["UniqueFinelinesReturned"].fillna(0)

    # total unique finelines
    train_new["TotalUniqueFinelines"] = train_new["UniqueFinelinesReturned"] + train_new["UniqueFinelinesBought"]
    test_new["TotalUniqueFinelines"] = test_new["UniqueFinelinesReturned"] + test_new["UniqueFinelinesBought"]

    # total unique finelines to total items percentage
    train_new["TotalUniqueFinelines_perc"] = (train_new["TotalUniqueFinelines"] / train_new["TotalItems"]).round(3)
    test_new["TotalUniqueFinelines_perc"] = (test_new["TotalUniqueFinelines"] / test_new["TotalItems"]).round(3)

    # finelines percentage
    train_new["UniqueFinelinesBought_perc"] = (train_new["UniqueFinelinesBought"] / train_new["ItemsBought"]).round(3)
    train_new["UniqueFinelinesReturned_perc"] = (train_new["UniqueFinelinesReturned"] / train_new["ItemsReturned"]).round(3)

    train_new["UniqueFinelinesBought_perc"] = train_new["UniqueFinelinesBought_perc"].fillna(PERC_NAN)
    train_new["UniqueFinelinesReturned_perc"] = train_new["UniqueFinelinesReturned_perc"].fillna(PERC_NAN)
    test_new["UniqueFinelinesBought_perc"] = (test_new["UniqueFinelinesBought"] / test_new["ItemsBought"]).round(3)
    test_new["UniqueFinelinesReturned_perc"] = (test_new["UniqueFinelinesReturned"] / test_new["ItemsReturned"]).round(3)

    test_new["UniqueFinelinesBought_perc"] = test_new["UniqueFinelinesBought_perc"].fillna(PERC_NAN)
    test_new["UniqueFinelinesReturned_perc"] = test_new["UniqueFinelinesReturned_perc"].fillna(PERC_NAN)

    # finelines entropy
    train_new.replace(np.inf, 0, inplace=True)
    test_new.replace(np.inf, 0, inplace=True)

    if FAVOURITE_FINELINES:
        train_new = get_fav_flnumbers(train, train_new, n=fav_fnls)
        test_new = get_fav_flnumbers(test, test_new, n=fav_fnls)

    if CAT_DUMMIES:
        train_new = category_dummies(train, train_new, train=True)
        test_new = category_dummies(test, test_new)
        train_new = train_new.drop(columns="HEALTH AND BEAUTY AIDS")

    # one-hot encode days
    if DAY_DUMMIES:
        train_new = day_dummies(train, train_new, train=True)
        test_new = day_dummies(test, test_new)

    if DAY_SIN_COS:
        train_new = days_sin_cos(train_new)
        test_new = days_sin_cos(test_new)

    # encode department description
    if DPT_ENCODE:
        department_description_le = preprocessing.LabelEncoder()
        department_description_le.fit(pd.concat([train['DepartmentDescription'], test["DepartmentDescription"]]))
        department_description_mapping = dict(zip(department_description_le.classes_,
                                                  department_description_le.transform(department_description_le.classes_)))
        department_description_mapping_inv = dict(
            zip(department_description_le.transform(department_description_le.classes_),
                department_description_le.classes_))
        train["DepartmentDescription"] = train["DepartmentDescription"].map(department_description_mapping)

    if export:
        chonda = ""
        if DAY_SIN_COS:
            chonda = chonda + "_sincos"
        if CAT_DUMMIES:
            chonda = chonda + "_catdum"
        if DAY_DUMMIES:
            chonda = chonda + "_daydum"
        if FAVOURITE_FINELINES:
            chonda = chonda + "_" + str(fav_fnls) + "favFL"
        train_new.to_csv(PATH + "/eda final/" + "train_new_temp" + chonda + ".csv", index=False)
        # train.to_csv(PATH + "/temp/" + "train_temp.csv", index=False)
        # test.to_csv(PATH + "/temp/" + "test_temp.csv", index=False)
        test_new.to_csv(PATH + "/eda final/" + "test_new_temp" + chonda + ".csv", index=False)
    ...


utils.py

Source: utils.py (GitHub)


1"""2Helper functions for categorical encodings3"""4from pandas.api.types import is_string_dtype, is_numeric_dtype5from sklearn.preprocessing import OneHotEncoder6from sklearn.feature_extraction import FeatureHasher7from sklearn.model_selection import KFold8import pandas as pd9from sklearn.ensemble import RandomForestClassifier10from sklearn.metrics import roc_auc_score11def kfold_target_encoder(train, test, cols_encode, target, folds=10):12 """13 Mean regularized target encoding based on kfold14 """15 train_new = train.copy()16 test_new = test.copy()17 kf = KFold(n_splits=folds, random_state=1)18 for col in cols_encode:19 global_mean = train_new[target].mean()20 for train_index, test_index in kf.split(train):21 mean_target = train_new.iloc[train_index].groupby(col)[target].mean()22 train_new.loc[test_index, col + "_mean_enc"] = train_new.loc[test_index, col].map(mean_target)23 train_new[col + "_mean_enc"].fillna(global_mean, inplace=True)24 # making test encoding using full training data25 col_mean = train_new.groupby(col)[target].mean()26 test_new[col + "_mean_enc"] = test_new[col].map(col_mean)27 test_new[col + "_mean_enc"].fillna(global_mean, inplace=True)28 29 # filtering only mean enc cols30 train_new = train_new.filter(like="mean_enc", axis=1)31 test_new = test_new.filter(like="mean_enc", axis=1)32 return train_new, test_new33 34def catboost_target_encoder(train, test, cols_encode, target):35 """36 Encoding based on ordering principle37 """38 train_new = train.copy()39 test_new = test.copy()40 for column in cols_encode:41 global_mean = train[target].mean()42 cumulative_sum = train.groupby(column)[target].cumsum() - train[target]43 cumulative_count = train.groupby(column).cumcount()44 train_new[column + "_cat_mean_enc"] = cumulative_sum/cumulative_count45 train_new[column + "_cat_mean_enc"].fillna(global_mean, inplace=True)46 # making test encoding using full training data47 col_mean = train_new.groupby(column).mean()[column + "_cat_mean_enc"] #48 test_new[column + "_cat_mean_enc"] = test[column].map(col_mean)49 test_new[column + "_cat_mean_enc"].fillna(global_mean, inplace=True)50 # filtering only mean enc cols51 train_new = train_new.filter(like="cat_mean_enc", axis=1)52 test_new = test_new.filter(like="cat_mean_enc", axis=1)53 return train_new, test_new54def one_hot_encoder(train, test, cols_encode, target=None):55 """ one hot encoding"""56 ohc_enc = OneHotEncoder(handle_unknown='ignore')57 ohc_enc.fit(train[cols_encode])58 train_ohc = ohc_enc.transform(train[cols_encode])59 test_ohc = ohc_enc.transform(test[cols_encode])60 return train_ohc, test_ohc61 62def label_encoder(train, test, cols_encode=None, target=None):63 """64 Code borrowed from fast.ai and is tweaked a little.65 Convert columns in a training and test dataframe into numeric labels 66 """67 train_new = train.drop(target, axis=1).copy()68 test_new = test.drop(target, axis=1).copy()69 70 for n,c in train_new.items():71 if is_string_dtype(c) or n in cols_encode : train_new[n] = c.astype('category').cat.as_ordered()72 73 if test_new is not None:74 for n,c in test_new.items():75 if (n in train_new.columns) and (train_new[n].dtype.name=='category'):76 test_new[n] = pd.Categorical(c, categories=train_new[n].cat.categories, ordered=True)77 78 cols = list(train_new.columns[train_new.dtypes == 'category'])79 for c in cols:80 train_new[c] = train_new[c].astype('category').cat.codes81 if test_new is not None: test_new[c] = test_new[c].astype('category').cat.codes82 return train_new, test_new83def hash_encoder(train, test, 
cols_encode, target=None, n_features=10):84 """hash encoder"""85 h = FeatureHasher(n_features=n_features, input_type="string")86 for col_encode in cols_encode:87 h.fit(train[col_encode])88 train_hash = h.transform(train[col_encode])89 test_hash = h.transform(test[col_encode])90 return train_hash, test_hash91def fitmodel_and_auc_score(encoder, train, test, cols_encode, target, **kwargs):92 """93 Fits and returns scores of a random forest model. Uses ROCAUC as scoring metric94 """95 model = RandomForestClassifier(n_estimators=500,96 n_jobs=-1, 97 class_weight="balanced",98 max_depth=10)99 if encoder:100 train_encoder, test_encoder = encoder(train, test, cols_encode=cols_encode, target=target)101 else:102 train_encoder, test_encoder = train.drop(target, axis=1), test.drop(target, axis=1)103 model.fit(train_encoder, train[target])104 train_score = roc_auc_score(train[target], model.predict(train_encoder))105 valid_score = roc_auc_score(test[target], model.predict(test_encoder))...
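All of these helpers share the same call shape, (train, test, cols_encode, target), and return encoded train/test frames, which is what lets fitmodel_and_auc_score swap encoders freely. A hedged usage sketch for kfold_target_encoder on synthetic data (the column names and sizes here are arbitrary, and the function is assumed to be imported from this module):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
train = pd.DataFrame({"city": rng.choice(["a", "b", "c"], size=100),
                      "target": rng.integers(0, 2, size=100)})
test = pd.DataFrame({"city": rng.choice(["a", "b", "c"], size=20),
                     "target": rng.integers(0, 2, size=20)})

# city_mean_enc holds out-of-fold target means per category
train_enc, test_enc = kfold_target_encoder(train, test, cols_encode=["city"],
                                           target="target", folds=5)
print(train_enc.head())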


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub: right from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pytest-django automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

