How to use test_complete method in Testify

Best Python code snippet using Testify_python

data.py

Source: data.py (GitHub)

copy

Full Screen

"""Dataset preparation: load/synthesize data, inject missingness (MCAR/MAR/MNAR),
split into train/valid/test, and optionally impute (simple/iterative/miceforest).

Each dataset function returns
    (X_train, X_valid, X_test, y_train, y_valid, y_test, ..., n_classes)
"""
import numpy as np
import pandas as pd
from scipy.io.arff import loadarff
# functions to prep dataset
import sklearn.datasets as skdata
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, IterativeImputer
import miceforest as mf
# import tensorflow_datasets
import os

# define path locations relative to this file
dir_path = os.path.dirname(os.path.realpath(__file__))
thoracic_path = os.path.join(dir_path, "ThoracicSurgery.arff")
abalone_path = os.path.join(dir_path, "abalone.data")
bank_path = os.path.join(dir_path, "bank-additional/bank-additional.csv")
anneal_path_train = os.path.join(dir_path, "anneal.data")
anneal_path_test = os.path.join(dir_path, "anneal.test")


# convenience imputation functions
def simple(train, valid, test, dtypes=None):
    """Mean-impute continuous columns and mode-impute categorical ones.

    Args:
        train: 2-D array fitted on and transformed.
        valid, test: optional arrays transformed with the train-fitted imputer.
        dtypes: optional per-column type list (0 = continuous, 1 = categorical).
            When None, every column is mean-imputed.

    Returns:
        (train, valid, test) with NaNs filled; valid/test pass through as None
        when not supplied.
    """
    if dtypes is None:
        imp = SimpleImputer(missing_values=np.nan, strategy='mean')
        imp.fit(train)
        train = imp.transform(train)
        if valid is not None:
            valid = imp.transform(valid)
        if test is not None:
            test = imp.transform(test)
    else:
        cont = np.array(dtypes) == 0
        cat = np.array(dtypes) == 1
        imp1 = SimpleImputer(missing_values=np.nan, strategy='mean')
        imp2 = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
        imp1.fit(train[:, cont])
        imp2.fit(train[:, cat])
        train[:, cont] = imp1.transform(train[:, cont])
        train[:, cat] = imp2.transform(train[:, cat])
        if valid is not None:
            valid[:, cont] = imp1.transform(valid[:, cont])
            valid[:, cat] = imp2.transform(valid[:, cat])
        if test is not None:
            test[:, cont] = imp1.transform(test[:, cont])
            test[:, cat] = imp2.transform(test[:, cat])
    return train, valid, test


def iterative(train, rng_key, dtypes=None, valid=None, test=None):
    """Impute with sklearn's IterativeImputer fitted on `train` only.

    `dtypes` is accepted for signature parity with the other imputers but unused.
    """
    imp = IterativeImputer(max_iter=10, random_state=rng_key)
    imp.fit(train)
    train = imp.transform(train)
    if valid is not None:
        valid = imp.transform(valid)
    if test is not None:
        test = imp.transform(test)
    return train, valid, test


def miceforest(train, rng_key, dtypes=None, valid=None, test=None):
    """Impute with a miceforest MultipleImputedKernel fitted on `train`.

    `dtypes` is accepted for signature parity with the other imputers but unused.
    """
    colnames = [str(i) for i in range(train.shape[1])]
    df = pd.DataFrame(train, columns=colnames)
    kernel = mf.MultipleImputedKernel(
        df,
        datasets=20,
        save_all_iterations=True,
        # BUGFIX: was hard-coded random_state=10, silently ignoring rng_key
        random_state=rng_key,
        mean_match_candidates=0
    )
    kernel.mice(3)
    train = kernel.complete_data(0).values
    if valid is not None:
        valid_imp = kernel.impute_new_data(
            new_data=pd.DataFrame(valid, columns=colnames))
        valid = valid_imp.complete_data(0).values
    if test is not None:
        test_imp = kernel.impute_new_data(
            new_data=pd.DataFrame(test, columns=colnames))
        test = test_imp.complete_data(0).values
    return train, valid, test


# dataset generating functions
def spiral(
        N,
        missing=None,
        imputation=None,  # one of none, simple, iterative, miceforest
        train_complete=False,
        test_complete=True,
        split=0.33,
        rng_key=0,
        p=0.5,
        cols_miss=1
        ):
    """Two-class twin-spiral toy dataset with optional injected missingness.

    Returns (X_train, X_valid, X_test, y_train, y_valid, y_test, (x_a, x_b), 2).
    NOTE(review): the `split` parameter is accepted but the splits below use a
    hard-coded test_size=0.33 — confirm against callers before changing.
    """
    rng = np.random.default_rng(rng_key)
    theta = np.sqrt(rng.uniform(0, 1, N)) * 2 * np.pi  # np.linspace(0,2*pi,100)
    r_a = 2 * theta + np.pi
    data_a = np.array([np.cos(theta) * r_a, np.sin(theta) * r_a]).T
    x_a = data_a + rng.standard_normal((N, 2))
    r_b = -2 * theta - np.pi
    data_b = np.array([np.cos(theta) * r_b, np.sin(theta) * r_b]).T
    x_b = data_b + rng.standard_normal((N, 2))
    res_a = np.append(x_a, np.zeros((N, 1)), axis=1)
    res_b = np.append(x_b, np.ones((N, 1)), axis=1)
    res = np.append(res_a, res_b, axis=0)
    rng.shuffle(res)
    X_ = res[:, :2]
    y = res[:, 2]

    # create a noise column x3 and x4 transformation using x1, x2
    x3 = rng.standard_normal((N * 2, 1)) * 5
    x4 = (y).reshape((-1, 1)) + rng.uniform(0, 1, (N * 2, 1))  # y with noise - should be highly informative...

    X_ = np.hstack([X_, x3, x4])

    key = rng.integers(9999)
    if missing is None:
        train_complete = True
        test_complete = True
    if train_complete and test_complete:
        X = X_
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.33, random_state=key)
    elif train_complete and not test_complete:  # TRAIN COMPLETE IS TRUE AND TEST COMPLETE IS FALSE
        X_train, X, y_train, y_test = train_test_split(X_, y, test_size=0.33, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.33, random_state=key)
    elif not train_complete and test_complete:
        X, X_test, y_train, y_test = train_test_split(X_, y, test_size=0.33, random_state=key)
    elif not train_complete and not test_complete:
        X = X_

    # create missingness mask
    cols = X.shape[1]
    if missing == "MAR":
        cols_miss = np.minimum(cols - 1, cols_miss)  # clip cols missing
        q = rng.uniform(0.3, 0.7, (cols - 1,))
        corrections = []
        for col in range(cols - 1):
            correction = X[:, col] > np.quantile(X[:, col], q[col], keepdims=True)  # dependency on each x
            corrections.append(correction)
        # BUGFIX: original np.concatenate + reshape((-1, cols-1)) interleaved
        # rows/columns incorrectly; stack per-column masks side by side instead.
        corrections = np.stack(corrections, axis=1)
        corrections = np.where(corrections, 0.0, 1.0)
        print(corrections.shape, X.shape)
        rand_arr = rng.uniform(0, 1, (X.shape[0], cols - 1)) * corrections
        nan_arr = np.where(rand_arr > (1 - p), np.nan, 1.0)
        X[:, -cols_miss:] *= nan_arr[:, -cols_miss:]  # dependency is shifted to the left, therefore MAR
    if missing == "MNAR":
        cols_miss = np.minimum(cols, cols_miss)  # clip cols missing
        q = rng.uniform(0.3, 0.7, (cols,))
        corrections = []
        for col in range(cols):
            correction = X[:, col] > np.quantile(X[:, col], q[col], keepdims=True)  # dependency on each x
            corrections.append(correction)
        # BUGFIX: same row/column interleaving fix as the MAR branch above.
        corrections = np.stack(corrections, axis=1)
        corrections = np.where(corrections, 0.0, 1.0)
        rand_arr = rng.uniform(0, 1, (X.shape[0], cols)) * corrections
        nan_arr = np.where(rand_arr > (1 - p), np.nan, 1.0)
        X[:, -cols_miss:] *= nan_arr[:, -cols_miss:]  # dependency is not shifted to the left, therefore MNAR
    if isinstance(missing, float) or missing == "MCAR":
        cols_miss = np.minimum(cols, cols_miss)  # clip cols missing
        if isinstance(missing, float):
            p = missing
        rand_arr = rng.uniform(0, 1, (X.shape[0], cols_miss))
        nan_arr = np.where(rand_arr < p, np.nan, 1.0)
        X[:, -cols_miss:] *= nan_arr
    if isinstance(missing, tuple) and missing[1] == "MNAR":
        correction1 = X[:, -1:] < np.quantile(X[:, -1:], 0.2, keepdims=True)  # dependency on x4 MNAR
        correction2 = X[:, :1] < np.quantile(X[:, :1], 0.2, keepdims=True)  # dependency on x1 MAR
        correction3 = X[:, 1:2] < np.quantile(X[:, 1:2], 0.5, keepdims=True)  # dependency on x2 MAR
        correction = (correction1 | correction2) | correction3
        correction = np.where(correction, 0.0, 1.0).reshape((-1, 1))  # dependency on x4
        rand_arr = rng.uniform(0, 1, (X.shape[0], 1)) * correction
        # missingness is dependent on unobserved missing values
        nan_arr = np.where(rand_arr > (1 - missing[0]), np.nan, 1.0)
        X[:, -1:] *= nan_arr

    # generate train, validate, test datasets and impute training
    key = rng.integers(9999)
    if train_complete and test_complete:
        pass
    elif train_complete and not test_complete:
        X_test = X
    elif not train_complete and test_complete:
        X_train = X
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.33, random_state=key)
    elif not train_complete and not test_complete:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.33, random_state=key)

    # (commented-out missingness-diagnostics block from the original removed)

    # perform desired imputation strategy
    if imputation == "simple" and missing is not None:
        X_train, X_valid, X_test = simple(
            X_train,
            dtypes=None,
            valid=X_valid,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "iterative" and missing is not None:
        X_train, X_valid, X_test = iterative(
            X_train,
            key,
            dtypes=None,
            valid=X_valid,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "miceforest" and missing is not None:
        # keep a complete test set out of the imputer when requested
        if test_complete:
            test_input = None
        else:
            test_input = X_test
        X_train, X_valid, test_input = miceforest(
            X_train,
            int(key),
            dtypes=None,
            valid=X_valid,
            test=test_input)
        if not test_complete:
            X_test = test_input
    return X_train, X_valid, X_test, y_train, y_valid, y_test, (x_a, x_b), 2


def thoracic(
        missing="MAR",
        imputation=None,  # one of none, simple, iterative, miceforest
        train_complete=False,
        test_complete=True,
        split=0.33,
        rng_key=0,
        p=0.5,
        cols_miss=1
        ):
    """Thoracic Surgery ARFF dataset (binary target) with injected missingness.

    Returns (X_train, X_valid, X_test, y_train, y_valid, y_test, 2).
    """
    # import data
    rng = np.random.default_rng(rng_key)
    data, meta = loadarff(thoracic_path)
    d = pd.DataFrame(data)
    # convert categorical variables to integer encoding
    coltypes = []  # 1 = nominal, 0 = numeric (currently unused downstream)
    for name in meta.names():
        m = meta[name]
        if m[0] == 'nominal':
            coltypes.append(1)
            levels = list(m[1])
            d[name] = [levels.index(x.decode('UTF-8')) for x in d[name].values]
        else:
            coltypes.append(0)
    coltypes = coltypes[:-1]
    X_ = d.values[:, :-1]
    y = d.values[:, -1]

    if missing is None:
        train_complete = True
        test_complete = True
    if train_complete and test_complete:
        X = X_
        key = rng.integers(9999)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)
    elif train_complete and not test_complete:  # TRAIN COMPLETE IS TRUE AND TEST COMPLETE IS FALSE
        key = rng.integers(9999)
        X_train, X, y_train, y_test = train_test_split(X_, y, test_size=split, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)
    elif not train_complete and test_complete:
        key = rng.integers(9999)
        X, X_test, y_train, y_test = train_test_split(X_, y, test_size=split, random_state=key)
    elif not train_complete and not test_complete:
        X = X_

    cols = X.shape[1]
    if missing == "MCAR":
        cols_miss = np.minimum(cols, cols_miss)  # clip cols missing
        rand_arr = rng.uniform(0, 1, (X.shape[0], cols_miss))
        nan_arr = np.where(rand_arr < p, np.nan, 1.0)
        X[:, -cols_miss:] *= nan_arr
    if missing == "MAR":
        cols_miss = np.minimum(cols - 1, cols_miss)  # clip cols missing
        q = rng.uniform(0.3, 0.7, (cols - 1,))
        corrections = []
        for col in range(cols - 1):
            correction = X[:, col] > np.quantile(X[:, col], q[col], keepdims=True)  # dependency on each x
            corrections.append(correction)
        # BUGFIX: stack per-column masks; concatenate+reshape misaligned rows.
        corrections = np.stack(corrections, axis=1)
        corrections = np.where(corrections, 0.0, 1.0)
        print(corrections.shape, X.shape)
        rand_arr = rng.uniform(0, 1, (X.shape[0], cols - 1)) * corrections
        nan_arr = np.where(rand_arr > (1 - p), np.nan, 1.0)
        X[:, -cols_miss:] *= nan_arr[:, -cols_miss:]  # dependency is shifted to the left, therefore MAR
    if missing == "MNAR":
        cols_miss = np.minimum(cols, cols_miss)  # clip cols missing
        q = rng.uniform(0.3, 0.7, (cols,))
        corrections = []
        for col in range(cols):
            correction = X[:, col] > np.quantile(X[:, col], q[col], keepdims=True)  # dependency on each x
            corrections.append(correction)
        # BUGFIX: stack per-column masks; concatenate+reshape misaligned rows.
        corrections = np.stack(corrections, axis=1)
        corrections = np.where(corrections, 0.0, 1.0)
        rand_arr = rng.uniform(0, 1, (X.shape[0], cols)) * corrections
        nan_arr = np.where(rand_arr > (1 - p), np.nan, 1.0)
        X[:, -cols_miss:] *= nan_arr[:, -cols_miss:]  # dependency is not shifted to the left, therefore MNAR

    # generate train, validate, test datasets and impute training
    if train_complete and test_complete:
        pass
    elif train_complete and not test_complete:
        X_test = X
    elif not train_complete and test_complete:
        X_train = X
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)
    elif not train_complete and not test_complete:
        key = rng.integers(9999)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)

    # (commented-out missingness-diagnostics block from the original removed)

    # perform desired imputation strategy
    if imputation == "simple" and missing is not None:
        X_train, X_valid, X_test = simple(
            X_train,
            dtypes=None,
            valid=X_valid,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "iterative" and missing is not None:
        X_train, X_valid, X_test = iterative(
            X_train,
            key,
            dtypes=None,
            valid=X_valid,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "miceforest" and missing is not None:
        if test_complete:
            test_input = None
        else:
            test_input = X_test
        X_train, X_valid, test_input = miceforest(
            X_train,
            int(key),
            dtypes=None,
            valid=X_valid,
            test=test_input)
        if not test_complete:
            X_test = test_input
    return X_train, X_valid, X_test, y_train, y_valid, y_test, 2


def abalone(
        missing="MAR",
        imputation=None,  # one of none, simple, iterative, miceforest
        train_complete=False,
        test_complete=True,
        split=0.33,
        rng_key=0,
        p=0.5,
        cols_miss=1
        ):
    """Abalone dataset (rings as integer-encoded target) with injected missingness.

    Returns (X_train, X_valid, X_test, y_train, y_valid, y_test, 1).
    """
    rng = np.random.default_rng(rng_key)
    data = pd.read_csv(abalone_path, header=None)
    cat = list(data[0].unique())
    data[0] = [cat.index(i) for i in data[0].values]
    X_ = data.values[:, :-1]
    y = data.values[:, -1]
    unique = list(np.unique(y))
    y = np.array([unique.index(v) for v in y])
    coltypes = [1] + [0 for i in range(X_.shape[1] - 1)]  # sex is categorical

    if missing is None:
        train_complete = True
        test_complete = True
    if train_complete and test_complete:
        X = X_
        key = rng.integers(9999)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)
    elif train_complete and not test_complete:  # TRAIN COMPLETE IS TRUE AND TEST COMPLETE IS FALSE
        key = rng.integers(9999)
        X_train, X, y_train, y_test = train_test_split(X_, y, test_size=split, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)
    elif not train_complete and test_complete:
        key = rng.integers(9999)
        X, X_test, y_train, y_test = train_test_split(X_, y, test_size=split, random_state=key)
    elif not train_complete and not test_complete:
        X = X_

    cols = X.shape[1]
    if missing == "MCAR":
        cols_miss = np.minimum(cols, cols_miss)  # clip cols missing
        rand_arr = rng.uniform(0, 1, (X.shape[0], cols_miss))
        nan_arr = np.where(rand_arr < p, np.nan, 1.0)
        X[:, -cols_miss:] *= nan_arr
    if missing == "MAR":
        cols_miss = np.minimum(cols - 1, cols_miss)  # clip cols missing
        q = rng.uniform(0.3, 0.7, (cols - 1,))
        corrections = []
        for col in range(cols - 1):
            correction = X[:, col] > np.quantile(X[:, col], q[col], keepdims=True)  # dependency on each x
            corrections.append(correction)
        # BUGFIX: stack per-column masks; concatenate+reshape misaligned rows.
        corrections = np.stack(corrections, axis=1)
        corrections = np.where(corrections, 0.0, 1.0)
        print(corrections.shape, X.shape)
        rand_arr = rng.uniform(0, 1, (X.shape[0], cols - 1)) * corrections
        nan_arr = np.where(rand_arr > (1 - p), np.nan, 1.0)
        X[:, -cols_miss:] *= nan_arr[:, -cols_miss:]  # dependency is shifted to the left, therefore MAR
    if missing == "MNAR":
        cols_miss = np.minimum(cols, cols_miss)  # clip cols missing
        q = rng.uniform(0.3, 0.7, (cols,))
        corrections = []
        for col in range(cols):
            correction = X[:, col] > np.quantile(X[:, col], q[col], keepdims=True)  # dependency on each x
            corrections.append(correction)
        # BUGFIX: stack per-column masks; concatenate+reshape misaligned rows.
        corrections = np.stack(corrections, axis=1)
        corrections = np.where(corrections, 0.0, 1.0)
        rand_arr = rng.uniform(0, 1, (X.shape[0], cols)) * corrections
        nan_arr = np.where(rand_arr > (1 - p), np.nan, 1.0)
        X[:, -cols_miss:] *= nan_arr[:, -cols_miss:]  # dependency is not shifted to the left, therefore MNAR

    # generate train, validate, test datasets and impute training
    if train_complete and test_complete:
        pass
    elif train_complete and not test_complete:
        X_test = X
    elif not train_complete and test_complete:
        X_train = X
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)
    elif not train_complete and not test_complete:
        key = rng.integers(9999)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)

    # (commented-out missingness-diagnostics block from the original removed)

    # perform desired imputation strategy
    if imputation == "simple" and missing is not None:
        X_train, X_valid, X_test = simple(
            X_train,
            dtypes=None,
            valid=X_valid,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "iterative" and missing is not None:
        X_train, X_valid, X_test = iterative(
            X_train,
            key,
            dtypes=None,
            valid=X_valid,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "miceforest" and missing is not None:
        if test_complete:
            test_input = None
        else:
            test_input = X_test
        X_train, X_valid, test_input = miceforest(
            X_train,
            int(key),
            dtypes=None,
            valid=X_valid,
            test=test_input)
        if not test_complete:
            X_test = test_input
    return X_train, X_valid, X_test, y_train, y_valid, y_test, 1


def banking(imputation=None, split=0.33, rng_key=0):
    """Bank-marketing dataset; "unknown" labels become NaN to be imputed.

    Returns (X_train, X_valid, X_test, y_train, y_valid, y_test, 2).
    NOTE(review): X_valid is never passed to the imputers here, so it may still
    contain NaNs after imputation — confirm this is intentional.
    """
    rng = np.random.default_rng(rng_key)
    data = pd.read_csv(bank_path, sep=";")
    cont = ['age', 'duration', 'campaign', 'pdays', 'previous', 'emp.var.rate', 'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed']
    cat = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'day_of_week', 'poutcome', 'y']

    def lab_2_num(array):
        # integer-encode labels; "unknown" maps to NaN (treated as missing)
        unique_list = [l for l in list(np.unique(array)) if l != "unknown"]
        return np.array([unique_list.index(l) if l != "unknown" else np.nan for l in array])

    for c in cat:
        data[c] = lab_2_num(data[c].values)

    data = data[cont + cat]
    coltype = [1 if i in cat else 0 for i in cont + cat]
    coltype = coltype[:-1]  # drop the target column's type
    X = data.values[:, :-1]
    y = data.values[:, -1]

    # split data
    key = rng.integers(9999)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=key)
    key = rng.integers(9999)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)

    # perform desired imputation strategy
    rng = np.random.default_rng(rng_key)
    if imputation == "simple":
        X_train, _, X_test = simple(
            X_train,
            dtypes=coltype,
            valid=None,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "iterative":
        X_train, _, X_test = iterative(
            X_train,
            int(key),
            valid=None,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "miceforest":
        X_train, _, X_test = miceforest(
            X_train,
            int(key),
            valid=None,
            test=X_test)
    return X_train, X_valid, X_test, y_train, y_valid, y_test, 2


def anneal(imputation=None, split=0.33, rng_key=0):
    """Annealing dataset (pre-split train/test files, "?" as NaN).

    Returns (X_train, X_valid, X_test, y_train, y_valid, y_test, 6).
    """
    cont = [3, 4, 8, 32, 33, 34]  # indices of continuous columns

    def prep_data(train, test):
        # Integer-encode categorical columns, keep continuous ones as floats,
        # and drop columns that are entirely NaN in the training file.
        cols = []
        for i in range(39):
            if i not in cont:
                d = train.values[:, i].astype(str)
                t = test.values[:, i].astype(str)
                vals = np.unique(np.concatenate([d[d != 'nan'], t[t != 'nan']]))
                vals = list(vals)
                dcoded = [np.nan if j == 'nan' else vals.index(j) for j in d]
                tcoded = [np.nan if j == 'nan' else vals.index(j) for j in t]
                if not np.all(np.isnan(dcoded)):
                    cols.append(i)
                    train[i] = dcoded
                    test[i] = tcoded
            else:
                d = train.values[:, i].astype(np.float64)
                t = test.values[:, i].astype(np.float64)
                train[i] = d
                test[i] = t
                if not np.all(np.isnan(d)):
                    cols.append(i)
                    # BUGFIX: original re-assigned train[i] = dcoded / test[i] = tcoded
                    # here, clobbering the float column with the stale categorical
                    # encoding from a previous loop iteration.
        return train[cols].values, test[cols].values

    training = pd.read_csv(anneal_path_train, header=None, na_values=["?"])
    testing = pd.read_csv(anneal_path_test, header=None, na_values=["?"])
    training, testing = prep_data(training, testing)
    X_train, y_train = training[:, :-1], training[:, -1]
    X_test, y_test = testing[:, :-1], testing[:, -1]

    # perform desired imputation strategy
    rng = np.random.default_rng(rng_key)
    if imputation == "simple":
        X_train, _, X_test = simple(
            X_train,
            dtypes=[0 if i in cont else 1 for i in range(X_train.shape[1])],
            valid=None,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "iterative":
        X_train, _, X_test = iterative(
            X_train,
            int(key),
            valid=None,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "miceforest":
        X_train, _, X_test = miceforest(
            X_train,
            int(key),
            valid=None,
            test=X_test)
    # can't presplit before imputation as data is too sparse
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=rng_key + 1)
    return X_train, X_valid, X_test, y_train, y_valid, y_test, 6


def mnist(
        missing="MCAR",
        imputation=None,  # one of none, simple, iterative, miceforest
        train_complete=False,
        test_complete=True,
        split=0.33,
        rng_key=0,
        p=0.5,
        ):
    """sklearn digits (8x8) dataset with injected missingness.

    NOTE(review): the source snippet is truncated inside this function — its
    return statement is not visible; restore it from the full file.
    """
    rng = np.random.default_rng(rng_key)
    # X_, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
    X_, y = skdata.load_digits(return_X_y=True)
    if missing is None:
        train_complete = True
        test_complete = True
    if train_complete and test_complete:
        X = X_
        key = rng.integers(9999)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)
    elif train_complete and not test_complete:  # TRAIN COMPLETE IS TRUE AND TEST COMPLETE IS FALSE
        key = rng.integers(9999)
        X_train, X, y_train, y_test = train_test_split(X_, y, test_size=split, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)
    elif not train_complete and test_complete:
        key = rng.integers(9999)
        X, X_test, y_train, y_test = train_test_split(X_, y, test_size=split, random_state=key)
    elif not train_complete and not test_complete:
        X = X_

    if missing == "MCAR":
        rand_arr = rng.uniform(0, 1, X.shape)
        nan_arr = np.where(rand_arr < p, np.nan, 1.0)
        X *= nan_arr
    elif missing == "MAR":
        # delete a square based on location. Not 'technically' MAR but less 'random' than MCAR implementation
        square = np.ones((1, 8, 8))
        for xi in range(8):
            for yi in range(8):
                if (0 < xi <= 4) and (0 < yi <= 4):
                    square[:, xi, yi] = np.nan
        X *= square.reshape((1, 64))
    elif missing is not None:
        print("not implemented")

    # generate train, validate, test datasets and impute training
    if train_complete and test_complete:
        pass
    elif train_complete and not test_complete:
        X_test = X
    elif not train_complete and test_complete:
        X_train = X
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)
    elif not train_complete and not test_complete:
        key = rng.integers(9999)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=key)
        key = rng.integers(9999)
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=split, random_state=key)

    # (commented-out missingness-diagnostics block from the original removed)

    # perform desired imputation strategy
    if imputation == "simple" and missing is not None:
        X_train, X_valid, X_test = simple(
            X_train,
            dtypes=None,
            valid=X_valid,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "iterative" and missing is not None:
        X_train, X_valid, X_test = iterative(
            X_train,
            key,
            dtypes=None,
            valid=X_valid,
            test=X_test)

    key = rng.integers(9999)
    if imputation == "miceforest" and missing is not None:
        if test_complete:
            test_input = None
        else:
            test_input = X_test
        X_train, X_valid, test_input = miceforest(
            X_train,
            int(key),
            dtypes=None,
            valid=X_valid,
            test=test_input)
        if not test_complete:
            X_test = test_input
    # NOTE(review): original snippet ends here (truncated) — the return statement,
    # presumably following the same tuple shape as the other dataset functions,
    # must be restored from the complete source file.

Full Screen

Full Screen

models.py

Source: models.py (GitHub)

copy

Full Screen

"""Django models for a testing/quiz app: flows, groups, profiles, subjects,
tests with Excel-based question import, and completion/grading records."""
from django.db import models
from django.contrib.auth.models import User, UserManager
import xlrd
from django.utils.timezone import now
from transliterate import translit, get_available_language_codes


class Flow(models.Model):
    # index of the student flow (cohort)
    flow_index = models.IntegerField(unique=True, default=0)

    def __str__(self):
        return "Поток: " + str(self.flow_index)


class Group(models.Model):
    group_index = models.IntegerField(unique=True)
    group_from_flow = models.ForeignKey('Flow', on_delete=models.CASCADE)

    def __str__(self):
        return "Группа: " + str(self.group_index) + " поток: " + str(self.group_from_flow)


class Profile(models.Model):
    """Per-user role/group profile extending the auth User."""
    WHO_IS_LIST = (
        ('T', 'Teacher'),
        ('S', 'Student'),
        ('A', 'Administrator'),
    )
    # FIX: on_delete is required since Django 2.0; CASCADE matches the pre-2.0 default.
    profile_user = models.OneToOneField(User, on_delete=models.CASCADE, related_name='profile')
    profile_group = models.ForeignKey(Group, blank=True, null=True, on_delete=models.CASCADE)
    profile_permissions = models.CharField(max_length=1, choices=WHO_IS_LIST, default='S')

    def __str__(self):
        return "Права: " + str(self.profile_permissions) + " Фамилия: " + str(self.profile_user.last_name) + " Имя: " + str(self.profile_user.first_name)


class Subject(models.Model):
    subject_name = models.CharField(max_length=15)
    subject_tutor = models.ForeignKey(Profile, blank=True, null=True, on_delete=models.SET_NULL, related_name='subjects_tutors')
    subject_flow = models.ForeignKey(Flow, blank=True, null=True, on_delete=models.SET_NULL, related_name='subjects_flow')
    # latin transliteration of subject_name, used for upload paths
    translit = models.CharField(max_length=15, blank=True, null=True)

    def transliting(self):
        # NOTE: sets the field but does not save(); caller must persist.
        self.translit = translit(self.subject_name, 'ru', reversed=True)


class Question(models.Model):
    TYPE_QUESTION = (
        ('V', 'Variable'),
        ('S', 'Single'),
        ('I', 'Input'),
    )
    question_text = models.TextField()
    question_for_test = models.ForeignKey('Test', on_delete=models.CASCADE, related_name='test_questions')
    question_type = models.CharField(max_length=1, choices=TYPE_QUESTION, default='S')


class Answer(models.Model):
    answer_text = models.CharField(max_length=512)
    answer_is_correct = models.BooleanField(blank=True, default=False)
    answer_to_question = models.ForeignKey('Question', on_delete=models.CASCADE, related_name='answers')


def test_directory_path(instance, filename):
    # file will be uploaded to MEDIA_ROOT/<subject translit>/<filename>
    return '{0}/{1}'.format(instance.test_subject.translit, filename)


class Test(models.Model):
    """A test with grade thresholds (test_2/3/4) and an optional xls source file."""
    test_name = models.CharField(max_length=20)
    test_subject = models.ForeignKey('Subject', on_delete=models.CASCADE, related_name='tests')
    test_num_select = models.IntegerField(blank=True, null=True)
    test_file = models.FileField(upload_to=test_directory_path, blank=True, null=True)
    test_is_active = models.BooleanField(blank=True, default=False)
    # percentage thresholds below which marks 2/3/4 are given (else 5)
    test_2 = models.IntegerField(blank=True, null=True)
    test_3 = models.IntegerField(blank=True, null=True)
    test_4 = models.IntegerField(blank=True, null=True)
    test_time = models.IntegerField(blank=True, null=True)

    def check_file(self):
        """Return True if a source file is attached."""
        return bool(self.test_file)

    def get_absolute_url(self):
        print('.{0}'.format(self.test_file.url))
        return '.{0}'.format(self.test_file.url)

    def get_url_to_del(self):
        print('{0}'.format(self.test_file.url))
        return '{0}'.format(self.test_file.url)

    def rang_set_null(self):
        # NOTE: resets grading config to zero; does not save().
        self.test_2 = 0
        self.test_3 = 0
        self.test_4 = 0
        self.test_num_select = 0
        self.test_time = 0

    def pre_load(self):
        """Import questions/answers from the attached xls workbook.

        Rows with a non-empty first cell start a new Question; subsequent rows
        are Answers for it ('+' in column 2 marks the correct one).
        """
        rb = xlrd.open_workbook(self.get_absolute_url(), formatting_info=True)
        sheet = rb.sheet_by_index(0)
        buf_pk = 0  # pk of the most recently created question
        for rownum in range(sheet.nrows):
            if sheet.cell(rownum, 0).value:
                question = Question(question_text=sheet.cell(rownum, 0).value, question_for_test=self, question_type=sheet.cell(rownum, 1).value)
                question.save()
                buf_pk = question.pk
                continue
            status = sheet.cell(rownum, 2).value == '+'
            value = sheet.cell(rownum, 1).value
            # xlrd reads integers as floats; strip the spurious ".0"
            if type(value) is float and value == int(value):
                value = int(value)
            answer = Answer(answer_text=value, answer_is_correct=status, answer_to_question=Question.objects.get(pk=buf_pk))
            answer.save()

    def is_complete(self, user):
        """Return True if `user` has a completion record for this test.

        BUGFIX: the original fetched .get(test=self.pk) and discarded the
        result — raising DoesNotExist when absent instead of returning False —
        and then tested the truthiness of the unfiltered-by-test queryset.
        """
        return TestComplete.objects.filter(student=user, test=self.pk).exists()


class TestComplete(models.Model):
    """One student's completion record for one test."""
    test = models.ForeignKey(Test, blank=True, null=True, on_delete=models.CASCADE)
    # FIX: on_delete is required since Django 2.0; CASCADE matches the pre-2.0 default.
    student = models.ForeignKey(Profile, blank=True, null=True, on_delete=models.CASCADE)
    rang = models.IntegerField(blank=True, null=True)  # score as a percentage
    correct_answers = models.IntegerField(blank=True, null=True)
    data = models.DateTimeField(default=now)
    num_select = models.IntegerField(blank=True, null=True)
    mark = models.IntegerField(blank=True, null=True)

    def create(self, test, user, num):
        self.student = user
        self.test = test
        self.num_select = num
        self.save()


class TestSelection(models.Model):
    """A single answered question within a TestComplete."""
    # FIX: on_delete is required since Django 2.0; CASCADE matches the pre-2.0 default.
    test_complete = models.ForeignKey(TestComplete, on_delete=models.CASCADE)
    question = models.ForeignKey(Question, on_delete=models.CASCADE)
    student_answer = models.CharField(max_length=127, blank=True, null=True)
    is_correct = models.BooleanField(blank=True, default=False)


def identification(questions):
    """Grade a set of TestSelection rows in place; return the number correct.

    'I' questions compare free text, 'S' compare the chosen answer pk, and the
    remaining ('V') require the exact set of correct answer pks.
    """
    result = 0
    for question in questions:
        if question.question.question_type == 'I':
            answer = Answer.objects.get(answer_to_question=question.question.pk)
            if str(answer.answer_text) == str(question.student_answer):
                question.is_correct = True
                question.save()
                result += 1
        elif question.question.question_type == 'S':
            answer = Answer.objects.filter(answer_to_question=question.question.pk).get(answer_is_correct=True)
            if str(answer.pk) == str(question.student_answer):
                question.is_correct = True
                question.save()
                result += 1
        else:
            buf = True
            list_of_answers = question.student_answer.split(',')
            answers = Answer.objects.filter(answer_to_question=question.question.pk).filter(answer_is_correct=True)
            for answer in answers:
                if str(answer.pk) not in list_of_answers:
                    buf = False
                    break
                else:
                    list_of_answers.remove(str(answer.pk))
            if len(list_of_answers) > 0:
                buf = False  # extra (incorrect) selections remain
            question.is_correct = buf
            question.save()
            if buf:
                result += 1
    return result


def rang(pk, questions):
    """Grade the TestComplete `pk`: store correct count, percentage, and mark."""
    test_complete = TestComplete.objects.get(pk=pk)
    test_complete.correct_answers = identification(questions)
    test_complete.save()
    test_complete.rang = (test_complete.correct_answers / test_complete.num_select) * 100
    test_complete.save()
    test = Test.objects.get(pk=test_complete.test.pk)
    r = test_complete.rang
    if r < test.test_2:
        test_complete.mark = 2
        test_complete.save()
    elif r < test.test_3:
        test_complete.mark = 3
        test_complete.save()
    elif r < test.test_4:
        test_complete.mark = 4
        test_complete.save()
    else:
        test_complete.mark = 5
        # NOTE(review): source snippet is truncated here — the other branches
        # call test_complete.save(); confirm against the full file before
        # assuming this branch persists the mark.

Full Screen

Full Screen

p_impute_quad.py

Source:p_impute_quad.py Github

copy

Full Screen

import pandas as pd
from sklearn import linear_model
# Bug fix: cross_val_score / cross_val_predict were imported from
# sklearn.cross_validation, a module removed in scikit-learn 0.20, while the
# very next line already imported from sklearn.model_selection. Everything
# now comes from model_selection in one import.
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, KFold
from matplotlib import pyplot as plt

# Impute the missing 'breast-quad' values of the test set with a logistic
# regression trained on the complete test rows.
test_complete_with_ID = pd.read_csv("../datasets/Cancer/cleaning/breast-cancer.shuf.test.complete.csv")
test_missing_quads_with_ID = pd.read_csv("../datasets/Cancer/cleaning/breast-cancer.shuf.test.missing_quad.csv")

# Ordinal encodings for every categorical column of the dataset.
ages = {'20-29': 0, '30-39': 1, '40-49': 2, '50-59': 3, '60-69': 4, '70-79': 5}
meno = {'premeno': 0, 'lt40': 1, 'ge40': 2}
size = {'0-4': 0, '5-9': 1, '10-14': 2, '15-19': 3, '20-24': 4, '25-29': 5, '30-34': 6, '35-39': 7, '40-44': 8, '45-49': 9, '50-54': 10}
inv_nodes = {'0-2': 0, '3-5': 1, '6-8': 2, '9-11': 3, '12-14': 4, '15-17': 5, '24-26': 6}
caps = {'no': 0, 'yes': 1}
breast = {'left': 0, 'right': 1}
quad = {'central': 0, 'left_low': 1, 'right_low': 2, 'left_up': 3, 'right_up': 4}
rad = {'no': 0, 'yes': 1}
classes = {'no-recurrence-events': 0, "recurrence-events": 1}

# Column -> encoding table, so both data frames get identical treatment.
COLUMN_MAPS = {
    "age": ages,
    "menopause": meno,
    "tumor-size": size,
    "inv-nodes": inv_nodes,
    "node-caps": caps,
    "breast": breast,
    "breast-quad": quad,
    "irradiat": rad,
}


def encode_categoricals(df):
    """Map every known categorical column of `df` to its ordinal code in place.

    Columns absent from `df` (e.g. the dropped 'breast-quad' in the
    missing-value frame) are skipped.
    """
    for col, mapping in COLUMN_MAPS.items():
        if col in df.columns:
            df[col] = df[col].astype('category').map(mapping)
    return df


# Complete rows: drop identifiers, encode features.
test_complete = encode_categoricals(test_complete_with_ID.drop(["ID", "Class"], axis=1))
# Rows with the quadrant missing: same treatment, minus the target column.
test_missing_quads = encode_categoricals(test_missing_quads_with_ID.drop(["ID", "Class", "breast-quad"], axis=1))

# 80/20 hold-out evaluation of the imputation model.
y = test_complete["breast-quad"]
templ = test_complete.drop("breast-quad", axis=1)
X_train, X_test, y_train, y_test = train_test_split(templ, y, test_size=0.2)
lm = linear_model.LogisticRegression()
model = lm.fit(X_train, y_train)
predictions = lm.predict(X_test)
print("Score:", model.score(X_test, y_test))
# plt.scatter(y_test, predictions)
# plt.xlabel("True Values")
# plt.ylabel("Predictions")
# plt.show()

# Refit on all complete data and evaluate with cross-validation.
lm = linear_model.LogisticRegression()
model = lm.fit(templ, y)
# NOTE(review): `kf` is unused by the visible code (cv=6 below builds its
# own folds) — the script is truncated, so it may be used further down.
kf = KFold(n_splits=5, shuffle=True)
scores = cross_val_score(model, templ, y, cv=6)
print("cross-scores: ", scores)
predictions = cross_val_predict(model, templ, y, cv=6)
# plt.scatter(y, predictions)
# plt.xlabel("True Values")
# plt.ylabel("Predictions")
# plt.show()
# now take the complete model and predict the missing values in testset

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub — from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.

Run Testify automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful