How to use the test_features method in lettuce_webdriver

Best Python code snippets using lettuce_webdriver_python

CRScope.py

Source: CRScope.py (GitHub)


#!/usr/bin/python
import os
import sys
import time
import pandas as pd
import numpy as np
from multiprocessing import Process, Manager
from multiprocessing.managers import BaseManager
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from imblearn.over_sampling import *
from imblearn.under_sampling import *

sys.path.append('./src')
import arg
import util
from model import *
from case import Case
from log import Logger
from data import Dataset
from docs import Docx, Xlsx
from joblib import dump, load

def load_dataset(file_name):
    dataset = Dataset(logger, file_name)
    columns = ['date', 'security', 'feature']
    dataset.drop_na(columns)
    dataset.factorize_label()
    dataset.sort_date()
    return dataset

def preprocess_data(df, flag_preprocess):
    df['crash_function_full'] = df.crash_function.str.split('(').str[0].str.split('<').str[0]

    def preprocess_crash_function(df):
        df['crash_function_namespace'], df['crash_function_class'], df['crash_function_func'] = \
            zip(*df.crash_function.apply(lambda x: util.parse_function(x)))
        return df

    split_backtrace = df.backtrace.str.split(' - ')
    full_bt = []
    for backtrace in split_backtrace:
        full_bt.append(' - '.join([bt.split('(')[0].split('<')[0] for bt in backtrace]))
    df['backtrace_full'] = full_bt

    def preprocess_backtrace(df):
        name_bt = []
        for backtrace in split_backtrace:
            name_bt.append(' - '.join([util.parse_function(bt)[2] for bt in backtrace]))
        df['backtrace_func'] = name_bt
        return df

    if flag_preprocess:
        df = preprocess_crash_function(df)
        df = preprocess_backtrace(df)
    return df

def extract_features(df_train, df_test, flag_preprocess, flag_tfidvector, flag_countvector):
    from sklearn.preprocessing import LabelEncoder
    from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

    df_train = preprocess_data(df_train, flag_preprocess)
    df_test = preprocess_data(df_test, flag_preprocess)
    train_features = pd.DataFrame()
    test_features = pd.DataFrame()
    label = LabelEncoder()
    tfidf = TfidfVectorizer(sublinear_tf=True, norm='l2', ngram_range=(1, 5), stop_words='english')
    count = CountVectorizer(ngram_range=(1, 5), stop_words='english')

    def update(new, label_encoder):
        label_encoder.classes_ = np.append(label_encoder.classes_, new)
        return len(label_encoder.classes_) - 1

    def process_engine(train_features, test_features):
        train_features['engine'] = label.fit_transform(df_train['engine'])
        test_features['engine'] = df_test['engine'].map(lambda s: update(s, label) if s not in label.classes_ else np.where(label.classes_ == s)[0][0])
        return train_features, test_features

    def process_arch(train_features, test_features):
        train_features['arch'] = label.fit_transform(df_train['arch'])
        test_features['arch'] = df_test['arch'].map(lambda s: update(s, label) if s not in label.classes_ else np.where(label.classes_ == s)[0][0])
        return train_features, test_features

    def process_mode(train_features, test_features):
        train_features['mode'] = label.fit_transform(df_train['mode'])
        test_features['mode'] = df_test['mode'].map(lambda s: update(s, label) if s not in label.classes_ else np.where(label.classes_ == s)[0][0])
        return train_features, test_features

    def process_signal(train_features, test_features):
        train_features['signal'] = label.fit_transform(df_train['signal'])
        test_features['signal'] = df_test['signal'].map(lambda s: update(s, label) if s not in label.classes_ else np.where(label.classes_ == s)[0][0])
        return train_features, test_features

    def process_crash_type(train_features, test_features):
        train_features['crash_type'] = label.fit_transform(df_train['crash_type'])
        test_features['crash_type'] = df_test['crash_type'].map(lambda s: update(s, label) if s not in label.classes_ else np.where(label.classes_ == s)[0][0])
        return train_features, test_features

    def process_crash_instruction(train_features, test_features):
        if flag_tfidvector:
            train_crash_instruction_tfidf = tfidf.fit_transform(df_train['crash_instruction']).toarray()
            test_crash_instruction_tfidf = tfidf.transform(df_test['crash_instruction']).toarray()
            names = tfidf.get_feature_names()
            for i in range(len(names)):
                train_features['inst_tfid-%s' % names[i]] = [row[i] for row in train_crash_instruction_tfidf]
                test_features['inst_tfid-%s' % names[i]] = [row[i] for row in test_crash_instruction_tfidf]
        if flag_countvector:
            train_crash_instruction_count = count.fit_transform(df_train['crash_instruction']).toarray()
            test_crash_instruction_count = count.transform(df_test['crash_instruction']).toarray()
            names = count.get_feature_names()
            for i in range(len(names)):
                train_features['inst_count-%s' % names[i]] = [row[i] for row in train_crash_instruction_count]
                test_features['inst_count-%s' % names[i]] = [row[i] for row in test_crash_instruction_count]
        return train_features, test_features

    def process_crash_function(train_features, test_features):
        if flag_tfidvector:
            train_crash_function_tfidf = tfidf.fit_transform(df_train['crash_function_full']).toarray()
            test_crash_function_tfidf = tfidf.transform(df_test['crash_function_full']).toarray()
            names = tfidf.get_feature_names()
            for i in range(len(names)):
                train_features['func_full_tfid-%s' % names[i]] = [row[i] for row in train_crash_function_tfidf]
                test_features['func_full_tfid-%s' % names[i]] = [row[i] for row in test_crash_function_tfidf]
        if flag_countvector:
            train_crash_function_count = count.fit_transform(df_train['crash_function_full']).toarray()
            test_crash_function_count = count.transform(df_test['crash_function_full']).toarray()
            names = count.get_feature_names()
            for i in range(len(names)):
                train_features['func_full_count-%s' % names[i]] = [row[i] for row in train_crash_function_count]
                test_features['func_full_count-%s' % names[i]] = [row[i] for row in test_crash_function_count]
        if flag_preprocess:
            if flag_tfidvector:
                train_crash_function_tfidf = tfidf.fit_transform(df_train['crash_function_func']).toarray()
                test_crash_function_tfidf = tfidf.transform(df_test['crash_function_func']).toarray()
                names = tfidf.get_feature_names()
                for i in range(len(names)):
                    train_features['func_tfid-%s' % names[i]] = [row[i] for row in train_crash_function_tfidf]
                    test_features['func_tfid-%s' % names[i]] = [row[i] for row in test_crash_function_tfidf]
            if flag_countvector:
                train_crash_function_count = count.fit_transform(df_train['crash_function_func']).toarray()
                test_crash_function_count = count.transform(df_test['crash_function_func']).toarray()
                names = count.get_feature_names()
                for i in range(len(names)):
                    train_features['func_count-%s' % names[i]] = [row[i] for row in train_crash_function_count]
                    test_features['func_count-%s' % names[i]] = [row[i] for row in test_crash_function_count]
        return train_features, test_features

    def process_backtrace(train_features, test_features):
        if flag_tfidvector:
            train_backtrace_tfidf = tfidf.fit_transform(df_train['backtrace_full']).toarray()
            test_backtrace_tfidf = tfidf.transform(df_test['backtrace_full']).toarray()
            names = tfidf.get_feature_names()
            for i in range(len(names)):
                train_features['bt_full_tfid-%s' % names[i]] = [row[i] for row in train_backtrace_tfidf]
                test_features['bt_full_tfid-%s' % names[i]] = [row[i] for row in test_backtrace_tfidf]
        if flag_countvector:
            train_backtrace_count = count.fit_transform(df_train['backtrace_full']).toarray()
            test_backtrace_count = count.transform(df_test['backtrace_full']).toarray()
            names = count.get_feature_names()
            for i in range(len(names)):
                train_features['bt_full_count-%s' % names[i]] = [row[i] for row in train_backtrace_count]
                test_features['bt_full_count-%s' % names[i]] = [row[i] for row in test_backtrace_count]
        if flag_preprocess:
            if flag_tfidvector:
                train_backtrace_tfidf = tfidf.fit_transform(df_train['backtrace_func']).toarray()
                test_backtrace_tfidf = tfidf.transform(df_test['backtrace_func']).toarray()
                names = tfidf.get_feature_names()
                for i in range(len(names)):
                    train_features['bt_tfid-%s' % names[i]] = [row[i] for row in train_backtrace_tfidf]
                    test_features['bt_tfid-%s' % names[i]] = [row[i] for row in test_backtrace_tfidf]
            if flag_countvector:
                train_backtrace_count = count.fit_transform(df_train['backtrace_func']).toarray()
                test_backtrace_count = count.transform(df_test['backtrace_func']).toarray()
                names = count.get_feature_names()
                for i in range(len(names)):
                    train_features['bt_count-%s' % names[i]] = [row[i] for row in train_backtrace_count]
                    test_features['bt_count-%s' % names[i]] = [row[i] for row in test_backtrace_count]
        return train_features, test_features

    def process_exniffer(train_features, test_features):
        import ast
        for i in range(1, 45):
            train_features['feature%d' % i] = df_train['feature'].apply(lambda x: i in ast.literal_eval(x))
            train_features['feature%d' % i] = train_features['feature%d' % i].factorize(sort=True)[0]
            test_features['feature%d' % i] = df_test['feature'].apply(lambda x: i in ast.literal_eval(x))
            test_features['feature%d' % i] = test_features['feature%d' % i].factorize(sort=True)[0]
        return train_features, test_features

    train_features, test_features = process_engine(train_features, test_features)
    train_features.index = df_train.index
    test_features.index = df_test.index
    train_features, test_features = process_arch(train_features, test_features)
    train_features, test_features = process_mode(train_features, test_features)
    train_features, test_features = process_signal(train_features, test_features)
    train_features, test_features = process_crash_type(train_features, test_features)
    train_features, test_features = process_crash_instruction(train_features, test_features)
    train_features, test_features = process_crash_function(train_features, test_features)
    train_features, test_features = process_backtrace(train_features, test_features)
    train_features, test_features = process_exniffer(train_features, test_features)
    train_labels = df_train.security_id
    test_labels = df_test.security_id
    names = train_features.columns.values
    return train_features, test_features, train_labels, test_labels, names

def select_features(features, labels, names):
    corr = features.corr()
    columns = np.full((corr.shape[0],), True, dtype=bool)
    for i in range(corr.shape[0]):
        for j in range(i + 1, corr.shape[0]):
            if corr.iloc[i, j] >= 0.9:
                if columns[j]:
                    columns[j] = False
    names = features.columns[columns]
    features = features[names]
    from sklearn.feature_selection import SelectKBest, chi2
    if features.shape[1] > 100:
        k = 100
    else:
        k = 'all'
    selected_features = SelectKBest(chi2, k=k).fit(features, labels).get_support()
    names = [names[i] for i in range(len(selected_features)) if selected_features[i]]
    features = features[names]
    features = features.values
    return features, names

def sampling(X, y, over, option):
    if over == 'over':
        if option == 1:
            X_sampled, y_sampled = RandomOverSampler().fit_sample(X, y)
        elif option == 2:
            X_sampled, y_sampled = ADASYN().fit_sample(X, y)
        elif option == 3:
            X_sampled, y_sampled = SMOTE().fit_sample(X, y)
    elif over == 'under':
        if option == 1:
            X_sampled, y_sampled = RandomUnderSampler().fit_sample(X, y)
        elif option == 2:
            X_sampled, y_sampled = TomekLinks().fit_sample(X, y)
        elif option == 3:
            X_sampled, y_sampled = CondensedNearestNeighbour().fit_sample(X, y)
        elif option == 4:
            X_sampled, y_sampled = OneSidedSelection().fit_sample(X, y)
        elif option == 5:
            X_sampled, y_sampled = EditedNearestNeighbours().fit_sample(X, y)
        elif option == 6:
            X_sampled, y_sampled = NeighbourhoodCleaningRule().fit_sample(X, y)
    return X_sampled, y_sampled

def drop_features(names, choice):
    s_names = []
    for name in names:
        if 'feature' in name:
            if choice == 'exniffer' or choice == 'combi':
                s_names.append(name)
        else:
            if choice == 'crscope' or choice == 'combi':
                s_names.append(name)
    return s_names

def run(case, model, set_list, names, xlsx):
    model.learn(logger, set_list, names, 4, args.engine)
    model.log_data()
    case.add_accuracy(model.name, model.accuracy_score)
    case.add_aucs(model.name, model.roc_auc_score)
    case.add_tprs(model.name, model.tprs)
    xlsx.write(case.get_name(), model)
    print model.name
    print model.accuracy_score
    print model.roc_auc_score

if __name__ == "__main__":
    start_time = time.time()
    BaseManager.register('Logger', Logger)
    BaseManager.register('Case', Case)
    BaseManager.register('Xlsx', Xlsx)
    manager = BaseManager()
    manager.start()
    # parse arguments
    args = arg.parse(sys.argv[1:])
    # create logger
    logger = manager.Logger('%s.v%s' % (args.engine, args.version))
    # load dataset
    dataset = load_dataset(args.datafile.name)
    # create docx, xlsx for report
    docx = Docx(dataset, args.engine, args.version)
    xlsx = manager.Xlsx(args.engine, args.version)
    # create models
    cases = [
        # manager.Case(False, True, False),
        # manager.Case(False, False, True),
        # manager.Case(False, True, True),
        # manager.Case(True, True, False),
        # manager.Case(True, False, True),
        manager.Case(True, True, True)
    ]
    models = [
        MyLogisticRegression(),
        MyRandomForestClassifier(),
        MyMultinomialNB(),
        MyDecisionTreeClassifier(),
        MyLinearSVC(),
        MyMLPClassifier(),
    ]
    label_list = [model.name for model in models]
    if not os.path.exists('./dump/%s' % (args.engine)):
        os.makedirs('./dump/%s' % (args.engine))
    n = 4
    tscv = TimeSeriesSplit(n_splits=n)
    procs = []
    for case in cases:
        case.init_array(label_list)
        set_list = []
        names_list = []
        dump_file = './dump/%s/%s_%s_%s.dataset' % (args.engine, case.get_flag_preprocess(), case.get_flag_tfidvector(), case.get_flag_countvector())
        if os.path.isfile(dump_file):
            set_list, names_list = load(dump_file)
        else:
            for i, [train_index, test_index] in enumerate(tscv.split(dataset.df)):
                train_features, test_features, train_labels, test_labels, names \
                    = extract_features(dataset.df.iloc[train_index], dataset.df.iloc[test_index], case.get_flag_preprocess(), case.get_flag_tfidvector(), case.get_flag_countvector())
                X_sample1, y_sample1 = sampling(train_features, train_labels, args.sampling, args.option)
                X_sample2, y_sample2 = sampling(test_features, test_labels, args.sampling, args.option)
                new_X = np.vstack([X_sample1, X_sample2])
                new_y = np.append(y_sample1, y_sample2)
                X_train, X_test, y_train, y_test = train_test_split(new_X, new_y, test_size=len(test_index), stratify=new_y)
                set_list.append([X_train, X_test, y_train, y_test])
                names_list.append(names)
            dump([set_list, names_list], dump_file)
        dump_file = './dump/%s/%s_%s_%s.%s_dataset' % (args.engine, case.get_flag_preprocess(), case.get_flag_tfidvector(), case.get_flag_countvector(), args.choice)
        if os.path.isfile(dump_file):
            new_set_list, new_names_list, info = load(dump_file)
        else:
            new_set_list = []
            new_names_list = []
            info = ['', '']
            for [X_train, X_test, y_train, y_test], names in zip(set_list, names_list):
                s_names = drop_features(names, args.choice)
                train_df = pd.DataFrame(X_train, columns=names)
                test_df = pd.DataFrame(X_test, columns=names)
                if args.choice == 'exniffer':
                    ss_names = s_names
                    new_X_train = train_df[s_names].values
                    new_X_test = test_df[s_names].values
                else:
                    new_X_train, ss_names = select_features(train_df[s_names], y_train, s_names)
                    new_X_test = test_df[ss_names].values
                new_set_list.append([new_X_train, new_X_test, y_train, y_test])
                new_names_list.append(ss_names)
                info[0] += '%s / %s\n' % (str(X_train.shape), str(X_test.shape))
                info[1] += '%s / %s\n' % (str(new_X_train.shape), str(new_X_test.shape))
            dump([new_set_list, new_names_list, info], dump_file)
        for model in models:
            proc = Process(target=run, name=model.name, args=(case, model, new_set_list, new_names_list, xlsx, ))
            procs.append(proc)
            proc.start()
        for proc in procs:
            proc.join()
        case.draw(docx.image_dir)
        docx.write(case.get_name(), case.get_figname(), info)
        xlsx.reset_col()
    docx.close()
    xlsx.close()
    ...
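One pattern in extract_features is worth isolating: each categorical column (engine, arch, mode, signal, crash_type) is label-encoded on the training split, and any value that first appears in the test split is appended to the encoder's classes_ so it still gets a stable integer code. Below is a minimal, self-contained sketch of that pattern; the helper name encode_with_unseen and the sample data are ours for illustration, not part of CRScope.py:

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

def encode_with_unseen(train_series, test_series):
    # Fit the encoder on the training values only.
    le = LabelEncoder()
    train_encoded = le.fit_transform(train_series)

    def encode_value(s):
        # A value never seen during training is appended to the known
        # classes and given the next free integer code.
        if s not in le.classes_:
            le.classes_ = np.append(le.classes_, s)
            return len(le.classes_) - 1
        return int(np.where(le.classes_ == s)[0][0])

    return train_encoded, test_series.map(encode_value)

train = pd.Series(['x86', 'arm', 'x86'])
test = pd.Series(['arm', 'mips'])          # 'mips' is unseen at training time
train_enc, test_enc = encode_with_unseen(train, test)
print(train_enc)           # [1 0 1]  (classes_ sorts to ['arm', 'x86'])
print(test_enc.tolist())   # [0, 2]   ('mips' appended as class index 2)

Mutating classes_ this way keeps train and test codes consistent within one call, but the appended categories carry no training signal, so a model sees them only as arbitrary new integers.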


lgbm_model.py

Source: lgbm_model.py (GitHub)


import numpy as np
import pandas as pd
from src.data import preprocess as pp
from sklearn.model_selection import KFold
import lightgbm as lgb
import gc

def lightgbm_model(training_features, test_features, n_folds=3):
    """Light gradient boosting model with cross validation.

    Input parameters
        training_features (pd.DataFrame):
            df containing the training set with target values.
        test_features (pd.DataFrame):
            df containing the test features.
        n_folds (int):
            sets the number of desired folds for the cross validation.

    Return
        submit (pd.DataFrame):
            df with `SK_ID_CURR` and `TARGET` probabilities of model prediction.
    """
    # Extracting ID and target
    test_id = test_features['SK_ID_CURR']
    training_labels = training_features['TARGET']
    # Deleting the ID and target columns
    training_features = training_features.drop(columns=['SK_ID_CURR', 'TARGET'])
    test_features = test_features.drop(columns=['SK_ID_CURR'])
    training_features, test_features = training_features.align(test_features, join='inner', axis=1)
    # Encoding categorical values, imputing and scaling the dataframes
    training_features, test_features = pp.encode_categorical(training_features, test_features)
    training_features, test_features = pp.impute(training_features, test_features)
    training_features, test_features = pp.scale(training_features, test_features)
    # Create the k-fold object
    k_fold = KFold(n_splits=n_folds, shuffle=True, random_state=100)
    # Empty array for test predictions
    prediction_test = np.zeros(test_features.shape[0])
    # Iterate through the defined folds
    for t_index, v_index in k_fold.split(training_features):
        # Training data and validation data for the fold
        train_features, train_labels = training_features[t_index], training_labels[t_index]
        valid_features, valid_labels = training_features[v_index], training_labels[v_index]
        # Building the model - parameters were calculated with Bayesian optimization
        classifier = lgb.LGBMClassifier(n_estimators=10309, objective='binary',
                                        class_weight='balanced', learning_rate=0.0192,
                                        max_depth=7,
                                        min_child_weight=49,
                                        min_split_gain=0.0803,
                                        num_leaves=33, random_state=50,
                                        reg_alpha=0.1, reg_lambda=0.1,
                                        subsample=0.8, n_jobs=-1)
        # Fitting the model
        classifier.fit(train_features, train_labels, eval_metric='auc',
                       eval_set=[(valid_features, valid_labels), (train_features, train_labels)],
                       eval_names=['valid', 'train'],
                       early_stopping_rounds=100, verbose=200)
        best_iteration = classifier.best_iteration_
        # Make predictions
        prediction_test += classifier.predict_proba(test_features,
                                                    num_iteration=best_iteration)[:, 1] / k_fold.n_splits
        # Save the model
        classifier.booster_.save_model('./model/lgbm_classifier.txt', num_iteration=best_iteration)
        # Cleaning up memory
        gc.enable()
        del classifier, train_features, valid_features
        gc.collect()
    # Create the result dataframe for the submission
    submit = pd.DataFrame({'SK_ID_CURR': test_id, 'TARGET': prediction_test})
    ...
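The snippet is cut off right after building submit. Assuming the function ends by returning that dataframe, a driver script might look like the following; the CSV paths and the Home Credit-style column layout (SK_ID_CURR, TARGET) are assumptions for illustration, not something the snippet guarantees:

import pandas as pd
from lgbm_model import lightgbm_model  # assumes the snippet is saved as lgbm_model.py

# Hypothetical input files; they only need the SK_ID_CURR/TARGET columns
# plus whatever features src.data.preprocess expects.
training_features = pd.read_csv('./data/application_train.csv')
test_features = pd.read_csv('./data/application_test.csv')

# Run the cross-validated model with 5 folds instead of the default 3,
# then write a Kaggle-style submission file.
submit = lightgbm_model(training_features, test_features, n_folds=5)
submit.to_csv('./submission.csv', index=False)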


d2v_test.py

Source: d2v_test.py (GitHub)


#!/usr/bin/env python3
from src.libsvm.python.svmutil import *
import random
import json
import glob
import sys
import pdb
import pylab as plt

train_features = []
train_labels = []
for inp in range(1000):
    train_features.append([random.random(), random.random()])
    if train_features[-1][0] * train_features[-1][0] - train_features[-1][1] * train_features[-1][1] < 0:
        train_labels.append(0)
    else:
        train_labels.append(1)
model = svm_train(train_labels, train_features, "-s 0 -t 2")

test_features = []
test_labels = []
for inp in range(1000):
    test_features.append([random.random(), random.random()])
    if test_features[-1][0] * test_features[-1][0] - test_features[-1][1] * test_features[-1][1] < 0:
        test_labels.append(0)
    else:
        test_labels.append(1)

predictions, [acc, mse, cor], oth = svm_predict(test_labels, test_features, model)

all = 0
cor = 0
j = 0
for i in range(len(test_features)):
    all += 1
    if test_features[i][0] * test_features[i][0] - test_features[i][1] * test_features[i][1] < 0:
        if predictions[i] == 0:
            cor += 1
    else:
        if predictions[i] == 1:
            cor += 1
    ...
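Note that this script needs the LIBSVM Python bindings vendored under src/libsvm, which won't exist outside its original repository, and that the trailing loop recomputes by hand the accuracy svm_predict already returns as acc. In LIBSVM's option string, -s 0 selects C-SVC and -t 2 the RBF kernel, so the same toy experiment can be reproduced with scikit-learn as a rough sanity check. This sketch is ours, not part of d2v_test.py:

import random
from sklearn.svm import SVC

def label(point):
    # Same decision rule as the snippet: the sign of x^2 - y^2.
    return 0 if point[0] ** 2 - point[1] ** 2 < 0 else 1

train_features = [[random.random(), random.random()] for _ in range(1000)]
train_labels = [label(p) for p in train_features]
test_features = [[random.random(), random.random()] for _ in range(1000)]
test_labels = [label(p) for p in test_features]

# SVC(kernel='rbf') is the scikit-learn counterpart of LIBSVM's
# '-s 0 -t 2' (C-SVC, RBF kernel), up to minor default differences.
model = SVC(kernel='rbf').fit(train_features, train_labels)
print('accuracy: %.3f' % model.score(test_features, test_labels))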


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites to running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run lettuce_webdriver automation tests on the LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now!

Get 100 minutes of automation testing for free!

Next-Gen App & Browser Testing Cloud
