Best Python code snippet using yandex-tank
experimental_environment.py
Source:experimental_environment.py  
...172                            self._update_performance_measures(classifier_type_name, num_of_features,173                                                              predictions, predictions_proba, performance_measure,174                                                              test_class, classes)175                    self._calculate_average_for_performance_measures(classifier_type_name, valid_k, num_of_features)176        self._write_results_into_file()177        self._write_results_as_table()178        if not unlabeled_features_dataframe.empty:179            self._create_best_classifier_train_save_and_predict(labeled_features_dataframe, targeted_class_series,180                                                                unlabeled_features_dataframe,181                                                                unlabeled_index_field_series,182                                                                unlabeled_targeted_class_series)183        else:184            print("The dataset is not include unlabeled authors!! 
# NOTE(review): the three definitions below are methods of the experiment
# class whose header lies outside this excerpt; they only make sense with an
# instance bound to ``self``.


def predict_on_prepared_clssifier(self):
    """Predict the unlabeled authors with the classifier from ``_get_trained_classifier``.

    The author features are fetched, narrowed to the unlabeled authors,
    prepared for learning and handed to ``_predict_classifier``.  The
    predictions and their probabilities are then passed to
    ``_write_predictions_into_file`` together with the trained classifier's
    type name and number of features.
    """
    classifier = self._get_trained_classifier()
    author_features = self._get_author_features_dataframe()
    unlabeled_raw = self._retreive_unlabeled_authors_dataframe(author_features)
    # The targeted-class series returned by the preparation step is not needed here.
    unlabeled_features, _unlabeled_targets, unlabeled_index = \
        self._prepare_dataframe_for_learning(unlabeled_raw)
    predictions, predictions_proba = self._predict_classifier(classifier, unlabeled_features)
    self._write_predictions_into_file(
        self._trained_classifier_type_name,
        self._trained_classifier_num_of_features,
        unlabeled_index,
        predictions,
        predictions_proba)


def train_one_class_classifier_and_predict(self):
    """Evaluate one-class classifiers per feature combination, then predict.

    Steps:
      1. Reset ``self._one_class_column_names`` / ``self._one_class_dict``.
      2. Let ``_train_one_class_classifiers_for_each_combination`` fill the
         result dictionary and dump it to ``<self._path>one_class_results.csv``.
      3. Reduce the labeled/unlabeled frames to the combination chosen by
         ``_find_best_combination``, fit a ``svm.OneClassSVM`` on the labeled
         frame and write predictions plus decision-function values for the
         unlabeled authors via ``_write_predictions_into_file``.
    """
    # NOTE(review): the '#Good_Actors_Errors_Test_Set' /
    # 'STDEV_Good_Actors_Errors_Test_Set' pair appears twice; the second pair
    # was presumably meant to be the "Corrected" counterpart.  Left untouched
    # because the helper that fills these keys is not visible -- confirm.
    self._one_class_column_names = [
        'Combination', '#Bad_Actors_Training_Set',
        '#Bad_Actors_Errors_Test_Set', 'STDEV_Bad_Actors_Errors_Test_Set',
        '#Bad_Actors_Corrected_Test_Set', 'STDEV_Bad_Actors_Corrected_Test_Set',
        '#Good_Actors_Errors_Test_Set', 'STDEV_Good_Actors_Errors_Test_Set',
        '#Good_Actors_Errors_Test_Set', 'STDEV_Good_Actors_Errors_Test_Set',
        '#Total_Test_Set',
    ]
    self._one_class_dict = self._create_one_class_dictionary()

    (labeled_features_dataframe, unlabeled_features_dataframe, targeted_class_series,
     _unlabeled_targets, unlabeled_index_field_series) = \
        self._create_labeled_and_unlabeled_based_on_author_features()
    feature_names = list(labeled_features_dataframe.columns.values)

    self._train_one_class_classifiers_for_each_combination(labeled_features_dataframe,
                                                           targeted_class_series)

    one_class_result_dataframe = pd.DataFrame(self._one_class_dict,
                                              columns=self._one_class_column_names)
    one_class_result_dataframe.to_csv(self._path + "one_class_results.csv", index=False)

    best_combination_elements = self._find_best_combination(one_class_result_dataframe)
    labeled_features_dataframe, unlabeled_features_dataframe = \
        self._create_labeled_and_unlabeled_based_on_combination(
            best_combination_elements, feature_names,
            labeled_features_dataframe, unlabeled_features_dataframe)

    one_class_classifier = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
    one_class_classifier.fit(labeled_features_dataframe)
    unlabeled_predictions = one_class_classifier.predict(unlabeled_features_dataframe)
    distances = one_class_classifier.decision_function(unlabeled_features_dataframe)
    self._write_predictions_into_file("one_class", str(len(best_combination_elements)),
                                      unlabeled_index_field_series,
                                      unlabeled_predictions, distances)


def train_one_class_classifier_by_k_best_and_predict(self):
    """Cross-validate a one-class SVM on ISIS bad actors per feature count, then predict.

    For every entry of ``self._num_of_features_to_train`` the ISIS bad-actor
    frame is reduced by ``_reduce_dimensions_by_num_of_features``; a
    ``svm.OneClassSVM`` is fitted on each training fold and evaluated on the
    held-out bad actors (expected prediction ``+1``) and on all good actors
    (expected prediction ``-1``).  Per-fold means and standard deviations are
    appended to ``self._one_class_dict`` and written to
    ``<self._path>one_class_results.csv``.  Finally a classifier is fitted on
    the labeled frame reduced to the combination picked by
    ``_find_best_combination`` and the unlabeled authors are predicted.

    Changes compared with the previous implementation:
      * means are taken over the folds that were actually executed instead of
        dividing by ``self._k_for_fold`` (the folds come from
        ``_select_valid_k``, which also returns its own fold count);
      * ``feature_names`` is computed once before the loop, so it is defined
        even when ``self._num_of_features_to_train`` is empty;
      * locals that were computed but never read were removed.
    """
    self._one_class_column_names = [
        'Combination', 'Num_of_features', '#Bad_Actors_Training_Set',
        '#Errors_Bad_Actors_Test_Set', 'STDEV_Bad_Actors_Errors_Test_Set',
        '#Corrected_Bad_Actors_Test_Set', 'STDEV_Corrected_Bad_Actors_Test_Set',
        '#Errors_Good_Actors_Test_Set', 'STDEV_Good_Actors_Errors_Test_Set',
        '#Corrects_Good_Actors_Test_Set', 'STDEV_Good_Actors_Corrects_Test_Set',
        '#Errors_Test_Set', 'STDEV_Errors_Test_Set',
        '#Corrects_Test_Set', 'STDEV_Corrects_Test_Set',
        '#Total_Test_Set',
    ]
    self._one_class_dict = self._create_one_class_dictionary()
    results = self._one_class_dict

    (labeled_features_dataframe, unlabeled_features_dataframe, _unlabeled_targets,
     unlabeled_index_field_series) = \
        self._create_unlabeled_authors_dataframe_and_raw_labeled_authors_dataframe()

    target_values = labeled_features_dataframe[self._targeted_class_name]
    good_actors_dataframe = labeled_features_dataframe.loc[target_values == 'good_actor']
    isis_bad_actors_dataframe = labeled_features_dataframe.loc[
        (target_values == 'bad_actor') &
        (labeled_features_dataframe['author_sub_type'] == 'ISIS_terrorist')]

    good_actors_dataframe, _, _ = self._prepare_dataframe_for_learning(good_actors_dataframe)
    isis_bad_actors_dataframe, isis_bad_actors_targeted_class_series, _ = \
        self._prepare_dataframe_for_learning(isis_bad_actors_dataframe)

    # Loop-invariant: the raw labeled frame is not modified inside the loop.
    feature_names = list(labeled_features_dataframe.columns.values)
    all_feature_names = set(feature_names)

    def mean_over_folds(per_fold_values):
        # 0.0 for "no fold ran" mirrors the old 0 / k result.
        if not per_fold_values:
            return 0.0
        return float(sum(per_fold_values)) / len(per_fold_values)

    def record(mean_key, stdev_key, per_fold_values):
        results[mean_key].append(mean_over_folds(per_fold_values))
        results[stdev_key].append(self._calculate_stdev(per_fold_values))

    def count_label(predictions, label):
        return predictions[predictions == label].size

    for num_of_features in self._num_of_features_to_train:
        reduced_isis_bad_actors_dataframe, selected_column_names = \
            self._reduce_dimensions_by_num_of_features(
                isis_bad_actors_dataframe, isis_bad_actors_targeted_class_series, num_of_features)
        features_to_remove = list(all_feature_names - set(selected_column_names))
        reduced_good_actors_dataframe = self._remove_features(features_to_remove,
                                                              good_actors_dataframe.copy())

        results['Combination'].append("+".join(selected_column_names))
        results['Num_of_features'].append(num_of_features)

        training_sizes = []
        bad_errors, bad_corrects = [], []
        good_errors, good_corrects = [], []
        total_errors, total_corrects = [], []
        total_test_set_size = 0

        k_folds, _valid_k = self._select_valid_k(isis_bad_actors_targeted_class_series)
        for train_indexes, test_indexes in k_folds:
            train_set, test_set, _train_class, _test_class = \
                self._create_train_and_test_dataframes_and_classes(
                    reduced_isis_bad_actors_dataframe,
                    train_indexes, test_indexes,
                    isis_bad_actors_targeted_class_series)
            training_sizes.append(train_set.shape[0])
            # Every fold is tested on its held-out bad actors plus ALL good actors.
            total_test_set_size += test_set.shape[0] + reduced_good_actors_dataframe.shape[0]

            one_class_classifier = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
            one_class_classifier.fit(train_set)

            # The model is trained on bad actors only: +1 means "looks like a
            # bad actor", -1 means outlier.
            bad_predictions = one_class_classifier.predict(test_set)
            fold_bad_errors = count_label(bad_predictions, -1)
            fold_bad_corrects = count_label(bad_predictions, 1)

            good_predictions = one_class_classifier.predict(reduced_good_actors_dataframe)
            fold_good_errors = count_label(good_predictions, 1)
            fold_good_corrects = count_label(good_predictions, -1)

            bad_errors.append(fold_bad_errors)
            bad_corrects.append(fold_bad_corrects)
            good_errors.append(fold_good_errors)
            good_corrects.append(fold_good_corrects)
            total_errors.append(fold_bad_errors + fold_good_errors)
            total_corrects.append(fold_bad_corrects + fold_good_corrects)

        results['#Bad_Actors_Training_Set'].append(mean_over_folds(training_sizes))
        record('#Errors_Bad_Actors_Test_Set', 'STDEV_Bad_Actors_Errors_Test_Set', bad_errors)
        record('#Corrected_Bad_Actors_Test_Set', 'STDEV_Corrected_Bad_Actors_Test_Set', bad_corrects)
        record('#Errors_Good_Actors_Test_Set', 'STDEV_Good_Actors_Errors_Test_Set', good_errors)
        record('#Corrects_Good_Actors_Test_Set', 'STDEV_Good_Actors_Corrects_Test_Set', good_corrects)
        record('#Errors_Test_Set', 'STDEV_Errors_Test_Set', total_errors)
        record('#Corrects_Test_Set', 'STDEV_Corrects_Test_Set', total_corrects)
        # Unlike the other columns this one is a sum over folds, not a mean.
        results['#Total_Test_Set'].append(total_test_set_size)

    one_class_result_dataframe = pd.DataFrame(results, columns=self._one_class_column_names)
    one_class_result_dataframe.to_csv(self._path + "one_class_results.csv", index=False)

    best_combination_elements = self._find_best_combination(one_class_result_dataframe)
    labeled_features_dataframe, unlabeled_features_dataframe = \
        self._create_labeled_and_unlabeled_based_on_combination(
            best_combination_elements, feature_names,
            labeled_features_dataframe, unlabeled_features_dataframe)

    one_class_classifier = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
    # Missing feature values are replaced by 0 before fitting the final model.
    labeled_features_dataframe = labeled_features_dataframe.fillna(0)
    one_class_classifier.fit(labeled_features_dataframe)
    unlabeled_predictions = one_class_classifier.predict(unlabeled_features_dataframe)
    distances = one_class_classifier.decision_function(unlabeled_features_dataframe)
    self._write_predictions_into_file("one_class", str(len(best_combination_elements)),
                                      unlabeled_index_field_series,
                                      unlabeled_predictions, distances)
source data from csv377                        source_df = pd.read_csv(self._source_input_path + '/' + source_domain + '.csv')378                    elif self._source_input_type == 'table':  # load from authors_features table379                        source_df = self._get_author_features_dataframe()380                    if self._target_input_type == 'csv':  # load target data from csv381                        target_df = pd.read_csv(self._target_input_path + '/' + target_domain + '.csv')382                    elif self._target_input_type == 'table':383                        target_df = self._get_author_features_dataframe()384                    # feature pre-processing385                    source_df = self._preprocess_dataframe(source_df)386                    target_df = self._preprocess_dataframe(target_df)387                    # find features in common388                    common_features = list(389                        set(source_df.columns) & set(target_df.columns))  # train and test must have the same features390                    source_df = source_df[common_features]391                    target_df = target_df[common_features]392                    # If we do not use instance based transfer learning we need to iterate only once through the num_neighbors loop393                    if not self._transfer_instances:394                        self._num_neighbors = [395                            -1]  # If we are not transferring instances iterate only once through the for loop396                    for k in self._num_neighbors:397                        if source_domain == target_domain:  # split 'target' dataset into train and test, ignore 'source'398                            msk = np.random.rand(len(target_df)) < (1 - self._target_train_test_split)399                            train_df = target_df[~msk]400                            test_df = target_df[msk]401                        elif source_domain != target_domain and not 
self._transfer_instances:  # train on dataset source and test on dataset target402                            train_df = source_df403                            test_df = target_df404                        elif source_domain != target_domain and self._transfer_instances:  # transfer knowledge from source to target405                            msk = np.random.rand(len(target_df)) < (1 - self._target_train_test_split)406                            train_df = target_df[~msk]407                            test_df = target_df[msk]408                            if self._target_train_percent_limit > 0:409                                train_df = train_df.sample(frac=self._target_train_percent_limit)410                            if self._target_test_percent_limit > 0:411                                test_df = test_df.sample(frac=self._target_test_percent_limit)412                            train_size_before_transfer = len(train_df)413                            if self._transfer_algo == 'BURAK':414                                '''415                                    B. Turhan, T. Menzies, A. B. Bener, and J. 
DiStefano.416                                    On the relative value of cross-company and within-company data for defect prediction.417                                    Burak Algorithm for instance-based transfer learning:418                                    The dataset we wish to improve or add additional data is called 'Target Dataset'.419                                    This algorithm first splits the target dataset into train and test sets.420                                    Then, for every object in the test set, it selects the k nearest neighbors wihtin any external 'Source dataset'421                                    and transfer these neighbors from the 'Source dataset' to the train set of the target dataset.422                                '''423                                nbrs = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(source_df)424                                for index, row in test_df.iterrows():425                                    nbr_idx = nbrs.kneighbors(row, return_distance=False)426                                    for idx in nbr_idx[0]:427                                        train_df = train_df.append(source_df.iloc[idx])428                                train_df.drop_duplicates(inplace=True)429                                number_of_transfered_instances = len(train_df) - train_size_before_transfer430                            elif self._transfer_algo == 'GRAVITY_WEIGHTING':431                                '''432                                Ying Ma, Guangchun Luo, Xue Zeng, Aiguo Chen433                                Transfer learning for cross-company software defect prediction434                                Gravity Weighting: training instances are weighted inversely435                                proportional to their distance from the test instances, based436                                on measure of similarity defined in the paper437                                '''438        
                        train_df['weight'] = 1439                                min_values = test_df.min(axis=0)440                                max_values = test_df.max(axis=0)441                                for idx, row in source_df.iterrows():442                                    si = 0443                                    for col in target_df.columns:444                                        min_j = min_values[col]445                                        max_j = max_values[col]446                                        if min_j <= row[col] <= max_j:447                                            si += 1448                                    w = si / (len(source_df.columns) - si + 1)449                                    row['weight'] = w450                                    train_df = train_df.append(row)451                                X_train_weights = train_df.pop('weight').as_matrix()452                                train_df.drop_duplicates(inplace=True)453                                number_of_transfered_instances = len(train_df) - train_size_before_transfer454                        else:455                            raise Exception("Transfer learning module not configured properly")456                        X_train = train_df[source_df.columns.drop(self._targeted_class_name)]457                        y_train = train_df[self._targeted_class_name]458                        X_test = test_df[target_df.columns.drop(self._targeted_class_name)]459                        y_test = test_df[self._targeted_class_name]460                        # feature scaling461                        for scaling in self._feature_scaling:462                            if scaling == 'StandardScaler':463                                scaler = StandardScaler()464                            elif scaling == 'RobustScaler':465                                scaler = RobustScaler()466                            elif scaling == 'MinMaxScaler':467                      
          scaler = MinMaxScaler()468                            if scaling != 'None':469                                cols = list(X_train.columns)470                                X_train[cols] = scaler.fit_transform(X_train[cols].as_matrix())471                                X_test[cols] = scaler.fit_transform(X_test[cols].as_matrix())472                            # feature selection473                            for num_features in self._num_of_features_to_train:474                                for selection_method in self._feature_selection:475                                    if num_features == 'all':476                                        num_features = len(X_train.columns)477                                    selector = SelectKBest(score_func=globals()[selection_method],478                                                           k=int(num_features), )479                                    selector.fit_transform(X_train, y_train)480                                    scores = {X_train.columns[i]: selector.scores_[i] for i in481                                              range(len(X_train.columns))}482                                    filename = 'selected_features_source_' + source_domain + '_target_' + target_domain + '.csv'483                                    with open(filename, 'ab') as csv_file:484                                        writer = csv.writer(csv_file)485                                        writer.writerow(486                                            ['Feature Scaling Method', 'Num features', 'Feature Selection Method',487                                             'Feature', 'Value'])488                                        for key, value in scores.items():489                                            writer.writerow([scaling, num_features, selection_method, key, value])490                                    sorted_features = sorted(scores, key=scores.get, reverse=True)[:int(num_features)]491                      
              X_best_features_train = X_train[sorted_features]492                                    X_best_features_test = X_test[sorted_features]493                                    # model training494                                    trained_models = []495                                    for classifier_type_name in self._classifier_type_names:496                                        classifier = self._select_classifier_by_type(497                                            classifier_type_name=classifier_type_name)498                                        if source_domain != target_domain and self._transfer_instances and \499                                                (500                                                        self._transfer_algo == 'GRAVITY_WEIGHTING' or self._transfer_algo == 'MODIFIED_GRAVITY_WEIGHTING'):501                                            trained_model = classifier.fit(X=X_best_features_train, y=y_train,502                                                                           sample_weight=X_train_weights)503                                        else:504                                            trained_model = classifier.fit(X=X_best_features_train, y=y_train)505                                        trained_models.append(trained_model)506                                    one_time_flag = True507                                    for model in trained_models:508                                        model_name = model.__class__.__name__509                                        predictions_confidence = model.predict_proba(X_best_features_test)[:, 1]510                                        auc = roc_auc_score(y_test.values, predictions_confidence)511                                        predictions = model.predict(X_best_features_test)512                                        # conf = str(confusion_matrix(y_test.values, predictions))513                                        clasif_rep = 
precision_recall_fscore_support(y_test.values, predictions,514                                                                                     labels=[0, 1], pos_label=1)515                                        out_dict = {}516                                        if self._transfer_instances:517                                            out_dict['Transfer Learning'] = 'Transfer Learning'518                                            out_dict['Size of samples transferred'] = k519                                            out_dict['Algorithm'] = self._transfer_algo520                                        else:521                                            out_dict['Transfer Learning'] = 'No Transfer Learning'522                                            out_dict['Size of samples transferred'] = 0523                                            out_dict['Algorithm'] = 'No'524                                        out_dict['Source domain'] = source_domain525                                        out_dict['Target domain'] = target_domain526                                        out_dict['Number of Features'] = num_features527                                        out_dict['Feature Scaling Method'] = scaling528                                        out_dict['Feature Selection Method'] = selection_method529                                        if self._num_of_features == 'all':530                                            out_dict['Selected Features'] = 'all'531                                        else:532                                            out_dict['Selected Features'] = ', '.join(list(X_train.columns))533                                        out_dict['Number of Transfered Instances'] = number_of_transfered_instances534                                        if 'author_type' in train_df:535                                            if 0 in train_df['author_type'].value_counts():536                                                
out_dict['Train Observations Good'] = int(537                                                    train_df['author_type'].value_counts()[0])538                                            else:539                                                out_dict['Train Observations Good'] = 0540                                            if 1 in train_df['author_type'].value_counts():541                                                out_dict['Train Observations Bad'] = int(542                                                    train_df['author_type'].value_counts()[1])543                                            else:544                                                out_dict['Train Observations Bad'] = 0545                                        else:546                                            out_dict['Train Observations Good'] = 0547                                            out_dict['Train Observations Bad'] = 0548                                        if 'author_type' in test_df:549                                            out_dict['Test Observations Good'] = int(550                                                test_df['author_type'].value_counts()[0])551                                            out_dict['Test Observations Bad'] = int(552                                                test_df['author_type'].value_counts()[1])553                                        else:554                                            out_dict['Test Observations Good'] = 0555                                            out_dict['Test Observations Bad'] = 0556                                        out_dict['Precision Good'] = clasif_rep[0][0].round(2)557                                        out_dict['Precision Bad'] = clasif_rep[0][1].round(2)558                                        out_dict['Recall Good'] = clasif_rep[1][0].round(2)559                                        out_dict['Recall Bad'] = clasif_rep[1][1].round(2)560                                        
out_dict['F1-score Good'] = clasif_rep[2][0].round(2)561                                        out_dict['F1-score Bad'] = clasif_rep[2][1].round(2)562                                        tn, fp, fn, tp = confusion_matrix(y_test.values, predictions).ravel()563                                        out_dict['TP'] = tp564                                        out_dict['TN'] = tn565                                        out_dict['FP'] = fp566                                        out_dict['FN'] = fn567                                        out_dict['AUC'] = auc568                                        out_df = pd.DataFrame(out_dict, index=[model_name],569                                                              columns=['Source domain', 'Target domain',570                                                                       'Transfer Learning',571                                                                       'Size of samples transferred', 'Algorithm',572                                                                       'Feature Scaling Method', 'Number of Features',573                                                                       'Feature Selection Method', 'Selected Features',574                                                                       'Number of Transfered Instances',575                                                                       'Train Observations Good',576                                                                       'Train Observations Bad',577                                                                       'Test Observations Good',578                                                                       'Test Observations Bad', 'Precision Good',579                                                                       'Precision Bad', 'Recall Good',580                                                                       'Recall Bad', 'F1-score Good', 'F1-score Bad',581                        
                                               'TP', 'TN', 'FP', 'FN', 'AUC'])582                                        tl = 'transfer_learning_' + out_dict['Transfer Learning']583                                        filename = 'results_' + tl + '__source_' + source_domain + '_target_' + target_domain + '.csv'584                                        if one_time_flag:585                                            out_df.to_csv(filename, mode='a', header=True)586                                            one_time_flag = False587                                        else:588                                            out_df.to_csv(filename, mode='a', header=False)589                                        print(model_name)590                                        print('AUC: ' + str(auc))591                                        print('Confusion: \n' + str(592                                            confusion_matrix(y_test.values, predictions, labels=[1, 0])))593                                        print('##################################################')594    def _preprocess_dataframe(self, df):595        df = self._remove_features(self._removed_features, df)# remove author_sub_type, user screen_name, etc.596        df = replace_nominal_class_to_numeric(df, self._optional_classes)  # replace 'bad_actor' to 1597        df.replace('?', np.NaN, inplace=True)  # replace ? 
for nan598        df.dropna(axis=0, how='any', subset=[self._targeted_class_name], inplace=True)  # drop row if target class is NaN599        df.dropna(axis=1, how='all', inplace=True)  # drop column if all values  are nan600        df = df.apply(pd.to_numeric)  # convert all values to numeric601        df = df.drop(df.std()[df.std() < self._stdev_threshold].index.values, axis=1)602        if self._replace_missing_values == 'mean':  # replace missing values with column mean or 0603            df.fillna(df.mean(), inplace=True)604        else:605            df.fillna(0, inplace=True)606        return df607    def _create_target_class_classifier_dictionary(self):608        #Dictionary = Dict["author_type"]["RandomForest"][5]["AUC"] = 0.99609        start_time = time.time()610        print("_create_target_class_classifier_dictionary started for " + self.__class__.__name__ + " started at " + str(start_time))611        performance_measure_dictionary = {}612        for performance_measure in self._performance_measures:613            performance_measure_dictionary[performance_measure] = 0614        num_of_features_performance_measure_dictionary = {}615        for num_of_features in self._num_of_features_to_train:616            deep_copy_results_dictionary = copy.deepcopy(performance_measure_dictionary)617            num_of_features_performance_measure_dictionary[num_of_features] = deep_copy_results_dictionary618        classifier_performance_measure_dictionary = {}619        for classifier_type_name in self._classifier_type_names:620            deep_copy_results_dictionary = copy.deepcopy(num_of_features_performance_measure_dictionary)621            classifier_performance_measure_dictionary[classifier_type_name] = deep_copy_results_dictionary622        target_class_classifier_dictionary = {}623        deep_copy_classifier_performance_measure_dictionary = copy.deepcopy(classifier_performance_measure_dictionary)624        
target_class_classifier_dictionary[self._targeted_class_name] = deep_copy_classifier_performance_measure_dictionary625        end_time = time.time()626        print("_create_target_class_classifier_dictionary started for " + self.__class__.__name__ + " ended at " + str(627            end_time))628        return target_class_classifier_dictionary629    def _get_author_features_dataframe(self):630        start_time = time.time()631        print("_get_author_features_dataframe started for " + self.__class__.__name__ + " started at " + str(start_time))632        data_frame_creator = DataFrameCreator(self._db)633        data_frame_creator.create_author_features_data_frame()634        author_features_dataframe = data_frame_creator.get_author_features_data_frame()635        end_time = time.time()636        print("_get_author_features_dataframe ended for " + self.__class__.__name__ + " ended at " + str(end_time))637        return author_features_dataframe638    def _write_results_into_file(self):639        start_time = time.time()640        print("_write_results_into_file started for " + self.__class__.__name__ + " started at " + str(start_time))641        full_path_file_name = self._path + self._results_file_name642        if not os.path.exists(full_path_file_name):643            open(full_path_file_name, 'w').close()644        with open(full_path_file_name, "w") as text_file:645            text_file.write("Supervised learning results:" + "\n")646            text_file.write("----------------------------------------------------------" + "\n")647            text_file.write("Target class name: {0}".format(self._targeted_class_name) + "\n")648            for classifier_type_name in self._classifier_type_names:649                text_file.write("Selected classifier: {0}".format(classifier_type_name) + "\n")650                for num_of_features in self._num_of_features_to_train:651                    text_file.write("Num of features: {0}".format(num_of_features) + "\n")652     
               self._print_correctly_and_not_correctly_instances(classifier_type_name, num_of_features, text_file)...plugin.py
Source:plugin.py  
...45        self.stepper_wrapper.prepare_stepper()46        with open(self.report_filename, 'w'):47            pass48        self.core.add_artifact_file(self.report_filename)49    def _write_results_into_file(self):50        """listens for messages on the q, writes to file. """51        reader = BfgReader(self.bfg.results, self.close_event)52        columns = ['receive_ts', 'tag', 'interval_real', 'connect_time', 'send_time', 'latency', 'receive_time',53                   'interval_event', 'size_out', 'size_in', 'net_code', 'proto_code']54        for entry in reader:55            if entry is not None:56                entry.receive_ts = entry.receive_ts.round(3)57                with open(self.report_filename, 'a') as report_file:58                    report_file.write(entry.to_csv(index=False, header=False, sep='\t', columns=columns))59            time.sleep(0.1)60    def get_reader(self, parser=string_to_df):61        if self.reader is None:62            self.reader = FileMultiReader(self.report_filename, self.close_event)63        return PhantomReader(self.reader.get_file(), parser=parser)...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
