Best Python code snippet using hypothesis
experiment_association_only.py
Source:experiment_association_only.py  
# experiment_association_only.py (reconstructed from a line-number-mangled
# snippet dump; code tokens unchanged, only formatting restored and
# comments/docstrings added).
#
# Python 2 script (uses `print` statements): loads per-user pattern/entropy
# result pickles, compares prediction consistency (normalized inverse entropy)
# between "fully" sequential patterns (labelled "PAS" in the plots) and
# "unfully" association-only patterns ("A-Only"), and renders the comparison
# figures as PDFs.
import matplotlib
matplotlib.use('Agg')  # headless backend — must be set before importing pyplot
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import numpy as np
import os
import ast
import operator
import pickle
import pandas as pd
from scipy import stats
from matplotlib.ticker import FormatStrFormatter
import matplotlib.patches as patches


# Say, "the default sans-serif font is COMIC SANS"
matplotlib.rcParams['font.sans-serif'] = "Times New Roman"
# Then, "ALWAYS use sans-serif fonts"
matplotlib.rcParams['font.family'] = "serif"
matplotlib.rcParams.update({'font.size': 8})


def GetCategory(class_label):
    """Map a class label to its feature category name, or None if unmapped.

    Only 'display_orientation[cat]' -> 'APP' and 'class' -> 'Availability'
    are active; the VOLUME and BATTERY mappings are disabled below.
    """
    category = None
    '''
    if class_label in ['volume_ring[disc]', 'volume_system[disc]', 'volume_notification[disc]', 'volume_music[disc]']:
        category = 'VOLUME'
    '''
    if class_label in ['display_orientation[cat]']:
        category = 'APP'
    elif class_label in ['class']:
        category = 'Availability'
    '''
    elif 'battery' in class_label:
        category = 'BATTERY'
    '''
    return category


def Entropy(values):
    """Return (consistency, value_cnt_dict) for a list of class values.

    consistency = 1 - normalized Shannon entropy of the value distribution,
    so 1.0 means all values identical. A single distinct value is defined
    as consistency 1.0 (normalization would divide by log(1) = 0).
    """
    value_cnt_dict = dict()
    for value in values:
        if value not in value_cnt_dict:
            value_cnt_dict[value] = 0
        value_cnt_dict[value] += 1
    value_ratio_list = list()
    for value in value_cnt_dict:
        # NOTE: sum(...) is recomputed per distinct value — O(k^2), harmless
        # for the small k used here.
        value_ratio_list.append(value_cnt_dict[value] / float(sum(value_cnt_dict.values())))
    # return max(value_ratio_list), value_cnt_dict

    if len(value_ratio_list) == 1:
        entropy = 1.0
    else:
        entropy = stats.entropy(value_ratio_list) / np.log(len(value_ratio_list)) # Normalized
        entropy = 1 - entropy
    return entropy, value_cnt_dict


def GetEntropyEverage(entropy_info_list, granularity_min):
    """Bucket (timestamp, value) pairs into daily time slots and average entropy.

    Each timestamp is rounded to the nearest `granularity_min`-minute slot and
    keyed by its time-of-day ("time_daily:HH:MM:SS"), so the same clock time on
    different days shares a bucket. Returns (mean consistency over buckets,
    list of per-bucket instance counts).
    """
    time_key_class_values_dict = dict()
    for class_info in entropy_info_list:
        class_timestamp = class_info[0]
        class_value = class_info[1]
        # Round the epoch-nanosecond timestamp to the nearest granularity slot.
        granularity_timestamp = pd.DatetimeIndex(((np.round(pd.DatetimeIndex([class_timestamp]).asi8 / (1e9 * 60 * granularity_min))) * 1e9 * 60 * granularity_min).astype(np.int64))[0]
        daily_time = granularity_timestamp.strftime("%H:%M:%S")
        time_key = "time_daily:" + str(daily_time)
        if time_key not in time_key_class_values_dict:
            time_key_class_values_dict[time_key] = list()
        time_key_class_values_dict[time_key].append(class_value)
    entropy_list = list()
    instance_num_list = list()
    for time_key in time_key_class_values_dict:
        # print Entropy(time_key_class_values_dict[time_key])[0]
        entropy_list.append(Entropy(time_key_class_values_dict[time_key])[0])
        instance_num_list.append(len(time_key_class_values_dict[time_key]))
    return np.mean(entropy_list), instance_num_list


def GetTimesFromPattern(patterns):
    """Collect all "time_daily" items from a pattern-of-itemsets as a sorted tuple."""
    time_list = list()
    for pattern in patterns:
        for item in pattern:
            # print item
            if "time_daily" in item:
                time_list.append(item)
    return tuple(sorted(time_list))


def PlotEntropyForPatternPair(entropy_result_path, output_dir, granularity_min):
    """Build the PAS vs A-Only consistency comparison and save the figures.

    Reads per-user result pickles from `entropy_result_path`, recomputes the
    bucketed entropy for each fully/unfully pattern pair, and writes
    feature_distribution.pdf, entropy_distribution.pdf and
    entropy_distribution_all_users.pdf into `output_dir` (plus two cache
    pickles of the intermediate aggregates).
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # NOTE(review): `if True or ...` permanently disables the pickle cache
    # branch below — presumably left on for re-runs; confirm before relying
    # on the cached path.
    if True or not os.path.exists(output_dir + '/sequence_only_result_class.pickle'):
        # Process result files smallest-first (sorted by file size).
        getall = [[files, os.path.getsize(entropy_result_path + "/" + files)] for files in os.listdir(entropy_result_path)]
        file_info_list = list()
        for file_name, file_size in sorted(getall, key=operator.itemgetter(1)):
            if file_name[-7:] == '.pickle' and 'U8' not in file_name:
                file_info_list.append(file_name)
        print "# All Processed Users: %d" % len(file_info_list)
        print file_info_list
        # class_label -> user -> list of pattern-pair tuples
        class_label_user_pattern_pair_info = dict()
        for i, user_result_path in enumerate(file_info_list):
            user_result_path = "%s/%s" % (entropy_result_path, user_result_path)
            print "%d/%d - %s" % (i, len(file_info_list), user_result_path)
            user_result = pickle.load(open(user_result_path))
            print "Loaded"
            for line_info in user_result:
                user = line_info[0]
                class_label = line_info[1]
                # Only the availability label is analyzed here.
                if class_label != 'class':#'class':
                    continue
                mcpp_len = line_info[2]
                condition_len = line_info[3]
                fully_pattern = str(line_info[4])
                unfully_pattern = str(line_info[5])
                fully_class_list = line_info[6]
                unfully_class_list = line_info[7]
                fully_class_cnt = len(fully_class_list)
                unfully_class_cnt = len(unfully_class_list)
                # fully_entropy = line_info[8]
                # unfully_entropy = line_info[9]
                # Recompute entropy at the requested time granularity instead
                # of trusting the precomputed values above.
                fully_entropy, fully_instance_num_list = GetEntropyEverage(fully_class_list, granularity_min)
                unfully_entropy, unfully_instance_num_list = GetEntropyEverage(unfully_class_list, granularity_min)

                print (user, class_label, mcpp_len, condition_len, str(fully_instance_num_list), str(unfully_instance_num_list), fully_entropy, unfully_entropy)

                '''
                if condition_len <= 1:
                    continue
                '''
                if mcpp_len != len(fully_instance_num_list) or mcpp_len != len(unfully_instance_num_list):
                    continue

                # NOTE(review): this loop is a no-op — `continue` only skips
                # to the next instance_num, it does not skip the record.
                # Probably intended to discard pairs with any bucket < 5
                # instances; confirm with the author.
                for instance_num in fully_instance_num_list + unfully_instance_num_list:
                    if instance_num < 5:
                        continue

                if class_label not in class_label_user_pattern_pair_info:
                    class_label_user_pattern_pair_info[class_label] = dict()

                if user not in class_label_user_pattern_pair_info[class_label]:
                    class_label_user_pattern_pair_info[class_label][user] = list()
                class_label_user_pattern_pair_info[class_label][user].append((user, class_label, mcpp_len, condition_len, fully_pattern, unfully_pattern, fully_class_cnt, unfully_class_cnt, fully_entropy, unfully_entropy))

        # Re-group the pairs per user/feature and count instances per
        # fully/unfully time-slot set.
        user_feature_pattern_pair_info_list_dict = dict()
        user_fully_unfully_time_class_cnt = dict()
        for class_label in class_label_user_pattern_pair_info:
            user_plot_data = dict()
            print class_label_user_pattern_pair_info[class_label].keys()
            for user in class_label_user_pattern_pair_info[class_label]:
                pattern_pair_info_list = class_label_user_pattern_pair_info[class_label][user]
                fully_entropy_list = list()
                unfully_entropy_list = list()
                for pattern_pair_info in pattern_pair_info_list:
                    condition_len = pattern_pair_info[3]
                    fully_class_cnt = pattern_pair_info[-4]
                    fully_entropy = pattern_pair_info[-2]
                    unfully_class_cnt = pattern_pair_info[-3]
                    unfully_entropy = pattern_pair_info[-1]

                    '''
                    if condition_len <= 1 or fully_class_cnt < 10 or unfully_class_cnt < 10:
                        continue
                    '''
                    # Store the number of class in fully time and unfully time
                    if user not in user_fully_unfully_time_class_cnt:
                        user_fully_unfully_time_class_cnt[user] = dict()

                    if class_label not in user_fully_unfully_time_class_cnt[user]:
                        user_fully_unfully_time_class_cnt[user][class_label] = dict()
                        user_fully_unfully_time_class_cnt[user][class_label]["fully"] = dict()
                        user_fully_unfully_time_class_cnt[user][class_label]["unfully"] = dict()

                    # Patterns are serialized as "LHS-RHS"; parse the LHS back
                    # into Python structures to extract their time items.
                    fully_left = ast.literal_eval(pattern_pair_info[4].split('-')[0])
                    unfully_left = tuple([ast.literal_eval(unfully_pattern.split('-')[0])[0] for unfully_pattern in ast.literal_eval(pattern_pair_info[5])])

                    fully_timelist = GetTimesFromPattern(fully_left)
                    unfully_timelist = GetTimesFromPattern(unfully_left)

                    user_fully_unfully_time_class_cnt[user][class_label]["fully"][fully_timelist] = fully_class_cnt
                    user_fully_unfully_time_class_cnt[user][class_label]["unfully"][unfully_timelist] = unfully_class_cnt

                    # print pattern_pair_info
                    fully_entropy_list.append(fully_entropy)
                    unfully_entropy_list.append(unfully_entropy)
                    if user not in user_feature_pattern_pair_info_list_dict:
                        user_feature_pattern_pair_info_list_dict[user] = dict()
                    if class_label not in user_feature_pattern_pair_info_list_dict[user]:
                        user_feature_pattern_pair_info_list_dict[user][class_label] = list()
                    # DONE Store fully, unfully of user for the class label
                    user_feature_pattern_pair_info_list_dict[user][class_label].append(pattern_pair_info)

                if len(fully_entropy_list) == 0:
                    continue
                # print len(fully_entropy_list)
                # NOTE(review): user_plot_data is populated but never read.
                user_plot_data[user] = (fully_entropy_list, unfully_entropy_list)

        ### Result Analysis ###
        # 1. Select feature for user as increasing entropy difference.
        # 2. User grouping based on selected features. (3~4 features)
        print "# Users: %d" % (len(user_feature_pattern_pair_info_list_dict.keys()))
        user_category_pair_avg_entropy_diff_dict = dict()
        class_label_user_list_dict = dict() # Plotting distribution features in the top 3 entorpy diff features of a each user.
        for user in user_feature_pattern_pair_info_list_dict:
            class_label_entropy_diff_list = list()
            for class_label in user_feature_pattern_pair_info_list_dict[user]:
                pattern_pair_info_list = user_feature_pattern_pair_info_list_dict[user][class_label]
                # Mean of (fully - unfully) consistency over the user's pairs.
                entropy_diff_list = [pattern_pair_info[-2] - pattern_pair_info[-1] for pattern_pair_info in pattern_pair_info_list]
                entropy_diff_mean = np.mean(entropy_diff_list)
                class_label_entropy_diff_list.append((user, class_label, entropy_diff_mean))
            class_label_entropy_diff_list = sorted(class_label_entropy_diff_list, key=lambda x: x[2])
            for class_label_entropy_diff in class_label_entropy_diff_list[:3]:
                print "%s\t%s\t%f" % class_label_entropy_diff
                class_label = class_label_entropy_diff[1]
                if class_label not in class_label_user_list_dict:
                    class_label_user_list_dict[class_label] = list()
                class_label_user_list_dict[class_label].append(user)
            # DONE Calculating average entropy of subfeatures in the feature category(App, Volume, Battery).
            class_category_feature_entropy_diff_list_dict = dict()
            for class_label_entropy_diff in class_label_entropy_diff_list:
                class_label = class_label_entropy_diff[1]
                category = GetCategory(class_label)
                if category is not None:
                    if category not in class_category_feature_entropy_diff_list_dict:
                        class_category_feature_entropy_diff_list_dict[category] = list()
                    class_category_feature_entropy_diff_list_dict[category].append(class_label_entropy_diff)
            for category in class_category_feature_entropy_diff_list_dict:
                category_feature_entropy_diff_list = class_category_feature_entropy_diff_list_dict[category]
                # for category_feature_entropy_diff in category_feature_entropy_diff_list:
                #     print "%s\t%s\t%f" % category_feature_entropy_diff
                category_feature_entropy_diff_list = [entropy_info[2] for entropy_info in category_feature_entropy_diff_list]
                category_feature_entropy_diff_mean = np.mean(category_feature_entropy_diff_list) # Mean of entropy diff between pattern pair on subfeatures of the category feature
                print ">> %s\t%s\t%f" % (user, category, category_feature_entropy_diff_mean)
                if user not in user_category_pair_avg_entropy_diff_dict:
                    user_category_pair_avg_entropy_diff_dict[user] = dict()
                user_category_pair_avg_entropy_diff_dict[user][category] = category_feature_entropy_diff_mean

        # --- Figure 1: bar chart of how many users have each feature in
        # their per-user top-3 entropy-diff list. ---
        ax = plt.subplot(1, 1, 1)
        x_list = class_label_user_list_dict.keys()
        y_list = [len(class_label_user_list_dict[class_label]) for class_label in x_list]
        ind = np.arange(len(x_list))
        width = 0.35
        ax.bar(ind, y_list, width, align='center')
        ax.set_ylabel('# Users')
        ax.set_xlabel('Features of Top 3 in a each user')
        ax.set_xticks(ind)
        ax.set_xticklabels(x_list, rotation=270)
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        ax.margins(0.02, 0.0)

        # plt.show()
        fig = plt.gcf()
        fig.set_size_inches(3, 3)
        fig.tight_layout()
        plt.savefig(output_dir + "/feature_distribution.pdf")
        plt.clf()

        # Plotting distribution of entropy in the sequential and nonsequential pattern
        # user -> category -> {"FULLY"/"UNFULLY"} -> pattern -> {class_label: entropy}
        user_class_category_pattern_type_feature_pattern_info_list_dict = dict()
        for user in user_feature_pattern_pair_info_list_dict:
            class_label_entropy_diff_list = list()
            for class_label in user_feature_pattern_pair_info_list_dict[user]:
                category = GetCategory(class_label)
                if category is not None:
                    if user not in user_class_category_pattern_type_feature_pattern_info_list_dict:
                        user_class_category_pattern_type_feature_pattern_info_list_dict[user] = dict()
                    if category not in user_class_category_pattern_type_feature_pattern_info_list_dict[user]:
                        user_class_category_pattern_type_feature_pattern_info_list_dict[user][category] = dict()
                        user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"] = dict()
                        user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"] = dict()
                    pattern_pair_info_list = user_feature_pattern_pair_info_list_dict[user][class_label]
                    for pattern_pair_info in pattern_pair_info_list:
                        fully_pattern = pattern_pair_info[4]
                        fully_entropy = pattern_pair_info[-2]
                        unfully_pattern = pattern_pair_info[5]
                        unfully_entropy = pattern_pair_info[-1]
                        if fully_pattern not in user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"]:
                            user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"][fully_pattern] = dict()
                        if unfully_pattern not in user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"]:
                            user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"][unfully_pattern] = dict()

                        user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"][fully_pattern][class_label] = fully_entropy
                        user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"][unfully_pattern][class_label] = unfully_entropy
                        # Complete to check with manual result


        # NOTE(review): defined but never called in this snippet.
        def GetAllTimesFromTimeKeyList(time_key_list):
            """Flatten a list of time-key tuples into a set of distinct times."""
            all_time_key_list = set()
            for time_key in time_key_list:
                for time in time_key:
                    all_time_key_list.add(time)
            return all_time_key_list

        # Availability Instance Counts
        # class_label = 'class'
        # category = 'Availability'
        class_label = 'class'
        category = 'Availability'
        category_class_count_list = dict()
        for user in user_fully_unfully_time_class_cnt:
            if class_label in user_fully_unfully_time_class_cnt[user]:
                fully_class_count_list = list()
                unfully_class_count_list = list()
                for time_key in user_fully_unfully_time_class_cnt[user][class_label]['fully']:
                    time_len = len(time_key)
                    class_cnt = user_fully_unfully_time_class_cnt[user][class_label]['fully'][time_key]
                    class_cnt_mean = class_cnt # / float(time_len)
                    # print class_cnt_mean
                    fully_class_count_list.append(class_cnt_mean)
                for time_key in user_fully_unfully_time_class_cnt[user][class_label]['unfully']:
                    time_len = len(time_key)
                    class_cnt = user_fully_unfully_time_class_cnt[user][class_label]['unfully'][time_key]
                    class_cnt_mean = class_cnt # / float(time_len)
                    # print class_cnt_mean
                    unfully_class_count_list.append(class_cnt_mean)
                fully_class_count_mean = np.mean(fully_class_count_list)
                unfully_class_count_mean = np.mean(unfully_class_count_list)
                all_class_count_mean = (fully_class_count_mean + unfully_class_count_mean)
                # all_class_count_mean = fully_pattern_count
                if category not in category_class_count_list:
                    category_class_count_list[category] = list()
                category_class_count_list[category].append((user, class_label, all_class_count_mean))

        # Cache the aggregates for the (currently disabled) fast path.
        f = open(output_dir + '/sequence_only_result_class.pickle', 'w')
        pickle.dump(user_class_category_pattern_type_feature_pattern_info_list_dict, f)
        f.close()
        f = open(output_dir + '/sequence_only_category_class_count_list.pickle', 'w')
        pickle.dump(category_class_count_list, f)
        f.close()

    else:
        user_class_category_pattern_type_feature_pattern_info_list_dict = pickle.load(open(output_dir + '/sequence_only_result_class.pickle'))
        category_class_count_list = pickle.load(open(output_dir + '/sequence_only_category_class_count_list.pickle'))

    # --- Figure 2: boxplot of per-user median consistency, PAS vs A-Only. ---
    category = 'Availability'
    class_top_k_list = [len(category_class_count_list[category])]  # all users
    x_label = ["PAS", "A-Only"]
    ind = [1, 2]
    fig, axarr = plt.subplots(1, 1)
    # category = 'Availability'
    category = 'Availability'
    for class_top_k_idx, class_top_k in enumerate(class_top_k_list):
        user_class_cnt_list = category_class_count_list[category] # user_fully_unfully_time_cnt_dict[category]
        user_class_cnt_list = sorted(user_class_cnt_list, key=lambda x:x[2], reverse=True)
        user_class_cnt_list = user_class_cnt_list[:class_top_k]

        entropy_diff_list = list()
        overall_fully_entropy_list = list()
        overall_unfully_entropy_list = list()
        for user_idx, user_class_count in enumerate(user_class_cnt_list):
            user = user_class_count[0]
            fully_x_list = list()
            fully_y_list = list()
            unfully_x_list = list()
            unfully_y_list = list()
            fully_pattern_time_list = list()
            unfully_pattern_time_list = list()
            # One entropy value per distinct fully time-slot set.
            for fully_pattern in user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"]:
                fully_left = ast.literal_eval(fully_pattern.split('-')[0])
                fully_timelist = GetTimesFromPattern(fully_left)
                fully_pattern_entropy_list = user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"][fully_pattern].values()
                fully_pattern_entropy_mean = np.mean(fully_pattern_entropy_list)
                if fully_timelist not in fully_pattern_time_list:
                    fully_x_list.append(ind[0])
                    fully_y_list.append(fully_pattern_entropy_mean)
                    fully_pattern_time_list.append(fully_timelist)
            for unfully_pattern in user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"]:
                unfully_left = tuple([ast.literal_eval(unfully_pattern_item.split('-')[0])[0] for unfully_pattern_item in ast.literal_eval(unfully_pattern)])
                unfully_timelist = GetTimesFromPattern(unfully_left)
                unfully_pattern_entropy_list = user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"][unfully_pattern].values()
                unfully_pattern_entropy_mean = np.mean(unfully_pattern_entropy_list)
                if unfully_timelist not in unfully_pattern_time_list:
                    unfully_x_list.append(ind[1])
                    unfully_y_list.append(unfully_pattern_entropy_mean)
                    unfully_pattern_time_list.append(unfully_timelist)
                    # print unfully_timelist
            # print fully_y_list
            # print unfully_y_list
            # print "%s\t%d\t%d\t%d\t%d\t%f" % (user, len(fully_pattern_time_list), len(unfully_pattern_time_list), len(fully_pattern_time_list) + len(unfully_pattern_time_list), user_class_count[2], np.median(fully_y_list) - np.median(unfully_y_list))
            entropy_diff_list.append(np.median(fully_y_list) - np.median(unfully_y_list))
            # overall_fully_entropy_list += fully_y_list
            # overall_unfully_entropy_list += unfully_y_list
            overall_fully_entropy_list.append(np.median(fully_y_list))
            overall_unfully_entropy_list.append(np.median(unfully_y_list))

        overall_ax = axarr#[class_top_k_idx]
        overall_ax.scatter([ind[0]] * len(overall_fully_entropy_list), overall_fully_entropy_list, facecolors='none', edgecolors='c')
        overall_ax.scatter([ind[1]] * len(overall_unfully_entropy_list), overall_unfully_entropy_list, facecolors='none', edgecolors='c')
        bp = overall_ax.boxplot([overall_fully_entropy_list, overall_unfully_entropy_list], widths=(0.3, 0.3))
        print "User Cnt: %d" % len(user_class_cnt_list)
        # boxplot returns a dict of artists; 'medians' are Line2D objects.
        print "Consistency in PAS: %f" % bp['medians'][0].get_ydata()[0]
        print "Consistency in A-Only: %f" % bp['medians'][1].get_ydata()[0]
        '''
        print "-----"
        print np.mean(overall_fully_entropy_list) - np.mean(overall_unfully_entropy_list)
        print np.median(overall_fully_entropy_list) - np.median(overall_unfully_entropy_list)
        print np.mean(entropy_diff_list)
        print np.median(entropy_diff_list)
        print "-----"
        print np.median(overall_fully_entropy_list)
        print np.median(overall_unfully_entropy_list)
        print bp['medians'][0].get_ydata()[0]
        print bp['medians'][1].get_ydata()[0]
        print bp['medians'][0].get_ydata()[0] - bp['medians'][1].get_ydata()[0] # Median in boxplot
        '''
        if class_top_k_idx == 0:
            overall_ax.set_ylabel('Consistency')
            # overall_ax.set_xlabel('Top %d Users' % (class_top_k))
        # else:
            # overall_ax.set_xlabel('All Users')
        # overall_ax.text(0.8, 0.8, )
        # result_text = "%f" % (bp['medians'][0].get_ydata()[0] - bp['medians'][1].get_ydata()[0])
        # ax.text(0.5, 0.5, result_text, horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=15)
        # ax.set_xticks(ind)
        # overall_ax.xaxis.tick_top()
        overall_ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
        overall_ax.set_xticklabels(x_label)
        overall_ax.margins(0.1, 0.1)
        # White x-label reserves layout space without being visible.
        overall_ax.set_xlabel('1')
        overall_ax.xaxis.label.set_color('white')
        diff_text = "%.3f" % (bp['medians'][0].get_ydata()[0] - bp['medians'][1].get_ydata()[0])
        diff_percen = "%.1f" % ((bp['medians'][0].get_ydata()[0] - bp['medians'][1].get_ydata()[0]) / bp['medians'][1].get_ydata()[0])

        # overall_ax.text(1.27,0.25, diff_percen, fontsize=7)
        overall_ax.arrow(1.85,0.200,-0.65,0.035)
    fig.set_size_inches(2, 1.5)
    fig.tight_layout()
    # matplotlib.rcParams.update({'font.size': 30})
    plt.savefig(output_dir + "/entropy_distribution.pdf", bbox_inches='tight')
    # plt.show()

    print len(overall_fully_entropy_list)
    print len(overall_unfully_entropy_list)
    user_class_cnt_list = user_class_cnt_list[:class_top_k]
    print len(user_class_cnt_list)

    # --- Figure 3: per-user PAS/A-Only consistency with a hatched red box
    # when PAS wins (entropy_diff > 0) and a plain blue box otherwise. ---
    fig, ax = plt.subplots(1)
    ind = np.arange(len(user_class_cnt_list))

    for x in ind:
        entropy_diff = overall_fully_entropy_list[x] - overall_unfully_entropy_list[x]
        if entropy_diff > 0:
            # ax.plot([x, x], [overall_fully_entropy_list[x], overall_unfully_entropy_list[x]], c='r', linewidth=1.5)
            width = 0.4
            p = patches.Rectangle(
                (x-width/2.0, overall_fully_entropy_list[x]),
                width, overall_unfully_entropy_list[x]-overall_fully_entropy_list[x],
                hatch='////',
                fill=False,
                edgecolor="red"
            )
            ax.add_patch(p)
        else:
            # NOTE(review): `width` is only set in the `if` branch above; if
            # the first user falls into this branch it reuses the 0.35 from
            # the earlier bar chart — confirm whether 0.4 was intended here.
            # ax.plot([x, x], [overall_unfully_entropy_list[x], overall_fully_entropy_list[x]], c='b', linewidth=1.5)
            p = patches.Rectangle(
                (x-width/2.0, overall_unfully_entropy_list[x]),
                width, overall_fully_entropy_list[x]-overall_unfully_entropy_list[x],
                fill=False,
                edgecolor="blue"
            )
            ax.add_patch(p)

    ax.plot(ind, overall_fully_entropy_list, 'ro', c='k', marker='o', markerfacecolor='black', label='1', markersize=5)
    ax.plot(ind, overall_unfully_entropy_list, 'ro', c='k', marker='o', markerfacecolor='None', label='2', markersize=5)


    ax.set_xticks(ind)
    ax.set_xticklabels([str(x+1) for x in ind])
    ax.set_xlabel('User Index', labelpad=1)
    ax.set_ylabel('Consistency')
    ax.margins(0.1, 0.1)
    # ax.set_position([0.2,0.2,0.5,0.8])
    lgd = ax.legend(loc='upper center', ncol=2, bbox_to_anchor=(0.5, 1.2), fontsize=7, frameon=False)
    fig.set_size_inches(2, 1.5)
    fig.tight_layout()
    # matplotlib.rcParams.update({'font.size': 30})
    plt.savefig(output_dir + "/entropy_distribution_all_users.pdf", bbox_inches='tight')

    # plt.show()


# NOTE(review): snippet is truncated here — presumably the entry point calls
# PlotEntropyForPatternPair(entropy_result_path, output_dir, <granularity>).
if __name__ == '__main__':
    entropy_result_path = 'entropy_result'
    output_dir = 'output'
...evaluate.py
Source:evaluate.py  
import numpy as np


def evaluate_multiclass_one_vs_all(preds_one_hot, true_labels_one_hot, class_labels):
    """Evaluate multiclass multilabel predictions one-vs-all.

    We predict all classes at the same time and each instance can belong to
    several classes (gold annotations). Each class is scored as an independent
    binary problem over its own column of the one-hot matrices.

    Returns a dict with per-class 'p'/'r'/'f' dicts, micro averages
    ('p_avg', 'r_avg', 'f_avg') pooled over all classes' confusion counts,
    and macro averages ('p_macro_avg', 'r_macro_avg', 'f_macro_avg').
    """
    # for each class, calculate metrics
    precision = dict()
    recall = dict()
    f = dict()
    stats = dict()
    # Pooled [tp, tn, fp, fn] counts across classes (layout matches the
    # stats array returned by evaluate_binary).
    collector = np.zeros(4)
    for i in range(len(class_labels)):
        class_label = class_labels[i]
        precision[class_label], recall[class_label], f[class_label], stats[class_label] = evaluate_binary(
            binarize_multi_labels(preds_one_hot, i), binarize_multi_labels(true_labels_one_hot, i))
        for j in range(len(stats[class_label])):
            collector[j] += stats[class_label][j]
    p_avg, r_avg, f_avg = micro_average(collector)
    return {'p': precision, 'r': recall, 'f': f, 'p_avg': p_avg, 'r_avg': r_avg, 'f_avg': f_avg,
            'p_macro_avg': np.mean([precision[c] for c in precision.keys()]),
            'r_macro_avg': np.mean([recall[c] for c in recall.keys()]),
            'f_macro_avg': np.mean([f[c] for c in f.keys()])}


def evaluate_multiclass(preds_one_hot, true_labels_one_hot, class_labels):
    """Evaluate multiclass multilabel predictions with a single argmax prediction.

    We predict all classes at the same time and each instance can belong to
    several classes (gold annotations). The argmax of each prediction row is
    counted as a tp when it matches a gold class, otherwise as a fp for the
    predicted class and a fn for each missed gold class.

    Returns the same dict shape as evaluate_multiclass_one_vs_all.
    """
    # for each class, calculate metrics
    precision = dict()
    recall = dict()
    f = dict()
    # per-class counts: column 0 tp, 1 fp, 2 fn (column 3 unused)
    collector = np.zeros((len(class_labels), 4))
    for i in range(len(preds_one_hot)):
        gold = true_labels_one_hot[i]
        gold_classes = [idx for idx in range(len(gold)) if gold[idx] == 1]
        pred = preds_one_hot[i]
        predicted_class = np.argmax(pred)
        for gold_class in gold_classes:
            if predicted_class == gold_class:
                # tp for predicted class
                collector[predicted_class, 0] += 1.
            else:
                # fp for predicted class
                collector[predicted_class, 1] += 1.
                # fn for gold class
                collector[gold_class, 2] += 1.
    for c, class_label in enumerate(class_labels):
        # p = tp / (tp + fp), guarded against empty denominators
        if (collector[c, 0] + collector[c, 1]) == 0:
            precision[class_label] = 0
        else:
            precision[class_label] = collector[c, 0] / (collector[c, 0] + collector[c, 1])
        # r = tp / (tp + fn)
        if (collector[c, 0] + collector[c, 2]) == 0:
            recall[class_label] = 0
        else:
            recall[class_label] = collector[c, 0] / (collector[c, 0] + collector[c, 2])
        # f = 2pr / (p + r)
        if (precision[class_label] + recall[class_label]) == 0:
            f[class_label] = 0
        else:
            f[class_label] = 2 * precision[class_label] * recall[class_label] / (precision[class_label] + recall[class_label])
    p_avg = np.sum(collector[:, 0]) / (np.sum(collector[:, 0]) + np.sum(collector[:, 1]))
    r_avg = np.sum(collector[:, 0]) / (np.sum(collector[:, 0]) + np.sum(collector[:, 2]))
    # Guard added for consistency with the per-class computation above
    # (avoids a nan when there are no tps at all).
    if (p_avg + r_avg) == 0:
        f_avg = 0
    else:
        f_avg = 2 * p_avg * r_avg / (p_avg + r_avg)
    return {'p': precision, 'r': recall, 'f': f, 'p_avg': p_avg, 'r_avg': r_avg, 'f_avg': f_avg,
            'p_macro_avg': np.mean([precision[c] for c in precision.keys()]),
            'r_macro_avg': np.mean([recall[c] for c in recall.keys()]),
            'f_macro_avg': np.mean([f[c] for c in f.keys()])}


def binarize_multi_labels(one_hot, target_idx):
    """Return the binary column `target_idx` of a one-hot label matrix."""
    if type(one_hot) == list:
        one_hot = np.array(one_hot)
    return one_hot[:, target_idx]


def generate_random_labels(numLabels, one_hot=False):
    """Generate `numLabels` random binary labels (flat list, or Nx2 one-hot)."""
    frow = list(np.random.randint(2, size=numLabels))
    if one_hot == False:
        return frow
    else:
        # Column 0 is the label, column 1 its complement — rows sum to 1.
        srow = [1 - i for i in frow]
        a = np.zeros((len(frow), 2))
        a[:, 0] = frow
        a[:, 1] = srow
        return a


def evaluate_binary(preds, true_labels):
    """Binary P/R/F plus raw counts.

    Returns (precision, recall, f1, np.array([tp, tn, fp, fn])); each metric
    falls back to 0 when its denominator is empty.
    """
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    for i in range(len(preds)):
        pred = preds[i]
        gold = true_labels[i]
        if pred == 1 and gold == 1:
            tp += 1
        elif pred == 0 and gold == 0:
            tn += 1
        elif pred == 1 and gold == 0:
            fp += 1
        elif pred == 0 and gold == 1:
            fn += 1
    if (tp + fp) > 0:
        prec = float(tp) / (tp + fp)
    else:
        prec = 0
    if (tp + fn) > 0:
        rec = float(tp) / (tp + fn)
    else:
        rec = 0
    if (prec + rec) > 0:
        f = (2 * prec * rec) / (prec + rec)
    else:
        f = 0
    return prec, rec, f, np.array([tp, tn, fp, fn])


def count(target, l):
    """Number of occurrences of `target` in iterable `l`."""
    return len([i for i in l if i == target])


def micro_average(collector):
    """Micro-averaged (precision, recall, f1) from pooled [tp, tn, fp, fn] counts.

    `collector` uses the same layout as evaluate_binary's stats array:
    index 0 = tp, 1 = tn, 2 = fp, 3 = fn.
    """
    tp = collector[0]
    fp = collector[2]
    fn = collector[3]
    # Zero-denominator guards added for consistency with evaluate_binary.
    if (tp + fp) > 0:
        p = tp / (float(tp) + fp)
    else:
        p = 0
    if (tp + fn) > 0:
        r = tp / (float(tp) + fn)
    else:
        r = 0
    if (p + r) > 0:
        f = (2 * p * r) / (p + r)
    else:
        f = 0
    # BUGFIX: the return statement was missing (truncated in the snippet);
    # the caller unpacks `p_avg, r_avg, f_avg = micro_average(collector)`.
    return p, r, f
Source: classification_metrics.py
# Example run:
#   python classification_metrics.py --n_classes 10 --n_examples 1000 --class_label 0 --seed 42
import argparse

import numpy as np


def _build_parser():
    """Build the CLI parser.

    FIX: the original called parser.parse_args() at module import time, which
    breaks importing this module (argparse errors on foreign argv, e.g. under
    a test runner). Parsing now happens only under the __main__ guard.
    """
    parser = argparse.ArgumentParser(description='Calculating Classification Metrics')
    parser.add_argument('--n_classes', type=int, help='Number of classes')
    parser.add_argument('--n_examples', type=int, help='Number of examples')
    parser.add_argument('--class_label', type=int, help='Class Label')
    parser.add_argument('--seed', type=int, help='SEED')
    return parser


class classification_metrics():
    """Classification metrics computed on reproducible random label data."""

    def __init__(self, n_classes, n_examples, class_label, seed):
        self.n_classes = n_classes    # number of distinct class labels
        self.n_examples = n_examples  # number of (actual, predicted) pairs
        self.class_label = class_label
        self.seed = seed              # RNG seed, makes every metric deterministic

    def make_data(self):
        """Generate reproducible random (actual_labels, predicted_labels) arrays."""
        np.random.seed(self.seed)
        classes = np.arange(self.n_classes)
        actual_labels = np.random.choice(classes, self.n_examples)
        predicted_labels = np.random.choice(classes, self.n_examples)
        return actual_labels, predicted_labels

    def calculate_accuracy(self):
        """Fraction of examples where prediction equals the actual label."""
        actual_labels, predicted_labels = self.make_data()
        return sum(actual_labels == predicted_labels) / len(actual_labels)

    def find_outcome(self, x, y, class_label):
        """Classify one (actual=x, predicted=y) pair relative to `class_label`
        as 'TP', 'TN', 'FP' or 'FN'."""
        if x == y:
            return 'TP' if x == class_label else 'TN'
        return 'FN' if x == class_label else 'FP'

    def find_confusion_matrix(self, class_label):
        """One-vs-rest confusion counts for `class_label` as a dict."""
        actual_labels, predicted_labels = self.make_data()
        outcomes = np.array(list(map(lambda x, y: self.find_outcome(x, y, class_label),
                                     actual_labels, predicted_labels)))
        return {'TP': sum(outcomes == 'TP'), 'TN': sum(outcomes == 'TN'),
                'FP': sum(outcomes == 'FP'), 'FN': sum(outcomes == 'FN')}

    def calculate_precision(self, class_label):
        """TP / (TP + FP); FIX: 0.0 instead of ZeroDivisionError when the
        class is never predicted."""
        d = self.find_confusion_matrix(class_label)
        denom = d['TP'] + d['FP']
        return d['TP'] / denom if denom else 0.0

    def calculate_recall(self, class_label):
        """TP / (TP + FN); FIX: 0.0 instead of ZeroDivisionError when the
        class never occurs in the actual labels."""
        d = self.find_confusion_matrix(class_label)
        denom = d['TP'] + d['FN']
        return d['TP'] / denom if denom else 0.0

    def calculate_f1_score(self, class_label):
        """Harmonic mean of precision and recall; 0.0 when both are zero."""
        prec = self.calculate_precision(class_label)
        rec = self.calculate_recall(class_label)
        return 2 * prec * rec / (prec + rec) if (prec + rec) else 0.0

    def calculate_balanced_accuracy(self):
        """Mean per-class recall over all classes."""
        classes = np.arange(self.n_classes)
        return np.mean(list(map(lambda x: self.calculate_recall(x), classes)))


if __name__ == '__main__':
    args = _build_parser().parse_args()
    metrics = classification_metrics(args.n_classes, args.n_examples,
                                     args.class_label, args.seed)
    # builtin round() instead of .round(): the guarded metrics may return a
    # plain Python float, which has no .round() method.
    prec = round(metrics.calculate_precision(args.class_label), 4) * 100
    rec = round(metrics.calculate_recall(args.class_label), 4) * 100
    f1_score = round(metrics.calculate_f1_score(args.class_label), 4) * 100
    acc = round(metrics.calculate_accuracy(), 4) * 100
    balanced_acc = round(metrics.calculate_balanced_accuracy(), 4) * 100

    print('Precision:', prec, '%')
    print('Recall:', rec, '%')
    print('F1 Score:', f1_score, '%')
    print('Accuracy:', acc, '%')
You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 minutes of automated testing FREE!
