Best Python code snippet using slash
train_model.py
Source:train_model.py  
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
'''
@File    :   train_model.py
@Time    :   2019/12/05 13:49:58
@Author  :   Yan Yang
@Contact :   yanyangbupt@gmail.com
@Desc    :   None
'''
# (decorative ASCII-art banner omitted)
# 美女保佑 永无BUG  ("may the beauty bless this code: no bugs, ever")
from channel2_v2 import *
import os
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier

add_text_feature_for_train()
create_feature()

models = [
    {   # 0.85926333738039 original best
        'model_path': os.path.join(STACK_MODEL_DIR_v2, 'sm-191125-nosetinfo-extend3-sample11.pkl'),
        'ss_path': os.path.join(STACK_MODEL_DIR_v2, 'standardscaler-last1year-nosetinfo-extend3-sample11.pkl'),
        'cols': BASE_COLS,
        'score': 0.85926333738039,
        'name': 'sm-191125-nosetinfo-extend3-sample11.pkl',
        'model': [
            [
                CatBoostClassifier(
                    iterations=180, learning_rate=0.1, depth=7, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                CatBoostClassifier(
                    iterations=500, learning_rate=0.1, depth=4, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=7, learning_rate=0.05, n_estimators=180, subsample=0.8,
                    n_jobs=-1, min_child_weight=6, random_state=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=4, learning_rate=0.05, n_estimators=350, subsample=0.8,
                    n_jobs=-1, min_child_weight=6, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=7, learning_rate=0.01, n_estimators=800, objective='binary',
                    subsample=0.8, n_jobs=23, num_leaves=82, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=4, learning_rate=0.01, n_estimators=2000, objective='binary',
                    subsample=0.8, n_jobs=23, num_leaves=12, random_state=RANDOM_SEED
                ),
                RandomForestClassifier(
                    n_estimators=1000, max_depth=35, n_jobs=-1, verbose=0, random_state=RANDOM_SEED
                ),
            ],
            [
                CatBoostClassifier(
                    iterations=150, learning_rate=0.1, depth=2, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
            ],
        ],
        'model_param': [
            [
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {},
            ],
            [
                {'verbose': False},
            ],
        ],
    },
    {   # 0.858031834386063 with set info
        'model_path': os.path.join(STACK_MODEL_DIR_v2, 'test-2-sm-191127-withsetinfo-sample11.pkl'),
        'ss_path': os.path.join(STACK_MODEL_DIR_v2, 'standardscaler-last1year-withsetinfo-sample11.pkl'),
        'cols': BASE_COLS + SET_INFO_COLS,
        'score': 0.858031834386063,
        'name': 'test-2-sm-191127-withsetinfo-sample11.pkl',
        'model': [
            [
                CatBoostClassifier(
                    iterations=400, learning_rate=0.05, depth=7, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                CatBoostClassifier(
                    iterations=1000, learning_rate=0.05, depth=4, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=7, learning_rate=0.05, n_estimators=180, subsample=0.8,
                    n_jobs=-1, min_child_weight=4, random_state=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=4, learning_rate=0.03, n_estimators=500, subsample=0.8,
                    n_jobs=-1, min_child_weight=6, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=7, learning_rate=0.01, n_estimators=1000, objective='binary',
                    subsample=0.8, n_jobs=23, num_leaves=35, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=4, learning_rate=0.01, n_estimators=3500, objective='binary',
                    subsample=0.8, n_jobs=23, num_leaves=5, random_state=RANDOM_SEED
                ),
                RandomForestClassifier(
                    n_estimators=1000, max_depth=35, n_jobs=-1, verbose=0, random_state=RANDOM_SEED
                ),
            ],
            [
                CatBoostClassifier(
                    iterations=800, learning_rate=0.01, depth=3, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
            ],
        ],
        'model_param': [
            [
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {},
            ],
            [
                {'verbose': False},
            ],
        ],
    },
    {   # 0.856180351089599 with set info
        'model_path': os.path.join(STACK_MODEL_DIR_v2, 'sm-191127-withsetinfo-11.pkl'),
        'ss_path': os.path.join(STACK_MODEL_DIR_v2, 'standardscaler-last1year-withsetinfo-11.pkl'),
        'cols': BASE_COLS + SET_INFO_COLS,
        'score': 0.856180351089599,
        'name': 'sm-191127-withsetinfo-11.pkl',
        'model': [
            [
                CatBoostClassifier(
                    iterations=400, learning_rate=0.05, depth=7, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                CatBoostClassifier(
                    iterations=1000, learning_rate=0.05, depth=4, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=7, learning_rate=0.05, n_estimators=180, subsample=0.8,
                    n_jobs=-1, min_child_weight=4, random_state=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=4, learning_rate=0.03, n_estimators=500, subsample=0.8,
                    n_jobs=-1, min_child_weight=6, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=7, learning_rate=0.01, n_estimators=1000, objective='binary',
                    subsample=0.8, n_jobs=23, num_leaves=35, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=4, learning_rate=0.01, n_estimators=3500, objective='binary',
                    subsample=0.8, n_jobs=23, num_leaves=5, random_state=RANDOM_SEED
                ),
                RandomForestClassifier(
                    n_estimators=1000, max_depth=35, n_jobs=-1, verbose=0, random_state=RANDOM_SEED
                ),
            ],
            [
                CatBoostClassifier(
                    iterations=800, learning_rate=0.01, depth=3, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
            ],
        ],
        'model_param': [
            [
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {},
            ],
            [
                {'verbose': False},
            ],
        ],
    },
    {   # 0.855763586778158 with set info and title info
        'model_path': os.path.join(STACK_MODEL_DIR_v2, 'sm-191128-withsetinfo-title-11-norf.pkl'),
        'ss_path': os.path.join(STACK_MODEL_DIR_v2, 'standardscaler-last1year-withsetinfo-title-11.pkl'),
        'cols': BASE_COLS + SET_INFO_COLS + TITLE_COLS,
        'score': 0.855763586778158,
        'name': 'sm-191128-withsetinfo-title-11-norf.pkl',
        'model': [
            [
                CatBoostClassifier(
                    iterations=320, learning_rate=0.05, depth=7, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                CatBoostClassifier(
                    iterations=900, learning_rate=0.05, depth=4, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=7, learning_rate=0.05, n_estimators=180, subsample=0.8,
                    n_jobs=-1, min_child_weight=6, random_state=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=4, learning_rate=0.03, n_estimators=500, subsample=0.8,
                    n_jobs=-1, min_child_weight=6, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=7, learning_rate=0.01, n_estimators=1000, objective='binary',
                    subsample=0.8, n_jobs=-1, num_leaves=82, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=4, learning_rate=0.01, n_estimators=3500, objective='binary',
                    subsample=0.8, n_jobs=-1, num_leaves=5, random_state=RANDOM_SEED
                ),
            ],
            [
                CatBoostClassifier(
                    iterations=1200, learning_rate=0.01, depth=2, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
            ],
        ],
        'model_param': [
            [
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
            ],
            [
                {'verbose': False},
            ],
        ],
    },
    {   # 0.85364791527539
        'model_path': os.path.join(STACK_MODEL_DIR_v2, 'sm-191126-withsetinfo-sample11.pkl'),
        'ss_path': os.path.join(STACK_MODEL_DIR_v2, 'standardscaler-last1year-withsetinfo-sample11.pkl'),
        'cols': BASE_COLS + SET_INFO_COLS,
        'score': 0.85364791527539,
        'name': 'sm-191126-withsetinfo-sample11.pkl',
        'model': [
            [
                CatBoostClassifier(
                    iterations=180, learning_rate=0.1, depth=7, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                CatBoostClassifier(
                    iterations=500, learning_rate=0.1, depth=4, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=7, learning_rate=0.05, n_estimators=180, subsample=0.8,
                    n_jobs=-1, min_child_weight=6, random_state=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=4, learning_rate=0.05, n_estimators=350, subsample=0.8,
                    n_jobs=-1, min_child_weight=6, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=7, learning_rate=0.01, n_estimators=800, objective='binary',
                    subsample=0.8, n_jobs=-1, num_leaves=82, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=4, learning_rate=0.01, n_estimators=2000, objective='binary',
                    subsample=0.8, n_jobs=-1, num_leaves=12, random_state=RANDOM_SEED
                ),
                RandomForestClassifier(
                    n_estimators=1000, max_depth=35, n_jobs=-1, verbose=0, random_state=RANDOM_SEED
                ),
            ],
            [
                CatBoostClassifier(
                    iterations=150, learning_rate=0.1, depth=2, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
            ],
        ],
        'model_param': [
            [
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {},
            ],
            [
                {'verbose': False},
            ],
        ],
    },
    {   # 0.855538436984147
        'model_path': os.path.join(STACK_MODEL_DIR_v2, 'sm-191128-withsetinfo-title-11.pkl'),
        'ss_path': os.path.join(STACK_MODEL_DIR_v2, 'standardscaler-last1year-withsetinfo-title-11.pkl'),
        'cols': BASE_COLS + SET_INFO_COLS + TITLE_COLS,
        'score': 0.855538436984147,
        'name': 'sm-191128-withsetinfo-title-11.pkl',
        'model': [
            [
                CatBoostClassifier(
                    iterations=320, learning_rate=0.05, depth=7, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                CatBoostClassifier(
                    iterations=900, learning_rate=0.05, depth=4, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=7, learning_rate=0.05, n_estimators=180, subsample=0.8,
                    n_jobs=-1, min_child_weight=6, random_state=RANDOM_SEED
                ),
                XGBClassifier(
                    max_depth=4, learning_rate=0.03, n_estimators=500, subsample=0.8,
                    n_jobs=-1, min_child_weight=6, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=7, learning_rate=0.01, n_estimators=1000, objective='binary',
                    subsample=0.8, n_jobs=-1, num_leaves=82, random_state=RANDOM_SEED
                ),
                LGBMClassifier(
                    max_depth=4, learning_rate=0.01, n_estimators=3500, objective='binary',
                    subsample=0.8, n_jobs=-1, num_leaves=5, random_state=RANDOM_SEED
                ),
                RandomForestClassifier(
                    n_estimators=1000, max_depth=60, n_jobs=-1, verbose=0, random_state=RANDOM_SEED
                ),
            ],
            [
                CatBoostClassifier(
                    iterations=1200, learning_rate=0.01, depth=2, loss_function='Logloss',
                    eval_metric='Logloss', task_type='GPU', random_seed=RANDOM_SEED
                ),
            ],
        ],
        'model_param': [
            [
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {'verbose': False},
                {},
            ],
            [
                {'verbose': False},
            ],
        ],
    },
]

for model_info in models:
    print('--'*50)
...
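Each entry in models above describes a two-level stacking ensemble: a list of level-1 learners (CatBoost, XGBoost, LightGBM, random forest) followed by a single level-2 CatBoost meta-model. The channel2_v2 helpers that actually consume this config (create_feature, STACK_MODEL_DIR_v2, BASE_COLS, RANDOM_SEED, and the training loop cut off above) are not shown, so the snippet below is only a minimal, self-contained sketch of how such a level-1/level-2 config can be fit with out-of-fold predictions; the data and the stand-in estimators are placeholders, not the author's pipeline.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

X, y = make_classification(n_samples=500, n_features=20, random_state=0)

# Level-1 models (stand-ins for the CatBoost/XGBoost/LightGBM entries above).
level1 = [
    RandomForestClassifier(n_estimators=100, random_state=0),
    GradientBoostingClassifier(random_state=0),
]
# Level-2 meta-model (stand-in for the final CatBoostClassifier).
level2 = LogisticRegression()

# Out-of-fold class-1 probabilities from each level-1 model become meta-features.
meta_features = np.column_stack([
    cross_val_predict(m, X, y, cv=5, method='predict_proba')[:, 1]
    for m in level1
])
for m in level1:
    m.fit(X, y)               # refit level-1 models on the full data for inference
level2.fit(meta_features, y)  # train the meta-model on the stacked features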
network_generators.py
Source:network_generators.py
import json
import math
# abc - Abstract Base Class -
from abc import ABC, abstractmethod
from pathlib import Path
import networkx as nx
from relnet.evaluation.file_paths import FilePaths
from relnet.state.graph_state import S2VGraph, random_action_init
from relnet.utils.config_utils import get_logger_instance


class NetworkGenerator(ABC):
    """
    Abstract parent class for graph generators
    """
    # Forces no unconnected elements
    enforce_connected = True

    def __init__(self, store_graphs=False,
                 graph_storage_root=None,
                 logs_file=None,
                 game_type='majority',
                 enforce_connected=True,
                 institution=False,
                 tax=0.1):
        """
        :param store_graphs: Bool - Stores the generated graph instances
        :param graph_storage_root: Path for storing the graphs
        :param logs_file: File for logging graph data
        """
        super().__init__()
        # Path for storing graphs
        self.store_graphs = store_graphs
        if self.store_graphs:
            self.graph_storage_root = graph_storage_root
            self.graph_storage_dir = graph_storage_root / game_type / self.name
            self.graph_storage_dir.mkdir(parents=True, exist_ok=True)
        # Log file
        if logs_file is not None:
            self.logger_instance = get_logger_instance(logs_file)
        else:
            self.logger_instance = None
        # Set the game type
        self.game_type = game_type
        self.enforce_connected = enforce_connected
        self.use_inst = institution
        self.tax = tax

    def generate(self, gen_params, random_seed):
        """
        Generate a single graph
        :param gen_params: Parameters for the graph - number of nodes/edges
        :param random_seed: Random seed for generating the graph - does this make it deterministic
        :return: A graph of desired type
        """
        # Store if required
        if self.store_graphs:
            # Get graphs from stored graphs
            filename = self.get_data_filename(gen_params, random_seed)
            filepath = self.graph_storage_dir / filename
            should_create = True
            # If the file path exists then instance from graphml and get state from S2V - if not then create the graph
            if filepath.exists():
                try:
                    instance = self.read_graphml_with_ordered_int_labels(filepath, random_seed)
                    state = self.post_generate_instance(instance, random_seed)
                    should_create = False
                except Exception:
                    should_create = True
            # If we need to create the graph then create using parameters and get the state
            if should_create:
                instance = self.generate_instance(gen_params, random_seed)
                state = self.post_generate_instance(instance, random_seed)
                # Write the networkX graph to graphml file
                nx.readwrite.write_graphml(instance, filepath.resolve())
                # Draws S2V graph to file
                drawing_filename = self.get_drawing_filename(gen_params, random_seed)
                drawing_path = self.graph_storage_dir / drawing_filename
                state.draw_to_file(drawing_path)
        # If not using a file then generate graph instance and convert to S2V
        else:
            instance = self.generate_instance(gen_params, random_seed)
            state = self.post_generate_instance(instance, random_seed)
        # Return S2V state
        return state

    @staticmethod
    def read_graphml_with_ordered_int_labels(filepath, random_seed):
        """
        Reads the graphml file and generates graph instance
        :param filepath: Path to graphml file
        :return: NetworkX graph
        """
        # Read the file, get the number of nodes and label map - relabel nodes
        instance = nx.readwrite.read_graphml(filepath.resolve())
        num_nodes = len(instance.nodes)
        relabel_map = {str(i): i for i in range(num_nodes)}
        nx.relabel_nodes(instance, relabel_map, copy=False)
        # Initialises the graph, populate it with nodes then edges and return
        G = nx.Graph()
        G.add_nodes_from(sorted(instance.nodes(data=True)))
        G.add_edges_from(instance.edges(data=True))
        G = random_action_init(G)
        return G

    def generate_many(self, gen_params, random_seeds):
        # Returns many graphs using a list of random seeds, one for each graph
        return [self.generate(gen_params, random_seed) for random_seed in random_seeds]

    @abstractmethod
    def generate_instance(self, gen_params, random_seed):
        pass

    @abstractmethod
    def post_generate_instance(self, instance, seed):
        pass

    @staticmethod
    def get_data_filename(gen_params, random_seed):
        # Creates a file name from the parameters used
        n = gen_params['n']
        filename = f"{n}-{random_seed}.graphml"
        return filename

    @staticmethod
    def get_drawing_filename(gen_params, random_seed):
        # Creates a drawing file name from the parameters used
        n = gen_params['n']
        filename = f"{n}-{random_seed}.png"
        return filename

    @staticmethod
    def compute_number_edges(n, edge_percentage):
        # Returns the integer number of edges from the total number of edges available and the edge percentage
        total_possible_edges = (n * (n - 1)) / 2
        return int(math.ceil((total_possible_edges * edge_percentage / 100)))

    @staticmethod
    def compute_edges_changes(m, edge_percentage):
        # Returns the integer number of edges from the total number of edges available and the edge percentage
        return int(math.ceil((m * edge_percentage / 100)))

    @staticmethod
    def construct_network_seeds(num_train_graphs, num_validation_graphs, num_test_graphs):
        # Random seeds for network generation, each unique in ascending order
        train_seeds = list(range(0, num_train_graphs))
        validation_seeds = list(range(num_train_graphs, num_train_graphs + num_validation_graphs))
        offset = num_train_graphs + num_validation_graphs
        test_seeds = list(range(offset, offset + num_test_graphs))
        return train_seeds, validation_seeds, test_seeds


class OrdinaryGraphGenerator(NetworkGenerator, ABC):
    # Extends the NetworkGenerator class to create a S2V object
    def post_generate_instance(self, instance, seed):
        """
        Takes the networkX instance of a graph and converts to a S2V object
        :param instance: NetworkX graph instance
        :return: S2V object for easier manipulation with S2V
        """
        state = S2VGraph(instance,
                         self.game_type,
                         enforce_connected=self.enforce_connected,
                         institution=self.use_inst,
                         tax=self.tax,
                         seed=seed)
        # Builds a list of banned actions for the graph for quick access
        state.populate_banned_actions()
        return state


class GNMNetworkGenerator(OrdinaryGraphGenerator):
    """
    Creates a random network with a set number of nodes and edges
    """
    name = 'erdos_renyi'
    num_tries = 10000

    def generate_instance(self, gen_params, random_seed):
        """
        Creates Graph
        :param gen_params: Parameters for creating the graph in a dict
        :param random_seed: Integer value for random_seed to deterministically create graphs
        :return: NetworkX graph
        """
        # Get edges and vertices from parameters
        number_vertices = gen_params['n']
        p = gen_params['er_p']
        # If we can have disconnections then simply generate the required networks
        if not self.enforce_connected:
            random_graph = nx.generators.random_graphs.fast_gnp_random_graph(number_vertices, p, seed=random_seed)
            return random_action_init(random_graph, random_seed)
        # Otherwise attempt to make graphs with no breaks, abort if break and try again
        else:
            for try_num in range(0, self.num_tries):
                random_graph = \
                    nx.generators.random_graphs.fast_gnp_random_graph(number_vertices,
                                                                      p,
                                                                      seed=(random_seed + (try_num * 1000)))
                if nx.is_connected(random_graph):
                    return random_action_init(random_graph, random_seed)
                else:
                    continue
            raise ValueError("Maximum number of tries exceeded, giving up...")

    @staticmethod
    def get_data_filename(gen_params, random_seed):
        # Creates a file name from the parameters used
        n, m = gen_params['n'], gen_params['er_p']
        filename = f"{n}-{m}-{random_seed}.graphml"
        return filename

    @staticmethod
    def get_drawing_filename(gen_params, random_seed):
        # Creates a drawing file name from the parameters used
        n, m = gen_params['n'], gen_params['er_p']
        filename = f"{n}-{m}-{random_seed}.png"
        return filename


class BANetworkGenerator(OrdinaryGraphGenerator):
    # Name of approach
    name = 'barabasi_albert'

    def generate_instance(self, gen_params, random_seed):
        """
        Creates Barabasi-Albert Graph
        :param gen_params: Parameters for creating the graph in a dict
        :param random_seed: Integer value for random_seed to deterministically create graphs
        :return: NetworkX graph using Barabasi Albert structure
        """
        # Gets parameters and creates the graph
        n, m = gen_params['n'], gen_params['m_ba']
        ba_graph = nx.generators.random_graphs.barabasi_albert_graph(n, m, seed=random_seed)
        # Init the actions and rewards and return
        ba_graph = random_action_init(ba_graph, random_seed)
        return ba_graph

    @staticmethod
    def get_data_filename(gen_params, random_seed):
        # Creates a file name from the parameters used
        n, m = gen_params['n'], gen_params['m_ba']
        filename = f"{n}-{m}-{random_seed}.graphml"
        return filename

    @staticmethod
    def get_drawing_filename(gen_params, random_seed):
        # Creates a drawing file name from the parameters used
        n, m = gen_params['n'], gen_params['m_ba']
        filename = f"{n}-{m}-{random_seed}.png"
        return filename


class WSNetworkGenerator(OrdinaryGraphGenerator):
    # Name of approach
    name = 'watts_strogatz'

    def generate_instance(self, gen_params, random_seed):
        """
        Creates Watts Strogatz Graph
        :param gen_params: Parameters for creating the graph in a dict
        :param random_seed: Integer value for random_seed to deterministically create graphs
        :return: NetworkX graph using Watts Strogatz structure - captures small-world property found in many social
        and biological networks, which generates networks with high clustering coefficient
        """
        # Gets parameters and creates the graph
        n, k, p = gen_params['n'], gen_params['k_ws'], gen_params['p_ws']
        ws_graph = nx.generators.random_graphs.connected_watts_strogatz_graph(n, k, p, seed=random_seed)
        # Init the actions and rewards and return
        ws_graph = random_action_init(ws_graph, random_seed)
        return ws_graph

    @staticmethod
    def get_data_filename(gen_params, random_seed):
        # Creates a file name from the parameters used
        n, k, p = gen_params['n'], gen_params['k_ws'], gen_params['p_ws']
        filename = f"{n}-{k}-{p}-{random_seed}.graphml"
        return filename

    @staticmethod
    def get_drawing_filename(gen_params, random_seed):
        # Creates a drawing file name from the parameters used
        n, k, p = gen_params['n'], gen_params['k_ws'], gen_params['p_ws']
        filename = f"{n}-{k}-{p}-{random_seed}.png"
        return filename


class KarateClub(OrdinaryGraphGenerator):
    name = 'karate_club'

    def generate_instance(self, gen_params, random_seed):
        g = nx.karate_club_graph()
        g = random_action_init(g, random_seed)
        return g

    @staticmethod
    def get_data_filename(gen_params, random_seed):
        filename = f"karate_club_{random_seed}.graphml"
        return filename

    @staticmethod
    def get_drawing_filename(gen_params, random_seed):
        filename = f"karate_club_{random_seed}.png"
        return filename


class SawMill(OrdinaryGraphGenerator):
    name = 'saw_mill'

    def generate_instance(self, gen_params, random_seed):
        path = Path('/experiment_data/real_world_graphs/raw_data/Sawmill.net')
        g = nx.read_pajek(path)
        g = nx.Graph(g)
        g = nx.convert_node_labels_to_integers(g)
        g = random_action_init(g, random_seed)
        return g

    @staticmethod
    def get_data_filename(gen_params, random_seed):
        filename = f"saw_mill_{random_seed}.graphml"
        return filename

    @staticmethod
    def get_drawing_filename(gen_params, random_seed):
        filename = f"saw_mill_{random_seed}.png"
...
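GNMNetworkGenerator enforces connectivity by resampling with a shifted seed until a connected graph is drawn. The relnet imports above are project-specific, so the sketch below reproduces just that retry pattern with plain networkx; connected_gnp is an illustrative name, not part of the source.

import networkx as nx

def connected_gnp(n, p, seed, num_tries=10000):
    """Sample G(n, p) graphs, shifting the seed each try, until one is connected."""
    for try_num in range(num_tries):
        g = nx.fast_gnp_random_graph(n, p, seed=seed + try_num * 1000)
        if nx.is_connected(g):
            return g
    raise ValueError("Maximum number of tries exceeded, giving up...")

g = connected_gnp(n=20, p=0.15, seed=42)
print(g.number_of_nodes(), g.number_of_edges())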
datasets.py
Source:datasets.py
# Copyright 2017 Google, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to generate or load datasets for supervised learning."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple
import numpy as np
from sklearn.datasets import make_classification

MAX_SEED = 4294967295


class Dataset(namedtuple("Dataset", "data labels")):
  """Helper class for managing a supervised learning dataset.

  Args:
    data: an array of type float32 with N samples, each of which is the set
      of features for that sample. (Shape (N, D_i), where N is the number of
      samples and D_i is the number of features for that sample.)
    labels: an array of type int32 or int64 with N elements, indicating the
      class label for the corresponding set of features in data.
  """
  # Since this is an immutable object, we don't need to reserve slots.
  __slots__ = ()

  @property
  def size(self):
    """Dataset size (number of samples)."""
    return len(self.data)

  def batch_indices(self, num_batches, batch_size):
    """Creates indices of shuffled minibatches.

    Args:
      num_batches: the number of batches to generate
      batch_size: the size of each batch

    Returns:
      batch_indices: a list of minibatch indices, arranged so that the dataset
          is randomly shuffled.

    Raises:
      ValueError: if the data and labels have different lengths
    """
    if len(self.data) != len(self.labels):
      raise ValueError("Labels and data must have the same number of samples.")
    batch_indices = []
    # Follows logic in mnist.py to ensure we cover the entire dataset.
    index_in_epoch = 0
    dataset_size = len(self.data)
    dataset_indices = np.arange(dataset_size)
    np.random.shuffle(dataset_indices)
    for _ in range(num_batches):
      start = index_in_epoch
      index_in_epoch += batch_size
      if index_in_epoch > dataset_size:
        # Finished epoch, reshuffle.
        np.random.shuffle(dataset_indices)
        # Start next epoch.
        start = 0
        index_in_epoch = batch_size
      end = index_in_epoch
      batch_indices.append(dataset_indices[start:end].tolist())
    return batch_indices


def noisy_parity_class(n_samples,
                       n_classes=2,
                       n_context_ids=5,
                       noise_prob=0.25,
                       random_seed=None):
  """Returns a randomly generated sparse-to-sparse dataset.

  The label is a parity class of a set of context classes.

  Args:
    n_samples: number of samples (data points)
    n_classes: number of class labels (default: 2)
    n_context_ids: how many classes to take the parity of (default: 5).
    noise_prob: how often to corrupt the label (default: 0.25)
    random_seed: seed used for drawing the random data (default: None)

  Returns:
    dataset: A Dataset namedtuple containing the generated data and labels
  """
  np.random.seed(random_seed)
  x = np.random.randint(0, n_classes, [n_samples, n_context_ids])
  noise = np.random.binomial(1, noise_prob, [n_samples])
  y = (np.sum(x, 1) + noise) % n_classes
  return Dataset(x.astype("float32"), y.astype("int32"))


def random(n_features, n_samples, n_classes=2, sep=1.0, random_seed=None):
  """Returns a randomly generated classification dataset.

  Args:
    n_features: number of features (dependent variables)
    n_samples: number of samples (data points)
    n_classes: number of class labels (default: 2)
    sep: separation of the two classes, a higher value corresponds to
      an easier classification problem (default: 1.0)
    random_seed: seed used for drawing the random data (default: None)

  Returns:
    dataset: A Dataset namedtuple containing the generated data and labels
  """
  # Generate the problem data.
  x, y = make_classification(n_samples=n_samples,
                             n_features=n_features,
                             n_informative=n_features,
                             n_redundant=0,
                             n_classes=n_classes,
                             class_sep=sep,
                             random_state=random_seed)
  return Dataset(x.astype("float32"), y.astype("int32"))


def random_binary(n_features, n_samples, random_seed=None):
  """Returns a randomly generated dataset of binary values.

  Args:
    n_features: number of features (dependent variables)
    n_samples: number of samples (data points)
    random_seed: seed used for drawing the random data (default: None)

  Returns:
    dataset: A Dataset namedtuple containing the generated data and labels
  """
  random_seed = (np.random.randint(MAX_SEED) if random_seed is None
                 else random_seed)
  np.random.seed(random_seed)
  x = np.random.randint(2, size=(n_samples, n_features))
  y = np.zeros((n_samples, 1))
  return Dataset(x.astype("float32"), y.astype("int32"))


def random_symmetric(n_features, n_samples, random_seed=None):
  """Returns a randomly generated dataset of values and their negatives.

  Args:
    n_features: number of features (dependent variables)
    n_samples: number of samples (data points)
    random_seed: seed used for drawing the random data (default: None)

  Returns:
    dataset: A Dataset namedtuple containing the generated data and labels
  """
  random_seed = (np.random.randint(MAX_SEED) if random_seed is None
                 else random_seed)
  np.random.seed(random_seed)
  x1 = np.random.normal(size=(int(n_samples/2), n_features))
  x = np.concatenate((x1, -x1), axis=0)
  y = np.zeros((n_samples, 1))
  return Dataset(x.astype("float32"), y.astype("int32"))


def random_mlp(n_features, n_samples, random_seed=None, n_layers=6, width=20):
  """Returns a generated output of an MLP with random weights.

  Args:
    n_features: number of features (dependent variables)
    n_samples: number of samples (data points)
    random_seed: seed used for drawing the random data (default: None)
    n_layers: number of layers in random MLP
    width: width of the layers in random MLP

  Returns:
    dataset: A Dataset namedtuple containing the generated data and labels
  """
  random_seed = (np.random.randint(MAX_SEED) if random_seed is None
                 else random_seed)
  np.random.seed(random_seed)
  x = np.random.normal(size=(n_samples, n_features))
  y = x
  n_in = n_features
  scale_factor = np.sqrt(2.) / np.sqrt(n_features)
  for _ in range(n_layers):
    weights = np.random.normal(size=(n_in, width)) * scale_factor
    y = np.dot(y, weights).clip(min=0)
    n_in = width
  y = y[:, 0]
  y[y > 0] = 1
  return Dataset(x.astype("float32"), y.astype("int32"))


EMPTY_DATASET = Dataset(np.array([], dtype="float32"),
...
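A quick usage sketch for the helpers above, assuming the file is importable as a module named datasets; the sizes are arbitrary.

import datasets

# Draw a 2-class problem with 10 features and shuffle it into minibatches.
ds = datasets.random(n_features=10, n_samples=200, sep=2.0, random_seed=0)
print(ds.size)                                   # 200
batches = ds.batch_indices(num_batches=4, batch_size=64)
print(len(batches), [len(b) for b in batches])   # 4 batches of 64 indices each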
main.py
Source:main.py
import numpy as np
import random
import util
import clustering
import dimred
import cluster_NN
import DR_NN
import dr_cluster
import copy
import vis


def run_ul_algos(filename, result_col, debug=False, numFolds=10, njobs=-1, scalar=1, make_graphs=False, nolegend=False,
                 verbose=False, random_seed=1, rNN=False, pNN={}):
    np.random.seed(random_seed)
    random.seed(random_seed)
    X_train, X_test, y_train, y_test = util.data_load(filename, result_col, debug, scalar, make_graphs, random_seed)
    vis.gen_vis(X_train, X_test, random_seed, filename[:-4])
    clustering.ul_Kmeans(X_train, y_train, random_seed, filename[:-4], result_col, verbose)
    clustering.ul_EM(X_train, y_train, random_seed, filename[:-4], result_col, verbose)
    dimred.ulPCA(X_train, y_train, random_seed, filename[:-4], verbose)
    dimred.ulICA(X_train, y_train, random_seed, filename[:-4], verbose)
    dimred.randProj(X_train, y_train, random_seed, filename[:-4], verbose)
    dimred.ul_LLE(X_train, y_train, random_seed, filename[:-4], verbose)
    new_Xtrain = copy.deepcopy(X_train)
    new_ytrain = copy.deepcopy(y_train)
    dr_cluster.pca_clust(new_Xtrain, new_ytrain, random_seed, filename[:-4], result_col, verbose)
    new_Xtrain = copy.deepcopy(X_train)
    new_ytrain = copy.deepcopy(y_train)
    dr_cluster.ica_clust(new_Xtrain, new_ytrain, random_seed, filename[:-4], result_col, verbose)
    new_Xtrain = copy.deepcopy(X_train)
    new_ytrain = copy.deepcopy(y_train)
    dr_cluster.rp_clust(new_Xtrain, new_ytrain, random_seed, filename[:-4], result_col, verbose)
    new_Xtrain = copy.deepcopy(X_train)
    new_ytrain = copy.deepcopy(y_train)
    dr_cluster.lle_clust(new_Xtrain, new_ytrain, random_seed, filename[:-4], result_col, verbose)
    # Run NNs for Dim Reduction
    if rNN:
        for n in range(1, 21):
            print('PCA', n)
            new_Xtrain = copy.deepcopy(X_train)
            new_Xtest = copy.deepcopy(X_test)
            new_ytrain = copy.deepcopy(y_train)
            new_ytest = copy.deepcopy(y_test)
            DR_NN.train_NN_PCA(filename[:-4], new_Xtrain, new_Xtest, new_ytrain, new_ytest,
                               random_seed=random_seed, scalar=scalar,
                               njobs=njobs, numFolds=numFolds, make_graphs=make_graphs, nolegend=nolegend,
                               pNN=pNN, num_dim=n)
            print('ICA', n)
            new_Xtrain = copy.deepcopy(X_train)
            new_Xtest = copy.deepcopy(X_test)
            new_ytrain = copy.deepcopy(y_train)
            new_ytest = copy.deepcopy(y_test)
            DR_NN.train_NN_ICA(filename[:-4], new_Xtrain, new_Xtest, new_ytrain, new_ytest,
                               random_seed=random_seed, scalar=scalar,
                               njobs=njobs, numFolds=numFolds, make_graphs=make_graphs, nolegend=nolegend,
                               pNN=pNN, num_dim=n)
            print('RP', n)
            new_Xtrain = copy.deepcopy(X_train)
            new_Xtest = copy.deepcopy(X_test)
            new_ytrain = copy.deepcopy(y_train)
            new_ytest = copy.deepcopy(y_test)
            DR_NN.train_NN_RP(filename[:-4], new_Xtrain, new_Xtest, new_ytrain, new_ytest,
                              random_seed=random_seed, scalar=scalar,
                              njobs=njobs, numFolds=numFolds, make_graphs=make_graphs, nolegend=nolegend,
                              pNN=pNN, num_dim=n)
        for n in range(1, 21):
            print('LLE', n)
            new_Xtrain = copy.deepcopy(X_train)
            new_Xtest = copy.deepcopy(X_test)
            new_ytrain = copy.deepcopy(y_train)
            new_ytest = copy.deepcopy(y_test)
            DR_NN.train_NN_LLE(filename[:-4], new_Xtrain, new_Xtest, new_ytrain, new_ytest,
                               random_seed=random_seed, scalar=scalar,
                               njobs=njobs, numFolds=numFolds, make_graphs=make_graphs, nolegend=nolegend,
                               pNN=pNN, num_dim=n)
    # Run NN for clustering
    if rNN:
        for n in [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]:
            print('kmeans', n)
            new_Xtrain = copy.deepcopy(X_train)
            new_Xtest = copy.deepcopy(X_test)
            new_ytrain = copy.deepcopy(y_train)
            new_ytest = copy.deepcopy(y_test)
            cluster_NN.train_kmeansNN(filename[:-4], new_Xtrain, new_Xtest, new_ytrain, new_ytest,
                                      random_seed=random_seed, scalar=scalar, debug=verbose,
                                      njobs=njobs, numFolds=numFolds, make_graphs=make_graphs, nolegend=nolegend,
                                      pNN=pNN, num_clusts=n)
        print('diag')
        # cov_types = ['diag', 'tied', 'full', 'spherical']
        # for cov in cov_types:
        for n in [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]:
            print('diag', n)
            new_Xtrain = copy.deepcopy(X_train)
            new_Xtest = copy.deepcopy(X_test)
            new_ytrain = copy.deepcopy(y_train)
            new_ytest = copy.deepcopy(y_test)
            cluster_NN.train_EM_NN(filename[:-4], new_Xtrain, new_Xtest, new_ytrain, new_ytest,
                                   random_seed=random_seed, scalar=scalar,
                                   njobs=njobs, numFolds=numFolds, make_graphs=make_graphs, nolegend=nolegend,
                                   pNN=pNN, num_clusts=n, cov_type='diag')


def main():
    if 1 == 1:
        run_ul_algos(filename='Mobile_Prices.csv',
                     result_col='price_range',
                     scalar=0,
                     random_seed=1,
                     make_graphs=False,
                     verbose=True,
                     rNN=True,
                     pNN={'hidden_layer_sizes': (512, 512, 512, 512),
                          'activation'        : 'relu',
                          'solver'            : 'adam',
                          'alpha'             : 0.1,
                          'learning_rate_init': 0.01,
                          'max_iter'          : 10000,
                          'warm_start'        : True,
                          'early_stopping'    : True,
                          'random_state'      : 1},
                     )
    if 1 == 1:
        run_ul_algos(filename='Chess.csv',
                     result_col='OpDepth',
                     scalar=2,
                     random_seed=1,
                     make_graphs=False,
                     verbose=True)


if __name__ == "__main__":
...
Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving into advanced test scenarios. The Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
