Best Python code snippet using slash
models.py
Source:models.py  
import pandas as pd
import numpy as np
import math
import code.algorithms.config as config
import random
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats import poisson, skellam

logger = config.config_logger(__name__, 10)
random.seed(1111)


def _concat_frames(frames):
    """Concatenate DataFrames row-wise; an empty list yields an empty DataFrame.

    Replacement for the chained ``DataFrame.append`` calls, which pandas 2.0
    removed.
    """
    frames = list(frames)
    if not frames:
        return pd.DataFrame([])
    return pd.concat(frames)


class Fixture(object):
    """
    Class for the fixture of a league.

    A fixture wraps a DataFrame in one of two layouts:
        * local layout (``local_fixture=True``): one row per match, with
          ``localteam_id`` / ``visitorteam_id`` style columns.
        * team layout (``local_fixture=False``): one row per team per match,
          with ``team_id`` / ``op_team_id`` / ``is_home`` columns.
    """

    def __init__(self, fixture, name, local_fixture=True):
        """
        Notes: There are certain headers that the fixture arg must have. Careful.
        Args:
            fixture (:obj: `pd.Dataframe`): fixture of a league.
            name (str): name of the fixture.
            local_fixture (bool): True if the frame is in the local layout (the
                setter requires a 'localTeam.data.name' column). False if
                locality is determined by a column named 'is_home' and team ids
                are under the column 'team_id'.
        """
        # local_fixture must be assigned first: the fixture setter reads it.
        self.local_fixture = local_fixture
        self.fixture = fixture
        self.name = name

    @property
    def fixture(self):
        return self._fixture

    @fixture.setter
    def fixture(self, fixture):
        """Store the DataFrame after checking the column required by the layout.

        Raises:
            ValueError: if the marker column of the declared layout is missing.
        """
        if self.local_fixture:
            if 'localTeam.data.name' not in fixture.columns:
                raise ValueError('localTeam.data.name is missing but local_fixture is True')
        elif 'is_home' not in fixture.columns:
            raise ValueError('is_home is missing but local_fixture is False')
        self._fixture = fixture

    def __str__(self):
        """
        Returns:
            Name of the league and dimensions of the fixture DataFrame.
        """
        return 'League: {0} - Shape: {1}'.format(self.name, self.fixture.shape)

    def get_last_match(self):
        """Return the latest 'time.starting_at.date' parsed as a timestamp."""
        return max(pd.to_datetime(self.fixture['time.starting_at.date'], format="%Y-%m-%d"))

    def get_match_years(self):
        """Return a Series with the first four characters (year) of each match date."""
        return self.fixture['time.starting_at.date'].apply(lambda x: x[:4])

    def get_last_year(self):
        """Return the year assigned to the latest season.

        If the last match was played before June the latest calendar year is
        returned, otherwise that year plus one.
        """
        temp_year = max(set(self.get_match_years()))
        last_match = self.get_last_match()
        if last_match.month < 6:
            return int(temp_year)
        return int(temp_year) + 1

    def get_seasons(self):
        """
        Get set with the seasons in the fixture dataset.
        Returns:
            Set with the seasons in the fixture dataset.
        Raises:
            ValueError: if the fixture has no 'season_id' column.
        """
        if 'season_id' not in self.fixture.columns:
            raise ValueError('season_id not in fixture')
        return set(self.fixture['season_id'])

    def get_team_ids(self):
        """Return the set of team ids ('localteam_id' or 'team_id' by layout)."""
        if self.local_fixture:
            return set(self.fixture['localteam_id'])
        return set(self.fixture['team_id'])

    def get_team_names(self):
        """Return the set of team names ('localTeam.data.name' or 'Team.data.name')."""
        if self.local_fixture:
            return set(self.fixture['localTeam.data.name'])
        return set(self.fixture['Team.data.name'])

    def subset_season(self, season_id):
        """
        Extract matches played in a certain season.
        Args:
            season_id (str): id of the season requested.
        Returns:
            Fixture object with the games played in the season requested.
        """
        df = self.fixture
        output = df.loc[df['season_id'] == season_id]
        name = self.name + ' - season {0}'.format(season_id)
        return Fixture(fixture=output, name=name, local_fixture=self.local_fixture)

    def get_team_games(self, team_id, home):
        """
        Extract the games of certain team as local, visit or both.
        Args:
            team_id (str): id of the requested team.
            home (int: from 0 to 2): 0 for visit games.
                                     1 for local games.
                                     2 for both.
        Returns:
            Fixture object containing the games of certain team.
        Raises:
            ValueError: if home is not 0, 1 or 2.
            IndexError: if the team has no row from which to read its name.
        """
        # Validate up front so a bad `home` is reported as such in both layouts.
        if home not in (0, 1, 2):
            raise ValueError('home must be an integer between 0 and 2')
        df = self.fixture
        local_fixture = self.local_fixture
        if local_fixture:
            is_local = df['localteam_id'] == team_id
            is_visitor = df['visitorteam_id'] == team_id
            if home == 0:
                output = df.loc[is_visitor]
            elif home == 1:
                output = df.loc[is_local]
            else:
                output = df.loc[is_local | is_visitor]
            team_name = df.loc[is_local]['localTeam.data.name'].iloc[0]
        else:
            is_team = df['team_id'] == team_id
            is_opponent = df['op_team_id'] == team_id
            if home == 0:
                # Rows where the team plays away, plus the opponents' home rows.
                output = pd.concat([df.loc[is_team & (df['is_home'] == 0)],
                                    df.loc[is_opponent & (df['is_home'] == 1)]])
            elif home == 1:
                # Rows where the team plays at home, plus the opponents' away rows.
                output = pd.concat([df.loc[is_team & (df['is_home'] == 1)],
                                    df.loc[is_opponent & (df['is_home'] == 0)]])
            else:
                output = df.loc[is_team | is_opponent]
            team_name = df.loc[is_team]['Team.data.name'].iloc[0]
        name = self.name + ' - {0}'.format(team_name)
        return Fixture(fixture=output, name=name, local_fixture=local_fixture)

    def get_team_scores(self, team_id):
        """
        Convert fixture database from a local_fixture to a non local_fixture for a certain team.
        Args:
            team_id (str): id of the requested team.
        Returns:
            Fixture object containing the games of certain team in a non local_fixture
            structure, sorted by 'time.starting_at.date'.
        """
        local_fixture = self.get_team_games(team_id=team_id, home=1).fixture.copy()
        local_fixture['is_home'] = 1
        local_fixture = local_fixture.rename(columns={
            'scores.localteam_score': 'score',
            'scores.visitorteam_score': 'op_score',
            'localteam_id': 'team_id',
            'localTeam.data.name': 'Team.data.name',
            'visitorteam_id': 'op_team_id',
            'visitorTeam.data.name': 'op_Team.data.name'})
        visitor_fixture = self.get_team_games(team_id=team_id, home=0).fixture.copy()
        visitor_fixture['is_home'] = 0
        visitor_fixture = visitor_fixture.rename(columns={
            'scores.visitorteam_score': 'score',
            'scores.localteam_score': 'op_score',
            'visitorteam_id': 'team_id',
            'visitorTeam.data.name': 'Team.data.name',
            'localteam_id': 'op_team_id',
            'localTeam.data.name': 'op_Team.data.name'})
        output_fixture = pd.concat([local_fixture, visitor_fixture])
        output_fixture = output_fixture.sort_values('time.starting_at.date')
        return Fixture(name=team_id, fixture=output_fixture, local_fixture=False)

    def clean_fixture(self):
        """
        Keep only variables selected. Drop duplicates. Drop missing values.
        The fixture is modified in place.
        Returns:
            Fixture object (self).
        """
        fixture = self.fixture
        season_dict_inv = invert_dictionary(self.get_seasons_dict())
        if not self.local_fixture:
            fixture = fixture.loc[fixture['is_home'] == 1]
        else:
            # NOTE(review): this branch reads 'team_id', which is not among the
            # local-layout columns listed in variables_to_keep(); if the column
            # is absent the KeyError below is swallowed and nothing is dropped.
            # Confirm the intended column.
            try:
                condition = (fixture['season_id'] == season_dict_inv[self.get_last_year()]) & \
                            (np.isnan(fixture['team_id']))
                fixture = fixture.loc[~condition]
            except KeyError:
                logger.warning('There was a KeyError omited - clean_fixture method')
        fixture = fixture[self.variables_to_keep()].drop_duplicates().dropna()
        self.fixture = fixture
        return self

    def drop_x_games_first_last(self, x):
        """
        Drop the first and last x games of a Fixture.
        Fixtures with 15 rows or fewer only lose their first x games (a warning
        is logged).
        Args:
            x (int): number of games to be dropped at the start/end.
        Returns:
            Fixture object.
        """
        if x == 0:
            return self
        my_fixture = self.fixture.sort_values('time.starting_at.date')
        if len(my_fixture) > 15:
            output = my_fixture.iloc[x:-x]
        else:
            output = my_fixture.iloc[x:]
            logger.warning('Team has less than 15 matches: {0}'.format(self.name))
        name = self.name + ' - {0} dropped'.format(x)
        return Fixture(fixture=output, name=name, local_fixture=self.local_fixture)

    def remove_x_games(self, n):
        """
        Drop the first and last n matches in each season.
        Args:
            n (int): number of games to be dropped at the start/end.
        Returns:
            Fixture object.
        """
        original_name = self.name
        logger.info('Main Fixture original size: {0}'.format(self.fixture.shape))
        frames = []
        for season in self.get_seasons():
            temp_season = self.subset_season(season)
            teams = temp_season.get_team_ids()
            logger.info('Season {1} original size: {0}'.format(temp_season.fixture.shape, season))
            for team_id in teams:
                temp_team = temp_season.get_team_games(team_id, home=1)
                frames.append(temp_team.drop_x_games_first_last(n).fixture)
        output = _concat_frames(frames).sort_values('time.starting_at.date')
        name = '{0} - {1} games dropped'.format(original_name, n)
        return Fixture(output, name=name, local_fixture=self.local_fixture)

    def get_score_rolling_mean(self, window_scored, window_conceded):
        """
        Generate the rolling sum of goals scored and goals received. For the former,
        the window is window_scored, for the latter, window_conceded. Afterwards, the
        result vector is shifted one position. This way, we do not include the current
        result (score of the match) into the rolling computation.
        Args:
            window_scored (int): size of the window for goals scored.
            window_conceded (int): size of the window for goals conceded.
        Returns:
            Fixture object (non local_fixture) with 'roll_score' and
            'roll_op_score' included.
        """
        original_name = self.name
        frames = []
        for team_id in self.get_team_ids():
            team_fixture = self.get_team_scores(team_id=team_id).fixture
            team_fixture['roll_score'] = team_fixture['score'].rolling(
                window=window_scored).sum().shift(1)
            team_fixture['roll_op_score'] = team_fixture['op_score'].rolling(
                window=window_conceded).sum().shift(1)
            frames.append(team_fixture)
        output = _concat_frames(frames)
        # Re-align 'roll_op_score' positionally: values ordered by (date, team_id)
        # are written onto rows ordered by (date, op_team_id).
        # NOTE(review): presumably this gives each row its opponent's rolling
        # goals conceded; that only holds if a team plays at most once per date.
        only_main_team = output[['team_id', 'Team.data.name', 'time.starting_at.date',
                                 'roll_op_score']]
        only_main_team = only_main_team.sort_values(['time.starting_at.date',
                                                     'team_id']).reset_index(drop=True)
        only_main_team = only_main_team['roll_op_score']
        output = output.sort_values(['time.starting_at.date', 'op_team_id']).reset_index(drop=True)
        output['roll_op_score'] = only_main_team
        output = output.sort_values('time.starting_at.date')
        name = original_name + ' - roll sum'
        return Fixture(name=name, fixture=output, local_fixture=False)

    def generate_dataset(self, win_scored=10, win_conceded=2, games_removed=0):
        """
        Generate rolling sums of goals scored and received. Drop the first
        and last games_removed. Drop missing values.
        Args:
            win_scored (int): size of the window for goals scored.
            win_conceded (int): size of the window for goals conceded.
            games_removed (int): number of games to be removed at the start
                and end of the season.
        Returns:
            Fixture object.
        """
        output = self.get_score_rolling_mean(window_scored=win_scored, window_conceded=win_conceded)
        output = output.remove_x_games(games_removed)
        output.fixture = output.fixture.dropna()
        return output

    def get_team_names_and_ids(self):
        """Return a dict mapping team name to team id."""
        if self.local_fixture:
            id_col, name_col = 'localteam_id', 'localTeam.data.name'
        else:
            id_col, name_col = 'team_id', 'Team.data.name'
        names_and_ids = set(zip(self.fixture[id_col], self.fixture[name_col]))
        return {team_name: team_id for team_id, team_name in names_and_ids}

    def add_champion_dummy(self, champions_df):
        """Add a 0/1 'champion' column and rename the fixture (in place).

        A team gets 1 when it appears in champions_df in any of the four years
        before the season's year.
        Args:
            champions_df (:obj: `pd.DataFrame`): champions per year; columns are
                read by year as a string (see get_champions_in_period). Team
                names are replaced by ids before matching.
        Returns:
            Fixture object (self).
        """
        origin_name = self.name
        teams_dict = self.get_team_names_and_ids()
        champions_df = champions_df.replace(teams_dict)
        # Loop-invariant: computed once instead of once per season.
        season_dict = self.get_seasons_dict()
        id_column = 'localteam_id' if self.local_fixture else 'team_id'
        frames = []
        for season in self.get_seasons():
            target_year = season_dict[season]
            champions = self.get_champions_in_period(champions_df, target_year, 4)
            temp_fixture = self.subset_season(season).fixture.copy()
            temp_fixture['champion'] = temp_fixture[id_column].apply(
                create_dummy_for_champions, args=(champions,))
            frames.append(temp_fixture)
        output = _concat_frames(frames).sort_values('time.starting_at.date')
        self.fixture = output
        self.name = origin_name + ' - add champion'
        return self

    def get_seasons_dict(self):
        """Return a dict mapping season id to a year.

        For every season except the league's last one (looked up in
        get_last_season() by the first word of the fixture name) the year is
        the ceiling of the mean match year; the last season maps to
        get_last_year().
        Raises:
            KeyError: if the league name is not listed in get_last_season().
        """
        my_fixture = self.fixture
        # NOTE(review): this adds a 'year' column to the stored DataFrame as a
        # side effect; kept because later steps may rely on the column.
        my_fixture['year'] = self.get_match_years().astype('int')
        league_name = self.name.split(' ')[0]
        last_season = self.get_last_season()[league_name]
        output = {}
        for season in self.get_seasons():
            if season == last_season:
                continue
            temp = my_fixture.loc[my_fixture['season_id'] == season, :]
            output[season] = math.ceil(np.mean(temp['year']))
        output[last_season] = self.get_last_year()
        return output

    def train_model(self):
        """Fit a Poisson GLM of 'score' on the columns in variables_in_model()."""
        my_fixture = self.fixture[self.variables_in_model()]
        model = smf.glm(formula='score ~ is_home + roll_score + roll_op_score + champion', data=my_fixture,
                        family=sm.families.Poisson()).fit()
        return model

    def exclude_last_x_seasons(self, n):
        """Split the fixture into train (older seasons) and test (last n seasons).

        Test rows whose 'fixture_id' is not paired are dropped.
        Args:
            n (int): number of most recent season-years to hold out.
        Returns:
            Tuple (keep, drop) of Fixture objects.
        Raises:
            KeyError: if one of the last n years has no season.
        """
        seasons_dict = invert_dictionary(self.get_seasons_dict())
        max_year = max(seasons_dict)
        targets = [seasons_dict[year] for year in range(max_year + 1 - n, max_year + 1)]
        my_fixture = self.fixture.copy()
        is_test = my_fixture['season_id'].isin(targets)
        keep = my_fixture.loc[~is_test, :]
        drop = drop_single_matches(my_fixture.loc[is_test, :])
        name_keep = self.name + ' - train'
        name_drop = self.name + ' - test'
        keep = Fixture(name=name_keep, fixture=keep, local_fixture=self.local_fixture)
        drop = Fixture(name=name_drop, fixture=drop, local_fixture=self.local_fixture)
        return keep, drop

    def add_predictions(self, predictions):
        """Return a new Fixture with predictions stored in 'expected_score'."""
        my_fixture = self.fixture.copy()
        my_fixture['expected_score'] = predictions
        name = self.name + ' - predicted'
        return Fixture(fixture=my_fixture, name=name, local_fixture=self.local_fixture)

    def convert_to_matches(self):
        """Collapse the two rows of each match into the home row.

        The away row's 'expected_score' is copied positionally into
        'op_expected_score'; unpaired rows are dropped first.
        """
        my_fixture = self.fixture.copy()
        my_fixture = my_fixture.sort_values(['time.starting_at.date', 'fixture_id'])
        my_fixture = drop_single_matches(my_fixture)
        output = my_fixture.loc[my_fixture['is_home'] == 1].copy().reset_index(drop=True)
        temp = my_fixture.loc[my_fixture['is_home'] == 0].copy().reset_index(drop=True)
        output['op_expected_score'] = temp['expected_score']
        return Fixture(name='match predictions', fixture=output, local_fixture=self.local_fixture)

    def get_matches_prediction(self, model):
        """Predict scores with model and return per-match probabilities and winners."""
        prediction = model.predict(self.fixture)
        my_fixture = self.add_predictions(prediction)
        output = my_fixture.convert_to_matches()
        output = output.get_match_probabilities()
        output.fixture['winner'] = output.fixture.apply(get_winner, axis=1)
        return output

    def get_match_probabilities(self):
        """Add 'local_prob', 'tie_prob', 'visitor_prob' and 'expected_winner'.

        Probabilities come from simulate_match() with max_goals=10 on the
        expected scores. The expected winner is the most likely outcome; the
        local team wins ties in probability, then the visitor.
        Returns:
            New Fixture object with the extra columns.
        """
        my_fixture = self.fixture.copy()
        local_score = self.fixture['expected_score']
        visitor_score = self.fixture['op_expected_score']
        local_prob_list, visitor_prob_list, tie_prob_list, winner = [], [], [], []
        for i, (home_exp, away_exp) in enumerate(zip(local_score, visitor_score)):
            match_prob = simulate_match(home_exp, away_exp, max_goals=10)
            local_prob, tie_prob, visitor_prob = sum_triangle_and_diagonal_from_matrix(match_prob)
            local_prob_list.append(local_prob)
            tie_prob_list.append(tie_prob)
            visitor_prob_list.append(visitor_prob)
            best = max(local_prob, tie_prob, visitor_prob)
            if local_prob == best:
                winner.append(self.fixture['Team.data.name'].iloc[i])
            elif visitor_prob == best:
                winner.append(self.fixture['op_Team.data.name'].iloc[i])
            else:
                winner.append('tie')
        my_fixture['local_prob'] = pd.Series(local_prob_list, index=my_fixture.index)
        my_fixture['tie_prob'] = pd.Series(tie_prob_list, index=my_fixture.index)
        my_fixture['visitor_prob'] = pd.Series(visitor_prob_list, index=my_fixture.index)
        my_fixture['expected_winner'] = pd.Series(winner, index=my_fixture.index)
        return Fixture(fixture=my_fixture, name=self.name, local_fixture=self.local_fixture)

    def clean_results(self):
        """Keep only result_variables(), sorted by date and fixture id (in place).

        Returns:
            Fixture object (self), like the other in-place methods of the class.
        """
        self.fixture = self.fixture[self.result_variables()]\
            .sort_values(['time.starting_at.date', 'fixture_id'])
        return self

    def get_accuracy(self):
        """Return the share of rows where 'expected_winner' equals 'winner'."""
        my_fixture = self.fixture.copy()
        total = my_fixture.shape[0]
        good = (my_fixture['expected_winner'] == my_fixture['winner']).sum()
        return good/total

    def determine_winner(self):
        """Add a 'winner_mod' column ('local', 'visit' or 'tie') in place; return self."""
        output = self
        output.fixture['winner_mod'] = output.fixture.apply(get_winner_mod, axis=1)
        return output

    def variables_to_keep(self):
        """Return the list of columns kept by clean_fixture for the current layout."""
        if self.local_fixture:
            return ['league_id', 'season_id', 'fixture_id', 'localteam_id', 'visitorteam_id',
                    'time.starting_at.date', 'localTeam.data.name', 'visitorTeam.data.name',
                    'scores.localteam_score', 'scores.visitorteam_score']
        return ['league_id', 'season_id', 'fixture_id', 'team_id', 'op_team_id', 'is_home',
                'time.starting_at.date', 'Team.data.name', 'op_Team.data.name', 'score', 'op_score']

    def convert_2match_to_1match(self):
        """Convert a team-layout fixture to the local layout in place.

        Only home rows are kept and columns are renamed to their local/visitor
        names.
        Returns:
            Fixture object (self).
        Raises:
            ValueError: if the fixture is already in the local layout.
        """
        if self.local_fixture:
            raise ValueError('local_fixture is True, fixture already in 1match format')
        my_df = self.fixture
        my_df = my_df[self.variables_to_keep()].sort_values('fixture_id')
        output = my_df.loc[my_df['is_home'] == 1]
        output = output.rename(columns={'Team.data.name': 'localTeam.data.name',
                                        'op_Team.data.name': 'visitorTeam.data.name',
                                        'team_id': 'localteam_id',
                                        'op_team_id': 'visitorteam_id',
                                        'score': 'scores.localteam_score',
                                        'op_score': 'scores.visitorteam_score'})
        del output['is_home']
        # Flip the flag before assigning: the setter validates against it.
        self.local_fixture = True
        self.fixture = output
        return self

    @staticmethod
    def get_last_season():
        """Return the hard-coded dict of league name -> id of its last season."""
        last_season = {'82Bundesliga': 8026,
                       '8Premier_League': 6397,
                       '564La_Liga': 8442,
                       '301Ligue_1': 6405,
                       'MLS': 2014,
                       'la_liga': 2014,
                       'premier': 2014,
                       'comebol': 2,
                       'wc2018': 2019}
        return last_season

    @staticmethod
    def get_champions_in_period(champions_df, year, window):
        """Return the set of values in the columns str(year-window) .. str(year-1)."""
        output = set()
        for i in range(year-window, year):
            output.update(champions_df[str(i)].tolist())
        return output

    @staticmethod
    def variables_in_model():
        """Return the columns used by the Poisson model."""
        return ['score', 'roll_score', 'roll_op_score', 'champion', 'is_home']

    @staticmethod
    def result_variables():
        """Return the columns kept by clean_results."""
        return ['fixture_id', 'time.starting_at.date', 'Team.data.name', 'op_Team.data.name', 'expected_score',
                'op_expected_score', 'local_prob', 'tie_prob', 'visitor_prob', 'expected_winner', 'winner',
                'score', 'op_score', 'is_home']


def convert_format_time(my_series):
    """Return the series with each value truncated to its first 10 characters."""
    return my_series.apply(lambda x: x[:10])


def get_results_frequency(fixture):
    """Count local/visit/tie results per season.

    Returns:
        DataFrame with one row per season (season id replaced by its year),
        dummy-count columns for 'winner_mod' and a 'total' column, sorted by
        year.
    """
    seasons_dict = fixture.get_seasons_dict()
    my_league = fixture.determine_winner().fixture[['season_id', 'winner_mod']]
    my_league = pd.get_dummies(my_league, columns=['winner_mod'])
    my_league['total'] = 1
    output = my_league.groupby('season_id').sum().reset_index().replace({'season_id': seasons_dict})
    output['season_id'] = output['season_id'].astype('int')
    output = output.sort_values('season_id')
    return output


def get_league_dictionary():
    """Return the hard-coded dict of league name -> league id."""
    league_dict = {'82Bundesliga': 82,
                   '8Premier_League': 8,
                   '564La_Liga': 564,
                   '301Ligue_1': 301}
    return league_dict


def create_dummy_for_champions(team_id, champion_set):
    """Return 1 if team_id is in champion_set, else 0."""
    return 1 if team_id in champion_set else 0


def invert_dictionary(my_dict):
    """Return a dict with keys and values swapped."""
    return {v: k for k, v in my_dict.items()}


def simulate_match(home_goals_avg, away_goals_avg, max_goals=10):
    """Return the matrix of score probabilities under independent Poisson goals.

    Entry [i, j] is P(home scores i) * P(away scores j) for 0..max_goals.
    """
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for
                 team_avg in [home_goals_avg, away_goals_avg]]
    return np.outer(np.array(team_pred[0]), np.array(team_pred[1]))


def drop_single_matches(df):
    """Keep only rows whose 'fixture_id' is paired with an adjacent row.

    After sorting by 'fixture_id', rows are scanned pairwise: two consecutive
    rows with the same id are both kept, anything else is dropped.
    Returns:
        DataFrame with the kept rows (same columns and dtypes as df).
    """
    df = df.sort_values('fixture_id')
    fixture_id = df['fixture_id']
    positions = []
    i = 0
    while i < df.shape[0]-1:
        if fixture_id.iloc[i] == fixture_id.iloc[i+1]:
            positions.extend((i, i+1))
            i += 2
        else:
            i += 1
    # Positional selection instead of row-by-row append: linear, keeps dtypes
    # and keeps the columns even when nothing is paired.
    return df.iloc[positions]


def convert_2match_to_1match(my_df):
    """Merge home and away rows of each 'fixture_id' into a single row."""
    my_df = my_df.sort_values('fixture_id')
    local = my_df.loc[my_df['is_home'] == 1]
    visitor = my_df.loc[my_df['is_home'] == 0]
    visitor = visitor.rename(columns={'expected_goals': 'op_expected_goals'})
    del visitor['is_home']
    output = pd.merge(local, visitor, how='inner', on=['fixture_id'])
    return output


def sum_triangle_and_diagonal_from_matrix(my_matrix):
    """Return (lower, diagonal, upper) sums of a 2-D matrix.

    lower sums entries with row > column, upper those with row < column.
    """
    my_matrix = np.asarray(my_matrix)
    lower = np.tril(my_matrix, -1).sum()
    upper = np.triu(my_matrix, 1).sum()
    diagonal = np.trace(my_matrix)
    return lower, diagonal, upper


def get_winner(row):
    """Return the winning team's name from 'score'/'op_score', or 'tie'."""
    local = row['score']
    visitor = row['op_score']
    if local > visitor:
        return row['Team.data.name']
    elif local < visitor:
        return row['op_Team.data.name']
    else:
        return 'tie'


def get_winner_mod(row):
    """Return 'local', 'visit' or 'tie' from the local/visitor score columns."""
    local = row['scores.localteam_score']
    visitor = row['scores.visitorteam_score']
    if local > visitor:
        return 'local'
    elif local < visitor:
        return 'visit'
    else:
        return 'tie'


def predict_model(model, test, ignore_cols):
    """ Runs a simple predictor that will predict if we expect a team to
        win. Returns a copy of test with a 'predicted' column.
    """
    x_test = _splice(_coerce(_clone_and_drop(test, ignore_cols)))
    x_test['intercept'] = 1.0
    predicted = model.predict(x_test)
    result = test.copy()
    result['predicted'] = predicted
    return result


def _clone_and_drop(data, drop_cols):
    """ Returns a copy of a dataframe that doesn't have certain columns. """
    clone = data.copy()
    for col in drop_cols:
        if col in clone.columns:
            del clone[col]
    return clone


def _splice(data):
    """ Splice both rows representing a game into a single one.

        Each row gets the paired row's values under 'opp_'-prefixed columns
        ('opp_is_home' is discarded).
    """
    data = data.copy()
    opp = data.copy()
    opp.columns = ['opp_%s' % (col,) for col in opp.columns]
    opp = opp.apply(_swap_pairwise)
    del opp['opp_is_home']
    return data.join(opp)


def _swap_pairwise(col):
    """ Swap rows pairwise; i.e. swap row 0 and 1, 2 and 3, etc.

        Raises IndexError when the column has an odd number of rows.
    """
    # `pd.np` was removed from pandas; use numpy directly.
    col = np.array(col)
    for index in range(0, len(col), 2):
        col[index], col[index + 1] = col[index + 1], col[index]
    return col


def _coerce_types(vals):
    """ Makes sure all of the values in a list are floats. """
    return [1.0 * val for val in vals]


def _coerce(data):
    """ Coerces a dataframe to all floats, and standardizes the values. """
    return _standardize(data.apply(_coerce_types))


def non_feature_cols():
    """Return the list of column names that are treated as metadata."""
    return ['league_id', 'season_id', 'matchid', 'time.starting_at.date', 'teamid', 'op_teamid',
            'op_Team.data.name', 'Team.data.name', 'op_team_name', 'score', 'op_score',
            'op_points', 'round_id', 'referee_id', 'formation', 'op_formation', 'points',
            'time.minute']


def train_model(data, ignore_cols):
    """ Trains a logistic regression model over the data. Columns that
        are passed in ignore_cols are considered metadata and not used
        in the model building.

        Rows with points == 1 are excluded from training and the target is
        whether points == 3. Returns (model, test).
    """
    # Validate the data
    data = prepare_data(data)
    logger.info('Observations used in the model: {0}'.format(len(data)))
    target_col = 'points'
    (train, test) = split(data)
    # Build the mask from `train` itself rather than from the full `data`.
    train = train.loc[train['points'] != 1]
    (y_train, x_train) = _extract_target(train, target_col)
    x_train2 = _splice(_coerce(_clone_and_drop(x_train, ignore_cols)))
    y_train2 = [int(yval) == 3 for yval in y_train]
    logger.info('Training model')
    model = build_model_logistic(y_train2, x_train2, alpha=8.0)
    return model, test


def prepare_data(data):
    """ Drops all matches where we don't have data for both teams. """
    data = data.copy()
    data = _drop_unbalanced_matches(data)
    _check_data(data)
    return data


L1_ALPHA = 16.0


def build_model_logistic(target, data, acc=0.00000001, alpha=L1_ALPHA):
    """ Trains a logistic regresion model. target is the target.
        data is a dataframe of samples for training. The length of
        target must match the number of rows in data.
    """
    data = data.copy()
    data['intercept'] = 1.0
    logit = sm.Logit(target, data, disp=False)
    return logit.fit_regularized(maxiter=1024, alpha=alpha, acc=acc, disp=False)


def _drop_unbalanced_matches(data):
    """  Because we don't have data on both teams during a match, we
         want to drop any match we don't have info about both teams.
         This can happen if we have fewer than 10 previous games from
         a particular team.

         Rows with missing values are dropped first; then only consecutive
         row pairs sharing a 'matchid' are kept.
    """
    data = data.dropna()
    match_ids = data['matchid']
    keep = []
    index = 0
    while index < len(data) - 1:
        if match_ids.iloc[index] == match_ids.iloc[index + 1]:
            keep.extend((True, True))
            index += 2
        else:
            keep.append(False)
            index += 1
    # A trailing unpaired row (if any) is dropped.
    keep.extend([False] * (len(data) - len(keep)))
    results = data[keep]
    if len(results) % 2 != 0:
        raise Exception('Unexpected results')
    return results


def _check_data(data):
    """ Walks a dataframe and make sure that all is well.

        Every consecutive pair of rows must share 'matchid' and have mirrored
        'teamid' / 'op_teamid'; otherwise an Exception is raised.
    """
    i = 0
    if len(data) % 2 != 0:
        raise Exception('Unexpeted length')
    matches = data['matchid']
    teams = data['teamid']
    op_teams = data['op_teamid']
    while i < len(data) - 1:
        if matches.iloc[i] != matches.iloc[i + 1]:
            raise Exception('Match mismatch: %s vs %s ' % (
                            matches.iloc[i], matches.iloc[i + 1]))
        if teams.iloc[i] != op_teams.iloc[i + 1]:
            raise Exception('Team mismatch: match %s team %s vs %s' % (
                            matches.iloc[i], teams.iloc[i],
                            op_teams.iloc[i + 1]))
        if teams.iloc[i + 1] != op_teams.iloc[i]:
            raise Exception('Team mismatch: match %s team %s vs %s' % (
                            matches.iloc[i], teams.iloc[i + 1],
                            op_teams.iloc[i]))
        i += 2


def split(data, test_proportion=0.2):
    """ Splits a dataframe into a training set and a test set.
        Must be careful because back-to-back rows are expeted to
        represent the same game, so they both must go in the
        test set or both in the training set.
    """
    if len(data) % 2 != 0:
        raise Exception('Unexpected data length')
    train_vec = []
    # One random draw per pair of rows so both rows land in the same set.
    for _ in range(len(data) // 2):
        in_train = random.random() > test_proportion
        train_vec.extend((in_train, in_train))
    test_vec = [not val for val in train_vec]
    train = data[train_vec]
    test = data[test_vec]
    if len(train) % 2 != 0:
        raise Exception('Unexpected train length')
    if len(test) % 2 != 0:
        raise Exception('Unexpected test length')
    return (train, test)


def _extract_target(data, target_col):
    """ Removes the target column from a data frame, returns the target
        col and a new data frame minus the target. """
    target = data[target_col]
    train_df = data.copy()
    del train_df[target_col]
    return target, train_df


def _standardize_col(col):
    """ Standardizes a single column (subtracts mean and divides by std
        dev). Columns with a std dev of at most 0.001 are returned unchanged.
    """
    std = np.std(col)
    mean = np.mean(col)
    if abs(std) > 0.001:
        return col.apply(lambda val: (val - mean)/std)
    else:
        return col


def _standardize(data):
    """ Standardizes a dataframe. All fields must be numeric. """
    return data.apply(_standardize_col)


def get_expected_winner(row):
    """Return the team's name if 'predicted' > 0.5, else the opponent's name."""
    if row['predicted'] > 0.5:
        return row['Team.data.name']
    else:
        return row['op_Team.data.name']


def get_winners(my_df, tie_prob):
    """Build the per-match results table from a two-rows-per-match frame.

    Adds a 'winner' column to my_df (in place), keeps the home rows, attaches
    the away row's prediction as 'op_predicted', normalizes the predictions
    with tie_prob and returns the columns in result_variables() with
    'matchid' renamed to 'fixture_id'.
    """
    my_df['winner'] = my_df.apply(get_winner, axis=1)
    output = my_df.loc[my_df['is_home'] == 1].copy().reset_index(drop=True)
    temp = my_df.loc[my_df['is_home'] == 0].copy().reset_index(drop=True)
    output['op_predicted'] = temp['predicted']
    my_df = output.copy()
    my_df['expected_winner'] = my_df.apply(get_expected_winner, axis=1)
    my_df = normalize_predictions(my_df, tie_prob)
    my_df = my_df[result_variables()].rename(columns={'matchid': 'fixture_id'})
    my_df = my_df.sort_values(['time.starting_at.date', 'fixture_id'])
    return my_df


def get_accuracy(my_df, prefix):
    """Return the share of rows where prefix + 'expected_winner' equals 'winner'."""
    total = my_df.shape[0]
    expected_winner = prefix + 'expected_winner'
    good = (my_df[expected_winner] == my_df['winner']).sum()
    return good / total


def result_variables():
    """Return the columns kept by get_winners."""
    output = ['matchid', 'time.starting_at.date', 'Team.data.name', 'op_Team.data.name',
              'expected_winner', 'winner', 'predicted', 'op_predicted', 'tie_predicted']
    return output


def normalize_predictions(my_df, tie_prob):
    """Rescale 'predicted' and 'op_predicted' so they sum to 1 - tie_prob.

    Returns a copy of my_df with a 'tie_predicted' column added.
    """
    my_df = my_df.copy()
    my_df['tie_predicted'] = tie_prob
    total = my_df['predicted'] + my_df['op_predicted']
    # Both values are computed from the original predictions before either
    # column is overwritten.
    home = my_df['predicted'] * (1-my_df['tie_predicted']) / total
    away = my_df['op_predicted'] * (1-my_df['tie_predicted']) / total
    my_df['predicted'] = home
    my_df['op_predicted'] = away
    return my_df


def squared_error(a, b):
    """Return the sum of squared differences between two equal-length vectors.

    Raises:
        ValueError: if the vectors differ in length.
    """
    if len(a) != len(b):
        raise ValueError('Squared error fuction: vectors do not have same lenght')
    dif = a-b
    return dif.dot(dif)


def get_squared_error(my_df):
    """Compute mean squared errors of two sets of expected scores.

    NOTE(review): the source listing is truncated after the last assignment
    below ("..."), so the remainder of this function (including whatever it
    returns) is not visible and has not been reconstructed.
    """
    my_df = my_df.copy()
    gs_local = squared_error(my_df['gs_expected_score'], my_df['score'])
    gs_visit = squared_error(my_df['gs_op_expected_score'], my_df['op_score'])
    google_local = squared_error(my_df['expected_goals'], my_df['score'])
    google_visit = squared_error(my_df['op_expected_goals'], my_df['op_score'])
    gs_output = (gs_local + gs_visit)/my_df.shape[0]
    google_output = (google_local + google_visit)/my_df.shape[0]
    # NOTE(review): listing truncated here in the source.
Source:simple_fixture.py  
1# example 4 :2# same role as setup and teardown method of unittest.TestCase3import pytest4def test_with_local_fixture(local_fixture): # local_fixture define later this code5    '''6    fixtures can be invoked simply by having a positional arg7    with the same name as fixture8    '''9    print('running test_with_local_fixture')10    assert True11# local fixture : local to module12@pytest.fixture13def local_fixture():14    '''fixtures are callables(functions) decoratred with @fixture'''15    print('doing local fixture setup stuff')16# globla fixture : other module can access17@pytest.fixture(scope='module') # important18def global_fixture():19    '''fixtures are callables(functions) decoratred with @fixture'''20    print('doing global fixture setup stuff')21########################22# in another file , to use global_fixture23# import pytest24# from simple_fixture import global_fixture25# def test_with_global_fixture1(global_fixture): # local_fixture define later this code26#     print('running test_with_global_fixture')27#     assert True...03_simple_fixture_test.py
Source:03_simple_fixture_test.py  
1import pytest2def test_with_local_fixture(local_fixture):3    """4    Fixtures can be invoked simple by having a positional arg5    :param local_fixture:6    :return:7    """8    print("Running test_with_local_fixture...")9    assert True10@pytest.fixture11def local_fixture():12    """13    Fixtures are callables decorated with @fixture14    :return:15    """16    print("\n(Doing local fixture setup stuff!)")17def test_with_global_fixture(global_fixture):18    """"19    Fixtures can also be shared across test files (see confest.py)20    """...Learn to execute automation testing from scratch with LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, such as Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
