How to use is_not_too_large method in hypothesis

Best Python code snippet using hypothesis

Genetic Algorithm Population Size.py

Source: Genetic Algorithm Population Size.py

"""Genetic algorithm for cardinality-constrained portfolio selection.

A population of portfolios, each holding exactly ``K`` assets, is evolved by
tournament selection, uniform crossover and a single mutation per child.
The driver at the bottom repeats the experiment for several dataset files
and population sizes and writes summary statistics to CSV.
"""
import numpy as np
import pandas as pd


class DataSet:
    """Asset statistics read from a text file, plus the investment bounds.

    File rows are classified by their number of whitespace-separated fields:
    one field is the asset count, two fields are an asset's expected return
    and standard deviation, three fields are ``i j correlation`` with 1-based
    asset indices.

    Attributes set by the constructor:
        Asset_File, N, K, epsilon, delta: the constructor arguments.
        number_of_stocks: asset count read from the file.
        returns_deviations: array of shape (assets, 2), columns return/std.
        correlations: list of ``[i, j, correlation]`` rows as read.
        mu, deviations: the two columns of ``returns_deviations``.
        covariance: covariance entries for the (i, j) pairs present in the file.
        sigma: ``covariance`` mirrored across the diagonal.
        F: share of the budget left after every asset receives ``epsilon``.
    """

    def __init__(self, Asset_File, N, K, epsilon=0.01, delta=1.0):
        """Load ``Datasets/<Asset_File>`` and derive the covariance matrix.

        Args:
            Asset_File: file name inside the ``Datasets`` directory.
            N: total number of assets in the dataset.
            K: number of assets held by every solution.
            epsilon: minimum investment per selected asset.
            delta: maximum investment per selected asset.

        Raises:
            ValueError: if ``K * epsilon > 1`` or ``K * delta < 1``, i.e. the
                bounds make a fully invested K-asset portfolio impossible.
        """
        # Fail fast with a descriptive message instead of print + bare raise.
        if K * epsilon > 1.0:
            raise ValueError("Minimum investment is too large")
        if K * delta < 1.0:
            raise ValueError("Maximum investment is too small")

        self.Asset_File = Asset_File
        self.N = N  # Total number of assets in a dataset
        self.K = K  # Total number of assets in a solution
        self.epsilon = epsilon  # Min investment
        self.delta = delta  # Max investment
        self.number_of_stocks = 0
        self.covariance = np.nan

        return_rows = []
        correlation_rows = []
        with open('Datasets/{}'.format(Asset_File), newline='') as datafile:
            for row in datafile:
                # split() with no argument tolerates tabs, repeated blanks
                # and the trailing newline.
                fields = row.split()
                if len(fields) == 1:
                    self.number_of_stocks = int(fields[0])
                elif len(fields) == 2:
                    return_rows.append([float(value) for value in fields])
                elif len(fields) == 3:
                    correlation_rows.append([float(value) for value in fields])

        size = int(self.number_of_stocks)
        correlation_matrix = np.zeros((size, size))
        for i, j, rho in correlation_rows:
            correlation_matrix[int(i) - 1][int(j) - 1] = rho  # file is 1-based
        self.correlations = correlation_rows

        self.returns_deviations = np.array(return_rows, dtype=float).reshape(-1, 2)
        self.deviations = self.returns_deviations[:, 1]
        self.mu = self.returns_deviations[:, 0]

        # cov_ij = corr_ij * std_i * std_j for the pairs listed in the file.
        self.covariance = (correlation_matrix * self.deviations
                           * self.deviations.reshape((self.deviations.shape[0], 1)))
        # Mirror across the diagonal without counting the diagonal twice.
        self.sigma = (self.covariance + self.covariance.T
                      - np.diag(self.covariance.diagonal()))

        self.F = 1.0 - K * epsilon


class Population():
    """A population of K-asset portfolios and the best member seen so far."""

    def __init__(self, size):
        """Create an empty population that will hold ``size`` individuals."""
        self.Population_size = size  # Changed through iterations
        self.population_weights = []  # Length-N weight vector per individual
        self.population_assets = []  # Selected asset indices per individual
        self.fitness = []
        self.population_proportions = []  # weight - epsilon for selected assets
        self.best_fitness = 0
        self.best_proportions = 0
        self.best_weights = 0
        self.best_assets = 0
        self.best_covariance = 0
        self.best_return = 0
        self.Obj1 = []  # Risk term of every individual
        self.Obj2 = []  # Expected return of every individual

    @staticmethod
    def _objectives(weights, risk_aversion, data):
        """Return (risk, expected return, fitness) of one weight vector."""
        risk = np.sum((weights * weights.reshape((weights.shape[0], 1))) * data.sigma)
        expected_return = np.sum(weights * data.mu)
        fitness = risk_aversion * risk - (1 - risk_aversion) * expected_return
        return risk, expected_return, fitness

    @staticmethod
    def _bounded_weights(proportions, data):
        """Turn non-negative proportions into weights within [epsilon, delta].

        Every asset first receives ``epsilon`` and the remaining budget
        ``data.F`` is shared proportionally. Assets that exceed ``delta`` are
        pinned to ``delta`` and stay pinned, and the rest of the budget is
        re-shared among the remaining assets until no weight exceeds ``delta``.
        """
        proportions = np.asarray(proportions, dtype=float)
        weights = data.epsilon + proportions * data.F / proportions.sum()
        capped = np.zeros(proportions.shape, dtype=bool)
        over = weights > data.delta
        while over.any():
            # Accumulate: an asset capped in an earlier round must stay capped.
            capped |= over
            free = ~capped
            free_total = proportions[free].sum()
            budget = 1.0 - (data.epsilon * free.sum() + data.delta * capped.sum())
            if free_total > 0:
                weights = data.epsilon + proportions * budget / free_total
            else:
                # Nothing left to scale by: share the budget equally.
                weights = np.full(proportions.shape,
                                  data.epsilon + budget / max(int(free.sum()), 1))
            weights[capped] = data.delta
            over = weights > data.delta
        return weights

    @staticmethod
    def _drop_invalid(assets, shares):
        """Drop pairs whose share is outside [0, 1] and earlier duplicates."""
        kept_assets = []
        kept_shares = []
        for position, (asset, share) in enumerate(zip(assets, shares)):
            if share < 0 or share > 1:
                continue
            if asset in assets[position + 1:]:
                continue  # the last occurrence of a duplicate is the one kept
            kept_assets.append(asset)
            kept_shares.append(share)
        return kept_assets, kept_shares

    def _tournament(self, first, second):
        """Return whichever index has the lower fitness (ties go to first)."""
        return first if self.fitness[first] <= self.fitness[second] else second

    def check_valid_solution(self, weights, proportions, assets, data):
        """Raise ``ValueError`` if a solution violates any constraint.

        Args:
            weights: length-N weight vector.
            proportions: ``weight - epsilon`` for each selected asset.
            assets: indices of the selected assets.
            data: the ``DataSet`` supplying ``K``, ``epsilon`` and ``delta``.
        """
        weights = np.asarray(weights)
        proportions = np.asarray(proportions)
        # Checking whether the correct number of assets has been picked
        selected = int(np.sum(weights >= data.epsilon))
        if selected != data.K:
            raise ValueError("Expected " + str(data.K) + " assets but " + str(selected)
                             + " selected in solution: " + str(weights))
        # Checking whether number and size of proportions is correct
        if np.any(proportions > 1) or np.any(proportions < 0) or len(proportions) != data.K:
            raise ValueError("The values of proportions are not valid: " + str(proportions))
        # Checking whether weights sum up to 1
        if not np.isclose(weights.sum(), 1):
            raise ValueError("Proportions don't sum up to 1 (" + str(weights.sum())
                             + ") in solution: " + str(weights))
        # Checking whether maximum investment amount has not been exceeded
        if np.any(weights > data.delta):
            raise ValueError("There's at least one proportion larger than delta: " + str(weights))
        # Checking for duplicate assets in a solution
        if len(np.unique(assets)) != len(assets):
            raise ValueError("Duplicated assets in the portfolio: " + str(assets))

    def create_Population(self, Lambda, l, data):
        """Fill the population with random valid solutions and score them.

        Args:
            Lambda: array of risk-aversion values.
            l: index into ``Lambda`` used for the fitness.
            data: the ``DataSet`` being optimised.
        """
        for _ in range(self.Population_size):
            # K distinct assets and K random proportions for them
            chosen = np.random.permutation(data.N)[:data.K]
            weights_of_chosen = self._bounded_weights(np.random.rand(data.K), data)
            weights = np.zeros(data.N)
            weights[chosen] = weights_of_chosen
            proportions = weights_of_chosen - data.epsilon
            self.check_valid_solution(weights, proportions, chosen, data)
            self.population_proportions.append(proportions)
            self.population_weights.append(weights)
            self.population_assets.append(chosen.tolist())
        for weights in self.population_weights:
            risk, expected_return, fitness = self._objectives(weights, Lambda[l], data)
            self.fitness.append(fitness)
            self.Obj1.append(risk)
            self.Obj2.append(expected_return)

    def Genetic_Algorithm(self, Lambda, l, data):
        """Breed one child, replace the worst member and refresh the best.

        Args:
            Lambda: array of risk-aversion values.
            l: index into ``Lambda`` used for the child's fitness.
            data: the ``DataSet`` being optimised.

        Returns:
            Tuple ``(best_fitness, best_proportions, best_assets,
            best_weights, best_covariance, best_return)``.
        """
        # Four tournament slots; populations smaller than four reuse members.
        order = np.random.permutation(self.Population_size).tolist()
        picked = (order * 4)[:4]
        first_parent = self._tournament(picked[0], picked[1])
        second_parent = self._tournament(picked[2], picked[3])
        parent_1_assets = self.population_assets[first_parent]
        parent_2_assets = self.population_assets[second_parent]
        share_of_1 = dict(zip(parent_1_assets, self.population_proportions[first_parent]))
        share_of_2 = dict(zip(parent_2_assets, self.population_proportions[second_parent]))

        # Assets held by both parents always go to the child; the proportion
        # comes from either parent with equal chance.
        common_assets = [asset for asset in parent_1_assets if asset in parent_2_assets]
        child_assets = list(common_assets)
        child_shares = []
        for asset in common_assets:
            donor = share_of_1 if np.random.rand() > 0.5 else share_of_2
            child_shares.append(donor[asset])

        # Remaining assets are paired position by position; one of each pair
        # is inherited together with its proportion.
        only_1 = [asset for asset in parent_1_assets if asset not in common_assets]
        only_2 = [asset for asset in parent_2_assets if asset not in common_assets]
        for asset_1, asset_2 in zip(only_1, only_2):
            if np.random.rand() > 0.5:
                child_assets.append(asset_1)
                child_shares.append(share_of_1[asset_1])
            else:
                child_assets.append(asset_2)
                child_shares.append(share_of_2[asset_2])

        # A*: parent assets (with their proportions) that the child lacks.
        leftovers = []
        for asset_1, asset_2 in zip(parent_1_assets, parent_2_assets):
            if asset_1 not in child_assets:
                leftovers.append((asset_1, share_of_1[asset_1]))
            if asset_2 not in child_assets:
                leftovers.append((asset_2, share_of_2[asset_2]))

        # Fallback candidates in random order, used once A* runs out.
        other_assets = [asset for asset in np.random.permutation(data.N).tolist()
                        if asset not in child_assets]

        # Mutation: shrink or grow the weight of one randomly chosen position.
        target = int(np.random.choice(len(child_shares)))
        factor = 0.9 if np.random.rand() > 0.5 else 1.1
        child_shares[target] = factor * (data.epsilon + child_shares[target]) - data.epsilon

        child_assets, child_shares = self._drop_invalid(child_assets, child_shares)

        # Bring the child back to exactly K assets.
        while len(child_assets) > data.K:
            weakest = child_shares.index(min(child_shares))
            del child_assets[weakest]
            del child_shares[weakest]
        while len(child_assets) < data.K:
            if leftovers:
                asset, share = leftovers.pop(int(np.random.choice(len(leftovers))))
            else:
                # A* is empty: take any asset the child does not hold yet.
                other_assets = [a for a in other_assets if a not in child_assets]
                asset = other_assets.pop(int(np.random.choice(len(other_assets))))
                share = 0
            child_assets.append(asset)
            child_shares.append(share)

        # All-zero proportions cannot be normalised; nudge one at random.
        if sum(child_shares) == 0:
            child_shares[int(np.random.choice(len(child_shares)))] = 0.01

        # Evaluating child
        weights_of_child = self._bounded_weights(np.array(child_shares, dtype=float), data)
        child_weights = np.zeros(data.N)
        child_weights[child_assets] = weights_of_child
        child_proportions = weights_of_child - data.epsilon
        risk, expected_return, child_fitness = self._objectives(child_weights, Lambda[l], data)
        self.check_valid_solution(child_weights, child_proportions, child_assets, data)

        # Substituting child into the population and removing the weakest member
        worst = np.argmax(self.fitness)
        self.fitness[worst] = child_fitness
        self.population_proportions[worst] = child_proportions
        self.population_weights[worst] = child_weights
        self.population_assets[worst] = child_assets
        self.Obj1[worst] = risk
        self.Obj2[worst] = expected_return

        # Finding the best member of the population
        best = np.argmin(self.fitness)
        self.best_fitness = self.fitness[best]
        self.best_proportions = self.population_proportions[best]
        self.best_weights = self.population_weights[best]
        self.best_assets = self.population_assets[best]
        self.best_covariance = self.Obj1[best]
        self.best_return = self.Obj2[best]
        return (self.best_fitness, self.best_proportions, self.best_assets,
                self.best_weights, self.best_covariance, self.best_return)


def main():
    """Run the GA for every dataset and population size, 30 seeds each."""
    stock_lengths = [31, 85, 89, 98, 225]
    asset_files = ['assets1.txt', 'assets2.txt', 'assets3.txt', 'assets4.txt', 'assets5.txt']
    population_sizes = [1, 10, 50, 100, 200, 500]
    K = 10  # Number of assets to include in the portfolio
    l = 0
    Lambda = np.array([0.5])  # Fixed lambda for this calculation
    Runs = 30  # One run per random seed

    for N, file in zip(stock_lengths, asset_files):
        # The dataset does not depend on the population size: load it once.
        dataset = DataSet(file, N, K)
        maxEvals = 1000 * N  # Solution evaluations per run
        for pop_size in population_sizes:
            Results_fitness = []
            Results_weights = []
            Results_assets = []
            Results_Covariances = []
            Results_Returns = []
            for nevals in range(Runs):
                np.random.seed(nevals + 12345)
                population = Population(pop_size)
                population.create_Population(Lambda, l, dataset)
                for _ in range(maxEvals):
                    population.Genetic_Algorithm(Lambda, l, dataset)
                Results_fitness.append(population.best_fitness)
                Results_weights.append(population.best_weights)
                Results_assets.append(population.best_assets)
                Results_Covariances.append(population.best_covariance)
                Results_Returns.append(population.best_return)
                print(nevals)  # Tracking current iteration

            Results_fitness = np.array(Results_fitness)
            Results_Returns = np.array(Results_Returns)
            Results_Covariances = np.array(Results_Covariances)

            # min / max / mean / std of each quantity over the seeds
            stats = pd.DataFrame({
                'F value stats': [Results_fitness.min(), Results_fitness.max(),
                                  Results_fitness.mean(), Results_fitness.std()],
                'Return stats': [Results_Returns.min(), Results_Returns.max(),
                                 Results_Returns.mean(), Results_Returns.std()],
                'Covariance stats': [Results_Covariances.min(), Results_Covariances.max(),
                                     Results_Covariances.mean(), Results_Covariances.std()],
            })
            # Per-seed results
            results = pd.DataFrame({
                'F values': Results_fitness,
                'Returns': Results_Returns,
                'Covariances': Results_Covariances,
            })
            # Weights of the best portfolio of every seed
            weights = pd.DataFrame(Results_weights, columns=list(range(1, N + 1)))
            # Asset indices of the best portfolio of every seed
            col_names = ['asset_{}'.format(i) for i in range(1, K + 1)]
            assets = pd.DataFrame(Results_assets, columns=col_names)
            # Creating CSV files for further analysis
            df_results = pd.concat([results, assets, weights], axis=1)
            stats.to_csv('Generated data/Different Populations/stats_GA_p={}_{}.csv'.format(str(pop_size), file[:-4]), index=False)
            # NOTE(review): the listing this code was recovered from is cut
            # off right after the call above ("..."). df_results is not used
            # in the visible part; presumably it is written out in the
            # missing tail -- confirm against the full script.


if __name__ == "__main__":
    main()

Genetic Algorithm.py

Source: Genetic Algorithm.py

1import numpy as np2import pandas as pd3class DataSet:4    def __init__(self, Asset_File, N, K, epsilon=0.01, delta=1.0):5        """Loads a dataset and divides its contents into variables """6        self.Asset_File = Asset_File7        self.N = N # Total number of assets in a dataset8        self.K = K9        self.epsilon = epsilon # Min investment10        self.delta = delta # Max investment11        self.number_of_stocks = 012        self.returns_deviations = []13        self.correlations = []14        self.covariance = np.nan15        temp_li_1 = []16        temp_li_2 = []17        # Splitting rows based on what they contain18        with open('Datasets/{}'.format(Asset_File), newline='') as datafile:19            for row in datafile:20                if len(row.split()) == 1: # if row is len of 1 it will be number of assets21                    for x in row.split(' '):22                        if x == '':23                            continue24                        self.number_of_stocks = (int(x))25                elif len(row.split()) == 2: # if row is len of 2 it will be the assets return and standard deviation26                    for x in row.split(' '):27                        if x == '':28                            continue29                        self.returns_deviations.append(float(x))30                elif len(row.split()) == 3: # if row is len of 3 it will be the correlation between assets31                    for x in row.split(' '):32                        if x == '':33                            continue34                        self.correlations.append(float(x))35            # Variable for storing standard deviations of returns36            for i, z in zip(self.returns_deviations[0::2], self.returns_deviations[1::2]):37                temp_li_1.append([i, z])38            self.returns_deviations = temp_li_139            # Variable for storing correlations between assets40            zeros = np.zeros((int(self.number_of_stocks), 
int(self.number_of_stocks)))41            for x, y, z in zip(self.correlations[0::3], self.correlations[1::3], self.correlations[2::3]):42                temp_li_2.append([x, y, z])43                zeros[int(x)-1][int(y)-1] = z44            self.correlations = temp_li_245            # Creates a matrix of  returns and deviations46            self.returns_deviations = np.array(self.returns_deviations)47            # Splitting the data into variables needed for calculation48            self.deviations = self.returns_deviations[:, 1]49            self.mu = self.returns_deviations[:, 0]50            self.covariance = zeros * self.deviations * self.deviations.reshape((self.deviations.shape[0], 1))51            self.sigma = self.covariance + self.covariance.T - np.diag(self.covariance.diagonal()) #Fills in the second part of the covariance matrix52            # Making sure constraints on minimum and maximum investments are met53            if K * epsilon > 1.0:54                print("Minimum investment is too large")55                raise ValueError56            if K * delta < 1.0:57                print("Maximum investment is too small")58                raise ValueError59            self.F = 1.0 - K * epsilon60class Population:61    def __init__(self):62        """Population of solutions"""63        self.Population_size = 100 #Arbitrarily chosen64        self.population_weights = [] # Weights of the individuals in the population65        self.population_assets = [] # Assets of individuals in the population66        self.fitness = [] #A list containg the fitness of the individuals in the population67        self.population_proportions=[]68        self.best_fitness = 0 # Best f69        self.best_proportions = 0 # Best proportions by which each asset is in an individual70        self.best_weights = 071        self.best_assets = 072        self.best_covariance = 073        self.best_return = 074        self.Obj1 = []75        self.Obj2 = []76    def 
check_valid_solution(self, weights, proportions, assets, data):77        """Checks whether a solution is valid given constraints"""78            # Checking whether correct number of solutions has been picked79        if np.sum(weights != 0) != K:80            raise ValueError("More than " + str(K) + " assets selected (", weights.tolist(), ") in solution: " + str(weights))81            # Checking whether number and size of proportions is correct82        if np.any(proportions > 1) or np.any(proportions < 0) or len(proportions) != K:83            raise ValueError("The values of proportions are not valid: " + str(proportions))84            # Checking whether proportions sum up to 185        elif not np.isclose(weights.sum(), 1):86            raise ValueError("Proportions don't sum up to 1 (" + str(weights.sum()) + ") in solution: " + str(weights))87            # Checking whether maximum investment amount has not been exceeded88        elif np.any(weights > data.delta):89            raise ValueError("There's at least one proportion larger than delta: " + str(weights))90            # Checking for duplicate assets in a solution91        elif len(np.unique(assets)) != len(assets):92            raise ValueError("Duplicated assets in the portfolio: " + str(assets))93    def create_Population(Population, Lambda, l, data):94        """Initializes random population of solutions"""95        for i in range(Population.Population_size):96            #Initializing individuals in the popuplation97            R = np.random.permutation(N)[:K]98            # Random weights of the 10 assets99            s = np.random.rand(K)100            # Initializes weights101            w = np.zeros(N)102            # Initialized to make sure that the weights sum to 1103            L = s.sum()104            # Making sure that the random weights sum up to 1 given min investment105            w_temp = data.epsilon + s * data.F / L106            # Making sure the highest investment is met107            
is_too_large = (w_temp > data.delta)108            # If an investment would be too large the loop would stop109            while is_too_large.sum() > 0:110                # Reversing logic111                is_not_too_large = np.logical_not(is_too_large)112                # Sum of weights113                L = s[is_not_too_large].sum()114                # Calculates temporary F value115                F_temp = 1.0 - (data.epsilon * is_not_too_large.sum() + data.delta * is_too_large.sum())116                # Adding minimal investment and making sure the actual weights sum to 1 given min investment117                w_temp = data.epsilon + s * F_temp / L118                # Implementing Max investment amount119                w_temp[is_too_large] = data.delta120                # Checking for invesments that are too large121                is_too_large = (w_temp > data.delta)122            w[:] = 0123            w[R] = w_temp # Actual weights124            s = w_temp - data.epsilon # Investment proportions125            # Checking whether our solution is valid126            Population.check_valid_solution(w, s, R, data)127            # Adding valid solution to our population128            Population.population_proportions.append(s)129            Population.population_weights.append(w)130            Population.population_assets.append(R.tolist())131            # Calculating fitness of the population132        for i in Population.population_weights:133            obj1 = np.sum((i * i.reshape((i.shape[0], 1))) * data.sigma)134            obj2 = np.sum(i * data.mu)135            f = Lambda[l] * obj1 - (1 - Lambda[l]) * obj2136            Population.fitness.append(f)137            Population.Obj1.append(obj1) # Covariance138            Population.Obj2.append(obj2) # Expected Return139    def Genetic_Algorithm(Population, Lambda, l, data):140            """Applies the logic of genetic algorithm to the whole population"""141            if Population.Population_size == 1: # 
Used in case of different population sizes142                picked_individuals = np.random.permutation(Population.Population_size)[:4].tolist()*4143            else:144                # Selecting 4 different individuals from the population145                picked_individuals = np.random.permutation(Population.Population_size)[:4].tolist()146            # Initializing child of the selected individuals147            child_assets = []148            child_proportions = []149            child_weights = np.zeros(N)150            l = 0151            #Pool_1152            pair_1_assets = [Population.population_assets[picked_individuals[0]], Population.population_assets[picked_individuals[1]]]153            pair_1_fitness = [Population.fitness[picked_individuals[0]], Population.fitness[picked_individuals[1]]]154            pair_1_proportions = [Population.population_proportions[picked_individuals[0]], Population.population_proportions[picked_individuals[1]]]155            # Pool_2156            pair_2_assets = [Population.population_assets[picked_individuals[2]], Population.population_assets[picked_individuals[3]]]157            pair_2_fitness = [Population.fitness[picked_individuals[2]], Population.fitness[picked_individuals[3]]]158            pair_2_proportions = [Population.population_proportions[picked_individuals[2]], Population.population_proportions[picked_individuals[3]]]159            # Selecting parents for the uniform crossover160            parent_1_assets = pair_1_assets[pair_1_fitness.index(min(pair_1_fitness))]161            parent_1_proportions = pair_1_proportions[pair_1_fitness.index(min(pair_1_fitness))]162            parent_2_assets = pair_2_assets[pair_2_fitness.index(min(pair_2_fitness))]163            parent_2_proportions = pair_2_proportions[pair_2_fitness.index(min(pair_2_fitness))]164            # Looking for same assets in parents and inputting them into child165            common_assets = []166            for i in parent_1_assets:167            
    if i in parent_2_assets:168                    common_assets.append(i)169            child_assets += common_assets170            # Finding out what are the indexes of those assets in parents171            indexes_1 = []172            indexes_2 = []173            for i in common_assets:174                indexes_1.append(parent_1_assets.index(i))175                indexes_2.append(parent_2_assets.index(i))176            # Adding the proportions of same assets to child with 50% chance177            for m, h in zip(indexes_1, indexes_2):178                rand_1 = np.random.rand()179                if rand_1 > 0.5:180                    child_proportions.append(parent_1_proportions[m])181                else:182                    child_proportions.append(parent_2_proportions[h])183            # Creating new lists with assets that each parent don't have in common184            temp_parent_1_assets = []185            temp_parent_2_assets = []186            for m, h in zip(parent_1_assets, parent_2_assets):187                temp_parent_1_assets.append(m)188                temp_parent_2_assets.append(h)189            for i in common_assets:190                if i in temp_parent_1_assets:191                    temp_parent_1_assets.remove(i)192            for i in common_assets:193                if i in temp_parent_2_assets:194                    temp_parent_2_assets.remove(i)195            # Adding other assets and their corresponding proportions to the child196            for m, h in zip(temp_parent_1_assets, temp_parent_2_assets):197                rand_2 = np.random.rand()198                if rand_2 > 0.5:199                    child_assets.append(m)200                    child_proportions.append(parent_1_proportions[parent_1_assets.index(m)])201                else:202                    child_assets.append(h)203                    child_proportions.append(parent_2_proportions[parent_2_assets.index(h)])204            # Creating A*205            # A* is a set of 
assets that are in the parents, but are not in the child (together with their associated values)206            parent_minus_child_assets = []207            parent_minus_child_proportions = []208            for m, h in zip(parent_1_assets, parent_2_assets):209                if m not in child_assets:210                    parent_minus_child_assets.append(m)211                    parent_minus_child_proportions.append(parent_1_proportions[parent_1_assets.index(m)])212                if h not in child_assets:213                    parent_minus_child_assets.append(h)214                    parent_minus_child_proportions.append(parent_2_proportions[parent_2_assets.index(h)])215            # Assets that can be potentially added to the child in case parent_minus_child assets (A*) are empty216            other_assets = np.random.permutation(N).tolist()217            for i in other_assets:218                if i in child_assets:219                    other_assets.remove(i)220            # Mutation221            mutated_asset = np.random.choice(child_proportions)222            rand_3 = np.random.rand()223            if rand_3 > 0.5:224                child_proportions[child_proportions.index(mutated_asset)] = (0.9 * (data.epsilon + mutated_asset) - data.epsilon)  # m=1225            else:226                child_proportions[child_proportions.index(mutated_asset)] = (1.1 * (data.epsilon + mutated_asset) - data.epsilon)  # m=2227            mutated_child_proportions = child_proportions228            # Making sure the child does not have two identical assets229            for i in child_assets:230                if child_assets.count(i) > 1:231                    mutated_child_proportions.remove(mutated_child_proportions[child_assets.index(i)])232                    child_assets.remove(i)233            # Making sure all child proportion are between 0 and 1 (if not they get excluded)234            for i in mutated_child_proportions:235                if i < 0 or i > 1:236          
          child_assets.remove(child_assets[mutated_child_proportions.index(i)])237                    mutated_child_proportions.remove(i)238            # Ensure that child has exactly 10 assets and proportions239            while len(child_assets) > data.K and len(mutated_child_proportions) > data.K:240                child_assets.remove(child_assets.index(min(mutated_child_proportions)))241                mutated_child_proportions.remove(min(mutated_child_proportions))242                # Add assets from A* to child243            while len(child_assets) < data.K and len(mutated_child_proportions) < data.K:244                if len(parent_minus_child_assets) != 0:245                    rand_4 = np.random.choice(parent_minus_child_assets)246                    child_assets.append(rand_4)247                    mutated_child_proportions.append(parent_minus_child_proportions[parent_minus_child_assets.index(rand_4)])248                    parent_minus_child_proportions.remove(parent_minus_child_proportions[parent_minus_child_assets.index(rand_4)])249                    parent_minus_child_assets.remove(rand_4)250                    for i in mutated_child_proportions:251                        if i < 0 or i > 1:252                            child_assets.remove(child_assets[mutated_child_proportions.index(i)])253                            mutated_child_proportions.remove(i)254                    for i in child_assets:255                        if child_assets.count(i) > 1:256                            mutated_child_proportions.remove(mutated_child_proportions[child_assets.index(i)])257                            child_assets.remove(i)258                else: #In case A* is empty259                    rand_5=np.random.choice(other_assets)260                    child_assets.append(rand_5)261                    other_assets.remove(rand_5)262                    mutated_child_proportions.append(0)263                    for i in mutated_child_proportions:264                   
     if i < 0 or i > 1:265                            child_assets.remove(child_assets[mutated_child_proportions.index(i)])266                            mutated_child_proportions.remove(i)267                    for i in child_assets:268                        if child_assets.count(i) > 1:269                            mutated_child_proportions.remove(mutated_child_proportions[child_assets.index(i)])270                            child_assets.remove(i)271                # Given large amount of iterations and randomness all child proportions could be 0 hence set 1 at random to 0.01272                # Does not influence the overall result as it ist immediately replaced by a stronger individual273            if sum(mutated_child_proportions) == 0:274                mutated_child_proportions[mutated_child_proportions.index(np.random.choice(mutated_child_proportions))]= 0.01275            # Evaluating child276            mutated_child_proportions = np.array(mutated_child_proportions)277            L = mutated_child_proportions.sum()278            w_temp = data.epsilon + mutated_child_proportions * data.F / L279            is_too_large = (w_temp > data.delta)280            while is_too_large.sum() > 0:281                is_not_too_large = np.logical_not(is_too_large)282                L = mutated_child_proportions[is_not_too_large].sum()283                F_temp = 1.0 - (data.epsilon * is_not_too_large.sum() + data.delta * is_too_large.sum())284                w_temp = data.epsilon + mutated_child_proportions * F_temp / L285                w_temp[is_too_large] = data.delta286                is_too_large = (w_temp > data.delta)287            # Assigning weights to child288            child_weights[:] = 0289            child_weights[child_assets] = w_temp290            mutated_child_proportions = w_temp - data.epsilon291            # Calculating child fitness292            obj1 = np.sum((child_weights * child_weights.reshape((child_weights.shape[0], 1))) * data.sigma)293  
          obj2 = np.sum(child_weights * data.mu)294            child_fitness = Lambda[l] * obj1 - (1 - Lambda[l]) * obj2295            # Checking whether child is valid296            Population.check_valid_solution(child_weights, mutated_child_proportions, child_assets, data)297            # Substituting child into the population and removing the weakest member298            index_worst_member = np.argmax(Population.fitness)299            Population.fitness[index_worst_member] = child_fitness300            Population.population_proportions[index_worst_member] = mutated_child_proportions301            Population.population_weights[index_worst_member] = child_weights302            Population.population_assets[index_worst_member] = child_assets303            Population.Obj1[index_worst_member] = obj1304            Population.Obj2[index_worst_member] = obj2305            # Finding the best member of the population306            index_best_member = np.argmin(Population.fitness)307            Population.best_fitness = Population.fitness[index_best_member]308            Population.best_proportions = Population.population_proportions[index_best_member]309            Population.best_weights = Population.population_weights[index_best_member]310            Population.best_assets = Population.population_assets[index_best_member]311            Population.best_covariance = Population.Obj1[index_best_member]312            Population.best_return = Population.Obj2[index_best_member]313            return Population.best_fitness, Population.best_proportions, Population.best_assets, Population.best_weights, Population.best_covariance, Population.best_return314# Iterating through data files315stock_lengths = [31,85,89,98,225]316asset_files = ['assets1.txt', 'assets2.txt', 'assets3.txt', 'assets4.txt', 'assets5.txt']317for n, file in zip(stock_lengths, asset_files):318    l = 0319    N = n # Total number of assets in data file320    Nvalues = [N]321    Asset_File = file322    K = 10 # 
Number of assets to include in the portfolio323    E = 50 # Number of different lambda values324    # Initializing variables for collecting data on different lambdas325    lambdas = []326    Results_fitness = []327    Results_weights = []328    Results_assets = []329    Results_proportions = []330    Results_Covariances = []331    Results_Returns = []332    # Initializing the dataset333    dataset = DataSet(Asset_File, N, K)334    nevals = 0  # Counter for the number of iterations335    maxEvals = 1000 * N  # Solution evaluations per run336    # Sets a random seed for solution repeatability337    seed = 12345338    np.random.seed(seed)339    # Iterating through different values of lambda340    for e in range(1, E+1):341        Lambda = np.array([(e-1)/(E-1)]) # 50 lambda values equally spaced from 0 to 1342        lambdas.append(Lambda[l])343        # Initializing population344        population = Population()345        population.create_Population(Lambda, l, dataset)346        for i in range(maxEvals):347            population.Genetic_Algorithm(Lambda, l, dataset)348        # Collecting results349        Results_fitness.append(population.best_fitness)350        Results_weights.append(population.best_weights)351        Results_assets.append(population.best_assets)352        Results_Covariances.append(population.best_covariance)353        Results_Returns.append(population.best_return)354        # Tracking which lambda values is being currently calculated355        print(e)356        print("N={0}, Lambda = {1}, f = {2}".format(N, Lambda[l], population.best_fitness))357    Results_fitness = np.array(Results_fitness)358    Results_Returns = np.array(Results_Returns)359    Results_Covariances = np.array(Results_Covariances)360    # Statistics about f values361    f_stats = [Results_fitness.min(), Results_fitness.max(), Results_fitness.mean(), Results_fitness.std()]362    # Statistics about returns363    r_stats = [Results_Returns.min(), Results_Returns.max(), 
Results_Returns.mean(), Results_Returns.std()]364    # Statisitcs of the covariances365    cov_stats = [Results_Covariances.min(), Results_Covariances.max(), Results_Covariances.mean(), Results_Covariances.std()]366    # Statistical values about the F, Cov and R367    stats = pd.DataFrame(f_stats)368    stats[1] = r_stats369    stats[2] = cov_stats370    stats.columns = ['F value stats', 'Return stats', 'Covariance stats']371    # Results for the 50 lambda values372    results = pd.DataFrame(Results_fitness)373    results[1] = Results_Returns374    results[2] = Results_Covariances375    results.columns = ['F values', 'Returns', 'Covariances']376    # Lambdas used377    lambdas = pd.DataFrame(lambdas, columns=['Lambda'])378    # Weights of the best portfolios for the 50 lambda values379    weights = pd.DataFrame(Results_weights, columns=list(range(1, N+1)))380    # Indexes of  Assets used in each of the best portfolios for the 50 lambda values381    col_names = ['asset_{}'.format(i) for i in range(1, 11)]382    assets = pd.DataFrame(Results_assets, columns= col_names)383    # Creating CSV files for further analysis384    df_results = pd.concat([lambdas, results, assets, weights], axis=1)385    stats.to_csv('Generated data/Different Lambdas/stats_GA_'+file[:-4]+'.csv', index = False)...

Random Search.py

Source:Random Search.py

import numpy as np
import pandas as pd


class DataSet:
    """Cardinality-constrained portfolio problem instance read from a text file.

    After construction the instance exposes:
      * ``number_of_stocks`` - asset count read from the file,
      * ``mu``               - expected return of every asset,
      * ``deviations``       - standard deviation of every asset,
      * ``sigma``            - full symmetric covariance matrix,
      * ``F``                - capital left to distribute once every one of the
                               ``K`` selected assets received ``epsilon``.
    """

    def __init__(self, Asset_File, N, K, epsilon=0.01, delta=1.0):
        """Loads a dataset and divides its contents into variables.

        Args:
            Asset_File: file name, looked up inside the ``Datasets/`` directory.
            N: total number of assets in the dataset.
            K: number of assets in a solution.
            epsilon: minimum investment in a selected asset.
            delta: maximum investment in a selected asset.

        Raises:
            ValueError: if ``K * epsilon > 1`` or ``K * delta < 1``; a portfolio
                summing to 1 cannot exist under such bounds.
        """
        self.Asset_File = Asset_File
        self.N = N  # Total number of assets in a dataset
        self.K = K  # Total number of assets in a solution
        self.epsilon = epsilon  # Min investment
        self.delta = delta  # Max investment
        self.number_of_stocks = 0
        self.returns_deviations = []
        self.correlations = []
        self.covariance = np.nan

        # Rows are classified by their number of whitespace-separated fields:
        # 1 -> number of assets, 2 -> (return, deviation), 3 -> (i, j, correlation).
        # Splitting on any whitespace keeps tabs and trailing blanks harmless.
        with open('Datasets/{}'.format(Asset_File), newline='') as datafile:
            for row in datafile:
                fields = row.split()
                if len(fields) == 1:
                    self.number_of_stocks = int(fields[0])
                elif len(fields) == 2:
                    self.returns_deviations.append([float(x) for x in fields])
                elif len(fields) == 3:
                    self.correlations.append([float(x) for x in fields])

        # Correlation entries use 1-based asset indices; only the entries
        # listed in the file are filled in here.
        size = int(self.number_of_stocks)
        correlation_matrix = np.zeros((size, size))
        for x, y, z in self.correlations:
            correlation_matrix[int(x) - 1][int(y) - 1] = z

        # Creates a matrix of returns and deviations
        self.returns_deviations = np.array(self.returns_deviations)

        # Splitting the data into variables needed for calculation
        self.deviations = self.returns_deviations[:, 1]
        self.mu = self.returns_deviations[:, 0]
        self.covariance = (correlation_matrix * self.deviations
                           * self.deviations.reshape((self.deviations.shape[0], 1)))
        # Mirror the filled part onto the other triangle; the diagonal would
        # otherwise be counted twice, hence the subtraction.
        self.sigma = (self.covariance + self.covariance.T
                      - np.diag(self.covariance.diagonal()))

        # Making sure constraints on minimum and maximum investments are met
        if K * epsilon > 1.0:
            raise ValueError("Minimum investment is too large")
        if K * delta < 1.0:
            raise ValueError("Maximum investment is too small")

        self.F = 1.0 - K * epsilon


class Solution:
    """A randomly generated candidate portfolio."""

    def __init__(self, N, K):
        """Initializes a solution with K random assets out of N.

        Attributes:
            Q: indices of the K selected assets (no repetitions).
            s: K random numbers in [0, 1) later turned into proportions.
            w: weight of each of the N assets, zero until evaluated.
            obj1, obj2: risk and return, NaN until evaluated.
        """
        self.Q = np.random.permutation(N)[:K]
        self.s = np.random.rand(K)
        self.w = np.zeros(N)
        self.obj1 = np.nan
        self.obj2 = np.nan


def check_valid_solution(solution, dataset):
    """Checks whether a solution is valid given constraints.

    The required number of assets is taken from ``dataset.K`` so the check
    does not depend on a module-level ``K`` being defined.

    Raises:
        ValueError: describing the first violated constraint.
    """
    w = solution.w
    K = dataset.K
    selected = int(np.sum(w >= dataset.epsilon))
    # Checking whether correct number of assets has been picked
    if selected != K:
        raise ValueError("Expected " + str(K) + " assets but " + str(selected)
                         + " selected in solution: " + str(w))
    # Checking whether number and size of proportions is correct
    if np.any(solution.s > 1) or np.any(solution.s < 0) or len(solution.s) != K:
        raise ValueError("The values of solution.s are not valid: " + str(solution.s))
    # Checking whether proportions sum up to 1
    if not np.isclose(w.sum(), 1):
        raise ValueError("Proportions don't sum up to 1 (" + str(w.sum()) + ") in solution: " + str(w))
    # Checking whether maximum investment amount has not been exceeded
    if np.any(w > dataset.delta):
        raise ValueError("There's at least one proportion larger than delta: " + str(w))
    # Checking for duplicate assets in a solution
    if len(np.unique(solution.Q)) != len(solution.Q):
        raise ValueError("Duplicated assets in the portfolio: " + str(w))


def evaluate(solution, dataset, l, Lambda, best_value_found, best_solutions):
    """Turns a solution into weights and scores it.

    The weights are written into ``solution.w``, the proportions in
    ``solution.s`` are replaced by ``weight - epsilon`` and the objectives are
    stored in ``solution.obj1`` (risk) and ``solution.obj2`` (return).
    ``best_value_found[l]`` and ``best_solutions`` are updated in place when
    the solution improves on the incumbent.

    Returns:
        Tuple ``(improved, best_value_found[l])``.

    Raises:
        ValueError: if the resulting portfolio violates a constraint.
    """
    improved = False
    w = solution.w
    # Scale the random numbers so the weights sum to 1 after adding epsilon
    L = solution.s.sum()
    w_temp = dataset.epsilon + solution.s * dataset.F / L
    # Clamp weights above delta and redistribute the remaining capital among
    # the other assets until no weight exceeds delta any more.
    is_too_large = (w_temp > dataset.delta)
    while is_too_large.sum() > 0:
        is_not_too_large = np.logical_not(is_too_large)
        L = solution.s[is_not_too_large].sum()
        F_temp = 1.0 - (dataset.epsilon * is_not_too_large.sum()
                        + dataset.delta * is_too_large.sum())
        w_temp = dataset.epsilon + solution.s * F_temp / L
        w_temp[is_too_large] = dataset.delta
        is_too_large = (w_temp > dataset.delta)

    w[:] = 0
    w[solution.Q] = w_temp  # Actual weights
    solution.s = w_temp - dataset.epsilon  # Investment proportions

    check_valid_solution(solution, dataset)

    # Risk: sum over i, j of w_i * w_j * sigma_ij
    solution.obj1 = np.sum((w * w.reshape((w.shape[0], 1))) * dataset.sigma)
    # Expected return of the portfolio
    solution.obj2 = np.sum(w * dataset.mu)

    f = Lambda[l] * solution.obj1 - (1 - Lambda[l]) * solution.obj2

    # Replace current best with the new solution if it is better
    if f < best_value_found[l]:
        improved = True
        best_value_found[l] = f
        best_solutions.append(solution)

    # Report the entry that was compared and updated above.
    return improved, best_value_found[l]


def RandomSearch(maxEvals, Lambda):
    """Calculates solutions based on the logic of the Random Search algorithm.

    Reads the module-level names ``N``, ``K`` and ``dataset`` and appends the
    best solution's covariance, return, assets and weights to the module-level
    lists ``cov``, ``r``, ``assets`` and ``weights``.

    Args:
        maxEvals: number of random solutions to generate and evaluate.
        Lambda: scalar weight between the two objectives.

    Returns:
        Tuple ``(s, f)``: the last generated solution and the best f found.
    """
    Lambda = np.array([Lambda])
    # Start from +inf for every lambda so the first evaluated solution always
    # becomes the incumbent; a start value of 0 leaves ``best_solutions``
    # empty whenever no sampled portfolio reaches a negative f.
    best_value_found = np.array([np.inf])

    # List of best solutions ever found.
    best_solutions = []

    nevals = 0  # Counter for the number of iterations
    l = 0
    # Generate and evaluate new solutions until the evaluation budget is used
    while nevals < maxEvals:
        s = Solution(N, K)
        improved, f = evaluate(s, dataset, l, Lambda, best_value_found, best_solutions)
        nevals += 1

    # Collecting information on the best solution
    best = best_solutions[-1]
    cov.append(best.obj1)
    r.append(best.obj2)
    assets.append(best.Q)
    weights.append(best.w)

    return s, f


if __name__ == "__main__":
    # Iterating through data files
    stock_lengths = [31, 85, 89, 98, 225]
    asset_files = ['assets1.txt', 'assets2.txt', 'assets3.txt', 'assets4.txt', 'assets5.txt']
    for n, file in zip(stock_lengths, asset_files):
        N = n  # Total number of assets in data file
        Nvalues = [N]
        K = 10  # Number of assets to include in the portfolio
        E = 50  # Number of different lambda values

        # Initializing variables for collecting data on different lambdas
        Asset_File = file
        cov = []
        r = []
        weights = []
        assets = []
        fvalues = np.empty(E)
        lambvalues = np.empty(E)

        # Initializing the dataset
        dataset = DataSet(Asset_File, N, K, epsilon=0.01)

        maxEvals = 1000 * N  # Maximum solution evaluations

        # Sets a random seed for solution repeatability
        seed = 12345
        np.random.seed(seed)

        # Iterating through different values of lambda
        for e in range(1, E + 1):
            Lambda = np.array([(e - 1) / (E - 1)])  # E lambda values equally spaced from 0 to 1
            s, f = RandomSearch(maxEvals, Lambda[0])
            print("N={0}, Lambda = {1}, f = {2}".format(N, Lambda[0], f))
            fvalues[e - 1] = f
            lambvalues[e - 1] = Lambda[0]

            # Tracking which lambda value is currently being calculated
            print(e)
        print("N={0}, mean = {1}, sd = {2}, min = {3}, max = {4}, lamb={5}".format(N, fvalues.mean(), fvalues.std(), fvalues.min(), fvalues.max(),lambvalues))

        # Returns
        r = np.array(r)
        # Weights
        weights = np.array(weights)
        # Covariances
        cov = np.array(cov)
        # Statistics about f values
        f_stats = [fvalues.min(), fvalues.max(), fvalues.mean(), fvalues.std()]
        # Statistics about returns
        r_stats = [r.min(), r.max(), r.mean(), r.std()]
        # Statistics of the covariances
        cov_stats = [cov.min(), cov.max(), cov.mean(), cov.std()]
        # The actual f values
        fs = np.array(fvalues)
        # Lambda values
        ls = np.array(lambvalues)

        # Statistical values about the F, Cov and R
        stats = pd.DataFrame(f_stats)
        stats[1] = r_stats
        stats[2] = cov_stats
        stats.columns = ['F value stats', 'Return stats', 'Covariance stats']

        # Results for the lambda values
        results = pd.DataFrame(fs)
        results[1] = r
        results[2] = cov
        results.columns = ['F values', 'Returns', 'Covariances']

        # Weights of the best portfolios for the lambda values
        weights = pd.DataFrame(weights, columns=list(range(1, Nvalues[0] + 1)))

        # Indexes of assets used in each of the best portfolios; one column
        # per selected asset, so the count follows K.
        col_names = ['asset_{}'.format(i) for i in range(1, K + 1)]
        assets = pd.DataFrame(assets, columns=col_names)

        # Lambdas used
        Lambdas = pd.DataFrame(ls, columns=['Lambda'])

        # Creating CSV files for further analysis
        df_results = pd.concat([Lambdas, results, assets, weights], axis=1)
        stats.to_csv('Generated data/Different Lambdas/stats_RS_'+file[:-4]+'.csv', index=False)
...

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.