Best Python code snippet using pandera_python
Challange_Python.py
Source:Challange_Python.py  
...
        if self.right is not None:
            big_count += self.right._less(compare)
        return big_count

    def _less_or_equal(self, compare):
        """
        _less_or_equal: private function that walks the tree and returns the number
        of stored values less than or equal to the compare value.

        param compare: the value compared against every value in the tree.

        Return big_count: the count of values less than or equal to compare,
            including each node's repeat count.
        """
        big_count = 0
        if self.val <= compare:
            big_count += 1 + self.counter
        if self.left is not None:
            big_count += self.left._less_or_equal(compare)
        if self.right is not None:
            big_count += self.right._less_or_equal(compare)
        return big_count

    def _between(self, inf, sup):
        """
        _between: private function that walks the tree and returns the number of
        stored values between the inf and sup parameters. The order of inf and sup
        does not matter, since the bounds are swapped when needed and the count is
        taken as the difference between the two one-sided counts.

        param1 inf: one limit of the interval
        param2 sup: the other limit of the interval

        Return big_count: the count of values between the two limits,
            including each node's repeat count.
        """
        if inf > sup:
            inf, sup = sup, inf
        return abs(self._less_or_equal(sup) - self._less(inf))

    def postorder(self, vals):
        # Post-order traversal: left subtree, right subtree, then the node's own value.
        if self.left is not None:
            self.left.postorder(vals)
        if self.right is not None:
            self.right.postorder(vals)
        if self.val is not None:
            vals.append(self.val)
        return vals

    def build_stats(self):
        innerStatisticsAccess = StaticsAccess()
        innerStatisticsAccess.bigger = self._greater
        innerStatisticsAccess.less = self._less
...
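The excerpt begins mid-class, so the node definition itself is elided. Purely for context, here is a minimal sketch of a counting BST node consistent with the methods above; the class name Node, the insert method, and the counter-for-duplicates convention are assumptions, not taken from the source:

class Node:
    """Hypothetical counting-BST node matching the excerpt's attributes."""

    def __init__(self, val):
        self.val = val      # the stored value
        self.counter = 0    # extra occurrences of val beyond the first (assumed)
        self.left = None
        self.right = None

    def insert(self, val):
        # Duplicates bump the counter instead of adding a new leaf (assumed).
        if val == self.val:
            self.counter += 1
        elif val < self.val:
            if self.left is None:
                self.left = Node(val)
            else:
                self.left.insert(val)
        else:
            if self.right is None:
                self.right = Node(val)
            else:
                self.right.insert(val)

    def _less(self, compare):
        # Count values strictly less than compare, mirroring _less_or_equal above.
        big_count = 0
        if self.val < compare:
            big_count += 1 + self.counter
        if self.left is not None:
            big_count += self.left._less(compare)
        if self.right is not None:
            big_count += self.right._less(compare)
        return big_count

With _less_or_equal and _between from the excerpt added to the same class, a query like the following would work:

root = Node(5)
for v in (1, 3, 3, 7):
    root.insert(v)
print(root._between(2, 6))  # 3: the two 3s and the 5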
decision_tree.py
Source:decision_tree.py
import numpy as np


class DecisionTree:
    """
    Decision Tree Classifier
    Attributes:
        root: Root Node of the tree.
        max_depth: Max depth allowed for the tree.
        size_allowed: Min split size, the smallest node size allowed for a split.
        n_features: Number of features to use while building the tree. (Random Forest)
        n_split: Number of splits to try for each feature. (Random Forest)
    """

    def __init__(self, max_depth=1000, size_allowed=1, n_features=None, n_split=None):
        """
        Initializations for class attributes.
        """
        self.root = None  # set when fit() builds the tree
        self.max_depth = max_depth
        self.size_allowed = size_allowed
        self.n_features = n_features
        self.n_split = n_split

    class Node:
        """
        Node class for building the tree.
        Attributes:
            threshold: The split threshold, as in "if x1 <= threshold".
            feature: The index of the feature used at this node.
            left: Pointer to the node on the left.
            right: Pointer to the node on the right.
            pure: Bool, describes whether this node is pure (a leaf).
            predict: The most common label on this node.
        """

        def __init__(self, threshold=None, feature=None):
            """
            Initializations for class attributes.
            """
            self.threshold = threshold
            self.feature = feature
            self.left = None
            self.right = None
            self.pure = False
            self.depth = 1
            self.predict = None

    def entropy(self, lst):
        """
        Calculate the entropy of the label vector lst.
        Attributes:
            entro: accumulates the entropy term of each class.
            classes: all possible classes (without repeating terms).
            counts: count of each possible class.
            total_counts: number of instances in lst.
        """
        entro = 0
        classes, counts = np.unique(lst, return_counts=True)
        total_counts = len(lst)
        probs = counts / total_counts
        for i in probs:
            # ignore classes with probability 0
            if i != 0:
                entro = entro - i * np.log(i)
        return entro

    def information_gain(self, lst, values, threshold):
        """
        Calculate the information gain of a split, using the entropy function.
        lst is a vector of labels.
        values is the vector of values of an individual feature.
        threshold is the split threshold used for calculating the entropy.
        """
        # find the left and right indices
        left_idx = np.where(values <= threshold)[0]
        right_idx = np.where(values > threshold)[0]
        left_prop = len(left_idx) / len(values)
        right_prop = len(right_idx) / len(values)
        # entropy must be computed on the labels falling on each side,
        # not on the index arrays themselves
        left_entropy = self.entropy(lst[left_idx])
        right_entropy = self.entropy(lst[right_idx])
        return self.entropy(lst) - (
            left_prop * left_entropy + right_prop * right_entropy
        )

    def find_rules(self, data):
        """
        Helper function to find the candidate split rules.
        data is a matrix or 2-D numpy array representing the training instances.
        Each training instance is a feature vector.
        """
        rules = []
        for col in data.T:
            unique_value = np.unique(col)
            # candidate thresholds: the midpoint between each pair of adjacent unique values
            midpoints = [
                (unique_value[x] + unique_value[x + 1]) / 2
                for x in range(len(unique_value) - 1)
            ]
            rules.append(midpoints)
        return rules

    def next_split(self, data, label):
        """
        Helper function to find the split with the most information gain,
        using find_rules and information_gain.
        data is a matrix or 2-D numpy array representing the training instances.
        Each training instance is a feature vector.
        label contains the corresponding labels.
        """
        rules = self.find_rules(data)
        max_info = -float("inf")
        num_col = None
        threshold = None
        # Check the number of features to use; None means all features
        # (a plain Decision Tree always uses all features).
        # If n_features is an int, use that many features chosen at random.
        # If n_features == 'sqrt', use sqrt(total number of features) chosen at random.
        if not self.n_features:
            index_col = np.arange(data.shape[1])
        else:
            if self.n_features == "sqrt":
                num_index = int(np.sqrt(data.shape[1]))
            elif isinstance(self.n_features, int):
                num_index = self.n_features
            np.random.seed()
            index_col = np.random.choice(data.shape[1], num_index, replace=False)
        # Do the same selection for rules: n_split takes None, an int, or 'sqrt'.
        # For every selected feature and its corresponding rules, check the information gain.
        _data_T = data.T
        for i in index_col:
            count_temp_rules = len(rules[i])
            if not self.n_split:
                index_rules = np.arange(count_temp_rules)
            else:
                if self.n_split == "sqrt":
                    num_rules = int(np.sqrt(count_temp_rules))
                elif isinstance(self.n_split, int):
                    num_rules = self.n_split
                np.random.seed()
                # pick a random subset of indices into the rule list
                index_rules = np.random.choice(
                    count_temp_rules, num_rules, replace=False
                )
            for j in index_rules:
                info = self.information_gain(label, _data_T[i], rules[i][j])
                if info > max_info:
                    max_info = info
                    num_col = i
                    threshold = rules[i][j]
        return threshold, num_col

    def build_tree(self, X, y, depth):
        """
        Helper function for building the tree recursively.
        (fit() assigns the returned root, so self.root is not set here.)
        """
        first_threshold, first_feature = self.next_split(X, y)
        current = self.Node(first_threshold, first_feature)
        # Check whether we passed max_depth, whether first_feature is None,
        # and the minimum split size. If any of those conditions is met,
        # mark current as pure, set predict to the most popular label, and return.
        if (
            depth > self.max_depth
            or first_feature is None
            or X.shape[0] <= self.size_allowed
        ):
            _values, _counts = np.unique(y, return_counts=True)
            ind = np.argmax(_counts)
            current.predict = _values[ind]
            current.pure = True
            return current
        # If there is only one label on this node, mark it pure and predict that label.
        if len(np.unique(y)) == 1:
            current.predict = y[0]
            current.pure = True
            return current
        # Left rows have feature value <= threshold; right rows have feature value > threshold.
        left_index = X[:, first_feature] <= first_threshold
        right_index = X[:, first_feature] > first_threshold
        # If either side is empty, mark current as pure and predict the most popular label.
        # (left_index and right_index are boolean masks, so test .any() rather than len().)
        if not left_index.any() or not right_index.any():
            _values, _counts = np.unique(y, return_counts=True)
            ind = np.argmax(_counts)
            current.predict = _values[ind]
            current.pure = True
            return current
        left_X, left_y = X[left_index, :], y[left_index]
        current.left = self.build_tree(left_X, left_y, depth + 1)
        right_X, right_y = X[right_index, :], y[right_index]
        current.right = self.build_tree(right_X, right_y, depth + 1)
        return current

    def fit(self, X, y):
        """
        Fit the Decision Tree model on the training data.
        X is a matrix or 2-D numpy array representing the training instances.
        Each training instance is a feature vector.
        y contains the corresponding labels. There might be multiple (i.e., > 2) classes.
        """
        self.root = self.build_tree(X, y, 1)
        return self

    def ind_predict(self, inp):
        """
        Predict the most likely class label of one test instance based on its feature vector inp.
        """
        cur = self.root
        # Stop condition: we are at a pure (leaf) node.
        while not cur.pure:
            feature = cur.feature
            threshold = cur.threshold
            if inp[feature] <= threshold:
                cur = cur.left
            else:
                cur = cur.right
        return cur.predict

    def predict(self, inp):
        """
        inp is a matrix or 2-D numpy array representing the testing instances.
        Each testing instance is a feature vector.
        Return the predictions of all instances in a list.
        """
        result = [self.ind_predict(inp[i]) for i in range(inp.shape[0])]
...
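As a quick sanity check of entropy and information_gain, here is a small worked example; the toy arrays are illustrative, not from the source, and entropy uses the natural log:

import numpy as np

tree = DecisionTree()

# Four labels split evenly between two classes:
# entropy = -(0.5*ln 0.5 + 0.5*ln 0.5) = ln 2 ≈ 0.6931.
labels = np.array([0, 0, 1, 1])
print(tree.entropy(labels))                        # ~0.6931

# A feature that separates the classes perfectly at threshold 0.5:
# both children are pure (entropy 0), so the gain equals the parent entropy.
values = np.array([0.0, 0.0, 1.0, 1.0])
print(tree.information_gain(labels, values, 0.5))  # ~0.6931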
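And a minimal end-to-end sketch on made-up, linearly separable data; since the listing's predict is cut off before its return statement, this calls ind_predict row by row instead:

import numpy as np

# Two well-separated 2-D clusters: class 0 near (0, 0), class 1 near (5, 5).
rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, (20, 2)), rng.normal(5, 1, (20, 2))])
y = np.array([0] * 20 + [1] * 20)

clf = DecisionTree(max_depth=5).fit(X, y)    # fit returns self
preds = np.array([clf.ind_predict(row) for row in X])
print((preds == y).mean())                   # expected 1.0 on this easy data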
