How to use _less_or_equal method in pandera

Best Python code snippet using pandera_python
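The `_less_or_equal` name shown in the snippets below is an internal helper; in pandera itself, less-than-or-equal validation is exposed through the public `Check.less_than_or_equal_to` API. A minimal sketch of that public entry point (the "age" column and the 120 bound are invented for illustration):

import pandas as pd
import pandera as pa

# hypothetical schema: the "age" column and the 120 bound are illustrative
schema = pa.DataFrameSchema(
    {"age": pa.Column(int, pa.Check.less_than_or_equal_to(120))}
)

df = pd.DataFrame({"age": [25, 40, 99]})
print(schema.validate(df))  # raises a SchemaError if any value exceeds 120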

Challange_Python.py

Source: Challange_Python.py (GitHub)


...
        if self.right is not None:
            big_count += self.right._less(compare)
        return big_count

    def _less_or_equal(self, compare):
        """
        _less_or_equal: private function that recurses over the leaves to deliver
        the number of values less than or equal to the compare value.

        param compare: a value to compare against all of the tree's values.

        Return big_count: the number of values less than or equal to the compare
        value, including each node's own repetition count.
        """
        big_count = 0
        if self.val <= compare:
            big_count += 1 + self.counter
        if self.left is not None:
            big_count += self.left._less_or_equal(compare)
        if self.right is not None:
            big_count += self.right._less_or_equal(compare)
        return big_count

    def _between(self, inf, sup):
        """
        _between: private function that recurses over the leaves to deliver the
        number of values between the inf and sup parameters. The order of inf and
        sup does not matter, since it always takes the count of values between them.

        param1 inf: one limit of the interval
        param2 sup: the other limit of the interval

        Return big_count: the number of values between the parameter values,
        including each node's own repetition count.
        """
        if inf > sup:
            inf, sup = sup, inf
        return abs(self._less_or_equal(sup) - self._less(inf))

    def postorder(self, vals):
        if self.left is not None:
            self.left.postorder(vals)
        if self.right is not None:
            self.right.postorder(vals)
        if self.val is not None:
            vals.append(self.val)
        return vals

    def build_stats(self):
        innerStatisticsAccess = StaticsAccess()
        innerStatisticsAccess.bigger = self._greater
        innerStatisticsAccess.less = self._less
...
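To see the counting logic on its own, here is a minimal, self-contained sketch (a hypothetical Node class, not the full tree from Challange_Python.py) that reproduces the per-node duplicate counter the snippet relies on:

class Node:
    def __init__(self, val):
        self.val = val
        self.counter = 0  # extra occurrences of val beyond the first
        self.left = None
        self.right = None

    def insert(self, val):
        if val == self.val:
            self.counter += 1
        elif val < self.val:
            if self.left is None:
                self.left = Node(val)
            else:
                self.left.insert(val)
        else:
            if self.right is None:
                self.right = Node(val)
            else:
                self.right.insert(val)

    def _less_or_equal(self, compare):
        big_count = 0
        if self.val <= compare:
            big_count += 1 + self.counter
        if self.left is not None:
            big_count += self.left._less_or_equal(compare)
        if self.right is not None:
            big_count += self.right._less_or_equal(compare)
        return big_count

root = Node(5)
for v in (3, 7, 3, 9):
    root.insert(v)
print(root._less_or_equal(5))  # 3 -> the values 3, 3, and 5

This is also why _between only needs the two counting primitives: _less_or_equal(sup) - _less(inf) is exactly the number of stored values in the closed interval [inf, sup].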


decision_tree.py

Source: decision_tree.py (GitHub)


import numpy as np


class DecisionTree:
    """
    Decision Tree Classifier

    Attributes:
        root: Root Node of the tree.
        max_depth: Max depth allowed for the tree.
        size_allowed: Min split size, the smallest node size allowed for a split.
        n_features: Number of features to use while building the tree. (Random Forest)
        n_split: Number of splits for each feature. (Random Forest)
    """

    def __init__(self, max_depth=1000, size_allowed=1, n_features=None, n_split=None):
        """
        Initializations for class attributes.
        """
        self.root = None
        self.max_depth = max_depth
        self.size_allowed = size_allowed
        self.n_features = n_features
        self.n_split = n_split

    class Node:
        """
        Node class for building the tree.

        Attributes:
            threshold: The split threshold, as in x1 <= threshold.
            feature: The index of the feature used at this node.
            left: Pointer to the node on the left.
            right: Pointer to the node on the right.
            pure: Bool, describes whether this node is pure (a leaf).
            predict: Class label, the most common y at this node.
        """

        def __init__(self, threshold=None, feature=None):
            """
            Initializations for class attributes.
            """
            self.threshold = threshold
            self.feature = feature
            self.left = None
            self.right = None
            self.pure = False
            self.depth = 1
            self.predict = None

    def entropy(self, lst):
        """
        Calculate the entropy of lst, a vector of labels.

        Variables:
            entro: accumulates the entropy at each step.
            classes: all possible classes (without repetition).
            counts: counts of each possible class.
            total_counts: number of instances in lst.
        """
        entro = 0
        classes, counts = np.unique(lst, return_counts=True)
        total_counts = len(lst)
        probs = counts / total_counts
        for i in probs:
            # ignore probabilities of 0
            if i != 0:
                entro = entro - i * np.log(i)
        return entro

    def information_gain(self, lst, values, threshold):
        """
        Calculate the information gain of a split, using the entropy function.

        lst is a vector of labels.
        values is the vector of values for an individual feature.
        threshold is the split threshold used for calculating the entropy.
        """
        # find the left and right indices
        _less_or_equal = np.where(values <= threshold)[0]
        _above = np.where(values > threshold)[0]
        left_prop = len(_less_or_equal) / len(values)
        right_prop = len(_above) / len(values)
        # entropy must be computed on the labels at those indices,
        # not on the index arrays themselves
        left_entropy = self.entropy(lst[_less_or_equal])
        right_entropy = self.entropy(lst[_above])
        return self.entropy(lst) - (
            left_prop * left_entropy + right_prop * right_entropy
        )

    def find_rules(self, data):
        """
        Helper function to find the candidate split rules.

        data is a matrix or 2-D numpy array representing training instances.
        Each training instance is a feature vector.
        """
        rules = []
        for i in data.T:
            unique_value = np.unique(i)
            # take the midpoint between each pair of adjacent unique values
            diff = [
                (unique_value[x] + unique_value[x + 1]) / 2
                for x in range(len(unique_value) - 1)
            ]
            rules.append(diff)
        return rules

    def next_split(self, data, label):
        """
        Helper function to find the split with the most information gain,
        using find_rules and information_gain.

        data is a matrix or 2-D numpy array representing training instances.
        Each training instance is a feature vector.
        label contains the corresponding labels.
        """
        rules = self.find_rules(data)
        max_info = -float("inf")
        num_col = None
        threshold = None
        # Check the number of features to use; None means all features
        # (a plain decision tree always uses all features).
        # If n_features is an int, use n_features features chosen at random.
        # If n_features == 'sqrt', use sqrt(total number of features) chosen at random.
        if not self.n_features:
            index_col = np.arange(data.shape[1])
        else:
            if self.n_features == "sqrt":
                num_index = int(np.sqrt(data.shape[1]))
            elif isinstance(self.n_features, int):
                num_index = self.n_features
            np.random.seed()
            index_col = np.random.choice(data.shape[1], num_index, replace=False)
        # Do a similar selection for the rules; n_split takes None, an int, or 'sqrt'.
        # For every selected feature and its corresponding rules, check the information gain.
        _data_T = data.T
        for i in index_col:
            count_temp_rules = len(rules[i])
            if not self.n_split:
                index_rules = np.arange(count_temp_rules)
            else:
                if self.n_split == "sqrt":
                    num_rules = int(np.sqrt(count_temp_rules))
                elif isinstance(self.n_split, int):
                    num_rules = self.n_split
                np.random.seed()
                # get a random subset of indices into the rule list
                index_rules = np.random.choice(
                    count_temp_rules, num_rules, replace=False
                )
            for j in index_rules:
                info = self.information_gain(label, _data_T[i], rules[i][j])
                if info > max_info:
                    max_info = info
                    num_col = i
                    threshold = rules[i][j]
        return threshold, num_col

    def build_tree(self, X, y, depth):
        """
        Helper function for building the tree.
        """
        first_threshold, first_feature = self.next_split(X, y)
        current = self.Node(first_threshold, first_feature)
        # Check whether we passed max_depth, whether first_feature is None,
        # and the minimum split size. If any of those conditions is met, mark
        # current as pure, set predict to the most popular label, and return current.
        if (
            depth > self.max_depth
            or first_feature is None
            or X.shape[0] <= self.size_allowed
        ):
            _values, _counts = np.unique(y, return_counts=True)
            ind = np.argmax(_counts)
            current.predict = _values[ind]
            current.pure = True
            return current
        # If there is only 1 label at this node, mark current as pure
        # and set predict to that label.
        if len(np.unique(y)) == 1:
            current.predict = y[0]
            current.pure = True
            return current
        # Left node indices have feature i <= threshold; right have feature i > threshold.
        left_index = X[:, first_feature] <= first_threshold
        right_index = X[:, first_feature] > first_threshold
        # If either side is empty, mark current as pure and set predict to the
        # most popular label. left_index and right_index are boolean masks,
        # so test their sums rather than their lengths.
        if left_index.sum() == 0 or right_index.sum() == 0:
            _values, _counts = np.unique(y, return_counts=True)
            ind = np.argmax(_counts)
            current.predict = _values[ind]
            current.pure = True
            return current
        left_X, left_y = X[left_index, :], y[left_index]
        current.left = self.build_tree(left_X, left_y, depth + 1)
        right_X, right_y = X[right_index, :], y[right_index]
        current.right = self.build_tree(right_X, right_y, depth + 1)
        return current

    def fit(self, X, y):
        """
        Fit the decision tree model on the training data.

        X is a matrix or 2-D numpy array representing training instances.
        Each training instance is a feature vector.
        y contains the corresponding labels. There might be multiple (i.e., > 2) classes.
        """
        self.root = self.build_tree(X, y, 1)
        return self

    def ind_predict(self, inp):
        """
        Predict the most likely class label of one test instance based on its feature vector.
        """
        cur = self.root
        # Stop once we reach a pure node.
        while not cur.pure:
            feature = cur.feature
            threshold = cur.threshold
            if inp[feature] <= threshold:
                cur = cur.left
            else:
                cur = cur.right
        return cur.predict

    def predict(self, inp):
        """
        inp is a matrix or 2-D numpy array representing test instances.
        Each test instance is a feature vector.
        Return the predictions for all instances as a list.
        """
        # iterate over the row indices, not the integer inp.shape[0] itself
        result = [self.ind_predict(inp[i]) for i in range(inp.shape[0])]
...
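The snippet is cut off just before predict returns, so the quick smoke test below (toy data invented for illustration) exercises the class through fit and ind_predict only:

import numpy as np

# two well-separated classes in a 2-D feature space
X = np.array([[2.0, 1.0], [3.0, 1.5], [10.0, 8.0], [11.0, 9.0]])
y = np.array([0, 0, 1, 1])

clf = DecisionTree(max_depth=5).fit(X, y)
print(clf.ind_predict(np.array([2.5, 1.2])))   # expected: 0
print(clf.ind_predict(np.array([10.5, 8.5])))  # expected: 1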


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub, right from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

