How to use the use_partition method in autotest

Best Python code snippet using autotest_python

decision_tree_classifier.py

Source: decision_tree_classifier.py (GitHub)


import collections
import math

"""This program creates a decision tree for data based on the attributes of the data.
It currently works with categorical and quantitative attributes and prints a preorder
traversal of the decision tree."""

training_data = [
    ['Sunny', 'Hot', 'High', 'False', 'No'],
    ['Sunny', 'Hot', 'High', 'True', 'No'],
    ['Overcast', 'Hot', 'High', 'False', 'Yes'],
    ['Rainy', 'Mild', 'High', 'False', 'Yes'],
    ['Rainy', 'Cool', 'Normal', 'False', 'Yes'],
    ['Rainy', 'Cool', 'Normal', 'True', 'No'],
    ['Overcast', 'Cool', 'Normal', 'True', 'Yes'],
    ['Sunny', 'Mild', 'High', 'False', 'No'],
    ['Sunny', 'Cool', 'Normal', 'False', 'Yes'],
    ['Rainy', 'Mild', 'Normal', 'False', 'Yes'],
    ['Sunny', 'Mild', 'Normal', 'True', 'Yes'],
    ['Overcast', 'Mild', 'High', 'True', 'Yes'],
    ['Overcast', 'Hot', 'Normal', 'False', 'Yes'],
    ['Rainy', 'Mild', 'High', 'True', 'No']
]

def entropy_of(rows):
    # Entropy of the class labels (last column); base-10 logarithm as in the original.
    outcomes = collections.Counter(j[-1] for j in rows)
    den = sum(outcomes.values())
    entropy = 0
    for j in outcomes.values():
        entropy -= (j / den) * math.log((j / den), 10)
    return entropy

def recursive(dataset, used_attributes):
    # Termination conditions.
    if not dataset:
        print('stop1')
        return None
    if len(collections.Counter(j[-1] for j in dataset)) == 1:
        print('stop3', 'classify as', dataset[0][-1])
        return None
    if len(used_attributes) == (len(dataset[0]) - 1):
        print('stop2', 'likely need more attributes')
        return None
    # Gain is measured against the entropy of the current dataset (the original
    # compared against the root entropy, which can leave `attribute` unbound on
    # sub-datasets where every gain comes out negative).
    parent_entropy = entropy_of(dataset)
    max_info_gain = 0
    for i in range(len(dataset[0]) - 1):  # each attribute; -1 skips the class label
        if i not in used_attributes:
            if type(dataset[0][i]) is str:  # categorical attribute
                partitions = {}
                for m in dataset:
                    partitions.setdefault(m[i], []).append(m)
                weighted_entropy = 0
                for n in partitions:
                    weighted_entropy += entropy_of(partitions[n]) * len(partitions[n]) / len(dataset)
                cur_info_gain = parent_entropy - weighted_entropy
                if cur_info_gain >= max_info_gain:
                    max_info_gain = cur_info_gain
                    use_partition = partitions  # best split found so far
                    attribute = i
            else:  # non-categorical attribute, i.e. a real number
                sorted_values = sorted(set(p[i] for p in dataset))
                # candidate split points: half the smallest value, then the midpoints
                split_values = [sorted_values[0] / 2]
                for t in range(len(sorted_values) - 1):
                    split_values.append((sorted_values[t] + sorted_values[t + 1]) / 2)
                partitions = {k: [[], []] for k in split_values}
                for m in dataset:
                    for l in split_values:
                        partitions[l][0 if m[i] < l else 1].append(m)
                for h in partitions:
                    weighted_entropy = 0
                    for r in range(2):
                        weighted_entropy += entropy_of(partitions[h][r]) * len(partitions[h][r]) / len(dataset)
                    cur_info_gain = parent_entropy - weighted_entropy
                    if cur_info_gain >= max_info_gain:
                        max_info_gain = cur_info_gain
                        use_partition = {h: partitions[h]}  # best split: one split point
                        attribute = i
    print('new node for attribute', attribute)
    if type(dataset[0][attribute]) is str:
        for k in use_partition.values():
            print('under', k[0][attribute], 'for attribute', attribute)
            recursive(k, used_attributes + [attribute])
    else:
        for k in use_partition.values():
            for b in k:
                temp = 'less than' if b is k[0] else 'greater than'
                print('under', temp, list(use_partition.keys()))
                recursive(b, used_attributes + [attribute])
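In this snippet, use_partition holds the winning split: for a categorical attribute it is a dict mapping each attribute value to the rows under that branch, and for a numeric attribute it maps a single split point to a [below, above] pair of row lists. The file is truncated before any top-level call, so the entry point below is an assumption, not part of the source:

# Hypothetical entry point (the snippet is truncated before any top-level call):
# grow the tree from the full training set with no attributes used yet.
recursive(training_data, [])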


regression_tree.py

Source: regression_tree.py (GitHub)


import collections

"""This program creates a regression tree for data based on the attributes of the data.
It currently works with categorical and quantitative attributes and prints a preorder
traversal of the regression tree."""

training_data = [
    ['Sunny', 'Hot', 'High', 'False', 25],
    ['Sunny', 'Hot', 'High', 'True', 30],
    ['Overcast', 'Hot', 'High', 'False', 46],
    ['Rainy', 'Mild', 'High', 'False', 45],
    ['Rainy', 'Cool', 'Normal', 'False', 52],
    ['Rainy', 'Cool', 'Normal', 'True', 23],
    ['Overcast', 'Cool', 'Normal', 'True', 43],
    ['Sunny', 'Mild', 'High', 'False', 35],
    ['Sunny', 'Cool', 'Normal', 'False', 38],
    ['Rainy', 'Mild', 'Normal', 'False', 46],
    ['Sunny', 'Mild', 'Normal', 'True', 48],
    ['Overcast', 'Mild', 'High', 'True', 52],
    ['Overcast', 'Hot', 'Normal', 'False', 44],
    ['Rainy', 'Mild', 'High', 'True', 30]
]

tolerance = 6

def std_dev(dataset, population):
    # Standard deviation of the target values (last column).
    if len(dataset) <= 1:  # also guards the empty side a numeric split can produce
        return 0
    values = [i[-1] for i in dataset]
    mean = sum(values) / len(values)
    squared_deviations = [(j - mean) ** 2 for j in values]
    den = len(dataset) if population else len(dataset) - 1
    variance = sum(squared_deviations) / den
    return variance ** 0.5

def recursive(dataset, used_attributes):
    # Termination conditions.
    if not dataset:
        print('stop1')
        return None
    if len(dataset) == 1:
        print('stop3', 'classify as', dataset[0][-1])
        return None
    if std_dev(dataset, False) <= tolerance:
        print('stop4 classify as', sum(j[-1] for j in dataset) / len(dataset))
        return None
    if len(used_attributes) == (len(dataset[0]) - 1):
        print('stop2', 'likely need more attributes')
        return None
    # Reduction is measured against the deviation of the current dataset (the
    # original compared against the root value, which can leave `attribute`
    # unbound on sub-datasets where every reduction comes out negative).
    parent_std_dev = std_dev(dataset, True)
    max_std_red = 0
    for i in range(len(dataset[0]) - 1):  # each attribute; -1 skips the target value
        if i not in used_attributes:
            if type(dataset[0][i]) is str:  # categorical attribute
                partitions = {}
                for m in dataset:
                    partitions.setdefault(m[i], []).append(m)
                weighted_std_dev = 0
                for k in partitions:
                    weighted_std_dev += std_dev(partitions[k], False) * len(partitions[k]) / len(dataset)
                cur_std_red = parent_std_dev - weighted_std_dev
                if cur_std_red >= max_std_red:
                    max_std_red = cur_std_red
                    use_partition = partitions  # best split found so far
                    attribute = i
            else:  # non-categorical attribute, i.e. a real number
                sorted_values = sorted(set(p[i] for p in dataset))
                # candidate split points: half the smallest value, then the midpoints
                split_values = [sorted_values[0] / 2]
                for t in range(len(sorted_values) - 1):
                    split_values.append((sorted_values[t] + sorted_values[t + 1]) / 2)
                partitions = {k: [[], []] for k in split_values}
                for m in dataset:
                    for l in split_values:
                        partitions[l][0 if m[i] < l else 1].append(m)
                for h in partitions:
                    # The original scored the split with std_dev(h, False), i.e. on the
                    # split value itself; the weighted deviation of the two sides is
                    # what was intended.
                    weighted_std_dev = 0
                    for r in range(2):
                        weighted_std_dev += std_dev(partitions[h][r], False) * len(partitions[h][r]) / len(dataset)
                    cur_std_red = parent_std_dev - weighted_std_dev
                    if cur_std_red >= max_std_red:
                        max_std_red = cur_std_red
                        use_partition = {h: partitions[h]}  # best split: one split point
                        attribute = i
    print('new node for attribute', attribute)
    if type(dataset[0][attribute]) is str:
        for k in use_partition.values():
            print('under', k[0][attribute], 'for attribute', attribute)
            recursive(k, used_attributes + [attribute])
    else:
        for k in use_partition.values():
            for b in k:
                temp = 'less than' if b is k[0] else 'greater than'
                print('under', temp, list(use_partition.keys()))
                recursive(b, used_attributes + [attribute])
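Here the split score is standard deviation reduction rather than information gain, and use_partition again stores the winning grouping. A minimal standalone check of that score for one categorical attribute; the helper name std_dev_reduction is illustrative and not from the source:

# Illustrative helper (not in the source): score a categorical split the way
# the snippet does just before it assigns `use_partition = partitions`.
def std_dev_reduction(dataset, attr_index):
    partitions = {}
    for row in dataset:
        partitions.setdefault(row[attr_index], []).append(row)
    weighted = sum(
        std_dev(rows, False) * len(rows) / len(dataset)
        for rows in partitions.values()
    )
    return std_dev(dataset, True) - weighted

# e.g. the reduction achieved by splitting the sample data on column 0
# ('Sunny' / 'Overcast' / 'Rainy'):
print(std_dev_reduction(training_data, 0))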


text_lengths_collector.py

Source: text_lengths_collector.py (GitHub)


from __future__ import annotations

import json
import logging
import statistics

from pytorch_ie.annotations import Span
from transformers import AutoTokenizer

from pie_utils.statistics import WithStatistics

from ..types import DocumentWithPartitions

logger = logging.getLogger(__name__)


class TextLengthsCollector(WithStatistics):
    """This document processor collects text lengths, measured in numbers of tokens, and can
    show them as a JSON dict and, if plotext is installed, as a histogram. It is purely
    statistical and does not modify the documents.

    Presented values:
    * min, max, mean, and stddev of the collected text lengths,
    * num_docs (number of processed documents), and
    * if use_partition is enabled, num_parts (number of processed parts).

    :param tokenizer_name_or_path: the identifier of the Huggingface tokenizer that will be used
    :param use_partition: a boolean flag to enable considering a partition, i.e. tokenize and
        collect the lengths for the partition entries (e.g. sentences or sections) individually
    :param tokenizer_kwargs: a dictionary containing further keyword arguments passed when
        calling the tokenizer
    :param plotext_kwargs: a dictionary containing further keyword arguments passed when
        calling plotext.hist()
    """

    def __init__(
        self,
        tokenizer_name_or_path: str,
        use_partition: bool | None = False,
        tokenizer_kwargs: dict | None = None,
        plotext_kwargs: dict | None = None,
    ):
        self.use_partition = use_partition
        self.tokenizer_name_or_path = tokenizer_name_or_path
        self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name_or_path)
        self.tokenizer_kwargs = tokenizer_kwargs or {}
        self.plotext_kwargs = plotext_kwargs or {}
        self.reset_statistics()

    def reset_statistics(self):
        self.text_lengths = []
        self.num_docs = 0
        self.num_parts = 0

    def show_statistics(self, description: str | None = None):
        description = description or "Statistics for text lengths"
        caption = f"{description} (tokenizer_name_or_path={self.tokenizer_name_or_path})"
        try:
            import plotext as plt

            plt.clf()
            plt.hist(data=self.text_lengths, **self.plotext_kwargs)
            plt.title(caption)
            plt.show()
        # excluded from test coverage since that would require uninstalling plotext;
        # only simple logging is performed here
        except ModuleNotFoundError:  # pragma: no cover
            logger.info("install plotext to display the data as a histogram at the console")
        stats = {
            "min": min(self.text_lengths),
            "max": max(self.text_lengths),
            "mean": statistics.mean(self.text_lengths),
            "stddev": statistics.pstdev(self.text_lengths),
            "num_docs": self.num_docs,
        }
        if self.use_partition:
            stats["num_parts"] = self.num_parts
        logger.info(f"{caption}:\n{json.dumps(stats, indent=2)}")

    def __call__(self, document: DocumentWithPartitions) -> DocumentWithPartitions:
        # With use_partition, measure each partition entry separately; otherwise
        # treat the whole text as a single span.
        partition = (
            document.partitions if self.use_partition else [Span(start=0, end=len(document.text))]
        )
        tokenized = self.tokenizer(
            [document.text[part.start : part.end] for part in partition], **self.tokenizer_kwargs
        )
        new_lengths = [len(encoding) for encoding in tokenized.encodings]
        self.text_lengths.extend(new_lengths)
        self.num_parts += len(partition)
        self.num_docs += 1
        return document
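A minimal usage sketch for the use_partition flag, assuming a DocumentWithPartitions instance whose partitions layer holds sentence spans (its construction lives elsewhere in the pie_utils project and is not shown in the snippet); the tokenizer name is an arbitrary example:

# Hypothetical usage; `doc` is assumed to be a DocumentWithPartitions whose
# `partitions` layer holds e.g. sentence spans.
collector = TextLengthsCollector("bert-base-uncased", use_partition=True)
doc = collector(doc)           # tokenizes and measures each partition entry individually
collector.show_statistics()    # logs min/max/mean/stddev plus num_docs and num_parts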


Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run autotest automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

