Best Python code snippet using yandex-tank
create_stats.py
Source:create_stats.py  
import os
from collections import Counter
from pathlib import Path

import numpy as np


def remove_zero_entries(d: dict):
    """Return a copy of ``d`` without the entries whose value equals 0."""
    return {k: v for k, v in d.items() if v != 0}


def _read_lines(path):
    """Return the lines of a text file, ignoring leading/trailing newlines."""
    return Path(path).read_text().strip('\n').split('\n')


def _count_of_counts(items):
    """Build a histogram of occurrence counts.

    Returns a dict mapping "number of occurrences" to "how many distinct
    items occur exactly that often", with keys in ascending order.
    """
    occurrences = Counter(items)
    return dict(sorted(Counter(occurrences.values()).items()))


class DatasetSplit:
    """One dataset split loaded from parallel, line-based text files.

    The code treats all files as line-aligned: entry ``i`` of ``examples``,
    ``defs``, ``labels`` and (if present) ``domains`` is handled as one
    instance.

    Attributes:
        domain_dict: maps a domain name to the list of domain id strings
            (as they appear in the domains file) that belong to it.
        examples: per line, the tab-separated fields of the examples file.
        defs: per line, the raw definition string.
        hyps: per line, the tab-separated fields of the hypernyms file.
        labels: per line, the raw label string; ``'T'`` is counted as positive.
        domains: per line, the raw domain id string, or ``None`` when no
            domains file was given.
        targets: first tab-separated field of every example line.
    """

    def __init__(self, data_folder: Path,
                 examples_file_name: str,
                 definitions_file_name: str,
                 hypernyms_file_name: str,
                 labels_file_name: str,
                 domains_file_name: str = None):
        """Load all files of the split from ``data_folder``.

        Args:
            data_folder: directory that contains the files below.
            examples_file_name: file with tab-separated example lines.
            definitions_file_name: file with one definition per line.
            hypernyms_file_name: file with tab-separated hypernym lines.
            labels_file_name: file with one label per line.
            domains_file_name: optional file with one domain id per line.
        """
        self.domain_dict = {
            'wnt': ["0"],
            'msh': ["1"],
            'ctl': ["2"],
            'cps': ["3"],
            'domains': ["1", "2", "3"]
        }
        self.examples = [line.split('\t')
                         for line in _read_lines(data_folder / examples_file_name)]
        self.defs = _read_lines(data_folder / definitions_file_name)
        self.hyps = [line.split('\t')
                     for line in _read_lines(data_folder / hypernyms_file_name)]
        self.labels = _read_lines(data_folder / labels_file_name)
        if domains_file_name is not None:
            self.domains = _read_lines(data_folder / domains_file_name)
        else:
            self.domains = None
        self.targets = [example[0] for example in self.examples]

    def _domain_indices(self, domain):
        """Return the positions of all instances that belong to ``domain``.

        Raises:
            ValueError: if the split was loaded without a domains file.
            KeyError: if ``domain`` is not a key of ``domain_dict``.
        """
        if self.domains is None:
            # Fail early with a readable message instead of an obscure NumPy
            # error about 0-d arrays.
            raise ValueError('this split has no domain information; '
                             'pass domains_file_name when loading it')
        return list(np.where(np.isin(self.domains, self.domain_dict[domain]))[0])

    def _select(self, values, domain):
        """Return ``values`` restricted to ``domain`` (all values if None)."""
        if domain is None:
            return list(values)
        return np.array(values)[self._domain_indices(domain)].tolist()

    @staticmethod
    def _instances(dataset):
        """Return the set of hashable (example fields..., definition) tuples."""
        return {tuple(example) + (definition,)
                for example, definition in zip(dataset.examples, dataset.defs)}

    def num_total_examples(self, domain=None):
        """Return the number of instances, optionally within one domain."""
        if domain is not None:
            return len(self._domain_indices(domain))
        return len(self.examples)

    def num_unique_senses(self, domain=None):
        """Return the number of distinct definitions, optionally per domain."""
        return len(set(self._select(self.defs, domain)))

    def num_unique_targets(self, domain=None):
        """Return the number of distinct targets, optionally per domain."""
        return len(set(self._select(self.targets, domain)))

    def examples_per_sense(self, domain=None):
        """Return a histogram of how many examples each sense has.

        The result maps "number of examples" to "number of senses that have
        exactly that many examples", ordered by key. For instance
        ``{1: 10, 2: 5}`` means 10 senses have a single example and 5 senses
        have two examples.
        """
        return _count_of_counts(self._select(self.defs, domain))

    def examples_per_target(self, domain: str = None):
        """Return a histogram of how many examples each target has.

        Same layout as :meth:`examples_per_sense`, counted over targets.
        """
        return _count_of_counts(self._select(self.targets, domain))

    def percent_positive_examples(self, domain=None):
        """Return the fraction (0..1) of instances whose label is ``'T'``.

        Raises:
            ZeroDivisionError: if the selection contains no instances.
        """
        labels = self._select(self.labels, domain)
        return labels.count('T') / len(labels)

    def target_overlap(self, overlap_dataset, second_dataset=None, domain=None):
        """Return how much of ``overlap_dataset``'s targets this split covers.

        The numerator is the number of distinct targets shared between
        ``overlap_dataset`` and this split (restricted to ``domain`` if
        given, and merged with ``second_dataset`` if given).
        NOTE(review): the denominator is the total, non-deduplicated number
        of targets in ``overlap_dataset`` - confirm this is intended.
        """
        known_targets = set(self._select(self.targets, domain))
        if second_dataset is not None:
            known_targets |= set(second_dataset.targets)
        shared = known_targets & set(overlap_dataset.targets)
        return len(shared) / len(overlap_dataset.targets)

    def sense_overlap(self, overlap_dataset, second_dataset=None, domain=None):
        """Return how much of ``overlap_dataset``'s senses this split covers.

        Works like :meth:`target_overlap` but compares definitions.
        NOTE(review): the denominator is the total, non-deduplicated number
        of definitions in ``overlap_dataset`` - confirm this is intended.
        """
        known_defs = set(self._select(self.defs, domain))
        if second_dataset is not None:
            known_defs |= set(second_dataset.defs)
        shared = known_defs & set(overlap_dataset.defs)
        return len(shared) / len(overlap_dataset.defs)

    def instance_overlap(self, overlap_dataset, second_dataset=None):
        """Return the fraction of ``overlap_dataset``'s distinct instances
        that also occur in this split (merged with ``second_dataset`` if
        given).

        An instance is the combination of all example fields and the
        definition of one line.
        """
        known_instances = self._instances(self)
        if second_dataset is not None:
            known_instances |= self._instances(second_dataset)
        other_instances = self._instances(overlap_dataset)
        return len(known_instances & other_instances) / len(other_instances)


def get_pos_label_percent(folder: str, prefix: str, label_indices: list = None):
    """Return the fraction of ``'T'`` labels in ``<folder>/<prefix>labels.txt``.

    Args:
        folder: directory containing the file.
        prefix: file name prefix, e.g. ``'train_'``.
        label_indices: optional positions to restrict the computation to.
    """
    labels = _read_lines(os.path.join(folder, prefix + 'labels.txt'))
    if label_indices is not None:
        labels = list(np.array(labels)[label_indices])
    return labels.count('T') / len(labels)


def get_unique_senses(folder: str, prefix: str, definition_indices: list = None):
    """Return the number of distinct lines in ``<folder>/<prefix>definitions.txt``.

    Args:
        folder: directory containing the file.
        prefix: file name prefix, e.g. ``'train_'``.
        definition_indices: optional positions to restrict the count to.
    """
    definitions = _read_lines(os.path.join(folder, prefix + 'definitions.txt'))
    if definition_indices is not None:
        definitions = list(np.array(definitions)[definition_indices])
    return len(set(definitions))


def get_total_number(folder: str, prefix: str, example_indices: list = None):
    """Return the number of instances of a split.

    The count is taken from ``<folder>/<prefix>definitions.txt``.
    NOTE(review): presumably this relies on the definitions file having one
    line per example - confirm.

    Args:
        folder: directory containing the file.
        prefix: file name prefix, e.g. ``'train_'``.
        example_indices: optional positions to restrict the count to.
    """
    examples = _read_lines(os.path.join(folder, prefix + 'definitions.txt'))
    if example_indices is not None:
        examples = list(np.array(examples)[example_indices])
    return len(examples)


def print_stats(dataset: DatasetSplit, domain=None):
    """Print the basic statistics of ``dataset``, optionally for one domain."""
    print('total number', dataset.num_total_examples(domain=domain))
    print('number unique senses', dataset.num_unique_senses(domain=domain))
    print('number unique targets', dataset.num_unique_targets(domain=domain))
    print('percent positive labels', dataset.percent_positive_examples(domain=domain))


if __name__ == '__main__':
    location = Path(__file__).parent.parent
    train_folder = location / 'data' / 'en' / 'Training'
    dev_folder = location / 'data' / 'en' / 'Development'
    test_folder = location / 'data' / 'en' / 'Test'
    train_split = DatasetSplit(data_folder=Path(train_folder),
                               examples_file_name='train_examples.txt',
                               definitions_file_name='train_definitions.txt',
                               hypernyms_file_name='train_hypernyms.txt',
                               labels_file_name='train_labels.txt')
    dev_split = DatasetSplit(data_folder=Path(dev_folder),
                             examples_file_name='dev_examples.txt',
                             definitions_file_name='dev_definitions.txt',
                             hypernyms_file_name='dev_hypernyms.txt',
                             labels_file_name='dev_labels.txt')
    # test_split = DatasetSplit(data_folder=Path(test_folder),
    #                          examples_file_name='test_examples.txt',
    #                          definitions_file_name='test_definitions.txt',
    #                          hypernyms_file_name='test_hypernyms.txt',
    #                          labels_file_name='test_labels.txt',
    #                          domains_file_name='test_domains.txt')
    print(train_split.target_overlap(dev_split))
    # print(train_split.target_overlap(test_split))
    # print(train_split.target_overlap(test_split, dev_split))
    print()
    # print(test_split.target_overlap(train_split))
    # print(test_split.target_overlap(train_split, domain='wnt'))
    # print(test_split.target_overlap(train_split, domain='msh'))
    # print(test_split.target_overlap(train_split, domain='ctl'))
    # print(test_split.target_overlap(train_split, domain='cps'))
    print()
    print(train_split.sense_overlap(dev_split))
    # print(train_split.sense_overlap(test_split))
    # print(train_split.sense_overlap(test_split, dev_split))
    print()
    # print(test_split.sense_overlap(train_split))
    # print(test_split.sense_overlap(train_split, domain='wnt'))
    # print(test_split.sense_overlap(train_split, domain='msh'))
    # print(test_split.sense_overlap(train_split, domain='ctl'))
    # print(test_split.sense_overlap(train_split, domain='cps'))
    print('\n##### train #####')
    print_stats(train_split)
    print('\n##### dev #####')
    print_stats(dev_split)
    # print('\n##### test #####')
    # print_stats(test_split)
    #
print('\n##### wnt')178    # print_stats(test_split, 'wnt')179    #180    # print('\n##### msh')181    # print_stats(test_split, 'msh')182    #183    # print('\n##### ctl')184    # print_stats(test_split, 'ctl')185    #186    # print('\n##### cps')187    # print_stats(test_split, 'cps')188    #189    # print('\n##### all domains')...boston_housing.py
Source:boston_housing.py  
1"""Boston housing price regression dataset.2"""3from __future__ import absolute_import4from __future__ import division5from __future__ import print_function6from ..utils.data_utils import get_file7import numpy as np8def load_data(path='boston_housing.npz', test_split=0.2, seed=113):9    """Loads the Boston Housing dataset.10    # Arguments11        path: path where to cache the dataset locally12            (relative to ~/.keras/datasets).13        test_split: fraction of the data to reserve as test set.14        seed: Random seed for shuffling the data15            before computing the test split.16    # Returns17        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.18    """19    assert 0 <= test_split < 120    path = get_file(path,21                    origin='https://s3.amazonaws.com/keras-datasets/boston_housing.npz',22                    file_hash='f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5')23    f = np.load(path)24    x = f['x']25    y = f['y']26    f.close()27    np.random.seed(seed)28    indices = np.arange(len(x))29    np.random.shuffle(indices)30    x = x[indices]31    y = y[indices]32    x_train = np.array(x[:int(len(x) * (1 - test_split))])33    y_train = np.array(y[:int(len(x) * (1 - test_split))])34    x_test = np.array(x[int(len(x) * (1 - test_split)):])35    y_test = np.array(y[int(len(x) * (1 - test_split)):])...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
