How to use the clean_len variable in yandex-tank

Best Python code snippet using yandex-tank

Statistics_Generator.py

Source:Statistics_Generator.py Github

copy

Full Screen

import pandas
import string
import argparse
import numpy as np
import matplotlib.pyplot as plt
import torch  # BUG FIX: torch is used by get_summary_stats_tensor but was never imported
from collections import Counter
from Utilities.Distance import *
from Utilities.Plot import *
from Constants import *

if __name__ == "__main__":
    # Load the csv of paired noised/clean strings that the helpers below analyse.
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_path', help='Path to csv file with noised and clean columns',
                        nargs='?', default='Data/mispelled_pure_noised.csv', type=str)
    args = parser.parse_args()
    file_path = args.file_path
    df = pandas.read_csv(file_path)


def _check_same_length(first: list, second: list) -> int:
    """Raise if the two lists differ in length; otherwise return the shared length."""
    if len(first) != len(second):
        raise Exception('Clean list and noise list are not the same length')
    return len(first)


def get_summary_stats_tensor(noised: list, clean: list):
    """Collect the battery of noise statistics into one 11-element float tensor
    on DEVICE: (ins, del, sub) edit distribution followed by the eight
    character-class "outside clean" percentages."""
    ins_probs, del_probs, sub_probs = get_edit_distributions_percents(noised, clean)
    noise_outside_clean_probs = get_percent_of_noise_outside_clean(clean, noised)
    digits_outside_clean_probs = get_percent_of_digit_noise_outside_clean(clean, noised)
    punc_outside_clean_probs = get_percent_of_punc_noise_outside_clean(clean, noised)
    alpha_outside_clean_probs = get_percent_of_alpha_noise_outside_clean(clean, noised)
    upper_outside_clean_probs = get_percent_of_upper_alpha_noise_outside_clean(clean, noised)
    lower_outside_clean_probs = get_percent_of_lower_alpha_noise_outside_clean(clean, noised)
    vowel_outside_clean_probs = get_percent_of_vowel_noise_outside_clean(clean, noised)
    consonants_outside_clean_probs = get_percent_of_consonants_noise_outside_clean(clean, noised)
    return torch.FloatTensor([
        ins_probs, del_probs, sub_probs,
        noise_outside_clean_probs, digits_outside_clean_probs,
        punc_outside_clean_probs, alpha_outside_clean_probs,
        upper_outside_clean_probs, lower_outside_clean_probs,
        vowel_outside_clean_probs, consonants_outside_clean_probs,
    ]).to(DEVICE)


def get_levenshtein_stats(noiseds: list, cleans: list):
    """Return {edit_distance: fraction of pairs at that distance}, rounded to 4 dp.

    Raises Exception when the lists differ in length.
    """
    length = _check_same_length(cleans, noiseds)
    distance_counts = Counter(
        levenshtein(str(noiseds[idx]), str(cleans[idx])) for idx in range(length))
    return {distance: round(count / length, 4)
            for distance, count in distance_counts.items()}


def get_edit_distributions_percents(noiseds: list, cleans: list):
    """Aggregate insert/delete/substitute counts over all pairs and return each
    as a fraction of the total edit count.

    ROBUSTNESS FIX: returns (0.0, 0.0, 0.0) when there are no edits at all;
    the original divided by zero in that case.
    """
    length = _check_same_length(cleans, noiseds)
    ins_total, dels_total, subs_total = 0, 0, 0
    for idx in range(length):
        ins, dels, subs = get_levenshtein_w_counts(str(cleans[idx]), str(noiseds[idx]))
        ins_total += ins
        dels_total += dels
        subs_total += subs
    total = ins_total + dels_total + subs_total
    if total == 0:
        return 0.0, 0.0, 0.0
    return float(ins_total / total), float(dels_total / total), float(subs_total / total)


def get_edit_percents_distribution(noiseds: list, cleans: list):
    """Per-pair edit-type fractions; returns three parallel lists
    (insert fractions, substitute fractions, delete fractions).

    ROBUSTNESS FIX: pairs with zero edits are skipped; the original raised
    ZeroDivisionError on an identical (clean, noised) pair.
    """
    length = _check_same_length(cleans, noiseds)
    ins_percents, sub_percents, del_percents = [], [], []
    for idx in range(length):
        ins, dels, subs = get_levenshtein_w_counts(str(cleans[idx]), str(noiseds[idx]))
        total = ins + dels + subs
        if total == 0:
            continue  # identical pair: no edit distribution to record
        del_percents.append(float(dels / total))
        sub_percents.append(float(subs / total))
        ins_percents.append(float(ins / total))
    return ins_percents, sub_percents, del_percents


def get_percent_of_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce any printable char absent from the clean word."""
    return get_percent(clean, noise, string.printable)


def get_percent_of_digit_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce a digit absent from the clean word."""
    return get_percent(clean, noise, string.digits)


def get_percent_of_white_space_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce whitespace absent from the clean word."""
    return get_percent(clean, noise, string.whitespace)


def get_percent_of_punc_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce punctuation absent from the clean word."""
    return get_percent(clean, noise, string.punctuation)


def get_percent_of_alpha_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce an ASCII letter absent from the clean word."""
    return get_percent(clean, noise, string.ascii_letters)


def get_percent_of_upper_alpha_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce an uppercase letter absent from the clean word."""
    return get_percent(clean, noise, string.ascii_uppercase)


def get_percent_of_lower_alpha_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce a lowercase letter absent from the clean word."""
    return get_percent(clean, noise, string.ascii_lowercase)


def get_percent_of_upper_vowel_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce an uppercase vowel absent from the clean word."""
    return get_percent(clean, noise, 'AEIOU')


def get_percent_of_lower_vowel_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce a lowercase vowel absent from the clean word."""
    return get_percent(clean, noise, 'aeiou')


def get_percent_of_vowel_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce a vowel (either case) absent from the clean word."""
    return get_percent(clean, noise, 'AEIOUaeiou')


def get_percent_of_consonants_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce a consonant (either case) absent from the clean word."""
    set_of_letters = ''.join(
        [c for c in string.ascii_letters if c not in 'aeiouAEIOU'])
    return get_percent(clean, noise, set_of_letters)


def get_percent_of_upper_consonants_noise(clean: list, noise: list):
    """Fraction of edits that introduce an uppercase consonant absent from the clean word."""
    set_of_letters = ''.join(
        [c for c in string.ascii_uppercase if c not in 'AEIOU'])
    return get_percent(clean, noise, set_of_letters)


def get_percent_of_lower_consonants_noise(clean: list, noise: list):
    """Fraction of edits that introduce a lowercase consonant absent from the clean word."""
    set_of_letters = ''.join(
        [c for c in string.ascii_lowercase if c not in 'aeiou'])
    return get_percent(clean, noise, set_of_letters)


def get_percent(clean_lst: list, noised_lst: list, exclusion_str: str):
    """Ratio of noisy characters that are in exclusion_str but absent from the
    paired clean word, over the summed Levenshtein distance of all pairs.

    ROBUSTNESS FIX: returns 0.0 when the summed distance is zero; the original
    divided by zero for fully identical lists.
    """
    length = _check_same_length(clean_lst, noised_lst)
    levenshtein_sum, outside_count = 0, 0
    for idx in range(length):
        noised_word = str(noised_lst[idx])
        clean_word = str(clean_lst[idx])
        levenshtein_sum += levenshtein(clean_word, noised_word)
        outside_count += count_outside_clean_in_noisy_in_set(
            clean_word, noised_word, exclusion_str)
    if levenshtein_sum == 0:
        return 0.0
    return float(outside_count / levenshtein_sum)


def count_outside_clean_in_noisy_in_set(clean: str, noisy: str, set_str: str):
    """Count characters of `noisy` that belong to `set_str` but never occur in `clean`."""
    return sum(1 for char in noisy if char not in clean and char in set_str)


def get_points_for_edit_idx_to_clean_length(clean: list, noise: list):
    """Return scatter points: x = length of the clean word, y = in-word index of
    each edit, accumulated over every (clean, noised) pair."""
    length = _check_same_length(clean, noise)
    x, y = [], []
    for idx in range(length):
        noised_word = str(noise[idx])
        clean_word = str(clean[idx])
        idxes_list = get_indexes_of_edits(clean_word, noised_word)
        x.extend([len(clean_word)] * len(idxes_list))
        y.extend(idxes_list)
    return x, y


def get_stats_of_noised_len_to_clean(clean: list, noise: list):
    """Compare noised vs clean word lengths over all pairs.

    Returns the fractions of pairs whose noised form is (larger, smaller,
    equal) in length relative to the clean form.
    """
    length = _check_same_length(clean, noise)
    count_smaller = count_equal = count_larger = 0
    for i in range(length):
        clean_word_len = len(str(clean[i]))
        noised_word_len = len(str(noise[i]))
        if noised_word_len > clean_word_len:
            count_larger += 1
        elif noised_word_len < clean_word_len:
            count_smaller += 1
        else:
            count_equal += 1
    return (float(count_larger / length),
            float(count_smaller / length),
            float(count_equal / length))


def get_percent_of_duplicate_char_noise(clean: list, noise: list):
    """Fraction of edits that duplicate a character already present in the clean
    word (i.e. the noised word contains more copies of some clean character
    than the clean word does), over the summed Levenshtein distance.

    Uses collections.Counter instead of the original per-iteration nested
    letter-count helper (which was redefined on every loop pass).

    NOTE(review): the original source was truncated after the counting loop;
    the final ratio below mirrors the other get_percent_* helpers — confirm
    against the full file.
    """
    length = _check_same_length(clean, noise)
    duplicated_char_count = 0
    total_edit_count = 0
    for i in range(length):
        clean_word = str(clean[i])
        noised_word = str(noise[i])
        total_edit_count += levenshtein(clean_word, noised_word)
        clean_counts = Counter(clean_word)
        noised_counts = Counter(noised_word)
        for char, clean_count in clean_counts.items():
            difference = clean_count - noised_counts.get(char, 0)
            if difference < 0:
                duplicated_char_count += abs(difference)
    if total_edit_count == 0:
        return 0.0
    return float(duplicated_char_count / total_edit_count)

Full Screen

Full Screen

dataframe_test.py

Source:dataframe_test.py Github

copy

Full Screen

import pytest
import json
import pnguin as pn


def _fetch_mock_payloads():
    """Load the DataFrame-construction fixtures."""
    with open("test/data/mockInit.json") as f:
        return json.load(f)


def _fetch_mock_filter_payloads():
    """Load the filter-operation fixtures."""
    with open("test/data/mockFilters.json") as f:
        return json.load(f)


def _nanify(payload):
    """Corrupt one record of the payload with NaN values and attach clean_len,
    the number of rows expected to survive dropna() (always 1 here)."""
    def _modify(x):
        if isinstance(x, dict):
            # Column-oriented data: one NaN inside the "name" column.
            x["name"] = ["Raghav", float("NaN")]
            return x, 1
        else:
            # Row-oriented data: replace the second record with a NaN-bearing one.
            x[1] = {
                "name": float("NaN"),
                "occupation": "Bored2",
                "message": "Another one",
            }
            return x, 1

    axis, data = payload["axis"], payload["data"]
    mod_data, clean_len = _modify(data)
    return {"axis": axis, "data": mod_data, "clean_len": clean_len}


@pytest.mark.parametrize("payload", _fetch_mock_payloads())
def test_create(payload):
    """DataFrame keeps the input data as-is and normalizes per axis."""
    data, axis = payload["data"], payload["axis"]
    df = pn.DataFrame(data=data, axis=axis)
    # IDIOM FIX: exact-type comparison via `is`, not `type(..) == type(..)`.
    if type(df.data) is type(data):
        assert df.data == data
    if axis == "row":
        assert isinstance(df.data, list)
    else:
        assert isinstance(df.data, dict)


@pytest.mark.parametrize("payload", _fetch_mock_payloads())
def test_head(payload):
    """head(n) returns min(n, total rows) rows."""
    def _eval_head(head, original_len, x):
        assert len(head._data_as_rows()) == (x if original_len > x else original_len)

    data, axis = payload["data"], payload["axis"]
    df = pn.DataFrame(data=data, axis=axis)
    # PERF FIX: original_len is loop-invariant; the original recomputed it per n.
    # Row-axis data is a list of records; column-axis data is a dict of
    # equal-length columns, so any one column gives the row count.
    original_len = (
        len(data) if isinstance(data, list) else len(next(iter(data.values())))
    )
    for r in (1, 2, 3, 4, 5):
        _eval_head(df.head(n=r), original_len, r)


@pytest.mark.parametrize("payload", [_nanify(x) for x in _fetch_mock_payloads()])
def test_dropna(payload):
    """dropna() removes every NaN-bearing row."""
    data, clean_len, axis = payload["data"], payload["clean_len"], payload["axis"]
    df = pn.DataFrame(data=data, axis=axis).dropna()
    rows = df._data_as_rows()
    # BUG FIX: was `len(rows) is clean_len` — identity comparison of ints only
    # works by accident of CPython small-int caching; value equality is meant.
    assert len(rows) == clean_len


@pytest.mark.parametrize("payload", _fetch_mock_filter_payloads())
def test_filter(payload):
    """filter() keeps exactly the number of rows the fixture expects."""
    data, op, target, length, axis = (
        payload["data"],
        payload["op"],
        payload["target"],
        payload["length"],
        payload["axis"],
    )
    df = pn.DataFrame(data=data, axis=axis)
    f = pn.Filter("test", op, target)
    res = df.filter(f)
    # BUG FIX: was `len(res.data) is length` — same int-identity pitfall.
    assert len(res.data) == length


def test_apply():
    # Placeholder until DataFrame.apply has real coverage.
    assert True


@pytest.mark.parametrize("payload", [_nanify(x) for x in _fetch_mock_payloads()])
def test_tostring(payload):
    """Smoke test: _to_string() must not raise on NaN-bearing data."""
    data, axis = payload["data"], payload["axis"]
    df = pn.DataFrame(data=data, axis=axis)
    df._to_string()

Full Screen

Full Screen

__init__.py

Source:__init__.py Github

copy

Full Screen

import os
import random
import numpy as np
import soundfile as sf
from scipy import signal


class NoiseSynthesis(object):
    """Mixes a randomly chosen background-noise clip into a clean signal at a
    requested SNR. The noise bank is loaded eagerly from ./noise_dataset."""

    def __init__(self, max_snr=20):
        # max_snr normalizes the requested snr inside _rate().
        self.max_snr = max_snr
        self.noise_dataset = self._process_noise_dataset()

    def __call__(self, sig, snr=10, sr=16000):
        """Return (clean, clean + scaled noise) for a random noise clip.

        NOTE(review): the `sr` parameter is currently unused here — confirm
        whether resampling was intended.
        """
        noise = random.choice(self.noise_dataset)
        clean, noise = self._cut_noise(sig, noise)
        synthesized = self._synthesis(clean, noise, snr)
        return clean, synthesized

    def _process_noise_dataset(self, path='./noise_dataset'):
        """Load every audio file found (recursively) under `path` into memory."""
        noise_files = []
        for root, dirs, files in os.walk(path):
            for file in files:
                noise_files.append(os.path.join(root, file))

        noise_data = []
        for file in noise_files:
            noise_data.append(self._load_audio(file)[0])
        return noise_data

    def _load_audio(self, path, sr=16000):
        """Read an audio file and return (samples, samplerate).

        Raw .pcm files are interpreted as 16-bit samples at the given `sr`
        (no resampling is performed for any format).
        """
        ext = path.split('.')[-1]
        if ext == 'pcm':
            try:
                sig = np.memmap(path, dtype='h', mode='r').astype('float32')
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; catch I/O and value errors only.
            except (OSError, ValueError):
                with open(path, 'rb') as f:
                    buf = f.read()
                if len(buf) % 2 == 1:
                    buf = buf[:-1]  # drop dangling byte so the int16 view is valid
                sig = np.frombuffer(buf, dtype='int16')
        else:
            # BUG FIX: the original called sf.read(path, sr); soundfile's second
            # positional argument is `frames`, so files were silently truncated
            # to `sr` samples. sf.read returns the file's own samplerate.
            sig, sr = sf.read(path)
        return sig, sr

    def _cut_noise(self, clean, noise):
        """Extend/crop `noise` so it has exactly the length of `clean`."""
        clean_len = len(clean)
        noise_len = len(noise)

        if clean_len > noise_len:
            # NOTE(review): np.repeat duplicates each SAMPLE r+1 times
            # (element-wise), not the clip as a whole — np.tile may have been
            # intended. Left unchanged to preserve behavior; confirm.
            r = round(clean_len / noise_len)
            noise = np.repeat(noise, r + 1)
            noise_len = len(noise)

        if noise_len > clean_len:
            # Take a random window of the (now long enough) noise.
            t = np.random.randint(0, noise_len - clean_len)
            noise = noise[t:t + clean_len]
        return clean, noise

    def _rms(self, sig):
        """Root mean square of the signal along its last axis."""
        return np.sqrt(np.mean(np.square(sig), axis=-1))

    def _rate(self, sig, snr):
        """Target noise amplitude derived from `sig` (clean RMS) and the
        snr normalized by max_snr."""
        snr = snr / self.max_snr
        rate = sig / (10 ** snr)
        return rate

    def _norm(self, sig, sr=16000):
        # NOTE(review): uses (sr // 1000 - 1) = 15 bits at 16 kHz, i.e. divides
        # by 32767 — presumably 16-bit full-scale normalization; the coupling
        # of bit depth to sample rate looks accidental — confirm.
        return sig / ((1 << (sr // 1000 - 1)) - 1)

    def _synthesis(self, clean, noise, snr):
        """Scale the noise to the SNR-derived amplitude and add it to clean."""
        clean_rms = self._rms(clean)
        noise_rms = self._rms(noise)
        amp = self._rate(clean_rms, snr) / noise_rms
        return clean + noise * amp

    # The two filters below were declared without `self` in the original;
    # @staticmethod keeps class-level calls working and additionally makes
    # instance-level calls valid (backward compatible).
    @staticmethod
    def _pre_emphasis(signal_batch, emph_coeff=0.95):
        """High-pass pre-emphasis FIR filter: y[n] = x[n] - emph_coeff * x[n-1]."""
        return signal.lfilter([1, -emph_coeff], [1], signal_batch)

    @staticmethod
    def _de_emphasis(signal_batch, emph_coeff=0.95):
        """Inverse of _pre_emphasis.

        NOTE(review): the original body was truncated in this source; the
        standard inverse IIR filter is reconstructed here — confirm against
        the full file.
        """
        return signal.lfilter([1], [1, -emph_coeff], signal_batch)

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites to running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles a list of step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.

LambdaTest Learning Hubs:

YouTube

You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.

Run yandex-tank automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 minutes of automation testing FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful