How to use the clean_len variable in yandex-tank

Best Python code snippet using yandex-tank

Statistics_Generator.py

Source:Statistics_Generator.py Github

copy

Full Screen

import pandas
import string
import argparse
import numpy as np
import matplotlib.pyplot as plt
import torch  # BUG FIX: torch is used by get_summary_stats_tensor but was never imported
from collections import Counter
from Utilities.Distance import *
from Utilities.Plot import *
from Constants import *

if __name__ == "__main__":
    # Load the csv of paired noised/clean strings that the helpers below analyse.
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_path', help='Path to csv file with noised and clean columns',
                        nargs='?', default='Data/mispelled_pure_noised.csv', type=str)
    args = parser.parse_args()
    file_path = args.file_path
    df = pandas.read_csv(file_path)


def _check_same_length(first: list, second: list) -> int:
    """Raise if the two lists differ in length; otherwise return the shared length."""
    if len(first) != len(second):
        raise Exception('Clean list and noise list are not the same length')
    return len(first)


def get_summary_stats_tensor(noised: list, clean: list):
    """Collect the battery of noise statistics into one 11-element float tensor
    on DEVICE: (ins, del, sub) edit distribution followed by the eight
    character-class "outside clean" percentages."""
    ins_probs, del_probs, sub_probs = get_edit_distributions_percents(noised, clean)
    noise_outside_clean_probs = get_percent_of_noise_outside_clean(clean, noised)
    digits_outside_clean_probs = get_percent_of_digit_noise_outside_clean(clean, noised)
    punc_outside_clean_probs = get_percent_of_punc_noise_outside_clean(clean, noised)
    alpha_outside_clean_probs = get_percent_of_alpha_noise_outside_clean(clean, noised)
    upper_outside_clean_probs = get_percent_of_upper_alpha_noise_outside_clean(clean, noised)
    lower_outside_clean_probs = get_percent_of_lower_alpha_noise_outside_clean(clean, noised)
    vowel_outside_clean_probs = get_percent_of_vowel_noise_outside_clean(clean, noised)
    consonants_outside_clean_probs = get_percent_of_consonants_noise_outside_clean(clean, noised)
    return torch.FloatTensor([
        ins_probs, del_probs, sub_probs,
        noise_outside_clean_probs, digits_outside_clean_probs,
        punc_outside_clean_probs, alpha_outside_clean_probs,
        upper_outside_clean_probs, lower_outside_clean_probs,
        vowel_outside_clean_probs, consonants_outside_clean_probs,
    ]).to(DEVICE)


def get_levenshtein_stats(noiseds: list, cleans: list):
    """Return {edit_distance: fraction of pairs at that distance}, rounded to 4 dp.

    Raises Exception when the lists differ in length.
    """
    length = _check_same_length(cleans, noiseds)
    distance_counts = Counter(
        levenshtein(str(noiseds[idx]), str(cleans[idx])) for idx in range(length))
    return {distance: round(count / length, 4)
            for distance, count in distance_counts.items()}


def get_edit_distributions_percents(noiseds: list, cleans: list):
    """Aggregate insert/delete/substitute counts over all pairs and return each
    as a fraction of the total edit count.

    ROBUSTNESS FIX: returns (0.0, 0.0, 0.0) when there are no edits at all;
    the original divided by zero in that case.
    """
    length = _check_same_length(cleans, noiseds)
    ins_total, dels_total, subs_total = 0, 0, 0
    for idx in range(length):
        ins, dels, subs = get_levenshtein_w_counts(str(cleans[idx]), str(noiseds[idx]))
        ins_total += ins
        dels_total += dels
        subs_total += subs
    total = ins_total + dels_total + subs_total
    if total == 0:
        return 0.0, 0.0, 0.0
    return float(ins_total / total), float(dels_total / total), float(subs_total / total)


def get_edit_percents_distribution(noiseds: list, cleans: list):
    """Per-pair edit-type fractions; returns three parallel lists
    (insert fractions, substitute fractions, delete fractions).

    ROBUSTNESS FIX: pairs with zero edits are skipped; the original raised
    ZeroDivisionError on an identical (clean, noised) pair.
    """
    length = _check_same_length(cleans, noiseds)
    ins_percents, sub_percents, del_percents = [], [], []
    for idx in range(length):
        ins, dels, subs = get_levenshtein_w_counts(str(cleans[idx]), str(noiseds[idx]))
        total = ins + dels + subs
        if total == 0:
            continue  # identical pair: no edit distribution to record
        del_percents.append(float(dels / total))
        sub_percents.append(float(subs / total))
        ins_percents.append(float(ins / total))
    return ins_percents, sub_percents, del_percents


def get_percent_of_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce any printable char absent from the clean word."""
    return get_percent(clean, noise, string.printable)


def get_percent_of_digit_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce a digit absent from the clean word."""
    return get_percent(clean, noise, string.digits)


def get_percent_of_white_space_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce whitespace absent from the clean word."""
    return get_percent(clean, noise, string.whitespace)


def get_percent_of_punc_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce punctuation absent from the clean word."""
    return get_percent(clean, noise, string.punctuation)


def get_percent_of_alpha_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce an ASCII letter absent from the clean word."""
    return get_percent(clean, noise, string.ascii_letters)


def get_percent_of_upper_alpha_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce an uppercase letter absent from the clean word."""
    return get_percent(clean, noise, string.ascii_uppercase)


def get_percent_of_lower_alpha_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce a lowercase letter absent from the clean word."""
    return get_percent(clean, noise, string.ascii_lowercase)


def get_percent_of_upper_vowel_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce an uppercase vowel absent from the clean word."""
    return get_percent(clean, noise, 'AEIOU')


def get_percent_of_lower_vowel_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce a lowercase vowel absent from the clean word."""
    return get_percent(clean, noise, 'aeiou')


def get_percent_of_vowel_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce a vowel (either case) absent from the clean word."""
    return get_percent(clean, noise, 'AEIOUaeiou')


def get_percent_of_consonants_noise_outside_clean(clean: list, noise: list):
    """Fraction of edits that introduce a consonant (either case) absent from the clean word."""
    set_of_letters = ''.join(
        [c for c in string.ascii_letters if c not in 'aeiouAEIOU'])
    return get_percent(clean, noise, set_of_letters)


def get_percent_of_upper_consonants_noise(clean: list, noise: list):
    """Fraction of edits that introduce an uppercase consonant absent from the clean word."""
    set_of_letters = ''.join(
        [c for c in string.ascii_uppercase if c not in 'AEIOU'])
    return get_percent(clean, noise, set_of_letters)


def get_percent_of_lower_consonants_noise(clean: list, noise: list):
    """Fraction of edits that introduce a lowercase consonant absent from the clean word."""
    set_of_letters = ''.join(
        [c for c in string.ascii_lowercase if c not in 'aeiou'])
    return get_percent(clean, noise, set_of_letters)


def get_percent(clean_lst: list, noised_lst: list, exclusion_str: str):
    """Ratio of noisy characters that are in exclusion_str but absent from the
    paired clean word, over the summed Levenshtein distance of all pairs.

    ROBUSTNESS FIX: returns 0.0 when the summed distance is zero; the original
    divided by zero for fully identical lists.
    """
    length = _check_same_length(clean_lst, noised_lst)
    levenshtein_sum, outside_count = 0, 0
    for idx in range(length):
        noised_word = str(noised_lst[idx])
        clean_word = str(clean_lst[idx])
        levenshtein_sum += levenshtein(clean_word, noised_word)
        outside_count += count_outside_clean_in_noisy_in_set(
            clean_word, noised_word, exclusion_str)
    if levenshtein_sum == 0:
        return 0.0
    return float(outside_count / levenshtein_sum)


def count_outside_clean_in_noisy_in_set(clean: str, noisy: str, set_str: str):
    """Count characters of `noisy` that belong to `set_str` but never occur in `clean`."""
    return sum(1 for char in noisy if char not in clean and char in set_str)


def get_points_for_edit_idx_to_clean_length(clean: list, noise: list):
    """Return scatter points: x = length of the clean word, y = in-word index of
    each edit, accumulated over every (clean, noised) pair."""
    length = _check_same_length(clean, noise)
    x, y = [], []
    for idx in range(length):
        noised_word = str(noise[idx])
        clean_word = str(clean[idx])
        idxes_list = get_indexes_of_edits(clean_word, noised_word)
        x.extend([len(clean_word)] * len(idxes_list))
        y.extend(idxes_list)
    return x, y


def get_stats_of_noised_len_to_clean(clean: list, noise: list):
    """Compare noised vs clean word lengths over all pairs.

    Returns the fractions of pairs whose noised form is (larger, smaller,
    equal) in length relative to the clean form.
    """
    length = _check_same_length(clean, noise)
    count_smaller = count_equal = count_larger = 0
    for i in range(length):
        clean_word_len = len(str(clean[i]))
        noised_word_len = len(str(noise[i]))
        if noised_word_len > clean_word_len:
            count_larger += 1
        elif noised_word_len < clean_word_len:
            count_smaller += 1
        else:
            count_equal += 1
    return (float(count_larger / length),
            float(count_smaller / length),
            float(count_equal / length))


def get_percent_of_duplicate_char_noise(clean: list, noise: list):
    """Fraction of edits that duplicate a character already present in the clean
    word (i.e. the noised word contains more copies of some clean character
    than the clean word does), over the summed Levenshtein distance.

    Uses collections.Counter instead of the original per-iteration nested
    letter-count helper (which was redefined on every loop pass).

    NOTE(review): the original source was truncated after the counting loop;
    the final ratio below mirrors the other get_percent_* helpers — confirm
    against the full file.
    """
    length = _check_same_length(clean, noise)
    duplicated_char_count = 0
    total_edit_count = 0
    for i in range(length):
        clean_word = str(clean[i])
        noised_word = str(noise[i])
        total_edit_count += levenshtein(clean_word, noised_word)
        clean_counts = Counter(clean_word)
        noised_counts = Counter(noised_word)
        for char, clean_count in clean_counts.items():
            difference = clean_count - noised_counts.get(char, 0)
            if difference < 0:
                duplicated_char_count += abs(difference)
    if total_edit_count == 0:
        return 0.0
    return float(duplicated_char_count / total_edit_count)

Full Screen

Full Screen

dataframe_test.py

Source:dataframe_test.py Github

copy

Full Screen

import pytest
import json
import pnguin as pn


def _fetch_mock_payloads():
    """Load the DataFrame-construction fixtures."""
    with open("test/data/mockInit.json") as f:
        return json.load(f)


def _fetch_mock_filter_payloads():
    """Load the filter-operation fixtures."""
    with open("test/data/mockFilters.json") as f:
        return json.load(f)


def _nanify(payload):
    """Corrupt one record of the payload with NaN values and attach clean_len,
    the number of rows expected to survive dropna() (always 1 here)."""
    def _modify(x):
        if isinstance(x, dict):
            # Column-oriented data: one NaN inside the "name" column.
            x["name"] = ["Raghav", float("NaN")]
            return x, 1
        else:
            # Row-oriented data: replace the second record with a NaN-bearing one.
            x[1] = {
                "name": float("NaN"),
                "occupation": "Bored2",
                "message": "Another one",
            }
            return x, 1

    axis, data = payload["axis"], payload["data"]
    mod_data, clean_len = _modify(data)
    return {"axis": axis, "data": mod_data, "clean_len": clean_len}


@pytest.mark.parametrize("payload", _fetch_mock_payloads())
def test_create(payload):
    """DataFrame keeps the input data as-is and normalizes per axis."""
    data, axis = payload["data"], payload["axis"]
    df = pn.DataFrame(data=data, axis=axis)
    # IDIOM FIX: exact-type comparison via `is`, not `type(..) == type(..)`.
    if type(df.data) is type(data):
        assert df.data == data
    if axis == "row":
        assert isinstance(df.data, list)
    else:
        assert isinstance(df.data, dict)


@pytest.mark.parametrize("payload", _fetch_mock_payloads())
def test_head(payload):
    """head(n) returns min(n, total rows) rows."""
    def _eval_head(head, original_len, x):
        assert len(head._data_as_rows()) == (x if original_len > x else original_len)

    data, axis = payload["data"], payload["axis"]
    df = pn.DataFrame(data=data, axis=axis)
    # PERF FIX: original_len is loop-invariant; the original recomputed it per n.
    # Row-axis data is a list of records; column-axis data is a dict of
    # equal-length columns, so any one column gives the row count.
    original_len = (
        len(data) if isinstance(data, list) else len(next(iter(data.values())))
    )
    for r in (1, 2, 3, 4, 5):
        _eval_head(df.head(n=r), original_len, r)


@pytest.mark.parametrize("payload", [_nanify(x) for x in _fetch_mock_payloads()])
def test_dropna(payload):
    """dropna() removes every NaN-bearing row."""
    data, clean_len, axis = payload["data"], payload["clean_len"], payload["axis"]
    df = pn.DataFrame(data=data, axis=axis).dropna()
    rows = df._data_as_rows()
    # BUG FIX: was `len(rows) is clean_len` — identity comparison of ints only
    # works by accident of CPython small-int caching; value equality is meant.
    assert len(rows) == clean_len


@pytest.mark.parametrize("payload", _fetch_mock_filter_payloads())
def test_filter(payload):
    """filter() keeps exactly the number of rows the fixture expects."""
    data, op, target, length, axis = (
        payload["data"],
        payload["op"],
        payload["target"],
        payload["length"],
        payload["axis"],
    )
    df = pn.DataFrame(data=data, axis=axis)
    f = pn.Filter("test", op, target)
    res = df.filter(f)
    # BUG FIX: was `len(res.data) is length` — same int-identity pitfall.
    assert len(res.data) == length


def test_apply():
    # Placeholder until DataFrame.apply has real coverage.
    assert True


@pytest.mark.parametrize("payload", [_nanify(x) for x in _fetch_mock_payloads()])
def test_tostring(payload):
    """Smoke test: _to_string() must not raise on NaN-bearing data."""
    data, axis = payload["data"], payload["axis"]
    df = pn.DataFrame(data=data, axis=axis)
    df._to_string()

Full Screen

Full Screen

__init__.py

Source:__init__.py Github

copy

Full Screen

import os
import random
import numpy as np
import soundfile as sf
from scipy import signal


class NoiseSynthesis(object):
    """Mixes a randomly chosen background-noise clip into a clean signal at a
    requested SNR. The noise bank is loaded eagerly from ./noise_dataset."""

    def __init__(self, max_snr=20):
        # max_snr normalizes the requested snr inside _rate().
        self.max_snr = max_snr
        self.noise_dataset = self._process_noise_dataset()

    def __call__(self, sig, snr=10, sr=16000):
        """Return (clean, clean + scaled noise) for a random noise clip.

        NOTE(review): the `sr` parameter is currently unused here — confirm
        whether resampling was intended.
        """
        noise = random.choice(self.noise_dataset)
        clean, noise = self._cut_noise(sig, noise)
        synthesized = self._synthesis(clean, noise, snr)
        return clean, synthesized

    def _process_noise_dataset(self, path='./noise_dataset'):
        """Load every audio file found (recursively) under `path` into memory."""
        noise_files = []
        for root, dirs, files in os.walk(path):
            for file in files:
                noise_files.append(os.path.join(root, file))

        noise_data = []
        for file in noise_files:
            noise_data.append(self._load_audio(file)[0])
        return noise_data

    def _load_audio(self, path, sr=16000):
        """Read an audio file and return (samples, samplerate).

        Raw .pcm files are interpreted as 16-bit samples at the given `sr`
        (no resampling is performed for any format).
        """
        ext = path.split('.')[-1]
        if ext == 'pcm':
            try:
                sig = np.memmap(path, dtype='h', mode='r').astype('float32')
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; catch I/O and value errors only.
            except (OSError, ValueError):
                with open(path, 'rb') as f:
                    buf = f.read()
                if len(buf) % 2 == 1:
                    buf = buf[:-1]  # drop dangling byte so the int16 view is valid
                sig = np.frombuffer(buf, dtype='int16')
        else:
            # BUG FIX: the original called sf.read(path, sr); soundfile's second
            # positional argument is `frames`, so files were silently truncated
            # to `sr` samples. sf.read returns the file's own samplerate.
            sig, sr = sf.read(path)
        return sig, sr

    def _cut_noise(self, clean, noise):
        """Extend/crop `noise` so it has exactly the length of `clean`."""
        clean_len = len(clean)
        noise_len = len(noise)

        if clean_len > noise_len:
            # NOTE(review): np.repeat duplicates each SAMPLE r+1 times
            # (element-wise), not the clip as a whole — np.tile may have been
            # intended. Left unchanged to preserve behavior; confirm.
            r = round(clean_len / noise_len)
            noise = np.repeat(noise, r + 1)
            noise_len = len(noise)

        if noise_len > clean_len:
            # Take a random window of the (now long enough) noise.
            t = np.random.randint(0, noise_len - clean_len)
            noise = noise[t:t + clean_len]
        return clean, noise

    def _rms(self, sig):
        """Root mean square of the signal along its last axis."""
        return np.sqrt(np.mean(np.square(sig), axis=-1))

    def _rate(self, sig, snr):
        """Target noise amplitude derived from `sig` (clean RMS) and the
        snr normalized by max_snr."""
        snr = snr / self.max_snr
        rate = sig / (10 ** snr)
        return rate

    def _norm(self, sig, sr=16000):
        # NOTE(review): uses (sr // 1000 - 1) = 15 bits at 16 kHz, i.e. divides
        # by 32767 — presumably 16-bit full-scale normalization; the coupling
        # of bit depth to sample rate looks accidental — confirm.
        return sig / ((1 << (sr // 1000 - 1)) - 1)

    def _synthesis(self, clean, noise, snr):
        """Scale the noise to the SNR-derived amplitude and add it to clean."""
        clean_rms = self._rms(clean)
        noise_rms = self._rms(noise)
        amp = self._rate(clean_rms, snr) / noise_rms
        return clean + noise * amp

    # The two filters below were declared without `self` in the original;
    # @staticmethod keeps class-level calls working and additionally makes
    # instance-level calls valid (backward compatible).
    @staticmethod
    def _pre_emphasis(signal_batch, emph_coeff=0.95):
        """High-pass pre-emphasis FIR filter: y[n] = x[n] - emph_coeff * x[n-1]."""
        return signal.lfilter([1, -emph_coeff], [1], signal_batch)

    @staticmethod
    def _de_emphasis(signal_batch, emph_coeff=0.95):
        """Inverse of _pre_emphasis.

        NOTE(review): the original body was truncated in this source; the
        standard inverse IIR filter is reconstructed here — confirm against
        the full file.
        """
        return signal.lfilter([1], [1, -emph_coeff], signal_batch)

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites to running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles a list of step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.

LambdaTest Learning Hubs:

YouTube

You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.

Run yandex-tank automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 minutes of automation testing FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful