Best Python code snippet using autotest_python
Source: file_utils.py
# imports assumed in the elided header: os, codecs, numpy as np,
# and sklearn.model_selection.train_test_split; read_features_file
# is defined in an elided part of this file
...
    with open(os.path.expanduser(input_path_y), 'r') as f:
        read_data_y = f.readlines()
    x_train, x_test, y_train, y_test = train_test_split(read_data_x, read_data_y)
    write_lines_to_file(output_dir + '/x_train.tsv', x_train)
    write_lines_to_file(output_dir + '/y_train.tsv', y_train)
    write_lines_to_file(output_dir + '/x_test.tsv', x_test)
    write_lines_to_file(output_dir + '/y_test.tsv', y_test)

def split_dataset_repeated_segments(input_path_x, input_path_y, output_dir, number_of_segments):
    # Avoid using the same data for different systems: within every block of
    # number_of_segments lines, the first 80% go to train and the next 20% to test
    with open(os.path.expanduser(input_path_x), 'r') as f:
        read_data_x = f.readlines()
    with open(os.path.expanduser(input_path_y), 'r') as f:
        read_data_y = f.readlines()
    segment_numbers = range(len(read_data_x))
    number_of_batches = int(len(read_data_x) / number_of_segments)
    train_length = int(round(number_of_segments * 80 / 100))
    test_length = int(round(number_of_segments * 20 / 100))
    x_train = []
    y_train = []
    x_test = []
    y_test = []
    for i in range(number_of_batches):
        # log the (1-based) line numbers that go into the test split
        print('\n'.join([str(x + 1) for x in segment_numbers[i * number_of_segments + train_length:i * number_of_segments + train_length + test_length]]))
        x_train += read_data_x[i * number_of_segments:i * number_of_segments + train_length]
        y_train += read_data_y[i * number_of_segments:i * number_of_segments + train_length]
        x_test += read_data_x[i * number_of_segments + train_length:i * number_of_segments + train_length + test_length]
        y_test += read_data_y[i * number_of_segments + train_length:i * number_of_segments + train_length + test_length]
    write_lines_to_file(output_dir + '/x_train.tsv', x_train)
    write_lines_to_file(output_dir + '/y_train.tsv', y_train)
    write_lines_to_file(output_dir + '/x_test.tsv', x_test)
    write_lines_to_file(output_dir + '/y_test.tsv', y_test)

def concatenate_features_files(file_paths):
    # read each feature file and join the feature columns side by side
    feature_arrays = []
    for fp in file_paths:
        feature_arrays.append(read_features_file(fp, "\t"))
    return np.concatenate(feature_arrays, axis=1)

def write_reference_file(output_path, labels):
    output_file = codecs.open(output_path, 'w', 'utf-8')
    for l in labels:
        output_file.write(str(l) + '\n')
    output_file.close()

def write_feature_file(output_path, feature_matrix):
    output_file = codecs.open(output_path, 'w', 'utf-8')
    for row in feature_matrix:
        output_file.write('\t'.join([str(x) for x in row]) + '\n')
    output_file.close()

def write_lines_to_file(file_path, lines):
    with open(os.path.expanduser(file_path), 'w') as f:
        for line in lines:
            f.write(line)

def combine_alignment_files(language_pairs, directory, file_name):
    # Combine alignment files for different languages in a single file,
    # renumbering the "Sentence #" markers so they stay consecutive
    output_file = codecs.open(directory + "/" + "full_dataset/" + file_name, "w", "utf-8")
    count = 0
    for language_pair in language_pairs:
        lines = codecs.open(directory + "/" + language_pair + "/" + "we" + "/" + file_name, "r", "utf-8")
        for line in lines:
            if "Sentence #" in line:
                count += 1
                output_file.write("Sentence #" + str(count) + "\n")
...
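To make the batched 80/20 slicing in split_dataset_repeated_segments concrete, here is a minimal sketch of the same index arithmetic on toy data, with no file I/O; the demo_split helper is hypothetical and not part of the source above:

def demo_split(data, number_of_segments):
    train_length = int(round(number_of_segments * 80 / 100))
    test_length = int(round(number_of_segments * 20 / 100))
    number_of_batches = int(len(data) / number_of_segments)
    train, test = [], []
    for i in range(number_of_batches):
        start = i * number_of_segments
        train += data[start:start + train_length]                         # first 80% of each batch
        test += data[start + train_length:start + train_length + test_length]  # next 20%
    return train, test

train, test = demo_split(list(range(10)), number_of_segments=5)
print(train)  # [0, 1, 2, 3, 5, 6, 7, 8]
print(test)   # [4, 9]

Because the test lines are taken from a fixed position inside every batch, repeated runs over the same inputs always hold out the same segments, which is what keeps different systems from training on each other's test data.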
Source: create_training_dev_test_split.py
# imports assumed in the elided header: codecs, csv, os, random, sys
...
   for line in input_file:
      lines.append(line.rstrip('\n'))
   input_file.close()
   return lines

def write_lines_to_file(output_filename, lines):
    output_file = codecs.open(output_filename, 'w', encoding='utf-8')
    for line in lines:
        output_file.write(line)
        output_file.write('\n')
    output_file.close()

filename = sys.argv[1]
csv_reader = csv.reader(open(filename))
print("Reading translation CSV from", filename)
header_index = {}
for i, header in enumerate(next(csv_reader)):
   header_index[header] = i
# keep every HIT that was not rejected
hits = []
for hit in csv_reader:
    if hit[header_index['AssignmentStatus']] != 'Rejected':
        hits.append(hit)
seg_id_hash = {}
source_sentences = {}
segments = {}
turkers = {}
for hit in hits:
   # each HIT carries ten segments: a source sentence plus one translation each
   for i in range(1, 11):
      seg_id = hit[header_index['Input.seg_id' + str(i)]]
      seg_id_hash[seg_id] = 1
      source = hit[header_index['Input.seg' + str(i)]]
      source_sentences[seg_id] = source
      translation = hit[header_index['Answer.translation' + str(i)]]
      # normalize whitespace and strip the HIT template placeholder text
      translation = translation.replace('\n', ' ')
      translation = translation.replace('\r', ' ')
      translation = translation.replace('\t', ' ')
      translation = translation.replace('Translation of the first sentence goes here.', '')
      translation = translation.replace('Translation of the second sentence goes here.', '')
      translation = translation.replace('Translation of the first sentence goes here', '')
      translation = translation.replace('Translation of the second sentence goes here', '')
      worker = hit[header_index['WorkerId']]
      translations = []
      workers = []
      if seg_id in segments:
         translations = segments[seg_id]
         workers = turkers[seg_id]
      translations.append(translation)
      workers.append(worker)
      segments[seg_id] = translations
      turkers[seg_id] = workers
# keep only segments with exactly four non-blank translations
lines = []
for seg_id in seg_id_hash.keys():
   if seg_id in segments:
      line = str(seg_id) + '\t' + source_sentences[seg_id]
      if len(segments[seg_id]) == 4:
         for translation in segments[seg_id]:
            line = line + '\t' + translation
         any_blanks = False
         for field in line.split('\t'):
            if field.replace(' ', '') == '':
               any_blanks = True
         if not any_blanks:
            lines.append(line)
dev_set_size = min(1000, int(round(len(lines) * 0.1)))
test_set_size = min(1000, int(round(len(lines) * 0.1)))
random.shuffle(lines)
#lang_pair = hit[header_index['Input.lang_pair']]
lang_pair = 'ur-en'
(source_lang, target_lang) = lang_pair.split('-')
print("Collected translations for", len(lines), "segments")
dev_set = lines[0:dev_set_size]
test_set = lines[dev_set_size:(dev_set_size + test_set_size)]
training_set = lines[(dev_set_size + test_set_size):len(lines)]

def write_data_to_files(output_filename, lang_pair, lines, combine_translations=False):
   (source_lang, target_lang) = lang_pair.split('-')
   if combine_translations:
      # flatten: repeat each segment once per translation
      seg_ids = []
      sources = []
      translations = []
      for line in lines:
         (seg_id, source, trans0, trans1, trans2, trans3) = line.split('\t')
         seg_ids.extend([seg_id] * 4)
         sources.extend([source] * 4)
         translations.extend([trans0, trans1, trans2, trans3])
      write_lines_to_file(lang_pair + "/" + output_filename + ".seg_ids", seg_ids)
      write_lines_to_file(lang_pair + "/" + output_filename + "." + source_lang, sources)
      write_lines_to_file(lang_pair + "/" + output_filename + "." + target_lang, translations)
   else:
      # keep the four translations in parallel files (.0 through .3)
      seg_ids = []
      sources = []
      translations = [[], [], [], []]
      for line in lines:
         (seg_id, source, trans0, trans1, trans2, trans3) = line.split('\t')
         seg_ids.append(seg_id)
         sources.append(source)
         for n, trans in enumerate((trans0, trans1, trans2, trans3)):
            translations[n].append(trans)
      write_lines_to_file(lang_pair + "/" + output_filename + ".seg_ids", seg_ids)
      write_lines_to_file(lang_pair + "/" + output_filename + "." + source_lang, sources)
      for n in range(4):
         write_lines_to_file(lang_pair + "/" + output_filename + "." + target_lang + "." + str(n), translations[n])

if not os.path.isdir(lang_pair):
   os.makedirs(lang_pair)
write_data_to_files("training", lang_pair, training_set, combine_translations=True)
write_data_to_files("dev", lang_pair, dev_set)
write_data_to_files("test", lang_pair, test_set)

def extract_dictionary(dict_csv_file):
   dict_reader = csv.reader(open(dict_csv_file))
   headers = {}
   dictionary = {}
   for i, header in enumerate(next(dict_reader)):
      headers[header] = i
   for row in dict_reader:
      status = row[headers['AssignmentStatus']]
      if status == 'Approved':
         for i in range(1, 13):
            word = row[headers['Input.word_' + str(i)]]
            translation = row[headers['Answer.translation_' + str(i) + '_1']]
            if not word.replace(' ', '') == '' and not translation.replace(' ', '') == '':
               if word not in dictionary:
                  dictionary[word] = []
               dictionary[word].append(translation)
   return dictionary

# write out a dictionary if we have a dictionary HIT CSV
if len(sys.argv) > 2:
   dict_translation_filename = sys.argv[2]
   print("Reading dictionary CSV from", dict_translation_filename)
   dictionary = extract_dictionary(dict_translation_filename)
   source_words = []
   translations = []
   for source_word in dictionary:
      for translation in dictionary[source_word]:
         source_words.append(source_word)
         translations.append(translation)
   write_lines_to_file(lang_pair + "/dict" + "." + source_lang, source_words)
   write_lines_to_file(lang_pair + "/dict" + "." + target_lang, translations)
...
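The header_index dict above maps CSV column names to positions by hand. The standard library's csv.DictReader does the same lookup by name, which removes the bookkeeping; a minimal sketch assuming the same column layout (the hits.csv path is hypothetical):

import csv

with open('hits.csv', newline='') as f:  # hypothetical path; same columns as the HIT CSV above
    for hit in csv.DictReader(f):
        # each row indexes by header name, so no header_index dict is needed
        if hit['AssignmentStatus'] != 'Rejected':
            print(hit['WorkerId'], hit['Input.seg_id1'])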
Source: preprocess_text.py
# This file needs to be run in the main folder
# %%
import text
from utils import read_lines_from_file

def write_lines_to_file(path, lines, mode='w', encoding='utf-8'):
    # join with '\n' but leave no trailing newline after the last line
    with open(path, mode, encoding=encoding) as f:
        for i, line in enumerate(lines):
            if i == len(lines) - 1:
                f.write(line)
                break
            f.write(line + '\n')

# %%
lines = read_lines_from_file('./data/train-orthographic-transcript.txt')
#lines = read_lines_from_file('./data/test-orthographic-transcript.txt')
new_lines_arabic = []
new_lines_phonetic = []
new_lines_buckw = []
for line in lines:
    # each line looks like: "wav_name" "utterance"
    wav_name, utterance = line.split('" "')
    wav_name, utterance = wav_name[1:], utterance[:-1]
    # reorder vowel+shadda into shadda+vowel and drop the " - " separators
    utterance = utterance.replace("a~", "~a") \
                         .replace("i~", "~i") \
                         .replace("u~", "~u") \
                         .replace(" - ", " ")
    utterance_arab = text.buckwalter_to_arabic(utterance)
    utterance_phon = text.buckwalter_to_phonemes(utterance)
    line_new_ara = f'"{wav_name}" "{utterance_arab}"'
    new_lines_arabic.append(line_new_ara)
    line_new_pho = f'"{wav_name}" "{utterance_phon}"'
    new_lines_phonetic.append(line_new_pho)
    line_new_buckw = f'"{wav_name}" "{utterance}"'
    new_lines_buckw.append(line_new_buckw)
# %% train
write_lines_to_file('./data/train_arab.txt', new_lines_arabic)
write_lines_to_file('./data/train_phon.txt', new_lines_phonetic)
write_lines_to_file('./data/train_buckw.txt', new_lines_buckw)
# %% test
write_lines_to_file('./data/test_arab.txt', new_lines_arabic)
write_lines_to_file('./data/test_phon.txt', new_lines_phonetic)
...
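A minimal sketch of the quote-stripping and diacritic reordering above in isolation; the sample line is hypothetical, and text.buckwalter_to_arabic / text.buckwalter_to_phonemes come from the project's own text module, so they are not exercised here:

line = '"wav_0001" "qa~A la - hu"'  # hypothetical transcript line
wav_name, utterance = line.split('" "')
wav_name, utterance = wav_name[1:], utterance[:-1]  # strip the outer quotes
utterance = (utterance.replace("a~", "~a")
                      .replace("i~", "~i")
                      .replace("u~", "~u")
                      .replace(" - ", " "))
print(wav_name)    # wav_0001
print(utterance)   # q~aA la hu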
