How to use create_vocabulary method in localstack

Best Python code snippet using localstack_python

classify.py

Source:classify.py Github

copy

Full Screen

# NOTE: this excerpt starts mid-file. The commented lines below are the visible
# tail of load_training_data(); its beginning is not shown. The code below uses
# `os` and `math`, whose imports are presumably in the part that is not shown.
#             bow = create_bow(vocab, directory+subdir+f)
#             dataset.append({'label': label, 'bow': bow})
#     return dataset


def create_vocabulary(directory, cutoff):
    """Create a vocabulary from the training directory.

    Every file in every sub-directory of ``directory`` is read line by line;
    each stripped, non-empty line counts as one occurrence of a word.

    Args:
        directory: Path of the training directory. It is concatenated
            directly with the sub-directory names, so it must end with a
            path separator (e.g. ``'./EasyFiles/'``).
        cutoff: Minimum number of occurrences a word needs to be kept.

    Returns:
        A sorted list of the words that occur at least ``cutoff`` times.
    """
    counts = {}
    for d in os.listdir(directory):
        subdir = d if d[-1] == '/' else d + '/'
        for f in os.listdir(directory + subdir):
            with open(directory + subdir + f, 'r', encoding="utf-8") as doc:
                for line in doc:
                    word = line.strip()
                    if word:
                        counts[word] = counts.get(word, 0) + 1
    return sorted(word for word, count in counts.items() if count >= cutoff)


def create_bow(vocab, filepath):
    """Create a bag-of-words dictionary for a single document.

    The file is read once, one token per line. Tokens found in ``vocab`` are
    counted under their own key; every other line (including a blank line,
    which is never a vocabulary word) is counted under the key ``None``.

    Args:
        vocab: Iterable of vocabulary words.
        filepath: Path of the document to read (UTF-8).

    Returns:
        A dict mapping each vocabulary word that occurs in the document to
        its count, in vocabulary order, followed by ``None`` mapped to the
        out-of-vocabulary count when that count is non-zero.
    """
    known = set(vocab)  # O(1) membership tests, built once
    counts = {}
    unknown = 0
    with open(filepath, 'r', encoding="utf-8") as doc:
        for line in doc:
            word = line.strip()
            if word in known:
                counts[word] = counts.get(word, 0) + 1
            else:
                unknown += 1
    # Emit keys in vocabulary order so iteration order does not depend on the
    # order in which words happen to appear in the document.
    bow = {word: counts[word] for word in vocab if word in counts}
    if unknown:
        bow[None] = unknown
    return bow


def prior(training_data, label_list):
    """Return the smoothed log prior probability of each label.

    The prior is based on the frequency of DOCUMENTS per label, with add-1
    smoothing: ``(count(label) + 1) / (total + number_of_labels)``.

    Args:
        training_data: List of dicts that each have a ``'label'`` key.
        label_list: Labels to compute a prior for. Documents whose label is
            not in this list are ignored.

    Returns:
        A dict mapping each label in ``label_list`` to its log prior.
    """
    smooth = 1  # smoothing factor
    doc_counts = {label: 0 for label in label_list}
    for doc in training_data:
        if doc['label'] in doc_counts:
            doc_counts[doc['label']] += 1
    denominator = sum(doc_counts.values()) + smooth * len(doc_counts)
    return {
        label: math.log((count + smooth) / denominator)
        for label, count in doc_counts.items()
    }


def p_word_given_label(vocab, training_data, label):
    """Return the smoothed log class-conditional probability of every word.

    Only documents whose ``'label'`` equals ``label`` contribute counts.
    Bag-of-words keys that are not in ``vocab`` (including ``None``) are
    accumulated under ``None``. Add-1 smoothing is applied over
    ``len(vocab) + 1`` outcomes (the vocabulary plus ``None``).

    Args:
        vocab: List of vocabulary words.
        training_data: List of dicts with ``'label'`` and ``'bow'`` keys,
            where ``'bow'`` maps words (or ``None``) to counts.
        label: The label to condition on.

    Returns:
        A dict with one entry per vocabulary word plus ``None``, mapping to
        ``log P(word | label)``.
    """
    smooth = 1  # smoothing factor
    known = set(vocab)
    # Seed every vocabulary word (and None) so each one gets a smoothed
    # probability even if it never occurs under this label.
    word_counts = {word: 0 for word in vocab}
    word_counts[None] = 0
    total_words = 0
    for doc in training_data:
        if doc['label'] != label:
            continue
        for word, count in doc['bow'].items():
            word_counts[word if word in known else None] += count
            total_words += count
    log_denominator = math.log(total_words + smooth * (len(vocab) + 1))
    return {
        word: math.log(count + smooth) - log_denominator
        for word, count in word_counts.items()
    }


def train(training_directory, cutoff):
    """ return a dictionary formatted as follows:
            {
             'vocabulary': <the training set vocabulary>,
             'log prior': <the output of prior()>,
             'log p(w|y=2016)': <the output of p_word_given_label() for 2016>,
             'log p(w|y=2020)': <the output of p_word_given_label() for 2020>
            }
    """
    label_list = os.listdir(training_directory)
    vocab = create_vocabulary(training_directory, cutoff)
    training_data = load_training_data(vocab, training_directory)
    # The labels are named explicitly: os.listdir() returns entries in
    # arbitrary order, so indexing label_list could swap 2016 and 2020.
    return {
        'vocabulary': vocab,
        'log prior': prior(training_data, label_list),
        'log p(w|y=2016)': p_word_given_label(vocab, training_data, '2016'),
        'log p(w|y=2020)': p_word_given_label(vocab, training_data, '2020'),
    }


def classify(model, filepath):
    """ return a dictionary formatted as follows:
            {
             'predicted y': <'2016' or '2020'>,
             'log p(y=2016|x)': <log probability of 2016 label for the document>,
             'log p(y=2020|x)': <log probability of 2020 label for the document>
            }
    """
    bow = create_bow(model['vocabulary'], filepath)
    word_total_2016 = 0
    word_total_2020 = 0
    for word, count in bow.items():
        word_total_2016 += model['log p(w|y=2016)'][word] * count
        word_total_2020 += model['log p(w|y=2020)'][word] * count
    log_p_2016 = model['log prior']['2016'] + word_total_2016
    log_p_2020 = model['log prior']['2020'] + word_total_2020
    return {
        # A tie is resolved in favour of '2020'.
        'predicted y': '2016' if log_p_2016 > log_p_2020 else '2020',
        'log p(y=2016|x)': log_p_2016,
        'log p(y=2020|x)': log_p_2020,
    }


if __name__ == '__main__':
    vocab = {}
    #vocab = create_vocabulary('./EasyFiles/', 2)
    #print(vocab)
    #print(create_bow(vocab, './EasyFiles/2016/1.txt'))
    #vocab = create_vocabulary('./corpus/training/', 2)
    #training_data = load_training_data(vocab,'./corpus/training/')
    #print(training_data)
    #vocab = create_vocabulary('./corpus/training/', 2)
    #training_data = load_training_data(vocab, './corpus/training/')
    #print(prior(training_data, ['2020', '2016']))
    #vocab = create_vocabulary('./EasyFiles/', 1)
    #load_data = load_training_data(vocab, './EasyFiles/')
    #print(load_data)
    #for x in load_data:
    #    for key in x["bow"]:
    #        print(x['bow'][key])
    #vocab = create_vocabulary('./EasyFiles/', 1)
    #training_data = load_training_data(vocab, './EasyFiles/')
    #print(training_data)
    #print(p_word_given_label(vocab, training_data, '2016'))
    #print(train('./EasyFiles/', 2))
    #model = train('./corpus/test/', 2)
    #print(classify(model, './corpus/test/2016/0.txt'))
    #vocab = create_vocabulary('./EasyFiles/', 1)
    #print(create_bow(vocab, './EasyFiles/2016/1.txt'))
    #vocab = create_vocabulary('./EasyFiles/', 1)
    #print(load_training_data(vocab, './EasyFiles/'))
    #vocab = create_vocabulary('./corpus/training/', 2)
    #training_data = load_training_data(vocab, './corpus/training/')
    #print(prior(training_data, ['2020', '2016']))
    #vocab = create_vocabulary('./EasyFiles/', 1)
    #training_data = load_training_data(vocab, './EasyFiles/')
    #print(training_data)
    #print(p_word_given_label(vocab, training_data, '2016'))
    #print(p_word_given_label(vocab, training_data, '2020'))
    #print(train('./EasyFiles/', 2))
####
    #print(create_vocabulary('./EasyFiles/', 2))
    #vocab = create_vocabulary('./EasyFiles/', 2)
    #print(create_bow(vocab, './EasyFiles/2016/1.txt'))
    #vocab = create_vocabulary('./EasyFiles/', 1)
    #print(load_training_data(vocab, './EasyFiles/'))
    #vocab = create_vocabulary('./corpus/training/', 2)
    #training_data = load_training_data(vocab, './corpus/training/')
    #print(prior(training_data, ['2020', '2016']))
    #vocab = create_vocabulary('./EasyFiles/', 1)
    #training_data = load_training_data(vocab, './EasyFiles/')
    #print(p_word_given_label(vocab, training_data, '2016'))
    #vocab = create_vocabulary('./EasyFiles/', 2)
    #training_data = load_training_data(vocab, './EasyFiles/')
    #print(p_word_given_label(vocab, training_data, '2016'))
    #print(train('./EasyFiles/', 2))
    model = train('./corpus/training/', 2)
    # ... (the excerpt is truncated here on the source page)

Full Screen

Full Screen

test.py

Source:test.py Github

copy

Full Screen

1'''2HW4 is to be written in a file called classify.py with the following interface:3create_vocabulary(training_directory: str, cutoff: int)4create_bow(vocab: dict, filepath: str)5load_training_data(vocab: list, directory: str)6prior(training_data: list, label_list: list)7p_word_given_label(vocab: list, training_data: list, label: str)8train(training_directory: str, cutoff: int)9classify(model: dict, filepath: str)10'''11__author__ = 'cs540-testers'12__credits__ = ['Saurabh Kulkarni', 'Alex Moon', 'Stephen Jasina',13 'Harrison Clark']14version = 'V1.1.2'15from classify import train, create_bow, load_training_data, prior, \16 p_word_given_label, classify, create_vocabulary17import unittest18class TestClassify(unittest.TestCase):19 def compare_dicts(self, a, b):20 '''Compares two dicts that map strings to other (non-container) data'''21 # Check that all elements of a are in b22 for k in a:23 self.assertIn(k, b)24 if isinstance(a[k], float):25 self.assertAlmostEqual(a[k], b[k])26 elif isinstance(a[k], dict):27 self.compare_dicts(a[k], b[k])28 else:29 self.assertEqual(a[k], b[k])30 # Check if b has unexpected extra entries31 for k in b:32 self.assertIn(k, a)33 # create_vocabulary(training_directory: str, cutoff: int)34 # returns a list35 def test_create_vocabulary(self):36 vocab = create_vocabulary('./EasyFiles/', 1)37 expected_vocab = [',', '.', '19', '2020', 'a', 'cat', 'chases', 'dog',38 'february', 'hello', 'is', 'it', 'world']39 self.assertEqual(vocab, expected_vocab)40 vocab = create_vocabulary('./EasyFiles/', 2)41 expected_vocab = ['.', 'a']42 self.assertEqual(vocab, expected_vocab)43 # create_bow(vocab: dict, filepath: str)44 # returns a dict45 def test_create_bow(self):46 vocab = create_vocabulary('./EasyFiles/', 1)47 bow = create_bow(vocab, './EasyFiles/2016/1.txt')48 expected_bow = {'a': 2, 'dog': 1, 'chases': 1, 'cat': 1, '.': 1}49 self.assertEqual(bow, expected_bow)50 bow = create_bow(vocab, './EasyFiles/2020/2.txt')51 expected_bow = {'it': 1, 'is': 1, 
'february': 1, '19': 1, ',': 1,52 '2020': 1, '.': 1}53 self.assertEqual(bow, expected_bow)54 vocab = create_vocabulary('./EasyFiles/', 2)55 bow = create_bow(vocab, './EasyFiles/2016/1.txt')56 expected_bow = {'a': 2, None: 3, '.': 1}57 self.assertEqual(bow, expected_bow)58 # load_training_data(vocab: list, directory: str)59 # returns a list of dicts60 def test_load_training_data(self):61 vocab = create_vocabulary('./EasyFiles/', 1)62 training_data = load_training_data(vocab, './EasyFiles/')63 expected_training_data = [64 {65 'label': '2020',66 'bow': {'it': 1, 'is': 1, 'february': 1, '19': 1, ',': 1,67 '2020': 1, '.': 1}68 },69 {70 'label': '2016',71 'bow': {'hello': 1, 'world': 1}72 },73 {74 'label': '2016',75 'bow': {'a': 2, 'dog': 1, 'chases': 1, 'cat': 1, '.': 1}76 }77 ]78 self.assertCountEqual(training_data, expected_training_data)79 # prior(training_data: list, label_list: list)80 # returns a dict mapping labels to floats81 # assertAlmostEqual(a, b) can be handy here82 def test_prior(self):83 vocab = create_vocabulary('./corpus/training/', 2)84 training_data = load_training_data(vocab, './corpus/training/')85 log_probabilities = prior(training_data, ['2020', '2016'])86 expected_log_probabilities = {'2020': -0.32171182103809226,87 '2016': -1.2906462863976689}88 self.compare_dicts(log_probabilities, expected_log_probabilities)89 # p_word_given_label(vocab: list, training_data: list, label: str)90 # returns a dict mapping words to floats91 # assertAlmostEqual(a, b) can be handy here92 def test_p_word_given_label_2020(self):93 vocab = create_vocabulary('./EasyFiles/', 1)94 training_data = load_training_data(vocab, './EasyFiles/')95 log_probabilities = p_word_given_label(vocab, training_data, '2020')96 expected_log_probabilities = {',': -2.3513752571634776,97 '.': -2.3513752571634776, '19': -2.3513752571634776,98 '2020': -2.3513752571634776, 'a': -3.044522437723423,99 'cat': -3.044522437723423, 'chases': -3.044522437723423,100 'dog': -3.044522437723423, 'february': 
-2.3513752571634776,101 'hello': -3.044522437723423, 'is': -2.3513752571634776,102 'it': -2.3513752571634776, 'world': -3.044522437723423,103 None: -3.044522437723423}104 self.compare_dicts(log_probabilities, expected_log_probabilities)105 vocab = create_vocabulary('./EasyFiles/', 2)106 training_data = load_training_data(vocab, './EasyFiles/')107 log_probabilities = p_word_given_label(vocab, training_data, '2020')108 expected_log_probabilities = {'.': -1.6094379124341005,109 'a': -2.302585092994046, None: -0.35667494393873267}110 self.compare_dicts(log_probabilities, expected_log_probabilities)111 def test_p_word_given_label_2016(self):112 vocab = create_vocabulary('./EasyFiles/', 1)113 training_data = load_training_data(vocab, './EasyFiles/')114 log_probabilities = p_word_given_label(vocab, training_data, '2016')115 expected_log_probabilities = {',': -3.091042453358316,116 '.': -2.3978952727983707, '19': -3.091042453358316,117 '2020': -3.091042453358316, 'a': -1.9924301646902063,118 'cat': -2.3978952727983707, 'chases': -2.3978952727983707,119 'dog': -2.3978952727983707, 'february': -3.091042453358316,120 'hello': -2.3978952727983707, 'is': -3.091042453358316,121 'it': -3.091042453358316, 'world': -2.3978952727983707,122 None: -3.091042453358316}123 self.compare_dicts(log_probabilities, expected_log_probabilities)124 vocab = create_vocabulary('./EasyFiles/', 2)125 training_data = load_training_data(vocab, './EasyFiles/')126 log_probabilities = p_word_given_label(vocab, training_data, '2016')127 expected_log_probabilities = {'.': -1.7047480922384253,128 'a': -1.2992829841302609, None: -0.6061358035703157}129 self.compare_dicts(log_probabilities, expected_log_probabilities)130 # train(training_directory: str, cutoff: int)131 # returns a dict132 def test_train(self):133 model = train('./EasyFiles/', 2)134 expected_model = {135 'vocabulary': ['.', 'a'],136 'log prior': {137 '2020': -0.916290731874155,138 '2016': -0.5108256237659905...

Full Screen

Full Screen

flow_configuration.py

Source:flow_configuration.py Github

copy

Full Screen

# NOTE: this excerpt starts mid-file; the commented line below is the visible
# tail of a function whose beginning is not shown.
#     return 'Prepared images'


def create_vocab(**kwargs):
    """Build and save a vocabulary for the images under ``kwargs['imgdir']``.

    Constructs a ``Vocabulary`` (size 800, ``'daisy'`` features), runs its
    ``feature_extraction``, ``create_vocabulary`` and ``save_vocabulary``
    steps in that order, and returns a status string.
    """
    voc = Vocabulary(imgs_path=kwargs['imgdir'], vocab_size = 800, feature_type='daisy')
    all_features = voc.feature_extraction()
    voc.create_vocabulary(all_features)
    voc.save_vocabulary()
    return 'Created vocabulary'


def create_hists(**kwargs):
    """Create and save pattern histograms for ``kwargs['imgdir']``.

    Constructs a ``PatternHistograms`` with ``'daisy'`` features, calls its
    ``create_histogram`` and ``save_histograms`` methods, and returns a
    status string.
    """
    # This function was defined twice with identical bodies; the second
    # definition only rebound the same name, so a single copy is kept.
    ph = PatternHistograms(path=kwargs['imgdir'], feature_type='daisy')
    hists = ph.create_histogram()
    ph.save_histograms(hists)
    return 'Created histograms'


def compute_similarity(**kwargs):
    image_similarity = ImageSimilarity(path=sys.argv[1])
    # ... (the excerpt is truncated here on the source page)

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run localstack automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful