Unlock 30% off on Manual Testing Annual Plans this Holiday Season.

Copied to Clipboard!

How to use the create_vocabulary method in localstack

Best Python code snippet using localstack_python

classify.py

Source:classify.py

# NOTE: the beginning of classify.py is elided in this excerpt (presumably the
# imports of ``os``/``math`` and the head of ``load_training_data``); only the
# tail of that function is visible:
#             bow = create_bow(vocab, directory+subdir+f)
#             dataset.append({'label': label, 'bow': bow})
#     return dataset


def create_vocabulary(directory, cutoff):
    """Create a vocabulary from the training directory.

    Every entry of ``directory`` is treated as a sub-directory, and every
    file inside it is read as one token per line (surrounding whitespace is
    stripped, blank lines are ignored).

    Args:
        directory: Training directory path. Paths are built by plain string
            concatenation, so it must end with a path separator.
        cutoff: Minimum number of occurrences a token needs to be kept.

    Returns:
        A sorted list of the tokens occurring at least ``cutoff`` times.
    """
    counts = {}
    for entry in os.listdir(directory):
        subdir = entry if entry[-1] == '/' else entry + '/'
        for name in os.listdir(directory + subdir):
            with open(directory + subdir + name, 'r', encoding="utf-8") as doc:
                for line in doc:
                    word = line.strip()
                    if word:
                        counts[word] = counts.get(word, 0) + 1
    return sorted(word for word, count in counts.items() if count >= cutoff)


def create_bow(vocab, filepath):
    """Create a bag-of-words dictionary for a single document.

    The file is read once, one token per line. Tokens found in ``vocab`` are
    counted under their own key; every other line (including blank ones, as
    before) is counted under the key ``None``.

    Args:
        vocab: Iterable of vocabulary tokens.
        filepath: Path of the document to read.

    Returns:
        A dict mapping each vocabulary token present in the document to its
        count, plus ``None`` mapped to the out-of-vocabulary count when that
        count is non-zero.
    """
    # A set gives O(1) membership tests and lets the file be read a single
    # time instead of once per vocabulary word.
    known = set(vocab)
    bow = {}
    with open(filepath, 'r', encoding="utf-8") as doc:
        for line in doc:
            word = line.strip()
            key = word if word in known else None
            bow[key] = bow.get(key, 0) + 1
    return bow


def prior(training_data, label_list):
    """Return the log prior probability of each label in the training set.

    The prior is the add-one smoothed frequency of DOCUMENTS:
    ``(docs with label + 1) / (docs with any listed label + number of labels)``.

    Args:
        training_data: List of ``{'label': ..., 'bow': ...}`` dicts.
        label_list: Labels to compute a prior for.

    Returns:
        A dict mapping every label in ``label_list`` to its log prior. With
        no training documents each label gets the uniform smoothed prior.
    """
    smooth = 1  # smoothing factor
    doc_counts = {label: 0 for label in label_list}
    for doc in training_data:
        if doc['label'] in doc_counts:
            doc_counts[doc['label']] += 1
    # Computed once after counting, so the result is also defined when
    # training_data is empty.
    denominator = sum(doc_counts.values()) + smooth * len(doc_counts)
    return {
        label: math.log((count + smooth) / denominator)
        for label, count in doc_counts.items()
    }


def p_word_given_label(vocab, training_data, label):
    """Return the smoothed class-conditional log probability of every word.

    Args:
        vocab: List of vocabulary tokens.
        training_data: List of ``{'label': ..., 'bow': ...}`` dicts.
        label: Label whose documents are counted.

    Returns:
        A dict with one entry per vocabulary token plus ``None`` (all
        out-of-vocabulary tokens), each mapped to
        ``log(count + 1) - log(total words + len(vocab) + 1)``.
    """
    smooth = 1  # smoothing factor
    known = set(vocab)
    # Seed from the vocabulary itself so every word classify() can look up
    # has an entry, even if it never occurs in the training bows.
    counts = {word: 0 for word in vocab}
    counts[None] = 0
    total_words = 0
    for doc in training_data:
        if doc['label'] != label:
            continue
        for word, count in doc['bow'].items():
            key = word if word in known else None
            counts[key] += count
            total_words += count
    log_denominator = math.log(total_words + smooth * (len(vocab) + 1))
    return {
        word: math.log(count + smooth) - log_denominator
        for word, count in counts.items()
    }


def train(training_directory, cutoff):
    """Train the 2016/2020 naive Bayes model from a training directory.

    Returns a dictionary formatted as follows:
        {
         'vocabulary': <the training set vocabulary>,
         'log prior': <the output of prior()>,
         'log p(w|y=2016)': <the output of p_word_given_label() for 2016>,
         'log p(w|y=2020)': <the output of p_word_given_label() for 2020>
        }
    """
    vocab = create_vocabulary(training_directory, cutoff)
    training_data = load_training_data(vocab, training_directory)
    # The labels are named explicitly: os.listdir() returns entries in
    # arbitrary order, so indexing its result could swap the two classes.
    return {
        'vocabulary': vocab,
        'log prior': prior(training_data, ['2016', '2020']),
        'log p(w|y=2016)': p_word_given_label(vocab, training_data, '2016'),
        'log p(w|y=2020)': p_word_given_label(vocab, training_data, '2020'),
    }


def classify(model, filepath):
    """Classify the document at ``filepath`` with a model built by train().

    Returns a dictionary formatted as follows:
        {
         'predicted y': <'2016' or '2020'>,
         'log p(y=2016|x)': <log probability of 2016 label for the document>,
         'log p(y=2020|x)': <log probability of 2020 label for the document>
        }
    Ties are resolved in favour of '2020'.
    """
    bow = create_bow(model['vocabulary'], filepath)
    scores = {}
    for year in ('2016', '2020'):
        word_log_prob = model['log p(w|y=' + year + ')']
        likelihood = sum(word_log_prob[word] * count for word, count in bow.items())
        scores[year] = model['log prior'][year] + likelihood
    return {
        'predicted y': '2016' if scores['2016'] > scores['2020'] else '2020',
        'log p(y=2016|x)': scores['2016'],
        'log p(y=2020|x)': scores['2020'],
    }


if __name__ == '__main__':
    vocab = {}
    model = train('./corpus/training/', 2)
    # ... (the remainder of the script is elided in this excerpt)

test.py

Source:test.py

1'''2HW4 is to be written in a file called classify.py with the following interface:3create_vocabulary(training_directory: str, cutoff: int)4create_bow(vocab: dict, filepath: str)5load_training_data(vocab: list, directory: str)6prior(training_data: list, label_list: list)7p_word_given_label(vocab: list, training_data: list, label: str)8train(training_directory: str, cutoff: int)9classify(model: dict, filepath: str)10'''11__author__ = 'cs540-testers'12__credits__ = ['Saurabh Kulkarni', 'Alex Moon', 'Stephen Jasina',13               'Harrison Clark']14version = 'V1.1.2'15from classify import train, create_bow, load_training_data, prior, \16    p_word_given_label, classify, create_vocabulary17import unittest18class TestClassify(unittest.TestCase):19    def compare_dicts(self, a, b):20        '''Compares two dicts that map strings to other (non-container) data'''21        # Check that all elements of a are in b22        for k in a:23            self.assertIn(k, b)24            if isinstance(a[k], float):25                self.assertAlmostEqual(a[k], b[k])26            elif isinstance(a[k], dict):27                self.compare_dicts(a[k], b[k])28            else:29                self.assertEqual(a[k], b[k])30        # Check if b has unexpected extra entries31        for k in b:32            self.assertIn(k, a)33    # create_vocabulary(training_directory: str, cutoff: int)34    # returns a list35    def test_create_vocabulary(self):36        vocab = create_vocabulary('./EasyFiles/', 1)37        expected_vocab = [',', '.', '19', '2020', 'a', 'cat', 'chases', 'dog',38                'february', 'hello', 'is', 'it', 'world']39        self.assertEqual(vocab, expected_vocab)40        vocab = create_vocabulary('./EasyFiles/', 2)41        expected_vocab = ['.', 'a']42        self.assertEqual(vocab, expected_vocab)43    # create_bow(vocab: dict, filepath: str)44    # returns a dict45    def test_create_bow(self):46        vocab = create_vocabulary('./EasyFiles/', 1)47        
bow = create_bow(vocab, './EasyFiles/2016/1.txt')48        expected_bow = {'a': 2, 'dog': 1, 'chases': 1, 'cat': 1, '.': 1}49        self.assertEqual(bow, expected_bow)50        bow = create_bow(vocab, './EasyFiles/2020/2.txt')51        expected_bow = {'it': 1, 'is': 1, 'february': 1, '19': 1, ',': 1,52                '2020': 1, '.': 1}53        self.assertEqual(bow, expected_bow)54        vocab = create_vocabulary('./EasyFiles/', 2)55        bow = create_bow(vocab, './EasyFiles/2016/1.txt')56        expected_bow = {'a': 2, None: 3, '.': 1}57        self.assertEqual(bow, expected_bow)58    # load_training_data(vocab: list, directory: str)59    # returns a list of dicts60    def test_load_training_data(self):61        vocab = create_vocabulary('./EasyFiles/', 1)62        training_data = load_training_data(vocab, './EasyFiles/')63        expected_training_data = [64            {65                'label': '2020',66                'bow': {'it': 1, 'is': 1, 'february': 1, '19': 1, ',': 1,67                        '2020': 1, '.': 1}68            },69            {70                'label': '2016',71                'bow': {'hello': 1, 'world': 1}72            },73            {74                'label': '2016',75                'bow': {'a': 2, 'dog': 1, 'chases': 1, 'cat': 1, '.': 1}76            }77        ]78        self.assertCountEqual(training_data, expected_training_data)79    # prior(training_data: list, label_list: list)80    # returns a dict mapping labels to floats81    # assertAlmostEqual(a, b) can be handy here82    def test_prior(self):83        vocab = create_vocabulary('./corpus/training/', 2)84        training_data = load_training_data(vocab, './corpus/training/')85        log_probabilities = prior(training_data, ['2020', '2016'])86        expected_log_probabilities = {'2020': -0.32171182103809226,87                '2016': -1.2906462863976689}88        self.compare_dicts(log_probabilities, expected_log_probabilities)89    # p_word_given_label(vocab: list, 
training_data: list, label: str)90    # returns a dict mapping words to floats91    # assertAlmostEqual(a, b) can be handy here92    def test_p_word_given_label_2020(self):93        vocab = create_vocabulary('./EasyFiles/', 1)94        training_data = load_training_data(vocab, './EasyFiles/')95        log_probabilities = p_word_given_label(vocab, training_data, '2020')96        expected_log_probabilities = {',': -2.3513752571634776,97                '.': -2.3513752571634776, '19': -2.3513752571634776,98                '2020': -2.3513752571634776, 'a': -3.044522437723423,99                'cat': -3.044522437723423, 'chases': -3.044522437723423,100                'dog': -3.044522437723423, 'february': -2.3513752571634776,101                'hello': -3.044522437723423, 'is': -2.3513752571634776,102                'it': -2.3513752571634776, 'world': -3.044522437723423,103                None: -3.044522437723423}104        self.compare_dicts(log_probabilities, expected_log_probabilities)105        vocab = create_vocabulary('./EasyFiles/', 2)106        training_data = load_training_data(vocab, './EasyFiles/')107        log_probabilities = p_word_given_label(vocab, training_data, '2020')108        expected_log_probabilities = {'.': -1.6094379124341005,109                'a': -2.302585092994046, None: -0.35667494393873267}110        self.compare_dicts(log_probabilities, expected_log_probabilities)111    def test_p_word_given_label_2016(self):112        vocab = create_vocabulary('./EasyFiles/', 1)113        training_data = load_training_data(vocab, './EasyFiles/')114        log_probabilities = p_word_given_label(vocab, training_data, '2016')115        expected_log_probabilities = {',': -3.091042453358316,116                '.': -2.3978952727983707, '19': -3.091042453358316,117                '2020': -3.091042453358316, 'a': -1.9924301646902063,118                'cat': -2.3978952727983707, 'chases': -2.3978952727983707,119                'dog': -2.3978952727983707, 
'february': -3.091042453358316,120                'hello': -2.3978952727983707, 'is': -3.091042453358316,121                'it': -3.091042453358316, 'world': -2.3978952727983707,122                None: -3.091042453358316}123        self.compare_dicts(log_probabilities, expected_log_probabilities)124        vocab = create_vocabulary('./EasyFiles/', 2)125        training_data = load_training_data(vocab, './EasyFiles/')126        log_probabilities = p_word_given_label(vocab, training_data, '2016')127        expected_log_probabilities = {'.': -1.7047480922384253,128                'a': -1.2992829841302609, None: -0.6061358035703157}129        self.compare_dicts(log_probabilities, expected_log_probabilities)130    # train(training_directory: str, cutoff: int)131    # returns a dict132    def test_train(self):133        model = train('./EasyFiles/', 2)134        expected_model = {135            'vocabulary': ['.', 'a'],136            'log prior': {137                '2020': -0.916290731874155,138                '2016': -0.5108256237659905...

flow_configuration.py

Source:flow_configuration.py

# NOTE: the beginning of flow_configuration.py is elided in this excerpt; only
# the last statement of the preceding function is visible:
#     return 'Prepared images'


def create_vocab(**kwargs):
    """Extract features, build a vocabulary from them and save it.

    Args:
        **kwargs: Must contain ``imgdir``, passed to ``Vocabulary`` as
            ``imgs_path``.

    Returns:
        The status string ``'Created vocabulary'``.
    """
    voc = Vocabulary(imgs_path=kwargs['imgdir'], vocab_size=800, feature_type='daisy')
    all_features = voc.feature_extraction()
    voc.create_vocabulary(all_features)
    voc.save_vocabulary()
    return 'Created vocabulary'


# The excerpt defined create_hists twice with identical bodies; the second
# definition merely shadowed the first, so a single definition is kept.
def create_hists(**kwargs):
    """Create the pattern histograms and save them.

    Args:
        **kwargs: Must contain ``imgdir``, passed to ``PatternHistograms`` as
            ``path``.

    Returns:
        The status string ``'Created histograms'``.
    """
    ph = PatternHistograms(path=kwargs['imgdir'], feature_type='daisy')
    hists = ph.create_histogram()
    ph.save_histograms(hists)
    return 'Created histograms'


def compute_similarity(**kwargs):
    image_similarity = ImageSimilarity(path=sys.argv[1])
    # ... (the rest of this function is elided in this excerpt)

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.