Best Python code snippet using localstack_python
process_data.py
Source:process_data.py  
from textblob import TextBlob
import os
from os import walk

from nltk import tokenize  # used by the commented-out expand_data below

import PhraseBaseSummarization  # project-local module (not shown on this page)
import sentence                 # project-local module (not shown on this page)

mypath = "data/topics"
list_topics = []
for (dirpath, dirnames, filenames) in walk(mypath):
    list_topics.extend(filenames)

number_topic = len(list_topics)
# number_topic = 20

############# PREPARE #############
def count_num_sen(dir_gold_corpus, dir_out, list_topics):
    """Write one budget per topic: the average word count of its gold documents."""
    out = open(dir_out + "budget", mode="w")
    for topic in list_topics:
        # get the list of files in the topic directory
        list_docs = []
        for (dirpath, dirnames, filenames) in walk(dir_gold_corpus + topic[:-9]):
            list_docs.extend(filenames)
        num_words = 0
        for doc in list_docs:
            with open(dir_gold_corpus + topic[:-9] + "/" + doc, mode="r",
                      encoding="utf-8", errors="replace") as f:
                num_words += len(f.read().split())
        budget = int(num_words / len(list_docs))
        out.write(str(budget))
        out.write("\n")
    out.close()

def split_gold(dir_gold_corpus, dir_out, list_topics):
    """Keep only the odd-numbered lines of each gold document."""
    for topic in list_topics:
        # get the list of files in the topic directory
        list_docs = []
        for (dirpath, dirnames, filenames) in walk(dir_gold_corpus + topic[:-9]):
            list_docs.extend(filenames)
        if not os.path.exists(dir_out + topic[:-9]):
            os.makedirs(dir_out + topic[:-9])
        for doc in list_docs:
            with open(dir_gold_corpus + topic[:-9] + "/" + doc, mode="r",
                      encoding="utf-8", errors="replace") as f:
                sentences = f.read().split("\n")
            out = open(dir_out + topic[:-9] + "/" + doc, "w")
            list_sentence = []
            for i in range(len(sentences)):
                if i % 2 != 0:
                    list_sentence.append(sentences[i])
                    list_sentence.append("\n")
            out.writelines(list_sentence[:-1])
            out.close()

# split_gold("data/gold/", "data/goldvn/", list_topics[:number_topic])
# count_num_sen("data/goldvn/", "data/budget/", list_topics[:number_topic])
##################################

def find_index(list_words, phrase, start, end):
    """Return the index of the first occurrence of `phrase` (a token list)
    in list_words[start:end], or -1 if it does not occur."""
    for i in range(start, end):
        # guard against running past the end of the token list
        if i + len(phrase) > len(list_words):
            break
        if list_words[i:i + len(phrase)] == phrase:
            return i
    return -1

def translated(list_topics, start_idx, end_idx, dir_topic, dir_out, parameter):
    # Translate each topic file sentence by sentence, record the token spans
    # of its noun phrases, then summarize under the per-topic budget.
    fileBudget = open("data/budget/budget", "r")
    budget_string = fileBudget.read().split("\n")
    budget = [int(b) for b in budget_string[:-1]]
    number_sentence = 0
    for i in range(start_idx, end_idx):
        print("process " + list_topics[i])
        file_in = open(dir_topic + list_topics[i], mode="r",
                       encoding="utf-8", errors="replace")
        parameter[-1] = budget[i]
        sentences = file_in.read().splitlines()
        phrase_sentences = []
        for line in sentences:
            out_sen = ""  # reset per line so empty lines do not repeat the previous sentence
            phrase = []
            if len(line) != 0:
                number_sentence += 1
                try:
                    blob = TextBlob(line)
                    translated_blob = blob.translate(to="vi")
                    out_sen = " ".join(translated_blob.tokens)
                    start = 0
                    out_sen_tmp = out_sen.lower().split()
                    end = len(out_sen_tmp)
                    for nphrase in translated_blob.noun_phrases:
                        nphrase = nphrase.split()
                        k = find_index(out_sen_tmp, nphrase, start, end)
                        if k == -1:  # phrase not found in the tokenized sentence
                            continue
                        start = k + len(nphrase)
                        # store 1-based token indices for this noun phrase
                        phrase.append([j + 1 for j in range(k, k + len(nphrase))])
                except Exception:
                    # translation failed: fall back to the original line, no phrases
                    out_sen = line
                    phrase = []
            if out_sen != "":
                sen = sentence.sentence(out_sen, phrase)
                phrase_sentences.append(sen)
        summarizer = PhraseBaseSummarization.phrase_based_summarization(phrase_sentences)
        summary = summarizer.summarizer(parameter)
        fileOut = open(dir_out + list_topics[i], "w")
        fileOut.write(summary)
        print("finish " + list_topics[i])
        fileOut.close()
    print("no.sentence:", number_sentence)

parameter = []
parameter.append(0.025)   # parameter_d
parameter.append(-1.75)   # nuy
parameter.append(-1.75)   # parameter_epi
parameter.append(0.0001)  # parameter_select
parameter.append(0.0001)  # parameter_pre
parameter.append(0.0001)  # parameter_next
parameter.append(0)       # budget
# translated(list_topics[:number_topic], 0, number_topic, "data/topics/", "data/summary/", parameter)

#
# def expand_data(list_topics, dir_root, dir_topic, dir_translated, dir_out):
#     for i in range(number_topic):
#         file_en = open(dir_root + "/en")
#         file_vn = open(dir_root + "/vn")
#
#         file_en_translated = open(dir_topic + "/" + list_topics[i], mode="r")
#         file_vn_translated = open(dir_translated + "/" + list_topics[i], mode="r")
#
#         fileOut = open(dir_out + "/en" + str(i), mode="w")
#         fileOut.write(file_en.read())
#         sentences = file_en_translated.read().decode("utf8", "replace").split('\n')
#         for line in sentences:
#             words = tokenize.word_tokenize(line)
#             fileOut.write(" ".join(words))
#             fileOut.write("\n")
#         fileOutvn = open(dir_out + "/vn" + str(i), mode="w")
#         fileOutvn.write(file_vn.read())
#         fileOutvn.write(file_vn_translated.read())
#
#         fileOut.close()
#         fileOutvn.close()
#
...
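The snippet imports two project-local modules, `sentence` and `PhraseBaseSummarization`, whose sources are not shown on this page. A minimal sketch of the interface the calls above imply, with hypothetical attribute names, could look like this:

# Hypothetical stubs for the local modules used by process_data.py.
# Only the constructor signatures and the summarizer(parameter) method
# are implied by the calls above; everything else is an assumption.

# sentence.py
class sentence:
    def __init__(self, text, phrases):
        self.text = text        # the translated sentence string
        self.phrases = phrases  # one list of 1-based token indices per noun phrase

# PhraseBaseSummarization.py
class phrase_based_summarization:
    def __init__(self, sentences):
        self.sentences = sentences  # list of sentence objects for one topic

    def summarizer(self, parameter):
        # Placeholder: a real implementation would select sentences/phrases
        # under the length budget stored in parameter[-1].
        return "\n".join(s.text for s in self.sentences)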
topic_check.py
Source:topic_check.py
from confluent_kafka.admin import AdminClient

def topic_exists(topic):
    """Checks if the given topic exists in Kafka"""
    client = AdminClient({"bootstrap.servers": "PLAINTEXT://localhost:9092"})
    topic_metadata = client.list_topics(timeout=5)
    return topic in topic_metadata.topics

def contains_substring(to_test, substr):
    _before, match, _after = to_test.partition(substr)
    return len(match) > 0

def topic_pattern_match(pattern):
    """
    Takes a string `pattern`.
    Returns `True` if one or more topic names contain the substring `pattern`,
    `False` otherwise.
    """
    client = AdminClient({"bootstrap.servers": "PLAINTEXT://localhost:9092"})
    topic_metadata = client.list_topics(timeout=5)
    topics = topic_metadata.topics
    filtered_topics = {key: value for key, value in topics.items() if contains_substring(key, pattern)}
    return len(filtered_topics) > 0
...
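A minimal usage sketch for these helpers, assuming a broker is reachable at PLAINTEXT://localhost:9092; the topic name below is only an example:

# Hypothetical smoke test; "purchases" is an example topic name.
if __name__ == "__main__":
    print(topic_exists("purchases"))     # True only if that topic exists
    print(topic_pattern_match("purch"))  # True if any topic name contains "purch"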
controlmethod.py
Source:controlmethod.py
# from summarize import summarize
# import sys
# from os import walk
# import os
# from BingTranslator import Translator
#
# # from sum_lib import summarize
# #
# # import nltk
# # nltk.download(['stopwords', 'punkt'])
#
# # def count_num_sen(dir_gold_corpus, dir_out, list_topics):
# #     number_topic = len(list_topics)
# #     out = open(dir_out + "budget_sentence.txt", mode="w+")
# #     for topic in list_topics:
# #         #get the list of file in topic
# #         list_docs = []
# #         for (dirpath, dirnames, filenames) in walk(dir_gold_corpus + (topic[:-9])):
# #             list_docs.extend(filenames)
# #         num_char = 0
# #         for doc in list_docs:
# #             f = open(dir_gold_corpus+topic[:-9]+"/" + doc, mode="r+").read()
# #             num_char += len(f.split("\n"))
# #         budget = int(num_char*1.0/len(list_docs))
# #         out.write(str(budget))
# #         out.write("\n")
# #     out.close()
# #
# mypath = "data/topics"
#
# list_topics = []
# for (dirpath, dirnames, filenames) in walk(mypath):
#     list_topics.extend(filenames)
#
# reload(sys)
# sys.setdefaultencoding('utf-8')
#
# number_topic = len(list_topics)
#
# #count_num_sen(dir_gold_corpus="data/goldvn/", dir_out="data/budget/",list_topics=list_topics)
#
# budget = open("data/budget/budget_sentence.txt","r").read().split("\n")
# # print budget
#
# # ss = summarize.SimpleSummarizer()
#
# client_id = "tuesdayhcm"
# client_secret = "123456789secretkey123456789"
#
# translator = Translator(client_id, client_secret)
#
# for i in range(len(list_topics)):
#     text = open("data/topics/" + list_topics[i],"r+").read()
#     output = open("data/baselinesummary/"+list_topics[i], "w")
#     summary = ""
#     #text=text.encode("utf-8")
#     try:
#         # summary = ss.summarize(text, budget[i])
#         summary = summarize(text=text,sentence_count=int(budget[i]),language='english')
#
#         #translated
#         phrase_translated = translator.translate(summary, "vi") #translating phrase
#         output.write(phrase_translated)
#     except:
#         print(list_topics[i])
#         output.write("")
#         output.close()
#         #os.remove("data/baselinesummary/"+list_topics[i])
#     print(i)
#     #summary = ss.summarize(text,budget[i])

# import urllib
#
from textblob import TextBlob
# from translate import translator
# from BingTranslator import Translator
#
#
text = "I am a student"
blob = TextBlob(text)
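The live tail of controlmethod.py stops right after building the TextBlob. A short sketch of the TextBlob calls the other snippets rely on follows; note that `translate()` was removed from textblob in 0.16.0, so the call is wrapped defensively and this only works on older releases:

# Continuing from the blob created above.
print(blob.tokens)        # WordList(['I', 'am', 'a', 'student'])
print(blob.noun_phrases)  # noun-phrase chunks (needs `python -m textblob.download_corpora`)

try:
    print(blob.translate(to="vi"))  # only available on textblob < 0.16
except Exception as exc:
    print("translate() unavailable:", exc)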
