Unlock 30% off on Manual Testing Annual Plans this Holiday Season.

Copied to Clipboard!

How to use clean_string method in Kiwi

Best Python code snippet using Kiwi_python

extract_bibtex_file.py

Source:extract_bibtex_file.py

# ... (lines 1-7 of the original listing are not shown; presumably they
# import ``sys``, which the next statement relies on -- confirm against the
# full file)
sys.path.insert(0,'../database_queries/')
from database_queries_biblio import *
from models import Biblio

selection = Biblio.select()#.where(Biblio.julio_state != "Excluded")


def clean_string(dirty_string):
    """Return *dirty_string* with every ampersand backslash-escaped."""
    return dirty_string.replace("&", "\\&")


# Field specs are (bibtex field name, attribute on the record, warning label).
# A label of None means the field is optional: it is silently skipped when
# empty.  Otherwise "Empty <label> in <bibtex_id>" is printed when it is empty.
_AUTHOR = ("author", "bibtex_full_author", "authors")
_TITLE = ("title", "bibtex_title", "title")
_OPTIONAL_AUTHOR = ("author", "bibtex_full_author", None)
_OPTIONAL_TITLE = ("title", "bibtex_title", None)
_OPTIONAL_PUBLISHER = ("publisher", "bibtex_publisher", None)
_VOLUME = ("volume", "volume", None)
_PAGES = ("pages", "pages", None)
_DOI = ("DOI", "doi", None)


def _warn_empty(label, element):
    """Print the 'Empty <label> in <bibtex_id>' diagnostic for *element*."""
    print("Empty " + label + " in " + element.bibtex_id)


def _text_fields(element, specs):
    """Render the non-empty fields described by *specs* as BibTeX lines.

    Returns a list of strings, one per emitted field.  Empty fields are never
    passed to ``clean_string`` (so a ``None`` value cannot crash the export);
    they only trigger a warning when the spec carries a label.
    """
    lines = []
    for name, attr, label in specs:
        value = getattr(element, attr)
        if value:
            lines.append("\t" + name + "={" + clean_string(value) + "},\n")
        elif label is not None:
            _warn_empty(label, element)
    return lines


def _year_field(element):
    """Return the ``year`` line, or '' (after a warning) when year is empty.

    Skipping the line avoids writing a literal ``year={None}`` into the file.
    """
    if element.year:
        return "\tyear={" + str(element.year) + "},\n"
    _warn_empty("year", element)
    return ""


def _assemble(entry_type, element, body_lines):
    """Wrap *body_lines* into a complete ``@entry_type{...}`` record.

    The ``type`` field is always appended last, and the result is returned
    UTF-8 encoded, as every public builder did originally.
    """
    parts = ["@" + entry_type + "{" + element.bibtex_id + ",\n"]
    parts.extend(body_lines)
    parts.append("\ttype={" + clean_string(element.type) + "},\n")
    parts.append("}\n")
    return "".join(parts).encode('utf-8')


def building_bibtex_conference(element):
    """Build an ``@inproceedings`` entry for *element* (UTF-8 encoded)."""
    specs = (
        _AUTHOR,
        _TITLE,
        ("booktitle", "journal", "conference"),
        ("address", "place", None),
        _OPTIONAL_PUBLISHER,
        _VOLUME,
        _PAGES,
        ("ISBN", "isbn_issn", None),
        _DOI,
    )
    body = _text_fields(element, specs)
    body.append(_year_field(element))
    return _assemble("inproceedings", element, body)


def building_bibtex_journal(element):
    """Build an ``@article`` entry for *element* (UTF-8 encoded)."""
    specs = (
        _AUTHOR,
        _TITLE,
        ("journal", "journal", "journal"),
        _VOLUME,
        ("number", "issue", None),
        _PAGES,
        ("ISSN", "isbn_issn", None),
        _DOI,
    )
    body = _text_fields(element, specs)
    body.append(_year_field(element))
    return _assemble("article", element, body)


def building_bibtex_booksection(element):
    """Build an ``@inbook`` entry for *element* (UTF-8 encoded).

    Author, title, booktitle, publisher and pages are reported when empty.
    They are now checked *before* being escaped, so a missing value produces
    a warning instead of an AttributeError.
    """
    specs = (
        _AUTHOR,
        _TITLE,
        ("booktitle", "journal", "booktitle"),
        ("publisher", "bibtex_publisher", "publisher"),
        ("pages", "pages", "pages"),
        _VOLUME,
        ("ISSN", "isbn_issn", None),
        _DOI,
    )
    body = _text_fields(element, specs)
    body.append(_year_field(element))
    return _assemble("inbook", element, body)


def building_bibtex_thesis(element):
    """Build a ``@phdthesis`` entry for *element* (UTF-8 encoded)."""
    specs = (
        _AUTHOR,
        _TITLE,
        ("school", "bibtex_publisher", "school"),
        _DOI,
    )
    body = _text_fields(element, specs)
    body.append(_year_field(element))
    return _assemble("phdthesis", element, body)


def building_bibtex_report(element):
    """Build a ``@techreport`` entry for *element* (UTF-8 encoded)."""
    specs = (
        _AUTHOR,
        _TITLE,
        ("institution", "bibtex_publisher", "institution"),
        _DOI,
    )
    body = _text_fields(element, specs)
    body.append(_year_field(element))
    return _assemble("techreport", element, body)


def building_bibtex_book(element):
    """Build a ``@book`` entry for *element* (UTF-8 encoded)."""
    specs = (
        _AUTHOR,
        _TITLE,
        ("publisher", "bibtex_publisher", "publisher"),
        _VOLUME,
        _DOI,
    )
    body = _text_fields(element, specs)
    body.append(_year_field(element))
    return _assemble("book", element, body)


def building_bibtex_web(element):
    """Build a ``@misc`` entry for a web resource (UTF-8 encoded).

    Besides the usual optional fields, the URL, the year and the access date
    are each reported when empty.  The access date is written as a ``note``.
    """
    specs = (_OPTIONAL_AUTHOR, _OPTIONAL_TITLE, _OPTIONAL_PUBLISHER, _DOI)
    body = _text_fields(element, specs)
    if element.url:
        # The URL is deliberately not passed through clean_string.
        body.append("\turl={" + str(element.url) + "},\n")
    else:
        _warn_empty("URL", element)
    body.append(_year_field(element))
    if element.date:
        # Tab moved in front of the field name to match every other field
        # (the original emitted "note<TAB>={...").
        body.append(
            "\tnote={(Date last accessed " + str(element.date) + ")},\n")
    else:
        _warn_empty("date accessed", element)
    return _assemble("misc", element, body)


def building_bibtex_misc(element):
    """Build a generic ``@misc`` entry for *element* (UTF-8 encoded)."""
    specs = (
        _OPTIONAL_AUTHOR,
        _OPTIONAL_TITLE,
        _OPTIONAL_PUBLISHER,
        _VOLUME,
        _DOI,
    )
    body = _text_fields(element, specs)
    body.append(_year_field(element))
    return _assemble("misc", element, body)


def extracting_bibtex_file(filename):
    """Write the records in ``selection`` to *filename*.

    NOTE(review): only the beginning of this function is visible in the
    listing; the per-type dispatch that follows is truncated.
    """
    there_are_journal = False
    there_are_conferenceproceedings = False
    there_are_book_section = False
    there_are_thesis = False
    there_are_reports = False
    there_are_books = False
    there_are_webs = False
    there_are_other_things = False
    with open(filename, 'w') as f:
        for element in selection:
            if element.type == "Conference Proceedings":
                pass  # ... (the listing is truncated here; remainder not shown)

mostCommonLines.py

Source:mostCommonLines.py

#find and list repeated lines from a book (txt file)
import re
from collections import Counter

# Raw strings: the art is full of backslashes that are not escape sequences.
BANNER = (
    r"                                                                    ",
    r"              .__=\__                  .__==__,         Most        ",
    r"            jf       ~~=\,         _=/~       `\,       Common      ",
    r"        ._jZ             `\q,   /=~             `\__    Lines       ",
    r"       j5(/                 `\./                  V\,              ",
    r"     .Z))' _____              |             .____, \)/\   -D.Murray ",
    r"    j5(K=~~     ~~~~\=_,      |      _/=~~~~     `~~+K\,           ",
    r"  .Z)\/                `~=L   |  _=/~                 t\ZL          ",
    r" j5(_/.__/===========\__   ~q |j/   .__============___/\J(N,        ",
    r" 4L#XXXL_________________XGm, \P  .mXL_________________JXXXW8L      ",
)

# Ellipsis-like runs that are deleted before sentences are split, applied in
# this order (longest first).
_DOT_RUNS = tuple("." * length for length in (6, 5, 4, 3, 2))
_SPACED_DOT_RUNS = (". . . .", ". . .", ". .")
# Runs of 9 down to 2 spaces are deleted outright (not collapsed to one).
_SPACE_RUNS = tuple(" " * length for length in range(9, 1, -1))


def split_into_sentences(text):
    """Return *text* rewritten so that each sentence sits on its own line.

    Existing newlines become spaces, runs of dots (plain or space-separated)
    are deleted, every remaining ``.``, ``?`` and ``!`` becomes a newline, and
    finally runs of two to nine spaces are deleted.
    """
    cleaned = text.replace("\n", " ")  # join it all up into one long line
    for run in _DOT_RUNS + _SPACED_DOT_RUNS:
        cleaned = cleaned.replace(run, "")
    # Sentence terminators become the markers for new lines.
    cleaned = re.sub(r"[.?!]", "\n", cleaned)
    for run in _SPACE_RUNS:
        cleaned = cleaned.replace(run, "")
    return cleaned


def find_repeated_lines(lines):
    """Return the set of entries that occur more than once in *lines*.

    A single counting pass replaces the original all-pairs comparison and
    yields the same set of repeated lines.
    """
    tally = Counter(lines)
    return {line for line, seen in tally.items() if seen > 1}


def main():
    """Clean ``alice.txt``, save it as ``editedVersion.txt``, list repeats."""
    for row in BANNER:
        print(row)
    print("")
    print("Opening the book..")
    with open("alice.txt","r") as book_file:
        # Lowercase everything so that "The" and "the" compare equal.
        book_text = book_file.read().lower()

    # The cleaned text is written out and read back on purpose: the file can
    # be opened by hand to check that it looks right before trusting results.
    with open("editedVersion.txt", "w") as edited_file:
        edited_file.write(split_into_sentences(book_text))
    with open("editedVersion.txt", "r") as edited_file:
        all_book_lines = edited_file.readlines()

    total_number_of_lines = len(all_book_lines)
    print("This book has", total_number_of_lines, "lines")
    print("This would take about", total_number_of_lines//23, "minutes to read")
    if total_number_of_lines>3000:
        # NOTE: this estimate was calibrated for the old pairwise scan; the
        # counting pass finishes much sooner.  Message kept for compatibility.
        print("This is actually pretty big. This could take up to ", total_number_of_lines//400, "seconds..")
    print("Okay, here we go")
    print("========================================================")
    for line in find_repeated_lines(all_book_lines):
        print(line)
    print("=================================================")
    print("The lines above were all repeated at least once.\n")


if __name__ == "__main__":
    main()

21. For Loops 08 -.py

Source:21. For Loops 08 -.py

import re
from collections import Counter


def extract_words(text):
    """Return the lowercase words of *text* that are five letters or longer.

    Everything except ASCII letters, spaces and dots is first turned into a
    space.  Words of one to four characters are then removed, dots are
    deleted (so ``alpha.bravo`` becomes ``alphabravo``), and what remains is
    lowercased and split on whitespace.
    """
    # Digits, newlines, tabs and all punctuation except "." become spaces.
    cleaned = re.sub(r'[^a-zA-Z .]', ' ', text)
    # Removes words of four or fewer characters.
    # https://stackoverflow.com/questions/24332025/remove-words-of-length-less-than-4-from-string
    cleaned = re.sub(r'\b\w{1,4}\b', '', cleaned)
    cleaned = cleaned.replace('.', '')
    return cleaned.lower().split()


def main():
    """Print the 20 most common words of ``sample_text.txt`` and plot all."""
    # Imported here so the text-processing helper stays usable (and testable)
    # without plotly installed.  Only the offline plotting API is needed; the
    # old ``plotly.plotly`` module no longer ships with plotly itself.
    import plotly.offline
    from plotly.graph_objs import Bar, Layout

    with open("sample_text.txt","r") as source:
        text = source.read()

    c = Counter(extract_words(text))   #https://pymotw.com/2/collections/counter.html
    word = list(c)
    word_count = [c[entry] for entry in word]

    print ('Most common:')
    for entry, count in c.most_common(20):
        print('%s: %7d' % (entry, count))

    plotly.offline.plot({           #https://plot.ly/python/getting-started/#initialization-for-offline-plotting
        "data": [Bar(x=word, y=word_count)],
        "layout": Layout(title="word count")
    })


if __name__ == "__main__":
    main()

# If possible collect samples from your peers and compare the vocabulary distribution, average number of words, word frequency etc.

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.