How to use after_find method in SeleniumBase

Best Python code snippet using SeleniumBase

Preprocessing.py

Source:Preprocessing.py Github

copy

Full Screen

# -*- coding:utf-8 -*-
"""Extract publication metadata (date, title, abstract, text) from an HTML soup.

NOTE(review): reconstructed from a line-number-mangled scrape of
Preprocessing.py; behavior-affecting fixes are flagged inline with FIX.
"""
import re
import nltk
import string
import codecs
from collections import Counter
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize
from nltk.tokenize import RegexpTokenizer
from nltk import pos_tag

# 0-based month lookup tables shared by the date helpers.
MONTH_LIST_FULL = ['january', 'february', 'march', 'april', 'may', 'june',
                   'july', 'august', 'september', 'october', 'november',
                   'december']
MONTH_LIST_ABBR = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                   'jul', 'aug', 'sep', 'oct', 'nov', 'dec']

# The lowercase alternation deliberately omits 'may': the bare word "may"
# is too common in running text to be a reliable month marker.
PATTERN_MONTH_FULL = re.compile(
    'January|February|March|April|May|June|July|August|September|October|'
    'November|December|january|february|march|april|june|july|august|'
    'september|october|november|december')
PATTERN_MONTH_ABBR = re.compile(
    'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|'
    'jan|feb|mar|apr|jun|jul|aug|sep|oct|nov|dec')
PATTERN_YEAR = re.compile(r'(\d{4})')


class Get_HTML_Information:
    """Get HTML data like (1) published year/month (2) title (3) abstract
    (4) full_text (5) only_text from a parsed BeautifulSoup document."""

    def __init__(self, soup):
        # soup: a BeautifulSoup document (assumed -- TODO confirm at call site).
        self.soup = soup

    # ------------------------------------------------------------------ #
    # internal helpers
    # ------------------------------------------------------------------ #
    def _body_words(self):
        """Whitespace-split words of the <body> text (whole soup if absent)."""
        if self.soup.body:
            return self.soup.body.get_text().split()
        return self.soup.get_text().split()

    def _date_metas(self):
        """Yield (meta, content) for <meta name=...> whose name hints a date."""
        pattern_date_value = re.compile('year|date|issue', re.I)
        for meta in self.soup.find_all('meta'):
            for key, value in meta.attrs.items():
                if (key == 'name' and isinstance(value, str)
                        and pattern_date_value.search(value)):
                    # FIX: .get avoids a KeyError when 'content' is absent.
                    yield meta, meta.attrs.get('content', '')
                    break

    @staticmethod
    def _years_near_month(words, pattern_month):
        """4-digit years found within two words of a month name.

        FIX: the original indexed words[j+1]/words[j+2] without bounds
        checks (IndexError at the end of the document) and words[j-1]
        wrapped around to the last word when j == 0.
        """
        years = []
        n = len(words)
        for j, word in enumerate(words):
            if not pattern_month.search(word):
                continue
            for offset in (1, 2):
                after = (PATTERN_YEAR.search(words[j + offset])
                         if j + offset < n else None)
                before = (PATTERN_YEAR.search(words[j - offset])
                          if j - offset >= 0 else None)
                hit = after or before          # prefer the word after, as before
                if hit:
                    years.append(int(hit.group(0)))
                    break
        return years

    @staticmethod
    def _dates_near_month(words, pattern_month, month_list):
        """Fractional dates (year + month_index/12) near month names in text.

        FIX: in the original, the offset-2 'elif before_find' branches read
        after_find.group(0) while after_find was None (AttributeError); the
        match that was actually found is now used.  Bounds-checked like
        _years_near_month.
        """
        dates = []
        n = len(words)
        for j, word in enumerate(words):
            month_match = pattern_month.search(word)
            if not month_match:
                continue
            name = month_match.group(0).lower()
            month_index = month_list.index(name) if name in month_list else -1
            for offset in (1, 2):
                after = (PATTERN_YEAR.search(words[j + offset])
                         if j + offset < n else None)
                before = (PATTERN_YEAR.search(words[j - offset])
                          if j - offset >= 0 else None)
                hit = after or before
                if hit:
                    dates.append(round(int(hit.group(0)) + month_index / 12, 2))
                    break
        return dates

    @staticmethod
    def _numeric_date(m):
        """year + (month-1)/12 from a slash/dash date match, resolving
        ambiguous day/month order by which field exceeds 12.

        FIX: the original looped three times appending duplicates, and in
        the no-4-digit-year fallback computed the value but never stored
        it, so min() could later be taken of an empty list (ValueError).
        """
        g1, g2, g3 = m.group(1), m.group(2), m.group(3)
        if len(g1) == 4:                       # YYYY/MM/DD
            year, month = int(g1), int(g2)
        elif len(g3) == 4:                     # DD/MM/YYYY or MM/DD/YYYY
            year = int(g3)
            if int(g1) > 12 and int(g2) < 13:
                month = int(g2)
            elif int(g2) > 12 and int(g1) < 13:
                month = int(g1)
            else:                              # ambiguous: assume middle = month
                month = int(g2)
        else:                                  # no 4-digit year anywhere
            year, month = int(g2), int(g1)
        return round(year + (month - 1) / 12, 2)

    @staticmethod
    def _token_dates(content, month_match, month_list):
        """Fractional dates from a 'day Month year'-style meta content."""
        name = month_match.group(0).lower()
        month_index = month_list.index(name) if name in month_list else -1
        dates = []
        tokens = sorted(content.split())       # digits sort before letters
        if len(tokens) == 3 and tokens[0].isdigit() and tokens[1].isdigit():
            for tok in tokens:
                if len(tok) == 4 and tok.isdigit():
                    dates.append(round(int(tok) + month_index / 12, 2))
        return dates

    # ------------------------------------------------------------------ #
    # public API
    # ------------------------------------------------------------------ #
    def get_pub_year(self):
        """Publish year from the HTML soup.

        Tries date-like <meta> tags first, then years adjacent to full
        month names in the body text, then abbreviated month names.
        Returns the sentinel 1234 when nothing is found.
        """
        pattern_slash = re.compile(r'(\d+)/(\d+)/(\d+)')
        date_group = []
        for _meta, content in self._date_metas():
            date_find = pattern_slash.search(content)
            if date_find:
                # take the first 4-digit field as the year
                for k in range(1, 4):
                    if len(date_find.group(k)) == 4:
                        date_group.append(int(date_find.group(k)))
                        break
        if date_group:
            return min(date_group)
        if not self.soup.body:
            print('soup error')
        words = self._body_words()
        for pattern in (PATTERN_MONTH_FULL, PATTERN_MONTH_ABBR):
            date_group = self._years_near_month(words, pattern)
            if date_group:
                return min(date_group)
        return 1234                            # sentinel: no year found

    def get_pub_year_month(self):
        """(fractional_date, year) where fractional_date encodes the month
        as year + (month-1)/12 (month index/12 for text-derived dates, as
        in the original).  Returns the sentinel (1234.0, 1234) when
        nothing is found.
        """
        pattern_slash = re.compile(r'(\d+)/(\d+)/(\d+)')
        pattern_dash = re.compile(r'(\d+)-(\d+)-(\d+)')
        date_group = []
        for _meta, content in self._date_metas():
            date_find = (pattern_slash.search(content)
                         or pattern_dash.search(content))
            month_find = PATTERN_MONTH_FULL.search(content)
            month_2_find = PATTERN_MONTH_ABBR.search(content)
            if date_find:
                date_group.append(self._numeric_date(date_find))
                return min(date_group), int(min(date_group))
            elif month_find:
                date_group.extend(self._token_dates(content, month_find,
                                                    MONTH_LIST_FULL))
                # FIX: the original returned min(date_group) unconditionally
                # here, raising ValueError when the list was still empty.
                if date_group:
                    return min(date_group), int(min(date_group))
            elif month_2_find:
                date_group.extend(self._token_dates(content, month_2_find,
                                                    MONTH_LIST_ABBR))
                if date_group:
                    return min(date_group), int(min(date_group))
            elif len(content) == 4 and PATTERN_YEAR.search(content):
                # year-only content, e.g. content="2019"
                date_group.append(round(int(content) + 0.0, 2))
        if date_group:
            return min(date_group), int(min(date_group))
        words = self._body_words()
        for pattern, table in ((PATTERN_MONTH_FULL, MONTH_LIST_FULL),
                               (PATTERN_MONTH_ABBR, MONTH_LIST_ABBR)):
            date_group = self._dates_near_month(words, pattern, table)
            if date_group:
                return min(date_group), int(min(date_group))
        return 1234.0, 1234

    def get_title_sentence(self):
        """Title from <meta name=...title...> (longest candidate wins),
        falling back to the <title> tag with any trailing '- Journal'
        suffix removed; 'no title' when neither exists."""
        pattern_title = re.compile('title', re.I)
        title = ""
        for meta in self.soup.find_all('meta'):
            content = meta.attrs.get('content', '')
            for key, value in meta.attrs.items():
                if (key == 'name' and isinstance(value, str)
                        and pattern_title.search(value)):
                    if len(title) < len(content):
                        title = content
                    break
        if title:
            return title
        title_tags = self.soup.find_all('title')
        if not title_tags or not title_tags[0].contents:
            return 'no title'
        title = title_tags[0].contents[0].strip()
        journal_hits = re.compile('- [A-Z]{1}[a-z]+').findall(title)
        if journal_hits:
            # FIX: re.escape prevents the matched text from being re-read
            # as a regex pattern when locating the last journal suffix.
            suffix = re.search(re.escape(journal_hits[-1]), title)
            title = title[:suffix.start()]
        return title

    def get_only_text(self):
        """Body text with <title>/<button>/<table> removed; lines longer
        than 3 chars kept, whitespace collapsed, one per output line.

        Destructive: the removed tags are extracted from self.soup.
        FIX: the original called i.strip() / full_text.lstrip() and
        discarded the results, leaving a stray leading space per line.
        """
        for tag in self.soup(['title', 'button', 'table']):
            tag.extract()
        if self.soup.body:
            text = self.soup.body.get_text()
        else:
            text = self.soup.get_text()
        kept = [' '.join(line.split())
                for line in text.splitlines() if len(line) > 3]
        return ''.join(line + '\n' for line in kept)

    def get_full_text(self):
        """The whole document re-serialized as pretty-printed HTML."""
        return self.soup.prettify()

    def get_abst(self, title):
        """Abstract/body text between the title (or 'Abstract' marker /
        publication line) and the first back-matter heading.

        <sub>/<sup> contents are tagged with the sentinel 'qorwns' so
        that their fragments can be glued back without inserted spaces.
        Destructive: replaces <sub>/<sup> nodes in self.soup.
        """
        for tag_name in ('sub', 'sup'):
            for node in self.soup.find_all(tag_name, string=True):
                node.replace_with(node.string.strip() + 'qorwns')
        # collapse blank lines, then join lines with sentinel-aware gluing
        text = re.sub(r'\n\s*\n', r'\n', self.soup.get_text().strip(),
                      flags=re.M)
        lines = [line.lstrip() for line in text.splitlines()]  # FIX: was shadowing builtin 'list'
        full_text = ""
        for i, line in enumerate(lines):
            if not line:
                continue
            if line[-1] == " ":
                full_text += line
                if line[-6:] == 'qorwns':        # NOTE(review): unreachable after the
                    full_text = full_text[:-6]   # trailing-space test; kept as in original
            elif line[-6:] == 'qorwns':
                full_text += line[:-6]
            elif i > 0 and lines[i - 1][-6:] == 'qorwns' and len(line) == 1:
                # FIX: i > 0 guard; the original read lines[-1] on the first line
                full_text += line
            else:
                full_text += ' ' + line
        # drop everything before the title, if the title prefix is found
        index = full_text.find(title[:10])
        if index != -1:
            full_text = full_text[index:]
        # cut at the 'Abstract' marker, or just after the publication line
        index = full_text.find('Abstract')
        if index == -1:
            index = full_text.find('abstract')
        if index != -1:
            full_text = full_text[index + 8:]    # 8 == len('Abstract')
        else:
            pattern_pub_date = re.compile('publish|publicat', re.I)
            pub_suffix = ""
            for line in lines:
                if not line:
                    continue
                pub_find = pattern_pub_date.search(line)
                if pub_find:
                    pub_suffix = line[line.find(pub_find.group(0)):-1]
                    break
            if pub_suffix:
                index = full_text.find(pub_suffix)
                # FIX: only cut when the suffix is actually present;
                # the original sliced from a bogus offset when find() == -1
                if index != -1:
                    full_text = full_text[index + len(pub_suffix) + 1:]
        # remove back-matter from the first known heading onward
        head_list = ['Acknowled', 'acknowled', 'Reference', 'Copyright',
                     'copyright', 'Advertisement', 'COLLAPSE',
                     'Article Information', 'Cookies']
        cut_points = [full_text.find(h) for h in head_list
                      if full_text.find(h) > 0]
        if cut_points:
            full_text = full_text[:min(cut_points)]
        # FIX: the original sliced [:-1] (silently dropping the final
        # character) when no back-matter heading was found
        return full_text


def get_tokens(text):
    """Lowercased, punctuation-stripped word tokens of *text*; pure digits
    and tokens with residual non-alphanumerics are dropped.

    FIX: str.maketrans(many_chars, " ") raised ValueError (unequal
    lengths) -- each char is now mapped to a space explicitly; and the
    original tested item.isalnum (an always-truthy method object) instead
    of calling it.
    """
    p2 = re.compile(r'\(.{0,10}\)|\[.{0,10}\]|\{.{0,10}\}')
    p = re.compile(r'(?P<name>[A-Z]+)(?P<bar>[-−———––]{1})')
    # drop a dash glued to an all-caps token: "DNA-binding" -> "DNAbinding"
    text2 = p.sub(r'\g<name>', text)
    lowers = p2.sub(' ', text2.lower())        # remove short bracketed spans
    to_space = ',-−——→—×≪‖⊥∼〉〈≤≥→"′‘≈“”&\'()≡+:;<=>_`{|}~·––/'
    no_punct = lowers.translate(str.maketrans(to_space, ' ' * len(to_space)))
    no_punct = no_punct.translate(str.maketrans(
        "", "",
        '!©∧↑χσηϕμτθ∞φ∑()γλ†#±⋯$δ°⋅β%α*.á?@\\^âåå[]' + string.punctuation))
    # whitespace tokenization (equivalent to RegexpTokenizer('\s+', gaps=True))
    return [tok for tok in no_punct.split()
            if not tok.isdigit() and tok.isalnum()]


def lemmatize_tokens(tokens, lemmatizer):
    """Lemmatize every token with the given (WordNet-style) lemmatizer."""
    return [lemmatizer.lemmatize(item) for item in tokens]


def stem_tokens(tokens, stemmer):
    """Stem every token with the given stemmer."""
    return [stemmer.stem(item) for item in tokens]


def lemmatize_tokens_for_pos(tokens, lemmatizer):
    """Lemmatize (word, POS) pairs: nouns/verbs are lemmatized with their
    WordNet tag, adjectives/adverbs are dropped, everything else passes
    through unchanged.

    NOTE(review): the scraped source truncates here; the trailing
    `return lemma` is the assumed ending -- confirm against the original.
    """
    lemma = []
    for word, pos in tokens:
        tag = pos[0].lower()
        tag = tag if tag in ('a', 'r', 'n', 'v') else None
        if not tag:
            lemma.append(word)            # unknown POS: keep the raw word
        elif tag in ('a', 'r'):
            continue                      # drop adjectives and adverbs
        else:
            lemma.append(lemmatizer.lemmatize(word, tag))
    return lemma

Full Screen

Full Screen

event_firing_webdriver_tests.py

Source:event_firing_webdriver_tests.py Github

copy

Full Screen

...88def test_should_fire_find_event(driver, log, pages):89 class EventListener(AbstractEventListener):90 def before_find(self, by, value, driver):91 log.write(("before_find by %s %s" % (by, value)).encode())92 def after_find(self, by, value, driver):93 log.write(("after_find by %s %s" % (by, value)).encode())94 ef_driver = EventFiringWebDriver(driver, EventListener())95 ef_driver.get(pages.url("simpleTest.html"))96 e = ef_driver.find_element_by_id("oneline")97 assert "A single line of text" == e.text98 e = ef_driver.find_element_by_xpath("/html/body/p[1]")99 assert "A single line of text" == e.text100 ef_driver.get(pages.url("frameset.html"))101 elements = ef_driver.find_elements_by_css_selector("frame#sixth")102 assert 1 == len(elements)103 assert "frame" == elements[0].tag_name.lower()104 assert "sixth" == elements[0].get_attribute("id")105 assert (b"before_find by id oneline"106 b"after_find by id oneline"...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites to running your first automation test, through best practices and deeper dives into advanced test scenarios, the LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run SeleniumBase automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 minutes of automation testing FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful