Test your AI Agents with the all-new Agent to Agent Testing Platform. Learn More

How to use before_find method in SeleniumBase

Best Python code snippet using SeleniumBase

Preprocessing.py

Source:Preprocessing.py

# -*- coding:utf-8 -*-
import re
import nltk
import string
import codecs
from collections import Counter
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize
from nltk.tokenize import RegexpTokenizer
from nltk import pos_tag


class Get_HTML_Information:
    """Extract publication metadata and text from a parsed HTML document.

    Provides (1) published year / year+month, (2) title, (3) abstract-onward
    text, (4) full prettified HTML and (5) plain text only.

    ``soup`` is used through ``find_all``, ``body``, ``get_text``,
    ``prettify`` and call syntax, i.e. a BeautifulSoup-style object.
    """

    # Patterns shared by the date extractors.
    _DATE_META_RE = re.compile('year|date|issue', re.I)
    _SLASH_DATE_RE = re.compile(r'(\d+)/(\d+)/(\d+)')
    _DASH_DATE_RE = re.compile(r'(\d+)-(\d+)-(\d+)')
    _YEAR_RE = re.compile(r'(\d{4})')
    # Lowercase 'may' is left out of both patterns (the original flags it
    # with "#### 'may'") -- presumably to avoid matching the verb.
    _MONTH_FULL_RE = re.compile('January|February|March|April|May|June|July|August|September|October|November|December|january|february|march|april|june|july|august|september|october|november|december')
    _MONTH_ABBR_RE = re.compile('Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|jan|feb|mar|apr|jun|jul|aug|sep|oct|nov|dec')
    _MONTH_FULL = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december']
    _MONTH_ABBR = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']

    def __init__(self, soup):
        self.soup = soup

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _meta_date_contents(self):
        """Yield the ``content`` of every <meta> whose ``name`` looks date-related."""
        for tag in self.soup.find_all('meta'):
            name = tag.attrs.get('name')
            content = tag.attrs.get('content')
            # Only the ``name`` attribute is relevant; tags without a string
            # ``content`` are skipped instead of raising KeyError.
            if (isinstance(name, str) and isinstance(content, str)
                    and self._DATE_META_RE.search(name)):
                yield content

    def _text_tokens(self):
        """Return the whitespace-separated tokens of <body> (or of the whole document)."""
        node = self.soup.body if self.soup.body else self.soup
        return node.get_text().split()

    @classmethod
    def _year_near(cls, tokens, pos):
        """Return the 4-digit year closest to ``tokens[pos]``, or None.

        Neighbours are tried in the order +1, -1, +2, -2.  Positions outside
        the list are ignored, so the lookup neither raises IndexError at the
        end of the text nor wraps around to the other end at the start.
        """
        for offset in (1, -1, 2, -2):
            k = pos + offset
            if 0 <= k < len(tokens):
                found = cls._YEAR_RE.search(tokens[k])
                if found:
                    return int(found.group(0))
        return None

    @classmethod
    def _month_year_pairs(cls, tokens, month_re, month_names):
        """Yield ``(year, month_index)`` for each month token with a nearby year.

        ``month_index`` is the 0-based position of the matched month in
        ``month_names``.
        """
        for pos, token in enumerate(tokens):
            found = month_re.search(token)
            if not found:
                continue
            year = cls._year_near(tokens, pos)
            if year is not None:
                yield year, month_names.index(found.group(0).lower())

    @staticmethod
    def _numeric_year_fraction(first, second, third):
        """Turn three numeric date components into ``year + (month - 1) / 12``.

        Returns None when neither the first nor the last component is a
        4-digit year.
        """
        if len(first) == 4:                      # YYYY/MM/DD
            year, month = int(first), int(second)
        elif len(third) == 4:
            year = int(third)
            # DD/MM/YYYY vs MM/DD/YYYY: the first component is taken as the
            # month only when the second one cannot be a month.
            if int(second) > 12 and int(first) < 13:
                month = int(first)
            else:
                month = int(second)
        else:
            return None
        return round(year + (month - 1) / 12, 2)

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def get_pub_year(self):
        """Return the earliest publication year found, or 1234 if none is found.

        Date-like <meta name=...> tags holding a ``d/m/y`` value are tried
        first; otherwise the text is scanned for month names (full names
        first, then abbreviations) with a 4-digit year within two tokens.
        """
        years = []
        for content in self._meta_date_contents():
            found = self._SLASH_DATE_RE.search(content)
            if found:
                years.extend(int(part) for part in found.groups() if len(part) == 4)
        if years:
            return min(years)

        if not self.soup.body:
            print('soup error')
        tokens = self._text_tokens()
        for month_re, names in ((self._MONTH_FULL_RE, self._MONTH_FULL),
                                (self._MONTH_ABBR_RE, self._MONTH_ABBR)):
            years = [year for year, _ in self._month_year_pairs(tokens, month_re, names)]
            if years:
                return min(years)
        return 1234

    def get_pub_year_month(self):
        """Return ``(year_fraction, year)`` for the earliest publication date.

        ``year_fraction`` is ``year + month_index / 12`` rounded to two
        decimals (January -> .0).  Falls back to ``(1234.0, 1234)`` when no
        date can be located.
        """
        dates = []
        # from meta tag
        for content in self._meta_date_contents():
            numeric = (self._SLASH_DATE_RE.search(content)
                       or self._DASH_DATE_RE.search(content))
            full = self._MONTH_FULL_RE.search(content)
            abbr = self._MONTH_ABBR_RE.search(content)
            if numeric:
                value = self._numeric_year_fraction(*numeric.groups())
                if value is not None:
                    dates.append(value)
                # Only return when something was collected; previously an
                # unparseable value led to min() on an empty list.
                if dates:
                    return min(dates), int(min(dates))
            elif full or abbr:
                if full:
                    month_index = self._MONTH_FULL.index(full.group(0).lower())
                else:
                    month_index = self._MONTH_ABBR.index(abbr.group(0).lower())
                # Expect exactly "<day> <month> <year>" in any order; after
                # sorting, the two numeric tokens precede the month name.
                words = sorted(content.split())
                if len(words) == 3 and words[0].isdigit() and words[1].isdigit():
                    dates.extend(round(int(word) + month_index / 12, 2)
                                 for word in words
                                 if len(word) == 4 and word.isdigit())
                    if dates:
                        return min(dates), int(min(dates))
            elif self._YEAR_RE.search(content) and len(content) == 4:
                # bare year
                dates.append(float(int(content)))
        if dates:
            return min(dates), int(min(dates))

        # from pub date sentence
        tokens = self._text_tokens()
        for month_re, names in ((self._MONTH_FULL_RE, self._MONTH_FULL),
                                (self._MONTH_ABBR_RE, self._MONTH_ABBR)):
            dates = [round(year + month_index / 12, 2)
                     for year, month_index in self._month_year_pairs(tokens, month_re, names)]
            if dates:
                return min(dates), int(min(dates))
        return 1234.0, 1234

    def get_title_sentence(self):
        """Return the title from <meta name=*title*> tags, else from <title>.

        The longest matching meta ``content`` wins.  When falling back to
        <title>, a trailing "- Journal" style suffix is cut off.  Returns
        'no title' when neither source is available.
        """
        title_re = re.compile('title', re.I)
        title = ""
        for tag in self.soup.find_all('meta'):
            name = tag.attrs.get('name')
            if isinstance(name, str) and title_re.search(name):
                content = tag.attrs.get('content', '')
                if not title or len(title) < len(content):
                    title = content
        if title:
            return title

        title_tags = self.soup.find_all('title')
        if not title_tags or not title_tags[0].contents:
            return 'no title'
        title = title_tags[0].contents[0].strip()
        suffixes = re.findall('- [A-Z]{1}[a-z]+', title)
        if suffixes:
            # Cut at the first occurrence of the last "- Word" match.
            return title[:title.find(suffixes[-1])]
        return title

    def get_only_text(self):
        """Return the visible text, one normalised line per source line.

        <title>, <button> and <table> elements are removed from the soup
        (this mutates ``self.soup``).  Lines of three characters or fewer are
        dropped; each kept line is emitted as its words, each preceded by a
        single space, followed by a newline.
        """
        for tag in self.soup(['title', 'button', 'table']):
            tag.extract()
        node = self.soup.body if self.soup.body else self.soup
        pieces = []
        for line in node.get_text().splitlines():
            if len(line) > 3:
                pieces.append("".join(" " + word for word in line.split()) + "\n")
        return "".join(pieces)

    def get_full_text(self):
        """Return the prettified markup of the whole document."""
        return self.soup.prettify()

    def get_abst(self, title):
        """Return the text from the abstract up to the first trailing section.

        <sub>/<sup> elements are replaced in ``self.soup`` by their text plus
        a marker (this mutates the soup) so that sub/superscripts are glued
        back onto the neighbouring text instead of becoming separate words.
        Everything before the title / "Abstract" heading (or, failing that,
        the first "publish..." phrase) is removed, as is everything from the
        first heading such as "Acknowled", "Reference" or "Copyright".
        """
        marker = 'qorwns'
        for tag_name in ('sub', 'sup'):
            for node in self.soup.find_all(tag_name, string=True):
                node.replace_with(node.string.strip() + marker)

        text = re.sub(r'\n\s*\n', r'\n', self.soup.get_text().strip(), flags=re.M)
        lines = [line.lstrip() for line in text.splitlines()]

        #### remove before abstract
        full_text = ""
        for i, line in enumerate(lines):
            if not line:
                continue
            if line[-1] == " ":
                full_text += line
            elif line.endswith(marker):
                # sub/superscript: glue to the previous text, drop the marker
                full_text += line[:-len(marker)]
            elif i > 0 and lines[i - 1].endswith(marker) and len(line) == 1:
                # single character right after a sub/superscript: no space
                full_text += line
            else:
                full_text += ' ' + line

        start = full_text.find(title[:10])
        if start != -1:
            full_text = full_text[start:]

        start = full_text.find('Abstract')
        if start == -1:
            start = full_text.find('abstract')
        if start != -1:
            full_text = full_text[start + 8:]
        else:
            pub_re = re.compile('publish|publicat', re.I)
            pub_suffix = ""
            for line in lines:
                found = pub_re.search(line)
                if found:
                    pub_suffix = line[found.start():-1]
                    break
            if pub_suffix:
                cut = full_text.find(pub_suffix)
                # Only cut when the phrase is really present; a failed find()
                # used to slice at an arbitrary offset.
                if cut != -1:
                    full_text = full_text[cut + len(pub_suffix) + 1:]

        #### removed after conclusion
        head_list = ['Acknowled', 'acknowled', 'Reference', 'Copyright', 'copyright', 'Advertisement', 'COLLAPSE', 'Article Information', 'Cookies']
        hits = [pos for pos in (full_text.find(head) for head in head_list) if pos > 0]
        if hits:
            # Without a hit the text is kept whole (the last character used
            # to be dropped by slicing with -1).
            full_text = full_text[:min(hits)]
        return full_text


def get_tokens(text):
    """Return lower-cased word tokens of ``text`` with punctuation removed.

    A dash directly following a run of capitals is dropped, short bracketed
    spans (up to 10 characters) are blanked, separator characters become
    spaces and the remaining symbols are deleted.  Purely numeric tokens and
    tokens that are not alphanumeric are discarded.
    """
    # NOTE(review): the non-ASCII characters in the three literals below
    # appear mis-encoded (mojibake) in this copy -- verify them against the
    # original UTF-8 file before relying on them.
    bracketed = re.compile(r'\(.{0,10}\)|\[.{0,10}\]|\{.{0,10}\}')
    acronym_dash = re.compile('(?P<name>[A-Z]+)(?P<bar>[-ââââââ]{1})')
    to_space = ',-âââââÃâªââ¥â¼ããâ¤â¥â"â²ââââ&\'()â¡+:;<=>_`{|}~Â·ââ/'
    to_delete = '!Â©â§âÏÏÎ·ÏÎ¼ÏÎ¸âÏâ()Î³Î»â #Â±â¯$Î´Â°âÎ²%Î±*.Ã¡?@\\^Ã¢Ã¥Ã¥[]' + string.punctuation

    lowers = acronym_dash.sub(r"\g<name>", text).lower()
    lowers = bracketed.sub(' ', lowers)
    # Dict-based tables avoid str.maketrans' equal-length requirement.
    no_punct = lowers.translate({ord(ch): ' ' for ch in to_space})
    no_punct = no_punct.translate(dict.fromkeys(map(ord, to_delete)))

    tokens = RegexpTokenizer(r'\s+', gaps=True).tokenize(no_punct)
    # isalnum must be *called*; the bare attribute was always truthy.
    return [item for item in tokens if not item.isdigit() and item.isalnum()]


def lemmatize_tokens(tokens, lemmatizer):
    """Return ``lemmatizer.lemmatize(token)`` for every token, in order."""
    return [lemmatizer.lemmatize(item) for item in tokens]


def stem_tokens(tokens, stemmer):
    """Return ``stemmer.stem(token)`` for every token, in order."""
    return [stemmer.stem(item) for item in tokens]


def lemmatize_tokens_for_pos(tokens, lemmatizer):
    # Iterates (word, pos) pairs: words whose tag does not start with
    # a/r/n/v are kept as-is, a/r words are skipped, n/v words are lemmatized.
    lemma = []
    for word, pos in tokens:
        tag = pos[0].lower()
        tag = tag if tag in ['a','r','n','v'] else None
        if not tag:
            lemma.append(word)
        elif tag in ['a', 'r']:
            continue
        else:
            lemma.append(lemmatizer.lemmatize(word, tag))
# ... (listing is truncated here on the source page; the rest of the file is not shown)

event_firing_webdriver_tests.py

Source:event_firing_webdriver_tests.py

...88            b"before_change_value_of"89            b"after_change_value_of") == log.getvalue()90def test_should_fire_find_event(driver, log, pages):91    class EventListener(AbstractEventListener):92        def before_find(self, by, value, driver):93            log.write(("before_find by %s %s" % (by, value)).encode())94        def after_find(self, by, value, driver):95            log.write(("after_find by %s %s" % (by, value)).encode())96    ef_driver = EventFiringWebDriver(driver, EventListener())97    ef_driver.get(pages.url("simpleTest.html"))98    e = ef_driver.find_element_by_id("oneline")99    assert "A single line of text" == e.text100    e = ef_driver.find_element_by_xpath("/html/body/p[1]")101    assert "A single line of text" == e.text102    ef_driver.get(pages.url("frameset.html"))103    elements = ef_driver.find_elements_by_css_selector("frame#sixth")104    assert 1 == len(elements)105    assert "frame" == elements[0].tag_name.lower()106    assert "sixth" == elements[0].get_attribute("id")...

230.二叉树第k小元素(树）.py

Source:230.二叉树第k小元素(树）.py

...8class Solution:9    step = 010    ans = -1 * float('inf')11    def kthSmallest(self, root, k: int) -> int:12        def before_find(root):13            if root == None or self.step == -1:14                return15            before_find(root.left)16            if self.step == -1:17                return18            self.step += 119            if self.step == k:20                self.ans = root.val21                self.step = -122                return23            before_find(root.right)24        before_find(root)...

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub. It covers everything from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g., Selenium, Cypress, and TestNG.