How to use the open_html_file method in SeleniumBase

Best Python code snippet using SeleniumBase

readisbns.py

Source:readisbns.py

# May need to install the requests module ('pip install requests')
#
# Reads ISBNs (one per line) from 'isbn.txt', looks each one up through the
# Open Library books API and writes the book covers and details to 'index.html'.
#
# Import modules needed for this code to work
import html
import json
import sys
import time

import requests

# Read every ISBN from 'isbn.txt' exactly once. Surrounding whitespace is
# stripped and blank lines are dropped so that a trailing newline never ends up
# inside the lookup key or the cover URL.
try:
    with open("isbn.txt") as isbn_file:
        list_all_books_w_isbn = [line.strip() for line in isbn_file if line.strip()]
except FileNotFoundError:
    # Tell the user to have a file called isbn.txt in the same directory
    print("Please have an file called 'isbn.txt' in the same directory as this python file")
    # Leave the message visible for a moment, then stop: nothing can be done
    # without the input file
    time.sleep(10)
    sys.exit(1)

# Print out the list of all ISBNs found in the file
print("List of all ISBN's:")
print(list_all_books_w_isbn)

# If no ISBN is found inside the text file then let the user know that they
# have to place ISBN numbers inside the file, and stop
if not list_all_books_w_isbn:
    print("Please Input an ISBN numbers inside the 'isbn.txt' file")
    time.sleep(10)
    sys.exit(1)

# Make a variable (html_content_begin) holding the html content that will begin
# the page (this includes the css)
html_content_begin = """
    <html>
    <head>
    <meta charset="utf-8">
    <style>
    body {
        background-color: rgb(44, 61, 81);
    }
    #container {
    width: 90%;
    padding-top: 40px;
    padding-bottom: 50px;
    background-color: rgb(226, 116, 105);
    height: 500px;
    border-radius: 15px;
    margin-top:50px;
    margin-bottom:50px;
    margin-left:auto;
    margin-right:auto;
    }
    img {
        padding: 20px;
        float: left;
        max-width:100%;
        max-height:100%;
    }
    p {
        font-family: 'Roboto', sans-serif;
        color: rgb(255, 255, 255);
    }
    #header {
        width: 100%;
        height: 100px;
        text-align: center;
    }
    h1 {
        font-family: 'Roboto', sans-serif;
        color: rgb(255, 255, 255);
        font-size: 50px;
    }
    </style></head>
    <body>
    <div id="header">
    <h1>Find book information from an ISBN </h1>
    </div>
"""
# Finish the above html content, creating a variable called (html_content_end)
html_content_end = """</body> </html>"""

# Loop through all ISBNs, reading the data and adding that information to the
# webpage. The 'with' block guarantees the HTML file is flushed and closed even
# if something goes wrong half way through.
print("Getting all data, this may take a minute or two")
with open("index.html", "w", encoding="utf-8") as open_html_file:
    # Write the html content that will begin the page
    open_html_file.write(html_content_begin)

    for isbn in list_all_books_w_isbn:
        params = [
            ['bibkeys', isbn],
            ['jscmd', 'data'],
            ['format', 'json'],
        ]
        # Request the information from Openlibrary. A timeout stops one stalled
        # request from hanging the whole run; a failed lookup skips that ISBN.
        try:
            response = requests.get('https://openlibrary.org/api/books', params=params, timeout=30)
            response.raise_for_status()
            # Make a json dictionary of all the book data
            book_data = response.json()
        except (requests.RequestException, ValueError) as error:
            print("Could not get data for " + isbn + ": " + str(error))
            continue

        # Print the json data in an easy to read format
        print(json.dumps(book_data, indent=4))

        # Can openlibrary find the ISBN, if not skip all the code below
        if isbn in book_data:
            print("Yay, ISBN found")
        else:
            print("ISBN not found")
            continue

        isbn_data = book_data[isbn]
        # Everything written into the page is escaped, because both the ISBN
        # file and the API response are outside this script's control
        safe_isbn = html.escape(isbn)

        # Create a div to hold each book on the webpage
        open_html_file.write("""<div id="container">""")

        # Find book cover image
        open_html_file.write("""<img src="http://covers.openlibrary.org/b/isbn/""" + safe_isbn + """-L.jpg"/>""")

        # Write the ISBN number to the webpage
        open_html_file.write("""<p><b> ISBN: </b></p>""")
        open_html_file.write("""<p>""" + safe_isbn + """</p>""")

        # Check if the title is in the json data, if yes then write that title
        # to the webpage and print that the book has been loaded
        if "title" in isbn_data:
            book_title = str(isbn_data["title"])
            open_html_file.write("""<p><b>Title: </b></p>""")
            open_html_file.write("""<p>""" + html.escape(book_title) + """</p>""")
            print("Loaded:")
            print(book_title)
        else:
            print("No Title found")

        # Write the books url (links to the openlibraries website for that book)
        if "url" in isbn_data:
            # json.dumps produces a safely quoted JavaScript string literal and
            # html.escape makes that literal safe inside the onclick attribute
            book_url_js = html.escape(json.dumps(str(isbn_data["url"])))
            open_html_file.write("""<p><b>URL: </b></p>""")
            open_html_file.write(""" <button onclick="window.location.href = """ + book_url_js + """;">Book Url</button> """)
        else:
            print("no url found")

        # Write the books publish date
        if "publish_date" in isbn_data:
            open_html_file.write("""<p><b> Publish date: </b></p>""")
            open_html_file.write("""<p>""" + html.escape(str(isbn_data["publish_date"])) + """</p>""")
        else:
            print("no publish date found")

        # Is there an author in the ISBN's json data, if yes then write the
        # name of the first author, otherwise tell the user that no author was
        # found (never reuse the author of the previous book)
        book_authors = isbn_data.get("authors") or []
        if book_authors and "name" in book_authors[0]:
            open_html_file.write("""<p><b> Author: </b></p>""")
            open_html_file.write("""<p>""" + html.escape(str(book_authors[0]["name"])) + """</p>""")
        else:
            print("no author found")

        # Check if there is a number of pages in that books json data, if there
        # is then write the number of pages to the website
        if "number_of_pages" in isbn_data:
            book_pages = isbn_data["number_of_pages"]
            open_html_file.write("""<p><b>Number of pages: </b></p>""")
            open_html_file.write("""<p>""" + html.escape(str(book_pages)) + """</p>""")
        else:
            print("no number of pages found")

        # Finish off the div created above
        open_html_file.write("""</div>""")

    # Finish the html tags
    open_html_file.write(html_content_end)

# Both files are closed at this point; print that the html content has been created
print("html content created")
# Print that the user can now open the html page with their ISBNs data found...

data2_membership_list.py

Source:data2_membership_list.py

...24    print("Files are read from: {}".format(read_path))25    print('New dataset file: {}'.format(write_file))26    return write_path, read_path, write_file27# open the research library membership HTML files28def open_html_file(open_file, name):29    open_html_file.data = open(open_file + '/' + name + '.html')  # open membership HTML page30    open_html_file.domain = organization.split('.')[1] + '.' + organization.split('.')[31        2]  # parse organization name to exclude internal website links and create filename32    open_html_file.soup = BeautifulSoup(open_html_file.data, 'lxml')  # BeautifulSoup object containing HTML data33# extract and write research library names and URLs to a .CSV file34def get_urls(organization, num, tags, check, writer):35    count_member = 036    for tag in tags[num].find_all('a'):37        try:38            if tag is not None:39                next_url = tag.get('href', None)40                if not next_url.startswith("http"):41                    continue42                test_org = re.findall(check, next_url)43                test_goog = re.findall('google', next_url)44                if 'webjunction' in next_url:45                    break46                if 'hangingtogether' in next_url:47                    break48                if len(test_org) + len(test_goog) == 0:49                    next_name = tag.contents[0].strip(",")50                    count_member += 151                    next_name = next_name.replace(',', '')52                    print('{} member {} -  {} @ {}'.format(organization, count_member, next_name, next_url))53                    writer.writerow([organization, next_name, next_url])54        except Exception as e:55            print56            print("something is wrong with the HTML tag {}: {}".format(tag, e))57            print58# list the membership organization urls59organizations = ['www.arl.org', 'www.diglib.org', 'www.oclc.org']60# set read / write path and file names61write_path, read_path, 
write_file = pick_dataset('2018-07-30T15_45_32.751109')62with open(write_file, 'w') as toWrite:63    writer = csv.writer(toWrite, delimiter=',')64    writer.writerow(['membership', 'school', 'url'])65    for organization in organizations:66        count = 067        open_html_file(read_path, organization)68        # find HTML tags encasing Membership data69        if organization == organizations[0]:70            tags = open_html_file.soup.find_all('div', class_='article-content')  # ARL71        elif organization == organizations[1]:72            tags = open_html_file.soup.find_all('div', class_=re.compile('entry-content'))  # DLF73        else:74            tags = open_html_file.soup.find_all('div', class_='text parbase section')  # OCLC75        for i in tags:76            get_urls(organization, count, tags, open_html_file.domain, writer)...

analyzehtml.py

Source:analyzehtml.py

...7    # def FindFileList(self):8    #     '''è·åæä»¶åè¡¨'''9    #     for Root,Dirs,Files in os.walk(self.Filedir):10    #         return Files11    def open_html_file(self,FileName):12        '''æå¼Htmlæä»¶,å¹¶è¿åæä»¶handle'''13        FileDir=self.Filedir+'/'+FileName14        with open(FileDir,encoding='utf-8') as file:15            HtmlHandle=file.read()16            return HtmlHandle17    def analyze_html(self,File):18        '''åæHtml,è¿åæå®èç¹åå®¹'''19        # Files=self.FindFileList()20        # for File in Files:21        FileContent=self.open_html_file(File)22        soup=BeautifulSoup(FileContent,"html.parser")23        ulist=[]24        for tr in soup.find('tbody').children:25            if isinstance(tr,bs4.element.Tag):26                tds = tr('td')27                BugId=tds[0].find('a')28                Title=tds[3]['title']29                ConfirmOrNot=tds[3].find('span').string30                BugTitleAndConfirmOrNot=ConfirmOrNot+" "+Title31                ulist.append([int(BugId.string), int(tds[1].string),BugTitleAndConfirmOrNot,tds[4].string])32        return ulist33if __name__=='__main__':34    parentdir = os.path.dirname(__file__)35    filedir=parentdir+r"/html"...

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.