Test your AI Agents with the all-new Agent to Agent Testing Platform. Learn More

How to use save_page_source method in SeleniumBase

Best Python code snippet using SeleniumBase

scrape_pages.py

Source: scrape_pages.py

# ... (excerpt starts mid-function; the enclosing `def` line is not part of
# this listing, so the fragment is kept verbatim as a comment)
#     df['year'] = df.datetime_timestamp.dt.year
#     df['month'] = df.datetime_timestamp.dt.month
#     df = df.drop_duplicates(['year', 'month'], keep = 'last')
#     return df


def save_page_source(url, filename):
    '''
    Save the JavaScript-rendered html page source for a given url.

    The parent directory of ``filename`` is created when it has one.
    Saving is best-effort: any error raised while fetching, rendering or
    writing is printed and swallowed, so a batch of downloads keeps going.
    '''
    # os.path.dirname() returns '' for a bare filename, and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when there is one.
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    session = None
    try:
        # NOTE(review): HTMLSession is imported outside this excerpt --
        # presumably requests_html.HTMLSession; confirm.
        session = HTMLSession()
        r = session.get(url)
        r.html.render(timeout=0, sleep=0)  # makes sure the page renders Javascript
        page_content = r.html.html
        with open(filename, 'w', encoding='utf-8') as fid:
            fid.write(page_content)
    except Exception as e:
        print('\nCould not save: ')
        print(url)
        print(e, '\n')
    finally:
        # Release the session (and whatever it opened for rendering) for
        # every page instead of leaking one per call.
        if session is not None:
            session.close()


def save_snapshots(snapshot_df, folder):
    '''
    Save page source for snapshots in the dataframe.

    Each row must provide ``timestamp`` (used as the file name) and
    ``archive_url`` (the page to fetch). Files that already exist in
    ``folder`` are skipped, so an interrupted run can be resumed.
    '''
    for _, r in snapshot_df.iterrows():
        filename = '{}/{}.html'.format(folder, r.timestamp)
        print(filename)
        if not os.path.isfile(filename):
            save_page_source(r.archive_url, filename)


def main():
    '''
    Save snapshots for every team listed in team-links.csv.

    Teams whose ``staff_directory_link`` is the string 'none' are skipped;
    the rest are saved under data/raw/<prefix_2>/.
    '''
    # Example of saving a single page:
    # save_page_source('https://web.archive.org/web/20210630080419/https://www.nba.com/celtics/contact/front-office','data/test.html')
    # save snapshots for each team
    teams = pd.read_csv('team-links.csv')
    # keep teams with staff directory links
    teams = teams[teams.staff_directory_link != 'none']
    print(teams.head())
    for _, r in teams.iterrows():
        print(r.prefix_2)
        # get_snapshots is defined outside this excerpt.
        snapshots = get_snapshots(r.staff_directory_link)
        folder = 'data/raw/{}'.format(r.prefix_2)
        save_snapshots(snapshots, folder)


if __name__ == '__main__':
    ...  # the listing is truncated here; presumably calls main() -- confirm

main.py

Source: main.py

import json
import os

from InstagramScraper import InstagramSpider


def save_page_source(page_source, filename, append=False):
    """Write ``page_source`` to ``filename`` as UTF-8 text.

    The parent directory of ``filename`` is created when it has one, so
    callers can write into output folders that do not exist yet. With
    ``append=True`` the text is appended; otherwise the file is overwritten.
    """
    parent = os.path.dirname(filename)
    if parent:  # dirname is '' for a bare filename; nothing to create then
        os.makedirs(parent, exist_ok=True)
    mode = "a" if append else "w"
    with open(filename, mode, encoding="utf-8") as file:
        file.write(page_source)


def get_post_links():
    """Scroll the #instamoments tag page and save the page source each step.

    Twenty snapshots are written to instamoments/instamoments<i>.html, one
    after each scroll. The browser is closed even if a step fails.
    """
    url = "https://www.instagram.com/explore/tags/instamoments/"
    bot = InstagramSpider()
    try:
        bot.goto(url)
        for i in range(20):
            bot.mousewheel_vscroll(5)
            page_source = bot._browser.page_source
            postfix = str(i) + ".html"
            filename = "instamoments/instamoments" + postfix
            save_page_source(page_source, filename, append=True)
    finally:
        bot._browser.close()


def get_user_link():
    """Save the page source of two fixed post URLs as 0.html and 1.html.

    The browser is closed even if a navigation or write fails.
    """
    bot = InstagramSpider()
    urls = [
        "https://www.instagram.com/p/CHfqIVrFOsA/",
        "https://www.instagram.com/p/CHfX3K5lsUk/",
    ]
    try:
        for i, url in enumerate(urls):
            bot.goto(url)
            page_source = bot._browser.page_source
            save_page_source(page_source, filename=str(i) + ".html")
    finally:
        bot._browser.close()


def get_all_post_pagesources(links):
    """Save the page source of every post in ``links`` under post_sources/.

    Each link is appended to "https://instagram.com"; the file name is the
    second-to-last '/'-separated component of the link.
    """
    bot = InstagramSpider()
    prefix = "https://instagram.com"
    try:
        for link in links:
            bot.goto(prefix + link)
            page_source = bot._browser.page_source
            # NOTE(review): [-2] assumes links end with '/', e.g.
            # "/p/<code>/" -- confirm against the JSON input.
            filename = "post_sources/" + link.split('/')[-2] + ".html"
            save_page_source(page_source, filename)
    finally:
        bot._browser.close()


if __name__ == "__main__":
    # get_user_link()
    with open("./instamoments_post_links.json", "r") as file:
        links = json.load(file)
    # ... (the listing is truncated here)

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g., Selenium, Cypress, TestNG, etc.