Unlock 30% off on Manual Testing Annual Plans this Holiday Season.

Copied to Clipboard!

How to use the __fix_unicode_conversion method in SeleniumBase

Best Python code snippet using SeleniumBase

base_case.py

Source:base_case.py

...1902            "a"->"href", "img"->"src", "link"->"href", and "script"->"src". """1903        page_url = self.get_current_url()1904        soup = self.get_beautiful_soup(self.get_page_source())1905        page_utils._print_unique_links_with_status_codes(page_url, soup)1906    def __fix_unicode_conversion(self, text):1907        """ Fixing Chinese characters when converting from PDF to HTML. """1908        text = text.replace(u'\u2f8f', u'\u884c')1909        text = text.replace(u'\u2f45', u'\u65b9')1910        text = text.replace(u'\u2f08', u'\u4eba')1911        text = text.replace(u'\u2f70', u'\u793a')1912        return text1913    def get_pdf_text(self, pdf, page=None, maxpages=None,1914                     password=None, codec='utf-8', wrap=False, nav=False,1915                     override=False):1916        """ Gets text from a PDF file.1917            PDF can be either a URL or a file path on the local file system.1918            @Params1919            pdf - The URL or file path of the PDF file.1920            page - The page number (or a list of page numbers) of the PDF.1921                    If a page number is provided, looks only at that page.1922                        (1 is the first page, 2 is the second page, etc.)1923                    If no page number is provided, returns all PDF text.1924            maxpages - Instead of providing a page number, you can provide1925                       the number of pages to use from the beginning.1926            password - If the PDF is password-protected, enter it here.1927            codec - The compression format for character encoding.1928                    (The default codec used by this method is 'utf-8'.)1929            wrap - Replaces ' \n' with ' ' so that individual sentences1930                   from a PDF don't get broken up into seperate lines when1931                   getting converted into text format.1932            nav - If PDF is a URL, navigates to the URL in the browser first.1933     
             (Not needed because the PDF will be downloaded anyway.)1934            override - If the PDF file to be downloaded already exists in the1935                       downloaded_files/ folder, that PDF will be used1936                       instead of downloading it again. """1937        from pdfminer.high_level import extract_text1938        if not password:1939            password = ''1940        if not maxpages:1941            maxpages = 01942        if not pdf.lower().endswith('.pdf'):1943            raise Exception("%s is not a PDF file! (Expecting a .pdf)" % pdf)1944        file_path = None1945        if page_utils.is_valid_url(pdf):1946            if nav:1947                if self.get_current_url() != pdf:1948                    self.open(pdf)1949            file_name = pdf.split('/')[-1]1950            file_path = self.get_downloads_folder() + '/' + file_name1951            if not os.path.exists(file_path):1952                self.download_file(pdf)1953            elif override:1954                self.download_file(pdf)1955        else:1956            if not os.path.exists(pdf):1957                raise Exception("%s is not a valid URL or file path!" 
% pdf)1958            file_path = os.path.abspath(pdf)1959        page_search = None  # (Pages are delimited by '\x0c')1960        if type(page) is list:1961            pages = page1962            page_search = []1963            for page in pages:1964                page_search.append(page - 1)1965        elif type(page) is int:1966            page = page - 11967            if page < 0:1968                page = 01969            page_search = [page]1970        else:1971            page_search = None1972        pdf_text = extract_text(1973            file_path, password='', page_numbers=page_search,1974            maxpages=maxpages, caching=False, codec=codec)1975        pdf_text = self.__fix_unicode_conversion(pdf_text)1976        if wrap:1977            pdf_text = pdf_text.replace(' \n', ' ')1978        return pdf_text1979    def assert_pdf_text(self, pdf, text, page=None, maxpages=None,1980                        password=None, codec='utf-8', wrap=True, nav=False,1981                        override=False):1982        """ Asserts text in a PDF file.1983            PDF can be either a URL or a file path on the local file system.1984            @Params1985            pdf - The URL or file path of the PDF file.1986            text - The expected text to verify in the PDF.1987            page - The page number of the PDF to use (optional).1988                    If a page number is provided, looks only at that page.1989                        (1 is the first page, 2 is the second page, etc.)1990                    If no page number is provided, looks at all the pages.1991            maxpages - Instead of providing a page number, you can provide1992                       the number of pages to use from the beginning.1993            password - If the PDF is password-protected, enter it here.1994            codec - The compression format for character encoding.1995                    (The default codec used by this method is 'utf-8'.)1996            wrap - Replaces ' 
\n' with ' ' so that individual sentences1997                   from a PDF don't get broken up into seperate lines when1998                   getting converted into text format.1999            nav - If PDF is a URL, navigates to the URL in the browser first.2000                  (Not needed because the PDF will be downloaded anyway.)2001            override - If the PDF file to be downloaded already exists in the2002                       downloaded_files/ folder, that PDF will be used2003                       instead of downloading it again. """2004        text = self.__fix_unicode_conversion(text)2005        if not codec:2006            codec = 'utf-8'2007        pdf_text = self.get_pdf_text(2008            pdf, page=page, maxpages=maxpages, password=password, codec=codec,2009            wrap=wrap, nav=nav, override=override)2010        if type(page) is int:2011            if text not in pdf_text:2012                raise Exception("PDF [%s] is missing expected text [%s] on "2013                                "page [%s]!" % (pdf, text, page))2014        else:2015            if text not in pdf_text:2016                raise Exception("PDF [%s] is missing expected text [%s]!"2017                                "" % (pdf, text))2018        return True...

asserts.py

Source:asserts.py

...724        override - If the PDF file to be downloaded already exists in the725                   downloaded_files/ folder, that PDF will be used726                   instead of downloading it again.727        caching - If resources should be cached via pdfminer."""728        text = self.__fix_unicode_conversion(text)729        if not codec:730            codec = "utf-8"731        pdf_text = self.get_pdf_text(732            pdf,733            page=page,734            maxpages=maxpages,735            password=password,736            codec=codec,737            wrap=wrap,738            nav=nav,739            override=override,740            caching=caching,741        )742        if type(page) is int:...

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub. From setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios, LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.