Best Python code snippet using yandex-tank
comment_scraper.py
Source:comment_scraper.py  
...5import re6from pathlib import Path7class GetComments():8    """Class containing every function for obtaining reddit comment data."""9    def dict_formatter(variable: object,10                       input_text: str,11                       fail_return: object,12                       slice: int = 0):13        """14        Format data that is inserted into a Pandas DataFrame.15        Used to clean and format data that will be placed into a Pandas16        DataFrame by the comment_date() function.17        Args:18            variable: An object, primarily a dictionary obtained by using19                the requests .json() method. type: dict or None20            input_text: The dictionary key name that will be passed21                into the variable if there is a variable. type: str or None22            fail_return: The function returns this if the formatting failed.23                type: object24            slice: An integer representing where the variable key's value25                will be sliced if there is a variable. type: int26        Returns:27            String that will be the dictionary keys value.28        """29        invalid = ['[deleted]', '[removed]']30        if slice:31            return variable[input_text][slice:] if input_text in\32                variable.keys() and variable[input_text] not in\33                invalid else fail_return34        elif not slice and variable:35            return variable[input_text] if input_text in variable.keys()\36                and variable[input_text] not in invalid else fail_return37        elif not variable and not slice:38            return input_text if input_text not in invalid else fail_return39    def comment_data(request: dict) -> pd.core.frame.DataFrame:40        """41        Create a formatted list of dictionaries of comment data.42        Args:43            request: A dictionary obtained from the requests library44                using the .json() method. 
type: dict45        Returns:46            pd.DataFrame(comment_list), num_comments: A tuple of a47                Pandas DataFrame object and an integer representation48                for the number of comments retrieved.49                type: pandas.core.frame.DataFrame, int50        """51        num_comments = 052        comment_list = []53        for request in request.get('data'):54            comment_body = request['body'].replace('\n', ' ')\55                                          .encode('ascii', 'ignore')\56                                          .decode()57            comment_body = re.sub(r' +', ' ', comment_body).strip()58            comment_list.append(59                {'comment_id': GetComments.dict_formatter(60                     request, 'id', None),61                 'submission_id': GetComments.dict_formatter(62                     request, 'link_id', None, 3),63                 'subreddit': GetComments.dict_formatter(64                     request, 'subreddit', None),65                 'subreddit_id': GetComments.dict_formatter(66                     request, 'subreddit_id', None, 3),67                 'author_id': GetComments.dict_formatter(68                     request, 'author_fullname', None, 3),69                 'author': GetComments.dict_formatter(70                     request, 'author', None),71                 'dt': GetComments.dict_formatter(72                     request, 'created_utc', 0),73                 'score': GetComments.dict_formatter(74                     request, 'score', None),75                 'awards_received': GetComments.dict_formatter(76                     request, 'total_awards_received', 0),77                 'body': GetComments.dict_formatter(78                     None, comment_body, None),79                 'parent_id': GetComments.dict_formatter(80                     request, 'parent_id', None, 3)})81            num_comments += 182        return pd.DataFrame(comment_list), num_comments83    def 
get_comments(df: pd.core.frame.DataFrame) -> int:84        """85        Retrieve all comments from submissions in a Pandas DataFrame.86        Uses the pushshift API to retrieve every comment for a87        submission. The function retrieves 10000 comments at a time88        until there are no comments remaining for the submission.89        Args:90            df: A Pandas DataFrame containing an id, title, and date91                for every submisson that is pending comment retrieval.92                type: pandas.core.frame.DataFrame93        Returns:...typefmt.py
Source:typefmt.py  
...41        :param value:42            Value to check43    """44    return value.name45def dict_formatter(view, value):46    """47        Removes unicode entities when displaying dict as string. Also unescapes48        non-ASCII characters stored in the JSON.49        :param value:50            Dict to convert to string51    """52    return json.dumps(value, ensure_ascii=False)53BASE_FORMATTERS = {54    type(None): empty_formatter,55    bool: bool_formatter,56    list: list_formatter,57    dict: dict_formatter,58}59EXPORT_FORMATTERS = {...Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
