How to use _strip_xml method in unittest-xml-reporting

Best Python code snippet using unittest-xml-reporting_python

clean_other.py

Source:clean_other.py Github

copy

Full Screen

# ... (excerpt begins inside a preceding function; its header and the start of
# its docstring are cut off, so the visible tail is reproduced here verbatim)
#     """
#     logging.info(f'stripping {fname}')
#     if fname.endswith('.bz2'):
#         with bz2.open(fname, 'rt', encoding='utf-8') as in_file, open(fname.replace('.xml.bz2', '.clean.txt'), 'w', encoding='utf-8') as out_file:
#             out_file.write(_strip_xml(in_file.read()))
#     if fname.endswith('.xml'):
#         with open(fname, 'r', encoding='utf-8') as in_file, open(fname.replace('.xml', '.clean.txt'), 'w', encoding='utf-8') as out_file:
#             out_file.write(_strip_xml(in_file.read()))
#     if fname.endswith('.txt'):
#         with open(fname, 'r', encoding='utf-8') as in_file, open(fname.replace('.txt', '.clean.txt'), 'w', encoding='utf-8') as out_file:
#             out_file.write(_strip_xml(in_file.read()))
#     logging.info(f'completed stripping {fname}')


def _strip_in_chunks(in_file, out_file, lines_per_chunk):
    """Clean `in_file` in chunks of lines and write each cleaned chunk to `out_file`.

    :param in_file: open text file to read lines from
    :param out_file: open text file the cleaned text is written to
    :param lines_per_chunk: maximum number of lines held in memory at once
    """
    # lines_per_chunk may arrive as a float (the default is 1e6); never allow a chunk size below 1
    chunk_size = max(1, int(lines_per_chunk))
    lines = []
    for line in in_file:
        lines.append(line)
        if len(lines) >= chunk_size:
            out_file.write(_strip_xml(''.join(lines)))
            lines = []
    # flush whatever is left after the last full chunk
    if lines:
        out_file.write(_strip_xml(''.join(lines)))


@log_timer
def big_strip_file(fname, lines_per_chunk=1e6):
    """Strip xml and other tags from a Wikipedia dump that doesn't fit into RAM.

    Processes Wikipedia dump in chunks and then concatenates the chunks into a single text file.
    The output is written next to the input, with the input's extension replaced by `.clean.txt`.

    :param fname: Wikipedia dump file, in xml or bzip2 format.
    :param lines_per_chunk: number of lines in each chunk (default is 1e6, one million lines)
    """
    logging.info(f'stripping {fname}')
    if fname.endswith('.bz2'):
        with bz2.open(fname, 'rt', encoding='utf-8') as in_file, \
                open(fname.replace('.xml.bz2', '.clean.txt'), 'w', encoding='utf-8') as out_file:
            _strip_in_chunks(in_file, out_file, lines_per_chunk)
    if fname.endswith('.xml'):
        with open(fname, 'r', encoding='utf-8') as in_file, \
                open(fname.replace('.xml', '.clean.txt'), 'w', encoding='utf-8') as out_file:
            _strip_in_chunks(in_file, out_file, lines_per_chunk)
    if fname.endswith('.txt'):
        with open(fname, 'r', encoding='utf-8') as in_file, \
                open(fname.replace('.txt', '.clean.txt'), 'w', encoding='utf-8') as out_file:
            _strip_in_chunks(in_file, out_file, lines_per_chunk)
    logging.info(f'completed stripping {fname}')


# (pattern, replacement) pairs, applied in this order to every line of text
regeces = [
    (r'(?s)<ref.*?</ref>', ''),  # strip reference links
    (r'(?s)<references.*?</references>', ''),  # strip references
    (r'(?s)<table.*?</table>', ''),  # strip tables
    (r'(?s)<gallery.*?</gallery>', ''),  # strip galleries
    (r'(?s)<kml.*?</kml>', ''),  # strip KML tags
    (r'<.*?>', ''),  # strip other xml tags
    (r'http.*?(?:[\s\n\]]|$)', ''),  # strip external http(s) links
    (r'\[\[[^\]]*?:.*\|(.*?)\]\]', '\\1'),  # strip links to files, etc. but keep labels
    (r'\[\[[^\]]*?:(.*?)\]\]', ''),  # strip category links
    (r'\[\[[^\]]*?\|(.*?)\]\]', '\\1'),  # convert labeled links to just labels
    (r'(?m)^[\s]*[!?*;:=+\-|#_].*?$', ''),  # strip lines that do not start with alphanumerics, quotes, or brackets
    (r'(?m)^.*?\(UTC\).*?$', ''),  # strip lines containing a time stamp
    (r'\s\(.*?\)', ''),  # remove everything in parentheses
    # NOTE(review): the trailing `|$` is a top-level alternative, so this also matches at end of line - confirm intended
    (r'([^\s.!?:;]{2})[.!?:;]+?[\s\n]|$', '\\1\n'),  # break sentences at periods
    (r"[-–—/']", ' '),  # replace hyphens, apostrophes and slashes with spaces
    (r'\s*\n\s*', '\n'),  # strip empty lines and lines containing whitespace
    (r'\s{2,}', ' '),  # strip excessive spaces
]
# compile once at import time; all patterns are matched case-insensitively
patterns = [(re.compile(regec[0], re.IGNORECASE), regec[1]) for regec in regeces]


def _strip_xml(txts):
    """Strip xml and other tags from Wikipedia text.

    The text is HTML-unescaped twice, split into lines, and every line is run through
    each entry of `patterns` in order. Lines that end up empty are dropped; in the
    remaining lines only alphanumeric and whitespace characters are kept.

    :param txts: Wikipedia dump text containing multiple articles
    :return: stripped Wikipedia text
    """
    unescaped = html.unescape(html.unescape(txts))  # double unescape because Wikipedia dumps are a mess
    cleaned = []
    for line in unescaped.split('\n'):
        for regex, replacement in patterns:
            line = regex.sub(replacement, line)
        if line != '':
            cleaned.append(''.join(letter for letter in line if letter.isalnum() or letter.isspace()))
    return '\n'.join(cleaned)


if __name__ == '__main__':
    argparser = argparse.ArgumentParser(description='strip text files of xml and other tags')
    argparser.add_argument('fname', help='name of file')
    ...

Full Screen

Full Screen

youtube_insights.py

Source:youtube_insights.py Github

copy

Full Screen

...105 dwriter.writerow([x.strip() for x in group.split(',', 1)])106 return [('stats.csv', stats.getvalue()), 107 ('locations.csv', locations.getvalue()),108 ('demographics.csv', demographics.getvalue())]109def _strip_xml(data):110 """111 Strips invalid XML from beginning and end of insights response from youtube112 """113 return '\n'.join(data.split('\n')[1:-1])114def insights(video_id):115 """116 Gets and returns insights for a given video id. 117 """118 url = _make_url('/insight_ajax', action_get_statistics_and_data=1,v=video_id)119 result = _request(url)120 return _strip_xml(result)121def run(video_id, attrs=[], since=None):122 """ 123 Searches public facebook posts for objects that contain the query. If since124 is specified, returns objects until a certain date, else returns as many 125 objects as it can. since can be any date accepted by PHP's strtotime. 126 """127 objs = insights(video_id)128 return objs129if __name__ == "__name__":...

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run unittest-xml-reporting automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful