#!/usr/bin/env python3
import unittest
import autotagger
import sys
from lxml import etree


class Test(unittest.TestCase):
    """Regression tests for the autotagger.

    Each test feeds one transcription file through the autotagger and
    compares the DOM it builds with an expected XML file parsed by lxml.

    Fixture paths use forward slashes: the previous backslash-separated,
    non-raw literals relied on invalid escape sequences such as ``\\d`` and
    ``\\s`` and only resolved on Windows.
    """

    def test_d48(self):
        # tests output of diary 48 of svoboda diaries
        self.run_Test("texts/d48_clean.txt", "outputs/d48_out.xml")

    def test_double_space(self):
        # tests output of double space test
        self.run_Test("texts/double_space_test.txt", "outputs/double_out.xml")

    def test_emma(self):
        # tests output of emma b. andrews file
        self.run_Test(
            "texts/emma-vol-1-autotagger-test-fixed.txt",
            "outputs/emma_out.xml",
        )

    def test_generic_divs(self):
        # tests output of generic div test file
        self.run_Test("texts/generic_test.txt", "outputs/generic_out.xml")

    def test_sultan_1(self):
        # tests output of first sultan's dream file
        self.run_Test("texts/KM_test_Justin_1.txt", "outputs/sultan_1_out.xml")

    def test_sultan_2(self):
        # tests output of second sultan's dream file
        self.run_Test("texts/KM_test_Justin_2.txt", "outputs/sultan_2_out.xml")

    def run_Test(self, input, output):
        """Tag the file at *input* and assert the result matches *output*.

        Args:
            input: path of the UTF-8 transcription text file to tag.
            output: path of the XML file holding the expected document.
        """
        # The context manager closes the file even if reading fails.
        with open(input, encoding="utf-8") as file:
            lines = file.readlines()

        # lines is a list of strings, one per line of the transcription
        tf = autotagger.TranscriptionFile(lines)
        document = autotagger.setup_DOM()
        div2s, marginheaders, margins_dict = autotagger.create_dom_nodes(
            document, tf
        )
        autotagger.organize_nodes(
            document, tf, div2s, marginheaders, margins_dict
        )

        # document is an XML DOM object; compare it directly rather than
        # re-serialising it to a string first.
        if input == "texts/d48_clean.txt":
            # Debug aid kept from the original: dump the diary 48 output.
            print(document.toprettyxml('\t', '\n', None))

        # Text nodes should only matter inside p, l, note, etc. (and in
        # attribute values); all other text is ignorable whitespace.
        parser = etree.XMLParser(remove_blank_text=True)
        correct_document = etree.parse(output, parser)
        correct_doc_element = correct_document.getroot()

        # NOTE(review): the first argument was missing from the reviewed
        # excerpt; the document's root element is assumed -- confirm.
        is_equal = self.compare(document.documentElement, correct_doc_element)
        self.assertTrue(is_equal)

    @staticmethod
    def _merge_text(items, text):
        """Append *text* to *items*, joining it onto a preceding string."""
        if not text:
            return
        if items and isinstance(items[-1], str):
            items[-1] += text
        else:
            items.append(text)

    @staticmethod
    def _dom_content(node):
        """Return the DOM node's text runs and child elements in order."""
        items = []
        for child in node.childNodes:
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE):
                Test._merge_text(items, child.data)
            elif child.nodeType == child.ELEMENT_NODE:
                items.append(child)
            # Comments and processing instructions are skipped.
        # Whitespace-only text is ignorable, so drop it.
        return [i for i in items if not isinstance(i, str) or i.strip()]

    @staticmethod
    def _lxml_content(element):
        """Return the lxml element's text runs and child elements in order."""
        items = []
        Test._merge_text(items, element.text)
        for child in element:
            # Comments and processing instructions have a non-string tag;
            # skip the node itself but keep the text that follows it.
            if isinstance(child.tag, str):
                items.append(child)
            Test._merge_text(items, child.tail)
        return [i for i in items if not isinstance(i, str) or i.strip()]

    def compare(self, test_element, correct_element):
        """Return True if a DOM element and an lxml element are equivalent.

        Two elements match when their tag names, attributes, and ordered
        content (non-blank text runs and child elements, compared
        recursively) all match.

        Args:
            test_element: DOM element produced by the autotagger, or None.
            correct_element: lxml element from the expected file, or None.

        Returns:
            True when both are None or both describe the same tree.
        """
        if test_element is None and correct_element is None:
            return True
        if test_element is None or correct_element is None:
            return False

        # NOTE(review): names are compared verbatim, so namespaced tags or
        # attributes (lxml's "{uri}local" form) will not match a prefixed
        # DOM name -- confirm whether the fixtures use namespaces.
        if test_element.tagName != correct_element.tag:
            return False
        test_attrs = dict(test_element.attributes.items())
        if test_attrs != dict(correct_element.attrib):
            return False

        # The DOM stores text as child nodes while lxml stores it in
        # .text/.tail, so both sides are flattened to the same shape first.
        test_content = self._dom_content(test_element)
        correct_content = self._lxml_content(correct_element)
        if len(test_content) != len(correct_content):
            return False

        for test_item, correct_item in zip(test_content, correct_content):
            test_is_text = isinstance(test_item, str)
            if test_is_text != isinstance(correct_item, str):
                return False
            if test_is_text:
                if test_item != correct_item:
                    return False
            elif not self.compare(test_item, correct_item):
                return False
        return True


if __name__ == '__main__':
    # NOTE(review): the body of this guard is elided in the reviewed
    # excerpt; presumably it calls unittest.main() -- confirm.
    ...

1import pytest2import modules.utils as utils3def test_is_text():4 content_type = "text/html"...

