Best Python code snippet using unittest-xml-reporting (Python)
tofea_test.py
Source: tofea_test.py
"""Tests for the VOLT-to-FEA converter (``VoltToFea``).

NOTE(review): reconstructed from a scraped listing in which the original
file's line numbers were fused into the text, destroying all formatting.
Content — including every VOLT input string and expected FEA output
string — is preserved byte-for-byte; only formatting and comments were
restored/added.
"""
import os
import unittest
from volto import VoltToFea
from io import StringIO
from tempfile import NamedTemporaryFile


class ToFeaTest(unittest.TestCase):
    """End-to-end checks: VOLT source in, generated FEA text out.

    Every test feeds a VOLT snippet to ``self.parse`` — a helper defined
    elsewhere in this file (past the end of this chunk) — and asserts on
    the exact FEA string it produces.
    """

    # --- DEF_GLYPH: glyph type -> GDEF GlyphClassDef slot -------------

    def test_def_glyph_base(self):
        fea = self.parse('DEF_GLYPH ".notdef" ID 0 TYPE BASE END_GLYPH')
        self.assertEqual(
            fea,
            "@GDEF_base = [.notdef];\n"
            "table GDEF {\n"
            "    GlyphClassDef @GDEF_base, , , ;\n"
            "} GDEF;\n",
        )

    def test_def_glyph_base_2_components(self):
        # COMPONENTS on a BASE glyph is ignored for classification.
        fea = self.parse(
            'DEF_GLYPH "glyphBase" ID 320 TYPE BASE COMPONENTS 2 END_GLYPH'
        )
        self.assertEqual(
            fea,
            "@GDEF_base = [glyphBase];\n"
            "table GDEF {\n"
            "    GlyphClassDef @GDEF_base, , , ;\n"
            "} GDEF;\n",
        )

    def test_def_glyph_ligature_2_components(self):
        fea = self.parse('DEF_GLYPH "f_f" ID 320 TYPE LIGATURE COMPONENTS 2 END_GLYPH')
        self.assertEqual(
            fea,
            "@GDEF_ligature = [f_f];\n"
            "table GDEF {\n"
            "    GlyphClassDef , @GDEF_ligature, , ;\n"
            "} GDEF;\n",
        )

    def test_def_glyph_mark(self):
        fea = self.parse('DEF_GLYPH "brevecomb" ID 320 TYPE MARK END_GLYPH')
        self.assertEqual(
            fea,
            "@GDEF_mark = [brevecomb];\n"
            "table GDEF {\n"
            "    GlyphClassDef , , @GDEF_mark, ;\n"
            "} GDEF;\n",
        )

    def test_def_glyph_component(self):
        fea = self.parse('DEF_GLYPH "f.f_f" ID 320 TYPE COMPONENT END_GLYPH')
        self.assertEqual(
            fea,
            "@GDEF_component = [f.f_f];\n"
            "table GDEF {\n"
            "    GlyphClassDef , , , @GDEF_component;\n"
            "} GDEF;\n",
        )

    def test_def_glyph_no_type(self):
        # A glyph without a TYPE contributes nothing to GDEF.
        fea = self.parse('DEF_GLYPH "glyph20" ID 20 END_GLYPH')
        self.assertEqual(fea, "")

    def test_def_glyph_case_sensitive(self):
        fea = self.parse(
            'DEF_GLYPH "A" ID 3 UNICODE 65 TYPE BASE END_GLYPH\n'
            'DEF_GLYPH "a" ID 4 UNICODE 97 TYPE BASE END_GLYPH\n'
        )
        self.assertEqual(
            fea,
            "@GDEF_base = [A a];\n"
            "table GDEF {\n"
            "    GlyphClassDef @GDEF_base, , , ;\n"
            "} GDEF;\n",
        )

    # --- DEF_GROUP -> FEA glyph classes -------------------------------

    def test_def_group_glyphs(self):
        fea = self.parse(
            'DEF_GROUP "aaccented"\n'
            'ENUM GLYPH "aacute" GLYPH "abreve" GLYPH "acircumflex" '
            'GLYPH "adieresis" GLYPH "ae" GLYPH "agrave" GLYPH "amacron" '
            'GLYPH "aogonek" GLYPH "aring" GLYPH "atilde" END_ENUM\n'
            "END_GROUP\n"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@aaccented = [aacute abreve acircumflex adieresis ae"
            " agrave amacron aogonek aring atilde];",
        )

    def test_def_group_groups(self):
        fea = self.parse(
            'DEF_GROUP "Group1"\n'
            'ENUM GLYPH "a" GLYPH "b" GLYPH "c" GLYPH "d" END_ENUM\n'
            "END_GROUP\n"
            'DEF_GROUP "Group2"\n'
            'ENUM GLYPH "e" GLYPH "f" GLYPH "g" GLYPH "h" END_ENUM\n'
            "END_GROUP\n"
            'DEF_GROUP "TestGroup"\n'
            'ENUM GROUP "Group1" GROUP "Group2" END_ENUM\n'
            "END_GROUP\n"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@Group1 = [a b c d];\n"
            "@Group2 = [e f g h];\n"
            "@TestGroup = [@Group1 @Group2];",
        )

    def test_def_group_groups_not_yet_defined(self):
        # Forward references: groups may be used before they are defined;
        # simple (glyph-only) groups must still be emitted first.
        fea = self.parse(
            'DEF_GROUP "Group1"\n'
            'ENUM GLYPH "a" GLYPH "b" GLYPH "c" GLYPH "d" END_ENUM\n'
            "END_GROUP\n"
            'DEF_GROUP "TestGroup1"\n'
            'ENUM GROUP "Group1" GROUP "Group2" END_ENUM\n'
            "END_GROUP\n"
            'DEF_GROUP "TestGroup2"\n'
            'ENUM GROUP "Group2" END_ENUM\n'
            "END_GROUP\n"
            'DEF_GROUP "TestGroup3"\n'
            'ENUM GROUP "Group2" GROUP "Group1" END_ENUM\n'
            "END_GROUP\n"
            'DEF_GROUP "Group2"\n'
            'ENUM GLYPH "e" GLYPH "f" GLYPH "g" GLYPH "h" END_ENUM\n'
            "END_GROUP\n"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@Group1 = [a b c d];\n"
            "@Group2 = [e f g h];\n"
            "@TestGroup1 = [@Group1 @Group2];\n"
            "@TestGroup2 = [@Group2];\n"
            "@TestGroup3 = [@Group2 @Group1];"
        )

    def test_def_group_glyphs_and_group(self):
        fea = self.parse(
            'DEF_GROUP "aaccented"\n'
            'ENUM GLYPH "aacute" GLYPH "abreve" GLYPH "acircumflex" '
            'GLYPH "adieresis" GLYPH "ae" GLYPH "agrave" GLYPH "amacron" '
            'GLYPH "aogonek" GLYPH "aring" GLYPH "atilde" END_ENUM\n'
            "END_GROUP\n"
            'DEF_GROUP "KERN_lc_a_2ND"\n'
            'ENUM GLYPH "a" GROUP "aaccented" END_ENUM\n'
            "END_GROUP"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@aaccented = [aacute abreve acircumflex adieresis ae"
            " agrave amacron aogonek aring atilde];\n"
            "@KERN_lc_a_2ND = [a @aaccented];",
        )

    def test_def_group_range(self):
        # RANGE entries become FEA glyph ranges ("a - atilde").
        fea = self.parse(
            'DEF_GLYPH "a" ID 163 UNICODE 97 TYPE BASE END_GLYPH\n'
            'DEF_GLYPH "agrave" ID 194 UNICODE 224 TYPE BASE END_GLYPH\n'
            'DEF_GLYPH "aacute" ID 195 UNICODE 225 TYPE BASE END_GLYPH\n'
            'DEF_GLYPH "acircumflex" ID 196 UNICODE 226 TYPE BASE END_GLYPH\n'
            'DEF_GLYPH "atilde" ID 197 UNICODE 227 TYPE BASE END_GLYPH\n'
            'DEF_GLYPH "c" ID 165 UNICODE 99 TYPE BASE END_GLYPH\n'
            'DEF_GLYPH "ccaron" ID 209 UNICODE 269 TYPE BASE END_GLYPH\n'
            'DEF_GLYPH "ccedilla" ID 210 UNICODE 231 TYPE BASE END_GLYPH\n'
            'DEF_GLYPH "cdotaccent" ID 210 UNICODE 267 TYPE BASE END_GLYPH\n'
            'DEF_GROUP "KERN_lc_a_2ND"\n'
            'ENUM RANGE "a" TO "atilde" GLYPH "b" RANGE "c" TO "cdotaccent" '
            "END_ENUM\n"
            "END_GROUP"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@KERN_lc_a_2ND = [a - atilde b c - cdotaccent];\n"
            "@GDEF_base = [a agrave aacute acircumflex atilde c"
            " ccaron ccedilla cdotaccent];\n"
            "table GDEF {\n"
            "    GlyphClassDef @GDEF_base, , , ;\n"
            "} GDEF;\n",
        )

    # --- DEF_SCRIPT / DEF_LANGSYS: emit nothing on their own ----------

    def test_script_without_langsys(self):
        fea = self.parse('DEF_SCRIPT NAME "Latin" TAG "latn"\n' "END_SCRIPT")
        self.assertEqual(fea, "")

    def test_langsys_normal(self):
        fea = self.parse(
            'DEF_SCRIPT NAME "Latin" TAG "latn"\n'
            'DEF_LANGSYS NAME "Romanian" TAG "ROM "\n'
            "END_LANGSYS\n"
            'DEF_LANGSYS NAME "Moldavian" TAG "MOL "\n'
            "END_LANGSYS\n"
            "END_SCRIPT"
        )
        self.assertEqual(fea, "")

    def test_langsys_no_script_name(self):
        fea = self.parse(
            'DEF_SCRIPT TAG "latn"\n'
            'DEF_LANGSYS NAME "Default" TAG "dflt"\n'
            "END_LANGSYS\n"
            "END_SCRIPT"
        )
        self.assertEqual(fea, "")

    def test_langsys_lang_in_separate_scripts(self):
        fea = self.parse(
            'DEF_SCRIPT NAME "Default" TAG "DFLT"\n'
            'DEF_LANGSYS NAME "Default" TAG "dflt"\n'
            "END_LANGSYS\n"
            'DEF_LANGSYS NAME "Default" TAG "ROM "\n'
            "END_LANGSYS\n"
            "END_SCRIPT\n"
            'DEF_SCRIPT NAME "Latin" TAG "latn"\n'
            'DEF_LANGSYS NAME "Default" TAG "dflt"\n'
            "END_LANGSYS\n"
            'DEF_LANGSYS NAME "Default" TAG "ROM "\n'
            "END_LANGSYS\n"
            "END_SCRIPT"
        )
        self.assertEqual(fea, "")

    def test_langsys_no_lang_name(self):
        fea = self.parse(
            'DEF_SCRIPT NAME "Latin" TAG "latn"\n'
            'DEF_LANGSYS TAG "dflt"\n'
            "END_LANGSYS\n"
            "END_SCRIPT"
        )
        self.assertEqual(fea, "")

    # --- DEF_FEATURE / DEF_LOOKUP -> FEA lookups and features ---------

    def test_feature(self):
        fea = self.parse(
            'DEF_SCRIPT NAME "Latin" TAG "latn"\n'
            'DEF_LANGSYS NAME "Romanian" TAG "ROM "\n'
            'DEF_FEATURE NAME "Fractions" TAG "frac"\n'
            'LOOKUP "fraclookup"\n'
            "END_FEATURE\n"
            "END_LANGSYS\n"
            "END_SCRIPT\n"
            'DEF_LOOKUP "fraclookup" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "one" GLYPH "slash" GLYPH "two"\n'
            'WITH GLYPH "one_slash_two.frac"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "\n# Lookups\n"
            "lookup fraclookup {\n"
            "    sub one slash two by one_slash_two.frac;\n"
            "} fraclookup;\n"
            "\n"
            "# Features\n"
            "feature frac {\n"
            "    script latn;\n"
            "    language ROM exclude_dflt;\n"
            "    lookup fraclookup;\n"
            "} frac;\n",
        )

    def test_feature_sub_lookups(self):
        # VOLT sub-lookups "name\1", "name\2" merge into one FEA lookup
        # whose subtables are separated by "subtable;".
        # NOTE(review): both LOOKUP statements below reference "\1";
        # confirm the second was not meant to be "fraclookup\2".
        fea = self.parse(
            'DEF_SCRIPT NAME "Latin" TAG "latn"\n'
            'DEF_LANGSYS NAME "Romanian" TAG "ROM "\n'
            'DEF_FEATURE NAME "Fractions" TAG "frac"\n'
            'LOOKUP "fraclookup\\1"\n'
            'LOOKUP "fraclookup\\1"\n'
            "END_FEATURE\n"
            "END_LANGSYS\n"
            "END_SCRIPT\n"
            'DEF_LOOKUP "fraclookup\\1" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION RTL\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "one" GLYPH "slash" GLYPH "two"\n'
            'WITH GLYPH "one_slash_two.frac"\n'
            "END_SUB\n"
            "END_SUBSTITUTION\n"
            'DEF_LOOKUP "fraclookup\\2" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION RTL\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "one" GLYPH "slash" GLYPH "three"\n'
            'WITH GLYPH "one_slash_three.frac"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "\n# Lookups\n"
            "lookup fraclookup {\n"
            "    lookupflag RightToLeft;\n"
            "    # fraclookup\\1\n"
            "    sub one slash two by one_slash_two.frac;\n"
            "    subtable;\n"
            "    # fraclookup\\2\n"
            "    sub one slash three by one_slash_three.frac;\n"
            "} fraclookup;\n"
            "\n"
            "# Features\n"
            "feature frac {\n"
            "    script latn;\n"
            "    language ROM exclude_dflt;\n"
            "    lookup fraclookup;\n"
            "} frac;\n",
        )

    def test_lookup_comment(self):
        fea = self.parse(
            'DEF_LOOKUP "smcp" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            'COMMENTS "Smallcaps lookup for testing"\n'
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "a"\n'
            'WITH GLYPH "a.sc"\n'
            "END_SUB\n"
            'SUB GLYPH "b"\n'
            'WITH GLYPH "b.sc"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "\n# Lookups\n"
            "lookup smcp {\n"
            "    # Smallcaps lookup for testing\n"
            "    sub a by a.sc;\n"
            "    sub b by b.sc;\n"
            "} smcp;\n",
        )

    # --- GSUB: substitution lookups -----------------------------------

    def test_substitution_single(self):
        fea = self.parse(
            'DEF_LOOKUP "smcp" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "a"\n'
            'WITH GLYPH "a.sc"\n'
            "END_SUB\n"
            'SUB GLYPH "b"\n'
            'WITH GLYPH "b.sc"\n'
            "END_SUB\n"
            "SUB WITH\n"  # Empty substitution, will be ignored
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "\n# Lookups\n"
            "lookup smcp {\n"
            "    sub a by a.sc;\n"
            "    sub b by b.sc;\n"
            "} smcp;\n",
        )

    def test_substitution_single_in_context(self):
        fea = self.parse(
            'DEF_GROUP "Denominators" ENUM GLYPH "one.dnom" GLYPH "two.dnom" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "fracdnom" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            'IN_CONTEXT LEFT ENUM GROUP "Denominators" GLYPH "fraction" '
            "END_ENUM\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "one"\n'
            'WITH GLYPH "one.dnom"\n'
            "END_SUB\n"
            'SUB GLYPH "two"\n'
            'WITH GLYPH "two.dnom"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@Denominators = [one.dnom two.dnom];\n"
            "\n"
            "# Lookups\n"
            "lookup fracdnom {\n"
            "    sub [@Denominators fraction] one' by one.dnom;\n"
            "    sub [@Denominators fraction] two' by two.dnom;\n"
            "} fracdnom;\n",
        )

    def test_substitution_single_in_contexts(self):
        # Two IN_CONTEXT blocks -> one chained rule per context.
        fea = self.parse(
            'DEF_GROUP "Hebrew" ENUM GLYPH "uni05D0" GLYPH "uni05D1" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "HebrewCurrency" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            'RIGHT GROUP "Hebrew"\n'
            'RIGHT GLYPH "one.Hebr"\n'
            "END_CONTEXT\n"
            "IN_CONTEXT\n"
            'LEFT GROUP "Hebrew"\n'
            'LEFT GLYPH "one.Hebr"\n'
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "dollar"\n'
            'WITH GLYPH "dollar.Hebr"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@Hebrew = [uni05D0 uni05D1];\n"
            "\n"
            "# Lookups\n"
            "lookup HebrewCurrency {\n"
            "    sub dollar' @Hebrew one.Hebr by dollar.Hebr;\n"
            "    sub @Hebrew one.Hebr dollar' by dollar.Hebr;\n"
            "} HebrewCurrency;\n",
        )

    def test_substitution_single_except_context(self):
        # EXCEPT_CONTEXT -> an "ignore sub" rule preceding the real rule.
        fea = self.parse(
            'DEF_GROUP "Hebrew" ENUM GLYPH "uni05D0" GLYPH "uni05D1" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "HebrewCurrency" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            "EXCEPT_CONTEXT\n"
            'RIGHT GROUP "Hebrew"\n'
            'RIGHT GLYPH "one.Hebr"\n'
            "END_CONTEXT\n"
            "IN_CONTEXT\n"
            'LEFT GROUP "Hebrew"\n'
            'LEFT GLYPH "one.Hebr"\n'
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "dollar"\n'
            'WITH GLYPH "dollar.Hebr"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@Hebrew = [uni05D0 uni05D1];\n"
            "\n"
            "# Lookups\n"
            "lookup HebrewCurrency {\n"
            "    ignore sub dollar' @Hebrew one.Hebr;\n"
            "    sub @Hebrew one.Hebr dollar' by dollar.Hebr;\n"
            "} HebrewCurrency;\n",
        )

    # --- PROCESS/SKIP flags -> FEA lookupflag -------------------------

    def test_substitution_skip_base(self):
        fea = self.parse(
            'DEF_GROUP "SomeMarks" ENUM GLYPH "marka" GLYPH "markb" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "SomeSub" SKIP_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "A"\n'
            'WITH GLYPH "A.c2sc"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@SomeMarks = [marka markb];\n"
            "\n"
            "# Lookups\n"
            "lookup SomeSub {\n"
            "    lookupflag IgnoreBaseGlyphs;\n"
            "    sub A by A.c2sc;\n"
            "} SomeSub;\n",
        )

    def test_substitution_process_base(self):
        fea = self.parse(
            'DEF_GROUP "SomeMarks" ENUM GLYPH "marka" GLYPH "markb" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "SomeSub" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "A"\n'
            'WITH GLYPH "A.c2sc"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@SomeMarks = [marka markb];\n"
            "\n"
            "# Lookups\n"
            "lookup SomeSub {\n"
            "    sub A by A.c2sc;\n"
            "} SomeSub;\n",
        )

    def test_substitution_process_marks_all(self):
        # NOTE(review): there is no space between '"ALL"' and "DIRECTION"
        # in the concatenated VOLT input — confirm the tokenizer splits on
        # the quote boundary as this test assumes.
        fea = self.parse(
            'DEF_GROUP "SomeMarks" ENUM GLYPH "marka" GLYPH "markb" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "SomeSub" PROCESS_BASE PROCESS_MARKS "ALL"'
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "A"\n'
            'WITH GLYPH "A.c2sc"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@SomeMarks = [marka markb];\n"
            "\n"
            "# Lookups\n"
            "lookup SomeSub {\n"
            "    sub A by A.c2sc;\n"
            "} SomeSub;\n",
        )

    def test_substitution_process_marks_none(self):
        fea = self.parse(
            'DEF_GROUP "SomeMarks" ENUM GLYPH "marka" GLYPH "markb" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "SomeSub" PROCESS_BASE PROCESS_MARKS "NONE"'
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "A"\n'
            'WITH GLYPH "A.c2sc"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@SomeMarks = [marka markb];\n"
            "\n"
            "# Lookups\n"
            "lookup SomeSub {\n"
            "    lookupflag IgnoreMarks;\n"
            "    sub A by A.c2sc;\n"
            "} SomeSub;\n",
        )

    def test_substitution_skip_marks(self):
        fea = self.parse(
            'DEF_GROUP "SomeMarks" ENUM GLYPH "marka" GLYPH "markb" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "SomeSub" PROCESS_BASE SKIP_MARKS '
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "A"\n'
            'WITH GLYPH "A.c2sc"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@SomeMarks = [marka markb];\n"
            "\n"
            "# Lookups\n"
            "lookup SomeSub {\n"
            "    lookupflag IgnoreMarks;\n"
            "    sub A by A.c2sc;\n"
            "} SomeSub;\n",
        )

    def test_substitution_mark_attachment(self):
        fea = self.parse(
            'DEF_GROUP "SomeMarks" ENUM GLYPH "acutecmb" GLYPH "gravecmb" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "SomeSub" PROCESS_BASE '
            'PROCESS_MARKS "SomeMarks" \n'
            "DIRECTION RTL\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "A"\n'
            'WITH GLYPH "A.c2sc"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@SomeMarks = [acutecmb gravecmb];\n"
            "\n"
            "# Lookups\n"
            "lookup SomeSub {\n"
            "    lookupflag RightToLeft MarkAttachmentType"
            " @SomeMarks;\n"
            "    sub A by A.c2sc;\n"
            "} SomeSub;\n",
        )

    def test_substitution_mark_glyph_set(self):
        fea = self.parse(
            'DEF_GROUP "SomeMarks" ENUM GLYPH "acutecmb" GLYPH "gravecmb" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "SomeSub" PROCESS_BASE '
            'PROCESS_MARKS MARK_GLYPH_SET "SomeMarks" \n'
            "DIRECTION RTL\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "A"\n'
            'WITH GLYPH "A.c2sc"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@SomeMarks = [acutecmb gravecmb];\n"
            "\n"
            "# Lookups\n"
            "lookup SomeSub {\n"
            "    lookupflag RightToLeft UseMarkFilteringSet"
            " @SomeMarks;\n"
            "    sub A by A.c2sc;\n"
            "} SomeSub;\n",
        )

    def test_substitution_process_all_marks(self):
        fea = self.parse(
            'DEF_GROUP "SomeMarks" ENUM GLYPH "acutecmb" GLYPH "gravecmb" '
            "END_ENUM END_GROUP\n"
            'DEF_LOOKUP "SomeSub" PROCESS_BASE '
            "PROCESS_MARKS ALL \n"
            "DIRECTION RTL\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "A"\n'
            'WITH GLYPH "A.c2sc"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@SomeMarks = [acutecmb gravecmb];\n"
            "\n"
            "# Lookups\n"
            "lookup SomeSub {\n"
            "    lookupflag RightToLeft;\n"
            "    sub A by A.c2sc;\n"
            "} SomeSub;\n",
        )

    def test_substitution_no_reversal(self):
        # TODO: check right context with no reversal
        fea = self.parse(
            'DEF_LOOKUP "Lookup" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            'RIGHT ENUM GLYPH "a" GLYPH "b" END_ENUM\n'
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "a"\n'
            'WITH GLYPH "a.alt"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "\n# Lookups\n"
            "lookup Lookup {\n"
            "    sub a' [a b] by a.alt;\n"
            "} Lookup;\n",
        )

    def test_substitution_reversal(self):
        fea = self.parse(
            'DEF_GROUP "DFLT_Num_standardFigures"\n'
            'ENUM GLYPH "zero" GLYPH "one" GLYPH "two" END_ENUM\n'
            "END_GROUP\n"
            'DEF_GROUP "DFLT_Num_numerators"\n'
            'ENUM GLYPH "zero.numr" GLYPH "one.numr" GLYPH "two.numr" END_ENUM\n'
            "END_GROUP\n"
            'DEF_LOOKUP "RevLookup" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR REVERSAL\n"
            "IN_CONTEXT\n"
            'RIGHT ENUM GLYPH "a" GLYPH "b" END_ENUM\n'
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GROUP "DFLT_Num_standardFigures"\n'
            'WITH GROUP "DFLT_Num_numerators"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "# Glyph classes\n"
            "@DFLT_Num_standardFigures = [zero one two];\n"
            "@DFLT_Num_numerators = [zero.numr one.numr two.numr];\n"
            "\n"
            "# Lookups\n"
            "lookup RevLookup {\n"
            "    rsub @DFLT_Num_standardFigures' [a b] by @DFLT_Num_numerators;\n"
            "} RevLookup;\n",
        )

    def test_substitution_single_to_multiple(self):
        fea = self.parse(
            'DEF_LOOKUP "ccmp" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "aacute"\n'
            'WITH GLYPH "a" GLYPH "acutecomb"\n'
            "END_SUB\n"
            'SUB GLYPH "agrave"\n'
            'WITH GLYPH "a" GLYPH "gravecomb"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "\n# Lookups\n"
            "lookup ccmp {\n"
            "    sub aacute by a acutecomb;\n"
            "    sub agrave by a gravecomb;\n"
            "} ccmp;\n",
        )

    def test_substitution_multiple_to_single(self):
        fea = self.parse(
            'DEF_LOOKUP "liga" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB GLYPH "f" GLYPH "i"\n'
            'WITH GLYPH "f_i"\n'
            "END_SUB\n"
            'SUB GLYPH "f" GLYPH "t"\n'
            'WITH GLYPH "f_t"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "\n# Lookups\n"
            "lookup liga {\n"
            "    sub f i by f_i;\n"
            "    sub f t by f_t;\n"
            "} liga;\n",
        )

    def test_substitution_reverse_chaining_single(self):
        fea = self.parse(
            'DEF_LOOKUP "numr" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION LTR REVERSAL\n"
            "IN_CONTEXT\n"
            "RIGHT ENUM "
            'GLYPH "fraction" '
            'RANGE "zero.numr" TO "nine.numr" '
            "END_ENUM\n"
            "END_CONTEXT\n"
            "AS_SUBSTITUTION\n"
            'SUB RANGE "zero" TO "nine"\n'
            'WITH RANGE "zero.numr" TO "nine.numr"\n'
            "END_SUB\n"
            "END_SUBSTITUTION"
        )
        self.assertEqual(
            fea,
            "\n# Lookups\n"
            "lookup numr {\n"
            "    rsub zero - nine' [fraction zero.numr - nine.numr] by zero.numr - nine.numr;\n"
            "} numr;\n",
        )

    # GPOS
    #  ATTACH_CURSIVE
    #  ATTACH
    #  ADJUST_PAIR
    #  ADJUST_SINGLE
    def test_position_attach(self):
        fea = self.parse(
            'DEF_LOOKUP "anchor_top" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION RTL\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_POSITION\n"
            'ATTACH GLYPH "a" GLYPH "e"\n'
            'TO GLYPH "acutecomb" AT ANCHOR "top" '
            'GLYPH "gravecomb" AT ANCHOR "top"\n'
            "END_ATTACH\n"
            "END_POSITION\n"
            'DEF_ANCHOR "MARK_top" ON 120 GLYPH acutecomb COMPONENT 1 '
            "AT POS DX 0 DY 450 END_POS END_ANCHOR\n"
            'DEF_ANCHOR "MARK_top" ON 121 GLYPH gravecomb COMPONENT 1 '
            "AT POS DX 0 DY 450 END_POS END_ANCHOR\n"
            'DEF_ANCHOR "top" ON 31 GLYPH a COMPONENT 1 '
            "AT POS DX 210 DY 450 END_POS END_ANCHOR\n"
            'DEF_ANCHOR "top" ON 35 GLYPH e COMPONENT 1 '
            "AT POS DX 215 DY 450 END_POS END_ANCHOR\n"
        )
        self.assertEqual(
            fea,
            "\n# Mark classes\n"
            "markClass acutecomb <anchor 0 450> @top;\n"
            "markClass gravecomb <anchor 0 450> @top;\n"
            "\n"
            "# Lookups\n"
            "lookup anchor_top {\n"
            "    lookupflag RightToLeft;\n"
            "    pos base a <anchor 210 450> mark @top;\n"
            "    pos base e <anchor 215 450> mark @top;\n"
            "} anchor_top;\n",
        )

    def test_position_attach_mkmk(self):
        # Attaching to a MARK-typed glyph emits "pos mark", and the MARK
        # glyph definitions also produce a GDEF mark class.
        fea = self.parse(
            'DEF_GLYPH "brevecomb" ID 1 TYPE MARK END_GLYPH\n'
            'DEF_GLYPH "gravecomb" ID 2 TYPE MARK END_GLYPH\n'
            'DEF_LOOKUP "anchor_top" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION RTL\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_POSITION\n"
            'ATTACH GLYPH "gravecomb"\n'
            'TO GLYPH "acutecomb" AT ANCHOR "top"\n'
            "END_ATTACH\n"
            "END_POSITION\n"
            'DEF_ANCHOR "MARK_top" ON 1 GLYPH acutecomb COMPONENT 1 '
            "AT POS DX 0 DY 450 END_POS END_ANCHOR\n"
            'DEF_ANCHOR "top" ON 2 GLYPH gravecomb COMPONENT 1 '
            "AT POS DX 210 DY 450 END_POS END_ANCHOR\n"
        )
        self.assertEqual(
            fea,
            "\n# Mark classes\n"
            "markClass acutecomb <anchor 0 450> @top;\n"
            "\n"
            "# Lookups\n"
            "lookup anchor_top {\n"
            "    lookupflag RightToLeft;\n"
            "    pos mark gravecomb <anchor 210 450> mark @top;\n"
            "} anchor_top;\n"
            "\n"
            "@GDEF_mark = [brevecomb gravecomb];\n"
            "table GDEF {\n"
            "    GlyphClassDef , , @GDEF_mark, ;\n"
            "} GDEF;\n",
        )

    def test_position_attach_in_context(self):
        # Contextual attachment is split into a target lookup plus a
        # chaining lookup that references it.
        fea = self.parse(
            'DEF_LOOKUP "test" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION RTL\n"
            'EXCEPT_CONTEXT LEFT GLYPH "a" END_CONTEXT\n'
            "AS_POSITION\n"
            'ATTACH GLYPH "a"\n'
            'TO GLYPH "acutecomb" AT ANCHOR "top" '
            'GLYPH "gravecomb" AT ANCHOR "top"\n'
            "END_ATTACH\n"
            "END_POSITION\n"
            'DEF_ANCHOR "MARK_top" ON 120 GLYPH acutecomb COMPONENT 1 '
            "AT POS DX 0 DY 450 END_POS END_ANCHOR\n"
            'DEF_ANCHOR "MARK_top" ON 121 GLYPH gravecomb COMPONENT 1 '
            "AT POS DX 0 DY 450 END_POS END_ANCHOR\n"
            'DEF_ANCHOR "top" ON 31 GLYPH a COMPONENT 1 '
            "AT POS DX 210 DY 450 END_POS END_ANCHOR\n"
        )
        self.assertEqual(
            fea,
            "\n# Mark classes\n"
            "markClass acutecomb <anchor 0 450> @top;\n"
            "markClass gravecomb <anchor 0 450> @top;\n"
            "\n"
            "# Lookups\n"
            "lookup test_target {\n"
            "    pos base a <anchor 210 450> mark @top;\n"
            "} test_target;\n"
            "\n"
            "lookup test {\n"
            "    lookupflag RightToLeft;\n"
            "    ignore pos a [acutecomb gravecomb]';\n"
            "    pos [acutecomb gravecomb]' lookup test_target;\n"
            "} test;\n",
        )

    def test_position_attach_cursive(self):
        fea = self.parse(
            'DEF_LOOKUP "SomeLookup" PROCESS_BASE PROCESS_MARKS ALL '
            "DIRECTION RTL\n"
            "IN_CONTEXT\n"
            "END_CONTEXT\n"
            "AS_POSITION\n"
            'ATTACH_CURSIVE EXIT GLYPH "a" GLYPH "b" '
            'ENTER GLYPH "a" GLYPH "c"\n'
            "END_ATTACH\n"
            "END_POSITION\n"
            'DEF_ANCHOR "exit"  ON 1 GLYPH a COMPONENT 1 AT POS END_POS END_ANCHOR\n'
            'DEF_ANCHOR "entry" ON 1 GLYPH a COMPONENT 1 AT POS END_POS END_ANCHOR\n'
            'DEF_ANCHOR "exit"  ON 2 GLYPH b COMPONENT 1 AT POS END_POS END_ANCHOR\n'
            'DEF_ANCHOR "entry" ON 3 GLYPH c COMPONENT 1 AT POS END_POS END_ANCHOR\n'
        )
        self.assertEqual(
            fea,
            "\n# Lookups\n"
            "lookup SomeLookup {\n"
            "    lookupflag RightToLeft;\n"
            "    pos cursive a <anchor 0 0> <anchor 0 0>;\n"
            "    pos cursive c <anchor 0 0> <anchor NULL>;\n"
            "    pos cursive b <anchor NULL> <anchor 0 0>;\n"
            "} SomeLookup;\n",
        )

    # NOTE(review): this chunk of the scraped source is truncated inside
    # test_position_adjust_pair (original lines 891-903): the VOLT input
    # breaks off after the " 2 1 BY POS ADV -25 ..." line and the method's
    # assertion is missing entirely. That method, and anything following
    # it (ADJUST_SINGLE tests, the parse() helper, etc.), is deliberately
    # not reconstructed here to avoid guessing at the missing text.
     "END_ADJUST\n"904            "END_POSITION\n"905        )906        self.assertEqual(907            fea,908            "\n# Lookups\n"909            "lookup kern1 {\n"910            "    lookupflag RightToLeft;\n"911            "    enum pos A V -30;\n"912            "    enum pos V A -25;\n"913            "} kern1;\n",914        )915    def test_position_adjust_pair_in_context(self):916        fea = self.parse(917            'DEF_LOOKUP "kern1" PROCESS_BASE PROCESS_MARKS ALL '918            "DIRECTION LTR\n"919            'EXCEPT_CONTEXT LEFT GLYPH "A" END_CONTEXT\n'920            "AS_POSITION\n"921            "ADJUST_PAIR\n"922            ' FIRST GLYPH "A" FIRST GLYPH "V"\n'923            ' SECOND GLYPH "A" SECOND GLYPH "V"\n'924            " 2 1 BY POS ADV -25 END_POS POS END_POS\n"925            "END_ADJUST\n"926            "END_POSITION\n"927        )928        self.assertEqual(929            fea,930            "\n# Lookups\n"931            "lookup kern1_target {\n"932            "    enum pos V A -25;\n"933            "} kern1_target;\n"934            "\n"935            "lookup kern1 {\n"936            "    ignore pos A V' A';\n"937            "    pos V' lookup kern1_target A' lookup kern1_target;\n"938            "} kern1;\n",939        )940    def test_position_adjust_single(self):941        fea = self.parse(942            'DEF_LOOKUP "TestLookup" PROCESS_BASE PROCESS_MARKS ALL '943            "DIRECTION LTR\n"944            "IN_CONTEXT\n"945            "END_CONTEXT\n"946            "AS_POSITION\n"947            "ADJUST_SINGLE"948            ' GLYPH "glyph1" BY POS ADV 0 DX 123 END_POS\n'949            ' GLYPH "glyph2" BY POS ADV 0 DX 456 END_POS\n'950            "END_ADJUST\n"951            "END_POSITION\n"952        )953        self.assertEqual(954            fea,955            "\n# Lookups\n"956            "lookup TestLookup {\n"957            "    pos glyph1 <123 0 0 0>;\n"958            "    pos glyph2 <456 0 0 0>;\n"959            "} 
TestLookup;\n",960        )961    def test_position_adjust_single_in_context(self):962        fea = self.parse(963            'DEF_LOOKUP "TestLookup" PROCESS_BASE PROCESS_MARKS ALL '964            "DIRECTION LTR\n"965            "EXCEPT_CONTEXT\n"966            'LEFT GLYPH "leftGlyph"\n'967            'RIGHT GLYPH "rightGlyph"\n'968            "END_CONTEXT\n"969            "AS_POSITION\n"970            "ADJUST_SINGLE"971            ' GLYPH "glyph1" BY POS ADV 0 DX 123 END_POS\n'972            ' GLYPH "glyph2" BY POS ADV 0 DX 456 END_POS\n'973            "END_ADJUST\n"974            "END_POSITION\n"975        )976        self.assertEqual(977            fea,978            "\n# Lookups\n"979            "lookup TestLookup_target {\n"980            "    pos glyph1 <123 0 0 0>;\n"981            "    pos glyph2 <456 0 0 0>;\n"982            "} TestLookup_target;\n"983            "\n"984            "lookup TestLookup {\n"985            "    ignore pos leftGlyph [glyph1 glyph2]' rightGlyph;\n"986            "    pos [glyph1 glyph2]' lookup TestLookup_target;\n"987            "} TestLookup;\n",988        )989    def test_def_anchor(self):990        fea = self.parse(991            'DEF_LOOKUP "TestLookup" PROCESS_BASE PROCESS_MARKS ALL '992            "DIRECTION LTR\n"993            "IN_CONTEXT\n"994            "END_CONTEXT\n"995            "AS_POSITION\n"996            'ATTACH GLYPH "a"\n'997            'TO GLYPH "acutecomb" AT ANCHOR "top"\n'998            "END_ATTACH\n"999            "END_POSITION\n"1000            'DEF_ANCHOR "top" ON 120 GLYPH a '1001            "COMPONENT 1 AT POS DX 250 DY 450 END_POS END_ANCHOR\n"1002            'DEF_ANCHOR "MARK_top" ON 120 GLYPH acutecomb '1003            "COMPONENT 1 AT POS DX 0 DY 450 END_POS END_ANCHOR"1004        )1005        self.assertEqual(1006            fea,1007            "\n# Mark classes\n"1008            "markClass acutecomb <anchor 0 450> @top;\n"1009            "\n"1010            "# Lookups\n"1011            "lookup 
TestLookup {\n"1012            "    pos base a <anchor 250 450> mark @top;\n"1013            "} TestLookup;\n",1014        )1015    def test_def_anchor_multi_component(self):1016        fea = self.parse(1017            'DEF_LOOKUP "TestLookup" PROCESS_BASE PROCESS_MARKS ALL '1018            "DIRECTION LTR\n"1019            "IN_CONTEXT\n"1020            "END_CONTEXT\n"1021            "AS_POSITION\n"1022            'ATTACH GLYPH "f_f"\n'1023            'TO GLYPH "acutecomb" AT ANCHOR "top"\n'1024            "END_ATTACH\n"1025            "END_POSITION\n"1026            'DEF_GLYPH "f_f" ID 120 TYPE LIGATURE COMPONENTS 2 END_GLYPH\n'1027            'DEF_ANCHOR "top" ON 120 GLYPH f_f '1028            "COMPONENT 1 AT POS DX 250 DY 450 END_POS END_ANCHOR\n"1029            'DEF_ANCHOR "top" ON 120 GLYPH f_f '1030            "COMPONENT 2 AT POS DX 450 DY 450 END_POS END_ANCHOR\n"1031            'DEF_ANCHOR "MARK_top" ON 120 GLYPH acutecomb '1032            "COMPONENT 1 AT POS  END_POS END_ANCHOR"1033        )1034        self.assertEqual(1035            fea,1036            "\n# Mark classes\n"1037            "markClass acutecomb <anchor 0 0> @top;\n"1038            "\n"1039            "# Lookups\n"1040            "lookup TestLookup {\n"1041            "    pos ligature f_f <anchor 250 450> mark @top\n"1042            "        ligComponent <anchor 450 450> mark @top;\n"1043            "} TestLookup;\n"1044            "\n"1045            "@GDEF_ligature = [f_f];\n"1046            "table GDEF {\n"1047            "    GlyphClassDef , @GDEF_ligature, , ;\n"1048            "} GDEF;\n",1049        )1050    def test_anchor_adjust_device(self):1051        fea = self.parse(1052            'DEF_ANCHOR "MARK_top" ON 123 GLYPH diacglyph '1053            "COMPONENT 1 AT POS DX 0 DY 456 ADJUST_BY 12 AT 34 "1054            "ADJUST_BY 56 AT 78 END_POS END_ANCHOR"1055        )1056        self.assertEqual(1057            fea,1058            "\n# Mark classes\n"1059            "#markClass 
diacglyph <anchor 0 456 <device NULL>"1060            " <device 34 12, 78 56>> @top;",1061        )1062    def test_use_extension(self):1063        fea = self.parse(1064            'DEF_LOOKUP "kern1" PROCESS_BASE PROCESS_MARKS ALL '1065            "DIRECTION LTR\n"1066            "IN_CONTEXT\n"1067            "END_CONTEXT\n"1068            "AS_POSITION\n"1069            "ADJUST_PAIR\n"1070            ' FIRST GLYPH "A" FIRST GLYPH "V"\n'1071            ' SECOND GLYPH "A" SECOND GLYPH "V"\n'1072            " 1 2 BY POS ADV -30 END_POS POS END_POS\n"1073            " 2 1 BY POS ADV -25 END_POS POS END_POS\n"1074            "END_ADJUST\n"1075            "END_POSITION\n"1076            "COMPILER_USEEXTENSIONLOOKUPS\n"1077        )1078        self.assertEqual(1079            fea,1080            "\n# Lookups\n"1081            "lookup kern1 useExtension {\n"1082            "    enum pos A V -30;\n"1083            "    enum pos V A -25;\n"1084            "} kern1;\n",1085        )1086    def test_unsupported_compiler_flags(self):1087        with self.assertLogs(level="WARNING") as logs:1088            fea = self.parse("CMAP_FORMAT 0 3 4")1089            self.assertEqual(fea, "")1090        self.assertEqual(1091            logs.output, ["WARNING:root:Unsupported setting ignored: CMAP_FORMAT"]1092        )1093    def test_sanitize_lookup_name(self):1094        fea = self.parse(1095            'DEF_LOOKUP "Test Lookup" PROCESS_BASE PROCESS_MARKS ALL '1096            "DIRECTION LTR IN_CONTEXT END_CONTEXT\n"1097            "AS_POSITION ADJUST_PAIR END_ADJUST END_POSITION\n"1098            'DEF_LOOKUP "Test-Lookup" PROCESS_BASE PROCESS_MARKS ALL '1099            "DIRECTION LTR IN_CONTEXT END_CONTEXT\n"1100            "AS_POSITION ADJUST_PAIR END_ADJUST END_POSITION\n"1101        )1102        self.assertEqual(1103            fea,1104            "\n# Lookups\n"1105            "lookup Test_Lookup {\n"1106            "    \n"1107            "} Test_Lookup;\n"1108            "\n"1109 
           "lookup Test_Lookup_ {\n"1110            "    \n"1111            "} Test_Lookup_;\n",1112        )1113    def test_sanitize_group_name(self):1114        fea = self.parse(1115            'DEF_GROUP "aaccented glyphs"\n'1116            'ENUM GLYPH "aacute" GLYPH "abreve" END_ENUM\n'1117            "END_GROUP\n"1118            'DEF_GROUP "aaccented+glyphs"\n'1119            'ENUM GLYPH "aacute" GLYPH "abreve" END_ENUM\n'1120            "END_GROUP\n"1121        )1122        self.assertEqual(1123            fea,1124            "# Glyph classes\n"1125            "@aaccented_glyphs = [aacute abreve];\n"1126            "@aaccented_glyphs_ = [aacute abreve];",1127        )1128    def test_cli_vtp(self):1129        from volto import main as volto1130        path, _ = os.path.split(__file__)1131        vtp = os.path.join(path, "Nutso.vtp")1132        fea = os.path.join(path, "Nutso.fea")1133        with NamedTemporaryFile(mode="r") as temp:1134            volto([vtp, temp.name])1135            res = temp.read()1136            with open(fea, mode="r") as f:1137                ref = f.read()1138            self.assertEqual(ref, res)1139    def test_group_order(self):1140        from volto import main as volto1141        path, _ = os.path.split(__file__)1142        vtp = os.path.join(path, "NamdhinggoSIL1006.vtp")1143        fea = os.path.join(path, "NamdhinggoSIL1006.fea")1144        with NamedTemporaryFile(mode="r") as temp:1145            volto([vtp, temp.name])1146            res = temp.read()1147            with open(fea, mode="r") as f:1148                ref = f.read()1149            self.assertEqual(ref, res)1150    def test_cli_ttf(self):1151        from volto import main as volto1152        path, _ = os.path.split(__file__)1153        ttf = os.path.join(path, "Nutso.ttf")1154        fea = os.path.join(path, "Nutso.fea")1155        with NamedTemporaryFile(mode="r") as temp:1156            volto([ttf, temp.name])1157            res = temp.read()1158            
with open(fea, mode="r") as f:1159                ref = f.read()1160            self.assertEqual(ref, res)1161    def test_cli_ttf_no_TSIV(self):1162        from volto import main as volto1163        path, _ = os.path.split(__file__)1164        ttf = os.path.join(path, "Empty.ttf")1165        with NamedTemporaryFile() as temp:1166            self.assertEqual(1, volto([ttf, temp.name]))1167    def parse(self, text):1168        return VoltToFea(StringIO(text)).convert()1169if __name__ == "__main__":1170    import sys...sampler.py
Source:sampler.py  
# -*- coding: utf-8 -*-
"""Dataset sampling utilities bridging tokenizers and the boolean,
abstractive and extractive QA models."""
__copyright__ = "Copyright (c) 2018-2020 Opscidia"
__maintainer__ = "Loic Rakotoson"
__status__ = "Development"

__all__ = [
    "Sampler"
]

import numpy as np
import pandas as pd
import tensorflow as tf
import transformers as tr
from tensorflow.keras import backend as K
from tensorflow.python.framework import ops

import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

AUTO = tf.data.experimental.AUTOTUNE


class Sampler:
    def __init__(self,
                 boolean_tokenizer,
                 abstractive_tokenizer=None,
                 extractive_tokenizer=None,
                 **kwargs):
        """
        Sampler handles formatting, tokenizers and the bridge between models.

        :param *_tokenizer: model name, path, or an already-built tokenizer
        :param *_length: encoder, decoder and boolean tokenizer lengths
        :param window_size: number of sentences per window
        :param window_stride: moving step. A high value is better for
            abstractive QA while a low value yields more data for extractive QA
        """
        # Maximum sequence lengths for the different tokenizers.
        self.encoder_length = kwargs.get('encoder_length', 512)
        self.decoder_length = kwargs.get('decoder_length', 80)
        self.boolean_length = kwargs.get('boolean_length', 512)
        self.extract_length = kwargs.get('extract_length', 300)

        # Extractive QA benefits from denser (smaller stride) windows.
        if extractive_tokenizer:
            self.window_size = kwargs.get('window_size', 5)
            self.window_stride = kwargs.get('window_stride', 2)
        else:
            self.window_size = kwargs.get('window_size', 10)
            self.window_stride = kwargs.get('window_stride', 10)

        self.get_tokenizers(boolean_tokenizer, abstractive_tokenizer,
                            extractive_tokenizer)

    def get_tokenizers(self, tok_bool, tok_abs, tok_ext):
        """Resolve each tokenizer argument: strings are loaded with
        AutoTokenizer, ready tokenizers (or None) are kept as-is."""
        def assign(tok):
            if isinstance(tok, str):
                tok = tr.AutoTokenizer.from_pretrained(tok)
            return tok

        if not (tok_abs or tok_ext):
            print("""
                Only boolean tokenizer was initialised.
                Ignore this message if the boolean_pipeline will be used, otherwise
                use at least one of abstractive_tokenizer or extractive_tokenizer
            """)
        self.tok_bool = assign(tok_bool)
        self.tok_abs = assign(tok_abs)
        self.tok_ext = assign(tok_ext)

    def window(self, sentences):
        """Split *sentences* into overlapping windows of ``window_size``
        advancing by ``window_stride``."""
        return [
            sentences[w:w + self.window_size]
            for w in range(0, len(sentences), self.window_stride)
        ]

    def to_context(self, question, text):
        """Format *text* into T5-style "question: ... context: ..." strings,
        one per sentence window."""
        return list(map(
            lambda x: f'question: {question} </s> context: {" ".join(x)} </s>',
            self.window(sent_tokenize(text))
        ))

    @staticmethod
    def tensor_window(a, size):
        """Return a zero-copy view of all contiguous windows of length *size*
        over the last axis of *a* (numpy stride trick)."""
        shape = a.shape[:-1] + (a.shape[-1] - size + 1, size)
        strides = a.strides + (a.strides[-1],)
        return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

    @staticmethod
    def to_train_dataset(data):
        """Validate a training DataFrame and add the formatted 'f_context'
        column. Requires 'question', 'context' and 'answer' columns."""
        cols = {"question", "context", "answer"}

        def context(q, c):
            q = q.encode().decode("utf8")
            c = c.encode().decode("utf8")
            return f'question: {q} </s> context: {c} </s>'

        assert isinstance(data, pd.DataFrame), "only pandas.DataFrame supported"
        assert cols.issubset(data), f"data must contains {cols} columns"

        data['f_context'] = data.apply(
            lambda x: context(x.question, x.context),
            axis = 1
        )

        return data

    @staticmethod
    def to_predict_dataset(data):
        """Validate the type (and, for DataFrames, the columns) of prediction
        input. Raises AssertionError on invalid input; returns None.

        NOTE(review): `assert` is stripped under `python -O`; consider raising
        TypeError/ValueError instead if this validation must always run.
        """
        cols = {"question", "context"}
        assert isinstance(
            data,
            (pd.DataFrame, ops.EagerTensor, tf.RaggedTensor, list)
        ), "only pandas.DataFrame, list of arrays, EagerTensor and RaggedTensor supported"
        if isinstance(data, pd.DataFrame):
            assert cols.issubset(data), f"data must contains {cols} columns"

    def to_train_abstractive_dataset(self, data, batch_size = 16, buffer = 1e4):
        """
        format pandas.DataFrame to create dataset for training AbstractiveQA
        :param data: dataframe with question, context and answer
        :param batch_size: batch size
        :param buffer: buffer for shuffle
        :return: tensorflow dataset of (n, encode_length) size
        :rtype: tf.Tensor
        """
        data = self.to_train_dataset(data)

        def answer(a):
            return f'{a.encode().decode("utf8")} </s>'
        data['f_answer'] = data.apply(lambda x: answer(x.answer), axis=1)

        cont = self.tok_abs.batch_encode_plus(
            data['f_context'].to_list(),
            truncation = True,
            return_tensors = 'tf',
            max_length = self.encoder_length,
            padding = "max_length")
        ans = self.tok_abs.batch_encode_plus(
            data['f_answer'].to_list(),
            truncation = True,
            return_tensors = 'tf',
            max_length = self.decoder_length,
            padding = "max_length")

        data = {'input_ids': cont['input_ids'], 'labels': ans['input_ids']}

        dataset = (
            tf.data.Dataset
            .from_tensor_slices(data)
            .shuffle(int(buffer))
            .batch(batch_size)
            .prefetch(AUTO)
            .repeat()
        )

        return dataset

    def to_train_boolean_dataset(self, data, labels = None, batch_size = 16, buffer = 1e4):
        """
        format pandas.DataFrame to create dataset for training BooleanQA
        :param data: dataframe with question, context and answer
        :param labels: dict of labels, by default {'no':0, 'yes':1, 'neutral':2}
        :param batch_size: batch size
        :param buffer: buffer for shuffle
        :return: tensorflow dataset of (n, boolean_length) size
        :rtype: tf.Tensor
        """
        data = self.to_train_dataset(data)
        if labels:
            assert isinstance(labels, dict), "Labels must be a dict"
            assert len(labels) == 3, "Labels must have no/yes/neutral keys"
        else:
            labels = {'no':0, 'yes':1, 'neutral':2}

        # Unknown answers fall back to the 'neutral' class (2).
        data['f_answer'] = data.apply(lambda x: labels.get(x.answer, 2), axis=1)

        cont = self.tok_bool.batch_encode_plus(
            data['f_context'].to_list(),
            truncation = True,
            return_tensors = 'tf',
            max_length = self.boolean_length,
            padding = "max_length")
        cont = K.constant(cont['input_ids'], dtype = tf.int32)
        ans = K.constant(data['f_answer'].to_list(), dtype = tf.int32)

        dataset = (
            tf.data.Dataset
            .from_tensor_slices((cont, ans))
            .shuffle(int(buffer))
            .batch(batch_size)
            .prefetch(AUTO)
            .repeat()
        )

        return dataset

    def to_train_extractive_dataset(self, data, batch_size = 16, buffer = 1e4):
        """
        format pandas.DataFrame to create dataset for training ExtractiveQA
        :param data: dataframe with question, context and answer
        :param batch_size: batch size
        :param buffer: buffer for shuffle
        :return: tensorflow dataset of (n, encode_length) size, start and end indices
        :rtype: tf.Tensor
        """
        data = self.to_train_dataset(data)

        def fmt_answer(a):
            return f' {a.encode().decode("utf8")} </s>'
        data['f_answer'] = data.apply(lambda x: fmt_answer(x.answer), axis=1)

        encoded_contexts = self.tok_ext.batch_encode_plus(
            data['f_context'].to_list(),
            truncation = True,
            max_length = self.extract_length
        )
        encoded_answers = self.tok_ext.batch_encode_plus(
            data['f_answer'].to_list(),
            return_attention_mask = False
        )['input_ids']
        # Drop the special tokens surrounding each answer sequence.
        encoded_answers = list(map(lambda x: x[1:-1], encoded_answers))

        # Locate each answer's token span inside its context by sliding a
        # window of the answer's length over the context token ids.
        # NOTE(review): the original iterated `zip(context, answer)` where
        # `context` is a BatchEncoding *mapping*, so iteration yielded its
        # keys ('input_ids', ...) instead of the encoded sequences; indexing
        # 'input_ids' explicitly is the intended behavior — confirm upstream.
        indexes = list()
        for idx, (c, a) in enumerate(zip(encoded_contexts['input_ids'],
                                         encoded_answers)):
            indices = np.all(self.tensor_window(np.array(c), len(a)) == a, axis=1)
            start = np.mgrid[0:len(indices)][indices]
            if len(start):
                d = {
                    "f_context": data.loc[idx, 'f_context'],
                    "start": start[0],
                    "end": start[0] + len(a)
                }
                indexes.append(d)
        data = pd.DataFrame(indexes)

        context = self.tok_ext.batch_encode_plus(
            data['f_context'].to_list(),
            truncation = True,
            return_tensors = 'tf',
            max_length = self.extract_length,
            padding = "max_length",
            return_attention_mask = False
        )['input_ids']
        start = K.constant(data['start'].to_list(), dtype = tf.int32)
        end = K.constant(data['end'].to_list(), dtype = tf.int32)

        dataset = (
            tf.data.Dataset
            .from_tensor_slices((context, (start, end)))
            .shuffle(int(buffer))
            .batch(batch_size)
            .prefetch(AUTO)
            .repeat()
        )

        return dataset

    def to_predict_abstractive_dataset(self, data):
        """
        format pandas.DataFrame to create dataset for AbstractiveQA predict
        :param data: dataframe with question, context, title and id
        :return: context, id, title and last index of each context
        :rtype: tf.Tensor, list, list, list
        """
        self.to_predict_dataset(data)
        data = data.copy()
        data['context'] = data.apply(lambda x: self.to_context(x.question, x.context), axis=1)
        # Cumulative window counts: last index of each document's windows.
        data['end_context'] = K.cumsum(data.apply(lambda x: len(x.context), axis=1)).numpy()
        context = self.tok_abs.batch_encode_plus(
            data.context.explode().to_list(),
            truncation = True,
            return_tensors = 'tf',
            max_length = self.encoder_length,
            padding = "max_length")['input_ids']

        _id, title, end_context = list(
            data[['id', 'title', 'end_context']]
            .to_dict('list').values())
        del data

        return context, _id, title, end_context

    def to_predict_boolean_dataset(self, data, *args, **kwargs):
        """
        format pandas.DataFrame to create dataset for BooleanQA predict
        :param data: dataframe with question, context
        :param from_abs: bool, if data is abstractiveQA output
        :param from_ext: bool, if data is extractiveQA output
        :param question: if from_abs, question string
        :param end_context: if from_abs, last index of each context
        :param return_selected: if from_ext, return extracted sequences else data
        :return: context, and last index of each context
        :rtype: tf.Tensor, list (,list)
        """
        question = kwargs.get('question')
        end_context = kwargs.get('end_context')
        from_abs = kwargs.get('from_abs')
        from_ext = kwargs.get('from_ext')
        return_selected = kwargs.get('return_selected')

        self.to_predict_dataset(data)

        if from_abs:
            # Re-encode abstractive model output for the boolean model.
            assert not isinstance(data, pd.DataFrame), "abs output should be Tensor"
            assert question, "question argument needed if from_abs is True"
            assert end_context, "end_context list needed if from_abs"
            selected = data.numpy().tolist()
            context_ids = zip([0] + end_context[:-1], end_context)
            decoded = self.tok_abs.batch_decode(data)
            context = self.tok_bool.batch_encode_plus(
                [f'{question} </s> '+' '.join(decoded[start:end+1])
                for start, end in context_ids],
                truncation = True,
                return_tensors = 'tf',
                max_length = self.boolean_length,
                padding = "max_length")['input_ids']
            end_context = list(range(1, len(context)))

        elif from_ext:
            # Slice each context with the (start, end) argmax indices from the
            # extractive model, then re-encode for the boolean model.
            assert isinstance(data, list), "ext output should be list of 2 array"
            assert len(args) > 0, "data must be list of 2 array, and context is required"
            assert question, "question argument needed if from_ext is True"
            assert end_context, "end_context list needed if from_ext"
            context = args[0]
            idx = K.stack(tuple(map(lambda x: K.argmax(x, axis=1), data))).numpy().T
            selected = [x[start:end].numpy().tolist() for x, (start, end) in zip(context, idx)]

            decoded = self.tok_ext.batch_decode(selected)
            context_ids = zip([0] + end_context[:-1], end_context)
            context = self.tok_bool.batch_encode_plus(
                [f'{question} </s> '+' '.join(decoded[start:end+1]).split('</s>')[-1]
                for start, end in context_ids],
                truncation = True,
                return_tensors = 'tf',
                max_length = self.boolean_length,
                padding = "max_length")['input_ids']
            end_context = list(range(1, len(context)))

        else:
            # Raw DataFrame input: window each context like the other
            # to_predict_* methods.
            assert isinstance(data, pd.DataFrame), "DataFrame only supported if not from_abs"
            selected = None
            data = data.copy()
            data['context'] = data.apply(lambda x: self.to_context(x.question, x.context), axis=1)
            data['end_context'] = K.cumsum(data.apply(lambda x: len(x.context), axis=1)).numpy()
            context = self.tok_bool.batch_encode_plus(
                data.context.explode().to_list(),
                truncation = True,
                return_tensors = 'tf',
                max_length = self.encoder_length,
                padding = "max_length")['input_ids']
            end_context = data.end_context.to_list()

        if return_selected: return context, end_context, selected
        else: return context, end_context

    def to_predict_extractive_dataset(self, data):
        """
        format pandas.DataFrame to create dataset for ExtractiveQA predict
        :param data: dataframe with question, context, title and id
        :return: context, id, title and last index of each context
        :rtype: tf.Tensor, list, list, list
        """
        self.to_predict_dataset(data)
        data = data.copy()
        data['context'] = data.apply(lambda x: self.to_context(x.question, x.context), axis=1)
        data['end_context'] = K.cumsum(data.apply(lambda x: len(x.context), axis=1)).numpy()
        context = self.tok_ext.batch_encode_plus(
            data.context.explode().to_list(),
            truncation = True,
            return_tensors = 'tf',
            max_length = self.extract_length,
            padding = "max_length")['input_ids']

        _id, title, end_context = list(
            data[['id', 'title', 'end_context']]
            .to_dict('list').values())
        del data

        # NOTE(review): the source was truncated right after `del data`; this
        # return mirrors to_predict_abstractive_dataset, whose docstring
        # contract (":return: context, id, title and last index of each
        # context") this method shares — confirm against the full file.
        return context, _id, title, end_context
You can also refer to the video tutorials on the LambdaTest YouTube Channel to get step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing for FREE!
