Best Python code snippet using localstack_python
test_regex.py
Source:test_regex.py  
1import regex2import string3from weakref import proxy4import unittest5import copy6import pickle7from test.support import run_unittest8import sys9# String subclasses for issue 18468.10class StrSubclass(str):11    def __getitem__(self, index):12        return StrSubclass(super().__getitem__(index))13class BytesSubclass(bytes):14    def __getitem__(self, index):15        return BytesSubclass(super().__getitem__(index))16class RegexTests(unittest.TestCase):17    PATTERN_CLASS = "<class '_regex.Pattern'>"18    FLAGS_WITH_COMPILED_PAT = "cannot process flags argument with a compiled pattern"19    INVALID_GROUP_REF = "invalid group reference"20    MISSING_GT = "missing >"21    BAD_GROUP_NAME = "bad character in group name"22    MISSING_GROUP_NAME = "missing group name"23    MISSING_LT = "missing <"24    UNKNOWN_GROUP_I = "unknown group"25    UNKNOWN_GROUP = "unknown group"26    BAD_ESCAPE = r"bad escape \(end of pattern\)"27    BAD_OCTAL_ESCAPE = r"bad escape \\"28    BAD_SET = "unterminated character set"29    STR_PAT_ON_BYTES = "cannot use a string pattern on a bytes-like object"30    BYTES_PAT_ON_STR = "cannot use a bytes pattern on a string-like object"31    STR_PAT_BYTES_TEMPL = "expected str instance, bytes found"32    BYTES_PAT_STR_TEMPL = "expected a bytes-like object, str found"33    BYTES_PAT_UNI_FLAG = "cannot use UNICODE flag with a bytes pattern"34    MIXED_FLAGS = "ASCII, LOCALE and UNICODE flags are mutually incompatible"35    MISSING_RPAREN = "missing \\)"36    TRAILING_CHARS = "unbalanced parenthesis"37    BAD_CHAR_RANGE = "bad character range"38    NOTHING_TO_REPEAT = "nothing to repeat"39    MULTIPLE_REPEAT = "multiple repeat"40    OPEN_GROUP = "cannot refer to an open group"41    DUPLICATE_GROUP = "duplicate group"42    CANT_TURN_OFF = "bad inline flags: cannot turn flags off"43    UNDEF_CHAR_NAME = "undefined character name"44    def assertTypedEqual(self, actual, expect, msg=None):45        self.assertEqual(actual, expect, msg)46        def 
recurse(actual, expect):47            if isinstance(expect, (tuple, list)):48                for x, y in zip(actual, expect):49                    recurse(x, y)50            else:51                self.assertIs(type(actual), type(expect), msg)52        recurse(actual, expect)53    def test_weakref(self):54        s = 'QabbbcR'55        x = regex.compile('ab+c')56        y = proxy(x)57        if x.findall('QabbbcR') != y.findall('QabbbcR'):58            self.fail()59    def test_search_star_plus(self):60        self.assertEqual(regex.search('a*', 'xxx').span(0), (0, 0))61        self.assertEqual(regex.search('x*', 'axx').span(), (0, 0))62        self.assertEqual(regex.search('x+', 'axx').span(0), (1, 3))63        self.assertEqual(regex.search('x+', 'axx').span(), (1, 3))64        self.assertEqual(regex.search('x', 'aaa'), None)65        self.assertEqual(regex.match('a*', 'xxx').span(0), (0, 0))66        self.assertEqual(regex.match('a*', 'xxx').span(), (0, 0))67        self.assertEqual(regex.match('x*', 'xxxa').span(0), (0, 3))68        self.assertEqual(regex.match('x*', 'xxxa').span(), (0, 3))69        self.assertEqual(regex.match('a+', 'xxx'), None)70    def bump_num(self, matchobj):71        int_value = int(matchobj[0])72        return str(int_value + 1)73    def test_basic_regex_sub(self):74        self.assertEqual(regex.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')75        self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),76          '9.3 -3 24x100y')77        self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),78          '9.3 -3 23x99y')79        self.assertEqual(regex.sub('.', lambda m: r"\n", 'x'), "\\n")80        self.assertEqual(regex.sub('.', r"\n", 'x'), "\n")81        self.assertEqual(regex.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')82        self.assertEqual(regex.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')83        self.assertEqual(regex.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'),84          'xxxx')85        
self.assertEqual(regex.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')86        self.assertEqual(regex.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D',87          'a'), "\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D")88        self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), "\t\n\v\r\f\a")89        self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), chr(9) + chr(10)90          + chr(11) + chr(13) + chr(12) + chr(7))91        self.assertEqual(regex.sub(r'^\s*', 'X', 'test'), 'Xtest')92        self.assertEqual(regex.sub(r"x", r"\x0A", "x"), "\n")93        self.assertEqual(regex.sub(r"x", r"\u000A", "x"), "\n")94        self.assertEqual(regex.sub(r"x", r"\U0000000A", "x"), "\n")95        self.assertEqual(regex.sub(r"x", r"\N{LATIN CAPITAL LETTER A}",96          "x"), "A")97        self.assertEqual(regex.sub(br"x", br"\x0A", b"x"), b"\n")98        self.assertEqual(regex.sub(br"x", br"\u000A", b"x"), b"\\u000A")99        self.assertEqual(regex.sub(br"x", br"\U0000000A", b"x"),100          b"\\U0000000A")101        self.assertEqual(regex.sub(br"x", br"\N{LATIN CAPITAL LETTER A}",102          b"x"), b"\\N{LATIN CAPITAL LETTER A}")103    def test_bug_449964(self):104        # Fails for group followed by other escape.105        self.assertEqual(regex.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx'),106          "xx\bxx\b")107    def test_bug_449000(self):108        # Test for sub() on escaped characters.109        self.assertEqual(regex.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),110          "abc\ndef\n")111        self.assertEqual(regex.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),112          "abc\ndef\n")113        self.assertEqual(regex.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),114          "abc\ndef\n")115        self.assertEqual(regex.sub('\r\n', '\n', 'abc\r\ndef\r\n'),116          "abc\ndef\n")117    def test_bug_1661(self):118        # Verify that flags do not get silently ignored with compiled patterns119        pattern = regex.compile('.')120        
self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,121          lambda: regex.match(pattern, 'A', regex.I))122        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,123          lambda: regex.search(pattern, 'A', regex.I))124        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,125          lambda: regex.findall(pattern, 'A', regex.I))126        self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,127          lambda: regex.compile(pattern, regex.I))128    def test_bug_3629(self):129        # A regex that triggered a bug in the sre-code validator130        self.assertEqual(repr(type(regex.compile("(?P<quote>)(?(quote))"))),131          self.PATTERN_CLASS)132    def test_sub_template_numeric_escape(self):133        # Bug 776311 and friends.134        self.assertEqual(regex.sub('x', r'\0', 'x'), "\0")135        self.assertEqual(regex.sub('x', r'\000', 'x'), "\000")136        self.assertEqual(regex.sub('x', r'\001', 'x'), "\001")137        self.assertEqual(regex.sub('x', r'\008', 'x'), "\0" + "8")138        self.assertEqual(regex.sub('x', r'\009', 'x'), "\0" + "9")139        self.assertEqual(regex.sub('x', r'\111', 'x'), "\111")140        self.assertEqual(regex.sub('x', r'\117', 'x'), "\117")141        self.assertEqual(regex.sub('x', r'\1111', 'x'), "\1111")142        self.assertEqual(regex.sub('x', r'\1111', 'x'), "\111" + "1")143        self.assertEqual(regex.sub('x', r'\00', 'x'), '\x00')144        self.assertEqual(regex.sub('x', r'\07', 'x'), '\x07')145        self.assertEqual(regex.sub('x', r'\08', 'x'), "\0" + "8")146        self.assertEqual(regex.sub('x', r'\09', 'x'), "\0" + "9")147        self.assertEqual(regex.sub('x', r'\0a', 'x'), "\0" + "a")148        self.assertEqual(regex.sub('x', r'\400', 'x'), "\u0100")149        self.assertEqual(regex.sub('x', r'\777', 'x'), "\u01FF")150        self.assertEqual(regex.sub(b'x', br'\400', b'x'), b"\x00")151        self.assertEqual(regex.sub(b'x', br'\777', 
b'x'), b"\xFF")152        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:153          regex.sub('x', r'\1', 'x'))154        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:155          regex.sub('x', r'\8', 'x'))156        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:157          regex.sub('x', r'\9', 'x'))158        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:159          regex.sub('x', r'\11', 'x'))160        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:161          regex.sub('x', r'\18', 'x'))162        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:163          regex.sub('x', r'\1a', 'x'))164        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:165          regex.sub('x', r'\90', 'x'))166        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:167          regex.sub('x', r'\99', 'x'))168        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:169          regex.sub('x', r'\118', 'x')) # r'\11' + '8'170        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:171          regex.sub('x', r'\11a', 'x'))172        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:173          regex.sub('x', r'\181', 'x')) # r'\18' + '1'174        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:175          regex.sub('x', r'\800', 'x')) # r'\80' + '0'176        # In Python 2.3 (etc), these loop endlessly in sre_parser.py.177        self.assertEqual(regex.sub('(((((((((((x)))))))))))', r'\11', 'x'),178          'x')179        self.assertEqual(regex.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),180          'xz8')181        self.assertEqual(regex.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),182          'xza')183    def test_qualified_re_sub(self):184        self.assertEqual(regex.sub('a', 'b', 'aaaaa'), 'bbbbb')185        
self.assertEqual(regex.sub('a', 'b', 'aaaaa', 1), 'baaaa')186    def test_bug_114660(self):187        self.assertEqual(regex.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),188          'hello there')189    def test_bug_462270(self):190        # Test for empty sub() behaviour, see SF bug #462270191        self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), '-a-b-d-')192        self.assertEqual(regex.sub('(?V1)x*', '-', 'abxd'), '-a-b--d-')193        self.assertEqual(regex.sub('x+', '-', 'abxd'), 'ab-d')194    def test_bug_14462(self):195        # chr(255) is a valid identifier in Python 3.196        group_name = '\xFF'197        self.assertEqual(regex.search(r'(?P<' + group_name + '>a)',198          'abc').group(group_name), 'a')199    def test_symbolic_refs(self):200        self.assertRaisesRegex(regex.error, self.MISSING_GT, lambda:201          regex.sub('(?P<a>x)', r'\g<a', 'xx'))202        self.assertRaisesRegex(regex.error, self.MISSING_GROUP_NAME, lambda:203          regex.sub('(?P<a>x)', r'\g<', 'xx'))204        self.assertRaisesRegex(regex.error, self.MISSING_LT, lambda:205          regex.sub('(?P<a>x)', r'\g', 'xx'))206        self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda:207          regex.sub('(?P<a>x)', r'\g<a a>', 'xx'))208        self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda:209          regex.sub('(?P<a>x)', r'\g<1a1>', 'xx'))210        self.assertRaisesRegex(IndexError, self.UNKNOWN_GROUP_I, lambda:211          regex.sub('(?P<a>x)', r'\g<ab>', 'xx'))212        # The new behaviour of unmatched but valid groups is to treat them like213        # empty matches in the replacement template, like in Perl.214        self.assertEqual(regex.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')215        self.assertEqual(regex.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')216        # The old behaviour was to raise it as an IndexError.217        self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda:218          
regex.sub('(?P<a>x)', r'\g<-1>', 'xx'))219    def test_re_subn(self):220        self.assertEqual(regex.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))221        self.assertEqual(regex.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))222        self.assertEqual(regex.subn("b+", "x", "xyz"), ('xyz', 0))223        self.assertEqual(regex.subn("b*", "x", "xyz"), ('xxxyxzx', 4))224        self.assertEqual(regex.subn("b*", "x", "xyz", 2), ('xxxyz', 2))225    def test_re_split(self):226        self.assertEqual(regex.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])227        self.assertEqual(regex.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])228        self.assertEqual(regex.split("(:*)", ":a:b::c"), ['', ':', 'a', ':',229          'b', '::', 'c'])230        self.assertEqual(regex.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])231        self.assertEqual(regex.split("(:)*", ":a:b::c"), ['', ':', 'a', ':',232          'b', ':', 'c'])233        self.assertEqual(regex.split("([b:]+)", ":a:b::c"), ['', ':', 'a',234          ':b::', 'c'])235        self.assertEqual(regex.split("(b)|(:+)", ":a:b::c"), ['', None, ':',236          'a', None, ':', '', 'b', None, '', None, '::', 'c'])237        self.assertEqual(regex.split("(?:b)|(?::+)", ":a:b::c"), ['', 'a', '',238          '', 'c'])239        self.assertEqual(regex.split("x", "xaxbxc"), ['', 'a', 'b', 'c'])240        self.assertEqual([m for m in regex.splititer("x", "xaxbxc")], ['', 'a',241          'b', 'c'])242        self.assertEqual(regex.split("(?r)x", "xaxbxc"), ['c', 'b', 'a', ''])243        self.assertEqual([m for m in regex.splititer("(?r)x", "xaxbxc")], ['c',244          'b', 'a', ''])245        self.assertEqual(regex.split("(x)|(y)", "xaxbxc"), ['', 'x', None, 'a',246          'x', None, 'b', 'x', None, 'c'])247        self.assertEqual([m for m in regex.splititer("(x)|(y)", "xaxbxc")],248          ['', 'x', None, 'a', 'x', None, 'b', 'x', None, 'c'])249        self.assertEqual(regex.split("(?r)(x)|(y)", "xaxbxc"), ['c', 'x', 
None,250          'b', 'x', None, 'a', 'x', None, ''])251        self.assertEqual([m for m in regex.splititer("(?r)(x)|(y)", "xaxbxc")],252          ['c', 'x', None, 'b', 'x', None, 'a', 'x', None, ''])253        self.assertEqual(regex.split(r"(?V1)\b", "a b c"), ['', 'a', ' ', 'b',254          ' ', 'c', ''])255        self.assertEqual(regex.split(r"(?V1)\m", "a b c"), ['', 'a ', 'b ',256          'c'])257        self.assertEqual(regex.split(r"(?V1)\M", "a b c"), ['a', ' b', ' c',258          ''])259    def test_qualified_re_split(self):260        self.assertEqual(regex.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])261        self.assertEqual(regex.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])262        self.assertEqual(regex.split("(:)", ":a:b::c", 2), ['', ':', 'a', ':',263          'b::c'])264        self.assertEqual(regex.split("(:*)", ":a:b::c", 2), ['', ':', 'a', ':',265          'b::c'])266    def test_re_findall(self):267        self.assertEqual(regex.findall(":+", "abc"), [])268        self.assertEqual(regex.findall(":+", "a:b::c:::d"), [':', '::', ':::'])269        self.assertEqual(regex.findall("(:+)", "a:b::c:::d"), [':', '::',270          ':::'])271        self.assertEqual(regex.findall("(:)(:*)", "a:b::c:::d"), [(':', ''),272          (':', ':'), (':', '::')])273        self.assertEqual(regex.findall(r"\((?P<test>.{0,5}?TEST)\)",274          "(MY TEST)"), ["MY TEST"])275        self.assertEqual(regex.findall(r"\((?P<test>.{0,3}?TEST)\)",276          "(MY TEST)"), ["MY TEST"])277        self.assertEqual(regex.findall(r"\((?P<test>.{0,3}?T)\)", "(MY T)"),278          ["MY T"])279        self.assertEqual(regex.findall(r"[^a]{2}[A-Z]", "\n  S"), ['  S'])280        self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n  S"), ['\n  S'])281        self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n   S"), ['   S'])282        self.assertEqual(regex.findall(r"X(Y[^Y]+?){1,2}( |Q)+DEF",283          "XYABCYPPQ\nQ DEF"), [('YPPQ\n', ' ')])284        
self.assertEqual(regex.findall(r"(\nTest(\n+.+?){0,2}?)?\n+End",285          "\nTest\nxyz\nxyz\nEnd"), [('\nTest\nxyz\nxyz', '\nxyz')])286    def test_bug_117612(self):287        self.assertEqual(regex.findall(r"(a|(b))", "aba"), [('a', ''), ('b',288          'b'), ('a', '')])289    def test_re_match(self):290        self.assertEqual(regex.match('a', 'a')[:], ('a',))291        self.assertEqual(regex.match('(a)', 'a')[:], ('a', 'a'))292        self.assertEqual(regex.match(r'(a)', 'a')[0], 'a')293        self.assertEqual(regex.match(r'(a)', 'a')[1], 'a')294        self.assertEqual(regex.match(r'(a)', 'a').group(1, 1), ('a', 'a'))295        pat = regex.compile('((a)|(b))(c)?')296        self.assertEqual(pat.match('a')[:], ('a', 'a', 'a', None, None))297        self.assertEqual(pat.match('b')[:], ('b', 'b', None, 'b', None))298        self.assertEqual(pat.match('ac')[:], ('ac', 'a', 'a', None, 'c'))299        self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c'))300        self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c'))301        # A single group.302        m = regex.match('(a)', 'a')303        self.assertEqual(m.group(), 'a')304        self.assertEqual(m.group(0), 'a')305        self.assertEqual(m.group(1), 'a')306        self.assertEqual(m.group(1, 1), ('a', 'a'))307        pat = regex.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')308        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))309        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), (None, 'b',310          None))311        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))312    def test_re_groupref_exists(self):313        self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', '(a)')[:],314          ('(a)', '(', 'a'))315        self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', 'a')[:], ('a',316          None, 'a'))317        self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'), None)318        
self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', '(a'), None)319        self.assertEqual(regex.match('^(?:(a)|c)((?(1)b|d))$', 'ab')[:], ('ab',320          'a', 'b'))321        self.assertEqual(regex.match('^(?:(a)|c)((?(1)b|d))$', 'cd')[:], ('cd',322          None, 'd'))323        self.assertEqual(regex.match('^(?:(a)|c)((?(1)|d))$', 'cd')[:], ('cd',324          None, 'd'))325        self.assertEqual(regex.match('^(?:(a)|c)((?(1)|d))$', 'a')[:], ('a',326          'a', ''))327        # Tests for bug #1177831: exercise groups other than the first group.328        p = regex.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')329        self.assertEqual(p.match('abc')[:], ('abc', 'a', 'b', 'c'))330        self.assertEqual(p.match('ad')[:], ('ad', 'a', None, 'd'))331        self.assertEqual(p.match('abd'), None)332        self.assertEqual(p.match('ac'), None)333    def test_re_groupref(self):334        self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a|')[:], ('|a|',335          '|', 'a'))336        self.assertEqual(regex.match(r'^(\|)?([^()]+)\1?$', 'a')[:], ('a',337          None, 'a'))338        self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', 'a|'), None)339        self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a'), None)340        self.assertEqual(regex.match(r'^(?:(a)|c)(\1)$', 'aa')[:], ('aa', 'a',341          'a'))342        self.assertEqual(regex.match(r'^(?:(a)|c)(\1)?$', 'c')[:], ('c', None,343          None))344        self.assertEqual(regex.findall("(?i)(.{1,40}?),(.{1,40}?)(?:;)+(.{1,80}).{1,40}?\\3(\ |;)+(.{1,80}?)\\1",345          "TEST, BEST; LEST ; Lest 123 Test, Best"), [('TEST', ' BEST',346          ' LEST', ' ', '123 ')])347    def test_groupdict(self):348        self.assertEqual(regex.match('(?P<first>first) (?P<second>second)',349          'first second').groupdict(), {'first': 'first', 'second': 'second'})350    def test_expand(self):351        self.assertEqual(regex.match("(?P<first>first) (?P<second>second)",352          "first 
second").expand(r"\2 \1 \g<second> \g<first>"),353          'second first second first')354    def test_repeat_minmax(self):355        self.assertEqual(regex.match(r"^(\w){1}$", "abc"), None)356        self.assertEqual(regex.match(r"^(\w){1}?$", "abc"), None)357        self.assertEqual(regex.match(r"^(\w){1,2}$", "abc"), None)358        self.assertEqual(regex.match(r"^(\w){1,2}?$", "abc"), None)359        self.assertEqual(regex.match(r"^(\w){3}$", "abc")[1], 'c')360        self.assertEqual(regex.match(r"^(\w){1,3}$", "abc")[1], 'c')361        self.assertEqual(regex.match(r"^(\w){1,4}$", "abc")[1], 'c')362        self.assertEqual(regex.match(r"^(\w){3,4}?$", "abc")[1], 'c')363        self.assertEqual(regex.match(r"^(\w){3}?$", "abc")[1], 'c')364        self.assertEqual(regex.match(r"^(\w){1,3}?$", "abc")[1], 'c')365        self.assertEqual(regex.match(r"^(\w){1,4}?$", "abc")[1], 'c')366        self.assertEqual(regex.match(r"^(\w){3,4}?$", "abc")[1], 'c')367        self.assertEqual(regex.match("^x{1}$", "xxx"), None)368        self.assertEqual(regex.match("^x{1}?$", "xxx"), None)369        self.assertEqual(regex.match("^x{1,2}$", "xxx"), None)370        self.assertEqual(regex.match("^x{1,2}?$", "xxx"), None)371        self.assertEqual(regex.match("^x{1}", "xxx")[0], 'x')372        self.assertEqual(regex.match("^x{1}?", "xxx")[0], 'x')373        self.assertEqual(regex.match("^x{0,1}", "xxx")[0], 'x')374        self.assertEqual(regex.match("^x{0,1}?", "xxx")[0], '')375        self.assertEqual(bool(regex.match("^x{3}$", "xxx")), True)376        self.assertEqual(bool(regex.match("^x{1,3}$", "xxx")), True)377        self.assertEqual(bool(regex.match("^x{1,4}$", "xxx")), True)378        self.assertEqual(bool(regex.match("^x{3,4}?$", "xxx")), True)379        self.assertEqual(bool(regex.match("^x{3}?$", "xxx")), True)380        self.assertEqual(bool(regex.match("^x{1,3}?$", "xxx")), True)381        self.assertEqual(bool(regex.match("^x{1,4}?$", "xxx")), True)382        
self.assertEqual(bool(regex.match("^x{3,4}?$", "xxx")), True)383        self.assertEqual(regex.match("^x{}$", "xxx"), None)384        self.assertEqual(bool(regex.match("^x{}$", "x{}")), True)385    def test_getattr(self):386        self.assertEqual(regex.compile("(?i)(a)(b)").pattern, '(?i)(a)(b)')387        self.assertEqual(regex.compile("(?i)(a)(b)").flags, regex.I | regex.U |388          regex.DEFAULT_VERSION)389        self.assertEqual(regex.compile(b"(?i)(a)(b)").flags, regex.A | regex.I390          | regex.DEFAULT_VERSION)391        self.assertEqual(regex.compile("(?i)(a)(b)").groups, 2)392        self.assertEqual(regex.compile("(?i)(a)(b)").groupindex, {})393        self.assertEqual(regex.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,394          {'first': 1, 'other': 2})395        self.assertEqual(regex.match("(a)", "a").pos, 0)396        self.assertEqual(regex.match("(a)", "a").endpos, 1)397        self.assertEqual(regex.search("b(c)", "abcdef").pos, 0)398        self.assertEqual(regex.search("b(c)", "abcdef").endpos, 6)399        self.assertEqual(regex.search("b(c)", "abcdef").span(), (1, 3))400        self.assertEqual(regex.search("b(c)", "abcdef").span(1), (2, 3))401        self.assertEqual(regex.match("(a)", "a").string, 'a')402        self.assertEqual(regex.match("(a)", "a").regs, ((0, 1), (0, 1)))403        self.assertEqual(repr(type(regex.match("(a)", "a").re)),404          self.PATTERN_CLASS)405        # Issue 14260.406        p = regex.compile(r'abc(?P<n>def)')407        p.groupindex["n"] = 0408        self.assertEqual(p.groupindex["n"], 1)409    def test_special_escapes(self):410        self.assertEqual(regex.search(r"\b(b.)\b", "abcd abc bcd bx")[1], 'bx')411        self.assertEqual(regex.search(r"\B(b.)\B", "abc bcd bc abxd")[1], 'bx')412        self.assertEqual(regex.search(br"\b(b.)\b", b"abcd abc bcd bx",413          regex.LOCALE)[1], b'bx')414        self.assertEqual(regex.search(br"\B(b.)\B", b"abc bcd bc abxd",415          
regex.LOCALE)[1], b'bx')416        self.assertEqual(regex.search(r"\b(b.)\b", "abcd abc bcd bx",417          regex.UNICODE)[1], 'bx')418        self.assertEqual(regex.search(r"\B(b.)\B", "abc bcd bc abxd",419          regex.UNICODE)[1], 'bx')420        self.assertEqual(regex.search(r"^abc$", "\nabc\n", regex.M)[0], 'abc')421        self.assertEqual(regex.search(r"^\Aabc\Z$", "abc", regex.M)[0], 'abc')422        self.assertEqual(regex.search(r"^\Aabc\Z$", "\nabc\n", regex.M), None)423        self.assertEqual(regex.search(br"\b(b.)\b", b"abcd abc bcd bx")[1],424          b'bx')425        self.assertEqual(regex.search(br"\B(b.)\B", b"abc bcd bc abxd")[1],426          b'bx')427        self.assertEqual(regex.search(br"^abc$", b"\nabc\n", regex.M)[0],428          b'abc')429        self.assertEqual(regex.search(br"^\Aabc\Z$", b"abc", regex.M)[0],430          b'abc')431        self.assertEqual(regex.search(br"^\Aabc\Z$", b"\nabc\n", regex.M),432          None)433        self.assertEqual(regex.search(r"\d\D\w\W\s\S", "1aa! a")[0], '1aa! a')434        self.assertEqual(regex.search(br"\d\D\w\W\s\S", b"1aa! a",435          regex.LOCALE)[0], b'1aa! a')436        self.assertEqual(regex.search(r"\d\D\w\W\s\S", "1aa! a",437          regex.UNICODE)[0], '1aa! 
a')438    def test_bigcharset(self):439        self.assertEqual(regex.match(r"([\u2222\u2223])", "\u2222")[1],440          '\u2222')441        self.assertEqual(regex.match(r"([\u2222\u2223])", "\u2222",442          regex.UNICODE)[1], '\u2222')443        self.assertEqual("".join(regex.findall(".",444          "e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)),445          'e\xe8\xe9\xea\xeb\u0113\u011b\u0117')446        self.assertEqual("".join(regex.findall(r"[e\xe8\xe9\xea\xeb\u0113\u011b\u0117]",447          "e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)),448          'e\xe8\xe9\xea\xeb\u0113\u011b\u0117')449        self.assertEqual("".join(regex.findall(r"e|\xe8|\xe9|\xea|\xeb|\u0113|\u011b|\u0117",450          "e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)),451          'e\xe8\xe9\xea\xeb\u0113\u011b\u0117')452    def test_anyall(self):453        self.assertEqual(regex.match("a.b", "a\nb", regex.DOTALL)[0], "a\nb")454        self.assertEqual(regex.match("a.*b", "a\n\nb", regex.DOTALL)[0],455          "a\n\nb")456    def test_non_consuming(self):457        self.assertEqual(regex.match(r"(a(?=\s[^a]))", "a b")[1], 'a')458        self.assertEqual(regex.match(r"(a(?=\s[^a]*))", "a b")[1], 'a')459        self.assertEqual(regex.match(r"(a(?=\s[abc]))", "a b")[1], 'a')460        self.assertEqual(regex.match(r"(a(?=\s[abc]*))", "a bc")[1], 'a')461        self.assertEqual(regex.match(r"(a)(?=\s\1)", "a a")[1], 'a')462        self.assertEqual(regex.match(r"(a)(?=\s\1*)", "a aa")[1], 'a')463        self.assertEqual(regex.match(r"(a)(?=\s(abc|a))", "a a")[1], 'a')464        self.assertEqual(regex.match(r"(a(?!\s[^a]))", "a a")[1], 'a')465        self.assertEqual(regex.match(r"(a(?!\s[abc]))", "a d")[1], 'a')466        self.assertEqual(regex.match(r"(a)(?!\s\1)", "a b")[1], 'a')467        self.assertEqual(regex.match(r"(a)(?!\s(abc|a))", "a b")[1], 'a')468    def test_ignore_case(self):469        self.assertEqual(regex.match("abc", "ABC", 
regex.I)[0], 'ABC')470        self.assertEqual(regex.match(b"abc", b"ABC", regex.I)[0], b'ABC')471        self.assertEqual(regex.match(r"(a\s[^a]*)", "a bb", regex.I)[1],472          'a bb')473        self.assertEqual(regex.match(r"(a\s[abc])", "a b", regex.I)[1], 'a b')474        self.assertEqual(regex.match(r"(a\s[abc]*)", "a bb", regex.I)[1],475          'a bb')476        self.assertEqual(regex.match(r"((a)\s\2)", "a a", regex.I)[1], 'a a')477        self.assertEqual(regex.match(r"((a)\s\2*)", "a aa", regex.I)[1],478          'a aa')479        self.assertEqual(regex.match(r"((a)\s(abc|a))", "a a", regex.I)[1],480          'a a')481        self.assertEqual(regex.match(r"((a)\s(abc|a)*)", "a aa", regex.I)[1],482          'a aa')483        # Issue 3511.484        self.assertEqual(regex.match(r"[Z-a]", "_").span(), (0, 1))485        self.assertEqual(regex.match(r"(?i)[Z-a]", "_").span(), (0, 1))486        self.assertEqual(bool(regex.match(r"(?i)nao", "nAo")), True)487        self.assertEqual(bool(regex.match(r"(?i)n\xE3o", "n\xC3o")), True)488        self.assertEqual(bool(regex.match(r"(?i)n\xE3o", "N\xC3O")), True)489        self.assertEqual(bool(regex.match(r"(?i)s", "\u017F")), True)490    def test_case_folding(self):491        self.assertEqual(regex.search(r"(?fi)ss", "SS").span(), (0, 2))492        self.assertEqual(regex.search(r"(?fi)SS", "ss").span(), (0, 2))493        self.assertEqual(regex.search(r"(?fi)SS",494          "\N{LATIN SMALL LETTER SHARP S}").span(), (0, 1))495        self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LETTER SHARP S}",496          "SS").span(), (0, 2))497        self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE ST}",498          "ST").span(), (0, 2))499        self.assertEqual(regex.search(r"(?fi)ST",500          "\N{LATIN SMALL LIGATURE ST}").span(), (0, 1))501        self.assertEqual(regex.search(r"(?fi)ST",502          "\N{LATIN SMALL LIGATURE LONG S T}").span(), (0, 1))503        
self.assertEqual(regex.search(r"(?fi)SST",504          "\N{LATIN SMALL LETTER SHARP S}t").span(), (0, 2))505        self.assertEqual(regex.search(r"(?fi)SST",506          "s\N{LATIN SMALL LIGATURE LONG S T}").span(), (0, 2))507        self.assertEqual(regex.search(r"(?fi)SST",508          "s\N{LATIN SMALL LIGATURE ST}").span(), (0, 2))509        self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE ST}",510          "SST").span(), (1, 3))511        self.assertEqual(regex.search(r"(?fi)SST",512          "s\N{LATIN SMALL LIGATURE ST}").span(), (0, 2))513        self.assertEqual(regex.search(r"(?fi)FFI",514          "\N{LATIN SMALL LIGATURE FFI}").span(), (0, 1))515        self.assertEqual(regex.search(r"(?fi)FFI",516          "\N{LATIN SMALL LIGATURE FF}i").span(), (0, 2))517        self.assertEqual(regex.search(r"(?fi)FFI",518          "f\N{LATIN SMALL LIGATURE FI}").span(), (0, 2))519        self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE FFI}",520          "FFI").span(), (0, 3))521        self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE FF}i",522          "FFI").span(), (0, 3))523        self.assertEqual(regex.search(r"(?fi)f\N{LATIN SMALL LIGATURE FI}",524          "FFI").span(), (0, 3))525        sigma = "\u03A3\u03C3\u03C2"526        for ch1 in sigma:527            for ch2 in sigma:528                if not regex.match(r"(?fi)" + ch1, ch2):529                    self.fail()530        self.assertEqual(bool(regex.search(r"(?iV1)ff", "\uFB00\uFB01")),531          True)532        self.assertEqual(bool(regex.search(r"(?iV1)ff", "\uFB01\uFB00")),533          True)534        self.assertEqual(bool(regex.search(r"(?iV1)fi", "\uFB00\uFB01")),535          True)536        self.assertEqual(bool(regex.search(r"(?iV1)fi", "\uFB01\uFB00")),537          True)538        self.assertEqual(bool(regex.search(r"(?iV1)fffi", "\uFB00\uFB01")),539          True)540        self.assertEqual(bool(regex.search(r"(?iV1)f\uFB03",541          "\uFB00\uFB01")), 
True)542        self.assertEqual(bool(regex.search(r"(?iV1)ff", "\uFB00\uFB01")),543          True)544        self.assertEqual(bool(regex.search(r"(?iV1)fi", "\uFB00\uFB01")),545          True)546        self.assertEqual(bool(regex.search(r"(?iV1)fffi", "\uFB00\uFB01")),547          True)548        self.assertEqual(bool(regex.search(r"(?iV1)f\uFB03",549          "\uFB00\uFB01")), True)550        self.assertEqual(bool(regex.search(r"(?iV1)f\uFB01", "\uFB00i")),551          True)552        self.assertEqual(bool(regex.search(r"(?iV1)f\uFB01", "\uFB00i")),553          True)554        self.assertEqual(regex.findall(r"(?iV0)\m(?:word){e<=3}\M(?<!\m(?:word){e<=1}\M)",555          "word word2 word word3 word word234 word23 word"), ["word234",556          "word23"])557        self.assertEqual(regex.findall(r"(?iV1)\m(?:word){e<=3}\M(?<!\m(?:word){e<=1}\M)",558          "word word2 word word3 word word234 word23 word"), ["word234",559          "word23"])560        self.assertEqual(regex.search(r"(?fi)a\N{LATIN SMALL LIGATURE FFI}ne",561          "  affine  ").span(), (2, 8))562        self.assertEqual(regex.search(r"(?fi)a(?:\N{LATIN SMALL LIGATURE FFI}|x)ne",563           "  affine  ").span(), (2, 8))564        self.assertEqual(regex.search(r"(?fi)a(?:\N{LATIN SMALL LIGATURE FFI}|xy)ne",565           "  affine  ").span(), (2, 8))566        self.assertEqual(regex.search(r"(?fi)a\L<options>ne", "affine",567          options=["\N{LATIN SMALL LIGATURE FFI}"]).span(), (0, 6))568        self.assertEqual(regex.search(r"(?fi)a\L<options>ne",569          "a\N{LATIN SMALL LIGATURE FFI}ne", options=["ffi"]).span(), (0, 4))570    def test_category(self):571        self.assertEqual(regex.match(r"(\s)", " ")[1], ' ')572    def test_not_literal(self):573        self.assertEqual(regex.search(r"\s([^a])", " b")[1], 'b')574        self.assertEqual(regex.search(r"\s([^a]*)", " bb")[1], 'bb')575    def test_search_coverage(self):576        self.assertEqual(regex.search(r"\s(b)", " b")[1], 
'b')577        self.assertEqual(regex.search(r"a\s", "a ")[0], 'a ')578    def test_re_escape(self):579        p = ""580        self.assertEqual(regex.escape(p), p)581        for i in range(0, 256):582            p += chr(i)583            self.assertEqual(bool(regex.match(regex.escape(chr(i)), chr(i))),584              True)585            self.assertEqual(regex.match(regex.escape(chr(i)), chr(i)).span(),586              (0, 1))587        pat = regex.compile(regex.escape(p))588        self.assertEqual(pat.match(p).span(), (0, 256))589    def test_re_escape_byte(self):590        p = b""591        self.assertEqual(regex.escape(p), p)592        for i in range(0, 256):593            b = bytes([i])594            p += b595            self.assertEqual(bool(regex.match(regex.escape(b), b)), True)596            self.assertEqual(regex.match(regex.escape(b), b).span(), (0, 1))597        pat = regex.compile(regex.escape(p))598        self.assertEqual(pat.match(p).span(), (0, 256))599    def test_constants(self):600        if regex.I != regex.IGNORECASE:601            self.fail()602        if regex.L != regex.LOCALE:603            self.fail()604        if regex.M != regex.MULTILINE:605            self.fail()606        if regex.S != regex.DOTALL:607            self.fail()608        if regex.X != regex.VERBOSE:609            self.fail()610    def test_flags(self):611        for flag in [regex.I, regex.M, regex.X, regex.S, regex.L]:612            self.assertEqual(repr(type(regex.compile('^pattern$', flag))),613              self.PATTERN_CLASS)614    def test_sre_character_literals(self):615        for i in [0, 8, 16, 32, 64, 127, 128, 255]:616            self.assertEqual(bool(regex.match(r"\%03o" % i, chr(i))), True)617            self.assertEqual(bool(regex.match(r"\%03o0" % i, chr(i) + "0")),618              True)619            self.assertEqual(bool(regex.match(r"\%03o8" % i, chr(i) + "8")),620              True)621            self.assertEqual(bool(regex.match(r"\x%02x" % i, 
chr(i))), True)622            self.assertEqual(bool(regex.match(r"\x%02x0" % i, chr(i) + "0")),623              True)624            self.assertEqual(bool(regex.match(r"\x%02xz" % i, chr(i) + "z")),625              True)626        self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:627          regex.match(r"\911", ""))628    def test_sre_character_class_literals(self):629        for i in [0, 8, 16, 32, 64, 127, 128, 255]:630            self.assertEqual(bool(regex.match(r"[\%03o]" % i, chr(i))), True)631            self.assertEqual(bool(regex.match(r"[\%03o0]" % i, chr(i))), True)632            self.assertEqual(bool(regex.match(r"[\%03o8]" % i, chr(i))), True)633            self.assertEqual(bool(regex.match(r"[\x%02x]" % i, chr(i))), True)634            self.assertEqual(bool(regex.match(r"[\x%02x0]" % i, chr(i))), True)635            self.assertEqual(bool(regex.match(r"[\x%02xz]" % i, chr(i))), True)636        self.assertRaisesRegex(regex.error, self.BAD_OCTAL_ESCAPE, lambda:637          regex.match(r"[\911]", ""))638    def test_bug_113254(self):639        self.assertEqual(regex.match(r'(a)|(b)', 'b').start(1), -1)640        self.assertEqual(regex.match(r'(a)|(b)', 'b').end(1), -1)641        self.assertEqual(regex.match(r'(a)|(b)', 'b').span(1), (-1, -1))642    def test_bug_527371(self):643        # Bug described in patches 527371/672491.644        self.assertEqual(regex.match(r'(a)?a','a').lastindex, None)645        self.assertEqual(regex.match(r'(a)(b)?b','ab').lastindex, 1)646        self.assertEqual(regex.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup,647          'a')648        self.assertEqual(regex.match("(?P<a>a(b))", "ab").lastgroup, 'a')649        self.assertEqual(regex.match("((a))", "a").lastindex, 1)650    def test_bug_545855(self):651        # Bug 545855 -- This pattern failed to cause a compile error as it652        # should, instead provoking a TypeError.653        self.assertRaisesRegex(regex.error, self.BAD_SET, lambda:654          
regex.compile('foo[a-'))655    def test_bug_418626(self):656        # Bugs 418626 at al. -- Testing Greg Chapman's addition of op code657        # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of658        # pattern '*?' on a long string.659        self.assertEqual(regex.match('.*?c', 10000 * 'ab' + 'cd').end(0),660          20001)661        self.assertEqual(regex.match('.*?cd', 5000 * 'ab' + 'c' + 5000 * 'ab' +662          'cde').end(0), 20003)663        self.assertEqual(regex.match('.*?cd', 20000 * 'abc' + 'de').end(0),664          60001)665        # Non-simple '*?' still used to hit the recursion limit, before the666        # non-recursive scheme was implemented.667        self.assertEqual(regex.search('(a|b)*?c', 10000 * 'ab' + 'cd').end(0),668          20001)669    def test_bug_612074(self):670        pat = "[" + regex.escape("\u2039") + "]"671        self.assertEqual(regex.compile(pat) and 1, 1)672    def test_stack_overflow(self):673        # Nasty cases that used to overflow the straightforward recursive674        # implementation of repeated groups.675        self.assertEqual(regex.match('(x)*', 50000 * 'x')[1], 'x')676        self.assertEqual(regex.match('(x)*y', 50000 * 'x' + 'y')[1], 'x')677        self.assertEqual(regex.match('(x)*?y', 50000 * 'x' + 'y')[1], 'x')678    def test_scanner(self):679        def s_ident(scanner, token): return token680        def s_operator(scanner, token): return "op%s" % token681        def s_float(scanner, token): return float(token)682        def s_int(scanner, token): return int(token)683        scanner = regex.Scanner([(r"[a-zA-Z_]\w*", s_ident), (r"\d+\.\d*",684          s_float), (r"\d+", s_int), (r"=|\+|-|\*|/", s_operator), (r"\s+",685            None), ])686        self.assertEqual(repr(type(scanner.scanner.scanner("").pattern)),687          self.PATTERN_CLASS)688        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), (['sum',689          'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 
'bar'], ''))690    def test_bug_448951(self):691        # Bug 448951 (similar to 429357, but with single char match).692        # (Also test greedy matches.)693        for op in '', '?', '*':694            self.assertEqual(regex.match(r'((.%s):)?z' % op, 'z')[:], ('z',695              None, None))696            self.assertEqual(regex.match(r'((.%s):)?z' % op, 'a:z')[:], ('a:z',697              'a:', 'a'))698    def test_bug_725106(self):699        # Capturing groups in alternatives in repeats.700        self.assertEqual(regex.match('^((a)|b)*', 'abc')[:], ('ab', 'b', 'a'))701        self.assertEqual(regex.match('^(([ab])|c)*', 'abc')[:], ('abc', 'c',702          'b'))703        self.assertEqual(regex.match('^((d)|[ab])*', 'abc')[:], ('ab', 'b',704          None))705        self.assertEqual(regex.match('^((a)c|[ab])*', 'abc')[:], ('ab', 'b',706          None))707        self.assertEqual(regex.match('^((a)|b)*?c', 'abc')[:], ('abc', 'b',708          'a'))709        self.assertEqual(regex.match('^(([ab])|c)*?d', 'abcd')[:], ('abcd',710          'c', 'b'))711        self.assertEqual(regex.match('^((d)|[ab])*?c', 'abc')[:], ('abc', 'b',712          None))713        self.assertEqual(regex.match('^((a)c|[ab])*?c', 'abc')[:], ('abc', 'b',714          None))715    def test_bug_725149(self):716        # Mark_stack_base restoring before restoring marks.717        self.assertEqual(regex.match('(a)(?:(?=(b)*)c)*', 'abb')[:], ('a', 'a',718          None))719        self.assertEqual(regex.match('(a)((?!(b)*))*', 'abb')[:], ('a', 'a',720          None, None))721    def test_bug_764548(self):722        # Bug 764548, regex.compile() barfs on str/unicode subclasses.723        class my_unicode(str): pass724        pat = regex.compile(my_unicode("abc"))725        self.assertEqual(pat.match("xyz"), None)726    def test_finditer(self):727        it = regex.finditer(r":+", "a:b::c:::d")728        self.assertEqual([item[0] for item in it], [':', '::', ':::'])729    def 
test_bug_926075(self):730        if regex.compile('bug_926075') is regex.compile(b'bug_926075'):731            self.fail()732    def test_bug_931848(self):733        pattern = "[\u002E\u3002\uFF0E\uFF61]"734        self.assertEqual(regex.compile(pattern).split("a.b.c"), ['a', 'b',735          'c'])736    def test_bug_581080(self):737        it = regex.finditer(r"\s", "a b")738        self.assertEqual(next(it).span(), (1, 2))739        self.assertRaises(StopIteration, lambda: next(it))740        scanner = regex.compile(r"\s").scanner("a b")741        self.assertEqual(scanner.search().span(), (1, 2))742        self.assertEqual(scanner.search(), None)743    def test_bug_817234(self):744        it = regex.finditer(r".*", "asdf")745        self.assertEqual(next(it).span(), (0, 4))746        self.assertEqual(next(it).span(), (4, 4))747        self.assertRaises(StopIteration, lambda: next(it))748    def test_empty_array(self):749        # SF buf 1647541.750        import array751        for typecode in 'bBuhHiIlLfd':752            a = array.array(typecode)753            self.assertEqual(regex.compile(b"bla").match(a), None)754            self.assertEqual(regex.compile(b"").match(a)[1 : ], ())755    def test_inline_flags(self):756        # Bug #1700.757        upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Below758        lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Below759        p = regex.compile(upper_char, regex.I | regex.U)760        self.assertEqual(bool(p.match(lower_char)), True)761        p = regex.compile(lower_char, regex.I | regex.U)762        self.assertEqual(bool(p.match(upper_char)), True)763        p = regex.compile('(?i)' + upper_char, regex.U)764        self.assertEqual(bool(p.match(lower_char)), True)765        p = regex.compile('(?i)' + lower_char, regex.U)766        self.assertEqual(bool(p.match(upper_char)), True)767        p = regex.compile('(?iu)' + upper_char)768        self.assertEqual(bool(p.match(lower_char)), True)769   
     p = regex.compile('(?iu)' + lower_char)770        self.assertEqual(bool(p.match(upper_char)), True)771        self.assertEqual(bool(regex.match(r"(?i)a", "A")), True)772        self.assertEqual(bool(regex.match(r"a(?i)", "A")), True)773        self.assertEqual(bool(regex.match(r"(?iV1)a", "A")), True)774        self.assertEqual(regex.match(r"a(?iV1)", "A"), None)775    def test_dollar_matches_twice(self):776        # $ matches the end of string, and just before the terminating \n.777        pattern = regex.compile('$')778        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')779        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')780        self.assertEqual(pattern.sub('#', '\n'), '#\n#')781        pattern = regex.compile('$', regex.MULTILINE)782        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#')783        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')784        self.assertEqual(pattern.sub('#', '\n'), '#\n#')785    def test_bytes_str_mixing(self):786        # Mixing str and bytes is disallowed.787        pat = regex.compile('.')788        bpat = regex.compile(b'.')789        self.assertRaisesRegex(TypeError, self.STR_PAT_ON_BYTES, lambda:790          pat.match(b'b'))791        self.assertRaisesRegex(TypeError, self.BYTES_PAT_ON_STR, lambda:792          bpat.match('b'))793        self.assertRaisesRegex(TypeError, self.STR_PAT_BYTES_TEMPL, lambda:794          pat.sub(b'b', 'c'))795        self.assertRaisesRegex(TypeError, self.STR_PAT_ON_BYTES, lambda:796          pat.sub('b', b'c'))797        self.assertRaisesRegex(TypeError, self.STR_PAT_ON_BYTES, lambda:798          pat.sub(b'b', b'c'))799        self.assertRaisesRegex(TypeError, self.BYTES_PAT_ON_STR, lambda:800          bpat.sub(b'b', 'c'))801        self.assertRaisesRegex(TypeError, self.BYTES_PAT_STR_TEMPL, lambda:802          bpat.sub('b', b'c'))803        self.assertRaisesRegex(TypeError, self.BYTES_PAT_ON_STR, lambda:804          bpat.sub('b', 'c'))805  
      self.assertRaisesRegex(ValueError, self.BYTES_PAT_UNI_FLAG, lambda:806          regex.compile(b'\w', regex.UNICODE))807        self.assertRaisesRegex(ValueError, self.BYTES_PAT_UNI_FLAG, lambda:808          regex.compile(b'(?u)\w'))809        self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:810          regex.compile('\w', regex.UNICODE | regex.ASCII))811        self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:812          regex.compile('(?u)\w', regex.ASCII))813        self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:814          regex.compile('(?a)\w', regex.UNICODE))815        self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:816          regex.compile('(?au)\w'))817    def test_ascii_and_unicode_flag(self):818        # String patterns.819        for flags in (0, regex.UNICODE):820            pat = regex.compile('\xc0', flags | regex.IGNORECASE)821            self.assertEqual(bool(pat.match('\xe0')), True)822            pat = regex.compile('\w', flags)823            self.assertEqual(bool(pat.match('\xe0')), True)824        pat = regex.compile('\xc0', regex.ASCII | regex.IGNORECASE)825        self.assertEqual(pat.match('\xe0'), None)826        pat = regex.compile('(?a)\xc0', regex.IGNORECASE)827        self.assertEqual(pat.match('\xe0'), None)828        pat = regex.compile('\w', regex.ASCII)829        self.assertEqual(pat.match('\xe0'), None)830        pat = regex.compile('(?a)\w')831        self.assertEqual(pat.match('\xe0'), None)832        # Bytes patterns.833        for flags in (0, regex.ASCII):834            pat = regex.compile(b'\xc0', flags | regex.IGNORECASE)835            self.assertEqual(pat.match(b'\xe0'), None)836            pat = regex.compile(b'\w')837            self.assertEqual(pat.match(b'\xe0'), None)838        self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:839          regex.compile('(?au)\w'))840    def test_subscripting_match(self):841        m = regex.match(r'(?<a>\w)', 'xy')842  
      if not m:843            self.fail("Failed: expected match but returned None")844        elif not m or m[0] != m.group(0) or m[1] != m.group(1):845            self.fail("Failed")846        if not m:847            self.fail("Failed: expected match but returned None")848        elif m[:] != ('x', 'x'):849            self.fail("Failed: expected \"('x', 'x')\" but got {} instead".format(ascii(m[:])))850    def test_new_named_groups(self):851        m0 = regex.match(r'(?P<a>\w)', 'x')852        m1 = regex.match(r'(?<a>\w)', 'x')853        if not (m0 and m1 and m0[:] == m1[:]):854            self.fail("Failed")855    def test_properties(self):856        self.assertEqual(regex.match(b'(?ai)\xC0', b'\xE0'), None)857        self.assertEqual(regex.match(br'(?ai)\xC0', b'\xE0'), None)858        self.assertEqual(regex.match(br'(?a)\w', b'\xE0'), None)859        self.assertEqual(bool(regex.match(r'\w', '\xE0')), True)860        # Dropped the following test. It's not possible to determine what the861        # correct result should be in the general case.862#        self.assertEqual(bool(regex.match(br'(?L)\w', b'\xE0')),863#          b'\xE0'.isalnum())864        self.assertEqual(bool(regex.match(br'(?L)\d', b'0')), True)865        self.assertEqual(bool(regex.match(br'(?L)\s', b' ')), True)866        self.assertEqual(bool(regex.match(br'(?L)\w', b'a')), True)867        self.assertEqual(regex.match(br'(?L)\d', b'?'), None)868        self.assertEqual(regex.match(br'(?L)\s', b'?'), None)869        self.assertEqual(regex.match(br'(?L)\w', b'?'), None)870        self.assertEqual(regex.match(br'(?L)\D', b'0'), None)871        self.assertEqual(regex.match(br'(?L)\S', b' '), None)872        self.assertEqual(regex.match(br'(?L)\W', b'a'), None)873        self.assertEqual(bool(regex.match(br'(?L)\D', b'?')), True)874        self.assertEqual(bool(regex.match(br'(?L)\S', b'?')), True)875        self.assertEqual(bool(regex.match(br'(?L)\W', b'?')), True)876        
self.assertEqual(bool(regex.match(r'\p{Cyrillic}',877          '\N{CYRILLIC CAPITAL LETTER A}')), True)878        self.assertEqual(bool(regex.match(r'(?i)\p{Cyrillic}',879          '\N{CYRILLIC CAPITAL LETTER A}')), True)880        self.assertEqual(bool(regex.match(r'\p{IsCyrillic}',881          '\N{CYRILLIC CAPITAL LETTER A}')), True)882        self.assertEqual(bool(regex.match(r'\p{Script=Cyrillic}',883          '\N{CYRILLIC CAPITAL LETTER A}')), True)884        self.assertEqual(bool(regex.match(r'\p{InCyrillic}',885          '\N{CYRILLIC CAPITAL LETTER A}')), True)886        self.assertEqual(bool(regex.match(r'\p{Block=Cyrillic}',887          '\N{CYRILLIC CAPITAL LETTER A}')), True)888        self.assertEqual(bool(regex.match(r'[[:Cyrillic:]]',889          '\N{CYRILLIC CAPITAL LETTER A}')), True)890        self.assertEqual(bool(regex.match(r'[[:IsCyrillic:]]',891          '\N{CYRILLIC CAPITAL LETTER A}')), True)892        self.assertEqual(bool(regex.match(r'[[:Script=Cyrillic:]]',893          '\N{CYRILLIC CAPITAL LETTER A}')), True)894        self.assertEqual(bool(regex.match(r'[[:InCyrillic:]]',895          '\N{CYRILLIC CAPITAL LETTER A}')), True)896        self.assertEqual(bool(regex.match(r'[[:Block=Cyrillic:]]',897          '\N{CYRILLIC CAPITAL LETTER A}')), True)898        self.assertEqual(bool(regex.match(r'\P{Cyrillic}',899          '\N{LATIN CAPITAL LETTER A}')), True)900        self.assertEqual(bool(regex.match(r'\P{IsCyrillic}',901          '\N{LATIN CAPITAL LETTER A}')), True)902        self.assertEqual(bool(regex.match(r'\P{Script=Cyrillic}',903          '\N{LATIN CAPITAL LETTER A}')), True)904        self.assertEqual(bool(regex.match(r'\P{InCyrillic}',905          '\N{LATIN CAPITAL LETTER A}')), True)906        self.assertEqual(bool(regex.match(r'\P{Block=Cyrillic}',907          '\N{LATIN CAPITAL LETTER A}')), True)908        self.assertEqual(bool(regex.match(r'\p{^Cyrillic}',909          '\N{LATIN CAPITAL LETTER A}')), True)910        
self.assertEqual(bool(regex.match(r'\p{^IsCyrillic}',911          '\N{LATIN CAPITAL LETTER A}')), True)912        self.assertEqual(bool(regex.match(r'\p{^Script=Cyrillic}',913          '\N{LATIN CAPITAL LETTER A}')), True)914        self.assertEqual(bool(regex.match(r'\p{^InCyrillic}',915          '\N{LATIN CAPITAL LETTER A}')), True)916        self.assertEqual(bool(regex.match(r'\p{^Block=Cyrillic}',917          '\N{LATIN CAPITAL LETTER A}')), True)918        self.assertEqual(bool(regex.match(r'[[:^Cyrillic:]]',919          '\N{LATIN CAPITAL LETTER A}')), True)920        self.assertEqual(bool(regex.match(r'[[:^IsCyrillic:]]',921          '\N{LATIN CAPITAL LETTER A}')), True)922        self.assertEqual(bool(regex.match(r'[[:^Script=Cyrillic:]]',923          '\N{LATIN CAPITAL LETTER A}')), True)924        self.assertEqual(bool(regex.match(r'[[:^InCyrillic:]]',925          '\N{LATIN CAPITAL LETTER A}')), True)926        self.assertEqual(bool(regex.match(r'[[:^Block=Cyrillic:]]',927          '\N{LATIN CAPITAL LETTER A}')), True)928        self.assertEqual(bool(regex.match(r'\d', '0')), True)929        self.assertEqual(bool(regex.match(r'\s', ' ')), True)930        self.assertEqual(bool(regex.match(r'\w', 'A')), True)931        self.assertEqual(regex.match(r"\d", "?"), None)932        self.assertEqual(regex.match(r"\s", "?"), None)933        self.assertEqual(regex.match(r"\w", "?"), None)934        self.assertEqual(regex.match(r"\D", "0"), None)935        self.assertEqual(regex.match(r"\S", " "), None)936        self.assertEqual(regex.match(r"\W", "A"), None)937        self.assertEqual(bool(regex.match(r'\D', '?')), True)938        self.assertEqual(bool(regex.match(r'\S', '?')), True)939        self.assertEqual(bool(regex.match(r'\W', '?')), True)940        self.assertEqual(bool(regex.match(r'\p{L}', 'A')), True)941        self.assertEqual(bool(regex.match(r'\p{L}', 'a')), True)942        self.assertEqual(bool(regex.match(r'\p{Lu}', 'A')), True)943        
self.assertEqual(bool(regex.match(r'\p{Ll}', 'a')), True)944        self.assertEqual(bool(regex.match(r'(?i)a', 'a')), True)945        self.assertEqual(bool(regex.match(r'(?i)a', 'A')), True)946        self.assertEqual(bool(regex.match(r'\w', '0')), True)947        self.assertEqual(bool(regex.match(r'\w', 'a')), True)948        self.assertEqual(bool(regex.match(r'\w', '_')), True)949        self.assertEqual(regex.match(r"\X", "\xE0").span(), (0, 1))950        self.assertEqual(regex.match(r"\X", "a\u0300").span(), (0, 2))951        self.assertEqual(regex.findall(r"\X",952          "a\xE0a\u0300e\xE9e\u0301"), ['a', '\xe0', 'a\u0300', 'e',953          '\xe9', 'e\u0301'])954        self.assertEqual(regex.findall(r"\X{3}",955          "a\xE0a\u0300e\xE9e\u0301"), ['a\xe0a\u0300', 'e\xe9e\u0301'])956        self.assertEqual(regex.findall(r"\X", "\r\r\n\u0301A\u0301"),957          ['\r', '\r\n', '\u0301', 'A\u0301'])958        self.assertEqual(bool(regex.match(r'\p{Ll}', 'a')), True)959        chars_u = "-09AZaz_\u0393\u03b3"960        chars_b = b"-09AZaz_"961        word_set = set("Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc".split())962        tests = [963            (r"\w", chars_u, "09AZaz_\u0393\u03b3"),964            (r"[[:word:]]", chars_u, "09AZaz_\u0393\u03b3"),965            (r"\W", chars_u, "-"),966            (r"[[:^word:]]", chars_u, "-"),967            (r"\d", chars_u, "09"),968            (r"[[:digit:]]", chars_u, "09"),969            (r"\D", chars_u, "-AZaz_\u0393\u03b3"),970            (r"[[:^digit:]]", chars_u, "-AZaz_\u0393\u03b3"),971            (r"[[:alpha:]]", chars_u, "AZaz\u0393\u03b3"),972            (r"[[:^alpha:]]", chars_u, "-09_"),973            (r"[[:alnum:]]", chars_u, "09AZaz\u0393\u03b3"),974            (r"[[:^alnum:]]", chars_u, "-_"),975            (r"[[:xdigit:]]", chars_u, "09Aa"),976            (r"[[:^xdigit:]]", chars_u, "-Zz_\u0393\u03b3"),977            (r"\p{InBasicLatin}", "a\xE1", "a"),978            (r"\P{InBasicLatin}", "a\xE1", 
"\xE1"),979            (r"(?i)\p{InBasicLatin}", "a\xE1", "a"),980            (r"(?i)\P{InBasicLatin}", "a\xE1", "\xE1"),981            (br"(?L)\w", chars_b, b"09AZaz_"),982            (br"(?L)[[:word:]]", chars_b, b"09AZaz_"),983            (br"(?L)\W", chars_b, b"-"),984            (br"(?L)[[:^word:]]", chars_b, b"-"),985            (br"(?L)\d", chars_b, b"09"),986            (br"(?L)[[:digit:]]", chars_b, b"09"),987            (br"(?L)\D", chars_b, b"-AZaz_"),988            (br"(?L)[[:^digit:]]", chars_b, b"-AZaz_"),989            (br"(?L)[[:alpha:]]", chars_b, b"AZaz"),990            (br"(?L)[[:^alpha:]]", chars_b, b"-09_"),991            (br"(?L)[[:alnum:]]", chars_b, b"09AZaz"),992            (br"(?L)[[:^alnum:]]", chars_b, b"-_"),993            (br"(?L)[[:xdigit:]]", chars_b, b"09Aa"),994            (br"(?L)[[:^xdigit:]]", chars_b, b"-Zz_"),995            (br"(?a)\w", chars_b, b"09AZaz_"),996            (br"(?a)[[:word:]]", chars_b, b"09AZaz_"),997            (br"(?a)\W", chars_b, b"-"),998            (br"(?a)[[:^word:]]", chars_b, b"-"),999            (br"(?a)\d", chars_b, b"09"),1000            (br"(?a)[[:digit:]]", chars_b, b"09"),1001            (br"(?a)\D", chars_b, b"-AZaz_"),1002            (br"(?a)[[:^digit:]]", chars_b, b"-AZaz_"),1003            (br"(?a)[[:alpha:]]", chars_b, b"AZaz"),1004            (br"(?a)[[:^alpha:]]", chars_b, b"-09_"),1005            (br"(?a)[[:alnum:]]", chars_b, b"09AZaz"),1006            (br"(?a)[[:^alnum:]]", chars_b, b"-_"),1007            (br"(?a)[[:xdigit:]]", chars_b, b"09Aa"),1008            (br"(?a)[[:^xdigit:]]", chars_b, b"-Zz_"),1009        ]1010        for pattern, chars, expected in tests:1011            try:1012                if chars[ : 0].join(regex.findall(pattern, chars)) != expected:1013                    self.fail("Failed: {}".format(pattern))1014            except Exception as e:1015                self.fail("Failed: {} raised {}".format(pattern, ascii(e)))1016        
self.assertEqual(bool(regex.match(r"\p{NumericValue=0}", "0")),1017          True)1018        self.assertEqual(bool(regex.match(r"\p{NumericValue=1/2}",1019          "\N{VULGAR FRACTION ONE HALF}")), True)1020        self.assertEqual(bool(regex.match(r"\p{NumericValue=0.5}",1021          "\N{VULGAR FRACTION ONE HALF}")), True)1022    def test_word_class(self):1023        self.assertEqual(regex.findall(r"\w+",1024          " \u0939\u093f\u0928\u094d\u0926\u0940,"),1025          ['\u0939\u093f\u0928\u094d\u0926\u0940'])1026        self.assertEqual(regex.findall(r"\W+",1027          " \u0939\u093f\u0928\u094d\u0926\u0940,"), [' ', ','])1028        self.assertEqual(regex.split(r"(?V1)\b",1029          " \u0939\u093f\u0928\u094d\u0926\u0940,"), [' ',1030          '\u0939\u093f\u0928\u094d\u0926\u0940', ','])1031        self.assertEqual(regex.split(r"(?V1)\B",1032          " \u0939\u093f\u0928\u094d\u0926\u0940,"), ['', ' \u0939',1033          '\u093f', '\u0928', '\u094d', '\u0926', '\u0940,', ''])1034    def test_search_anchor(self):1035        self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd'])1036    def test_search_reverse(self):1037        self.assertEqual(regex.findall(r"(?r).", "abc"), ['c', 'b', 'a'])1038        self.assertEqual(regex.findall(r"(?r).", "abc", overlapped=True), ['c',1039          'b', 'a'])1040        self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc'])1041        self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True),1042          ['de', 'cd', 'bc', 'ab'])1043        self.assertEqual(regex.findall(r"(?r)(.)(-)(.)", "a-b-c",1044          overlapped=True), [("b", "-", "c"), ("a", "-", "b")])1045        self.assertEqual([m[0] for m in regex.finditer(r"(?r).", "abc")], ['c',1046          'b', 'a'])1047        self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde",1048          overlapped=True)], ['de', 'cd', 'bc', 'ab'])1049        self.assertEqual([m[0] for m in regex.finditer(r"(?r).", "abc")], 
['c',1050          'b', 'a'])1051        self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde",1052          overlapped=True)], ['de', 'cd', 'bc', 'ab'])1053        self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo',1054          'bar'])1055        self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo',1056          'bar'])1057        self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo',1058          ''])1059        self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar',1060          'foo', ''])1061        self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")],1062          ['', 'foo', 'bar'])1063        self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+",1064          "foo bar")], ['', 'foo', 'bar'])1065        self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+",1066          "foo bar")], ['bar', 'foo', ''])1067        self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+",1068          "foo bar")], ['bar', 'foo', ''])1069        self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd'])1070        self.assertEqual(regex.findall(r".{2}(?<=\G.*)", "abcd"), ['ab', 'cd'])1071        self.assertEqual(regex.findall(r"(?r)\G\w{2}", "abcd ef"), [])1072        self.assertEqual(regex.findall(r"(?r)\w{2}\G", "abcd ef"), ['ef'])1073        self.assertEqual(regex.findall(r"q*", "qqwe"), ['qq', '', '', ''])1074        self.assertEqual(regex.findall(r"(?V1)q*", "qqwe"), ['qq', '', '', ''])1075        self.assertEqual(regex.findall(r"(?r)q*", "qqwe"), ['', '', 'qq', ''])1076        self.assertEqual(regex.findall(r"(?rV1)q*", "qqwe"), ['', '', 'qq',1077          ''])1078        self.assertEqual(regex.findall(".", "abcd", pos=1, endpos=3), ['b',1079          'c'])1080        self.assertEqual(regex.findall(".", "abcd", pos=1, endpos=-1), ['b',1081          'c'])1082        self.assertEqual([m[0] for m in regex.finditer(".", "abcd", pos=1,1083          endpos=3)], 
['b', 'c'])1084        self.assertEqual([m[0] for m in regex.finditer(".", "abcd", pos=1,1085          endpos=-1)], ['b', 'c'])1086        self.assertEqual([m[0] for m in regex.finditer("(?r).", "abcd", pos=1,1087          endpos=3)], ['c', 'b'])1088        self.assertEqual([m[0] for m in regex.finditer("(?r).", "abcd", pos=1,1089          endpos=-1)], ['c', 'b'])1090        self.assertEqual(regex.findall("(?r).", "abcd", pos=1, endpos=3), ['c',1091          'b'])1092        self.assertEqual(regex.findall("(?r).", "abcd", pos=1, endpos=-1),1093          ['c', 'b'])1094        self.assertEqual(regex.findall(r"[ab]", "aB", regex.I), ['a', 'B'])1095        self.assertEqual(regex.findall(r"(?r)[ab]", "aB", regex.I), ['B', 'a'])1096        self.assertEqual(regex.findall(r"(?r).{2}", "abc"), ['bc'])1097        self.assertEqual(regex.findall(r"(?r).{2}", "abc", overlapped=True),1098          ['bc', 'ab'])1099        self.assertEqual(regex.findall(r"(\w+) (\w+)",1100          "first second third fourth fifth"), [('first', 'second'), ('third',1101          'fourth')])1102        self.assertEqual(regex.findall(r"(?r)(\w+) (\w+)",1103          "first second third fourth fifth"), [('fourth', 'fifth'), ('second',1104          'third')])1105        self.assertEqual([m[0] for m in regex.finditer(r"(?r).{2}", "abc")],1106          ['bc'])1107        self.assertEqual([m[0] for m in regex.finditer(r"(?r).{2}", "abc",1108          overlapped=True)], ['bc', 'ab'])1109        self.assertEqual([m[0] for m in regex.finditer(r"(\w+) (\w+)",1110          "first second third fourth fifth")], ['first second',1111          'third fourth'])1112        self.assertEqual([m[0] for m in regex.finditer(r"(?r)(\w+) (\w+)",1113          "first second third fourth fifth")], ['fourth fifth',1114          'second third'])1115        self.assertEqual(regex.search("abcdef", "abcdef").span(), (0, 6))1116        self.assertEqual(regex.search("(?r)abcdef", "abcdef").span(), (0, 6))1117        
self.assertEqual(regex.search("(?i)abcdef", "ABCDEF").span(), (0, 6))1118        self.assertEqual(regex.search("(?ir)abcdef", "ABCDEF").span(), (0, 6))1119        self.assertEqual(regex.sub(r"(.)", r"\1", "abc"), 'abc')1120        self.assertEqual(regex.sub(r"(?r)(.)", r"\1", "abc"), 'abc')1121    def test_atomic(self):1122        # Issue 433030.1123        self.assertEqual(regex.search(r"(?>a*)a", "aa"), None)1124    def test_possessive(self):1125        # Single-character non-possessive.1126        self.assertEqual(regex.search(r"a?a", "a").span(), (0, 1))1127        self.assertEqual(regex.search(r"a*a", "aaa").span(), (0, 3))1128        self.assertEqual(regex.search(r"a+a", "aaa").span(), (0, 3))1129        self.assertEqual(regex.search(r"a{1,3}a", "aaa").span(), (0, 3))1130        # Multiple-character non-possessive.1131        self.assertEqual(regex.search(r"(?:ab)?ab", "ab").span(), (0, 2))1132        self.assertEqual(regex.search(r"(?:ab)*ab", "ababab").span(), (0, 6))1133        self.assertEqual(regex.search(r"(?:ab)+ab", "ababab").span(), (0, 6))1134        self.assertEqual(regex.search(r"(?:ab){1,3}ab", "ababab").span(), (0,1135          6))1136        # Single-character possessive.1137        self.assertEqual(regex.search(r"a?+a", "a"), None)1138        self.assertEqual(regex.search(r"a*+a", "aaa"), None)1139        self.assertEqual(regex.search(r"a++a", "aaa"), None)1140        self.assertEqual(regex.search(r"a{1,3}+a", "aaa"), None)1141        # Multiple-character possessive.1142        self.assertEqual(regex.search(r"(?:ab)?+ab", "ab"), None)1143        self.assertEqual(regex.search(r"(?:ab)*+ab", "ababab"), None)1144        self.assertEqual(regex.search(r"(?:ab)++ab", "ababab"), None)1145        self.assertEqual(regex.search(r"(?:ab){1,3}+ab", "ababab"), None)1146    def test_zerowidth(self):1147        # Issue 3262.1148        self.assertEqual(regex.split(r"\b", "a b"), ['a b'])1149        self.assertEqual(regex.split(r"(?V1)\b", "a b"), ['', 'a', ' 
', 'b',1150          ''])1151        # Issue 1647489.1152        self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo',1153          'bar'])1154        self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")],1155          ['', 'foo', 'bar'])1156        self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo',1157          ''])1158        self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+",1159          "foo bar")], ['bar', 'foo', ''])1160        self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo',1161          'bar'])1162        self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+",1163          "foo bar")], ['', 'foo', 'bar'])1164        self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar',1165          'foo', ''])1166        self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+",1167          "foo bar")], ['bar', 'foo', ''])1168        self.assertEqual(regex.split("", "xaxbxc"), ['xaxbxc'])1169        self.assertEqual([m for m in regex.splititer("", "xaxbxc")],1170          ['xaxbxc'])1171        self.assertEqual(regex.split("(?r)", "xaxbxc"), ['xaxbxc'])1172        self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")],1173          ['xaxbxc'])1174        self.assertEqual(regex.split("(?V1)", "xaxbxc"), ['', 'x', 'a', 'x',1175          'b', 'x', 'c', ''])1176        self.assertEqual([m for m in regex.splititer("(?V1)", "xaxbxc")], ['',1177          'x', 'a', 'x', 'b', 'x', 'c', ''])1178        self.assertEqual(regex.split("(?rV1)", "xaxbxc"), ['', 'c', 'x', 'b',1179          'x', 'a', 'x', ''])1180        self.assertEqual([m for m in regex.splititer("(?rV1)", "xaxbxc")], ['',1181          'c', 'x', 'b', 'x', 'a', 'x', ''])1182    def test_scoped_and_inline_flags(self):1183        # Issues 433028, 433024, 433027.1184        self.assertEqual(regex.search(r"(?i)Ab", "ab").span(), (0, 2))1185        self.assertEqual(regex.search(r"(?i:A)b", "ab").span(), (0, 2))1186     
   self.assertEqual(regex.search(r"A(?i)b", "ab").span(), (0, 2))1187        self.assertEqual(regex.search(r"A(?iV1)b", "ab"), None)1188        self.assertRaisesRegex(regex.error, self.CANT_TURN_OFF, lambda:1189          regex.search(r"(?V0-i)Ab", "ab", flags=regex.I))1190        self.assertEqual(regex.search(r"(?V0)Ab", "ab"), None)1191        self.assertEqual(regex.search(r"(?V1)Ab", "ab"), None)1192        self.assertEqual(regex.search(r"(?V1-i)Ab", "ab", flags=regex.I), None)1193        self.assertEqual(regex.search(r"(?-i:A)b", "ab", flags=regex.I), None)1194        self.assertEqual(regex.search(r"A(?V1-i)b", "ab",1195          flags=regex.I).span(), (0, 2))1196    def test_repeated_repeats(self):1197        # Issue 2537.1198        self.assertEqual(regex.search(r"(?:a+)+", "aaa").span(), (0, 3))1199        self.assertEqual(regex.search(r"(?:(?:ab)+c)+", "abcabc").span(), (0,1200          6))1201    def test_lookbehind(self):1202        self.assertEqual(regex.search(r"123(?<=a\d+)", "a123").span(), (1, 4))1203        self.assertEqual(regex.search(r"123(?<=a\d+)", "b123"), None)1204        self.assertEqual(regex.search(r"123(?<!a\d+)", "a123"), None)1205        self.assertEqual(regex.search(r"123(?<!a\d+)", "b123").span(), (1, 4))1206        self.assertEqual(bool(regex.match("(a)b(?<=b)(c)", "abc")), True)1207        self.assertEqual(regex.match("(a)b(?<=c)(c)", "abc"), None)1208        self.assertEqual(bool(regex.match("(a)b(?=c)(c)", "abc")), True)1209        self.assertEqual(regex.match("(a)b(?=b)(c)", "abc"), None)1210        self.assertEqual(regex.match("(?:(a)|(x))b(?<=(?(2)x|c))c", "abc"),1211          None)1212        self.assertEqual(regex.match("(?:(a)|(x))b(?<=(?(2)b|x))c", "abc"),1213          None)1214        self.assertEqual(bool(regex.match("(?:(a)|(x))b(?<=(?(2)x|b))c",1215          "abc")), True)1216        self.assertEqual(regex.match("(?:(a)|(x))b(?<=(?(1)c|x))c", "abc"),1217          None)1218        
self.assertEqual(bool(regex.match("(?:(a)|(x))b(?<=(?(1)b|x))c",1219          "abc")), True)1220        self.assertEqual(bool(regex.match("(?:(a)|(x))b(?=(?(2)x|c))c",1221          "abc")), True)1222        self.assertEqual(regex.match("(?:(a)|(x))b(?=(?(2)c|x))c", "abc"),1223          None)1224        self.assertEqual(bool(regex.match("(?:(a)|(x))b(?=(?(2)x|c))c",1225          "abc")), True)1226        self.assertEqual(regex.match("(?:(a)|(x))b(?=(?(1)b|x))c", "abc"),1227          None)1228        self.assertEqual(bool(regex.match("(?:(a)|(x))b(?=(?(1)c|x))c",1229          "abc")), True)1230        self.assertEqual(regex.match("(a)b(?<=(?(2)x|c))(c)", "abc"), None)1231        self.assertEqual(regex.match("(a)b(?<=(?(2)b|x))(c)", "abc"), None)1232        self.assertEqual(regex.match("(a)b(?<=(?(1)c|x))(c)", "abc"), None)1233        self.assertEqual(bool(regex.match("(a)b(?<=(?(1)b|x))(c)", "abc")),1234          True)1235        self.assertEqual(bool(regex.match("(a)b(?=(?(2)x|c))(c)", "abc")),1236          True)1237        self.assertEqual(regex.match("(a)b(?=(?(2)b|x))(c)", "abc"), None)1238        self.assertEqual(bool(regex.match("(a)b(?=(?(1)c|x))(c)", "abc")),1239          True)1240        self.assertEqual(repr(type(regex.compile(r"(a)\2(b)"))),1241          self.PATTERN_CLASS)1242    def test_unmatched_in_sub(self):1243        # Issue 1519638.1244        self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "xy"), 'y-x')1245        self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "xy"), 'y-x-')1246        self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "x"), '-x')1247        self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "x"), '-x-')1248        self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "y"), 'y-')1249        self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "y"), 'y--')1250    def test_bug_10328 (self):1251        # Issue 10328.1252        pat = regex.compile(r'(?mV0)(?P<trailing_ws>[ 
\t]+\r*$)|(?P<no_final_newline>(?<=[^\n])\Z)')1253        self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>',1254          'foobar '), ('foobar<trailing_ws>', 1))1255        self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ',1256          ''])1257        pat = regex.compile(r'(?mV1)(?P<trailing_ws>[ \t]+\r*$)|(?P<no_final_newline>(?<=[^\n])\Z)')1258        self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>',1259          'foobar '), ('foobar<trailing_ws><no_final_newline>', 2))1260        self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ',1261          ''])1262    def test_overlapped(self):1263        self.assertEqual(regex.findall(r"..", "abcde"), ['ab', 'cd'])1264        self.assertEqual(regex.findall(r"..", "abcde", overlapped=True), ['ab',1265          'bc', 'cd', 'de'])1266        self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc'])1267        self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True),1268          ['de', 'cd', 'bc', 'ab'])1269        self.assertEqual(regex.findall(r"(.)(-)(.)", "a-b-c", overlapped=True),1270          [("a", "-", "b"), ("b", "-", "c")])1271        self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde")], ['ab',1272          'cd'])1273        self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde",1274          overlapped=True)], ['ab', 'bc', 'cd', 'de'])1275        self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde")],1276          ['de', 'bc'])1277        self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde",1278          overlapped=True)], ['de', 'cd', 'bc', 'ab'])1279        self.assertEqual([m.groups() for m in regex.finditer(r"(.)(-)(.)",1280          "a-b-c", overlapped=True)], [("a", "-", "b"), ("b", "-", "c")])1281        self.assertEqual([m.groups() for m in regex.finditer(r"(?r)(.)(-)(.)",1282          "a-b-c", overlapped=True)], [("b", "-", "c"), ("a", "-", "b")])1283    def test_splititer(self):1284       
 self.assertEqual(regex.split(r",", "a,b,,c,"), ['a', 'b', '', 'c', ''])1285        self.assertEqual([m for m in regex.splititer(r",", "a,b,,c,")], ['a',1286          'b', '', 'c', ''])1287    def test_grapheme(self):1288        self.assertEqual(regex.match(r"\X", "\xE0").span(), (0, 1))1289        self.assertEqual(regex.match(r"\X", "a\u0300").span(), (0, 2))1290        self.assertEqual(regex.findall(r"\X",1291          "a\xE0a\u0300e\xE9e\u0301"), ['a', '\xe0', 'a\u0300', 'e',1292          '\xe9', 'e\u0301'])1293        self.assertEqual(regex.findall(r"\X{3}",1294          "a\xE0a\u0300e\xE9e\u0301"), ['a\xe0a\u0300', 'e\xe9e\u0301'])1295        self.assertEqual(regex.findall(r"\X", "\r\r\n\u0301A\u0301"),1296          ['\r', '\r\n', '\u0301', 'A\u0301'])1297    def test_word_boundary(self):1298        text = 'The quick ("brown") fox can\'t jump 32.3 feet, right?'1299        self.assertEqual(regex.split(r'(?V1)\b', text), ['', 'The', ' ',1300          'quick', ' ("', 'brown', '") ', 'fox', ' ', 'can', "'", 't',1301          ' ', 'jump', ' ', '32', '.', '3', ' ', 'feet', ', ',1302          'right', '?'])1303        self.assertEqual(regex.split(r'(?V1w)\b', text), ['', 'The', ' ',1304          'quick', ' ', '(', '"', 'brown', '"', ')', ' ', 'fox', ' ',1305          "can't", ' ', 'jump', ' ', '32.3', ' ', 'feet', ',', ' ',1306          'right', '?', ''])1307        text = "The  fox"1308        self.assertEqual(regex.split(r'(?V1)\b', text), ['', 'The', '  ',1309          'fox', ''])1310        self.assertEqual(regex.split(r'(?V1w)\b', text), ['', 'The', ' ',1311          ' ', 'fox', ''])1312        text = "can't aujourd'hui l'objectif"1313        self.assertEqual(regex.split(r'(?V1)\b', text), ['', 'can', "'",1314          't', ' ', 'aujourd', "'", 'hui', ' ', 'l', "'", 'objectif',1315          ''])1316        self.assertEqual(regex.split(r'(?V1w)\b', text), ['', "can't", ' ',1317          "aujourd'hui", ' ', "l'", 'objectif', ''])1318    def 
test_line_boundary(self):1319        self.assertEqual(regex.findall(r".+", "Line 1\nLine 2\n"), ["Line 1",1320          "Line 2"])1321        self.assertEqual(regex.findall(r".+", "Line 1\rLine 2\r"),1322          ["Line 1\rLine 2\r"])1323        self.assertEqual(regex.findall(r".+", "Line 1\r\nLine 2\r\n"),1324          ["Line 1\r", "Line 2\r"])1325        self.assertEqual(regex.findall(r"(?w).+", "Line 1\nLine 2\n"),1326          ["Line 1", "Line 2"])1327        self.assertEqual(regex.findall(r"(?w).+", "Line 1\rLine 2\r"),1328          ["Line 1", "Line 2"])1329        self.assertEqual(regex.findall(r"(?w).+", "Line 1\r\nLine 2\r\n"),1330          ["Line 1", "Line 2"])1331        self.assertEqual(regex.search(r"^abc", "abc").start(), 0)1332        self.assertEqual(regex.search(r"^abc", "\nabc"), None)1333        self.assertEqual(regex.search(r"^abc", "\rabc"), None)1334        self.assertEqual(regex.search(r"(?w)^abc", "abc").start(), 0)1335        self.assertEqual(regex.search(r"(?w)^abc", "\nabc"), None)1336        self.assertEqual(regex.search(r"(?w)^abc", "\rabc"), None)1337        self.assertEqual(regex.search(r"abc$", "abc").start(), 0)1338        self.assertEqual(regex.search(r"abc$", "abc\n").start(), 0)1339        self.assertEqual(regex.search(r"abc$", "abc\r"), None)1340        self.assertEqual(regex.search(r"(?w)abc$", "abc").start(), 0)1341        self.assertEqual(regex.search(r"(?w)abc$", "abc\n").start(), 0)1342        self.assertEqual(regex.search(r"(?w)abc$", "abc\r").start(), 0)1343        self.assertEqual(regex.search(r"(?m)^abc", "abc").start(), 0)1344        self.assertEqual(regex.search(r"(?m)^abc", "\nabc").start(), 1)1345        self.assertEqual(regex.search(r"(?m)^abc", "\rabc"), None)1346        self.assertEqual(regex.search(r"(?mw)^abc", "abc").start(), 0)1347        self.assertEqual(regex.search(r"(?mw)^abc", "\nabc").start(), 1)1348        self.assertEqual(regex.search(r"(?mw)^abc", "\rabc").start(), 1)1349        
self.assertEqual(regex.search(r"(?m)abc$", "abc").start(), 0)1350        self.assertEqual(regex.search(r"(?m)abc$", "abc\n").start(), 0)1351        self.assertEqual(regex.search(r"(?m)abc$", "abc\r"), None)1352        self.assertEqual(regex.search(r"(?mw)abc$", "abc").start(), 0)1353        self.assertEqual(regex.search(r"(?mw)abc$", "abc\n").start(), 0)1354        self.assertEqual(regex.search(r"(?mw)abc$", "abc\r").start(), 0)1355    def test_branch_reset(self):1356        self.assertEqual(regex.match(r"(?:(a)|(b))(c)", "ac").groups(), ('a',1357          None, 'c'))1358        self.assertEqual(regex.match(r"(?:(a)|(b))(c)", "bc").groups(), (None,1359          'b', 'c'))1360        self.assertEqual(regex.match(r"(?:(?<a>a)|(?<b>b))(?<c>c)",1361          "ac").groups(), ('a', None, 'c'))1362        self.assertEqual(regex.match(r"(?:(?<a>a)|(?<b>b))(?<c>c)",1363          "bc").groups(), (None, 'b', 'c'))1364        self.assertEqual(regex.match(r"(?<a>a)(?:(?<b>b)|(?<c>c))(?<d>d)",1365          "abd").groups(), ('a', 'b', None, 'd'))1366        self.assertEqual(regex.match(r"(?<a>a)(?:(?<b>b)|(?<c>c))(?<d>d)",1367          "acd").groups(), ('a', None, 'c', 'd'))1368        self.assertEqual(regex.match(r"(a)(?:(b)|(c))(d)", "abd").groups(),1369          ('a', 'b', None, 'd'))1370        self.assertEqual(regex.match(r"(a)(?:(b)|(c))(d)", "acd").groups(),1371          ('a', None, 'c', 'd'))1372        self.assertEqual(regex.match(r"(a)(?|(b)|(b))(d)", "abd").groups(),1373          ('a', 'b', 'd'))1374        self.assertEqual(regex.match(r"(?|(?<a>a)|(?<b>b))(c)", "ac").groups(),1375          ('a', None, 'c'))1376        self.assertEqual(regex.match(r"(?|(?<a>a)|(?<b>b))(c)", "bc").groups(),1377          (None, 'b', 'c'))1378        self.assertEqual(regex.match(r"(?|(?<a>a)|(?<a>b))(c)", "ac").groups(),1379          ('a', 'c'))1380        self.assertEqual(regex.match(r"(?|(?<a>a)|(?<a>b))(c)", "bc").groups(),1381          ('b', 'c'))1382        
self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(?<b>c)(?<a>d))(e)",1383          "abe").groups(), ('a', 'b', 'e'))1384        self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(?<b>c)(?<a>d))(e)",1385          "cde").groups(), ('d', 'c', 'e'))1386        self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(?<b>c)(d))(e)",1387          "abe").groups(), ('a', 'b', 'e'))1388        self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(?<b>c)(d))(e)",1389          "cde").groups(), ('d', 'c', 'e'))1390        self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(d))(e)",1391          "abe").groups(), ('a', 'b', 'e'))1392        self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(d))(e)",1393          "cde").groups(), ('c', 'd', 'e'))1394        # Hg issue 87: Allow duplicate names of groups1395        self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(?<a>d))(e)",1396          "abe").groups(), ("a", "b", "e"))1397        self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(?<a>d))(e)",1398          "abe").capturesdict(), {"a": ["a"], "b": ["b"]})1399        self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(?<a>d))(e)",1400          "cde").groups(), ("d", None, "e"))1401        self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(?<a>d))(e)",1402          "cde").capturesdict(), {"a": ["c", "d"], "b": []})1403    def test_set(self):1404        self.assertEqual(regex.match(r"[a]", "a").span(), (0, 1))1405        self.assertEqual(regex.match(r"(?i)[a]", "A").span(), (0, 1))1406        self.assertEqual(regex.match(r"[a-b]", r"a").span(), (0, 1))1407        self.assertEqual(regex.match(r"(?i)[a-b]", r"A").span(), (0, 1))1408        self.assertEqual(regex.sub(r"(?V0)([][])", r"-", "a[b]c"), "a-b-c")1409        self.assertEqual(regex.findall(r"[\p{Alpha}]", "a0"), ["a"])1410        self.assertEqual(regex.findall(r"(?i)[\p{Alpha}]", "A0"), ["A"])1411        self.assertEqual(regex.findall(r"[a\p{Alpha}]", "ab0"), ["a", "b"])1412        
self.assertEqual(regex.findall(r"[a\P{Alpha}]", "ab0"), ["a", "0"])1413        self.assertEqual(regex.findall(r"(?i)[a\p{Alpha}]", "ab0"), ["a",1414          "b"])1415        self.assertEqual(regex.findall(r"(?i)[a\P{Alpha}]", "ab0"), ["a",1416          "0"])1417        self.assertEqual(regex.findall(r"[a-b\p{Alpha}]", "abC0"), ["a",1418          "b", "C"])1419        self.assertEqual(regex.findall(r"(?i)[a-b\p{Alpha}]", "AbC0"), ["A",1420          "b", "C"])1421        self.assertEqual(regex.findall(r"[\p{Alpha}]", "a0"), ["a"])1422        self.assertEqual(regex.findall(r"[\P{Alpha}]", "a0"), ["0"])1423        self.assertEqual(regex.findall(r"[^\p{Alpha}]", "a0"), ["0"])1424        self.assertEqual(regex.findall(r"[^\P{Alpha}]", "a0"), ["a"])1425        self.assertEqual("".join(regex.findall(r"[^\d-h]", "a^b12c-h")),1426          'a^bc')1427        self.assertEqual("".join(regex.findall(r"[^\dh]", "a^b12c-h")),1428          'a^bc-')1429        self.assertEqual("".join(regex.findall(r"[^h\s\db]", "a^b 12c-h")),1430          'a^c-')1431        self.assertEqual("".join(regex.findall(r"[^b\w]", "a b")), ' ')1432        self.assertEqual("".join(regex.findall(r"[^b\S]", "a b")), ' ')1433        self.assertEqual("".join(regex.findall(r"[^8\d]", "a 1b2")), 'a b')1434        all_chars = "".join(chr(c) for c in range(0x100))1435        self.assertEqual(len(regex.findall(r"\p{ASCII}", all_chars)), 128)1436        self.assertEqual(len(regex.findall(r"\p{Letter}", all_chars)),1437          117)1438        self.assertEqual(len(regex.findall(r"\p{Digit}", all_chars)), 10)1439        # Set operators1440        self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Letter}]",1441          all_chars)), 52)1442        self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Alnum}&&\p{Letter}]",1443          all_chars)), 52)1444        self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Alnum}&&\p{Digit}]",1445          all_chars)), 10)1446        
self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Cc}]",1447          all_chars)), 33)1448        self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Graph}]",1449          all_chars)), 94)1450        self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}--\p{Cc}]",1451          all_chars)), 95)1452        self.assertEqual(len(regex.findall(r"[\p{Letter}\p{Digit}]",1453          all_chars)), 127)1454        self.assertEqual(len(regex.findall(r"(?V1)[\p{Letter}||\p{Digit}]",1455          all_chars)), 127)1456        self.assertEqual(len(regex.findall(r"\p{HexDigit}", all_chars)),1457          22)1458        self.assertEqual(len(regex.findall(r"(?V1)[\p{HexDigit}~~\p{Digit}]",1459          all_chars)), 12)1460        self.assertEqual(len(regex.findall(r"(?V1)[\p{Digit}~~\p{HexDigit}]",1461          all_chars)), 12)1462        self.assertEqual(repr(type(regex.compile(r"(?V0)([][-])"))),1463          self.PATTERN_CLASS)1464        self.assertEqual(regex.findall(r"(?V1)[[a-z]--[aei]]", "abc"), ["b",1465          "c"])1466        self.assertEqual(regex.findall(r"(?iV1)[[a-z]--[aei]]", "abc"), ["b",1467          "c"])1468        self.assertEqual(regex.findall("(?V1)[\w--a]","abc"), ["b", "c"])1469        self.assertEqual(regex.findall("(?iV1)[\w--a]","abc"), ["b", "c"])1470    def test_various(self):1471        tests = [1472            # Test ?P< and ?P= extensions.1473            ('(?P<foo_123', '', '', regex.error, self.MISSING_GT),      # Unterminated group identifier.1474            ('(?P<1>a)', '', '', regex.error, self.BAD_GROUP_NAME),     # Begins with a digit.1475            ('(?P<!>a)', '', '', regex.error, self.BAD_GROUP_NAME),     # Begins with an illegal char.1476            ('(?P<foo!>a)', '', '', regex.error, self.BAD_GROUP_NAME),  # Begins with an illegal char.1477            # Same tests, for the ?P= form.1478            ('(?P<foo_123>a)(?P=foo_123', 'aa', '', regex.error,1479              self.MISSING_RPAREN),1480            
('(?P<foo_123>a)(?P=1)', 'aa', '1', ascii('a')),1481            ('(?P<foo_123>a)(?P=0)', 'aa', '', regex.error,1482              self.BAD_GROUP_NAME),1483            ('(?P<foo_123>a)(?P=-1)', 'aa', '', regex.error,1484              self.BAD_GROUP_NAME),1485            ('(?P<foo_123>a)(?P=!)', 'aa', '', regex.error,1486              self.BAD_GROUP_NAME),1487            ('(?P<foo_123>a)(?P=foo_124)', 'aa', '', regex.error,1488              self.UNKNOWN_GROUP),  # Backref to undefined group.1489            ('(?P<foo_123>a)', 'a', '1', ascii('a')),1490            ('(?P<foo_123>a)(?P=foo_123)', 'aa', '1', ascii('a')),1491            # Mal-formed \g in pattern treated as literal for compatibility.1492            (r'(?<foo_123>a)\g<foo_123', 'aa', '', ascii(None)),1493            (r'(?<foo_123>a)\g<1>', 'aa', '1', ascii('a')),1494            (r'(?<foo_123>a)\g<!>', 'aa', '', ascii(None)),1495            (r'(?<foo_123>a)\g<foo_124>', 'aa', '', regex.error,1496              self.UNKNOWN_GROUP),  # Backref to undefined group.1497            ('(?<foo_123>a)', 'a', '1', ascii('a')),1498            (r'(?<foo_123>a)\g<foo_123>', 'aa', '1', ascii('a')),1499            # Test octal escapes.1500            ('\\1', 'a', '', regex.error, self.INVALID_GROUP_REF),    # Backreference.1501            ('[\\1]', '\1', '0', "'\\x01'"),  # Character.1502            ('\\09', chr(0) + '9', '0', ascii(chr(0) + '9')),1503            ('\\141', 'a', '0', ascii('a')),1504            ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9',1505              '0,11', ascii(('abcdefghijklk9', 'k'))),1506            # Test \0 is handled everywhere.1507            (r'\0', '\0', '0', ascii('\0')),1508            (r'[\0a]', '\0', '0', ascii('\0')),1509            (r'[a\0]', '\0', '0', ascii('\0')),1510            (r'[^a\0]', '\0', '', ascii(None)),1511            # Test various letter escapes.1512            (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', '0',1513              ascii('\a\b\f\n\r\t\v')),1514   
         (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', '0',1515              ascii('\a\b\f\n\r\t\v')),1516            (r'\c\e\g\h\i\j\k\o\p\q\y\z', 'ceghijkopqyz', '0',1517              ascii('ceghijkopqyz')),1518            (r'\xff', '\377', '0', ascii(chr(255))),1519            # New \x semantics.1520            (r'\x00ffffffffffffff', '\377', '', ascii(None)),1521            (r'\x00f', '\017', '', ascii(None)),1522            (r'\x00fe', '\376', '', ascii(None)),1523            (r'\x00ff', '\377', '', ascii(None)),1524            (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', '0', ascii('\t\n\v\r\f\ag')),1525            ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', '0', ascii('\t\n\v\r\f\ag')),1526            (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', '0', ascii(chr(9) + chr(10) +1527              chr(11) + chr(13) + chr(12) + chr(7))),1528            (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', '0',1529              ascii('\t\n\v\r\f\b')),1530            (r"^\w+=(\\[\000-\277]|[^\n\\])*",1531              "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", '0',1532              ascii("SRC=eval.c g.c blah blah blah \\\\")),1533            # Test that . 
only matches \n in DOTALL mode.1534            ('a.b', 'acb', '0', ascii('acb')),1535            ('a.b', 'a\nb', '', ascii(None)),1536            ('a.*b', 'acc\nccb', '', ascii(None)),1537            ('a.{4,5}b', 'acc\nccb', '', ascii(None)),1538            ('a.b', 'a\rb', '0', ascii('a\rb')),1539            # The new behaviour is that the inline flag affects only what follows.1540            ('a.b(?s)', 'a\nb', '0', ascii('a\nb')),1541            ('a.b(?sV1)', 'a\nb', '', ascii(None)),1542            ('(?s)a.b', 'a\nb', '0', ascii('a\nb')),1543            ('a.*(?s)b', 'acc\nccb', '0', ascii('acc\nccb')),1544            ('a.*(?sV1)b', 'acc\nccb', '', ascii(None)),1545            ('(?s)a.*b', 'acc\nccb', '0', ascii('acc\nccb')),1546            ('(?s)a.{4,5}b', 'acc\nccb', '0', ascii('acc\nccb')),1547            (')', '', '', regex.error, self.TRAILING_CHARS),           # Unmatched right bracket.1548            ('', '', '0', "''"),    # Empty pattern.1549            ('abc', 'abc', '0', ascii('abc')),1550            ('abc', 'xbc', '', ascii(None)),1551            ('abc', 'axc', '', ascii(None)),1552            ('abc', 'abx', '', ascii(None)),1553            ('abc', 'xabcy', '0', ascii('abc')),1554            ('abc', 'ababc', '0', ascii('abc')),1555            ('ab*c', 'abc', '0', ascii('abc')),1556            ('ab*bc', 'abc', '0', ascii('abc')),1557            ('ab*bc', 'abbc', '0', ascii('abbc')),1558            ('ab*bc', 'abbbbc', '0', ascii('abbbbc')),1559            ('ab+bc', 'abbc', '0', ascii('abbc')),1560            ('ab+bc', 'abc', '', ascii(None)),1561            ('ab+bc', 'abq', '', ascii(None)),1562            ('ab+bc', 'abbbbc', '0', ascii('abbbbc')),1563            ('ab?bc', 'abbc', '0', ascii('abbc')),1564            ('ab?bc', 'abc', '0', ascii('abc')),1565            ('ab?bc', 'abbbbc', '', ascii(None)),1566            ('ab?c', 'abc', '0', ascii('abc')),1567            ('^abc$', 'abc', '0', ascii('abc')),1568            ('^abc$', 'abcc', '', 
ascii(None)),1569            ('^abc', 'abcc', '0', ascii('abc')),1570            ('^abc$', 'aabc', '', ascii(None)),1571            ('abc$', 'aabc', '0', ascii('abc')),1572            ('^', 'abc', '0', ascii('')),1573            ('$', 'abc', '0', ascii('')),1574            ('a.c', 'abc', '0', ascii('abc')),1575            ('a.c', 'axc', '0', ascii('axc')),1576            ('a.*c', 'axyzc', '0', ascii('axyzc')),1577            ('a.*c', 'axyzd', '', ascii(None)),1578            ('a[bc]d', 'abc', '', ascii(None)),1579            ('a[bc]d', 'abd', '0', ascii('abd')),1580            ('a[b-d]e', 'abd', '', ascii(None)),1581            ('a[b-d]e', 'ace', '0', ascii('ace')),1582            ('a[b-d]', 'aac', '0', ascii('ac')),1583            ('a[-b]', 'a-', '0', ascii('a-')),1584            ('a[\\-b]', 'a-', '0', ascii('a-')),1585            ('a[b-]', 'a-', '0', ascii('a-')),1586            ('a[]b', '-', '', regex.error, self.BAD_SET),1587            ('a[', '-', '', regex.error, self.BAD_SET),1588            ('a\\', '-', '', regex.error, self.BAD_ESCAPE),1589            ('abc)', '-', '', regex.error, self.TRAILING_CHARS),1590            ('(abc', '-', '', regex.error, self.MISSING_RPAREN),1591            ('a]', 'a]', '0', ascii('a]')),1592            ('a[]]b', 'a]b', '0', ascii('a]b')),1593            ('a[]]b', 'a]b', '0', ascii('a]b')),1594            ('a[^bc]d', 'aed', '0', ascii('aed')),1595            ('a[^bc]d', 'abd', '', ascii(None)),1596            ('a[^-b]c', 'adc', '0', ascii('adc')),1597            ('a[^-b]c', 'a-c', '', ascii(None)),1598            ('a[^]b]c', 'a]c', '', ascii(None)),1599            ('a[^]b]c', 'adc', '0', ascii('adc')),1600            ('\\ba\\b', 'a-', '0', ascii('a')),1601            ('\\ba\\b', '-a', '0', ascii('a')),1602            ('\\ba\\b', '-a-', '0', ascii('a')),1603            ('\\by\\b', 'xy', '', ascii(None)),1604            ('\\by\\b', 'yz', '', ascii(None)),1605            ('\\by\\b', 'xyz', '', ascii(None)),1606            ('x\\b', 
'xyz', '', ascii(None)),1607            ('x\\B', 'xyz', '0', ascii('x')),1608            ('\\Bz', 'xyz', '0', ascii('z')),1609            ('z\\B', 'xyz', '', ascii(None)),1610            ('\\Bx', 'xyz', '', ascii(None)),1611            ('\\Ba\\B', 'a-', '', ascii(None)),1612            ('\\Ba\\B', '-a', '', ascii(None)),1613            ('\\Ba\\B', '-a-', '', ascii(None)),1614            ('\\By\\B', 'xy', '', ascii(None)),1615            ('\\By\\B', 'yz', '', ascii(None)),1616            ('\\By\\b', 'xy', '0', ascii('y')),1617            ('\\by\\B', 'yz', '0', ascii('y')),1618            ('\\By\\B', 'xyz', '0', ascii('y')),1619            ('ab|cd', 'abc', '0', ascii('ab')),1620            ('ab|cd', 'abcd', '0', ascii('ab')),1621            ('()ef', 'def', '0,1', ascii(('ef', ''))),1622            ('$b', 'b', '', ascii(None)),1623            ('a\\(b', 'a(b', '', ascii(('a(b',))),1624            ('a\\(*b', 'ab', '0', ascii('ab')),1625            ('a\\(*b', 'a((b', '0', ascii('a((b')),1626            ('a\\\\b', 'a\\b', '0', ascii('a\\b')),1627            ('((a))', 'abc', '0,1,2', ascii(('a', 'a', 'a'))),1628            ('(a)b(c)', 'abc', '0,1,2', ascii(('abc', 'a', 'c'))),1629            ('a+b+c', 'aabbabc', '0', ascii('abc')),1630            ('(a+|b)*', 'ab', '0,1', ascii(('ab', 'b'))),1631            ('(a+|b)+', 'ab', '0,1', ascii(('ab', 'b'))),1632            ('(a+|b)?', 'ab', '0,1', ascii(('a', 'a'))),1633            (')(', '-', '', regex.error, self.TRAILING_CHARS),1634            ('[^ab]*', 'cde', '0', ascii('cde')),1635            ('abc', '', '', ascii(None)),1636            ('a*', '', '0', ascii('')),1637            ('a|b|c|d|e', 'e', '0', ascii('e')),1638            ('(a|b|c|d|e)f', 'ef', '0,1', ascii(('ef', 'e'))),1639            ('abcd*efg', 'abcdefg', '0', ascii('abcdefg')),1640            ('ab*', 'xabyabbbz', '0', ascii('ab')),1641            ('ab*', 'xayabbbz', '0', ascii('a')),1642            ('(ab|cd)e', 'abcde', '0,1', ascii(('cde', 'cd'))),1643        
    ('[abhgefdc]ij', 'hij', '0', ascii('hij')),1644            ('^(ab|cd)e', 'abcde', '', ascii(None)),1645            ('(abc|)ef', 'abcdef', '0,1', ascii(('ef', ''))),1646            ('(a|b)c*d', 'abcd', '0,1', ascii(('bcd', 'b'))),1647            ('(ab|ab*)bc', 'abc', '0,1', ascii(('abc', 'a'))),1648            ('a([bc]*)c*', 'abc', '0,1', ascii(('abc', 'bc'))),1649            ('a([bc]*)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),1650            ('a([bc]+)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),1651            ('a([bc]*)(c+d)', 'abcd', '0,1,2', ascii(('abcd', 'b', 'cd'))),1652            ('a[bcd]*dcdcde', 'adcdcde', '0', ascii('adcdcde')),1653            ('a[bcd]+dcdcde', 'adcdcde', '', ascii(None)),1654            ('(ab|a)b*c', 'abc', '0,1', ascii(('abc', 'ab'))),1655            ('((a)(b)c)(d)', 'abcd', '1,2,3,4', ascii(('abc', 'a', 'b', 'd'))),1656            ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', ascii('alpha')),1657            ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', ascii(('bh', None))),1658            ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', ascii(('effgz',1659              'effgz', None))),1660            ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', ascii(('ij', 'ij',1661              'j'))),1662            ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', ascii(None)),1663            ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', ascii(None)),1664            ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', ascii(('effgz',1665              'effgz', None))),1666            ('(((((((((a)))))))))', 'a', '0', ascii('a')),1667            ('multiple words of text', 'uh-uh', '', ascii(None)),1668            ('multiple words', 'multiple words, yeah', '0',1669              ascii('multiple words')),1670            ('(.*)c(.*)', 'abcde', '0,1,2', ascii(('abcde', 'ab', 'de'))),1671            ('\\((.*), (.*)\\)', '(a, b)', '2,1', ascii(('b', 'a'))),1672            ('[k]', 'ab', '', ascii(None)),1673            ('a[-]?c', 'ac', '0', ascii('ac')),1674            
('(abc)\\1', 'abcabc', '1', ascii('abc')),1675            ('([a-c]*)\\1', 'abcabc', '1', ascii('abc')),1676            ('^(.+)?B', 'AB', '1', ascii('A')),1677            ('(a+).\\1$', 'aaaaa', '0,1', ascii(('aaaaa', 'aa'))),1678            ('^(a+).\\1$', 'aaaa', '', ascii(None)),1679            ('(abc)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))),1680            ('([a-c]+)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))),1681            ('(a)\\1', 'aa', '0,1', ascii(('aa', 'a'))),1682            ('(a+)\\1', 'aa', '0,1', ascii(('aa', 'a'))),1683            ('(a+)+\\1', 'aa', '0,1', ascii(('aa', 'a'))),1684            ('(a).+\\1', 'aba', '0,1', ascii(('aba', 'a'))),1685            ('(a)ba*\\1', 'aba', '0,1', ascii(('aba', 'a'))),1686            ('(aa|a)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))),1687            ('(a|aa)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))),1688            ('(a+)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))),1689            ('([abc]*)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))),1690            ('(a)(b)c|ab', 'ab', '0,1,2', ascii(('ab', None, None))),1691            ('(a)+x', 'aaax', '0,1', ascii(('aaax', 'a'))),1692            ('([ac])+x', 'aacx', '0,1', ascii(('aacx', 'c'))),1693            ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', '0,1',1694              ascii(('d:msgs/tdir/sub1/', 'tdir/'))),1695            ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah',1696              '0,1,2,3', ascii(('track1.title:TBlah blah blah', 'track1',1697              'title', 'Blah blah blah'))),1698            ('([^N]*N)+', 'abNNxyzN', '0,1', ascii(('abNNxyzN', 'xyzN'))),1699            ('([^N]*N)+', 'abNNxyz', '0,1', ascii(('abNN', 'N'))),1700            ('([abc]*)x', 'abcx', '0,1', ascii(('abcx', 'abc'))),1701            ('([abc]*)x', 'abc', '', ascii(None)),1702            ('([xyz]*)x', 'abcx', '0,1', ascii(('x', ''))),1703            ('(a)+b|aac', 'aac', '0,1', ascii(('aac', None))),1704            # Test symbolic groups.1705      
      ('(?P<i d>aaa)a', 'aaaa', '', regex.error, self.BAD_GROUP_NAME),1706            ('(?P<id>aaa)a', 'aaaa', '0,id', ascii(('aaaa', 'aaa'))),1707            ('(?P<id>aa)(?P=id)', 'aaaa', '0,id', ascii(('aaaa', 'aa'))),1708            ('(?P<id>aa)(?P=xd)', 'aaaa', '', regex.error, self.UNKNOWN_GROUP),1709            # Character properties.1710            (r"\g", "g", '0', ascii('g')),1711            (r"\g<1>", "g", '', regex.error, self.INVALID_GROUP_REF),1712            (r"(.)\g<1>", "gg", '0', ascii('gg')),1713            (r"(.)\g<1>", "gg", '', ascii(('gg', 'g'))),1714            (r"\N", "N", '0', ascii('N')),1715            (r"\N{LATIN SMALL LETTER A}", "a", '0', ascii('a')),1716            (r"\p", "p", '0', ascii('p')),1717            (r"\p{Ll}", "a", '0', ascii('a')),1718            (r"\P", "P", '0', ascii('P')),1719            (r"\P{Lu}", "p", '0', ascii('p')),1720            # All tests from Perl.1721            ('abc', 'abc', '0', ascii('abc')),1722            ('abc', 'xbc', '', ascii(None)),1723            ('abc', 'axc', '', ascii(None)),1724            ('abc', 'abx', '', ascii(None)),1725            ('abc', 'xabcy', '0', ascii('abc')),1726            ('abc', 'ababc', '0', ascii('abc')),1727            ('ab*c', 'abc', '0', ascii('abc')),1728            ('ab*bc', 'abc', '0', ascii('abc')),1729            ('ab*bc', 'abbc', '0', ascii('abbc')),1730            ('ab*bc', 'abbbbc', '0', ascii('abbbbc')),1731            ('ab{0,}bc', 'abbbbc', '0', ascii('abbbbc')),1732            ('ab+bc', 'abbc', '0', ascii('abbc')),1733            ('ab+bc', 'abc', '', ascii(None)),1734            ('ab+bc', 'abq', '', ascii(None)),1735            ('ab{1,}bc', 'abq', '', ascii(None)),1736            ('ab+bc', 'abbbbc', '0', ascii('abbbbc')),1737            ('ab{1,}bc', 'abbbbc', '0', ascii('abbbbc')),1738            ('ab{1,3}bc', 'abbbbc', '0', ascii('abbbbc')),1739            ('ab{3,4}bc', 'abbbbc', '0', ascii('abbbbc')),1740            ('ab{4,5}bc', 'abbbbc', '', 
ascii(None)),1741            ('ab?bc', 'abbc', '0', ascii('abbc')),1742            ('ab?bc', 'abc', '0', ascii('abc')),1743            ('ab{0,1}bc', 'abc', '0', ascii('abc')),1744            ('ab?bc', 'abbbbc', '', ascii(None)),1745            ('ab?c', 'abc', '0', ascii('abc')),1746            ('ab{0,1}c', 'abc', '0', ascii('abc')),1747            ('^abc$', 'abc', '0', ascii('abc')),1748            ('^abc$', 'abcc', '', ascii(None)),1749            ('^abc', 'abcc', '0', ascii('abc')),1750            ('^abc$', 'aabc', '', ascii(None)),1751            ('abc$', 'aabc', '0', ascii('abc')),1752            ('^', 'abc', '0', ascii('')),1753            ('$', 'abc', '0', ascii('')),1754            ('a.c', 'abc', '0', ascii('abc')),1755            ('a.c', 'axc', '0', ascii('axc')),1756            ('a.*c', 'axyzc', '0', ascii('axyzc')),1757            ('a.*c', 'axyzd', '', ascii(None)),1758            ('a[bc]d', 'abc', '', ascii(None)),1759            ('a[bc]d', 'abd', '0', ascii('abd')),1760            ('a[b-d]e', 'abd', '', ascii(None)),1761            ('a[b-d]e', 'ace', '0', ascii('ace')),1762            ('a[b-d]', 'aac', '0', ascii('ac')),1763            ('a[-b]', 'a-', '0', ascii('a-')),1764            ('a[b-]', 'a-', '0', ascii('a-')),1765            ('a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE),1766            ('a[]b', '-', '', regex.error, self.BAD_SET),1767            ('a[', '-', '', regex.error, self.BAD_SET),1768            ('a]', 'a]', '0', ascii('a]')),1769            ('a[]]b', 'a]b', '0', ascii('a]b')),1770            ('a[^bc]d', 'aed', '0', ascii('aed')),1771            ('a[^bc]d', 'abd', '', ascii(None)),1772            ('a[^-b]c', 'adc', '0', ascii('adc')),1773            ('a[^-b]c', 'a-c', '', ascii(None)),1774            ('a[^]b]c', 'a]c', '', ascii(None)),1775            ('a[^]b]c', 'adc', '0', ascii('adc')),1776            ('ab|cd', 'abc', '0', ascii('ab')),1777            ('ab|cd', 'abcd', '0', ascii('ab')),1778            ('()ef', 'def', '0,1', 
ascii(('ef', ''))),1779            ('*a', '-', '', regex.error, self.NOTHING_TO_REPEAT),1780            ('(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT),1781            ('$b', 'b', '', ascii(None)),1782            ('a\\', '-', '', regex.error, self.BAD_ESCAPE),1783            ('a\\(b', 'a(b', '', ascii(('a(b',))),1784            ('a\\(*b', 'ab', '0', ascii('ab')),1785            ('a\\(*b', 'a((b', '0', ascii('a((b')),1786            ('a\\\\b', 'a\\b', '0', ascii('a\\b')),1787            ('abc)', '-', '', regex.error, self.TRAILING_CHARS),1788            ('(abc', '-', '', regex.error, self.MISSING_RPAREN),1789            ('((a))', 'abc', '0,1,2', ascii(('a', 'a', 'a'))),1790            ('(a)b(c)', 'abc', '0,1,2', ascii(('abc', 'a', 'c'))),1791            ('a+b+c', 'aabbabc', '0', ascii('abc')),1792            ('a{1,}b{1,}c', 'aabbabc', '0', ascii('abc')),1793            ('a**', '-', '', regex.error, self.MULTIPLE_REPEAT),1794            ('a.+?c', 'abcabc', '0', ascii('abc')),1795            ('(a+|b)*', 'ab', '0,1', ascii(('ab', 'b'))),1796            ('(a+|b){0,}', 'ab', '0,1', ascii(('ab', 'b'))),1797            ('(a+|b)+', 'ab', '0,1', ascii(('ab', 'b'))),1798            ('(a+|b){1,}', 'ab', '0,1', ascii(('ab', 'b'))),1799            ('(a+|b)?', 'ab', '0,1', ascii(('a', 'a'))),1800            ('(a+|b){0,1}', 'ab', '0,1', ascii(('a', 'a'))),1801            (')(', '-', '', regex.error, self.TRAILING_CHARS),1802            ('[^ab]*', 'cde', '0', ascii('cde')),1803            ('abc', '', '', ascii(None)),1804            ('a*', '', '0', ascii('')),1805            ('([abc])*d', 'abbbcd', '0,1', ascii(('abbbcd', 'c'))),1806            ('([abc])*bcd', 'abcd', '0,1', ascii(('abcd', 'a'))),1807            ('a|b|c|d|e', 'e', '0', ascii('e')),1808            ('(a|b|c|d|e)f', 'ef', '0,1', ascii(('ef', 'e'))),1809            ('abcd*efg', 'abcdefg', '0', ascii('abcdefg')),1810            ('ab*', 'xabyabbbz', '0', ascii('ab')),1811            ('ab*', 'xayabbbz', '0', 
ascii('a')),1812            ('(ab|cd)e', 'abcde', '0,1', ascii(('cde', 'cd'))),1813            ('[abhgefdc]ij', 'hij', '0', ascii('hij')),1814            ('^(ab|cd)e', 'abcde', '', ascii(None)),1815            ('(abc|)ef', 'abcdef', '0,1', ascii(('ef', ''))),1816            ('(a|b)c*d', 'abcd', '0,1', ascii(('bcd', 'b'))),1817            ('(ab|ab*)bc', 'abc', '0,1', ascii(('abc', 'a'))),1818            ('a([bc]*)c*', 'abc', '0,1', ascii(('abc', 'bc'))),1819            ('a([bc]*)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),1820            ('a([bc]+)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),1821            ('a([bc]*)(c+d)', 'abcd', '0,1,2', ascii(('abcd', 'b', 'cd'))),1822            ('a[bcd]*dcdcde', 'adcdcde', '0', ascii('adcdcde')),1823            ('a[bcd]+dcdcde', 'adcdcde', '', ascii(None)),1824            ('(ab|a)b*c', 'abc', '0,1', ascii(('abc', 'ab'))),1825            ('((a)(b)c)(d)', 'abcd', '1,2,3,4', ascii(('abc', 'a', 'b', 'd'))),1826            ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', ascii('alpha')),1827            ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', ascii(('bh', None))),1828            ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', ascii(('effgz',1829              'effgz', None))),1830            ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', ascii(('ij', 'ij',1831              'j'))),1832            ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', ascii(None)),1833            ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', ascii(None)),1834            ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', ascii(('effgz',1835              'effgz', None))),1836            ('((((((((((a))))))))))', 'a', '10', ascii('a')),1837            ('((((((((((a))))))))))\\10', 'aa', '0', ascii('aa')),1838            # Python does not have the same rules for \\41 so this is a syntax error1839            #    ('((((((((((a))))))))))\\41', 'aa', '', ascii(None)),1840            #    ('((((((((((a))))))))))\\41', 'a!', '0', ascii('a!')),1841            ('((((((((((a))))))))))\\41', 
'', '', regex.error,1842              self.INVALID_GROUP_REF),1843            ('(?i)((((((((((a))))))))))\\41', '', '', regex.error,1844              self.INVALID_GROUP_REF),1845            ('(((((((((a)))))))))', 'a', '0', ascii('a')),1846            ('multiple words of text', 'uh-uh', '', ascii(None)),1847            ('multiple words', 'multiple words, yeah', '0',1848              ascii('multiple words')),1849            ('(.*)c(.*)', 'abcde', '0,1,2', ascii(('abcde', 'ab', 'de'))),1850            ('\\((.*), (.*)\\)', '(a, b)', '2,1', ascii(('b', 'a'))),1851            ('[k]', 'ab', '', ascii(None)),1852            ('a[-]?c', 'ac', '0', ascii('ac')),1853            ('(abc)\\1', 'abcabc', '1', ascii('abc')),1854            ('([a-c]*)\\1', 'abcabc', '1', ascii('abc')),1855            ('(?i)abc', 'ABC', '0', ascii('ABC')),1856            ('(?i)abc', 'XBC', '', ascii(None)),1857            ('(?i)abc', 'AXC', '', ascii(None)),1858            ('(?i)abc', 'ABX', '', ascii(None)),1859            ('(?i)abc', 'XABCY', '0', ascii('ABC')),1860            ('(?i)abc', 'ABABC', '0', ascii('ABC')),1861            ('(?i)ab*c', 'ABC', '0', ascii('ABC')),1862            ('(?i)ab*bc', 'ABC', '0', ascii('ABC')),1863            ('(?i)ab*bc', 'ABBC', '0', ascii('ABBC')),1864            ('(?i)ab*?bc', 'ABBBBC', '0', ascii('ABBBBC')),1865            ('(?i)ab{0,}?bc', 'ABBBBC', '0', ascii('ABBBBC')),1866            ('(?i)ab+?bc', 'ABBC', '0', ascii('ABBC')),1867            ('(?i)ab+bc', 'ABC', '', ascii(None)),1868            ('(?i)ab+bc', 'ABQ', '', ascii(None)),1869            ('(?i)ab{1,}bc', 'ABQ', '', ascii(None)),1870            ('(?i)ab+bc', 'ABBBBC', '0', ascii('ABBBBC')),1871            ('(?i)ab{1,}?bc', 'ABBBBC', '0', ascii('ABBBBC')),1872            ('(?i)ab{1,3}?bc', 'ABBBBC', '0', ascii('ABBBBC')),1873            ('(?i)ab{3,4}?bc', 'ABBBBC', '0', ascii('ABBBBC')),1874            ('(?i)ab{4,5}?bc', 'ABBBBC', '', ascii(None)),1875            ('(?i)ab??bc', 'ABBC', '0', 
ascii('ABBC')),1876            ('(?i)ab??bc', 'ABC', '0', ascii('ABC')),1877            ('(?i)ab{0,1}?bc', 'ABC', '0', ascii('ABC')),1878            ('(?i)ab??bc', 'ABBBBC', '', ascii(None)),1879            ('(?i)ab??c', 'ABC', '0', ascii('ABC')),1880            ('(?i)ab{0,1}?c', 'ABC', '0', ascii('ABC')),1881            ('(?i)^abc$', 'ABC', '0', ascii('ABC')),1882            ('(?i)^abc$', 'ABCC', '', ascii(None)),1883            ('(?i)^abc', 'ABCC', '0', ascii('ABC')),1884            ('(?i)^abc$', 'AABC', '', ascii(None)),1885            ('(?i)abc$', 'AABC', '0', ascii('ABC')),1886            ('(?i)^', 'ABC', '0', ascii('')),1887            ('(?i)$', 'ABC', '0', ascii('')),1888            ('(?i)a.c', 'ABC', '0', ascii('ABC')),1889            ('(?i)a.c', 'AXC', '0', ascii('AXC')),1890            ('(?i)a.*?c', 'AXYZC', '0', ascii('AXYZC')),1891            ('(?i)a.*c', 'AXYZD', '', ascii(None)),1892            ('(?i)a[bc]d', 'ABC', '', ascii(None)),1893            ('(?i)a[bc]d', 'ABD', '0', ascii('ABD')),1894            ('(?i)a[b-d]e', 'ABD', '', ascii(None)),1895            ('(?i)a[b-d]e', 'ACE', '0', ascii('ACE')),1896            ('(?i)a[b-d]', 'AAC', '0', ascii('AC')),1897            ('(?i)a[-b]', 'A-', '0', ascii('A-')),1898            ('(?i)a[b-]', 'A-', '0', ascii('A-')),1899            ('(?i)a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE),1900            ('(?i)a[]b', '-', '', regex.error, self.BAD_SET),1901            ('(?i)a[', '-', '', regex.error, self.BAD_SET),1902            ('(?i)a]', 'A]', '0', ascii('A]')),1903            ('(?i)a[]]b', 'A]B', '0', ascii('A]B')),1904            ('(?i)a[^bc]d', 'AED', '0', ascii('AED')),1905            ('(?i)a[^bc]d', 'ABD', '', ascii(None)),1906            ('(?i)a[^-b]c', 'ADC', '0', ascii('ADC')),1907            ('(?i)a[^-b]c', 'A-C', '', ascii(None)),1908            ('(?i)a[^]b]c', 'A]C', '', ascii(None)),1909            ('(?i)a[^]b]c', 'ADC', '0', ascii('ADC')),1910            ('(?i)ab|cd', 'ABC', '0', 
ascii('AB')),1911            ('(?i)ab|cd', 'ABCD', '0', ascii('AB')),1912            ('(?i)()ef', 'DEF', '0,1', ascii(('EF', ''))),1913            ('(?i)*a', '-', '', regex.error, self.NOTHING_TO_REPEAT),1914            ('(?i)(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT),1915            ('(?i)$b', 'B', '', ascii(None)),1916            ('(?i)a\\', '-', '', regex.error, self.BAD_ESCAPE),1917            ('(?i)a\\(b', 'A(B', '', ascii(('A(B',))),1918            ('(?i)a\\(*b', 'AB', '0', ascii('AB')),1919            ('(?i)a\\(*b', 'A((B', '0', ascii('A((B')),1920            ('(?i)a\\\\b', 'A\\B', '0', ascii('A\\B')),1921            ('(?i)abc)', '-', '', regex.error, self.TRAILING_CHARS),1922            ('(?i)(abc', '-', '', regex.error, self.MISSING_RPAREN),1923            ('(?i)((a))', 'ABC', '0,1,2', ascii(('A', 'A', 'A'))),1924            ('(?i)(a)b(c)', 'ABC', '0,1,2', ascii(('ABC', 'A', 'C'))),1925            ('(?i)a+b+c', 'AABBABC', '0', ascii('ABC')),1926            ('(?i)a{1,}b{1,}c', 'AABBABC', '0', ascii('ABC')),1927            ('(?i)a**', '-', '', regex.error, self.MULTIPLE_REPEAT),1928            ('(?i)a.+?c', 'ABCABC', '0', ascii('ABC')),1929            ('(?i)a.*?c', 'ABCABC', '0', ascii('ABC')),1930            ('(?i)a.{0,5}?c', 'ABCABC', '0', ascii('ABC')),1931            ('(?i)(a+|b)*', 'AB', '0,1', ascii(('AB', 'B'))),1932            ('(?i)(a+|b){0,}', 'AB', '0,1', ascii(('AB', 'B'))),1933            ('(?i)(a+|b)+', 'AB', '0,1', ascii(('AB', 'B'))),1934            ('(?i)(a+|b){1,}', 'AB', '0,1', ascii(('AB', 'B'))),1935            ('(?i)(a+|b)?', 'AB', '0,1', ascii(('A', 'A'))),1936            ('(?i)(a+|b){0,1}', 'AB', '0,1', ascii(('A', 'A'))),1937            ('(?i)(a+|b){0,1}?', 'AB', '0,1', ascii(('', None))),1938            ('(?i))(', '-', '', regex.error, self.TRAILING_CHARS),1939            ('(?i)[^ab]*', 'CDE', '0', ascii('CDE')),1940            ('(?i)abc', '', '', ascii(None)),1941            ('(?i)a*', '', '0', ascii('')),1942            
('(?i)([abc])*d', 'ABBBCD', '0,1', ascii(('ABBBCD', 'C'))),1943            ('(?i)([abc])*bcd', 'ABCD', '0,1', ascii(('ABCD', 'A'))),1944            ('(?i)a|b|c|d|e', 'E', '0', ascii('E')),1945            ('(?i)(a|b|c|d|e)f', 'EF', '0,1', ascii(('EF', 'E'))),1946            ('(?i)abcd*efg', 'ABCDEFG', '0', ascii('ABCDEFG')),1947            ('(?i)ab*', 'XABYABBBZ', '0', ascii('AB')),1948            ('(?i)ab*', 'XAYABBBZ', '0', ascii('A')),1949            ('(?i)(ab|cd)e', 'ABCDE', '0,1', ascii(('CDE', 'CD'))),1950            ('(?i)[abhgefdc]ij', 'HIJ', '0', ascii('HIJ')),1951            ('(?i)^(ab|cd)e', 'ABCDE', '', ascii(None)),1952            ('(?i)(abc|)ef', 'ABCDEF', '0,1', ascii(('EF', ''))),1953            ('(?i)(a|b)c*d', 'ABCD', '0,1', ascii(('BCD', 'B'))),1954            ('(?i)(ab|ab*)bc', 'ABC', '0,1', ascii(('ABC', 'A'))),1955            ('(?i)a([bc]*)c*', 'ABC', '0,1', ascii(('ABC', 'BC'))),1956            ('(?i)a([bc]*)(c*d)', 'ABCD', '0,1,2', ascii(('ABCD', 'BC', 'D'))),1957            ('(?i)a([bc]+)(c*d)', 'ABCD', '0,1,2', ascii(('ABCD', 'BC', 'D'))),1958            ('(?i)a([bc]*)(c+d)', 'ABCD', '0,1,2', ascii(('ABCD', 'B', 'CD'))),1959            ('(?i)a[bcd]*dcdcde', 'ADCDCDE', '0', ascii('ADCDCDE')),1960            ('(?i)a[bcd]+dcdcde', 'ADCDCDE', '', ascii(None)),1961            ('(?i)(ab|a)b*c', 'ABC', '0,1', ascii(('ABC', 'AB'))),1962            ('(?i)((a)(b)c)(d)', 'ABCD', '1,2,3,4', ascii(('ABC', 'A', 'B',1963              'D'))),1964            ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', '0', ascii('ALPHA')),1965            ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', '0,1', ascii(('BH', None))),1966            ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', '0,1,2', ascii(('EFFGZ',1967              'EFFGZ', None))),1968            ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', '0,1,2', ascii(('IJ', 'IJ',1969              'J'))),1970            ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', '', ascii(None)),1971            ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', '', ascii(None)),1972  
          ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', '0,1,2', ascii(('EFFGZ',1973              'EFFGZ', None))),1974            ('(?i)((((((((((a))))))))))', 'A', '10', ascii('A')),1975            ('(?i)((((((((((a))))))))))\\10', 'AA', '0', ascii('AA')),1976            #('(?i)((((((((((a))))))))))\\41', 'AA', '', ascii(None)),1977            #('(?i)((((((((((a))))))))))\\41', 'A!', '0', ascii('A!')),1978            ('(?i)(((((((((a)))))))))', 'A', '0', ascii('A')),1979            ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', '1',1980              ascii('A')),1981            ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', '1',1982              ascii('C')),1983            ('(?i)multiple words of text', 'UH-UH', '', ascii(None)),1984            ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', '0',1985             ascii('MULTIPLE WORDS')),1986            ('(?i)(.*)c(.*)', 'ABCDE', '0,1,2', ascii(('ABCDE', 'AB', 'DE'))),1987            ('(?i)\\((.*), (.*)\\)', '(A, B)', '2,1', ascii(('B', 'A'))),1988            ('(?i)[k]', 'AB', '', ascii(None)),1989        #    ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', ascii(ABCD-$&-\\ABCD)),1990        #    ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', ascii(BC-$1-\\BC)),1991            ('(?i)a[-]?c', 'AC', '0', ascii('AC')),1992            ('(?i)(abc)\\1', 'ABCABC', '1', ascii('ABC')),1993            ('(?i)([a-c]*)\\1', 'ABCABC', '1', ascii('ABC')),1994            ('a(?!b).', 'abad', '0', ascii('ad')),1995            ('a(?=d).', 'abad', '0', ascii('ad')),1996            ('a(?=c|d).', 'abad', '0', ascii('ad')),1997            ('a(?:b|c|d)(.)', 'ace', '1', ascii('e')),1998            ('a(?:b|c|d)*(.)', 'ace', '1', ascii('e')),1999            ('a(?:b|c|d)+?(.)', 'ace', '1', ascii('e')),2000            ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', '1,2', ascii(('c', 'e'))),2001            # Lookbehind: split by : but not if it is escaped by -.2002            ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', 
'1', ascii('bc-:de')),2003            # Escaping with \ as we know it.2004            ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', '1', ascii('bc\\:de')),2005            # Terminating with ' and escaping with ? as in edifact.2006            ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", '1', ascii("bc?'de")),2007            # Comments using the (?#...) syntax.2008            ('w(?# comment', 'w', '', regex.error, self.MISSING_RPAREN),2009            ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', '0', ascii('wxyz')),2010            # Check odd placement of embedded pattern modifiers.2011            # Not an error under PCRE/PRE:2012            # When the new behaviour is turned on positional inline flags affect2013            # only what follows.2014            ('w(?i)', 'W', '0', ascii('W')),2015            ('w(?iV1)', 'W', '0', ascii(None)),2016            ('w(?i)', 'w', '0', ascii('w')),2017            ('w(?iV1)', 'w', '0', ascii('w')),2018            ('(?i)w', 'W', '0', ascii('W')),2019            ('(?iV1)w', 'W', '0', ascii('W')),2020            # Comments using the x embedded pattern modifier.2021            ("""(?x)w# comment 12022x y2023# comment 22024z""", 'wxyz', '0', ascii('wxyz')),2025            # Using the m embedded pattern modifier.2026            ('^abc', """jkl2027abc2028xyz""", '', ascii(None)),2029            ('(?m)^abc', """jkl2030abc2031xyz""", '0', ascii('abc')),2032            ('(?m)abc$', """jkl2033xyzabc2034123""", '0', ascii('abc')),2035            # Using the s embedded pattern modifier.2036            ('a.b', 'a\nb', '', ascii(None)),2037            ('(?s)a.b', 'a\nb', '0', ascii('a\nb')),2038            # Test \w, etc. 
both inside and outside character classes.2039            ('\\w+', '--ab_cd0123--', '0', ascii('ab_cd0123')),2040            ('[\\w]+', '--ab_cd0123--', '0', ascii('ab_cd0123')),2041            ('\\D+', '1234abc5678', '0', ascii('abc')),2042            ('[\\D]+', '1234abc5678', '0', ascii('abc')),2043            ('[\\da-fA-F]+', '123abc', '0', ascii('123abc')),2044            # Not an error under PCRE/PRE:2045            # ('[\\d-x]', '-', '', regex.error, self.BAD_CHAR_RANGE),2046            (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', '3,2,1', ascii(('',2047              'testing!1972', ' '))),2048            (r'(\s*)(\S*)(\s*)', ' testing!1972', '3,2,1', ascii(('',2049              'testing!1972', ' '))),2050            #2051            # Post-1.5.2 additions.2052            # xmllib problem.2053            (r'(([a-z]+):)?([a-z]+)$', 'smil', '1,2,3', ascii((None, None,2054              'smil'))),2055            # Bug 110866: reference to undefined group.2056            (r'((.)\1+)', '', '', regex.error, self.OPEN_GROUP),2057            # Bug 111869: search (PRE/PCRE fails on this one, SRE doesn't).2058            (r'.*d', 'abc\nabd', '0', ascii('abd')),2059            # Bug 112468: various expected syntax errors.2060            (r'(', '', '', regex.error, self.MISSING_RPAREN),2061            (r'[\41]', '!', '0', ascii('!')),2062            # Bug 114033: nothing to repeat.2063            (r'(x?)?', 'x', '0', ascii('x')),2064            # Bug 115040: rescan if flags are modified inside pattern.2065            # If the new behaviour is turned on then positional inline flags2066            # affect only what follows.2067            (r' (?x)foo ', 'foo', '0', ascii('foo')),2068            (r' (?V1x)foo ', 'foo', '0', ascii(None)),2069            (r'(?x) foo ', 'foo', '0', ascii('foo')),2070            (r'(?V1x) foo ', 'foo', '0', ascii('foo')),2071            (r'(?x)foo ', 'foo', '0', ascii('foo')),2072            (r'(?V1x)foo ', 'foo', '0', ascii('foo')),2073         
   # Bug 115618: negative lookahead.2074            (r'(?<!abc)(d.f)', 'abcdefdof', '0', ascii('dof')),2075            # Bug 116251: character class bug.2076            (r'[\w-]+', 'laser_beam', '0', ascii('laser_beam')),2077            # Bug 123769+127259: non-greedy backtracking bug.2078            (r'.*?\S *:', 'xx:', '0', ascii('xx:')),2079            (r'a[ ]*?\ (\d+).*', 'a   10', '0', ascii('a   10')),2080            (r'a[ ]*?\ (\d+).*', 'a    10', '0', ascii('a    10')),2081            # Bug 127259: \Z shouldn't depend on multiline mode.2082            (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', '1', ascii('')),2083            # Bug 128899: uppercase literals under the ignorecase flag.2084            (r'(?i)M+', 'MMM', '0', ascii('MMM')),2085            (r'(?i)m+', 'MMM', '0', ascii('MMM')),2086            (r'(?i)[M]+', 'MMM', '0', ascii('MMM')),2087            (r'(?i)[m]+', 'MMM', '0', ascii('MMM')),2088            # Bug 130748: ^* should be an error (nothing to repeat).2089            # In 'regex' we won't bother to complain about this.2090            # (r'^*', '', '', regex.error, self.NOTHING_TO_REPEAT),2091            # Bug 133283: minimizing repeat problem.2092            (r'"(?:\\"|[^"])*?"', r'"\""', '0', ascii(r'"\""')),2093            # Bug 477728: minimizing repeat problem.2094            (r'^.*?$', 'one\ntwo\nthree\n', '', ascii(None)),2095            # Bug 483789: minimizing repeat problem.2096            (r'a[^>]*?b', 'a>b', '', ascii(None)),2097            # Bug 490573: minimizing repeat problem.2098            (r'^a*?$', 'foo', '', ascii(None)),2099            # Bug 470582: nested groups problem.2100            (r'^((a)c)?(ab)$', 'ab', '1,2,3', ascii((None, None, 'ab'))),2101            # Another minimizing repeat problem (capturing groups in assertions).2102            ('^([ab]*?)(?=(b)?)c', 'abc', '1,2', ascii(('ab', None))),2103            ('^([ab]*?)(?!(b))c', 'abc', '1,2', ascii(('ab', None))),2104            ('^([ab]*?)(?<!(a))c', 'abc', '1,2', 
ascii(('ab', None))),2105            # Bug 410271: \b broken under locales.2106            (r'\b.\b', 'a', '0', ascii('a')),2107            (r'\b.\b', '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}', '0',2108              ascii('\xc4')),2109            (r'\w', '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}', '0',2110              ascii('\xc4')),2111        ]2112        for t in tests:2113            excval = None2114            try:2115                if len(t) == 4:2116                    pattern, string, groups, expected = t2117                else:2118                    pattern, string, groups, expected, excval = t2119            except ValueError:2120                fields = ", ".join([ascii(f) for f in t[ : 3]] + ["..."])2121                self.fail("Incorrect number of test fields: ({})".format(fields))2122            else:2123                group_list = []2124                if groups:2125                    for group in groups.split(","):2126                        try:2127                            group_list.append(int(group))2128                        except ValueError:2129                            group_list.append(group)2130                if excval is not None:2131                    if (sys.version_info.major, sys.version_info.minor) >= (3, 4):2132                        with self.subTest(pattern=pattern, string=string):2133                            self.assertRaisesRegex(expected, excval, regex.search,2134                              pattern, string)2135                else:2136                    m = regex.search(pattern, string)2137                    if m:2138                        if group_list:2139                            actual = ascii(m.group(*group_list))2140                        else:2141                            actual = ascii(m[:])2142                    else:2143                        actual = ascii(m)2144                    self.assertEqual(actual, expected)2145    def test_replacement(self):2146        
self.assertEqual(regex.sub("test\?", "result\?\.\a\q\m\n", "test?"),2147          "result\?\.\a\q\m\n")2148        self.assertEqual(regex.sub(r"test\?", "result\?\.\a\q\m\n", "test?"),2149          "result\?\.\a\q\m\n")2150        self.assertEqual(regex.sub('(.)', r"\1\1", 'x'), 'xx')2151        self.assertEqual(regex.sub('(.)', regex.escape(r"\1\1"), 'x'), r"\1\1")2152        self.assertEqual(regex.sub('(.)', r"\\1\\1", 'x'), r"\1\1")2153        self.assertEqual(regex.sub('(.)', lambda m: r"\1\1", 'x'), r"\1\1")2154    def test_common_prefix(self):2155        # Very long common prefix2156        all = string.ascii_lowercase + string.digits + string.ascii_uppercase2157        side = all * 42158        regexp = '(' + side + '|' + side + ')'2159        self.assertEqual(repr(type(regex.compile(regexp))), self.PATTERN_CLASS)2160    def test_captures(self):2161        self.assertEqual(regex.search(r"(\w)+", "abc").captures(1), ['a', 'b',2162          'c'])2163        self.assertEqual(regex.search(r"(\w{3})+", "abcdef").captures(0, 1),2164          (['abcdef'], ['abc', 'def']))2165        self.assertEqual(regex.search(r"^(\d{1,3})(?:\.(\d{1,3})){3}$",2166          "192.168.0.1").captures(1, 2), (['192', ], ['168', '0', '1']))2167        self.assertEqual(regex.match(r"^([0-9A-F]{2}){4} ([a-z]\d){5}$",2168          "3FB52A0C a2c4g3k9d3").captures(1, 2), (['3F', 'B5', '2A', '0C'],2169          ['a2', 'c4', 'g3', 'k9', 'd3']))2170        self.assertEqual(regex.match("([a-z]W)([a-z]X)+([a-z]Y)",2171          "aWbXcXdXeXfY").captures(1, 2, 3), (['aW'], ['bX', 'cX', 'dX', 'eX'],2172          ['fY']))2173        self.assertEqual(regex.search(r".*?(?=(.)+)b", "ab").captures(1),2174          ['b'])2175        self.assertEqual(regex.search(r".*?(?>(.){0,2})d", "abcd").captures(1),2176          ['b', 'c'])2177        self.assertEqual(regex.search(r"(.)+", "a").captures(1), ['a'])2178    def test_guards(self):2179        m = regex.search(r"(X.*?Y\s*){3}(X\s*)+AB:",2180          
"XY\nX Y\nX  Y\nXY\nXX AB:")2181        self.assertEqual(m.span(0, 1, 2), ((3, 21), (12, 15), (16, 18)))2182        m = regex.search(r"(X.*?Y\s*){3,}(X\s*)+AB:",2183          "XY\nX Y\nX  Y\nXY\nXX AB:")2184        self.assertEqual(m.span(0, 1, 2), ((0, 21), (12, 15), (16, 18)))2185        m = regex.search(r'\d{4}(\s*\w)?\W*((?!\d)\w){2}', "9999XX")2186        self.assertEqual(m.span(0, 1, 2), ((0, 6), (-1, -1), (5, 6)))2187        m = regex.search(r'A\s*?.*?(\n+.*?\s*?){0,2}\(X', 'A\n1\nS\n1 (X')2188        self.assertEqual(m.span(0, 1), ((0, 10), (5, 8)))2189        m = regex.search('Derde\s*:', 'aaaaaa:\nDerde:')2190        self.assertEqual(m.span(), (8, 14))2191        m = regex.search('Derde\s*:', 'aaaaa:\nDerde:')2192        self.assertEqual(m.span(), (7, 13))2193    def test_turkic(self):2194        # Turkish has dotted and dotless I/i.2195        pairs = "I=i;I=\u0131;i=\u0130"2196        all_chars = set()2197        matching = set()2198        for pair in pairs.split(";"):2199            ch1, ch2 = pair.split("=")2200            all_chars.update((ch1, ch2))2201            matching.add((ch1, ch1))2202            matching.add((ch1, ch2))2203            matching.add((ch2, ch1))2204            matching.add((ch2, ch2))2205        for ch1 in all_chars:2206            for ch2 in all_chars:2207                m = regex.match(r"(?i)\A" + ch1 + r"\Z", ch2)2208                if m:2209                    if (ch1, ch2) not in matching:2210                        self.fail("{} matching {}".format(ascii(ch1),2211                          ascii(ch2)))2212                else:2213                    if (ch1, ch2) in matching:2214                        self.fail("{} not matching {}".format(ascii(ch1),2215                          ascii(ch2)))2216    def test_named_lists(self):2217        options = ["one", "two", "three"]2218        self.assertEqual(regex.match(r"333\L<bar>444", "333one444",2219          bar=options).group(), "333one444")2220        
self.assertEqual(regex.match(r"(?i)333\L<bar>444", "333TWO444",2221          bar=options).group(), "333TWO444")2222        self.assertEqual(regex.match(r"333\L<bar>444", "333four444",2223          bar=options), None)2224        options = [b"one", b"two", b"three"]2225        self.assertEqual(regex.match(br"333\L<bar>444", b"333one444",2226          bar=options).group(), b"333one444")2227        self.assertEqual(regex.match(br"(?i)333\L<bar>444", b"333TWO444",2228          bar=options).group(), b"333TWO444")2229        self.assertEqual(regex.match(br"333\L<bar>444", b"333four444",2230          bar=options), None)2231        self.assertEqual(repr(type(regex.compile(r"3\L<bar>4\L<bar>+5",2232          bar=["one", "two", "three"]))), self.PATTERN_CLASS)2233        self.assertEqual(regex.findall(r"^\L<options>", "solid QWERT",2234          options=set(['good', 'brilliant', '+s\\ol[i}d'])), [])2235        self.assertEqual(regex.findall(r"^\L<options>", "+solid QWERT",2236          options=set(['good', 'brilliant', '+solid'])), ['+solid'])2237        options = ["STRASSE"]2238        self.assertEqual(regex.match(r"(?fi)\L<words>",2239          "stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0,2240          6))2241        options = ["STRASSE", "stress"]2242        self.assertEqual(regex.match(r"(?fi)\L<words>",2243          "stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0,2244          6))2245        options = ["stra\N{LATIN SMALL LETTER SHARP S}e"]2246        self.assertEqual(regex.match(r"(?fi)\L<words>", "STRASSE",2247          words=options).span(), (0, 7))2248        options = ["kit"]2249        self.assertEqual(regex.search(r"(?i)\L<words>", "SKITS",2250          words=options).span(), (1, 4))2251        self.assertEqual(regex.search(r"(?i)\L<words>",2252          "SK\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}TS",2253          words=options).span(), (1, 4))2254        self.assertEqual(regex.search(r"(?fi)\b(\w+) +\1\b",2255          " 
stra\N{LATIN SMALL LETTER SHARP S}e STRASSE ").span(), (1, 15))2256        self.assertEqual(regex.search(r"(?fi)\b(\w+) +\1\b",2257          " STRASSE stra\N{LATIN SMALL LETTER SHARP S}e ").span(), (1, 15))2258        self.assertEqual(regex.search(r"^\L<options>$", "", options=[]).span(),2259          (0, 0))2260    def test_fuzzy(self):2261        # Some tests borrowed from TRE library tests.2262        self.assertEqual(repr(type(regex.compile('(fou){s,e<=1}'))),2263          self.PATTERN_CLASS)2264        self.assertEqual(repr(type(regex.compile('(fuu){s}'))),2265          self.PATTERN_CLASS)2266        self.assertEqual(repr(type(regex.compile('(fuu){s,e}'))),2267          self.PATTERN_CLASS)2268        self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1}'))),2269          self.PATTERN_CLASS)2270        self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1,e<=10}'))),2271          self.PATTERN_CLASS)2272        self.assertEqual(repr(type(regex.compile('(anaconda){s<=1,e<=1,1i+1d<1}'))),2273          self.PATTERN_CLASS)2274        text = 'molasses anaconda foo bar baz smith anderson '2275        self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<1}', text),2276          None)2277        self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<2}',2278          text).span(0, 1), ((9, 17), (9, 17)))2279        self.assertEqual(regex.search('(ananda){1i+1d<2}', text), None)2280        self.assertEqual(regex.search(r"(?:\bznacnda){e<=2}", text)[0],2281          "anaconda")2282        self.assertEqual(regex.search(r"(?:\bnacnda){e<=2}", text)[0],2283          "anaconda")2284        text = 'anaconda foo bar baz smith anderson'2285        self.assertEqual(regex.search('(fuu){i<=3,d<=3,e<=5}', text).span(0,2286          1), ((0, 0), (0, 0)))2287        self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e<=5}',2288          text).span(0, 1), ((9, 10), (9, 10)))2289        self.assertEqual(regex.search('(fuu){i<=2,d<=2,e<=5}', 
text).span(0,2290          1), ((7, 10), (7, 10)))2291        self.assertEqual(regex.search('(?e)(fuu){i<=2,d<=2,e<=5}',2292          text).span(0, 1), ((9, 10), (9, 10)))2293        self.assertEqual(regex.search('(fuu){i<=3,d<=3,e}', text).span(0, 1),2294          ((0, 0), (0, 0)))2295        self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e}', text).span(0,2296          1), ((9, 10), (9, 10)))2297        self.assertEqual(repr(type(regex.compile('(approximate){s<=3,1i+1d<3}'))),2298          self.PATTERN_CLASS)2299        # No cost limit.2300        self.assertEqual(regex.search('(foobar){e}',2301          'xirefoabralfobarxie').span(0, 1), ((0, 6), (0, 6)))2302        self.assertEqual(regex.search('(?e)(foobar){e}',2303          'xirefoabralfobarxie').span(0, 1), ((0, 3), (0, 3)))2304        self.assertEqual(regex.search('(?b)(foobar){e}',2305          'xirefoabralfobarxie').span(0, 1), ((11, 16), (11, 16)))2306        # At most two errors.2307        self.assertEqual(regex.search('(foobar){e<=2}',2308          'xirefoabrzlfd').span(0, 1), ((4, 9), (4, 9)))2309        self.assertEqual(regex.search('(foobar){e<=2}', 'xirefoabzlfd'), None)2310        # At most two inserts or substitutions and max two errors total.2311        self.assertEqual(regex.search('(foobar){i<=2,s<=2,e<=2}',2312          'oobargoobaploowap').span(0, 1), ((5, 11), (5, 11)))2313        # Find best whole word match for "foobar".2314        self.assertEqual(regex.search('\\b(foobar){e}\\b', 'zfoobarz').span(0,2315          1), ((0, 8), (0, 8)))2316        self.assertEqual(regex.search('\\b(foobar){e}\\b',2317          'boing zfoobarz goobar woop').span(0, 1), ((0, 6), (0, 6)))2318        self.assertEqual(regex.search('(?b)\\b(foobar){e}\\b',2319          'boing zfoobarz goobar woop').span(0, 1), ((15, 21), (15, 21)))2320        # Match whole string, allow only 1 error.2321        self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobar').span(0, 1),2322          ((0, 6), (0, 6)))2323        
self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobar').span(0,2324          1), ((0, 7), (0, 7)))2325        self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarx').span(0,2326          1), ((0, 7), (0, 7)))2327        self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooxbar').span(0,2328          1), ((0, 7), (0, 7)))2329        self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbar').span(0, 1),2330          ((0, 6), (0, 6)))2331        self.assertEqual(regex.search('^(foobar){e<=1}$', 'xoobar').span(0, 1),2332          ((0, 6), (0, 6)))2333        self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobax').span(0, 1),2334          ((0, 6), (0, 6)))2335        self.assertEqual(regex.search('^(foobar){e<=1}$', 'oobar').span(0, 1),2336          ((0, 5), (0, 5)))2337        self.assertEqual(regex.search('^(foobar){e<=1}$', 'fobar').span(0, 1),2338          ((0, 5), (0, 5)))2339        self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooba').span(0, 1),2340          ((0, 5), (0, 5)))2341        self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobarx'), None)2342        self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarxx'), None)2343        self.assertEqual(regex.search('^(foobar){e<=1}$', 'xxfoobar'), None)2344        self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoxbar'), None)2345        self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbarx'), None)2346        # At most one insert, two deletes, and three substitutions.2347        # Additionally, deletes cost two and substitutes one, and total2348        # cost must be less than 4.2349        self.assertEqual(regex.search('(foobar){i<=1,d<=2,s<=3,2d+1s<4}',2350          '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((6, 13), (6,2351          13)))2352        self.assertEqual(regex.search('(?b)(foobar){i<=1,d<=2,s<=3,2d+1s<4}',2353          '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((34, 39),2354          (34, 39)))2355        # Partially fuzzy 
matches.2356        self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobarzap').span(0,2357          1), ((0, 9), (3, 6)))2358        self.assertEqual(regex.search('foo(bar){e<=1}zap', 'fobarzap'), None)2359        self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobrzap').span(0,2360          1), ((0, 8), (3, 5)))2361        text = ('www.cnn.com 64.236.16.20\nwww.slashdot.org 66.35.250.150\n'2362          'For useful information, use www.slashdot.org\nthis is demo data!\n')2363        self.assertEqual(regex.search(r'(?s)^.*(dot.org){e}.*$', text).span(0,2364          1), ((0, 120), (120, 120)))2365        self.assertEqual(regex.search(r'(?es)^.*(dot.org){e}.*$', text).span(0,2366          1), ((0, 120), (93, 100)))2367        self.assertEqual(regex.search(r'^.*(dot.org){e}.*$', text).span(0, 1),2368          ((0, 119), (24, 101)))2369        # Behaviour is unexpected, but arguably not wrong. It first finds the2370        # best match, then the best in what follows, etc.2371        self.assertEqual(regex.findall(r"\b\L<words>{e<=1}\b",2372          " book cot dog desk ", words="cat dog".split()), ["cot", "dog"])2373        self.assertEqual(regex.findall(r"\b\L<words>{e<=1}\b",2374          " book dog cot desk ", words="cat dog".split()), [" dog", "cot"])2375        self.assertEqual(regex.findall(r"(?e)\b\L<words>{e<=1}\b",2376          " book dog cot desk ", words="cat dog".split()), ["dog", "cot"])2377        self.assertEqual(regex.findall(r"(?r)\b\L<words>{e<=1}\b",2378          " book cot dog desk ", words="cat dog".split()), ["dog ", "cot"])2379        self.assertEqual(regex.findall(r"(?er)\b\L<words>{e<=1}\b",2380          " book cot dog desk ", words="cat dog".split()), ["dog", "cot"])2381        self.assertEqual(regex.findall(r"(?r)\b\L<words>{e<=1}\b",2382          " book dog cot desk ", words="cat dog".split()), ["cot", "dog"])2383        self.assertEqual(regex.findall(br"\b\L<words>{e<=1}\b",2384          b" book cot dog desk ", words=b"cat 
dog".split()), [b"cot", b"dog"])2385        self.assertEqual(regex.findall(br"\b\L<words>{e<=1}\b",2386          b" book dog cot desk ", words=b"cat dog".split()), [b" dog", b"cot"])2387        self.assertEqual(regex.findall(br"(?e)\b\L<words>{e<=1}\b",2388          b" book dog cot desk ", words=b"cat dog".split()), [b"dog", b"cot"])2389        self.assertEqual(regex.findall(br"(?r)\b\L<words>{e<=1}\b",2390          b" book cot dog desk ", words=b"cat dog".split()), [b"dog ", b"cot"])2391        self.assertEqual(regex.findall(br"(?er)\b\L<words>{e<=1}\b",2392          b" book cot dog desk ", words=b"cat dog".split()), [b"dog", b"cot"])2393        self.assertEqual(regex.findall(br"(?r)\b\L<words>{e<=1}\b",2394          b" book dog cot desk ", words=b"cat dog".split()), [b"cot", b"dog"])2395        self.assertEqual(regex.search(r"(\w+) (\1{e<=1})", "foo fou").groups(),2396          ("foo", "fou"))2397        self.assertEqual(regex.search(r"(?r)(\2{e<=1}) (\w+)",2398          "foo fou").groups(), ("foo", "fou"))2399        self.assertEqual(regex.search(br"(\w+) (\1{e<=1})",2400          b"foo fou").groups(), (b"foo", b"fou"))2401        self.assertEqual(regex.findall(r"(?:(?:QR)+){e}","abcde"), ["abcde",2402          ""])2403        self.assertEqual(regex.findall(r"(?:Q+){e}","abc"), ["abc", ""])2404        # Hg issue 41: = for fuzzy matches2405        self.assertEqual(regex.match(r"(?:service detection){0<e<5}",2406          "servic detection").span(), (0, 16))2407        self.assertEqual(regex.match(r"(?:service detection){0<e<5}",2408          "service detect").span(), (0, 14))2409        self.assertEqual(regex.match(r"(?:service detection){0<e<5}",2410          "service detecti").span(), (0, 15))2411        self.assertEqual(regex.match(r"(?:service detection){0<e<5}",2412          "service detection"), None)2413        self.assertEqual(regex.match(r"(?:service detection){0<e<5}",2414          "in service detection").span(), (0, 20))2415        # Hg issue 109: Edit 
distance of fuzzy match2416        self.assertEqual(regex.fullmatch(r"(?:cats|cat){e<=1}",2417          "cat").fuzzy_counts, (0, 0, 1))2418        self.assertEqual(regex.fullmatch(r"(?e)(?:cats|cat){e<=1}",2419          "cat").fuzzy_counts, (0, 0, 0))2420        self.assertEqual(regex.fullmatch(r"(?:cat|cats){e<=1}",2421          "cats").fuzzy_counts, (0, 1, 0))2422        self.assertEqual(regex.fullmatch(r"(?e)(?:cat|cats){e<=1}",2423          "cats").fuzzy_counts, (0, 0, 0))2424        self.assertEqual(regex.fullmatch(r"(?:cat){e<=1} (?:cat){e<=1}",2425          "cat cot").fuzzy_counts, (1, 0, 0))2426    def test_recursive(self):2427        self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "xx")[ : ],2428          ("xx", "x", ""))2429        self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "aba")[ : ],2430          ("aba", "a", "b"))2431        self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "abba")[ : ],2432          ("abba", "a", None))2433        self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "kayak")[ : ],2434          ("kayak", "k", None))2435        self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "paper")[ : ],2436          ("pap", "p", "a"))2437        self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "dontmatchme"),2438          None)2439        self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)", "xx")[ : ],2440          ("xx", "", "x"))2441        self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)", "aba")[ : ],2442          ("aba", "b", "a"))2443        self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)", "abba")[ :2444          ], ("abba", None, "a"))2445        self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)", "kayak")[ :2446          ], ("kayak", None, "k"))2447        self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)", "paper")[ :2448          ], ("pap", "a", "p"))2449        self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)",2450          "dontmatchme"), None)2451       
 self.assertEqual(regex.search(r"\(((?>[^()]+)|(?R))*\)", "(ab(cd)ef)")[2452          : ], ("(ab(cd)ef)", "ef"))2453        self.assertEqual(regex.search(r"\(((?>[^()]+)|(?R))*\)",2454          "(ab(cd)ef)").captures(1), ["ab", "cd", "(cd)", "ef"])2455        self.assertEqual(regex.search(r"(?r)\(((?R)|(?>[^()]+))*\)",2456          "(ab(cd)ef)")[ : ], ("(ab(cd)ef)", "ab"))2457        self.assertEqual(regex.search(r"(?r)\(((?R)|(?>[^()]+))*\)",2458          "(ab(cd)ef)").captures(1), ["ef", "cd", "(cd)", "ab"])2459        self.assertEqual(regex.search(r"\(([^()]+|(?R))*\)",2460          "some text (a(b(c)d)e) more text")[ : ], ("(a(b(c)d)e)", "e"))2461        self.assertEqual(regex.search(r"(?r)\(((?R)|[^()]+)*\)",2462          "some text (a(b(c)d)e) more text")[ : ], ("(a(b(c)d)e)", "a"))2463        self.assertEqual(regex.search(r"(foo(\(((?:(?>[^()]+)|(?2))*)\)))",2464          "foo(bar(baz)+baz(bop))")[ : ], ("foo(bar(baz)+baz(bop))",2465          "foo(bar(baz)+baz(bop))", "(bar(baz)+baz(bop))",2466          "bar(baz)+baz(bop)"))2467        self.assertEqual(regex.search(r"(?r)(foo(\(((?:(?2)|(?>[^()]+))*)\)))",2468          "foo(bar(baz)+baz(bop))")[ : ], ("foo(bar(baz)+baz(bop))",2469          "foo(bar(baz)+baz(bop))", "(bar(baz)+baz(bop))",2470          "bar(baz)+baz(bop)"))2471        rgx = regex.compile(r"""^\s*(<\s*([a-zA-Z:]+)(?:\s*[a-zA-Z:]*\s*=\s*(?:'[^']*'|"[^"]*"))*\s*(/\s*)?>(?:[^<>]*|(?1))*(?(3)|<\s*/\s*\2\s*>))\s*$""")2472        self.assertEqual(bool(rgx.search('<foo><bar></bar></foo>')), True)2473        self.assertEqual(bool(rgx.search('<foo><bar></foo></bar>')), False)2474        self.assertEqual(bool(rgx.search('<foo><bar/></foo>')), True)2475        self.assertEqual(bool(rgx.search('<foo><bar></foo>')), False)2476        self.assertEqual(bool(rgx.search('<foo bar=baz/>')), False)2477        self.assertEqual(bool(rgx.search('<foo bar="baz">')), False)2478        self.assertEqual(bool(rgx.search('<foo bar="baz"/>')), True)2479        
self.assertEqual(bool(rgx.search('<    fooo   /  >')), True)2480        # The next regex should and does match. Perl 5.14 agrees.2481        #self.assertEqual(bool(rgx.search('<foo/>foo')), False)2482        self.assertEqual(bool(rgx.search('foo<foo/>')), False)2483        self.assertEqual(bool(rgx.search('<foo>foo</foo>')), True)2484        self.assertEqual(bool(rgx.search('<foo><bar/>foo</foo>')), True)2485        self.assertEqual(bool(rgx.search('<a><b><c></c></b></a>')), True)2486    def test_copy(self):2487        # PatternObjects are immutable, therefore there's no need to clone them.2488        r = regex.compile("a")2489        self.assert_(copy.copy(r) is r)2490        self.assert_(copy.deepcopy(r) is r)2491        # MatchObjects are normally mutable because the target string can be2492        # detached. However, after the target string has been detached, a2493        # MatchObject becomes immutable, so there's no need to clone it.2494        m = r.match("a")2495        self.assert_(copy.copy(m) is not m)2496        self.assert_(copy.deepcopy(m) is not m)2497        self.assert_(m.string is not None)2498        m2 = copy.copy(m)2499        m2.detach_string()2500        self.assert_(m.string is not None)2501        self.assert_(m2.string is None)2502        # The following behaviour matches that of the re module.2503        it = regex.finditer(".", "ab")2504        it2 = copy.copy(it)2505        self.assertEqual(next(it).group(), "a")2506        self.assertEqual(next(it2).group(), "b")2507        # The following behaviour matches that of the re module.2508        it = regex.finditer(".", "ab")2509        it2 = copy.deepcopy(it)2510        self.assertEqual(next(it).group(), "a")2511        self.assertEqual(next(it2).group(), "b")2512        # The following behaviour is designed to match that of copying 'finditer'.2513        it = regex.splititer(" ", "a b")2514        it2 = copy.copy(it)2515        self.assertEqual(next(it), "a")2516        
self.assertEqual(next(it2), "b")2517        # The following behaviour is designed to match that of copying 'finditer'.2518        it = regex.splititer(" ", "a b")2519        it2 = copy.deepcopy(it)2520        self.assertEqual(next(it), "a")2521        self.assertEqual(next(it2), "b")2522    def test_format(self):2523        self.assertEqual(regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}",2524          "foo bar"), "foo bar => bar foo")2525        self.assertEqual(regex.subf(r"(?<word1>\w+) (?<word2>\w+)",2526          "{word2} {word1}", "foo bar"), "bar foo")2527        self.assertEqual(regex.subfn(r"(\w+) (\w+)", "{0} => {2} {1}",2528          "foo bar"), ("foo bar => bar foo", 1))2529        self.assertEqual(regex.subfn(r"(?<word1>\w+) (?<word2>\w+)",2530          "{word2} {word1}", "foo bar"), ("bar foo", 1))2531        self.assertEqual(regex.match(r"(\w+) (\w+)",2532          "foo bar").expandf("{0} => {2} {1}"), "foo bar => bar foo")2533    def test_fullmatch(self):2534        self.assertEqual(bool(regex.fullmatch(r"abc", "abc")), True)2535        self.assertEqual(bool(regex.fullmatch(r"abc", "abcx")), False)2536        self.assertEqual(bool(regex.fullmatch(r"abc", "abcx", endpos=3)), True)2537        self.assertEqual(bool(regex.fullmatch(r"abc", "xabc", pos=1)), True)2538        self.assertEqual(bool(regex.fullmatch(r"abc", "xabcy", pos=1)), False)2539        self.assertEqual(bool(regex.fullmatch(r"abc", "xabcy", pos=1,2540          endpos=4)), True)2541        self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abc")), True)2542        self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abcx")), False)2543        self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abcx", endpos=3)),2544          True)2545        self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabc", pos=1)),2546          True)2547        self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabcy", pos=1)),2548          False)2549        self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabcy", 
pos=1,2550          endpos=4)), True)2551    def test_issue_18468(self):2552        # Applies only after Python 3.4 for compatibility with re.2553        if (sys.version_info.major, sys.version_info.minor) < (3, 4):2554            return2555        self.assertTypedEqual(regex.sub('y', 'a', 'xyz'), 'xaz')2556        self.assertTypedEqual(regex.sub('y', StrSubclass('a'),2557          StrSubclass('xyz')), 'xaz')2558        self.assertTypedEqual(regex.sub(b'y', b'a', b'xyz'), b'xaz')2559        self.assertTypedEqual(regex.sub(b'y', BytesSubclass(b'a'),2560          BytesSubclass(b'xyz')), b'xaz')2561        self.assertTypedEqual(regex.sub(b'y', bytearray(b'a'),2562          bytearray(b'xyz')), b'xaz')2563        self.assertTypedEqual(regex.sub(b'y', memoryview(b'a'),2564          memoryview(b'xyz')), b'xaz')2565        for string in ":a:b::c", StrSubclass(":a:b::c"):2566            self.assertTypedEqual(regex.split(":", string), ['', 'a', 'b', '',2567              'c'])2568            self.assertTypedEqual(regex.split(":*", string), ['', 'a', 'b',2569              'c'])2570            self.assertTypedEqual(regex.split("(:*)", string), ['', ':', 'a',2571              ':', 'b', '::', 'c'])2572        for string in (b":a:b::c", BytesSubclass(b":a:b::c"),2573          bytearray(b":a:b::c"), memoryview(b":a:b::c")):2574            self.assertTypedEqual(regex.split(b":", string), [b'', b'a', b'b',2575              b'', b'c'])2576            self.assertTypedEqual(regex.split(b":*", string), [b'', b'a', b'b',2577              b'c'])2578            self.assertTypedEqual(regex.split(b"(:*)", string), [b'', b':',2579              b'a', b':', b'b', b'::', b'c'])2580        for string in "a:b::c:::d", StrSubclass("a:b::c:::d"):2581            self.assertTypedEqual(regex.findall(":+", string), [":", "::",2582              ":::"])2583            self.assertTypedEqual(regex.findall("(:+)", string), [":", "::",2584              ":::"])2585            
self.assertTypedEqual(regex.findall("(:)(:*)", string), [(":", ""),2586              (":", ":"), (":", "::")])2587        for string in (b"a:b::c:::d", BytesSubclass(b"a:b::c:::d"),2588          bytearray(b"a:b::c:::d"), memoryview(b"a:b::c:::d")):2589            self.assertTypedEqual(regex.findall(b":+", string), [b":", b"::",2590              b":::"])2591            self.assertTypedEqual(regex.findall(b"(:+)", string), [b":", b"::",2592              b":::"])2593            self.assertTypedEqual(regex.findall(b"(:)(:*)", string), [(b":",2594              b""), (b":", b":"), (b":", b"::")])2595        for string in 'a', StrSubclass('a'):2596            self.assertEqual(regex.match('a', string).groups(), ())2597            self.assertEqual(regex.match('(a)', string).groups(), ('a',))2598            self.assertEqual(regex.match('(a)', string).group(0), 'a')2599            self.assertEqual(regex.match('(a)', string).group(1), 'a')2600            self.assertEqual(regex.match('(a)', string).group(1, 1), ('a',2601              'a'))2602        for string in (b'a', BytesSubclass(b'a'), bytearray(b'a'),2603          memoryview(b'a')):2604            self.assertEqual(regex.match(b'a', string).groups(), ())2605            self.assertEqual(regex.match(b'(a)', string).groups(), (b'a',))2606            self.assertEqual(regex.match(b'(a)', string).group(0), b'a')2607            self.assertEqual(regex.match(b'(a)', string).group(1), b'a')2608            self.assertEqual(regex.match(b'(a)', string).group(1, 1), (b'a',2609              b'a'))2610    def test_partial(self):2611        self.assertEqual(regex.match('ab', 'a', partial=True).partial, True)2612        self.assertEqual(regex.match('ab', 'a', partial=True).span(), (0, 1))2613        self.assertEqual(regex.match(r'cats', 'cat', partial=True).partial,2614          True)2615        self.assertEqual(regex.match(r'cats', 'cat', partial=True).span(), (0,2616          3))2617        self.assertEqual(regex.match(r'cats', 'catch', 
partial=True), None)2618        self.assertEqual(regex.match(r'abc\w{3}', 'abcdef',2619          partial=True).partial, False)2620        self.assertEqual(regex.match(r'abc\w{3}', 'abcdef',2621          partial=True).span(), (0, 6))2622        self.assertEqual(regex.match(r'abc\w{3}', 'abcde',2623          partial=True).partial, True)2624        self.assertEqual(regex.match(r'abc\w{3}', 'abcde',2625          partial=True).span(), (0, 5))2626        self.assertEqual(regex.match(r'\d{4}$', '1234', partial=True).partial,2627          False)2628        self.assertEqual(regex.match(r'\L<words>', 'post', partial=True,2629          words=['post']).partial, False)2630        self.assertEqual(regex.match(r'\L<words>', 'post', partial=True,2631          words=['post']).span(), (0, 4))2632        self.assertEqual(regex.match(r'\L<words>', 'pos', partial=True,2633          words=['post']).partial, True)2634        self.assertEqual(regex.match(r'\L<words>', 'pos', partial=True,2635          words=['post']).span(), (0, 3))2636        self.assertEqual(regex.match(r'(?fi)\L<words>', 'POST', partial=True,2637          words=['po\uFB06']).partial, False)2638        self.assertEqual(regex.match(r'(?fi)\L<words>', 'POST', partial=True,2639          words=['po\uFB06']).span(), (0, 4))2640        self.assertEqual(regex.match(r'(?fi)\L<words>', 'POS', partial=True,2641          words=['po\uFB06']).partial, True)2642        self.assertEqual(regex.match(r'(?fi)\L<words>', 'POS', partial=True,2643          words=['po\uFB06']).span(), (0, 3))2644        self.assertEqual(regex.match(r'(?fi)\L<words>', 'po\uFB06',2645          partial=True, words=['POS']), None)2646        self.assertEqual(regex.match(r'[a-z]*4R$', 'a', partial=True).span(),2647          (0, 1))2648        self.assertEqual(regex.match(r'[a-z]*4R$', 'ab', partial=True).span(),2649          (0, 2))2650        self.assertEqual(regex.match(r'[a-z]*4R$', 'ab4', partial=True).span(),2651          (0, 3))2652        
self.assertEqual(regex.match(r'[a-z]*4R$', 'a4', partial=True).span(),2653          (0, 2))2654        self.assertEqual(regex.match(r'[a-z]*4R$', 'a4R', partial=True).span(),2655          (0, 3))2656        self.assertEqual(regex.match(r'[a-z]*4R$', '4a', partial=True), None)2657        self.assertEqual(regex.match(r'[a-z]*4R$', 'a44', partial=True), None)2658    def test_hg_bugs(self):2659        # Hg issue 28: regex.compile("(?>b)") causes "TypeError: 'Character'2660        # object is not subscriptable"2661        self.assertEqual(bool(regex.compile("(?>b)", flags=regex.V1)), True)2662        # Hg issue 29: regex.compile("^((?>\w+)|(?>\s+))*$") causes2663        # "TypeError: 'GreedyRepeat' object is not iterable"2664        self.assertEqual(bool(regex.compile(r"^((?>\w+)|(?>\s+))*$",2665          flags=regex.V1)), True)2666        # Hg issue 31: atomic and normal groups in recursive patterns2667        self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)",2668          "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)'])2669        self.assertEqual(regex.findall(r"\((?:(?:[^()]+)|(?R))*\)",2670          "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)'])2671        self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)",2672          "a(b(cd)e)f)g)h"), ['(b(cd)e)'])2673        self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)",2674          "a(bc(d(e)f)gh"), ['(d(e)f)'])2675        self.assertEqual(regex.findall(r"(?r)\((?:(?>[^()]+)|(?R))*\)",2676          "a(bc(d(e)f)gh"), ['(d(e)f)'])2677        self.assertEqual([m.group() for m in2678          regex.finditer(r"\((?:[^()]*+|(?0))*\)", "a(b(c(de)fg)h")],2679          ['(c(de)fg)'])2680        # Hg issue 32: regex.search("a(bc)d", "abcd", regex.I|regex.V1) returns2681        # None2682        self.assertEqual(regex.search("a(bc)d", "abcd", regex.I |2683          regex.V1).group(0), "abcd")2684        # Hg issue 33: regex.search("([\da-f:]+)$", "E", regex.I|regex.V1)2685        # returns None2686        
self.assertEqual(regex.search("([\da-f:]+)$", "E", regex.I |2687          regex.V1).group(0), "E")2688        self.assertEqual(regex.search("([\da-f:]+)$", "e", regex.I |2689          regex.V1).group(0), "e")2690        # Hg issue 34: regex.search("^(?=ab(de))(abd)(e)", "abde").groups()2691        # returns (None, 'abd', 'e') instead of ('de', 'abd', 'e')2692        self.assertEqual(regex.search("^(?=ab(de))(abd)(e)", "abde").groups(),2693          ('de', 'abd', 'e'))2694        # Hg issue 35: regex.compile("\ ", regex.X) causes "_regex_core.error:2695        # bad escape"2696        self.assertEqual(bool(regex.match(r"\ ", " ", flags=regex.X)), True)2697        # Hg issue 36: regex.search("^(a|)\1{2}b", "b") returns None2698        self.assertEqual(regex.search(r"^(a|)\1{2}b", "b").group(0, 1), ('b',2699          ''))2700        # Hg issue 37: regex.search("^(a){0,0}", "abc").group(0,1) returns2701        # ('a', 'a') instead of ('', None)2702        self.assertEqual(regex.search("^(a){0,0}", "abc").group(0, 1), ('',2703          None))2704        # Hg issue 38: regex.search("(?>.*/)b", "a/b") returns None2705        self.assertEqual(regex.search("(?>.*/)b", "a/b").group(0), "a/b")2706        # Hg issue 39: regex.search("((?i)blah)\\s+\\1", "blah BLAH") doesn't2707        # return None2708        self.assertEqual(regex.search(r"(?V0)((?i)blah)\s+\1",2709          "blah BLAH").group(0, 1), ("blah BLAH", "blah"))2710        self.assertEqual(regex.search(r"(?V1)((?i)blah)\s+\1", "blah BLAH"),2711          None)2712        # Hg issue 40: regex.search("(\()?[^()]+(?(1)\)|)", "(abcd").group(0)2713        # returns "bcd" instead of "abcd"2714        self.assertEqual(regex.search(r"(\()?[^()]+(?(1)\)|)",2715          "(abcd").group(0), "abcd")2716        # Hg issue 42: regex.search("(a*)*", "a", flags=regex.V1).span(1)2717        # returns (0, 1) instead of (1, 1)2718        self.assertEqual(regex.search("(a*)*", "a").span(1), (1, 1))2719        
self.assertEqual(regex.search("(a*)*", "aa").span(1), (2, 2))2720        self.assertEqual(regex.search("(a*)*", "aaa").span(1), (3, 3))2721        # Hg issue 43: regex.compile("a(?#xxx)*") causes "_regex_core.error:2722        # nothing to repeat"2723        self.assertEqual(regex.search("a(?#xxx)*", "aaa").group(), "aaa")2724        # Hg issue 44: regex.compile("(?=abc){3}abc") causes2725        # "_regex_core.error: nothing to repeat"2726        self.assertEqual(regex.search("(?=abc){3}abc", "abcabcabc").span(), (0,2727          3))2728        # Hg issue 45: regex.compile("^(?:a(?:(?:))+)+") causes2729        # "_regex_core.error: nothing to repeat"2730        self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "a").span(), (0, 1))2731        self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "aa").span(), (0, 2))2732        # Hg issue 46: regex.compile("a(?x: b c )d") causes2733        # "_regex_core.error: missing )"2734        self.assertEqual(regex.search("a(?x: b c )d", "abcd").group(0), "abcd")2735        # Hg issue 47: regex.compile("a#comment\n*", flags=regex.X) causes2736        # "_regex_core.error: nothing to repeat"2737        self.assertEqual(regex.search("a#comment\n*", "aaa",2738          flags=regex.X).group(0), "aaa")2739        # Hg issue 48: regex.search("(a(?(1)\\1)){4}", "a"*10,2740        # flags=regex.V1).group(0,1) returns ('aaaaa', 'a') instead of ('aaaaaaaaaa', 'aaaa')2741        self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){1}",2742          "aaaaaaaaaa").span(0, 1), ((0, 1), (0, 1)))2743        self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){2}",2744          "aaaaaaaaaa").span(0, 1), ((0, 3), (1, 3)))2745        self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){3}",2746          "aaaaaaaaaa").span(0, 1), ((0, 6), (3, 6)))2747        self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){4}",2748          "aaaaaaaaaa").span(0, 1), ((0, 10), (6, 10)))2749        # Hg issue 49: regex.search("(a)(?<=b(?1))", "baz", regex.V1) returns2750        
# None incorrectly2751        self.assertEqual(regex.search("(?V1)(a)(?<=b(?1))", "baz").group(0),2752          "a")2753        # Hg issue 50: not all keywords are found by named list with2754        # overlapping keywords when full Unicode casefolding is required2755        self.assertEqual(regex.findall(r'(?fi)\L<keywords>',2756          'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05',2757          keywords=['post','pos']), ['POST', 'Post', 'post', 'po\u017Ft',2758          'po\uFB06', 'po\uFB05'])2759        self.assertEqual(regex.findall(r'(?fi)pos|post',2760          'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POS',2761          'Pos', 'pos', 'po\u017F', 'po\uFB06', 'po\uFB05'])2762        self.assertEqual(regex.findall(r'(?fi)post|pos',2763          'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POST',2764          'Post', 'post', 'po\u017Ft', 'po\uFB06', 'po\uFB05'])2765        self.assertEqual(regex.findall(r'(?fi)post|another',2766          'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POST',2767          'Post', 'post', 'po\u017Ft', 'po\uFB06', 'po\uFB05'])2768        # Hg issue 51: regex.search("((a)(?1)|(?2))", "a", flags=regex.V1)2769        # returns None incorrectly2770        self.assertEqual(regex.search("(?V1)((a)(?1)|(?2))", "a").group(0, 1,2771          2), ('a', 'a', None))2772        # Hg issue 52: regex.search("(\\1xx|){6}", "xx",2773        # flags=regex.V1).span(0,1) returns incorrect value2774        self.assertEqual(regex.search(r"(?V1)(\1xx|){6}", "xx").span(0, 1),2775          ((0, 2), (2, 2)))2776        # Hg issue 53: regex.search("(a|)+", "a") causes MemoryError2777        self.assertEqual(regex.search("(a|)+", "a").group(0, 1), ("a", ""))2778        # Hg issue 54: regex.search("(a|)*\\d", "a"*80) causes MemoryError2779        self.assertEqual(regex.search(r"(a|)*\d", "a" * 80), None)2780        # Hg issue 55: regex.search("^(?:a?b?)*$", "ac") take a very long time.2781        
self.assertEqual(regex.search("^(?:a?b?)*$", "ac"), None)2782        # Hg issue 58: bad named character escape sequences like "\\N{1}"2783        # treats as "N"2784        self.assertRaisesRegex(regex.error, self.UNDEF_CHAR_NAME, lambda:2785          regex.compile("\\N{1}"))2786        # Hg issue 59: regex.search("\\Z", "a\na\n") returns None incorrectly2787        self.assertEqual(regex.search("\\Z", "a\na\n").span(0), (4, 4))2788        # Hg issue 60: regex.search("(q1|.)*(q2|.)*(x(a|bc)*y){2,}", "xayxay")2789        # returns None incorrectly2790        self.assertEqual(regex.search("(q1|.)*(q2|.)*(x(a|bc)*y){2,}",2791          "xayxay").group(0), "xayxay")2792        # Hg issue 61: regex.search("[^a]", "A", regex.I).group(0) returns ''2793        # incorrectly2794        self.assertEqual(regex.search("(?i)[^a]", "A"), None)2795        # Hg issue 63: regex.search("[[:ascii:]]", "\N{KELVIN SIGN}",2796        # flags=regex.I|regex.V1) doesn't return None2797        self.assertEqual(regex.search("(?i)[[:ascii:]]", "\N{KELVIN SIGN}"),2798          None)2799        # Hg issue 66: regex.search("((a|b(?1)c){3,5})", "baaaaca",2800        # flags=regex.V1).groups() returns ('baaaac', 'baaaac') instead of ('aaaa', 'a')2801        self.assertEqual(regex.search("((a|b(?1)c){3,5})", "baaaaca").group(0,2802          1, 2), ('aaaa', 'aaaa', 'a'))2803        # Hg issue 71: non-greedy quantifier in lookbehind2804        self.assertEqual(regex.findall(r"(?<=:\S+ )\w+", ":9 abc :10 def"),2805          ['abc', 'def'])2806        self.assertEqual(regex.findall(r"(?<=:\S* )\w+", ":9 abc :10 def"),2807          ['abc', 'def'])2808        self.assertEqual(regex.findall(r"(?<=:\S+? )\w+", ":9 abc :10 def"),2809          ['abc', 'def'])2810        self.assertEqual(regex.findall(r"(?<=:\S*? 
)\w+", ":9 abc :10 def"),2811          ['abc', 'def'])2812        # Hg issue 73: conditional patterns2813        self.assertEqual(regex.search(r"(?:fe)?male", "female").group(),2814          "female")2815        self.assertEqual([m.group() for m in2816          regex.finditer(r"(fe)?male: h(?(1)(er)|(is)) (\w+)",2817          "female: her dog; male: his cat. asdsasda")], ['female: her dog',2818          'male: his cat'])2819        # Hg issue 78: "Captures"doesn't work for recursive calls2820        self.assertEqual(regex.search(r'(?<rec>\((?:[^()]++|(?&rec))*\))',2821          'aaa(((1+0)+1)+1)bbb').captures('rec'), ['(1+0)', '((1+0)+1)',2822          '(((1+0)+1)+1)'])2823        # Hg issue 80: Escape characters throws an exception2824        self.assertRaisesRegex(regex.error, self.BAD_ESCAPE, lambda:2825          regex.sub('x', '\\', 'x'), )2826        # Hg issue 82: error range does not work2827        fz = "(CAGCCTCCCATTTCAGAATATACATCC){1<e<=2}"2828        seq = "tcagacgagtgcgttgtaaaacgacggccagtCAGCCTCCCATTCAGAATATACATCCcgacggccagttaaaaacaatgccaaggaggtcatagctgtttcctgccagttaaaaacaatgccaaggaggtcatagctgtttcctgacgcactcgtctgagcgggctggcaagg"2829        self.assertEqual(regex.search(fz, seq, regex.BESTMATCH)[0],2830          "tCAGCCTCCCATTCAGAATATACATCC")2831        # Hg issue 83: slash handling in presence of a quantifier2832        self.assertEqual(regex.findall(r"c..+/c", "cA/c\ncAb/c"), ['cAb/c'])2833        # Hg issue 85: Non-conformance to Unicode UAX#29 re: ZWJ / ZWNJ2834        self.assertEqual(ascii(regex.sub(r"(\w+)", r"[\1]",2835          '\u0905\u0928\u094d\u200d\u0928 \u0d28\u0d4d\u200d \u0915\u093f\u0928',2836          regex.WORD)),2837          ascii('[\u0905\u0928\u094d\u200d\u0928] [\u0d28\u0d4d\u200d] [\u0915\u093f\u0928]'))2838        # Hg issue 88: regex.match() hangs2839        self.assertEqual(regex.match(r".*a.*ba.*aa", "ababba"), None)2840        # Hg issue 87: Allow duplicate names of groups2841        
self.assertEqual(regex.match(r'(?<x>a(?<x>b))', "ab").spans("x"), [(1,2842          2), (0, 2)])2843        # Hg issue 91: match.expand is extremely slow2844        # Check that the replacement cache works.2845        self.assertEqual(regex.sub(r'(-)', lambda m: m.expand(r'x'), 'a-b-c'),2846          'axbxc')2847        # Hg issue 94: Python crashes when executing regex updates2848        # pattern.findall2849        rx = regex.compile(r'\bt(est){i<2}', flags=regex.V1)2850        self.assertEqual(rx.search("Some text"), None)2851        self.assertEqual(rx.findall("Some text"), [])2852        # Hg issue 95: 'pos' for regex.error2853        self.assertRaisesRegex(regex.error, self.MULTIPLE_REPEAT, lambda:2854          regex.compile(r'.???'))2855        # Hg issue 97: behaviour of regex.escape's special_only is wrong2856        #2857        # Hg issue 244: Make `special_only=True` the default in2858        # `regex.escape()`2859        self.assertEqual(regex.escape('foo!?', special_only=False), 'foo\\!\\?')2860        self.assertEqual(regex.escape('foo!?', special_only=True), 'foo!\\?')2861        self.assertEqual(regex.escape('foo!?'), 'foo!\\?')2862        self.assertEqual(regex.escape(b'foo!?', special_only=False), b'foo\\!\\?')2863        self.assertEqual(regex.escape(b'foo!?', special_only=True),2864          b'foo!\\?')2865        self.assertEqual(regex.escape(b'foo!?'), b'foo!\\?')2866        # Hg issue 100: strange results from regex.search2867        self.assertEqual(regex.search('^([^z]*(?:WWWi|W))?$',2868          'WWWi').groups(), ('WWWi', ))2869        self.assertEqual(regex.search('^([^z]*(?:WWWi|w))?$',2870          'WWWi').groups(), ('WWWi', ))2871        self.assertEqual(regex.search('^([^z]*?(?:WWWi|W))?$',2872          'WWWi').groups(), ('WWWi', ))2873        # Hg issue 101: findall() broken (seems like memory corruption)2874        pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.UNICODE)2875        self.assertEqual([x.group() for x in 
pat.finditer('yxxx')], ['xxx'])2876        self.assertEqual(pat.findall('yxxx'), ['xxx'])2877        raw = 'yxxx'2878        self.assertEqual([x.group() for x in pat.finditer(raw)], ['xxx'])2879        self.assertEqual(pat.findall(raw), ['xxx'])2880        pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.IGNORECASE |2881          regex.UNICODE)2882        self.assertEqual([x.group() for x in pat.finditer('yxxx')], ['xxx'])2883        self.assertEqual(pat.findall('yxxx'), ['xxx'])2884        raw = 'yxxx'2885        self.assertEqual([x.group() for x in pat.finditer(raw)], ['xxx'])2886        self.assertEqual(pat.findall(raw), ['xxx'])2887        # Hg issue 106: * operator not working correctly with sub()2888        self.assertEqual(regex.sub('(?V0).*', 'x', 'test'), 'x')2889        self.assertEqual(regex.sub('(?V1).*', 'x', 'test'), 'xx')2890        self.assertEqual(regex.sub('(?V0).*?', '|', 'test'), '|t|e|s|t|')2891        self.assertEqual(regex.sub('(?V1).*?', '|', 'test'), '|||||||||')2892        # Hg issue 112: re: OK, but regex: SystemError2893        self.assertEqual(regex.sub(r'^(@)\n(?!.*?@)(.*)',2894          r'\1\n==========\n\2', '@\n', flags=regex.DOTALL), '@\n==========\n')2895        # Hg issue 109: Edit distance of fuzzy match2896        self.assertEqual(regex.match(r'(?:cats|cat){e<=1}',2897         'caz').fuzzy_counts, (1, 0, 0))2898        self.assertEqual(regex.match(r'(?e)(?:cats|cat){e<=1}',2899          'caz').fuzzy_counts, (1, 0, 0))2900        self.assertEqual(regex.match(r'(?b)(?:cats|cat){e<=1}',2901          'caz').fuzzy_counts, (1, 0, 0))2902        self.assertEqual(regex.match(r'(?:cat){e<=1}', 'caz').fuzzy_counts,2903          (1, 0, 0))2904        self.assertEqual(regex.match(r'(?e)(?:cat){e<=1}',2905          'caz').fuzzy_counts, (1, 0, 0))2906        self.assertEqual(regex.match(r'(?b)(?:cat){e<=1}',2907          'caz').fuzzy_counts, (1, 0, 0))2908        self.assertEqual(regex.match(r'(?:cats){e<=2}', 'c 
ats').fuzzy_counts,2909          (1, 1, 0))2910        self.assertEqual(regex.match(r'(?e)(?:cats){e<=2}',2911          'c ats').fuzzy_counts, (0, 1, 0))2912        self.assertEqual(regex.match(r'(?b)(?:cats){e<=2}',2913          'c ats').fuzzy_counts, (0, 1, 0))2914        self.assertEqual(regex.match(r'(?:cats){e<=2}',2915          'c a ts').fuzzy_counts, (0, 2, 0))2916        self.assertEqual(regex.match(r'(?e)(?:cats){e<=2}',2917          'c a ts').fuzzy_counts, (0, 2, 0))2918        self.assertEqual(regex.match(r'(?b)(?:cats){e<=2}',2919          'c a ts').fuzzy_counts, (0, 2, 0))2920        self.assertEqual(regex.match(r'(?:cats){e<=1}', 'c ats').fuzzy_counts,2921          (0, 1, 0))2922        self.assertEqual(regex.match(r'(?e)(?:cats){e<=1}',2923          'c ats').fuzzy_counts, (0, 1, 0))2924        self.assertEqual(regex.match(r'(?b)(?:cats){e<=1}',2925          'c ats').fuzzy_counts, (0, 1, 0))2926        # Hg issue 115: Infinite loop when processing backreferences2927        self.assertEqual(regex.findall(r'\bof ([a-z]+) of \1\b',2928          'To make use of one of these modules'), [])2929        # Hg issue 125: Reference to entire match (\g<0>) in2930        # Pattern.sub() doesn't work as of 2014.09.22 release.2931        self.assertEqual(regex.sub(r'x', r'\g<0>', 'x'), 'x')2932        # Unreported issue: no such builtin as 'ascii' in Python 2.2933        self.assertEqual(bool(regex.match(r'a', 'a', regex.DEBUG)), True)2934        # Hg issue 131: nested sets behaviour2935        self.assertEqual(regex.findall(r'(?V1)[[b-e]--cd]', 'abcdef'), ['b',2936          'e'])2937        self.assertEqual(regex.findall(r'(?V1)[b-e--cd]', 'abcdef'), ['b',2938          'e'])2939        self.assertEqual(regex.findall(r'(?V1)[[bcde]--cd]', 'abcdef'), ['b',2940          'e'])2941        self.assertEqual(regex.findall(r'(?V1)[bcde--cd]', 'abcdef'), ['b',2942          'e'])2943        # Hg issue 132: index out of range on null property \p{}2944        
self.assertRaisesRegex(regex.error, '^unknown property at position 4$',2945          lambda: regex.compile(r'\p{}'))2946        # Issue 23692.2947        self.assertEqual(regex.match('(?:()|(?(1)()|z)){2}(?(2)a|z)',2948          'a').group(0, 1, 2), ('a', '', ''))2949        self.assertEqual(regex.match('(?:()|(?(1)()|z)){0,2}(?(2)a|z)',2950          'a').group(0, 1, 2), ('a', '', ''))2951        # Hg issue 137: Posix character class :punct: does not seem to be2952        # supported.2953        # Posix compatibility as recommended here:2954        # http://www.unicode.org/reports/tr18/#Compatibility_Properties2955        # Posix in Unicode.2956        chars = ''.join(chr(c) for c in range(0x10000))2957        self.assertEqual(ascii(''.join(regex.findall(r'''[[:alnum:]]+''',2958          chars))), ascii(''.join(regex.findall(r'''[\p{Alpha}\p{PosixDigit}]+''',2959          chars))))2960        self.assertEqual(ascii(''.join(regex.findall(r'''[[:alpha:]]+''',2961          chars))), ascii(''.join(regex.findall(r'''\p{Alpha}+''',2962          chars))))2963        self.assertEqual(ascii(''.join(regex.findall(r'''[[:ascii:]]+''',2964          chars))), ascii(''.join(regex.findall(r'''[\p{InBasicLatin}]+''',2965          chars))))2966        self.assertEqual(ascii(''.join(regex.findall(r'''[[:blank:]]+''',2967          chars))), ascii(''.join(regex.findall(r'''[\p{gc=Space_Separator}\t]+''',2968          chars))))2969        self.assertEqual(ascii(''.join(regex.findall(r'''[[:cntrl:]]+''',2970          chars))), ascii(''.join(regex.findall(r'''\p{gc=Control}+''', chars))))2971        self.assertEqual(ascii(''.join(regex.findall(r'''[[:digit:]]+''',2972          chars))), ascii(''.join(regex.findall(r'''[0-9]+''', chars))))2973        self.assertEqual(ascii(''.join(regex.findall(r'''[[:graph:]]+''',2974          chars))), ascii(''.join(regex.findall(r'''[^\p{Space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]+''',2975          chars))))2976        
self.assertEqual(ascii(''.join(regex.findall(r'''[[:lower:]]+''',2977          chars))), ascii(''.join(regex.findall(r'''\p{Lower}+''',2978          chars))))2979        self.assertEqual(ascii(''.join(regex.findall(r'''[[:print:]]+''',2980          chars))), ascii(''.join(regex.findall(r'''(?V1)[\p{Graph}\p{Blank}--\p{Cntrl}]+''', chars))))2981        self.assertEqual(ascii(''.join(regex.findall(r'''[[:punct:]]+''',2982          chars))),2983          ascii(''.join(regex.findall(r'''(?V1)[\p{gc=Punctuation}\p{gc=Symbol}--\p{Alpha}]+''',2984          chars))))2985        self.assertEqual(ascii(''.join(regex.findall(r'''[[:space:]]+''',2986          chars))), ascii(''.join(regex.findall(r'''\p{Whitespace}+''',2987          chars))))2988        self.assertEqual(ascii(''.join(regex.findall(r'''[[:upper:]]+''',2989          chars))), ascii(''.join(regex.findall(r'''\p{Upper}+''',2990          chars))))2991        self.assertEqual(ascii(''.join(regex.findall(r'''[[:word:]]+''',2992          chars))), ascii(''.join(regex.findall(r'''[\p{Alpha}\p{gc=Mark}\p{Digit}\p{gc=Connector_Punctuation}\p{Join_Control}]+''',2993          chars))))2994        self.assertEqual(ascii(''.join(regex.findall(r'''[[:xdigit:]]+''',2995          chars))), ascii(''.join(regex.findall(r'''[0-9A-Fa-f]+''',2996          chars))))2997        # Posix in ASCII.2998        chars = bytes(range(0x100))2999        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:alnum:]]+''',3000          chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{Alpha}\p{PosixDigit}]+''',3001          chars))))3002        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:alpha:]]+''',3003          chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Alpha}+''', chars))))3004        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:ascii:]]+''',3005          chars))), ascii(b''.join(regex.findall(br'''(?a)[\x00-\x7F]+''', chars))))3006        
self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:blank:]]+''',3007          chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{gc=Space_Separator}\t]+''',3008          chars))))3009        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:cntrl:]]+''',3010          chars))), ascii(b''.join(regex.findall(br'''(?a)\p{gc=Control}+''',3011          chars))))3012        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:digit:]]+''',3013          chars))), ascii(b''.join(regex.findall(br'''(?a)[0-9]+''', chars))))3014        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:graph:]]+''',3015          chars))), ascii(b''.join(regex.findall(br'''(?a)[^\p{Space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]+''', chars))))3016        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:lower:]]+''',3017          chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Lower}+''', chars))))3018        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:print:]]+''',3019          chars))), ascii(b''.join(regex.findall(br'''(?aV1)[\p{Graph}\p{Blank}--\p{Cntrl}]+''', chars))))3020        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:punct:]]+''',3021          chars))), ascii(b''.join(regex.findall(br'''(?aV1)[\p{gc=Punctuation}\p{gc=Symbol}--\p{Alpha}]+''',3022          chars))))3023        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:space:]]+''',3024          chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Whitespace}+''', chars))))3025        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:upper:]]+''',3026          chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Upper}+''', chars))))3027        self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:word:]]+''',3028          chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{Alpha}\p{gc=Mark}\p{Digit}\p{gc=Connector_Punctuation}\p{Join_Control}]+''', chars))))3029        
self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:xdigit:]]+''',3030          chars))), ascii(b''.join(regex.findall(br'''(?a)[0-9A-Fa-f]+''', chars))))3031        # Hg issue 138: grapheme anchored search not working properly.3032        self.assertEqual(ascii(regex.search(r'\X$', 'ab\u2103').group()),3033          ascii('\u2103'))3034        # Hg issue 139: Regular expression with multiple wildcards where first3035        # should match empty string does not always work.3036        self.assertEqual(regex.search("([^L]*)([^R]*R)", "LtR").groups(), ('',3037          'LtR'))3038        # Hg issue 140: Replace with REVERSE and groups has unexpected3039        # behavior.3040        self.assertEqual(regex.sub(r'(.)', r'x\1y', 'ab'), 'xayxby')3041        self.assertEqual(regex.sub(r'(?r)(.)', r'x\1y', 'ab'), 'xayxby')3042        self.assertEqual(regex.subf(r'(.)', 'x{1}y', 'ab'), 'xayxby')3043        self.assertEqual(regex.subf(r'(?r)(.)', 'x{1}y', 'ab'), 'xayxby')3044        # Hg issue 141: Crash on a certain partial match.3045        self.assertEqual(regex.fullmatch('(a)*abc', 'ab',3046          partial=True).span(), (0, 2))3047        self.assertEqual(regex.fullmatch('(a)*abc', 'ab',3048          partial=True).partial, True)3049        # Hg issue 143: Partial matches have incorrect span if prefix is '.'3050        # wildcard.3051        self.assertEqual(regex.search('OXRG', 'OOGOX', partial=True).span(),3052          (3, 5))3053        self.assertEqual(regex.search('.XRG', 'OOGOX', partial=True).span(),3054          (3, 5))3055        self.assertEqual(regex.search('.{1,3}XRG', 'OOGOX',3056          partial=True).span(), (1, 5))3057        # Hg issue 144: Latest version problem with matching 'R|R'.3058        self.assertEqual(regex.match('R|R', 'R').span(), (0, 1))3059        # Hg issue 146: Forced-fail (?!) 
works improperly in conditional.3060        self.assertEqual(regex.match(r'(.)(?(1)(?!))', 'xy'), None)3061        # Groups cleared after failure.3062        self.assertEqual(regex.findall(r'(y)?(\d)(?(1)\b\B)', 'ax1y2z3b'),3063          [('', '1'), ('', '2'), ('', '3')])3064        self.assertEqual(regex.findall(r'(y)?+(\d)(?(1)\b\B)', 'ax1y2z3b'),3065          [('', '1'), ('', '2'), ('', '3')])3066        # Hg issue 147: Fuzzy match can return match points beyond buffer end.3067        self.assertEqual([m.span() for m in3068          regex.finditer(r'(?i)(?:error){e}', 'regex failure')], [(0, 5), (5,3069          10), (10, 13), (13, 13)])3070        self.assertEqual([m.span() for m in3071          regex.finditer(r'(?fi)(?:error){e}', 'regex failure')], [(0, 5), (5,3072          10), (10, 13), (13, 13)])3073        # Hg issue 150: Have an option for POSIX-compatible longest match of3074        # alternates.3075        self.assertEqual(regex.search(r'(?p)\d+(\w(\d*)?|[eE]([+-]\d+))',3076          '10b12')[0], '10b12')3077        self.assertEqual(regex.search(r'(?p)\d+(\w(\d*)?|[eE]([+-]\d+))',3078          '10E+12')[0], '10E+12')3079        self.assertEqual(regex.search(r'(?p)(\w|ae|oe|ue|ss)', 'ae')[0], 'ae')3080        self.assertEqual(regex.search(r'(?p)one(self)?(selfsufficient)?',3081          'oneselfsufficient')[0], 'oneselfsufficient')3082        # Hg issue 151: Request: \K.3083        self.assertEqual(regex.search(r'(ab\Kcd)', 'abcd').group(0, 1), ('cd',3084          'abcd'))3085        self.assertEqual(regex.findall(r'\w\w\K\w\w', 'abcdefgh'), ['cd',3086          'gh'])3087        self.assertEqual(regex.findall(r'(\w\w\K\w\w)', 'abcdefgh'), ['abcd',3088          'efgh'])3089        self.assertEqual(regex.search(r'(?r)(ab\Kcd)', 'abcd').group(0, 1),3090          ('ab', 'abcd'))3091        self.assertEqual(regex.findall(r'(?r)\w\w\K\w\w', 'abcdefgh'), ['ef',3092          'ab'])3093        self.assertEqual(regex.findall(r'(?r)(\w\w\K\w\w)', 'abcdefgh'),3094  
        ['efgh', 'abcd'])3095        # Hg issue 152: Request: Request: (?(DEFINE)...).3096        self.assertEqual(regex.search(r'(?(DEFINE)(?<quant>\d+)(?<item>\w+))(?&quant) (?&item)',3097          '5 elephants')[0], '5 elephants')3098        # Hg issue 153: Request: (*SKIP).3099        self.assertEqual(regex.search(r'12(*FAIL)|3', '123')[0], '3')3100        self.assertEqual(regex.search(r'(?r)12(*FAIL)|3', '123')[0], '3')3101        self.assertEqual(regex.search(r'\d+(*PRUNE)\d', '123'), None)3102        self.assertEqual(regex.search(r'\d+(?=(*PRUNE))\d', '123')[0], '123')3103        self.assertEqual(regex.search(r'\d+(*PRUNE)bcd|[3d]', '123bcd')[0],3104          '123bcd')3105        self.assertEqual(regex.search(r'\d+(*PRUNE)bcd|[3d]', '123zzd')[0],3106          'd')3107        self.assertEqual(regex.search(r'\d+?(*PRUNE)bcd|[3d]', '123bcd')[0],3108          '3bcd')3109        self.assertEqual(regex.search(r'\d+?(*PRUNE)bcd|[3d]', '123zzd')[0],3110          'd')3111        self.assertEqual(regex.search(r'\d++(?<=3(*PRUNE))zzd|[4d]$',3112          '123zzd')[0], '123zzd')3113        self.assertEqual(regex.search(r'\d++(?<=3(*PRUNE))zzd|[4d]$',3114          '124zzd')[0], 'd')3115        self.assertEqual(regex.search(r'\d++(?<=(*PRUNE)3)zzd|[4d]$',3116          '124zzd')[0], 'd')3117        self.assertEqual(regex.search(r'\d++(?<=2(*PRUNE)3)zzd|[3d]$',3118          '124zzd')[0], 'd')3119        self.assertEqual(regex.search(r'(?r)\d(*PRUNE)\d+', '123'), None)3120        self.assertEqual(regex.search(r'(?r)\d(?<=(*PRUNE))\d+', '123')[0],3121          '123')3122        self.assertEqual(regex.search(r'(?r)\d+(*PRUNE)bcd|[3d]',3123          '123bcd')[0], '123bcd')3124        self.assertEqual(regex.search(r'(?r)\d+(*PRUNE)bcd|[3d]',3125          '123zzd')[0], 'd')3126        self.assertEqual(regex.search(r'(?r)\d++(?<=3(*PRUNE))zzd|[4d]$',3127          '123zzd')[0], '123zzd')3128        self.assertEqual(regex.search(r'(?r)\d++(?<=3(*PRUNE))zzd|[4d]$',3129          
'124zzd')[0], 'd')3130        self.assertEqual(regex.search(r'(?r)\d++(?<=(*PRUNE)3)zzd|[4d]$',3131          '124zzd')[0], 'd')3132        self.assertEqual(regex.search(r'(?r)\d++(?<=2(*PRUNE)3)zzd|[3d]$',3133          '124zzd')[0], 'd')3134        self.assertEqual(regex.search(r'\d+(*SKIP)bcd|[3d]', '123bcd')[0],3135          '123bcd')3136        self.assertEqual(regex.search(r'\d+(*SKIP)bcd|[3d]', '123zzd')[0],3137          'd')3138        self.assertEqual(regex.search(r'\d+?(*SKIP)bcd|[3d]', '123bcd')[0],3139          '3bcd')3140        self.assertEqual(regex.search(r'\d+?(*SKIP)bcd|[3d]', '123zzd')[0],3141          'd')3142        self.assertEqual(regex.search(r'\d++(?<=3(*SKIP))zzd|[4d]$',3143          '123zzd')[0], '123zzd')3144        self.assertEqual(regex.search(r'\d++(?<=3(*SKIP))zzd|[4d]$',3145          '124zzd')[0], 'd')3146        self.assertEqual(regex.search(r'\d++(?<=(*SKIP)3)zzd|[4d]$',3147          '124zzd')[0], 'd')3148        self.assertEqual(regex.search(r'\d++(?<=2(*SKIP)3)zzd|[3d]$',3149          '124zzd')[0], 'd')3150        self.assertEqual(regex.search(r'(?r)\d+(*SKIP)bcd|[3d]', '123bcd')[0],3151          '123bcd')3152        self.assertEqual(regex.search(r'(?r)\d+(*SKIP)bcd|[3d]', '123zzd')[0],3153          'd')3154        self.assertEqual(regex.search(r'(?r)\d++(?<=3(*SKIP))zzd|[4d]$',3155          '123zzd')[0], '123zzd')3156        self.assertEqual(regex.search(r'(?r)\d++(?<=3(*SKIP))zzd|[4d]$',3157          '124zzd')[0], 'd')3158        self.assertEqual(regex.search(r'(?r)\d++(?<=(*SKIP)3)zzd|[4d]$',3159          '124zzd')[0], 'd')3160        self.assertEqual(regex.search(r'(?r)\d++(?<=2(*SKIP)3)zzd|[3d]$',3161          '124zzd')[0], 'd')3162        # Hg issue 154: Segmentation fault 11 when working with an atomic group3163        text = """June 30, December 31, 2013 20123164some words follow:3165more words and numbers 1,234,567 9,876,5423166more words and numbers 1,234,567 9,876,542"""3167        
self.assertEqual(len(regex.findall(r'(?<!\d)(?>2014|2013 ?2012)', text)), 1)3168        # Hg issue 156: regression on atomic grouping3169        self.assertEqual(regex.match('1(?>2)', '12').span(), (0, 2))3170        # Hg issue 157: regression: segfault on complex lookaround3171        self.assertEqual(regex.match(r'(?V1w)(?=(?=[^A-Z]*+[A-Z])(?=[^a-z]*+[a-z]))(?=\D*+\d)(?=\p{Alphanumeric}*+\P{Alphanumeric})\A(?s:.){8,255}+\Z',3172          'AAaa11!!')[0], 'AAaa11!!')3173        # Hg issue 158: Group issue with (?(DEFINE)...)3174        TEST_REGEX = regex.compile(r'''(?smx)3175(?(DEFINE)3176  (?<subcat>3177   ^,[^,]+,3178   )3179)3180# Group 2 is defined on this line3181^,([^,]+),3182(?:(?!(?&subcat)[\r\n]+(?&subcat)).)+3183''')3184        TEST_DATA = '''3185,Cat 1,3186,Brand 1,3187some3188thing3189,Brand 2,3190other3191things3192,Cat 2,3193,Brand,3194Some3195thing3196'''3197        self.assertEqual([m.span(1, 2) for m in3198          TEST_REGEX.finditer(TEST_DATA)], [((-1, -1), (2, 7)), ((-1, -1), (54,3199          59))])3200        # Hg issue 161: Unexpected fuzzy match results3201        self.assertEqual(regex.search('(abcdefgh){e}',3202          '******abcdefghijklmnopqrtuvwxyz', regex.BESTMATCH).span(), (6, 14))3203        self.assertEqual(regex.search('(abcdefghi){e}',3204          '******abcdefghijklmnopqrtuvwxyz', regex.BESTMATCH).span(), (6, 15))3205        # Hg issue 163: allow lookarounds in conditionals.3206        self.assertEqual(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc').span(),3207          (0, 6))3208        self.assertEqual(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'), None)3209        self.assertEqual(regex.search(r'(?(?<=love\s)you|(?<=hate\s)her)',3210          "I love you").span(), (7, 10))3211        self.assertEqual(regex.findall(r'(?(?<=love\s)you|(?<=hate\s)her)',3212          "I love you but I don't hate her either"), ['you', 'her'])3213        # Hg issue 180: bug of POSIX matching.3214        
self.assertEqual(regex.search(r'(?p)a*(.*?)', 'aaabbb').group(0, 1),3215          ('aaabbb', 'bbb'))3216        self.assertEqual(regex.search(r'(?p)a*(.*)', 'aaabbb').group(0, 1),3217          ('aaabbb', 'bbb'))3218        self.assertEqual(regex.sub(r'(?p)a*(.*?)', r'\1', 'aaabbb'), 'bbb')3219        self.assertEqual(regex.sub(r'(?p)a*(.*)', r'\1', 'aaabbb'), 'bbb')3220        # Hg issue 192: Named lists reverse matching doesn't work with3221        # IGNORECASE and V13222        self.assertEqual(regex.match(r'(?irV0)\L<kw>', '21', kw=['1']).span(),3223          (1, 2))3224        self.assertEqual(regex.match(r'(?irV1)\L<kw>', '21', kw=['1']).span(),3225          (1, 2))3226        # Hg issue 193: Alternation and .REVERSE flag.3227        self.assertEqual(regex.search('a|b', '111a222').span(), (3, 4))3228        self.assertEqual(regex.search('(?r)a|b', '111a222').span(), (3, 4))3229        # Hg issue 194: .FULLCASE and Backreference3230        self.assertEqual(regex.search(r'(?if)<(CLI)><\1>',3231          '<cli><cli>').span(), (0, 10))3232        self.assertEqual(regex.search(r'(?if)<(CLI)><\1>',3233          '<cli><clI>').span(), (0, 10))3234        self.assertEqual(regex.search(r'(?ifr)<\1><(CLI)>',3235          '<cli><clI>').span(), (0, 10))3236        # Hg issue 195: Pickle (or otherwise serial) the compiled regex3237        r = regex.compile(r'\L<options>', options=['foo', 'bar'])3238        p = pickle.dumps(r)3239        r = pickle.loads(p)3240        self.assertEqual(r.match('foo').span(), (0, 3))3241        # Hg issue 196: Fuzzy matching on repeated regex not working as3242        # expected3243        self.assertEqual(regex.match('(x{6}){e<=1}', 'xxxxxx',3244          flags=regex.BESTMATCH).span(), (0, 6))3245        self.assertEqual(regex.match('(x{6}){e<=1}', 'xxxxx',3246          flags=regex.BESTMATCH).span(), (0, 5))3247        self.assertEqual(regex.match('(x{6}){e<=1}', 'x',3248          flags=regex.BESTMATCH), None)3249        
self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'xxxxxx',3250          flags=regex.BESTMATCH).span(), (0, 6))3251        self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'xxxxx',3252          flags=regex.BESTMATCH).span(), (0, 5))3253        self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'x',3254          flags=regex.BESTMATCH), None)3255        # Hg issue 197: ValueError in regex.compile3256        self.assertRaises(regex.error, lambda:3257          regex.compile(b'00000\\0\\00\^\50\\00\U05000000'))3258        # Hg issue 198: ValueError in regex.compile3259        self.assertRaises(regex.error, lambda: regex.compile(b"{e<l"))3260        # Hg issue 199: Segfault in re.compile3261        self.assertEquals(bool(regex.compile('((?0)){e}')), True)3262        # Hg issue 200: AttributeError in regex.compile with latest regex3263        self.assertEquals(bool(regex.compile('\x00?(?0){e}')), True)3264        # Hg issue 201: ENHANCEMATCH crashes interpreter3265        self.assertEquals(regex.findall(r'((brown)|(lazy)){1<=e<=3} ((dog)|(fox)){1<=e<=3}',3266          'The quick borwn fax jumped over the lzy hog', regex.ENHANCEMATCH),3267          [('borwn', 'borwn', '', 'fax', '', 'fax'), ('lzy', '', 'lzy', 'hog',3268          'hog', '')])3269        # Hg issue 203: partial matching bug3270        self.assertEquals(regex.search(r'\d\d\d-\d\d-\d\d\d\d',3271          "My SSN is 999-89-76, but don't tell.", partial=True).span(), (36,3272          36))3273        # Hg issue 204: confusion of (?aif) flags3274        upper_i = '\N{CYRILLIC CAPITAL LETTER SHORT I}'3275        lower_i = '\N{CYRILLIC SMALL LETTER SHORT I}'3276        self.assertEquals(bool(regex.match(r'(?ui)' + upper_i,3277          lower_i)), True)3278        self.assertEquals(bool(regex.match(r'(?ui)' + lower_i,3279          upper_i)), True)3280        self.assertEquals(bool(regex.match(r'(?ai)' + upper_i,3281          lower_i)), False)3282        self.assertEquals(bool(regex.match(r'(?ai)' + lower_i,3283         
 upper_i)), False)3284        self.assertEquals(bool(regex.match(r'(?afi)' + upper_i,3285          lower_i)), False)3286        self.assertEquals(bool(regex.match(r'(?afi)' + lower_i,3287          upper_i)), False)3288        # Hg issue 205: Named list and (?ri) flags3289        self.assertEquals(bool(regex.search(r'(?i)\L<aa>', '22', aa=['121',3290          '22'])), True)3291        self.assertEquals(bool(regex.search(r'(?ri)\L<aa>', '22', aa=['121',3292          '22'])), True)3293        self.assertEquals(bool(regex.search(r'(?fi)\L<aa>', '22', aa=['121',3294          '22'])), True)3295        self.assertEquals(bool(regex.search(r'(?fri)\L<aa>', '22', aa=['121',3296          '22'])), True)3297        # Hg issue 208: Named list, (?ri) flags, Backreference3298        self.assertEquals(regex.search(r'(?r)\1dog..(?<=(\L<aa>))$', 'ccdogcc',3299          aa=['bcb', 'cc']). span(), (0, 7))3300        self.assertEquals(regex.search(r'(?ir)\1dog..(?<=(\L<aa>))$',3301          'ccdogcc', aa=['bcb', 'cc']). span(), (0, 7))3302        # Hg issue 210: Fuzzy matching and Backreference3303        self.assertEquals(regex.search(r'(2)(?:\1{5}){e<=1}',3304          '3222212').span(), (1, 7))3305        self.assertEquals(regex.search(r'(\d)(?:\1{5}){e<=1}',3306          '3222212').span(), (1, 7))3307        # Hg issue 211: Segmentation fault with recursive matches and atomic3308        # groups3309        self.assertEquals(regex.match(r'''\A(?P<whole>(?>\((?&whole)\)|[+\-]))\Z''',3310          '((-))').span(), (0, 5))3311        self.assertEquals(regex.match(r'''\A(?P<whole>(?>\((?&whole)\)|[+\-]))\Z''',3312          '((-)+)'), None)3313        # Hg issue 212: Unexpected matching difference with .*? between re and3314        # regex3315        self.assertEquals(regex.match(r"x.*? (.).*\1(.*)\1",3316          'x  |y| z|').span(), (0, 9))3317        self.assertEquals(regex.match(r"\.sr (.*?) 
(.)(.*)\2(.*)\2(.*)",3318          r'.sr  h |<nw>|<span class="locked">|').span(), (0, 35))3319        # Hg issue 213: Segmentation Fault3320        a = '"\\xF9\\x80\\xAEqdz\\x95L\\xA7\\x89[\\xFE \\x91)\\xF9]\\xDB\'\\x99\\x09=\\x00\\xFD\\x98\\x22\\xDD\\xF1\\xB6\\xC3 Z\\xB6gv\\xA5x\\x93P\\xE1r\\x14\\x8Cv\\x0C\\xC0w\\x15r\\xFFc%" '3321        py_regex_pattern = r'''(?P<http_referer>((?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)))) (?P<useragent>((?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))))'''3322        self.assertEqual(bool(regex.search(py_regex_pattern, a)), False)3323        # Hg Issue 216: Invalid match when using negative lookbehind and pipe3324        self.assertEqual(bool(regex.match('foo(?<=foo)', 'foo')), True)3325        self.assertEqual(bool(regex.match('foo(?<!foo)', 'foo')), False)3326        self.assertEqual(bool(regex.match('foo(?<=foo|x)', 'foo')), True)3327        self.assertEqual(bool(regex.match('foo(?<!foo|x)', 'foo')), False)3328        # Hg issue 217: Core dump in conditional ahead match and matching \!3329        # character3330        self.assertEqual(bool(regex.match(r'(?(?=.*\!.*)(?P<true>.*\!\w*\:.*)|(?P<false>.*))',3331          '!')), False)3332        # Hg issue 220: Misbehavior of group capture with OR operand3333        self.assertEqual(regex.match(r'\w*(ea)\w*|\w*e(?!a)\w*',3334          'easier').groups(), ('ea', ))3335        # Hg issue 225: BESTMATCH in fuzzy match not working3336        self.assertEqual(regex.search('(^1234$){i,d}', '12234',3337          regex.BESTMATCH).span(), (0, 5))3338        self.assertEqual(regex.search('(^1234$){i,d}', '12234',3339          regex.BESTMATCH).fuzzy_counts, (0, 1, 0))3340        self.assertEqual(regex.search('(^1234$){s,i,d}', '12234',3341          regex.BESTMATCH).span(), (0, 5))3342        self.assertEqual(regex.search('(^1234$){s,i,d}', '12234',3343          regex.BESTMATCH).fuzzy_counts, (0, 1, 0))3344 
       # Hg issue 226: Error matching at start of string3345        self.assertEqual(regex.search('(^123$){s,i,d}', 'xxxxxxxx123',3346          regex.BESTMATCH).span(), (0, 11))3347        self.assertEqual(regex.search('(^123$){s,i,d}', 'xxxxxxxx123',3348          regex.BESTMATCH).fuzzy_counts, (0, 8, 0))3349        # Hg issue 227: Incorrect behavior for ? operator with UNICODE +3350        # IGNORECASE3351        self.assertEqual(regex.search(r'a?yz', 'xxxxyz', flags=regex.FULLCASE |3352          regex.IGNORECASE).span(), (4, 6))3353        # Hg issue 230: Is it a bug of (?(DEFINE)...)3354        self.assertEqual(regex.findall(r'(?:(?![a-d]).)+', 'abcdefgh'),3355          ['efgh'])3356        self.assertEqual(regex.findall(r'''(?(DEFINE)(?P<mydef>(?:(?![a-d]).)))(?&mydef)+''',3357          'abcdefgh'), ['efgh'])3358        # Hg issue 238: Not fully re backward compatible3359        self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){1,3}',3360          '"Erm....yes. T..T...Thank you for that."'), [('Erm....', 'Erm',3361          '....'), ('T...', 'T', '...')])3362        self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){3}',3363          '"Erm....yes. T..T...Thank you for that."'), [])3364        self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){2}',3365          '"Erm....yes. T..T...Thank you for that."'), [('T...', 'T', '...')])3366        self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){1}',3367          '"Erm....yes. 
T..T...Thank you for that."'), [('Erm....', 'Erm',3368          '....'), ('T..', 'T', '..'), ('T...', 'T', '...')])3369        # Hg issue 247: Unexpected result with fuzzy matching and lookahead3370        # expression3371        self.assertEqual(regex.search(r'(?:ESTONIA(?!\w)){e<=1}',3372          'ESTONIAN WORKERS').group(), 'ESTONIAN')3373        self.assertEqual(regex.search(r'(?:ESTONIA(?=\W)){e<=1}',3374          'ESTONIAN WORKERS').group(), 'ESTONIAN')3375        self.assertEqual(regex.search(r'(?:(?<!\w)ESTONIA){e<=1}',3376          'BLUB NESTONIA').group(), 'NESTONIA')3377        self.assertEqual(regex.search(r'(?:(?<=\W)ESTONIA){e<=1}',3378          'BLUB NESTONIA').group(), 'NESTONIA')3379        self.assertEqual(regex.search(r'(?r)(?:ESTONIA(?!\w)){e<=1}',3380          'ESTONIAN WORKERS').group(), 'ESTONIAN')3381        self.assertEqual(regex.search(r'(?r)(?:ESTONIA(?=\W)){e<=1}',3382          'ESTONIAN WORKERS').group(), 'ESTONIAN')3383        self.assertEqual(regex.search(r'(?r)(?:(?<!\w)ESTONIA){e<=1}',3384          'BLUB NESTONIA').group(), 'NESTONIA')3385        self.assertEqual(regex.search(r'(?r)(?:(?<=\W)ESTONIA){e<=1}',3386          'BLUB NESTONIA').group(), 'NESTONIA')3387        # Hg issue 248: Unexpected result with fuzzy matching and more than one3388        # non-greedy quantifier3389        self.assertEquals(regex.search(r'(?:A.*B.*CDE){e<=2}',3390          'A B CYZ').group(), 'A B CYZ')3391        self.assertEquals(regex.search(r'(?:A.*B.*?CDE){e<=2}',3392          'A B CYZ').group(), 'A B CYZ')3393        self.assertEquals(regex.search(r'(?:A.*?B.*CDE){e<=2}',3394          'A B CYZ').group(), 'A B CYZ')3395        self.assertEquals(regex.search(r'(?:A.*?B.*?CDE){e<=2}',3396          'A B CYZ').group(), 'A B CYZ')3397        # Hg issue 249: Add an option to regex.escape() to not escape spaces3398        self.assertEquals(regex.escape(' ,0A[', special_only=False, literal_spaces=False), '\\ \\,0A\\[')3399        
self.assertEquals(regex.escape(' ,0A[', special_only=False, literal_spaces=True), ' \\,0A\\[')3400        self.assertEquals(regex.escape(' ,0A[', special_only=True, literal_spaces=False), '\\ ,0A\\[')3401        self.assertEquals(regex.escape(' ,0A[', special_only=True, literal_spaces=True), ' ,0A\\[')3402        self.assertEquals(regex.escape(' ,0A['), '\\ ,0A\\[')3403        # Hg issue 251: Segfault with a particular expression3404        self.assertEquals(regex.search(r'(?(?=A)A|B)', 'A').span(), (0, 1))3405        self.assertEquals(regex.search(r'(?(?=A)A|B)', 'B').span(), (0, 1))3406        self.assertEquals(regex.search(r'(?(?=A)A|)', 'B').span(), (0, 0))3407        self.assertEquals(regex.search(r'(?(?=X)X|)', '').span(), (0, 0))3408        self.assertEquals(regex.search(r'(?(?=X))', '').span(), (0, 0))3409        # Hg issue 252: Empty capture strings when using DEFINE group reference3410        # within look-behind expression3411        self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?&func)',3412          'abc').groups(), (None, ))3413        self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?&func)',3414          'abc').groupdict(), {'func': None})3415        self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?&func)',3416          'abc').capturesdict(), {'func': ['a']})3417        self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?=(?&func))',3418          'abc').groups(), (None, ))3419        self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?=(?&func))',3420          'abc').groupdict(), {'func': None})3421        self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?=(?&func))',3422          'abc').capturesdict(), {'func': ['a']})3423        self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.)).(?<=(?&func))',3424          'abc').groups(), (None, ))3425        self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.)).(?<=(?&func))',3426          'abc').groupdict(), {'func': None})3427        
self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.)).(?<=(?&func))',3428          'abc').capturesdict(), {'func': ['a']})3429    def test_subscripted_captures(self):3430        self.assertEqual(regex.match(r'(?P<x>.)+',3431          'abc').expandf('{0} {0[0]} {0[-1]}'), 'abc abc abc')3432        self.assertEqual(regex.match(r'(?P<x>.)+',3433          'abc').expandf('{1} {1[0]} {1[1]} {1[2]} {1[-1]} {1[-2]} {1[-3]}'),3434          'c a b c c b a')3435        self.assertEqual(regex.match(r'(?P<x>.)+',3436          'abc').expandf('{x} {x[0]} {x[1]} {x[2]} {x[-1]} {x[-2]} {x[-3]}'),3437          'c a b c c b a')3438        self.assertEqual(regex.subf(r'(?P<x>.)+', r'{0} {0[0]} {0[-1]}',3439          'abc'), 'abc abc abc')3440        self.assertEqual(regex.subf(r'(?P<x>.)+',3441          '{1} {1[0]} {1[1]} {1[2]} {1[-1]} {1[-2]} {1[-3]}', 'abc'),3442          'c a b c c b a')3443        self.assertEqual(regex.subf(r'(?P<x>.)+',3444          '{x} {x[0]} {x[1]} {x[2]} {x[-1]} {x[-2]} {x[-3]}', 'abc'),3445          'c a b c c b a')3446if sys.version_info < (3, 2, 0):3447    # In Python 3.1 it's called assertRaisesRegexp.3448    RegexTests.assertRaisesRegex = RegexTests.assertRaisesRegexp3449def test_main():3450    run_unittest(RegexTests)3451if __name__ == "__main__":...mode-kotlin.js
Source:mode-kotlin.js  
define("ace/mode/kotlin_highlight_rules",["require","exports","module","ace/lib/oop","ace/mode/text_highlight_rules"], function(require, exports, module) {
"use strict";

var oop = require("../lib/oop");
var TextHighlightRules = require("./text_highlight_rules").TextHighlightRules;

// Highlight rules for Kotlin.
//
// `$rules` maps a state name to an ordered list of rules.  Entries whose name
// starts with "#" are rule groups that other states pull in via `include`.
// A rule with `push` enters the nested state described by its array; a rule
// with `next: "pop"` leaves it again.  Rules of a state are tried in order and
// the first one that matches is used, so a longer operator must be listed
// before any shorter operator that is a prefix of it.
var KotlinHighlightRules = function() {
    this.$rules = {
        start: [
            {include: "#comments"},
            {
                token: [
                    "text",
                    "keyword.other.kotlin",
                    "text",
                    "entity.name.package.kotlin",
                    "text"
                ],
                regex: /^(\s*)(package)\b(?:(\s*)([^ ;$]+)(\s*))?/
            },
            {include: "#imports"},
            {include: "#statements"}
        ],

        // class / object / interface declarations, optionally `companion`.
        "#classes": [{
            token: "text",
            regex: /(?=\s*(?:companion|class|object|interface))/,
            push: [{
                token: "text",
                regex: /}|(?=$)/,
                next: "pop"
            }, {
                token: ["keyword.other.kotlin", "text"],
                regex: /\b((?:companion\s*)?)(class|object|interface)\b/,
                // Declaration name, up to the first of < { ( :
                push: [{
                    token: "text",
                    regex: /(?=<|{|\(|:)/,
                    next: "pop"
                }, {
                    token: "keyword.other.kotlin",
                    regex: /\bobject\b/
                }, {
                    token: "entity.name.type.class.kotlin",
                    regex: /\w+/
                }]
            }, {
                token: "text",
                regex: /</,
                push: [
                    {token: "text", regex: />/, next: "pop"},
                    {include: "#generics"}
                ]
            }, {
                token: "text",
                regex: /\(/,
                push: [
                    {token: "text", regex: /\)/, next: "pop"},
                    {include: "#parameters"}
                ]
            }, {
                // Supertype list after ':'
                token: "keyword.operator.declaration.kotlin",
                regex: /:/,
                push: [{
                    token: "text",
                    regex: /(?={|$)/,
                    next: "pop"
                }, {
                    token: "entity.other.inherited-class.kotlin",
                    regex: /\w+/
                }, {
                    token: "text",
                    regex: /\(/,
                    push: [
                        {token: "text", regex: /\)/, next: "pop"},
                        {include: "#expressions"}
                    ]
                }]
            }, {
                token: "text",
                regex: /\{/,
                push: [
                    {token: "text", regex: /\}/, next: "pop"},
                    {include: "#statements"}
                ]
            }]
        }],

        "#comments": [{
            token: "punctuation.definition.comment.kotlin",
            regex: /\/\*/,
            push: [{
                token: "punctuation.definition.comment.kotlin",
                regex: /\*\//,
                next: "pop"
            }, {
                defaultToken: "comment.block.kotlin"
            }]
        }, {
            token: [
                "text",
                "punctuation.definition.comment.kotlin",
                "comment.line.double-slash.kotlin"
            ],
            regex: /(\s*)(\/\/)(.*$)/
        }],

        "#constants": [{
            token: "constant.language.kotlin",
            regex: /\b(?:true|false|null|this|super)\b/
        }, {
            token: "constant.numeric.kotlin",
            regex: /\b(?:0(?:x|X)[0-9a-fA-F]*|(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:(?:e|E)(?:\+|-)?[0-9]+)?)(?:[LlFfUuDd]|UL|ul)?\b/
        }, {
            token: "constant.other.kotlin",
            regex: /\b[A-Z][A-Z0-9_]+\b/
        }],

        "#expressions": [
            {
                token: "text",
                regex: /\(/,
                push: [
                    {token: "text", regex: /\)/, next: "pop"},
                    {include: "#expressions"}
                ]
            },
            {include: "#types"},
            {include: "#strings"},
            {include: "#constants"},
            {include: "#comments"},
            {include: "#keywords"}
        ],

        "#functions": [{
            token: "text",
            regex: /(?=\s*fun)/,
            push: [{
                token: "text",
                regex: /}|(?=$)/,
                next: "pop"
            }, {
                token: "keyword.other.kotlin",
                regex: /\bfun\b/,
                // Type parameters and (optionally qualified) function name,
                // up to the opening parenthesis.
                push: [{
                    token: "text",
                    regex: /(?=\()/,
                    next: "pop"
                }, {
                    token: "text",
                    regex: /</,
                    push: [
                        {token: "text", regex: />/, next: "pop"},
                        {include: "#generics"}
                    ]
                }, {
                    token: ["text", "entity.name.function.kotlin"],
                    regex: /((?:[\.<\?>\w]+\.)?)(\w+)/
                }]
            }, {
                token: "text",
                regex: /\(/,
                push: [
                    {token: "text", regex: /\)/, next: "pop"},
                    {include: "#parameters"}
                ]
            }, {
                // Return type after ':'
                token: "keyword.operator.declaration.kotlin",
                regex: /:/,
                push: [{
                    token: "text",
                    regex: /(?={|=|$)/,
                    next: "pop"
                }, {
                    include: "#types"
                }]
            }, {
                // Block body; the closing brace is left for the enclosing
                // state (lookahead only).
                token: "text",
                regex: /\{/,
                push: [{
                    token: "text",
                    regex: /(?=\})/,
                    next: "pop"
                }, {
                    include: "#statements"
                }]
            }, {
                // Expression body
                token: "keyword.operator.assignment.kotlin",
                regex: /=/,
                push: [{
                    token: "text",
                    regex: /(?=$)/,
                    next: "pop"
                }, {
                    include: "#expressions"
                }]
            }]
        }],

        "#generics": [
            {
                token: "keyword.operator.declaration.kotlin",
                regex: /:/,
                push: [{
                    token: "text",
                    regex: /(?=,|>)/,
                    next: "pop"
                }, {
                    include: "#types"
                }]
            },
            {include: "#keywords"},
            {
                token: "storage.type.generic.kotlin",
                regex: /\w+/
            }
        ],

        "#getters-and-setters": [{
            token: ["entity.name.function.kotlin", "text"],
            regex: /\b(get)\b(\s*\(\s*\))/,
            push: [{
                token: "text",
                regex: /\}|(?=\bset\b)|$/,
                next: "pop"
            }, {
                token: "keyword.operator.assignment.kotlin",
                regex: /=/,
                push: [{
                    token: "text",
                    regex: /(?=$|\bset\b)/,
                    next: "pop"
                }, {
                    include: "#expressions"
                }]
            }, {
                token: "text",
                regex: /\{/,
                push: [
                    {token: "text", regex: /\}/, next: "pop"},
                    {include: "#expressions"}
                ]
            }]
        }, {
            token: ["entity.name.function.kotlin", "text"],
            regex: /\b(set)\b(\s*)(?=\()/,
            push: [{
                token: "text",
                regex: /\}|(?=\bget\b)|$/,
                next: "pop"
            }, {
                token: "text",
                regex: /\(/,
                push: [
                    {token: "text", regex: /\)/, next: "pop"},
                    {include: "#parameters"}
                ]
            }, {
                token: "keyword.operator.assignment.kotlin",
                regex: /=/,
                push: [{
                    token: "text",
                    // NOTE(review): identical to the getter's rule; the
                    // enclosing setter state pops on `get`, so `\bget\b`
                    // may have been intended here - confirm before changing.
                    regex: /(?=$|\bset\b)/,
                    next: "pop"
                }, {
                    include: "#expressions"
                }]
            }, {
                token: "text",
                regex: /\{/,
                push: [
                    {token: "text", regex: /\}/, next: "pop"},
                    {include: "#expressions"}
                ]
            }]
        }],

        "#imports": [{
            token: [
                "text",
                "keyword.other.kotlin",
                "text",
                "keyword.other.kotlin"
            ],
            regex: /^(\s*)(import)(\s+[^ $]+\s+)((?:as)?)/
        }],

        "#keywords": [{
            token: "storage.modifier.kotlin",
            // NOTE(review): `in` is listed here and in keyword.operator
            // below; because this rule comes first, `in` is always
            // tokenized as a modifier.
            regex: /\b(?:var|val|public|private|protected|abstract|final|enum|open|attribute|annotation|override|inline|vararg|lazy|in|out|internal|data|tailrec|operator|infix|const|yield|typealias|typeof)\b/
        }, {
            token: "keyword.control.catch-exception.kotlin",
            regex: /\b(?:try|catch|finally|throw)\b/
        }, {
            token: "keyword.control.kotlin",
            regex: /\b(?:if|else|while|for|do|return|when|where|break|continue)\b/
        }, {
            token: "keyword.operator.kotlin",
            regex: /\b(?:in|is|as|assert)\b/
        }, {
            token: "keyword.operator.comparison.kotlin",
            // Three-character operators first, otherwise `===` is consumed
            // as `==` followed by a separate `=`.
            regex: /===|!==|==|!=|<=|>=|<|>/
        }, {
            token: "keyword.operator.assignment.kotlin",
            regex: /=/
        }, {
            token: "keyword.operator.declaration.kotlin",
            regex: /:/
        }, {
            // Must precede the single-dot rule, which would otherwise match
            // each dot of `..` on its own.
            token: "keyword.operator.range.kotlin",
            regex: /\.\./
        }, {
            token: "keyword.operator.dot.kotlin",
            regex: /\./
        }, {
            token: "keyword.operator.increment-decrement.kotlin",
            regex: /\-\-|\+\+/
        }, {
            // Must precede the plain arithmetic rule, which would otherwise
            // match the first character of `+=`, `-=`, ... on its own.
            token: "keyword.operator.arithmetic.assign.kotlin",
            regex: /\+=|\-=|\*=|\/=|%=/
        }, {
            token: "keyword.operator.arithmetic.kotlin",
            regex: /\-|\+|\*|\/|%/
        }, {
            token: "keyword.operator.logical.kotlin",
            regex: /!|&&|\|\|/
        }, {
            token: "punctuation.terminator.kotlin",
            regex: /;/
        }],

        "#namespaces": [{
            token: "keyword.other.kotlin",
            regex: /\bnamespace\b/
        }, {
            token: "text",
            regex: /\{/,
            push: [
                {token: "text", regex: /\}/, next: "pop"},
                {include: "#statements"}
            ]
        }],

        "#parameters": [
            {
                // Parameter type
                token: "keyword.operator.declaration.kotlin",
                regex: /:/,
                push: [{
                    token: "text",
                    regex: /(?=,|\)|=)/,
                    next: "pop"
                }, {
                    include: "#types"
                }]
            },
            {
                // Default value
                token: "keyword.operator.declaration.kotlin",
                regex: /=/,
                push: [{
                    token: "text",
                    regex: /(?=,|\))/,
                    next: "pop"
                }, {
                    include: "#expressions"
                }]
            },
            {include: "#keywords"},
            {
                token: "variable.parameter.function.kotlin",
                regex: /\w+/
            }
        ],

        "#statements": [
            {include: "#namespaces"},
            {include: "#typedefs"},
            {include: "#classes"},
            {include: "#functions"},
            {include: "#variables"},
            {include: "#getters-and-setters"},
            {include: "#expressions"}
        ],

        "#strings": [{
            // Triple-quoted string
            token: "punctuation.definition.string.begin.kotlin",
            regex: /"""/,
            push: [{
                token: "punctuation.definition.string.end.kotlin",
                regex: /"""/,
                next: "pop"
            }, {
                token: "variable.parameter.template.kotlin",
                regex: /\$\w+|\$\{[^\}]+\}/
            }, {
                token: "constant.character.escape.kotlin",
                regex: /\\./
            }, {
                defaultToken: "string.quoted.third.kotlin"
            }]
        }, {
            // Double-quoted string
            token: "punctuation.definition.string.begin.kotlin",
            regex: /"/,
            push: [{
                token: "punctuation.definition.string.end.kotlin",
                regex: /"/,
                next: "pop"
            }, {
                token: "variable.parameter.template.kotlin",
                regex: /\$\w+|\$\{[^\}]+\}/
            }, {
                token: "constant.character.escape.kotlin",
                regex: /\\./
            }, {
                defaultToken: "string.quoted.double.kotlin"
            }]
        }, {
            // Single-quoted literal
            token: "punctuation.definition.string.begin.kotlin",
            regex: /'/,
            push: [{
                token: "punctuation.definition.string.end.kotlin",
                regex: /'/,
                next: "pop"
            }, {
                token: "constant.character.escape.kotlin",
                regex: /\\./
            }, {
                defaultToken: "string.quoted.single.kotlin"
            }]
        }, {
            // Backtick-quoted text
            token: "punctuation.definition.string.begin.kotlin",
            regex: /`/,
            push: [{
                token: "punctuation.definition.string.end.kotlin",
                regex: /`/,
                next: "pop"
            }, {
                defaultToken: "string.quoted.single.kotlin"
            }]
        }],

        "#typedefs": [{
            token: "text",
            regex: /(?=\s*type)/,
            push: [
                {
                    token: "text",
                    regex: /(?=$)/,
                    next: "pop"
                },
                {
                    token: "keyword.other.kotlin",
                    regex: /\btype\b/
                },
                {
                    token: "text",
                    regex: /</,
                    push: [
                        {token: "text", regex: />/, next: "pop"},
                        {include: "#generics"}
                    ]
                },
                {include: "#expressions"}
            ]
        }],

        "#types": [{
            token: "storage.type.buildin.kotlin",
            regex: /\b(?:Any|Unit|String|Int|Boolean|Char|Long|Double|Float|Short|Byte|dynamic)\b/
        }, {
            token: "storage.type.buildin.array.kotlin",
            regex: /\b(?:IntArray|BooleanArray|CharArray|LongArray|DoubleArray|FloatArray|ShortArray|ByteArray)\b/
        }, {
            token: [
                "storage.type.buildin.collection.kotlin",
                "text"
            ],
            regex: /\b(Array|List|Map)(<\b)/,
            push: [
                {token: "text", regex: />/, next: "pop"},
                {include: "#types"},
                {include: "#keywords"}
            ]
        }, {
            token: "text",
            regex: /\w+</,
            push: [
                {token: "text", regex: />/, next: "pop"},
                {include: "#types"},
                {include: "#keywords"}
            ]
        }, {
            token: ["keyword.operator.tuple.kotlin", "text"],
            regex: /(#)(\()/,
            push: [
                {token: "text", regex: /\)/, next: "pop"},
                {include: "#expressions"}
            ]
        }, {
            token: "text",
            regex: /\{/,
            push: [
                {token: "text", regex: /\}/, next: "pop"},
                {include: "#statements"}
            ]
        }, {
            token: "text",
            regex: /\(/,
            push: [
                {token: "text", regex: /\)/, next: "pop"},
                {include: "#types"}
            ]
        }, {
            token: "keyword.operator.declaration.kotlin",
            regex: /->/
        }],

        "#variables": [{
            token: "text",
            regex: /(?=\s*(?:var|val))/,
            push: [{
                token: "text",
                regex: /(?=:|=|$)/,
                next: "pop"
            }, {
                token: "keyword.other.kotlin",
                regex: /\b(?:var|val)\b/,
                // Type parameters and (optionally qualified) variable name.
                push: [{
                    token: "text",
                    regex: /(?=:|=|$)/,
                    next: "pop"
                }, {
                    token: "text",
                    regex: /</,
                    push: [
                        {token: "text", regex: />/, next: "pop"},
                        {include: "#generics"}
                    ]
                }, {
                    token: ["text", "entity.name.variable.kotlin"],
                    regex: /((?:[\.<\?>\w]+\.)?)(\w+)/
                }]
            }, {
                token: "keyword.operator.declaration.kotlin",
                regex: /:/,
                push: [
                    {
                        token: "text",
                        regex: /(?==|$)/,
                        next: "pop"
                    },
                    {include: "#types"},
                    {include: "#getters-and-setters"}
                ]
            }, {
                token: "keyword.operator.assignment.kotlin",
                regex: /=/,
                push: [
                    {
                        token: "text",
                        regex: /(?=$)/,
                        next: "pop"
                    },
                    {include: "#expressions"},
                    {include: "#getters-and-setters"}
                ]
            }]
        }]
    };

    this.normalizeRules();
};

KotlinHighlightRules.metaData = {
    fileTypes: ["kt", "kts"],
    name: "Kotlin",
    scopeName: "source.Kotlin"
};

oop.inherits(KotlinHighlightRules, TextHighlightRules);

exports.KotlinHighlightRules = KotlinHighlightRules;
});

define("ace/mode/folding/cstyle",["require","exports","module","ace/lib/oop","ace/range","ace/mode/folding/fold_mode"], function(require, exports, module) {
"use strict";

var oop = require("../../lib/oop");
var Range = require("../../range").Range;
var BaseFoldMode = require("./fold_mode").FoldMode;

// Fold mode for brace/bracket delimited code and block comments.
//
// commentRegex (optional): object with `start` and `end` regex source
// strings.  When given, the last `|` alternative of the start and stop
// markers is replaced by `commentRegex.start` / `commentRegex.end`.
var FoldMode = exports.FoldMode = function(commentRegex) {
    if (commentRegex) {
        this.foldingStartMarker = new RegExp(
            this.foldingStartMarker.source.replace(/\|[^|]*?$/, "|" + commentRegex.start)
        );
        this.foldingStopMarker = new RegExp(
            this.foldingStopMarker.source.replace(/\|[^|]*?$/, "|" + commentRegex.end)
        );
    }
};
oop.inherits(FoldMode, BaseFoldMode);

(function() {

    // Group 1 captures an opening/closing bracket, group 2 a comment delimiter.
    this.foldingStartMarker = /(\{|\[)[^\}\]]*$|^\s*(\/\*)/;
    this.foldingStopMarker = /^[^\[\{]*(\}|\])|^[\s\*]*(\*\/)/;
    this.singleLineBlockCommentRe= /^\s*(\/\*).*\*\/\s*$/;
    this.tripleStarBlockCommentRe = /^\s*(\/\*\*\*).*\*\/\s*$/;
    this.startRegionRe = /^\s*(\/\*|\/\/)#?region\b/;

    // Keep the inherited implementation reachable before overriding it.
    this._getFoldWidgetBase = this.getFoldWidget;

    // Returns the fold widget for `row`.  A block comment that opens and
    // closes on the same line gets no widget ("") unless it is a region
    // marker or starts with `/***`.  Otherwise the inherited result is used,
    // except that a region-start line without a widget is reported as "start".
    this.getFoldWidget = function(session, foldStyle, row) {
        var line = session.getLine(row);

        if (this.singleLineBlockCommentRe.test(line)) {
            if (!this.startRegionRe.test(line) && !this.tripleStarBlockCommentRe.test(line))
                return "";
        }

        var fw = this._getFoldWidgetBase(session, foldStyle, row);

        if (!fw && this.startRegionRe.test(line))
            return "start"; // lineCommentRegionStart

        return fw;
    };

    // Returns the range folded from `row`, or undefined/null when there is
    // none.  Region markers are tried first, then the start marker
    // (bracket or comment opener), then, unless foldStyle is "markbegin",
    // the stop marker.
    this.getFoldWidgetRange = function(session, foldStyle, row, forceMultiline) {
        var line = session.getLine(row);

        if (this.startRegionRe.test(line))
            return this.getCommentRegionBlock(session, line, row);

        var match = line.match(this.foldingStartMarker);
        if (match) {
            var i = match.index;
            if (match[1])
                return this.openingBracketBlock(session, match[1], row, i);

            var range = session.getCommentFoldRange(row, i + match[0].length, 1);

            // A comment that fits on one line is widened to the surrounding
            // section when forced, kept only for foldStyle "all", else dropped.
            if (range && !range.isMultiLine()) {
                if (forceMultiline) {
                    range = this.getSectionRange(session, row);
                } else if (foldStyle != "all")
                    range = null;
            }

            return range;
        }

        if (foldStyle === "markbegin")
            return;

        var stopMatch = line.match(this.foldingStopMarker);
        if (stopMatch) {
            var end = stopMatch.index + stopMatch[0].length;
            if (stopMatch[1])
                return this.closingBracketBlock(session, stopMatch[1], row, end);
            return session.getCommentFoldRange(row, end, -1);
        }
    };

    // Returns a range from the end of line `row` to the end of the last
    // following line that still belongs to the section, judged by
    // indentation and by the fold ranges of the lines scanned.
    this.getSectionRange = function(session, row) {
        var line = session.getLine(row);
        var startIndent = line.search(/\S/);
        var startRow = row;
        var startColumn = line.length;
        row = row + 1;
        var endRow = row;
        var maxRow = session.getLength();
        while (++row < maxRow) {
            line = session.getLine(row);
            var indent = line.search(/\S/);
            // Lines without any non-whitespace character are skipped.
            if (indent === -1)
                continue;
            if  (startIndent > indent)
                break;
            var subRange = this.getFoldWidgetRange(session, "all", row);

            if (subRange) {
                if (subRange.start.row <= startRow) {
                    break;
                } else if (subRange.isMultiLine()) {
                    // Jump over the nested multi-line fold.
                    row = subRange.end.row;
                } else if (startIndent == indent) {
                    break;
                }
            }
            endRow = row;
        }

        return new Range(startRow, startColumn, endRow, session.getLine(endRow).length);
    };

    // Returns the range of a `region` block starting on `row`: scans
    // forward, counting nested region / endregion markers, and stops when
    // the depth reaches zero or the last row has been read.  Returns
    // undefined when the scan did not advance past `row`.
    this.getCommentRegionBlock = function(session, line, row) {
        var startColumn = line.search(/\s*$/);
        var maxRow = session.getLength();
        var startRow = row;

        var re = /^\s*(?:\/\*|\/\/|--)#?(end)?region\b/;
        var depth = 1;
        while (++row < maxRow) {
            line = session.getLine(row);
            var m = re.exec(line);
            if (!m) continue;
            if (m[1]) depth--;
            else depth++;
            if (!depth) break;
        }

        var endRow = row;
        if (endRow > startRow) {
            return new Range(startRow, startColumn, endRow, line.length);
        }
    };
}).call(FoldMode.prototype);
});

define("ace/mode/kotlin",["require","exports","module","ace/lib/oop","ace/mode/text","ace/mode/kotlin_highlight_rules","ace/mode/folding/cstyle"], function(require, exports, module) {
"use strict";

var oop = require("../lib/oop");
var TextMode = require("./text").Mode;
var KotlinHighlightRules = require("./kotlin_highlight_rules").KotlinHighlightRules;
var FoldMode = require("./folding/cstyle").FoldMode;

// Kotlin edit mode: text mode combined with the Kotlin highlight rules and
// C-style folding.
var Mode = function() {
    this.HighlightRules = KotlinHighlightRules;
    this.foldingRules = new FoldMode();
};
oop.inherits(Mode, TextMode);

(function() {
    this.$id = "ace/mode/kotlin";
}).call(Mode.prototype);

exports.Mode = Mode;
// NOTE(review): the extracted source elides everything after the export
// above ("..."); only the closing of this define() callback is restored
// here so that the module parses - confirm against the original file.
});
Source:kotlin_highlight_rules.js  
1/* ***** BEGIN LICENSE BLOCK *****2 * Distributed under the BSD license:3 *4 * Copyright (c) 2012, Ajax.org B.V.5 * All rights reserved.6 *7 * Redistribution and use in source and binary forms, with or without8 * modification, are permitted provided that the following conditions are met:9 *     * Redistributions of source code must retain the above copyright10 *       notice, this list of conditions and the following disclaimer.11 *     * Redistributions in binary form must reproduce the above copyright12 *       notice, this list of conditions and the following disclaimer in the13 *       documentation and/or other materials provided with the distribution.14 *     * Neither the name of Ajax.org B.V. nor the15 *       names of its contributors may be used to endorse or promote products16 *       derived from this software without specific prior written permission.17 *18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE21 * DISCLAIMED. IN NO EVENT SHALL AJAX.ORG B.V. BE LIABLE FOR ANY22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.28 *29 * ***** END LICENSE BLOCK ***** */30/* This file was autogenerated from Kotlin.tmLanguage (uuid: ) */31/****************************************************************************************32 * IT MIGHT NOT BE PERFECT ...But it's a good start from an existing *.tmlanguage file. 
*33 * fileTypes                                                                            *34 ****************************************************************************************/35define(function(require, exports, module) {36"use strict";37var oop = require("../lib/oop");38var TextHighlightRules = require("./text_highlight_rules").TextHighlightRules;39var KotlinHighlightRules = function() {40    // regexp must not have capturing parentheses. Use (?:) instead.41    // regexps are ordered -> the first match is used42    this.$rules = {43        start: [{44            include: "#comments"45        }, {46            token: [47                "text",48                "keyword.other.kotlin",49                "text",50                "entity.name.package.kotlin",51                "text"52            ],53            regex: /^(\s*)(package)\b(?:(\s*)([^ ;$]+)(\s*))?/54        }, {55            include: "#imports"56        }, {57            include: "#statements"58        }],59        "#classes": [{60            token: "text",61            regex: /(?=\s*(?:companion|class|object|interface))/,62            push: [{63                token: "text",64                regex: /}|(?=$)/,65                next: "pop"66            }, {67                token: ["keyword.other.kotlin", "text"],68                regex: /\b((?:companion\s*)?)(class|object|interface)\b/,69                push: [{70                    token: "text",71                    regex: /(?=<|{|\(|:)/,72                    next: "pop"73                }, {74                    token: "keyword.other.kotlin",75                    regex: /\bobject\b/76                }, {77                    token: "entity.name.type.class.kotlin",78                    regex: /\w+/79                }]80            }, {81                token: "text",82                regex: /</,83                push: [{84                    token: "text",85                    regex: />/,86                    next: "pop"87                }, {88    
                include: "#generics"89                }]90            }, {91                token: "text",92                regex: /\(/,93                push: [{94                    token: "text",95                    regex: /\)/,96                    next: "pop"97                }, {98                    include: "#parameters"99                }]100            }, {101                token: "keyword.operator.declaration.kotlin",102                regex: /:/,103                push: [{104                    token: "text",105                    regex: /(?={|$)/,106                    next: "pop"107                }, {108                    token: "entity.other.inherited-class.kotlin",109                    regex: /\w+/110                }, {111                    token: "text",112                    regex: /\(/,113                    push: [{114                        token: "text",115                        regex: /\)/,116                        next: "pop"117                    }, {118                        include: "#expressions"119                    }]120                }]121            }, {122                token: "text",123                regex: /\{/,124                push: [{125                    token: "text",126                    regex: /\}/,127                    next: "pop"128                }, {129                    include: "#statements"130                }]131            }]132        }],133        "#comments": [{134            token: "punctuation.definition.comment.kotlin",135            regex: /\/\*/,136            push: [{137                token: "punctuation.definition.comment.kotlin",138                regex: /\*\//,139                next: "pop"140            }, {141                defaultToken: "comment.block.kotlin"142            }]143        }, {144            token: [145                "text",146                "punctuation.definition.comment.kotlin",147                "comment.line.double-slash.kotlin"148            ],149            
regex: /(\s*)(\/\/)(.*$)/150        }],151        "#constants": [{152            token: "constant.language.kotlin",153            regex: /\b(?:true|false|null|this|super)\b/154        }, {155            token: "constant.numeric.kotlin",156            regex: /\b(?:0(?:x|X)[0-9a-fA-F]*|(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:(?:e|E)(?:\+|-)?[0-9]+)?)(?:[LlFfUuDd]|UL|ul)?\b/157        }, {158            token: "constant.other.kotlin",159            regex: /\b[A-Z][A-Z0-9_]+\b/160        }],161        "#expressions": [{162            token: "text",163            regex: /\(/,164            push: [{165                token: "text",166                regex: /\)/,167                next: "pop"168            }, {169                include: "#expressions"170            }]171        }, {172            include: "#types"173        }, {174            include: "#strings"175        }, {176            include: "#constants"177        }, {178            include: "#comments"179        }, {180            include: "#keywords"181        }],182        "#functions": [{183            token: "text",184            regex: /(?=\s*fun)/,185            push: [{186                token: "text",187                regex: /}|(?=$)/,188                next: "pop"189            }, {190                token: "keyword.other.kotlin",191                regex: /\bfun\b/,192                push: [{193                    token: "text",194                    regex: /(?=\()/,195                    next: "pop"196                }, {197                    token: "text",198                    regex: /</,199                    push: [{200                        token: "text",201                        regex: />/,202                        next: "pop"203                    }, {204                        include: "#generics"205                    }]206                }, {207                    token: ["text", "entity.name.function.kotlin"],208                    regex: /((?:[\.<\?>\w]+\.)?)(\w+)/209                }]210       
     }, {211                token: "text",212                regex: /\(/,213                push: [{214                    token: "text",215                    regex: /\)/,216                    next: "pop"217                }, {218                    include: "#parameters"219                }]220            }, {221                token: "keyword.operator.declaration.kotlin",222                regex: /:/,223                push: [{224                    token: "text",225                    regex: /(?={|=|$)/,226                    next: "pop"227                }, {228                    include: "#types"229                }]230            }, {231                token: "text",232                regex: /\{/,233                push: [{234                    token: "text",235                    regex: /(?=\})/,236                    next: "pop"237                }, {238                    include: "#statements"239                }]240            }, {241                token: "keyword.operator.assignment.kotlin",242                regex: /=/,243                push: [{244                    token: "text",245                    regex: /(?=$)/,246                    next: "pop"247                }, {248                    include: "#expressions"249                }]250            }]251        }],252        "#generics": [{253            token: "keyword.operator.declaration.kotlin",254            regex: /:/,255            push: [{256                token: "text",257                regex: /(?=,|>)/,258                next: "pop"259            }, {260                include: "#types"261            }]262        }, {263            include: "#keywords"264        }, {265            token: "storage.type.generic.kotlin",266            regex: /\w+/267        }],268        "#getters-and-setters": [{269            token: ["entity.name.function.kotlin", "text"],270            regex: /\b(get)\b(\s*\(\s*\))/,271            push: [{272                token: "text",273                
regex: /\}|(?=\bset\b)|$/,274                next: "pop"275            }, {276                token: "keyword.operator.assignment.kotlin",277                regex: /=/,278                push: [{279                    token: "text",280                    regex: /(?=$|\bset\b)/,281                    next: "pop"282                }, {283                    include: "#expressions"284                }]285            }, {286                token: "text",287                regex: /\{/,288                push: [{289                    token: "text",290                    regex: /\}/,291                    next: "pop"292                }, {293                    include: "#expressions"294                }]295            }]296        }, {297            token: ["entity.name.function.kotlin", "text"],298            regex: /\b(set)\b(\s*)(?=\()/,299            push: [{300                token: "text",301                regex: /\}|(?=\bget\b)|$/,302                next: "pop"303            }, {304                token: "text",305                regex: /\(/,306                push: [{307                    token: "text",308                    regex: /\)/,309                    next: "pop"310                }, {311                    include: "#parameters"312                }]313            }, {314                token: "keyword.operator.assignment.kotlin",315                regex: /=/,316                push: [{317                    token: "text",318                    regex: /(?=$|\bset\b)/,319                    next: "pop"320                }, {321                    include: "#expressions"322                }]323            }, {324                token: "text",325                regex: /\{/,326                push: [{327                    token: "text",328                    regex: /\}/,329                    next: "pop"330                }, {331                    include: "#expressions"332                }]333            }]334        }],335        "#imports": [{336      
      token: [337                "text",338                "keyword.other.kotlin",339                "text",340                "keyword.other.kotlin"341            ],342            regex: /^(\s*)(import)(\s+[^ $]+\s+)((?:as)?)/343        }],344        "#keywords": [{345            token: "storage.modifier.kotlin",346            regex: /\b(?:var|val|public|private|protected|abstract|final|enum|open|attribute|annotation|override|inline|var|val|vararg|lazy|in|out|internal|data|tailrec|operator|infix|const|yield|typealias|typeof)\b/347        }, {348            token: "keyword.control.catch-exception.kotlin",349            regex: /\b(?:try|catch|finally|throw)\b/350        }, {351            token: "keyword.control.kotlin",352            regex: /\b(?:if|else|while|for|do|return|when|where|break|continue)\b/353        }, {354            token: "keyword.operator.kotlin",355            regex: /\b(?:in|is|as|assert)\b/356        }, {357            token: "keyword.operator.comparison.kotlin",358            regex: /==|!=|===|!==|<=|>=|<|>/359        }, {360            token: "keyword.operator.assignment.kotlin",361            regex: /=/362        }, {363            token: "keyword.operator.declaration.kotlin",364            regex: /:/365        }, {366            token: "keyword.operator.dot.kotlin",367            regex: /\./368        }, {369            token: "keyword.operator.increment-decrement.kotlin",370            regex: /\-\-|\+\+/371        }, {372            token: "keyword.operator.arithmetic.kotlin",373            regex: /\-|\+|\*|\/|%/374        }, {375            token: "keyword.operator.arithmetic.assign.kotlin",376            regex: /\+=|\-=|\*=|\/=/377        }, {378            token: "keyword.operator.logical.kotlin",379            regex: /!|&&|\|\|/380        }, {381            token: "keyword.operator.range.kotlin",382            regex: /\.\./383        }, {384            token: "punctuation.terminator.kotlin",385            regex: /;/386        }],387    
    "#namespaces": [{388            token: "keyword.other.kotlin",389            regex: /\bnamespace\b/390        }, {391            token: "text",392            regex: /\{/,393            push: [{394                token: "text",395                regex: /\}/,396                next: "pop"397            }, {398                include: "#statements"399            }]400        }],401        "#parameters": [{402            token: "keyword.operator.declaration.kotlin",403            regex: /:/,404            push: [{405                token: "text",406                regex: /(?=,|\)|=)/,407                next: "pop"408            }, {409                include: "#types"410            }]411        }, {412            token: "keyword.operator.declaration.kotlin",413            regex: /=/,414            push: [{415                token: "text",416                regex: /(?=,|\))/,417                next: "pop"418            }, {419                include: "#expressions"420            }]421        }, {422            include: "#keywords"423        }, {424            token: "variable.parameter.function.kotlin",425            regex: /\w+/426        }],427        "#statements": [{428            include: "#namespaces"429        }, {430            include: "#typedefs"431        }, {432            include: "#classes"433        }, {434            include: "#functions"435        }, {436            include: "#variables"437        }, {438            include: "#getters-and-setters"439        }, {440            include: "#expressions"441        }],442        "#strings": [{443            token: "punctuation.definition.string.begin.kotlin",444            regex: /"""/,445            push: [{446                token: "punctuation.definition.string.end.kotlin",447                regex: /"""/,448                next: "pop"449            }, {450                token: "variable.parameter.template.kotlin",451                regex: /\$\w+|\$\{[^\}]+\}/452            }, {453                
token: "constant.character.escape.kotlin",454                regex: /\\./455            }, {456                defaultToken: "string.quoted.third.kotlin"457            }]458        }, {459            token: "punctuation.definition.string.begin.kotlin",460            regex: /"/,461            push: [{462                token: "punctuation.definition.string.end.kotlin",463                regex: /"/,464                next: "pop"465            }, {466                token: "variable.parameter.template.kotlin",467                regex: /\$\w+|\$\{[^\}]+\}/468            }, {469                token: "constant.character.escape.kotlin",470                regex: /\\./471            }, {472                defaultToken: "string.quoted.double.kotlin"473            }]474        }, {475            token: "punctuation.definition.string.begin.kotlin",476            regex: /'/,477            push: [{478                token: "punctuation.definition.string.end.kotlin",479                regex: /'/,480                next: "pop"481            }, {482                token: "constant.character.escape.kotlin",483                regex: /\\./484            }, {485                defaultToken: "string.quoted.single.kotlin"486            }]487        }, {488            token: "punctuation.definition.string.begin.kotlin",489            regex: /`/,490            push: [{491                token: "punctuation.definition.string.end.kotlin",492                regex: /`/,493                next: "pop"494            }, {495                defaultToken: "string.quoted.single.kotlin"496            }]497        }],498        "#typedefs": [{499            token: "text",500            regex: /(?=\s*type)/,501            push: [{502                token: "text",503                regex: /(?=$)/,504                next: "pop"505            }, {506                token: "keyword.other.kotlin",507                regex: /\btype\b/508            }, {509                token: "text",510                
regex: /</,511                push: [{512                    token: "text",513                    regex: />/,514                    next: "pop"515                }, {516                    include: "#generics"517                }]518            }, {519                include: "#expressions"520            }]521        }],522        "#types": [{523            token: "storage.type.buildin.kotlin",524            regex: /\b(?:Any|Unit|String|Int|Boolean|Char|Long|Double|Float|Short|Byte|dynamic)\b/525        }, {526            token: "storage.type.buildin.array.kotlin",527            regex: /\b(?:IntArray|BooleanArray|CharArray|LongArray|DoubleArray|FloatArray|ShortArray|ByteArray)\b/528        }, {529            token: [530                "storage.type.buildin.collection.kotlin",531                "text"532            ],533            regex: /\b(Array|List|Map)(<\b)/,534            push: [{535                token: "text",536                regex: />/,537                next: "pop"538            }, {539                include: "#types"540            }, {541                include: "#keywords"542            }]543        }, {544            token: "text",545            regex: /\w+</,546            push: [{547                token: "text",548                regex: />/,549                next: "pop"550            }, {551                include: "#types"552            }, {553                include: "#keywords"554            }]555        }, {556            token: ["keyword.operator.tuple.kotlin", "text"],557            regex: /(#)(\()/,558            push: [{559                token: "text",560                regex: /\)/,561                next: "pop"562            }, {563                include: "#expressions"564            }]565        }, {566            token: "text",567            regex: /\{/,568            push: [{569                token: "text",570                regex: /\}/,571                next: "pop"572            }, {573                include: "#statements"574  
          }]575        }, {576            token: "text",577            regex: /\(/,578            push: [{579                token: "text",580                regex: /\)/,581                next: "pop"582            }, {583                include: "#types"584            }]585        }, {586            token: "keyword.operator.declaration.kotlin",587            regex: /->/588        }],589        "#variables": [{590            token: "text",591            regex: /(?=\s*(?:var|val))/,592            push: [{593                token: "text",594                regex: /(?=:|=|$)/,595                next: "pop"596            }, {597                token: "keyword.other.kotlin",598                regex: /\b(?:var|val)\b/,599                push: [{600                    token: "text",601                    regex: /(?=:|=|$)/,602                    next: "pop"603                }, {604                    token: "text",605                    regex: /</,606                    push: [{607                        token: "text",608                        regex: />/,609                        next: "pop"610                    }, {611                        include: "#generics"612                    }]613                }, {614                    token: ["text", "entity.name.variable.kotlin"],615                    regex: /((?:[\.<\?>\w]+\.)?)(\w+)/616                }]617            }, {618                token: "keyword.operator.declaration.kotlin",619                regex: /:/,620                push: [{621                    token: "text",622                    regex: /(?==|$)/,623                    next: "pop"624                }, {625                    include: "#types"626                }, {627                    include: "#getters-and-setters"628                }]629            }, {630                token: "keyword.operator.assignment.kotlin",631                regex: /=/,632                push: [{633                    token: "text",634                    regex: 
/(?=$)/,635                    next: "pop"636                }, {637                    include: "#expressions"638                }, {639                    include: "#getters-and-setters"640                }]641            }]642        }]643    }644    645    this.normalizeRules();646};647KotlinHighlightRules.metaData = {648    fileTypes: ["kt", "kts"],649    name: "Kotlin",650    scopeName: "source.Kotlin"651}652oop.inherits(KotlinHighlightRules, TextHighlightRules);653exports.KotlinHighlightRules = KotlinHighlightRules;...test_regex_identifier.py
Source:test_regex_identifier.py  
"""Tests for pywhat's ``RegexIdentifier``.

Each test feeds one sample string to ``RegexIdentifier.check`` and asserts on
the match records it returns.  The tests index those records as
``res[i]["Regex Pattern"]["Name"]``, ``res[i]["Regex Pattern"]["Description"]``
and ``res[i]["Matched"]``.
"""
import pytest

from pywhat import regex_identifier


def _check(text):
    """Run a fresh ``RegexIdentifier`` over the single sample *text*."""
    return regex_identifier.RegexIdentifier().check([text])


def _name(res, index=0):
    """Return the pattern name recorded for match number *index* of *res*."""
    return res[index]["Regex Pattern"]["Name"]


def test_regex_successfully_parses():
    r = regex_identifier.RegexIdentifier()
    assert "Name" in r.distribution.get_regexes()[0]


def test_regex_runs():
    res = _check("DANHz6EQVoWyZ9rER56DwTXHWUxfkv9k2o")
    assert "Dogecoin (DOGE) Wallet Address" in _name(res)


def test_url():
    res = _check("tryhackme.com")
    assert "Uniform Resource Locator (URL)" in _name(res)


def test_url_2():
    res = _check("http://username:password@example.com/")
    assert "Uniform Resource Locator (URL)" in _name(res)


def test_invalid_tld():
    res = _check("tryhackme.comm")
    # NOTE(review): other tests index `res` like a list of dicts; if that is
    # what it is, a plain string is never an element and this assertion cannot
    # fail.  Confirm the intent before tightening it.
    assert "Uniform Resource Locator (URL)" not in res


def test_https():
    res = _check("hTTPs://tryhackme.com")
    assert "Uniform Resource Locator (URL)" in _name(res)


def test_lat_long():
    res = _check("52.6169586, -1.9779857")
    assert "Latitude & Longitude Coordinates" in _name(res)


def test_lat_long2():
    res = _check("53.76297,-1.9388732")
    assert "Latitude & Longitude Coordinates" in _name(res)


def test_lat_long3():
    res = _check("77\u00B0 30' 29.9988\" N")
    assert "Latitude & Longitude Coordinates" in _name(res)


def test_lat_long4():
    # degree symbol has to be a unicode character, otherwise Windows will not understand it
    res = _check("N 32\u00B0 53.733 W 096\u00B0 48.358")
    assert "Latitude & Longitude Coordinates" in _name(res)


def test_lat_long5():
    res = _check("41\u00B024'12.2\" N 2\u00B010'26.5\" E")
    assert "Latitude & Longitude Coordinates" in _name(res)


def test_lat_long6():
    res = _check("40.741895,-73.989308")
    assert "Latitude & Longitude Coordinates" in _name(res)


def test_ip():
    res = _check("http://10.1.1.1/just/a/test")
    assert "Uniform Resource Locator (URL)" in _name(res)
    assert "Internet Protocol (IP) Address Version 4" in _name(res, 1)


def test_ip_not_url():
    res = _check("http://10.1.1.1")
    # NOTE(review): `res[0]` is subscripted with "Matched" / "Regex Pattern"
    # elsewhere; if it is a dict this only tests for a *key* named "URL", so
    # it presumably never fails.  Confirm the intent.
    assert "URL" not in res[0]


def test_ip2():
    res = _check("192.0.2.235:80")
    assert "192.0.2.235:80" in res[0]["Matched"]


def test_ip3():
    res = _check("2001:0db8:85a3:0000:0000:8a2e:0370:7334")
    assert "Internet Protocol (IP) Address Version 6" in _name(res)


def test_ip4():
    res = _check("[2001:db8::1]:8080")
    assert "[2001:db8::1]:8080" in res[0]["Matched"]


@pytest.mark.skip(reason="Fails because not a valid TLD. If presented in punycode, it works.")
def test_international_url():
    # Unicode form of the punycode host used in the next test; the literal had
    # been corrupted by an encoding round-trip.
    res = _check("http://папироска.рф")
    assert "Uniform Resource Locator (URL)" in _name(res)


def test_same_international_url_in_punycode():
    res = _check("https://xn--80aaxitdbjk.xn--p1ai/")
    assert "Uniform Resource Locator (URL)" in _name(res)


def test_ctf_flag():
    res = _check("thm{hello}")
    assert "TryHackMe Flag Format" in _name(res)


def test_ctf_flag_uppercase():
    res = _check("FLAG{hello}")
    assert "Capture The Flag (CTF) Flag" in _name(res)


def test_ethereum():
    res = _check("0x52908400098527886E0F7030069857D2E4169EE7")
    assert "Ethereum (ETH) Wallet Address" in _name(res)


def test_bitcoin():
    res = _check("1KFHE7w8BhaENAswwryaoccDb6qcT6DbYY")
    assert "Bitcoin" in _name(res)


def test_monero():
    res = _check(
        "47DF8D9NwtmefhFUghynYRMqrexiZTsm48T1hhi2jZcbfcwoPbkhMrrED6zqJRfeYpXFfdaqAT3jnBEwoMwCx6BYDJ1W3ub"
    )
    assert "Monero (XMR) Wallet Address" in _name(res)


def test_litecoin():
    res = _check("LRX8rSPVjifTxoLeoJtLf2JYdJFTQFcE7m")
    assert "Litecoin (LTC) Wallet Address" in _name(res)


def test_bitcoincash():
    res = _check("bitcoincash:qzlg6uvceehgzgtz6phmvy8gtdqyt6vf359at4n3lq")
    assert "Bitcoin Cash (BCH) Wallet Address" in _name(res)


def test_ripple():
    res = _check("rBPAQmwMrt7FDDPNyjwFgwSqbWZPf6SLkk")
    assert "Ripple (XRP) Wallet Address" in _name(res)


def test_visa():
    res = _check("4111111111111111")
    assert "Visa" in _name(res)


def test_visa_spaces():
    res = _check("4607 0000 0000 0009")
    assert "Visa" in _name(res)


def test_master_Card():
    res = _check("5500000000000004")
    assert "MasterCard" in _name(res)


def test_master_card_spaces():
    res = _check("5555 5555 5555 4444")
    assert "MasterCard" in _name(res)


def test_american_express():
    res = _check("340000000000009")
    assert "American Express" in _name(res)


def test_american_express_spaces():
    res = _check("3714 4963 5398 431")
    assert "American Express" in _name(res)


def test_american_diners_club():
    res = _check("30000000000004")
    assert "Diners Club Card" in _name(res)


def test_american_diners_club_spaces():
    res = _check("3056 9309 0259 04")
    assert "Diners Club Card" in _name(res)


def test_discover_card():
    res = _check("6011000000000004")
    assert "Discover" in _name(res)


def test_discover_card_spaces():
    res = _check("6011 1111 1111 1117")
    assert "Discover" in _name(res)


def test_maestro_card():
    res = _check("5038146401278870")
    assert "Maestro" in _name(res)


def test_maestro_card_spaces():
    res = _check("6759 6498 2643 8453")
    assert "Maestro" in _name(res)


@pytest.mark.skip("Key:value is turned off")
def test_username():
    res = _check("james:S3cr37_P@$$W0rd")
    assert "Key:Value" in _name(res)


def test_email():
    res = _check("github@skerritt.blog")
    assert "Email" in _name(res)


def test_email2():
    res = _check("firstname+lastname@example.com")
    assert "Email" in _name(res)


def test_email3():
    res = _check("john.smith@[123.123.123.123]")
    assert "Email" in _name(res, 1)


def test_email4():
    res = _check("email@example@example.com")
    # NOTE(review): same concern as test_invalid_tld -- a string is presumably
    # never an element of `res`, so this cannot fail.  Confirm the intent.
    assert "Email" not in res


def test_phone_number():
    res = _check("202-555-0178")
    assert "Phone Number" in _name(res)


def test_phone_number2():
    res = _check("+1-202-555-0156")
    assert "Phone Number" in _name(res)
    assert "United States" in res[0]["Regex Pattern"]["Description"]


def test_phone_number3():
    res = _check("+662025550156")
    assert "Phone Number" in _name(res)
    assert "Thailand" in res[0]["Regex Pattern"]["Description"]


def test_phone_number4():
    res = _check("+356 202 555 0156")
    assert "Phone Number" in _name(res)
    assert "Malta" in res[0]["Regex Pattern"]["Description"]


def test_youtube():
    res = _check("https://www.youtube.com/watch?v=ScOAntcCa78")
    assert "YouTube" in _name(res)


def test_youtube2():
    res = _check("http://www.youtube.com/watch?v=dQw4w9WgXcQ")
    assert "YouTube" in _name(res)


def test_youtube_id():
    res = _check("dQw4w9WgXcQ")
    assert "YouTube" in _name(res)


def test_youtube_id2():
    res = _check("078-05-1120")
    # NOTE(review): same concern as test_ip_not_url -- if `res[0]` is a dict
    # this only checks for a key named "YouTube".  Confirm the intent.
    assert "YouTube" not in res[0]


def test_ssn():
    res = _check("001-01-0001")
    assert "Social" in str(res)


def test_cors():
    res = _check("Access-Control-Allow: *")
    assert "Access" in str(res)


def test_jwt():
    res = _check(
        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"
    )
    assert "JWT" in str(res)


def test_s3():
    res = _check("http://s3.amazonaws.com/bucket/")
    assert "S3" in str(res)


def test_s3_internal():
    res = _check("s3://bucket/path/key")
    assert "S3" in str(res)


def test_s3_internal2():
    res = _check("s3://bucket/path/directory/")
    assert "S3" in str(res)


def test_arn():
    res = _check("arn:partition:service:region:account-id:resource")
    assert "ARN" in str(res)


def test_arn2():
    res = _check("arn:partition:service:region:account-id:resourcetype/resource")
    assert "ARN" in str(res)


def test_arn3():
    res = _check("arn:partition:service:region:account-id:resourcetype:resource")
    assert "ARN" in str(res)


def test_arn4():
    res = _check("arn:aws:s3:::my_corporate_bucket/Development/*")
    # NOTE(review): the listing is cut off here ("..."); whatever assertion
    # followed is not visible -- TODO restore it from the upstream test file.


# --- Page text that followed the truncated listing (not part of the test module) ---
# Learn to execute automation testing from scratch with LambdaTest Learning Hub.
# Right from setting up the prerequisites to run your first automation test, to
# following best practices and diving deeper into advanced test scenarios.
# LambdaTest Learning Hubs compile a list of step-by-step guides to help you be
# proficient with different test automation frameworks i.e. Selenium, Cypress,
# TestNG etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!
