How to use the regex method in apickli

Best Python code snippet using apickli

test_regex.py

Source: test_regex.py (GitHub)

copy

Full Screen

1import regex2import string3from weakref import proxy4import unittest5import copy6import pickle7from test.support import run_unittest8import sys9# String subclasses for issue 18468.10class StrSubclass(str):11 def __getitem__(self, index):12 return StrSubclass(super().__getitem__(index))13class BytesSubclass(bytes):14 def __getitem__(self, index):15 return BytesSubclass(super().__getitem__(index))16class RegexTests(unittest.TestCase):17 PATTERN_CLASS = "<class '_regex.Pattern'>"18 FLAGS_WITH_COMPILED_PAT = "cannot process flags argument with a compiled pattern"19 INVALID_GROUP_REF = "invalid group reference"20 MISSING_GT = "missing >"21 BAD_GROUP_NAME = "bad character in group name"22 MISSING_GROUP_NAME = "missing group name"23 MISSING_LT = "missing <"24 UNKNOWN_GROUP_I = "unknown group"25 UNKNOWN_GROUP = "unknown group"26 BAD_ESCAPE = r"bad escape \(end of pattern\)"27 BAD_OCTAL_ESCAPE = r"bad escape \\"28 BAD_SET = "unterminated character set"29 STR_PAT_ON_BYTES = "cannot use a string pattern on a bytes-like object"30 BYTES_PAT_ON_STR = "cannot use a bytes pattern on a string-like object"31 STR_PAT_BYTES_TEMPL = "expected str instance, bytes found"32 BYTES_PAT_STR_TEMPL = "expected a bytes-like object, str found"33 BYTES_PAT_UNI_FLAG = "cannot use UNICODE flag with a bytes pattern"34 MIXED_FLAGS = "ASCII, LOCALE and UNICODE flags are mutually incompatible"35 MISSING_RPAREN = "missing \\)"36 TRAILING_CHARS = "unbalanced parenthesis"37 BAD_CHAR_RANGE = "bad character range"38 NOTHING_TO_REPEAT = "nothing to repeat"39 MULTIPLE_REPEAT = "multiple repeat"40 OPEN_GROUP = "cannot refer to an open group"41 DUPLICATE_GROUP = "duplicate group"42 CANT_TURN_OFF = "bad inline flags: cannot turn flags off"43 UNDEF_CHAR_NAME = "undefined character name"44 def assertTypedEqual(self, actual, expect, msg=None):45 self.assertEqual(actual, expect, msg)46 def recurse(actual, expect):47 if isinstance(expect, (tuple, list)):48 for x, y in zip(actual, expect):49 recurse(x, y)50 else:51 
self.assertIs(type(actual), type(expect), msg)52 recurse(actual, expect)53 def test_weakref(self):54 s = 'QabbbcR'55 x = regex.compile('ab+c')56 y = proxy(x)57 if x.findall('QabbbcR') != y.findall('QabbbcR'):58 self.fail()59 def test_search_star_plus(self):60 self.assertEqual(regex.search('a*', 'xxx').span(0), (0, 0))61 self.assertEqual(regex.search('x*', 'axx').span(), (0, 0))62 self.assertEqual(regex.search('x+', 'axx').span(0), (1, 3))63 self.assertEqual(regex.search('x+', 'axx').span(), (1, 3))64 self.assertEqual(regex.search('x', 'aaa'), None)65 self.assertEqual(regex.match('a*', 'xxx').span(0), (0, 0))66 self.assertEqual(regex.match('a*', 'xxx').span(), (0, 0))67 self.assertEqual(regex.match('x*', 'xxxa').span(0), (0, 3))68 self.assertEqual(regex.match('x*', 'xxxa').span(), (0, 3))69 self.assertEqual(regex.match('a+', 'xxx'), None)70 def bump_num(self, matchobj):71 int_value = int(matchobj[0])72 return str(int_value + 1)73 def test_basic_regex_sub(self):74 self.assertEqual(regex.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')75 self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),76 '9.3 -3 24x100y')77 self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),78 '9.3 -3 23x99y')79 self.assertEqual(regex.sub('.', lambda m: r"\n", 'x'), "\\n")80 self.assertEqual(regex.sub('.', r"\n", 'x'), "\n")81 self.assertEqual(regex.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')82 self.assertEqual(regex.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')83 self.assertEqual(regex.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'),84 'xxxx')85 self.assertEqual(regex.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')86 self.assertEqual(regex.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D',87 'a'), "\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D")88 self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), "\t\n\v\r\f\a")89 self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), chr(9) + chr(10)90 + chr(11) + chr(13) + chr(12) + chr(7))91 self.assertEqual(regex.sub(r'^\s*', 'X', 
'test'), 'Xtest')92 self.assertEqual(regex.sub(r"x", r"\x0A", "x"), "\n")93 self.assertEqual(regex.sub(r"x", r"\u000A", "x"), "\n")94 self.assertEqual(regex.sub(r"x", r"\U0000000A", "x"), "\n")95 self.assertEqual(regex.sub(r"x", r"\N{LATIN CAPITAL LETTER A}",96 "x"), "A")97 self.assertEqual(regex.sub(br"x", br"\x0A", b"x"), b"\n")98 self.assertEqual(regex.sub(br"x", br"\u000A", b"x"), b"\\u000A")99 self.assertEqual(regex.sub(br"x", br"\U0000000A", b"x"),100 b"\\U0000000A")101 self.assertEqual(regex.sub(br"x", br"\N{LATIN CAPITAL LETTER A}",102 b"x"), b"\\N{LATIN CAPITAL LETTER A}")103 def test_bug_449964(self):104 # Fails for group followed by other escape.105 self.assertEqual(regex.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx'),106 "xx\bxx\b")107 def test_bug_449000(self):108 # Test for sub() on escaped characters.109 self.assertEqual(regex.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),110 "abc\ndef\n")111 self.assertEqual(regex.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),112 "abc\ndef\n")113 self.assertEqual(regex.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),114 "abc\ndef\n")115 self.assertEqual(regex.sub('\r\n', '\n', 'abc\r\ndef\r\n'),116 "abc\ndef\n")117 def test_bug_1661(self):118 # Verify that flags do not get silently ignored with compiled patterns119 pattern = regex.compile('.')120 self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,121 lambda: regex.match(pattern, 'A', regex.I))122 self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,123 lambda: regex.search(pattern, 'A', regex.I))124 self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,125 lambda: regex.findall(pattern, 'A', regex.I))126 self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT,127 lambda: regex.compile(pattern, regex.I))128 def test_bug_3629(self):129 # A regex that triggered a bug in the sre-code validator130 self.assertEqual(repr(type(regex.compile("(?P<quote>)(?(quote))"))),131 self.PATTERN_CLASS)132 def test_sub_template_numeric_escape(self):133 # Bug 776311 and 
friends.134 self.assertEqual(regex.sub('x', r'\0', 'x'), "\0")135 self.assertEqual(regex.sub('x', r'\000', 'x'), "\000")136 self.assertEqual(regex.sub('x', r'\001', 'x'), "\001")137 self.assertEqual(regex.sub('x', r'\008', 'x'), "\0" + "8")138 self.assertEqual(regex.sub('x', r'\009', 'x'), "\0" + "9")139 self.assertEqual(regex.sub('x', r'\111', 'x'), "\111")140 self.assertEqual(regex.sub('x', r'\117', 'x'), "\117")141 self.assertEqual(regex.sub('x', r'\1111', 'x'), "\1111")142 self.assertEqual(regex.sub('x', r'\1111', 'x'), "\111" + "1")143 self.assertEqual(regex.sub('x', r'\00', 'x'), '\x00')144 self.assertEqual(regex.sub('x', r'\07', 'x'), '\x07')145 self.assertEqual(regex.sub('x', r'\08', 'x'), "\0" + "8")146 self.assertEqual(regex.sub('x', r'\09', 'x'), "\0" + "9")147 self.assertEqual(regex.sub('x', r'\0a', 'x'), "\0" + "a")148 self.assertEqual(regex.sub('x', r'\400', 'x'), "\u0100")149 self.assertEqual(regex.sub('x', r'\777', 'x'), "\u01FF")150 self.assertEqual(regex.sub(b'x', br'\400', b'x'), b"\x00")151 self.assertEqual(regex.sub(b'x', br'\777', b'x'), b"\xFF")152 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:153 regex.sub('x', r'\1', 'x'))154 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:155 regex.sub('x', r'\8', 'x'))156 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:157 regex.sub('x', r'\9', 'x'))158 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:159 regex.sub('x', r'\11', 'x'))160 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:161 regex.sub('x', r'\18', 'x'))162 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:163 regex.sub('x', r'\1a', 'x'))164 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:165 regex.sub('x', r'\90', 'x'))166 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:167 regex.sub('x', r'\99', 'x'))168 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:169 regex.sub('x', 
r'\118', 'x')) # r'\11' + '8'170 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:171 regex.sub('x', r'\11a', 'x'))172 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:173 regex.sub('x', r'\181', 'x')) # r'\18' + '1'174 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:175 regex.sub('x', r'\800', 'x')) # r'\80' + '0'176 # In Python 2.3 (etc), these loop endlessly in sre_parser.py.177 self.assertEqual(regex.sub('(((((((((((x)))))))))))', r'\11', 'x'),178 'x')179 self.assertEqual(regex.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),180 'xz8')181 self.assertEqual(regex.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),182 'xza')183 def test_qualified_re_sub(self):184 self.assertEqual(regex.sub('a', 'b', 'aaaaa'), 'bbbbb')185 self.assertEqual(regex.sub('a', 'b', 'aaaaa', 1), 'baaaa')186 def test_bug_114660(self):187 self.assertEqual(regex.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),188 'hello there')189 def test_bug_462270(self):190 # Test for empty sub() behaviour, see SF bug #462270191 self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), '-a-b-d-')192 self.assertEqual(regex.sub('(?V1)x*', '-', 'abxd'), '-a-b--d-')193 self.assertEqual(regex.sub('x+', '-', 'abxd'), 'ab-d')194 def test_bug_14462(self):195 # chr(255) is a valid identifier in Python 3.196 group_name = '\xFF'197 self.assertEqual(regex.search(r'(?P<' + group_name + '>a)',198 'abc').group(group_name), 'a')199 def test_symbolic_refs(self):200 self.assertRaisesRegex(regex.error, self.MISSING_GT, lambda:201 regex.sub('(?P<a>x)', r'\g<a', 'xx'))202 self.assertRaisesRegex(regex.error, self.MISSING_GROUP_NAME, lambda:203 regex.sub('(?P<a>x)', r'\g<', 'xx'))204 self.assertRaisesRegex(regex.error, self.MISSING_LT, lambda:205 regex.sub('(?P<a>x)', r'\g', 'xx'))206 self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda:207 regex.sub('(?P<a>x)', r'\g<a a>', 'xx'))208 self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda:209 regex.sub('(?P<a>x)', 
r'\g<1a1>', 'xx'))210 self.assertRaisesRegex(IndexError, self.UNKNOWN_GROUP_I, lambda:211 regex.sub('(?P<a>x)', r'\g<ab>', 'xx'))212 # The new behaviour of unmatched but valid groups is to treat them like213 # empty matches in the replacement template, like in Perl.214 self.assertEqual(regex.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')215 self.assertEqual(regex.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')216 # The old behaviour was to raise it as an IndexError.217 self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda:218 regex.sub('(?P<a>x)', r'\g<-1>', 'xx'))219 def test_re_subn(self):220 self.assertEqual(regex.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))221 self.assertEqual(regex.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))222 self.assertEqual(regex.subn("b+", "x", "xyz"), ('xyz', 0))223 self.assertEqual(regex.subn("b*", "x", "xyz"), ('xxxyxzx', 4))224 self.assertEqual(regex.subn("b*", "x", "xyz", 2), ('xxxyz', 2))225 def test_re_split(self):226 self.assertEqual(regex.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])227 self.assertEqual(regex.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])228 self.assertEqual(regex.split("(:*)", ":a:b::c"), ['', ':', 'a', ':',229 'b', '::', 'c'])230 self.assertEqual(regex.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])231 self.assertEqual(regex.split("(:)*", ":a:b::c"), ['', ':', 'a', ':',232 'b', ':', 'c'])233 self.assertEqual(regex.split("([b:]+)", ":a:b::c"), ['', ':', 'a',234 ':b::', 'c'])235 self.assertEqual(regex.split("(b)|(:+)", ":a:b::c"), ['', None, ':',236 'a', None, ':', '', 'b', None, '', None, '::', 'c'])237 self.assertEqual(regex.split("(?:b)|(?::+)", ":a:b::c"), ['', 'a', '',238 '', 'c'])239 self.assertEqual(regex.split("x", "xaxbxc"), ['', 'a', 'b', 'c'])240 self.assertEqual([m for m in regex.splititer("x", "xaxbxc")], ['', 'a',241 'b', 'c'])242 self.assertEqual(regex.split("(?r)x", "xaxbxc"), ['c', 'b', 'a', ''])243 self.assertEqual([m for m in regex.splititer("(?r)x", "xaxbxc")], ['c',244 'b', 'a', 
''])245 self.assertEqual(regex.split("(x)|(y)", "xaxbxc"), ['', 'x', None, 'a',246 'x', None, 'b', 'x', None, 'c'])247 self.assertEqual([m for m in regex.splititer("(x)|(y)", "xaxbxc")],248 ['', 'x', None, 'a', 'x', None, 'b', 'x', None, 'c'])249 self.assertEqual(regex.split("(?r)(x)|(y)", "xaxbxc"), ['c', 'x', None,250 'b', 'x', None, 'a', 'x', None, ''])251 self.assertEqual([m for m in regex.splititer("(?r)(x)|(y)", "xaxbxc")],252 ['c', 'x', None, 'b', 'x', None, 'a', 'x', None, ''])253 self.assertEqual(regex.split(r"(?V1)\b", "a b c"), ['', 'a', ' ', 'b',254 ' ', 'c', ''])255 self.assertEqual(regex.split(r"(?V1)\m", "a b c"), ['', 'a ', 'b ',256 'c'])257 self.assertEqual(regex.split(r"(?V1)\M", "a b c"), ['a', ' b', ' c',258 ''])259 def test_qualified_re_split(self):260 self.assertEqual(regex.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])261 self.assertEqual(regex.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])262 self.assertEqual(regex.split("(:)", ":a:b::c", 2), ['', ':', 'a', ':',263 'b::c'])264 self.assertEqual(regex.split("(:*)", ":a:b::c", 2), ['', ':', 'a', ':',265 'b::c'])266 def test_re_findall(self):267 self.assertEqual(regex.findall(":+", "abc"), [])268 self.assertEqual(regex.findall(":+", "a:b::c:::d"), [':', '::', ':::'])269 self.assertEqual(regex.findall("(:+)", "a:b::c:::d"), [':', '::',270 ':::'])271 self.assertEqual(regex.findall("(:)(:*)", "a:b::c:::d"), [(':', ''),272 (':', ':'), (':', '::')])273 self.assertEqual(regex.findall(r"\((?P<test>.{0,5}?TEST)\)",274 "(MY TEST)"), ["MY TEST"])275 self.assertEqual(regex.findall(r"\((?P<test>.{0,3}?TEST)\)",276 "(MY TEST)"), ["MY TEST"])277 self.assertEqual(regex.findall(r"\((?P<test>.{0,3}?T)\)", "(MY T)"),278 ["MY T"])279 self.assertEqual(regex.findall(r"[^a]{2}[A-Z]", "\n S"), [' S'])280 self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n S"), ['\n S'])281 self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n S"), [' S'])282 self.assertEqual(regex.findall(r"X(Y[^Y]+?){1,2}( |Q)+DEF",283 "XYABCYPPQ\nQ 
DEF"), [('YPPQ\n', ' ')])284 self.assertEqual(regex.findall(r"(\nTest(\n+.+?){0,2}?)?\n+End",285 "\nTest\nxyz\nxyz\nEnd"), [('\nTest\nxyz\nxyz', '\nxyz')])286 def test_bug_117612(self):287 self.assertEqual(regex.findall(r"(a|(b))", "aba"), [('a', ''), ('b',288 'b'), ('a', '')])289 def test_re_match(self):290 self.assertEqual(regex.match('a', 'a')[:], ('a',))291 self.assertEqual(regex.match('(a)', 'a')[:], ('a', 'a'))292 self.assertEqual(regex.match(r'(a)', 'a')[0], 'a')293 self.assertEqual(regex.match(r'(a)', 'a')[1], 'a')294 self.assertEqual(regex.match(r'(a)', 'a').group(1, 1), ('a', 'a'))295 pat = regex.compile('((a)|(b))(c)?')296 self.assertEqual(pat.match('a')[:], ('a', 'a', 'a', None, None))297 self.assertEqual(pat.match('b')[:], ('b', 'b', None, 'b', None))298 self.assertEqual(pat.match('ac')[:], ('ac', 'a', 'a', None, 'c'))299 self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c'))300 self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c'))301 # A single group.302 m = regex.match('(a)', 'a')303 self.assertEqual(m.group(), 'a')304 self.assertEqual(m.group(0), 'a')305 self.assertEqual(m.group(1), 'a')306 self.assertEqual(m.group(1, 1), ('a', 'a'))307 pat = regex.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')308 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))309 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), (None, 'b',310 None))311 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))312 def test_re_groupref_exists(self):313 self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', '(a)')[:],314 ('(a)', '(', 'a'))315 self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', 'a')[:], ('a',316 None, 'a'))317 self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'), None)318 self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', '(a'), None)319 self.assertEqual(regex.match('^(?:(a)|c)((?(1)b|d))$', 'ab')[:], ('ab',320 'a', 'b'))321 self.assertEqual(regex.match('^(?:(a)|c)((?(1)b|d))$', 'cd')[:], 
('cd',322 None, 'd'))323 self.assertEqual(regex.match('^(?:(a)|c)((?(1)|d))$', 'cd')[:], ('cd',324 None, 'd'))325 self.assertEqual(regex.match('^(?:(a)|c)((?(1)|d))$', 'a')[:], ('a',326 'a', ''))327 # Tests for bug #1177831: exercise groups other than the first group.328 p = regex.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')329 self.assertEqual(p.match('abc')[:], ('abc', 'a', 'b', 'c'))330 self.assertEqual(p.match('ad')[:], ('ad', 'a', None, 'd'))331 self.assertEqual(p.match('abd'), None)332 self.assertEqual(p.match('ac'), None)333 def test_re_groupref(self):334 self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a|')[:], ('|a|',335 '|', 'a'))336 self.assertEqual(regex.match(r'^(\|)?([^()]+)\1?$', 'a')[:], ('a',337 None, 'a'))338 self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', 'a|'), None)339 self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a'), None)340 self.assertEqual(regex.match(r'^(?:(a)|c)(\1)$', 'aa')[:], ('aa', 'a',341 'a'))342 self.assertEqual(regex.match(r'^(?:(a)|c)(\1)?$', 'c')[:], ('c', None,343 None))344 self.assertEqual(regex.findall("(?i)(.{1,40}?),(.{1,40}?)(?:;)+(.{1,80}).{1,40}?\\3(\ |;)+(.{1,80}?)\\1",345 "TEST, BEST; LEST ; Lest 123 Test, Best"), [('TEST', ' BEST',346 ' LEST', ' ', '123 ')])347 def test_groupdict(self):348 self.assertEqual(regex.match('(?P<first>first) (?P<second>second)',349 'first second').groupdict(), {'first': 'first', 'second': 'second'})350 def test_expand(self):351 self.assertEqual(regex.match("(?P<first>first) (?P<second>second)",352 "first second").expand(r"\2 \1 \g<second> \g<first>"),353 'second first second first')354 def test_repeat_minmax(self):355 self.assertEqual(regex.match(r"^(\w){1}$", "abc"), None)356 self.assertEqual(regex.match(r"^(\w){1}?$", "abc"), None)357 self.assertEqual(regex.match(r"^(\w){1,2}$", "abc"), None)358 self.assertEqual(regex.match(r"^(\w){1,2}?$", "abc"), None)359 self.assertEqual(regex.match(r"^(\w){3}$", "abc")[1], 'c')360 self.assertEqual(regex.match(r"^(\w){1,3}$", "abc")[1], 
'c')361 self.assertEqual(regex.match(r"^(\w){1,4}$", "abc")[1], 'c')362 self.assertEqual(regex.match(r"^(\w){3,4}?$", "abc")[1], 'c')363 self.assertEqual(regex.match(r"^(\w){3}?$", "abc")[1], 'c')364 self.assertEqual(regex.match(r"^(\w){1,3}?$", "abc")[1], 'c')365 self.assertEqual(regex.match(r"^(\w){1,4}?$", "abc")[1], 'c')366 self.assertEqual(regex.match(r"^(\w){3,4}?$", "abc")[1], 'c')367 self.assertEqual(regex.match("^x{1}$", "xxx"), None)368 self.assertEqual(regex.match("^x{1}?$", "xxx"), None)369 self.assertEqual(regex.match("^x{1,2}$", "xxx"), None)370 self.assertEqual(regex.match("^x{1,2}?$", "xxx"), None)371 self.assertEqual(regex.match("^x{1}", "xxx")[0], 'x')372 self.assertEqual(regex.match("^x{1}?", "xxx")[0], 'x')373 self.assertEqual(regex.match("^x{0,1}", "xxx")[0], 'x')374 self.assertEqual(regex.match("^x{0,1}?", "xxx")[0], '')375 self.assertEqual(bool(regex.match("^x{3}$", "xxx")), True)376 self.assertEqual(bool(regex.match("^x{1,3}$", "xxx")), True)377 self.assertEqual(bool(regex.match("^x{1,4}$", "xxx")), True)378 self.assertEqual(bool(regex.match("^x{3,4}?$", "xxx")), True)379 self.assertEqual(bool(regex.match("^x{3}?$", "xxx")), True)380 self.assertEqual(bool(regex.match("^x{1,3}?$", "xxx")), True)381 self.assertEqual(bool(regex.match("^x{1,4}?$", "xxx")), True)382 self.assertEqual(bool(regex.match("^x{3,4}?$", "xxx")), True)383 self.assertEqual(regex.match("^x{}$", "xxx"), None)384 self.assertEqual(bool(regex.match("^x{}$", "x{}")), True)385 def test_getattr(self):386 self.assertEqual(regex.compile("(?i)(a)(b)").pattern, '(?i)(a)(b)')387 self.assertEqual(regex.compile("(?i)(a)(b)").flags, regex.I | regex.U |388 regex.DEFAULT_VERSION)389 self.assertEqual(regex.compile(b"(?i)(a)(b)").flags, regex.A | regex.I390 | regex.DEFAULT_VERSION)391 self.assertEqual(regex.compile("(?i)(a)(b)").groups, 2)392 self.assertEqual(regex.compile("(?i)(a)(b)").groupindex, {})393 self.assertEqual(regex.compile("(?i)(?P<first>a)(?P<other>b)").groupindex,394 {'first': 
1, 'other': 2})395 self.assertEqual(regex.match("(a)", "a").pos, 0)396 self.assertEqual(regex.match("(a)", "a").endpos, 1)397 self.assertEqual(regex.search("b(c)", "abcdef").pos, 0)398 self.assertEqual(regex.search("b(c)", "abcdef").endpos, 6)399 self.assertEqual(regex.search("b(c)", "abcdef").span(), (1, 3))400 self.assertEqual(regex.search("b(c)", "abcdef").span(1), (2, 3))401 self.assertEqual(regex.match("(a)", "a").string, 'a')402 self.assertEqual(regex.match("(a)", "a").regs, ((0, 1), (0, 1)))403 self.assertEqual(repr(type(regex.match("(a)", "a").re)),404 self.PATTERN_CLASS)405 # Issue 14260.406 p = regex.compile(r'abc(?P<n>def)')407 p.groupindex["n"] = 0408 self.assertEqual(p.groupindex["n"], 1)409 def test_special_escapes(self):410 self.assertEqual(regex.search(r"\b(b.)\b", "abcd abc bcd bx")[1], 'bx')411 self.assertEqual(regex.search(r"\B(b.)\B", "abc bcd bc abxd")[1], 'bx')412 self.assertEqual(regex.search(br"\b(b.)\b", b"abcd abc bcd bx",413 regex.LOCALE)[1], b'bx')414 self.assertEqual(regex.search(br"\B(b.)\B", b"abc bcd bc abxd",415 regex.LOCALE)[1], b'bx')416 self.assertEqual(regex.search(r"\b(b.)\b", "abcd abc bcd bx",417 regex.UNICODE)[1], 'bx')418 self.assertEqual(regex.search(r"\B(b.)\B", "abc bcd bc abxd",419 regex.UNICODE)[1], 'bx')420 self.assertEqual(regex.search(r"^abc$", "\nabc\n", regex.M)[0], 'abc')421 self.assertEqual(regex.search(r"^\Aabc\Z$", "abc", regex.M)[0], 'abc')422 self.assertEqual(regex.search(r"^\Aabc\Z$", "\nabc\n", regex.M), None)423 self.assertEqual(regex.search(br"\b(b.)\b", b"abcd abc bcd bx")[1],424 b'bx')425 self.assertEqual(regex.search(br"\B(b.)\B", b"abc bcd bc abxd")[1],426 b'bx')427 self.assertEqual(regex.search(br"^abc$", b"\nabc\n", regex.M)[0],428 b'abc')429 self.assertEqual(regex.search(br"^\Aabc\Z$", b"abc", regex.M)[0],430 b'abc')431 self.assertEqual(regex.search(br"^\Aabc\Z$", b"\nabc\n", regex.M),432 None)433 self.assertEqual(regex.search(r"\d\D\w\W\s\S", "1aa! a")[0], '1aa! 
a')434 self.assertEqual(regex.search(br"\d\D\w\W\s\S", b"1aa! a",435 regex.LOCALE)[0], b'1aa! a')436 self.assertEqual(regex.search(r"\d\D\w\W\s\S", "1aa! a",437 regex.UNICODE)[0], '1aa! a')438 def test_bigcharset(self):439 self.assertEqual(regex.match(r"([\u2222\u2223])", "\u2222")[1],440 '\u2222')441 self.assertEqual(regex.match(r"([\u2222\u2223])", "\u2222",442 regex.UNICODE)[1], '\u2222')443 self.assertEqual("".join(regex.findall(".",444 "e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)),445 'e\xe8\xe9\xea\xeb\u0113\u011b\u0117')446 self.assertEqual("".join(regex.findall(r"[e\xe8\xe9\xea\xeb\u0113\u011b\u0117]",447 "e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)),448 'e\xe8\xe9\xea\xeb\u0113\u011b\u0117')449 self.assertEqual("".join(regex.findall(r"e|\xe8|\xe9|\xea|\xeb|\u0113|\u011b|\u0117",450 "e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)),451 'e\xe8\xe9\xea\xeb\u0113\u011b\u0117')452 def test_anyall(self):453 self.assertEqual(regex.match("a.b", "a\nb", regex.DOTALL)[0], "a\nb")454 self.assertEqual(regex.match("a.*b", "a\n\nb", regex.DOTALL)[0],455 "a\n\nb")456 def test_non_consuming(self):457 self.assertEqual(regex.match(r"(a(?=\s[^a]))", "a b")[1], 'a')458 self.assertEqual(regex.match(r"(a(?=\s[^a]*))", "a b")[1], 'a')459 self.assertEqual(regex.match(r"(a(?=\s[abc]))", "a b")[1], 'a')460 self.assertEqual(regex.match(r"(a(?=\s[abc]*))", "a bc")[1], 'a')461 self.assertEqual(regex.match(r"(a)(?=\s\1)", "a a")[1], 'a')462 self.assertEqual(regex.match(r"(a)(?=\s\1*)", "a aa")[1], 'a')463 self.assertEqual(regex.match(r"(a)(?=\s(abc|a))", "a a")[1], 'a')464 self.assertEqual(regex.match(r"(a(?!\s[^a]))", "a a")[1], 'a')465 self.assertEqual(regex.match(r"(a(?!\s[abc]))", "a d")[1], 'a')466 self.assertEqual(regex.match(r"(a)(?!\s\1)", "a b")[1], 'a')467 self.assertEqual(regex.match(r"(a)(?!\s(abc|a))", "a b")[1], 'a')468 def test_ignore_case(self):469 self.assertEqual(regex.match("abc", "ABC", regex.I)[0], 'ABC')470 
self.assertEqual(regex.match(b"abc", b"ABC", regex.I)[0], b'ABC')471 self.assertEqual(regex.match(r"(a\s[^a]*)", "a bb", regex.I)[1],472 'a bb')473 self.assertEqual(regex.match(r"(a\s[abc])", "a b", regex.I)[1], 'a b')474 self.assertEqual(regex.match(r"(a\s[abc]*)", "a bb", regex.I)[1],475 'a bb')476 self.assertEqual(regex.match(r"((a)\s\2)", "a a", regex.I)[1], 'a a')477 self.assertEqual(regex.match(r"((a)\s\2*)", "a aa", regex.I)[1],478 'a aa')479 self.assertEqual(regex.match(r"((a)\s(abc|a))", "a a", regex.I)[1],480 'a a')481 self.assertEqual(regex.match(r"((a)\s(abc|a)*)", "a aa", regex.I)[1],482 'a aa')483 # Issue 3511.484 self.assertEqual(regex.match(r"[Z-a]", "_").span(), (0, 1))485 self.assertEqual(regex.match(r"(?i)[Z-a]", "_").span(), (0, 1))486 self.assertEqual(bool(regex.match(r"(?i)nao", "nAo")), True)487 self.assertEqual(bool(regex.match(r"(?i)n\xE3o", "n\xC3o")), True)488 self.assertEqual(bool(regex.match(r"(?i)n\xE3o", "N\xC3O")), True)489 self.assertEqual(bool(regex.match(r"(?i)s", "\u017F")), True)490 def test_case_folding(self):491 self.assertEqual(regex.search(r"(?fi)ss", "SS").span(), (0, 2))492 self.assertEqual(regex.search(r"(?fi)SS", "ss").span(), (0, 2))493 self.assertEqual(regex.search(r"(?fi)SS",494 "\N{LATIN SMALL LETTER SHARP S}").span(), (0, 1))495 self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LETTER SHARP S}",496 "SS").span(), (0, 2))497 self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE ST}",498 "ST").span(), (0, 2))499 self.assertEqual(regex.search(r"(?fi)ST",500 "\N{LATIN SMALL LIGATURE ST}").span(), (0, 1))501 self.assertEqual(regex.search(r"(?fi)ST",502 "\N{LATIN SMALL LIGATURE LONG S T}").span(), (0, 1))503 self.assertEqual(regex.search(r"(?fi)SST",504 "\N{LATIN SMALL LETTER SHARP S}t").span(), (0, 2))505 self.assertEqual(regex.search(r"(?fi)SST",506 "s\N{LATIN SMALL LIGATURE LONG S T}").span(), (0, 2))507 self.assertEqual(regex.search(r"(?fi)SST",508 "s\N{LATIN SMALL LIGATURE ST}").span(), (0, 2))509 
self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE ST}",510 "SST").span(), (1, 3))511 self.assertEqual(regex.search(r"(?fi)SST",512 "s\N{LATIN SMALL LIGATURE ST}").span(), (0, 2))513 self.assertEqual(regex.search(r"(?fi)FFI",514 "\N{LATIN SMALL LIGATURE FFI}").span(), (0, 1))515 self.assertEqual(regex.search(r"(?fi)FFI",516 "\N{LATIN SMALL LIGATURE FF}i").span(), (0, 2))517 self.assertEqual(regex.search(r"(?fi)FFI",518 "f\N{LATIN SMALL LIGATURE FI}").span(), (0, 2))519 self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE FFI}",520 "FFI").span(), (0, 3))521 self.assertEqual(regex.search(r"(?fi)\N{LATIN SMALL LIGATURE FF}i",522 "FFI").span(), (0, 3))523 self.assertEqual(regex.search(r"(?fi)f\N{LATIN SMALL LIGATURE FI}",524 "FFI").span(), (0, 3))525 sigma = "\u03A3\u03C3\u03C2"526 for ch1 in sigma:527 for ch2 in sigma:528 if not regex.match(r"(?fi)" + ch1, ch2):529 self.fail()530 self.assertEqual(bool(regex.search(r"(?iV1)ff", "\uFB00\uFB01")),531 True)532 self.assertEqual(bool(regex.search(r"(?iV1)ff", "\uFB01\uFB00")),533 True)534 self.assertEqual(bool(regex.search(r"(?iV1)fi", "\uFB00\uFB01")),535 True)536 self.assertEqual(bool(regex.search(r"(?iV1)fi", "\uFB01\uFB00")),537 True)538 self.assertEqual(bool(regex.search(r"(?iV1)fffi", "\uFB00\uFB01")),539 True)540 self.assertEqual(bool(regex.search(r"(?iV1)f\uFB03",541 "\uFB00\uFB01")), True)542 self.assertEqual(bool(regex.search(r"(?iV1)ff", "\uFB00\uFB01")),543 True)544 self.assertEqual(bool(regex.search(r"(?iV1)fi", "\uFB00\uFB01")),545 True)546 self.assertEqual(bool(regex.search(r"(?iV1)fffi", "\uFB00\uFB01")),547 True)548 self.assertEqual(bool(regex.search(r"(?iV1)f\uFB03",549 "\uFB00\uFB01")), True)550 self.assertEqual(bool(regex.search(r"(?iV1)f\uFB01", "\uFB00i")),551 True)552 self.assertEqual(bool(regex.search(r"(?iV1)f\uFB01", "\uFB00i")),553 True)554 self.assertEqual(regex.findall(r"(?iV0)\m(?:word){e<=3}\M(?<!\m(?:word){e<=1}\M)",555 "word word2 word word3 word word234 word23 word"), 
["word234",556 "word23"])557 self.assertEqual(regex.findall(r"(?iV1)\m(?:word){e<=3}\M(?<!\m(?:word){e<=1}\M)",558 "word word2 word word3 word word234 word23 word"), ["word234",559 "word23"])560 self.assertEqual(regex.search(r"(?fi)a\N{LATIN SMALL LIGATURE FFI}ne",561 " affine ").span(), (2, 8))562 self.assertEqual(regex.search(r"(?fi)a(?:\N{LATIN SMALL LIGATURE FFI}|x)ne",563 " affine ").span(), (2, 8))564 self.assertEqual(regex.search(r"(?fi)a(?:\N{LATIN SMALL LIGATURE FFI}|xy)ne",565 " affine ").span(), (2, 8))566 self.assertEqual(regex.search(r"(?fi)a\L<options>ne", "affine",567 options=["\N{LATIN SMALL LIGATURE FFI}"]).span(), (0, 6))568 self.assertEqual(regex.search(r"(?fi)a\L<options>ne",569 "a\N{LATIN SMALL LIGATURE FFI}ne", options=["ffi"]).span(), (0, 4))570 def test_category(self):571 self.assertEqual(regex.match(r"(\s)", " ")[1], ' ')572 def test_not_literal(self):573 self.assertEqual(regex.search(r"\s([^a])", " b")[1], 'b')574 self.assertEqual(regex.search(r"\s([^a]*)", " bb")[1], 'bb')575 def test_search_coverage(self):576 self.assertEqual(regex.search(r"\s(b)", " b")[1], 'b')577 self.assertEqual(regex.search(r"a\s", "a ")[0], 'a ')578 def test_re_escape(self):579 p = ""580 self.assertEqual(regex.escape(p), p)581 for i in range(0, 256):582 p += chr(i)583 self.assertEqual(bool(regex.match(regex.escape(chr(i)), chr(i))),584 True)585 self.assertEqual(regex.match(regex.escape(chr(i)), chr(i)).span(),586 (0, 1))587 pat = regex.compile(regex.escape(p))588 self.assertEqual(pat.match(p).span(), (0, 256))589 def test_re_escape_byte(self):590 p = b""591 self.assertEqual(regex.escape(p), p)592 for i in range(0, 256):593 b = bytes([i])594 p += b595 self.assertEqual(bool(regex.match(regex.escape(b), b)), True)596 self.assertEqual(regex.match(regex.escape(b), b).span(), (0, 1))597 pat = regex.compile(regex.escape(p))598 self.assertEqual(pat.match(p).span(), (0, 256))599 def test_constants(self):600 if regex.I != regex.IGNORECASE:601 self.fail()602 if regex.L != 
regex.LOCALE:603 self.fail()604 if regex.M != regex.MULTILINE:605 self.fail()606 if regex.S != regex.DOTALL:607 self.fail()608 if regex.X != regex.VERBOSE:609 self.fail()610 def test_flags(self):611 for flag in [regex.I, regex.M, regex.X, regex.S, regex.L]:612 self.assertEqual(repr(type(regex.compile('^pattern$', flag))),613 self.PATTERN_CLASS)614 def test_sre_character_literals(self):615 for i in [0, 8, 16, 32, 64, 127, 128, 255]:616 self.assertEqual(bool(regex.match(r"\%03o" % i, chr(i))), True)617 self.assertEqual(bool(regex.match(r"\%03o0" % i, chr(i) + "0")),618 True)619 self.assertEqual(bool(regex.match(r"\%03o8" % i, chr(i) + "8")),620 True)621 self.assertEqual(bool(regex.match(r"\x%02x" % i, chr(i))), True)622 self.assertEqual(bool(regex.match(r"\x%02x0" % i, chr(i) + "0")),623 True)624 self.assertEqual(bool(regex.match(r"\x%02xz" % i, chr(i) + "z")),625 True)626 self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:627 regex.match(r"\911", ""))628 def test_sre_character_class_literals(self):629 for i in [0, 8, 16, 32, 64, 127, 128, 255]:630 self.assertEqual(bool(regex.match(r"[\%03o]" % i, chr(i))), True)631 self.assertEqual(bool(regex.match(r"[\%03o0]" % i, chr(i))), True)632 self.assertEqual(bool(regex.match(r"[\%03o8]" % i, chr(i))), True)633 self.assertEqual(bool(regex.match(r"[\x%02x]" % i, chr(i))), True)634 self.assertEqual(bool(regex.match(r"[\x%02x0]" % i, chr(i))), True)635 self.assertEqual(bool(regex.match(r"[\x%02xz]" % i, chr(i))), True)636 self.assertRaisesRegex(regex.error, self.BAD_OCTAL_ESCAPE, lambda:637 regex.match(r"[\911]", ""))638 def test_bug_113254(self):639 self.assertEqual(regex.match(r'(a)|(b)', 'b').start(1), -1)640 self.assertEqual(regex.match(r'(a)|(b)', 'b').end(1), -1)641 self.assertEqual(regex.match(r'(a)|(b)', 'b').span(1), (-1, -1))642 def test_bug_527371(self):643 # Bug described in patches 527371/672491.644 self.assertEqual(regex.match(r'(a)?a','a').lastindex, None)645 
self.assertEqual(regex.match(r'(a)(b)?b','ab').lastindex, 1)646 self.assertEqual(regex.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup,647 'a')648 self.assertEqual(regex.match("(?P<a>a(b))", "ab").lastgroup, 'a')649 self.assertEqual(regex.match("((a))", "a").lastindex, 1)650 def test_bug_545855(self):651 # Bug 545855 -- This pattern failed to cause a compile error as it652 # should, instead provoking a TypeError.653 self.assertRaisesRegex(regex.error, self.BAD_SET, lambda:654 regex.compile('foo[a-'))655 def test_bug_418626(self):656 # Bugs 418626 at al. -- Testing Greg Chapman's addition of op code657 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of658 # pattern '*?' on a long string.659 self.assertEqual(regex.match('.*?c', 10000 * 'ab' + 'cd').end(0),660 20001)661 self.assertEqual(regex.match('.*?cd', 5000 * 'ab' + 'c' + 5000 * 'ab' +662 'cde').end(0), 20003)663 self.assertEqual(regex.match('.*?cd', 20000 * 'abc' + 'de').end(0),664 60001)665 # Non-simple '*?' still used to hit the recursion limit, before the666 # non-recursive scheme was implemented.667 self.assertEqual(regex.search('(a|b)*?c', 10000 * 'ab' + 'cd').end(0),668 20001)669 def test_bug_612074(self):670 pat = "[" + regex.escape("\u2039") + "]"671 self.assertEqual(regex.compile(pat) and 1, 1)672 def test_stack_overflow(self):673 # Nasty cases that used to overflow the straightforward recursive674 # implementation of repeated groups.675 self.assertEqual(regex.match('(x)*', 50000 * 'x')[1], 'x')676 self.assertEqual(regex.match('(x)*y', 50000 * 'x' + 'y')[1], 'x')677 self.assertEqual(regex.match('(x)*?y', 50000 * 'x' + 'y')[1], 'x')678 def test_scanner(self):679 def s_ident(scanner, token): return token680 def s_operator(scanner, token): return "op%s" % token681 def s_float(scanner, token): return float(token)682 def s_int(scanner, token): return int(token)683 scanner = regex.Scanner([(r"[a-zA-Z_]\w*", s_ident), (r"\d+\.\d*",684 s_float), (r"\d+", s_int), (r"=|\+|-|\*|/", s_operator), 
(r"\s+",685 None), ])686 self.assertEqual(repr(type(scanner.scanner.scanner("").pattern)),687 self.PATTERN_CLASS)688 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), (['sum',689 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], ''))690 def test_bug_448951(self):691 # Bug 448951 (similar to 429357, but with single char match).692 # (Also test greedy matches.)693 for op in '', '?', '*':694 self.assertEqual(regex.match(r'((.%s):)?z' % op, 'z')[:], ('z',695 None, None))696 self.assertEqual(regex.match(r'((.%s):)?z' % op, 'a:z')[:], ('a:z',697 'a:', 'a'))698 def test_bug_725106(self):699 # Capturing groups in alternatives in repeats.700 self.assertEqual(regex.match('^((a)|b)*', 'abc')[:], ('ab', 'b', 'a'))701 self.assertEqual(regex.match('^(([ab])|c)*', 'abc')[:], ('abc', 'c',702 'b'))703 self.assertEqual(regex.match('^((d)|[ab])*', 'abc')[:], ('ab', 'b',704 None))705 self.assertEqual(regex.match('^((a)c|[ab])*', 'abc')[:], ('ab', 'b',706 None))707 self.assertEqual(regex.match('^((a)|b)*?c', 'abc')[:], ('abc', 'b',708 'a'))709 self.assertEqual(regex.match('^(([ab])|c)*?d', 'abcd')[:], ('abcd',710 'c', 'b'))711 self.assertEqual(regex.match('^((d)|[ab])*?c', 'abc')[:], ('abc', 'b',712 None))713 self.assertEqual(regex.match('^((a)c|[ab])*?c', 'abc')[:], ('abc', 'b',714 None))715 def test_bug_725149(self):716 # Mark_stack_base restoring before restoring marks.717 self.assertEqual(regex.match('(a)(?:(?=(b)*)c)*', 'abb')[:], ('a', 'a',718 None))719 self.assertEqual(regex.match('(a)((?!(b)*))*', 'abb')[:], ('a', 'a',720 None, None))721 def test_bug_764548(self):722 # Bug 764548, regex.compile() barfs on str/unicode subclasses.723 class my_unicode(str): pass724 pat = regex.compile(my_unicode("abc"))725 self.assertEqual(pat.match("xyz"), None)726 def test_finditer(self):727 it = regex.finditer(r":+", "a:b::c:::d")728 self.assertEqual([item[0] for item in it], [':', '::', ':::'])729 def test_bug_926075(self):730 if regex.compile('bug_926075') is 
regex.compile(b'bug_926075'):731 self.fail()732 def test_bug_931848(self):733 pattern = "[\u002E\u3002\uFF0E\uFF61]"734 self.assertEqual(regex.compile(pattern).split("a.b.c"), ['a', 'b',735 'c'])736 def test_bug_581080(self):737 it = regex.finditer(r"\s", "a b")738 self.assertEqual(next(it).span(), (1, 2))739 self.assertRaises(StopIteration, lambda: next(it))740 scanner = regex.compile(r"\s").scanner("a b")741 self.assertEqual(scanner.search().span(), (1, 2))742 self.assertEqual(scanner.search(), None)743 def test_bug_817234(self):744 it = regex.finditer(r".*", "asdf")745 self.assertEqual(next(it).span(), (0, 4))746 self.assertEqual(next(it).span(), (4, 4))747 self.assertRaises(StopIteration, lambda: next(it))748 def test_empty_array(self):749 # SF buf 1647541.750 import array751 for typecode in 'bBuhHiIlLfd':752 a = array.array(typecode)753 self.assertEqual(regex.compile(b"bla").match(a), None)754 self.assertEqual(regex.compile(b"").match(a)[1 : ], ())755 def test_inline_flags(self):756 # Bug #1700.757 upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Below758 lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Below759 p = regex.compile(upper_char, regex.I | regex.U)760 self.assertEqual(bool(p.match(lower_char)), True)761 p = regex.compile(lower_char, regex.I | regex.U)762 self.assertEqual(bool(p.match(upper_char)), True)763 p = regex.compile('(?i)' + upper_char, regex.U)764 self.assertEqual(bool(p.match(lower_char)), True)765 p = regex.compile('(?i)' + lower_char, regex.U)766 self.assertEqual(bool(p.match(upper_char)), True)767 p = regex.compile('(?iu)' + upper_char)768 self.assertEqual(bool(p.match(lower_char)), True)769 p = regex.compile('(?iu)' + lower_char)770 self.assertEqual(bool(p.match(upper_char)), True)771 self.assertEqual(bool(regex.match(r"(?i)a", "A")), True)772 self.assertEqual(bool(regex.match(r"a(?i)", "A")), True)773 self.assertEqual(bool(regex.match(r"(?iV1)a", "A")), True)774 self.assertEqual(regex.match(r"a(?iV1)", "A"), 
None)775 def test_dollar_matches_twice(self):776 # $ matches the end of string, and just before the terminating \n.777 pattern = regex.compile('$')778 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')779 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')780 self.assertEqual(pattern.sub('#', '\n'), '#\n#')781 pattern = regex.compile('$', regex.MULTILINE)782 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#')783 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')784 self.assertEqual(pattern.sub('#', '\n'), '#\n#')785 def test_bytes_str_mixing(self):786 # Mixing str and bytes is disallowed.787 pat = regex.compile('.')788 bpat = regex.compile(b'.')789 self.assertRaisesRegex(TypeError, self.STR_PAT_ON_BYTES, lambda:790 pat.match(b'b'))791 self.assertRaisesRegex(TypeError, self.BYTES_PAT_ON_STR, lambda:792 bpat.match('b'))793 self.assertRaisesRegex(TypeError, self.STR_PAT_BYTES_TEMPL, lambda:794 pat.sub(b'b', 'c'))795 self.assertRaisesRegex(TypeError, self.STR_PAT_ON_BYTES, lambda:796 pat.sub('b', b'c'))797 self.assertRaisesRegex(TypeError, self.STR_PAT_ON_BYTES, lambda:798 pat.sub(b'b', b'c'))799 self.assertRaisesRegex(TypeError, self.BYTES_PAT_ON_STR, lambda:800 bpat.sub(b'b', 'c'))801 self.assertRaisesRegex(TypeError, self.BYTES_PAT_STR_TEMPL, lambda:802 bpat.sub('b', b'c'))803 self.assertRaisesRegex(TypeError, self.BYTES_PAT_ON_STR, lambda:804 bpat.sub('b', 'c'))805 self.assertRaisesRegex(ValueError, self.BYTES_PAT_UNI_FLAG, lambda:806 regex.compile(b'\w', regex.UNICODE))807 self.assertRaisesRegex(ValueError, self.BYTES_PAT_UNI_FLAG, lambda:808 regex.compile(b'(?u)\w'))809 self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:810 regex.compile('\w', regex.UNICODE | regex.ASCII))811 self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:812 regex.compile('(?u)\w', regex.ASCII))813 self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:814 regex.compile('(?a)\w', regex.UNICODE))815 self.assertRaisesRegex(ValueError, 
self.MIXED_FLAGS, lambda:816 regex.compile('(?au)\w'))817 def test_ascii_and_unicode_flag(self):818 # String patterns.819 for flags in (0, regex.UNICODE):820 pat = regex.compile('\xc0', flags | regex.IGNORECASE)821 self.assertEqual(bool(pat.match('\xe0')), True)822 pat = regex.compile('\w', flags)823 self.assertEqual(bool(pat.match('\xe0')), True)824 pat = regex.compile('\xc0', regex.ASCII | regex.IGNORECASE)825 self.assertEqual(pat.match('\xe0'), None)826 pat = regex.compile('(?a)\xc0', regex.IGNORECASE)827 self.assertEqual(pat.match('\xe0'), None)828 pat = regex.compile('\w', regex.ASCII)829 self.assertEqual(pat.match('\xe0'), None)830 pat = regex.compile('(?a)\w')831 self.assertEqual(pat.match('\xe0'), None)832 # Bytes patterns.833 for flags in (0, regex.ASCII):834 pat = regex.compile(b'\xc0', flags | regex.IGNORECASE)835 self.assertEqual(pat.match(b'\xe0'), None)836 pat = regex.compile(b'\w')837 self.assertEqual(pat.match(b'\xe0'), None)838 self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda:839 regex.compile('(?au)\w'))840 def test_subscripting_match(self):841 m = regex.match(r'(?<a>\w)', 'xy')842 if not m:843 self.fail("Failed: expected match but returned None")844 elif not m or m[0] != m.group(0) or m[1] != m.group(1):845 self.fail("Failed")846 if not m:847 self.fail("Failed: expected match but returned None")848 elif m[:] != ('x', 'x'):849 self.fail("Failed: expected \"('x', 'x')\" but got {} instead".format(ascii(m[:])))850 def test_new_named_groups(self):851 m0 = regex.match(r'(?P<a>\w)', 'x')852 m1 = regex.match(r'(?<a>\w)', 'x')853 if not (m0 and m1 and m0[:] == m1[:]):854 self.fail("Failed")855 def test_properties(self):856 self.assertEqual(regex.match(b'(?ai)\xC0', b'\xE0'), None)857 self.assertEqual(regex.match(br'(?ai)\xC0', b'\xE0'), None)858 self.assertEqual(regex.match(br'(?a)\w', b'\xE0'), None)859 self.assertEqual(bool(regex.match(r'\w', '\xE0')), True)860 # Dropped the following test. 
It's not possible to determine what the861 # correct result should be in the general case.862# self.assertEqual(bool(regex.match(br'(?L)\w', b'\xE0')),863# b'\xE0'.isalnum())864 self.assertEqual(bool(regex.match(br'(?L)\d', b'0')), True)865 self.assertEqual(bool(regex.match(br'(?L)\s', b' ')), True)866 self.assertEqual(bool(regex.match(br'(?L)\w', b'a')), True)867 self.assertEqual(regex.match(br'(?L)\d', b'?'), None)868 self.assertEqual(regex.match(br'(?L)\s', b'?'), None)869 self.assertEqual(regex.match(br'(?L)\w', b'?'), None)870 self.assertEqual(regex.match(br'(?L)\D', b'0'), None)871 self.assertEqual(regex.match(br'(?L)\S', b' '), None)872 self.assertEqual(regex.match(br'(?L)\W', b'a'), None)873 self.assertEqual(bool(regex.match(br'(?L)\D', b'?')), True)874 self.assertEqual(bool(regex.match(br'(?L)\S', b'?')), True)875 self.assertEqual(bool(regex.match(br'(?L)\W', b'?')), True)876 self.assertEqual(bool(regex.match(r'\p{Cyrillic}',877 '\N{CYRILLIC CAPITAL LETTER A}')), True)878 self.assertEqual(bool(regex.match(r'(?i)\p{Cyrillic}',879 '\N{CYRILLIC CAPITAL LETTER A}')), True)880 self.assertEqual(bool(regex.match(r'\p{IsCyrillic}',881 '\N{CYRILLIC CAPITAL LETTER A}')), True)882 self.assertEqual(bool(regex.match(r'\p{Script=Cyrillic}',883 '\N{CYRILLIC CAPITAL LETTER A}')), True)884 self.assertEqual(bool(regex.match(r'\p{InCyrillic}',885 '\N{CYRILLIC CAPITAL LETTER A}')), True)886 self.assertEqual(bool(regex.match(r'\p{Block=Cyrillic}',887 '\N{CYRILLIC CAPITAL LETTER A}')), True)888 self.assertEqual(bool(regex.match(r'[[:Cyrillic:]]',889 '\N{CYRILLIC CAPITAL LETTER A}')), True)890 self.assertEqual(bool(regex.match(r'[[:IsCyrillic:]]',891 '\N{CYRILLIC CAPITAL LETTER A}')), True)892 self.assertEqual(bool(regex.match(r'[[:Script=Cyrillic:]]',893 '\N{CYRILLIC CAPITAL LETTER A}')), True)894 self.assertEqual(bool(regex.match(r'[[:InCyrillic:]]',895 '\N{CYRILLIC CAPITAL LETTER A}')), True)896 self.assertEqual(bool(regex.match(r'[[:Block=Cyrillic:]]',897 '\N{CYRILLIC 
CAPITAL LETTER A}')), True)898 self.assertEqual(bool(regex.match(r'\P{Cyrillic}',899 '\N{LATIN CAPITAL LETTER A}')), True)900 self.assertEqual(bool(regex.match(r'\P{IsCyrillic}',901 '\N{LATIN CAPITAL LETTER A}')), True)902 self.assertEqual(bool(regex.match(r'\P{Script=Cyrillic}',903 '\N{LATIN CAPITAL LETTER A}')), True)904 self.assertEqual(bool(regex.match(r'\P{InCyrillic}',905 '\N{LATIN CAPITAL LETTER A}')), True)906 self.assertEqual(bool(regex.match(r'\P{Block=Cyrillic}',907 '\N{LATIN CAPITAL LETTER A}')), True)908 self.assertEqual(bool(regex.match(r'\p{^Cyrillic}',909 '\N{LATIN CAPITAL LETTER A}')), True)910 self.assertEqual(bool(regex.match(r'\p{^IsCyrillic}',911 '\N{LATIN CAPITAL LETTER A}')), True)912 self.assertEqual(bool(regex.match(r'\p{^Script=Cyrillic}',913 '\N{LATIN CAPITAL LETTER A}')), True)914 self.assertEqual(bool(regex.match(r'\p{^InCyrillic}',915 '\N{LATIN CAPITAL LETTER A}')), True)916 self.assertEqual(bool(regex.match(r'\p{^Block=Cyrillic}',917 '\N{LATIN CAPITAL LETTER A}')), True)918 self.assertEqual(bool(regex.match(r'[[:^Cyrillic:]]',919 '\N{LATIN CAPITAL LETTER A}')), True)920 self.assertEqual(bool(regex.match(r'[[:^IsCyrillic:]]',921 '\N{LATIN CAPITAL LETTER A}')), True)922 self.assertEqual(bool(regex.match(r'[[:^Script=Cyrillic:]]',923 '\N{LATIN CAPITAL LETTER A}')), True)924 self.assertEqual(bool(regex.match(r'[[:^InCyrillic:]]',925 '\N{LATIN CAPITAL LETTER A}')), True)926 self.assertEqual(bool(regex.match(r'[[:^Block=Cyrillic:]]',927 '\N{LATIN CAPITAL LETTER A}')), True)928 self.assertEqual(bool(regex.match(r'\d', '0')), True)929 self.assertEqual(bool(regex.match(r'\s', ' ')), True)930 self.assertEqual(bool(regex.match(r'\w', 'A')), True)931 self.assertEqual(regex.match(r"\d", "?"), None)932 self.assertEqual(regex.match(r"\s", "?"), None)933 self.assertEqual(regex.match(r"\w", "?"), None)934 self.assertEqual(regex.match(r"\D", "0"), None)935 self.assertEqual(regex.match(r"\S", " "), None)936 self.assertEqual(regex.match(r"\W", "A"), 
None)937 self.assertEqual(bool(regex.match(r'\D', '?')), True)938 self.assertEqual(bool(regex.match(r'\S', '?')), True)939 self.assertEqual(bool(regex.match(r'\W', '?')), True)940 self.assertEqual(bool(regex.match(r'\p{L}', 'A')), True)941 self.assertEqual(bool(regex.match(r'\p{L}', 'a')), True)942 self.assertEqual(bool(regex.match(r'\p{Lu}', 'A')), True)943 self.assertEqual(bool(regex.match(r'\p{Ll}', 'a')), True)944 self.assertEqual(bool(regex.match(r'(?i)a', 'a')), True)945 self.assertEqual(bool(regex.match(r'(?i)a', 'A')), True)946 self.assertEqual(bool(regex.match(r'\w', '0')), True)947 self.assertEqual(bool(regex.match(r'\w', 'a')), True)948 self.assertEqual(bool(regex.match(r'\w', '_')), True)949 self.assertEqual(regex.match(r"\X", "\xE0").span(), (0, 1))950 self.assertEqual(regex.match(r"\X", "a\u0300").span(), (0, 2))951 self.assertEqual(regex.findall(r"\X",952 "a\xE0a\u0300e\xE9e\u0301"), ['a', '\xe0', 'a\u0300', 'e',953 '\xe9', 'e\u0301'])954 self.assertEqual(regex.findall(r"\X{3}",955 "a\xE0a\u0300e\xE9e\u0301"), ['a\xe0a\u0300', 'e\xe9e\u0301'])956 self.assertEqual(regex.findall(r"\X", "\r\r\n\u0301A\u0301"),957 ['\r', '\r\n', '\u0301', 'A\u0301'])958 self.assertEqual(bool(regex.match(r'\p{Ll}', 'a')), True)959 chars_u = "-09AZaz_\u0393\u03b3"960 chars_b = b"-09AZaz_"961 word_set = set("Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc".split())962 tests = [963 (r"\w", chars_u, "09AZaz_\u0393\u03b3"),964 (r"[[:word:]]", chars_u, "09AZaz_\u0393\u03b3"),965 (r"\W", chars_u, "-"),966 (r"[[:^word:]]", chars_u, "-"),967 (r"\d", chars_u, "09"),968 (r"[[:digit:]]", chars_u, "09"),969 (r"\D", chars_u, "-AZaz_\u0393\u03b3"),970 (r"[[:^digit:]]", chars_u, "-AZaz_\u0393\u03b3"),971 (r"[[:alpha:]]", chars_u, "AZaz\u0393\u03b3"),972 (r"[[:^alpha:]]", chars_u, "-09_"),973 (r"[[:alnum:]]", chars_u, "09AZaz\u0393\u03b3"),974 (r"[[:^alnum:]]", chars_u, "-_"),975 (r"[[:xdigit:]]", chars_u, "09Aa"),976 (r"[[:^xdigit:]]", chars_u, "-Zz_\u0393\u03b3"),977 (r"\p{InBasicLatin}", "a\xE1", 
"a"),978 (r"\P{InBasicLatin}", "a\xE1", "\xE1"),979 (r"(?i)\p{InBasicLatin}", "a\xE1", "a"),980 (r"(?i)\P{InBasicLatin}", "a\xE1", "\xE1"),981 (br"(?L)\w", chars_b, b"09AZaz_"),982 (br"(?L)[[:word:]]", chars_b, b"09AZaz_"),983 (br"(?L)\W", chars_b, b"-"),984 (br"(?L)[[:^word:]]", chars_b, b"-"),985 (br"(?L)\d", chars_b, b"09"),986 (br"(?L)[[:digit:]]", chars_b, b"09"),987 (br"(?L)\D", chars_b, b"-AZaz_"),988 (br"(?L)[[:^digit:]]", chars_b, b"-AZaz_"),989 (br"(?L)[[:alpha:]]", chars_b, b"AZaz"),990 (br"(?L)[[:^alpha:]]", chars_b, b"-09_"),991 (br"(?L)[[:alnum:]]", chars_b, b"09AZaz"),992 (br"(?L)[[:^alnum:]]", chars_b, b"-_"),993 (br"(?L)[[:xdigit:]]", chars_b, b"09Aa"),994 (br"(?L)[[:^xdigit:]]", chars_b, b"-Zz_"),995 (br"(?a)\w", chars_b, b"09AZaz_"),996 (br"(?a)[[:word:]]", chars_b, b"09AZaz_"),997 (br"(?a)\W", chars_b, b"-"),998 (br"(?a)[[:^word:]]", chars_b, b"-"),999 (br"(?a)\d", chars_b, b"09"),1000 (br"(?a)[[:digit:]]", chars_b, b"09"),1001 (br"(?a)\D", chars_b, b"-AZaz_"),1002 (br"(?a)[[:^digit:]]", chars_b, b"-AZaz_"),1003 (br"(?a)[[:alpha:]]", chars_b, b"AZaz"),1004 (br"(?a)[[:^alpha:]]", chars_b, b"-09_"),1005 (br"(?a)[[:alnum:]]", chars_b, b"09AZaz"),1006 (br"(?a)[[:^alnum:]]", chars_b, b"-_"),1007 (br"(?a)[[:xdigit:]]", chars_b, b"09Aa"),1008 (br"(?a)[[:^xdigit:]]", chars_b, b"-Zz_"),1009 ]1010 for pattern, chars, expected in tests:1011 try:1012 if chars[ : 0].join(regex.findall(pattern, chars)) != expected:1013 self.fail("Failed: {}".format(pattern))1014 except Exception as e:1015 self.fail("Failed: {} raised {}".format(pattern, ascii(e)))1016 self.assertEqual(bool(regex.match(r"\p{NumericValue=0}", "0")),1017 True)1018 self.assertEqual(bool(regex.match(r"\p{NumericValue=1/2}",1019 "\N{VULGAR FRACTION ONE HALF}")), True)1020 self.assertEqual(bool(regex.match(r"\p{NumericValue=0.5}",1021 "\N{VULGAR FRACTION ONE HALF}")), True)1022 def test_word_class(self):1023 self.assertEqual(regex.findall(r"\w+",1024 " \u0939\u093f\u0928\u094d\u0926\u0940,"),1025 
['\u0939\u093f\u0928\u094d\u0926\u0940'])1026 self.assertEqual(regex.findall(r"\W+",1027 " \u0939\u093f\u0928\u094d\u0926\u0940,"), [' ', ','])1028 self.assertEqual(regex.split(r"(?V1)\b",1029 " \u0939\u093f\u0928\u094d\u0926\u0940,"), [' ',1030 '\u0939\u093f\u0928\u094d\u0926\u0940', ','])1031 self.assertEqual(regex.split(r"(?V1)\B",1032 " \u0939\u093f\u0928\u094d\u0926\u0940,"), ['', ' \u0939',1033 '\u093f', '\u0928', '\u094d', '\u0926', '\u0940,', ''])1034 def test_search_anchor(self):1035 self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd'])1036 def test_search_reverse(self):1037 self.assertEqual(regex.findall(r"(?r).", "abc"), ['c', 'b', 'a'])1038 self.assertEqual(regex.findall(r"(?r).", "abc", overlapped=True), ['c',1039 'b', 'a'])1040 self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc'])1041 self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True),1042 ['de', 'cd', 'bc', 'ab'])1043 self.assertEqual(regex.findall(r"(?r)(.)(-)(.)", "a-b-c",1044 overlapped=True), [("b", "-", "c"), ("a", "-", "b")])1045 self.assertEqual([m[0] for m in regex.finditer(r"(?r).", "abc")], ['c',1046 'b', 'a'])1047 self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde",1048 overlapped=True)], ['de', 'cd', 'bc', 'ab'])1049 self.assertEqual([m[0] for m in regex.finditer(r"(?r).", "abc")], ['c',1050 'b', 'a'])1051 self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde",1052 overlapped=True)], ['de', 'cd', 'bc', 'ab'])1053 self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo',1054 'bar'])1055 self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo',1056 'bar'])1057 self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo',1058 ''])1059 self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar',1060 'foo', ''])1061 self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")],1062 ['', 'foo', 'bar'])1063 self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+",1064 "foo bar")], 
['', 'foo', 'bar'])1065 self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+",1066 "foo bar")], ['bar', 'foo', ''])1067 self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+",1068 "foo bar")], ['bar', 'foo', ''])1069 self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd'])1070 self.assertEqual(regex.findall(r".{2}(?<=\G.*)", "abcd"), ['ab', 'cd'])1071 self.assertEqual(regex.findall(r"(?r)\G\w{2}", "abcd ef"), [])1072 self.assertEqual(regex.findall(r"(?r)\w{2}\G", "abcd ef"), ['ef'])1073 self.assertEqual(regex.findall(r"q*", "qqwe"), ['qq', '', '', ''])1074 self.assertEqual(regex.findall(r"(?V1)q*", "qqwe"), ['qq', '', '', ''])1075 self.assertEqual(regex.findall(r"(?r)q*", "qqwe"), ['', '', 'qq', ''])1076 self.assertEqual(regex.findall(r"(?rV1)q*", "qqwe"), ['', '', 'qq',1077 ''])1078 self.assertEqual(regex.findall(".", "abcd", pos=1, endpos=3), ['b',1079 'c'])1080 self.assertEqual(regex.findall(".", "abcd", pos=1, endpos=-1), ['b',1081 'c'])1082 self.assertEqual([m[0] for m in regex.finditer(".", "abcd", pos=1,1083 endpos=3)], ['b', 'c'])1084 self.assertEqual([m[0] for m in regex.finditer(".", "abcd", pos=1,1085 endpos=-1)], ['b', 'c'])1086 self.assertEqual([m[0] for m in regex.finditer("(?r).", "abcd", pos=1,1087 endpos=3)], ['c', 'b'])1088 self.assertEqual([m[0] for m in regex.finditer("(?r).", "abcd", pos=1,1089 endpos=-1)], ['c', 'b'])1090 self.assertEqual(regex.findall("(?r).", "abcd", pos=1, endpos=3), ['c',1091 'b'])1092 self.assertEqual(regex.findall("(?r).", "abcd", pos=1, endpos=-1),1093 ['c', 'b'])1094 self.assertEqual(regex.findall(r"[ab]", "aB", regex.I), ['a', 'B'])1095 self.assertEqual(regex.findall(r"(?r)[ab]", "aB", regex.I), ['B', 'a'])1096 self.assertEqual(regex.findall(r"(?r).{2}", "abc"), ['bc'])1097 self.assertEqual(regex.findall(r"(?r).{2}", "abc", overlapped=True),1098 ['bc', 'ab'])1099 self.assertEqual(regex.findall(r"(\w+) (\w+)",1100 "first second third fourth fifth"), [('first', 'second'), ('third',1101 
'fourth')])1102 self.assertEqual(regex.findall(r"(?r)(\w+) (\w+)",1103 "first second third fourth fifth"), [('fourth', 'fifth'), ('second',1104 'third')])1105 self.assertEqual([m[0] for m in regex.finditer(r"(?r).{2}", "abc")],1106 ['bc'])1107 self.assertEqual([m[0] for m in regex.finditer(r"(?r).{2}", "abc",1108 overlapped=True)], ['bc', 'ab'])1109 self.assertEqual([m[0] for m in regex.finditer(r"(\w+) (\w+)",1110 "first second third fourth fifth")], ['first second',1111 'third fourth'])1112 self.assertEqual([m[0] for m in regex.finditer(r"(?r)(\w+) (\w+)",1113 "first second third fourth fifth")], ['fourth fifth',1114 'second third'])1115 self.assertEqual(regex.search("abcdef", "abcdef").span(), (0, 6))1116 self.assertEqual(regex.search("(?r)abcdef", "abcdef").span(), (0, 6))1117 self.assertEqual(regex.search("(?i)abcdef", "ABCDEF").span(), (0, 6))1118 self.assertEqual(regex.search("(?ir)abcdef", "ABCDEF").span(), (0, 6))1119 self.assertEqual(regex.sub(r"(.)", r"\1", "abc"), 'abc')1120 self.assertEqual(regex.sub(r"(?r)(.)", r"\1", "abc"), 'abc')1121 def test_atomic(self):1122 # Issue 433030.1123 self.assertEqual(regex.search(r"(?>a*)a", "aa"), None)1124 def test_possessive(self):1125 # Single-character non-possessive.1126 self.assertEqual(regex.search(r"a?a", "a").span(), (0, 1))1127 self.assertEqual(regex.search(r"a*a", "aaa").span(), (0, 3))1128 self.assertEqual(regex.search(r"a+a", "aaa").span(), (0, 3))1129 self.assertEqual(regex.search(r"a{1,3}a", "aaa").span(), (0, 3))1130 # Multiple-character non-possessive.1131 self.assertEqual(regex.search(r"(?:ab)?ab", "ab").span(), (0, 2))1132 self.assertEqual(regex.search(r"(?:ab)*ab", "ababab").span(), (0, 6))1133 self.assertEqual(regex.search(r"(?:ab)+ab", "ababab").span(), (0, 6))1134 self.assertEqual(regex.search(r"(?:ab){1,3}ab", "ababab").span(), (0,1135 6))1136 # Single-character possessive.1137 self.assertEqual(regex.search(r"a?+a", "a"), None)1138 self.assertEqual(regex.search(r"a*+a", "aaa"), None)1139 
self.assertEqual(regex.search(r"a++a", "aaa"), None)1140 self.assertEqual(regex.search(r"a{1,3}+a", "aaa"), None)1141 # Multiple-character possessive.1142 self.assertEqual(regex.search(r"(?:ab)?+ab", "ab"), None)1143 self.assertEqual(regex.search(r"(?:ab)*+ab", "ababab"), None)1144 self.assertEqual(regex.search(r"(?:ab)++ab", "ababab"), None)1145 self.assertEqual(regex.search(r"(?:ab){1,3}+ab", "ababab"), None)1146 def test_zerowidth(self):1147 # Issue 3262.1148 self.assertEqual(regex.split(r"\b", "a b"), ['a b'])1149 self.assertEqual(regex.split(r"(?V1)\b", "a b"), ['', 'a', ' ', 'b',1150 ''])1151 # Issue 1647489.1152 self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo',1153 'bar'])1154 self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")],1155 ['', 'foo', 'bar'])1156 self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo',1157 ''])1158 self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+",1159 "foo bar")], ['bar', 'foo', ''])1160 self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo',1161 'bar'])1162 self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+",1163 "foo bar")], ['', 'foo', 'bar'])1164 self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar',1165 'foo', ''])1166 self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+",1167 "foo bar")], ['bar', 'foo', ''])1168 self.assertEqual(regex.split("", "xaxbxc"), ['xaxbxc'])1169 self.assertEqual([m for m in regex.splititer("", "xaxbxc")],1170 ['xaxbxc'])1171 self.assertEqual(regex.split("(?r)", "xaxbxc"), ['xaxbxc'])1172 self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")],1173 ['xaxbxc'])1174 self.assertEqual(regex.split("(?V1)", "xaxbxc"), ['', 'x', 'a', 'x',1175 'b', 'x', 'c', ''])1176 self.assertEqual([m for m in regex.splititer("(?V1)", "xaxbxc")], ['',1177 'x', 'a', 'x', 'b', 'x', 'c', ''])1178 self.assertEqual(regex.split("(?rV1)", "xaxbxc"), ['', 'c', 'x', 'b',1179 'x', 'a', 'x', ''])1180 self.assertEqual([m for m 
in regex.splititer("(?rV1)", "xaxbxc")], ['',1181 'c', 'x', 'b', 'x', 'a', 'x', ''])1182 def test_scoped_and_inline_flags(self):1183 # Issues 433028, 433024, 433027.1184 self.assertEqual(regex.search(r"(?i)Ab", "ab").span(), (0, 2))1185 self.assertEqual(regex.search(r"(?i:A)b", "ab").span(), (0, 2))1186 self.assertEqual(regex.search(r"A(?i)b", "ab").span(), (0, 2))1187 self.assertEqual(regex.search(r"A(?iV1)b", "ab"), None)1188 self.assertRaisesRegex(regex.error, self.CANT_TURN_OFF, lambda:1189 regex.search(r"(?V0-i)Ab", "ab", flags=regex.I))1190 self.assertEqual(regex.search(r"(?V0)Ab", "ab"), None)1191 self.assertEqual(regex.search(r"(?V1)Ab", "ab"), None)1192 self.assertEqual(regex.search(r"(?V1-i)Ab", "ab", flags=regex.I), None)1193 self.assertEqual(regex.search(r"(?-i:A)b", "ab", flags=regex.I), None)1194 self.assertEqual(regex.search(r"A(?V1-i)b", "ab",1195 flags=regex.I).span(), (0, 2))1196 def test_repeated_repeats(self):1197 # Issue 2537.1198 self.assertEqual(regex.search(r"(?:a+)+", "aaa").span(), (0, 3))1199 self.assertEqual(regex.search(r"(?:(?:ab)+c)+", "abcabc").span(), (0,1200 6))1201 def test_lookbehind(self):1202 self.assertEqual(regex.search(r"123(?<=a\d+)", "a123").span(), (1, 4))1203 self.assertEqual(regex.search(r"123(?<=a\d+)", "b123"), None)1204 self.assertEqual(regex.search(r"123(?<!a\d+)", "a123"), None)1205 self.assertEqual(regex.search(r"123(?<!a\d+)", "b123").span(), (1, 4))1206 self.assertEqual(bool(regex.match("(a)b(?<=b)(c)", "abc")), True)1207 self.assertEqual(regex.match("(a)b(?<=c)(c)", "abc"), None)1208 self.assertEqual(bool(regex.match("(a)b(?=c)(c)", "abc")), True)1209 self.assertEqual(regex.match("(a)b(?=b)(c)", "abc"), None)1210 self.assertEqual(regex.match("(?:(a)|(x))b(?<=(?(2)x|c))c", "abc"),1211 None)1212 self.assertEqual(regex.match("(?:(a)|(x))b(?<=(?(2)b|x))c", "abc"),1213 None)1214 self.assertEqual(bool(regex.match("(?:(a)|(x))b(?<=(?(2)x|b))c",1215 "abc")), True)1216 
self.assertEqual(regex.match("(?:(a)|(x))b(?<=(?(1)c|x))c", "abc"),1217 None)1218 self.assertEqual(bool(regex.match("(?:(a)|(x))b(?<=(?(1)b|x))c",1219 "abc")), True)1220 self.assertEqual(bool(regex.match("(?:(a)|(x))b(?=(?(2)x|c))c",1221 "abc")), True)1222 self.assertEqual(regex.match("(?:(a)|(x))b(?=(?(2)c|x))c", "abc"),1223 None)1224 self.assertEqual(bool(regex.match("(?:(a)|(x))b(?=(?(2)x|c))c",1225 "abc")), True)1226 self.assertEqual(regex.match("(?:(a)|(x))b(?=(?(1)b|x))c", "abc"),1227 None)1228 self.assertEqual(bool(regex.match("(?:(a)|(x))b(?=(?(1)c|x))c",1229 "abc")), True)1230 self.assertEqual(regex.match("(a)b(?<=(?(2)x|c))(c)", "abc"), None)1231 self.assertEqual(regex.match("(a)b(?<=(?(2)b|x))(c)", "abc"), None)1232 self.assertEqual(regex.match("(a)b(?<=(?(1)c|x))(c)", "abc"), None)1233 self.assertEqual(bool(regex.match("(a)b(?<=(?(1)b|x))(c)", "abc")),1234 True)1235 self.assertEqual(bool(regex.match("(a)b(?=(?(2)x|c))(c)", "abc")),1236 True)1237 self.assertEqual(regex.match("(a)b(?=(?(2)b|x))(c)", "abc"), None)1238 self.assertEqual(bool(regex.match("(a)b(?=(?(1)c|x))(c)", "abc")),1239 True)1240 self.assertEqual(repr(type(regex.compile(r"(a)\2(b)"))),1241 self.PATTERN_CLASS)1242 def test_unmatched_in_sub(self):1243 # Issue 1519638.1244 self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "xy"), 'y-x')1245 self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "xy"), 'y-x-')1246 self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "x"), '-x')1247 self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "x"), '-x-')1248 self.assertEqual(regex.sub(r"(?V0)(x)?(y)?", r"\2-\1", "y"), 'y-')1249 self.assertEqual(regex.sub(r"(?V1)(x)?(y)?", r"\2-\1", "y"), 'y--')1250 def test_bug_10328 (self):1251 # Issue 10328.1252 pat = regex.compile(r'(?mV0)(?P<trailing_ws>[ \t]+\r*$)|(?P<no_final_newline>(?<=[^\n])\Z)')1253 self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>',1254 'foobar '), ('foobar<trailing_ws>', 1))1255 self.assertEqual([m.group() for m in 
pat.finditer('foobar ')], [' ',1256 ''])1257 pat = regex.compile(r'(?mV1)(?P<trailing_ws>[ \t]+\r*$)|(?P<no_final_newline>(?<=[^\n])\Z)')1258 self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>',1259 'foobar '), ('foobar<trailing_ws><no_final_newline>', 2))1260 self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ',1261 ''])1262 def test_overlapped(self):1263 self.assertEqual(regex.findall(r"..", "abcde"), ['ab', 'cd'])1264 self.assertEqual(regex.findall(r"..", "abcde", overlapped=True), ['ab',1265 'bc', 'cd', 'de'])1266 self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc'])1267 self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True),1268 ['de', 'cd', 'bc', 'ab'])1269 self.assertEqual(regex.findall(r"(.)(-)(.)", "a-b-c", overlapped=True),1270 [("a", "-", "b"), ("b", "-", "c")])1271 self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde")], ['ab',1272 'cd'])1273 self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde",1274 overlapped=True)], ['ab', 'bc', 'cd', 'de'])1275 self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde")],1276 ['de', 'bc'])1277 self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde",1278 overlapped=True)], ['de', 'cd', 'bc', 'ab'])1279 self.assertEqual([m.groups() for m in regex.finditer(r"(.)(-)(.)",1280 "a-b-c", overlapped=True)], [("a", "-", "b"), ("b", "-", "c")])1281 self.assertEqual([m.groups() for m in regex.finditer(r"(?r)(.)(-)(.)",1282 "a-b-c", overlapped=True)], [("b", "-", "c"), ("a", "-", "b")])1283 def test_splititer(self):1284 self.assertEqual(regex.split(r",", "a,b,,c,"), ['a', 'b', '', 'c', ''])1285 self.assertEqual([m for m in regex.splititer(r",", "a,b,,c,")], ['a',1286 'b', '', 'c', ''])1287 def test_grapheme(self):1288 self.assertEqual(regex.match(r"\X", "\xE0").span(), (0, 1))1289 self.assertEqual(regex.match(r"\X", "a\u0300").span(), (0, 2))1290 self.assertEqual(regex.findall(r"\X",1291 "a\xE0a\u0300e\xE9e\u0301"), ['a', '\xe0', 'a\u0300', 'e',1292 
'\xe9', 'e\u0301'])1293 self.assertEqual(regex.findall(r"\X{3}",1294 "a\xE0a\u0300e\xE9e\u0301"), ['a\xe0a\u0300', 'e\xe9e\u0301'])1295 self.assertEqual(regex.findall(r"\X", "\r\r\n\u0301A\u0301"),1296 ['\r', '\r\n', '\u0301', 'A\u0301'])1297 def test_word_boundary(self):1298 text = 'The quick ("brown") fox can\'t jump 32.3 feet, right?'1299 self.assertEqual(regex.split(r'(?V1)\b', text), ['', 'The', ' ',1300 'quick', ' ("', 'brown', '") ', 'fox', ' ', 'can', "'", 't',1301 ' ', 'jump', ' ', '32', '.', '3', ' ', 'feet', ', ',1302 'right', '?'])1303 self.assertEqual(regex.split(r'(?V1w)\b', text), ['', 'The', ' ',1304 'quick', ' ', '(', '"', 'brown', '"', ')', ' ', 'fox', ' ',1305 "can't", ' ', 'jump', ' ', '32.3', ' ', 'feet', ',', ' ',1306 'right', '?', ''])1307 text = "The fox"1308 self.assertEqual(regex.split(r'(?V1)\b', text), ['', 'The', ' ',1309 'fox', ''])1310 self.assertEqual(regex.split(r'(?V1w)\b', text), ['', 'The', ' ',1311 ' ', 'fox', ''])1312 text = "can't aujourd'hui l'objectif"1313 self.assertEqual(regex.split(r'(?V1)\b', text), ['', 'can', "'",1314 't', ' ', 'aujourd', "'", 'hui', ' ', 'l', "'", 'objectif',1315 ''])1316 self.assertEqual(regex.split(r'(?V1w)\b', text), ['', "can't", ' ',1317 "aujourd'hui", ' ', "l'", 'objectif', ''])1318 def test_line_boundary(self):1319 self.assertEqual(regex.findall(r".+", "Line 1\nLine 2\n"), ["Line 1",1320 "Line 2"])1321 self.assertEqual(regex.findall(r".+", "Line 1\rLine 2\r"),1322 ["Line 1\rLine 2\r"])1323 self.assertEqual(regex.findall(r".+", "Line 1\r\nLine 2\r\n"),1324 ["Line 1\r", "Line 2\r"])1325 self.assertEqual(regex.findall(r"(?w).+", "Line 1\nLine 2\n"),1326 ["Line 1", "Line 2"])1327 self.assertEqual(regex.findall(r"(?w).+", "Line 1\rLine 2\r"),1328 ["Line 1", "Line 2"])1329 self.assertEqual(regex.findall(r"(?w).+", "Line 1\r\nLine 2\r\n"),1330 ["Line 1", "Line 2"])1331 self.assertEqual(regex.search(r"^abc", "abc").start(), 0)1332 self.assertEqual(regex.search(r"^abc", "\nabc"), None)1333 
self.assertEqual(regex.search(r"^abc", "\rabc"), None)1334 self.assertEqual(regex.search(r"(?w)^abc", "abc").start(), 0)1335 self.assertEqual(regex.search(r"(?w)^abc", "\nabc"), None)1336 self.assertEqual(regex.search(r"(?w)^abc", "\rabc"), None)1337 self.assertEqual(regex.search(r"abc$", "abc").start(), 0)1338 self.assertEqual(regex.search(r"abc$", "abc\n").start(), 0)1339 self.assertEqual(regex.search(r"abc$", "abc\r"), None)1340 self.assertEqual(regex.search(r"(?w)abc$", "abc").start(), 0)1341 self.assertEqual(regex.search(r"(?w)abc$", "abc\n").start(), 0)1342 self.assertEqual(regex.search(r"(?w)abc$", "abc\r").start(), 0)1343 self.assertEqual(regex.search(r"(?m)^abc", "abc").start(), 0)1344 self.assertEqual(regex.search(r"(?m)^abc", "\nabc").start(), 1)1345 self.assertEqual(regex.search(r"(?m)^abc", "\rabc"), None)1346 self.assertEqual(regex.search(r"(?mw)^abc", "abc").start(), 0)1347 self.assertEqual(regex.search(r"(?mw)^abc", "\nabc").start(), 1)1348 self.assertEqual(regex.search(r"(?mw)^abc", "\rabc").start(), 1)1349 self.assertEqual(regex.search(r"(?m)abc$", "abc").start(), 0)1350 self.assertEqual(regex.search(r"(?m)abc$", "abc\n").start(), 0)1351 self.assertEqual(regex.search(r"(?m)abc$", "abc\r"), None)1352 self.assertEqual(regex.search(r"(?mw)abc$", "abc").start(), 0)1353 self.assertEqual(regex.search(r"(?mw)abc$", "abc\n").start(), 0)1354 self.assertEqual(regex.search(r"(?mw)abc$", "abc\r").start(), 0)1355 def test_branch_reset(self):1356 self.assertEqual(regex.match(r"(?:(a)|(b))(c)", "ac").groups(), ('a',1357 None, 'c'))1358 self.assertEqual(regex.match(r"(?:(a)|(b))(c)", "bc").groups(), (None,1359 'b', 'c'))1360 self.assertEqual(regex.match(r"(?:(?<a>a)|(?<b>b))(?<c>c)",1361 "ac").groups(), ('a', None, 'c'))1362 self.assertEqual(regex.match(r"(?:(?<a>a)|(?<b>b))(?<c>c)",1363 "bc").groups(), (None, 'b', 'c'))1364 self.assertEqual(regex.match(r"(?<a>a)(?:(?<b>b)|(?<c>c))(?<d>d)",1365 "abd").groups(), ('a', 'b', None, 'd'))1366 
self.assertEqual(regex.match(r"(?<a>a)(?:(?<b>b)|(?<c>c))(?<d>d)",1367 "acd").groups(), ('a', None, 'c', 'd'))1368 self.assertEqual(regex.match(r"(a)(?:(b)|(c))(d)", "abd").groups(),1369 ('a', 'b', None, 'd'))1370 self.assertEqual(regex.match(r"(a)(?:(b)|(c))(d)", "acd").groups(),1371 ('a', None, 'c', 'd'))1372 self.assertEqual(regex.match(r"(a)(?|(b)|(b))(d)", "abd").groups(),1373 ('a', 'b', 'd'))1374 self.assertEqual(regex.match(r"(?|(?<a>a)|(?<b>b))(c)", "ac").groups(),1375 ('a', None, 'c'))1376 self.assertEqual(regex.match(r"(?|(?<a>a)|(?<b>b))(c)", "bc").groups(),1377 (None, 'b', 'c'))1378 self.assertEqual(regex.match(r"(?|(?<a>a)|(?<a>b))(c)", "ac").groups(),1379 ('a', 'c'))1380 self.assertEqual(regex.match(r"(?|(?<a>a)|(?<a>b))(c)", "bc").groups(),1381 ('b', 'c'))1382 self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(?<b>c)(?<a>d))(e)",1383 "abe").groups(), ('a', 'b', 'e'))1384 self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(?<b>c)(?<a>d))(e)",1385 "cde").groups(), ('d', 'c', 'e'))1386 self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(?<b>c)(d))(e)",1387 "abe").groups(), ('a', 'b', 'e'))1388 self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(?<b>c)(d))(e)",1389 "cde").groups(), ('d', 'c', 'e'))1390 self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(d))(e)",1391 "abe").groups(), ('a', 'b', 'e'))1392 self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(d))(e)",1393 "cde").groups(), ('c', 'd', 'e'))1394 # Hg issue 87: Allow duplicate names of groups1395 self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(?<a>d))(e)",1396 "abe").groups(), ("a", "b", "e"))1397 self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(?<a>d))(e)",1398 "abe").capturesdict(), {"a": ["a"], "b": ["b"]})1399 self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(?<a>d))(e)",1400 "cde").groups(), ("d", None, "e"))1401 self.assertEqual(regex.match(r"(?|(?<a>a)(?<b>b)|(c)(?<a>d))(e)",1402 "cde").capturesdict(), {"a": ["c", "d"], "b": []})1403 def test_set(self):1404 
self.assertEqual(regex.match(r"[a]", "a").span(), (0, 1))1405 self.assertEqual(regex.match(r"(?i)[a]", "A").span(), (0, 1))1406 self.assertEqual(regex.match(r"[a-b]", r"a").span(), (0, 1))1407 self.assertEqual(regex.match(r"(?i)[a-b]", r"A").span(), (0, 1))1408 self.assertEqual(regex.sub(r"(?V0)([][])", r"-", "a[b]c"), "a-b-c")1409 self.assertEqual(regex.findall(r"[\p{Alpha}]", "a0"), ["a"])1410 self.assertEqual(regex.findall(r"(?i)[\p{Alpha}]", "A0"), ["A"])1411 self.assertEqual(regex.findall(r"[a\p{Alpha}]", "ab0"), ["a", "b"])1412 self.assertEqual(regex.findall(r"[a\P{Alpha}]", "ab0"), ["a", "0"])1413 self.assertEqual(regex.findall(r"(?i)[a\p{Alpha}]", "ab0"), ["a",1414 "b"])1415 self.assertEqual(regex.findall(r"(?i)[a\P{Alpha}]", "ab0"), ["a",1416 "0"])1417 self.assertEqual(regex.findall(r"[a-b\p{Alpha}]", "abC0"), ["a",1418 "b", "C"])1419 self.assertEqual(regex.findall(r"(?i)[a-b\p{Alpha}]", "AbC0"), ["A",1420 "b", "C"])1421 self.assertEqual(regex.findall(r"[\p{Alpha}]", "a0"), ["a"])1422 self.assertEqual(regex.findall(r"[\P{Alpha}]", "a0"), ["0"])1423 self.assertEqual(regex.findall(r"[^\p{Alpha}]", "a0"), ["0"])1424 self.assertEqual(regex.findall(r"[^\P{Alpha}]", "a0"), ["a"])1425 self.assertEqual("".join(regex.findall(r"[^\d-h]", "a^b12c-h")),1426 'a^bc')1427 self.assertEqual("".join(regex.findall(r"[^\dh]", "a^b12c-h")),1428 'a^bc-')1429 self.assertEqual("".join(regex.findall(r"[^h\s\db]", "a^b 12c-h")),1430 'a^c-')1431 self.assertEqual("".join(regex.findall(r"[^b\w]", "a b")), ' ')1432 self.assertEqual("".join(regex.findall(r"[^b\S]", "a b")), ' ')1433 self.assertEqual("".join(regex.findall(r"[^8\d]", "a 1b2")), 'a b')1434 all_chars = "".join(chr(c) for c in range(0x100))1435 self.assertEqual(len(regex.findall(r"\p{ASCII}", all_chars)), 128)1436 self.assertEqual(len(regex.findall(r"\p{Letter}", all_chars)),1437 117)1438 self.assertEqual(len(regex.findall(r"\p{Digit}", all_chars)), 10)1439 # Set operators1440 
self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Letter}]",1441 all_chars)), 52)1442 self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Alnum}&&\p{Letter}]",1443 all_chars)), 52)1444 self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Alnum}&&\p{Digit}]",1445 all_chars)), 10)1446 self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Cc}]",1447 all_chars)), 33)1448 self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}&&\p{Graph}]",1449 all_chars)), 94)1450 self.assertEqual(len(regex.findall(r"(?V1)[\p{ASCII}--\p{Cc}]",1451 all_chars)), 95)1452 self.assertEqual(len(regex.findall(r"[\p{Letter}\p{Digit}]",1453 all_chars)), 127)1454 self.assertEqual(len(regex.findall(r"(?V1)[\p{Letter}||\p{Digit}]",1455 all_chars)), 127)1456 self.assertEqual(len(regex.findall(r"\p{HexDigit}", all_chars)),1457 22)1458 self.assertEqual(len(regex.findall(r"(?V1)[\p{HexDigit}~~\p{Digit}]",1459 all_chars)), 12)1460 self.assertEqual(len(regex.findall(r"(?V1)[\p{Digit}~~\p{HexDigit}]",1461 all_chars)), 12)1462 self.assertEqual(repr(type(regex.compile(r"(?V0)([][-])"))),1463 self.PATTERN_CLASS)1464 self.assertEqual(regex.findall(r"(?V1)[[a-z]--[aei]]", "abc"), ["b",1465 "c"])1466 self.assertEqual(regex.findall(r"(?iV1)[[a-z]--[aei]]", "abc"), ["b",1467 "c"])1468 self.assertEqual(regex.findall("(?V1)[\w--a]","abc"), ["b", "c"])1469 self.assertEqual(regex.findall("(?iV1)[\w--a]","abc"), ["b", "c"])1470 def test_various(self):1471 tests = [1472 # Test ?P< and ?P= extensions.1473 ('(?P<foo_123', '', '', regex.error, self.MISSING_GT), # Unterminated group identifier.1474 ('(?P<1>a)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with a digit.1475 ('(?P<!>a)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with an illegal char.1476 ('(?P<foo!>a)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with an illegal char.1477 # Same tests, for the ?P= form.1478 ('(?P<foo_123>a)(?P=foo_123', 'aa', '', regex.error,1479 self.MISSING_RPAREN),1480 ('(?P<foo_123>a)(?P=1)', 
'aa', '1', ascii('a')),1481 ('(?P<foo_123>a)(?P=0)', 'aa', '', regex.error,1482 self.BAD_GROUP_NAME),1483 ('(?P<foo_123>a)(?P=-1)', 'aa', '', regex.error,1484 self.BAD_GROUP_NAME),1485 ('(?P<foo_123>a)(?P=!)', 'aa', '', regex.error,1486 self.BAD_GROUP_NAME),1487 ('(?P<foo_123>a)(?P=foo_124)', 'aa', '', regex.error,1488 self.UNKNOWN_GROUP), # Backref to undefined group.1489 ('(?P<foo_123>a)', 'a', '1', ascii('a')),1490 ('(?P<foo_123>a)(?P=foo_123)', 'aa', '1', ascii('a')),1491 # Mal-formed \g in pattern treated as literal for compatibility.1492 (r'(?<foo_123>a)\g<foo_123', 'aa', '', ascii(None)),1493 (r'(?<foo_123>a)\g<1>', 'aa', '1', ascii('a')),1494 (r'(?<foo_123>a)\g<!>', 'aa', '', ascii(None)),1495 (r'(?<foo_123>a)\g<foo_124>', 'aa', '', regex.error,1496 self.UNKNOWN_GROUP), # Backref to undefined group.1497 ('(?<foo_123>a)', 'a', '1', ascii('a')),1498 (r'(?<foo_123>a)\g<foo_123>', 'aa', '1', ascii('a')),1499 # Test octal escapes.1500 ('\\1', 'a', '', regex.error, self.INVALID_GROUP_REF), # Backreference.1501 ('[\\1]', '\1', '0', "'\\x01'"), # Character.1502 ('\\09', chr(0) + '9', '0', ascii(chr(0) + '9')),1503 ('\\141', 'a', '0', ascii('a')),1504 ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9',1505 '0,11', ascii(('abcdefghijklk9', 'k'))),1506 # Test \0 is handled everywhere.1507 (r'\0', '\0', '0', ascii('\0')),1508 (r'[\0a]', '\0', '0', ascii('\0')),1509 (r'[a\0]', '\0', '0', ascii('\0')),1510 (r'[^a\0]', '\0', '', ascii(None)),1511 # Test various letter escapes.1512 (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', '0',1513 ascii('\a\b\f\n\r\t\v')),1514 (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', '0',1515 ascii('\a\b\f\n\r\t\v')),1516 (r'\c\e\g\h\i\j\k\o\p\q\y\z', 'ceghijkopqyz', '0',1517 ascii('ceghijkopqyz')),1518 (r'\xff', '\377', '0', ascii(chr(255))),1519 # New \x semantics.1520 (r'\x00ffffffffffffff', '\377', '', ascii(None)),1521 (r'\x00f', '\017', '', ascii(None)),1522 (r'\x00fe', '\376', '', ascii(None)),1523 (r'\x00ff', '\377', '', 
ascii(None)),1524 (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', '0', ascii('\t\n\v\r\f\ag')),1525 ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', '0', ascii('\t\n\v\r\f\ag')),1526 (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', '0', ascii(chr(9) + chr(10) +1527 chr(11) + chr(13) + chr(12) + chr(7))),1528 (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', '0',1529 ascii('\t\n\v\r\f\b')),1530 (r"^\w+=(\\[\000-\277]|[^\n\\])*",1531 "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", '0',1532 ascii("SRC=eval.c g.c blah blah blah \\\\")),1533 # Test that . only matches \n in DOTALL mode.1534 ('a.b', 'acb', '0', ascii('acb')),1535 ('a.b', 'a\nb', '', ascii(None)),1536 ('a.*b', 'acc\nccb', '', ascii(None)),1537 ('a.{4,5}b', 'acc\nccb', '', ascii(None)),1538 ('a.b', 'a\rb', '0', ascii('a\rb')),1539 # The new behaviour is that the inline flag affects only what follows.1540 ('a.b(?s)', 'a\nb', '0', ascii('a\nb')),1541 ('a.b(?sV1)', 'a\nb', '', ascii(None)),1542 ('(?s)a.b', 'a\nb', '0', ascii('a\nb')),1543 ('a.*(?s)b', 'acc\nccb', '0', ascii('acc\nccb')),1544 ('a.*(?sV1)b', 'acc\nccb', '', ascii(None)),1545 ('(?s)a.*b', 'acc\nccb', '0', ascii('acc\nccb')),1546 ('(?s)a.{4,5}b', 'acc\nccb', '0', ascii('acc\nccb')),1547 (')', '', '', regex.error, self.TRAILING_CHARS), # Unmatched right bracket.1548 ('', '', '0', "''"), # Empty pattern.1549 ('abc', 'abc', '0', ascii('abc')),1550 ('abc', 'xbc', '', ascii(None)),1551 ('abc', 'axc', '', ascii(None)),1552 ('abc', 'abx', '', ascii(None)),1553 ('abc', 'xabcy', '0', ascii('abc')),1554 ('abc', 'ababc', '0', ascii('abc')),1555 ('ab*c', 'abc', '0', ascii('abc')),1556 ('ab*bc', 'abc', '0', ascii('abc')),1557 ('ab*bc', 'abbc', '0', ascii('abbc')),1558 ('ab*bc', 'abbbbc', '0', ascii('abbbbc')),1559 ('ab+bc', 'abbc', '0', ascii('abbc')),1560 ('ab+bc', 'abc', '', ascii(None)),1561 ('ab+bc', 'abq', '', ascii(None)),1562 ('ab+bc', 'abbbbc', '0', ascii('abbbbc')),1563 ('ab?bc', 'abbc', '0', ascii('abbc')),1564 ('ab?bc', 'abc', '0', ascii('abc')),1565 ('ab?bc', 'abbbbc', '', 
ascii(None)),1566 ('ab?c', 'abc', '0', ascii('abc')),1567 ('^abc$', 'abc', '0', ascii('abc')),1568 ('^abc$', 'abcc', '', ascii(None)),1569 ('^abc', 'abcc', '0', ascii('abc')),1570 ('^abc$', 'aabc', '', ascii(None)),1571 ('abc$', 'aabc', '0', ascii('abc')),1572 ('^', 'abc', '0', ascii('')),1573 ('$', 'abc', '0', ascii('')),1574 ('a.c', 'abc', '0', ascii('abc')),1575 ('a.c', 'axc', '0', ascii('axc')),1576 ('a.*c', 'axyzc', '0', ascii('axyzc')),1577 ('a.*c', 'axyzd', '', ascii(None)),1578 ('a[bc]d', 'abc', '', ascii(None)),1579 ('a[bc]d', 'abd', '0', ascii('abd')),1580 ('a[b-d]e', 'abd', '', ascii(None)),1581 ('a[b-d]e', 'ace', '0', ascii('ace')),1582 ('a[b-d]', 'aac', '0', ascii('ac')),1583 ('a[-b]', 'a-', '0', ascii('a-')),1584 ('a[\\-b]', 'a-', '0', ascii('a-')),1585 ('a[b-]', 'a-', '0', ascii('a-')),1586 ('a[]b', '-', '', regex.error, self.BAD_SET),1587 ('a[', '-', '', regex.error, self.BAD_SET),1588 ('a\\', '-', '', regex.error, self.BAD_ESCAPE),1589 ('abc)', '-', '', regex.error, self.TRAILING_CHARS),1590 ('(abc', '-', '', regex.error, self.MISSING_RPAREN),1591 ('a]', 'a]', '0', ascii('a]')),1592 ('a[]]b', 'a]b', '0', ascii('a]b')),1593 ('a[]]b', 'a]b', '0', ascii('a]b')),1594 ('a[^bc]d', 'aed', '0', ascii('aed')),1595 ('a[^bc]d', 'abd', '', ascii(None)),1596 ('a[^-b]c', 'adc', '0', ascii('adc')),1597 ('a[^-b]c', 'a-c', '', ascii(None)),1598 ('a[^]b]c', 'a]c', '', ascii(None)),1599 ('a[^]b]c', 'adc', '0', ascii('adc')),1600 ('\\ba\\b', 'a-', '0', ascii('a')),1601 ('\\ba\\b', '-a', '0', ascii('a')),1602 ('\\ba\\b', '-a-', '0', ascii('a')),1603 ('\\by\\b', 'xy', '', ascii(None)),1604 ('\\by\\b', 'yz', '', ascii(None)),1605 ('\\by\\b', 'xyz', '', ascii(None)),1606 ('x\\b', 'xyz', '', ascii(None)),1607 ('x\\B', 'xyz', '0', ascii('x')),1608 ('\\Bz', 'xyz', '0', ascii('z')),1609 ('z\\B', 'xyz', '', ascii(None)),1610 ('\\Bx', 'xyz', '', ascii(None)),1611 ('\\Ba\\B', 'a-', '', ascii(None)),1612 ('\\Ba\\B', '-a', '', ascii(None)),1613 ('\\Ba\\B', '-a-', '', 
ascii(None)),1614 ('\\By\\B', 'xy', '', ascii(None)),1615 ('\\By\\B', 'yz', '', ascii(None)),1616 ('\\By\\b', 'xy', '0', ascii('y')),1617 ('\\by\\B', 'yz', '0', ascii('y')),1618 ('\\By\\B', 'xyz', '0', ascii('y')),1619 ('ab|cd', 'abc', '0', ascii('ab')),1620 ('ab|cd', 'abcd', '0', ascii('ab')),1621 ('()ef', 'def', '0,1', ascii(('ef', ''))),1622 ('$b', 'b', '', ascii(None)),1623 ('a\\(b', 'a(b', '', ascii(('a(b',))),1624 ('a\\(*b', 'ab', '0', ascii('ab')),1625 ('a\\(*b', 'a((b', '0', ascii('a((b')),1626 ('a\\\\b', 'a\\b', '0', ascii('a\\b')),1627 ('((a))', 'abc', '0,1,2', ascii(('a', 'a', 'a'))),1628 ('(a)b(c)', 'abc', '0,1,2', ascii(('abc', 'a', 'c'))),1629 ('a+b+c', 'aabbabc', '0', ascii('abc')),1630 ('(a+|b)*', 'ab', '0,1', ascii(('ab', 'b'))),1631 ('(a+|b)+', 'ab', '0,1', ascii(('ab', 'b'))),1632 ('(a+|b)?', 'ab', '0,1', ascii(('a', 'a'))),1633 (')(', '-', '', regex.error, self.TRAILING_CHARS),1634 ('[^ab]*', 'cde', '0', ascii('cde')),1635 ('abc', '', '', ascii(None)),1636 ('a*', '', '0', ascii('')),1637 ('a|b|c|d|e', 'e', '0', ascii('e')),1638 ('(a|b|c|d|e)f', 'ef', '0,1', ascii(('ef', 'e'))),1639 ('abcd*efg', 'abcdefg', '0', ascii('abcdefg')),1640 ('ab*', 'xabyabbbz', '0', ascii('ab')),1641 ('ab*', 'xayabbbz', '0', ascii('a')),1642 ('(ab|cd)e', 'abcde', '0,1', ascii(('cde', 'cd'))),1643 ('[abhgefdc]ij', 'hij', '0', ascii('hij')),1644 ('^(ab|cd)e', 'abcde', '', ascii(None)),1645 ('(abc|)ef', 'abcdef', '0,1', ascii(('ef', ''))),1646 ('(a|b)c*d', 'abcd', '0,1', ascii(('bcd', 'b'))),1647 ('(ab|ab*)bc', 'abc', '0,1', ascii(('abc', 'a'))),1648 ('a([bc]*)c*', 'abc', '0,1', ascii(('abc', 'bc'))),1649 ('a([bc]*)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),1650 ('a([bc]+)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),1651 ('a([bc]*)(c+d)', 'abcd', '0,1,2', ascii(('abcd', 'b', 'cd'))),1652 ('a[bcd]*dcdcde', 'adcdcde', '0', ascii('adcdcde')),1653 ('a[bcd]+dcdcde', 'adcdcde', '', ascii(None)),1654 ('(ab|a)b*c', 'abc', '0,1', ascii(('abc', 'ab'))),1655 
('((a)(b)c)(d)', 'abcd', '1,2,3,4', ascii(('abc', 'a', 'b', 'd'))),1656 ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', ascii('alpha')),1657 ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', ascii(('bh', None))),1658 ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', ascii(('effgz',1659 'effgz', None))),1660 ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', ascii(('ij', 'ij',1661 'j'))),1662 ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', ascii(None)),1663 ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', ascii(None)),1664 ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', ascii(('effgz',1665 'effgz', None))),1666 ('(((((((((a)))))))))', 'a', '0', ascii('a')),1667 ('multiple words of text', 'uh-uh', '', ascii(None)),1668 ('multiple words', 'multiple words, yeah', '0',1669 ascii('multiple words')),1670 ('(.*)c(.*)', 'abcde', '0,1,2', ascii(('abcde', 'ab', 'de'))),1671 ('\\((.*), (.*)\\)', '(a, b)', '2,1', ascii(('b', 'a'))),1672 ('[k]', 'ab', '', ascii(None)),1673 ('a[-]?c', 'ac', '0', ascii('ac')),1674 ('(abc)\\1', 'abcabc', '1', ascii('abc')),1675 ('([a-c]*)\\1', 'abcabc', '1', ascii('abc')),1676 ('^(.+)?B', 'AB', '1', ascii('A')),1677 ('(a+).\\1$', 'aaaaa', '0,1', ascii(('aaaaa', 'aa'))),1678 ('^(a+).\\1$', 'aaaa', '', ascii(None)),1679 ('(abc)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))),1680 ('([a-c]+)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))),1681 ('(a)\\1', 'aa', '0,1', ascii(('aa', 'a'))),1682 ('(a+)\\1', 'aa', '0,1', ascii(('aa', 'a'))),1683 ('(a+)+\\1', 'aa', '0,1', ascii(('aa', 'a'))),1684 ('(a).+\\1', 'aba', '0,1', ascii(('aba', 'a'))),1685 ('(a)ba*\\1', 'aba', '0,1', ascii(('aba', 'a'))),1686 ('(aa|a)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))),1687 ('(a|aa)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))),1688 ('(a+)a\\1$', 'aaa', '0,1', ascii(('aaa', 'a'))),1689 ('([abc]*)\\1', 'abcabc', '0,1', ascii(('abcabc', 'abc'))),1690 ('(a)(b)c|ab', 'ab', '0,1,2', ascii(('ab', None, None))),1691 ('(a)+x', 'aaax', '0,1', ascii(('aaax', 'a'))),1692 ('([ac])+x', 'aacx', '0,1', ascii(('aacx', 'c'))),1693 ('([^/]*/)*sub1/', 
'd:msgs/tdir/sub1/trial/away.cpp', '0,1',1694 ascii(('d:msgs/tdir/sub1/', 'tdir/'))),1695 ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah',1696 '0,1,2,3', ascii(('track1.title:TBlah blah blah', 'track1',1697 'title', 'Blah blah blah'))),1698 ('([^N]*N)+', 'abNNxyzN', '0,1', ascii(('abNNxyzN', 'xyzN'))),1699 ('([^N]*N)+', 'abNNxyz', '0,1', ascii(('abNN', 'N'))),1700 ('([abc]*)x', 'abcx', '0,1', ascii(('abcx', 'abc'))),1701 ('([abc]*)x', 'abc', '', ascii(None)),1702 ('([xyz]*)x', 'abcx', '0,1', ascii(('x', ''))),1703 ('(a)+b|aac', 'aac', '0,1', ascii(('aac', None))),1704 # Test symbolic groups.1705 ('(?P<i d>aaa)a', 'aaaa', '', regex.error, self.BAD_GROUP_NAME),1706 ('(?P<id>aaa)a', 'aaaa', '0,id', ascii(('aaaa', 'aaa'))),1707 ('(?P<id>aa)(?P=id)', 'aaaa', '0,id', ascii(('aaaa', 'aa'))),1708 ('(?P<id>aa)(?P=xd)', 'aaaa', '', regex.error, self.UNKNOWN_GROUP),1709 # Character properties.1710 (r"\g", "g", '0', ascii('g')),1711 (r"\g<1>", "g", '', regex.error, self.INVALID_GROUP_REF),1712 (r"(.)\g<1>", "gg", '0', ascii('gg')),1713 (r"(.)\g<1>", "gg", '', ascii(('gg', 'g'))),1714 (r"\N", "N", '0', ascii('N')),1715 (r"\N{LATIN SMALL LETTER A}", "a", '0', ascii('a')),1716 (r"\p", "p", '0', ascii('p')),1717 (r"\p{Ll}", "a", '0', ascii('a')),1718 (r"\P", "P", '0', ascii('P')),1719 (r"\P{Lu}", "p", '0', ascii('p')),1720 # All tests from Perl.1721 ('abc', 'abc', '0', ascii('abc')),1722 ('abc', 'xbc', '', ascii(None)),1723 ('abc', 'axc', '', ascii(None)),1724 ('abc', 'abx', '', ascii(None)),1725 ('abc', 'xabcy', '0', ascii('abc')),1726 ('abc', 'ababc', '0', ascii('abc')),1727 ('ab*c', 'abc', '0', ascii('abc')),1728 ('ab*bc', 'abc', '0', ascii('abc')),1729 ('ab*bc', 'abbc', '0', ascii('abbc')),1730 ('ab*bc', 'abbbbc', '0', ascii('abbbbc')),1731 ('ab{0,}bc', 'abbbbc', '0', ascii('abbbbc')),1732 ('ab+bc', 'abbc', '0', ascii('abbc')),1733 ('ab+bc', 'abc', '', ascii(None)),1734 ('ab+bc', 'abq', '', ascii(None)),1735 ('ab{1,}bc', 'abq', '', ascii(None)),1736 ('ab+bc', 
'abbbbc', '0', ascii('abbbbc')),1737 ('ab{1,}bc', 'abbbbc', '0', ascii('abbbbc')),1738 ('ab{1,3}bc', 'abbbbc', '0', ascii('abbbbc')),1739 ('ab{3,4}bc', 'abbbbc', '0', ascii('abbbbc')),1740 ('ab{4,5}bc', 'abbbbc', '', ascii(None)),1741 ('ab?bc', 'abbc', '0', ascii('abbc')),1742 ('ab?bc', 'abc', '0', ascii('abc')),1743 ('ab{0,1}bc', 'abc', '0', ascii('abc')),1744 ('ab?bc', 'abbbbc', '', ascii(None)),1745 ('ab?c', 'abc', '0', ascii('abc')),1746 ('ab{0,1}c', 'abc', '0', ascii('abc')),1747 ('^abc$', 'abc', '0', ascii('abc')),1748 ('^abc$', 'abcc', '', ascii(None)),1749 ('^abc', 'abcc', '0', ascii('abc')),1750 ('^abc$', 'aabc', '', ascii(None)),1751 ('abc$', 'aabc', '0', ascii('abc')),1752 ('^', 'abc', '0', ascii('')),1753 ('$', 'abc', '0', ascii('')),1754 ('a.c', 'abc', '0', ascii('abc')),1755 ('a.c', 'axc', '0', ascii('axc')),1756 ('a.*c', 'axyzc', '0', ascii('axyzc')),1757 ('a.*c', 'axyzd', '', ascii(None)),1758 ('a[bc]d', 'abc', '', ascii(None)),1759 ('a[bc]d', 'abd', '0', ascii('abd')),1760 ('a[b-d]e', 'abd', '', ascii(None)),1761 ('a[b-d]e', 'ace', '0', ascii('ace')),1762 ('a[b-d]', 'aac', '0', ascii('ac')),1763 ('a[-b]', 'a-', '0', ascii('a-')),1764 ('a[b-]', 'a-', '0', ascii('a-')),1765 ('a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE),1766 ('a[]b', '-', '', regex.error, self.BAD_SET),1767 ('a[', '-', '', regex.error, self.BAD_SET),1768 ('a]', 'a]', '0', ascii('a]')),1769 ('a[]]b', 'a]b', '0', ascii('a]b')),1770 ('a[^bc]d', 'aed', '0', ascii('aed')),1771 ('a[^bc]d', 'abd', '', ascii(None)),1772 ('a[^-b]c', 'adc', '0', ascii('adc')),1773 ('a[^-b]c', 'a-c', '', ascii(None)),1774 ('a[^]b]c', 'a]c', '', ascii(None)),1775 ('a[^]b]c', 'adc', '0', ascii('adc')),1776 ('ab|cd', 'abc', '0', ascii('ab')),1777 ('ab|cd', 'abcd', '0', ascii('ab')),1778 ('()ef', 'def', '0,1', ascii(('ef', ''))),1779 ('*a', '-', '', regex.error, self.NOTHING_TO_REPEAT),1780 ('(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT),1781 ('$b', 'b', '', ascii(None)),1782 ('a\\', '-', '', 
regex.error, self.BAD_ESCAPE),1783 ('a\\(b', 'a(b', '', ascii(('a(b',))),1784 ('a\\(*b', 'ab', '0', ascii('ab')),1785 ('a\\(*b', 'a((b', '0', ascii('a((b')),1786 ('a\\\\b', 'a\\b', '0', ascii('a\\b')),1787 ('abc)', '-', '', regex.error, self.TRAILING_CHARS),1788 ('(abc', '-', '', regex.error, self.MISSING_RPAREN),1789 ('((a))', 'abc', '0,1,2', ascii(('a', 'a', 'a'))),1790 ('(a)b(c)', 'abc', '0,1,2', ascii(('abc', 'a', 'c'))),1791 ('a+b+c', 'aabbabc', '0', ascii('abc')),1792 ('a{1,}b{1,}c', 'aabbabc', '0', ascii('abc')),1793 ('a**', '-', '', regex.error, self.MULTIPLE_REPEAT),1794 ('a.+?c', 'abcabc', '0', ascii('abc')),1795 ('(a+|b)*', 'ab', '0,1', ascii(('ab', 'b'))),1796 ('(a+|b){0,}', 'ab', '0,1', ascii(('ab', 'b'))),1797 ('(a+|b)+', 'ab', '0,1', ascii(('ab', 'b'))),1798 ('(a+|b){1,}', 'ab', '0,1', ascii(('ab', 'b'))),1799 ('(a+|b)?', 'ab', '0,1', ascii(('a', 'a'))),1800 ('(a+|b){0,1}', 'ab', '0,1', ascii(('a', 'a'))),1801 (')(', '-', '', regex.error, self.TRAILING_CHARS),1802 ('[^ab]*', 'cde', '0', ascii('cde')),1803 ('abc', '', '', ascii(None)),1804 ('a*', '', '0', ascii('')),1805 ('([abc])*d', 'abbbcd', '0,1', ascii(('abbbcd', 'c'))),1806 ('([abc])*bcd', 'abcd', '0,1', ascii(('abcd', 'a'))),1807 ('a|b|c|d|e', 'e', '0', ascii('e')),1808 ('(a|b|c|d|e)f', 'ef', '0,1', ascii(('ef', 'e'))),1809 ('abcd*efg', 'abcdefg', '0', ascii('abcdefg')),1810 ('ab*', 'xabyabbbz', '0', ascii('ab')),1811 ('ab*', 'xayabbbz', '0', ascii('a')),1812 ('(ab|cd)e', 'abcde', '0,1', ascii(('cde', 'cd'))),1813 ('[abhgefdc]ij', 'hij', '0', ascii('hij')),1814 ('^(ab|cd)e', 'abcde', '', ascii(None)),1815 ('(abc|)ef', 'abcdef', '0,1', ascii(('ef', ''))),1816 ('(a|b)c*d', 'abcd', '0,1', ascii(('bcd', 'b'))),1817 ('(ab|ab*)bc', 'abc', '0,1', ascii(('abc', 'a'))),1818 ('a([bc]*)c*', 'abc', '0,1', ascii(('abc', 'bc'))),1819 ('a([bc]*)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),1820 ('a([bc]+)(c*d)', 'abcd', '0,1,2', ascii(('abcd', 'bc', 'd'))),1821 ('a([bc]*)(c+d)', 'abcd', '0,1,2', 
ascii(('abcd', 'b', 'cd'))),1822 ('a[bcd]*dcdcde', 'adcdcde', '0', ascii('adcdcde')),1823 ('a[bcd]+dcdcde', 'adcdcde', '', ascii(None)),1824 ('(ab|a)b*c', 'abc', '0,1', ascii(('abc', 'ab'))),1825 ('((a)(b)c)(d)', 'abcd', '1,2,3,4', ascii(('abc', 'a', 'b', 'd'))),1826 ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', ascii('alpha')),1827 ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', ascii(('bh', None))),1828 ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', ascii(('effgz',1829 'effgz', None))),1830 ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', ascii(('ij', 'ij',1831 'j'))),1832 ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', ascii(None)),1833 ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', ascii(None)),1834 ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', ascii(('effgz',1835 'effgz', None))),1836 ('((((((((((a))))))))))', 'a', '10', ascii('a')),1837 ('((((((((((a))))))))))\\10', 'aa', '0', ascii('aa')),1838 # Python does not have the same rules for \\41 so this is a syntax error1839 # ('((((((((((a))))))))))\\41', 'aa', '', ascii(None)),1840 # ('((((((((((a))))))))))\\41', 'a!', '0', ascii('a!')),1841 ('((((((((((a))))))))))\\41', '', '', regex.error,1842 self.INVALID_GROUP_REF),1843 ('(?i)((((((((((a))))))))))\\41', '', '', regex.error,1844 self.INVALID_GROUP_REF),1845 ('(((((((((a)))))))))', 'a', '0', ascii('a')),1846 ('multiple words of text', 'uh-uh', '', ascii(None)),1847 ('multiple words', 'multiple words, yeah', '0',1848 ascii('multiple words')),1849 ('(.*)c(.*)', 'abcde', '0,1,2', ascii(('abcde', 'ab', 'de'))),1850 ('\\((.*), (.*)\\)', '(a, b)', '2,1', ascii(('b', 'a'))),1851 ('[k]', 'ab', '', ascii(None)),1852 ('a[-]?c', 'ac', '0', ascii('ac')),1853 ('(abc)\\1', 'abcabc', '1', ascii('abc')),1854 ('([a-c]*)\\1', 'abcabc', '1', ascii('abc')),1855 ('(?i)abc', 'ABC', '0', ascii('ABC')),1856 ('(?i)abc', 'XBC', '', ascii(None)),1857 ('(?i)abc', 'AXC', '', ascii(None)),1858 ('(?i)abc', 'ABX', '', ascii(None)),1859 ('(?i)abc', 'XABCY', '0', ascii('ABC')),1860 ('(?i)abc', 'ABABC', '0', ascii('ABC')),1861 
('(?i)ab*c', 'ABC', '0', ascii('ABC')),1862 ('(?i)ab*bc', 'ABC', '0', ascii('ABC')),1863 ('(?i)ab*bc', 'ABBC', '0', ascii('ABBC')),1864 ('(?i)ab*?bc', 'ABBBBC', '0', ascii('ABBBBC')),1865 ('(?i)ab{0,}?bc', 'ABBBBC', '0', ascii('ABBBBC')),1866 ('(?i)ab+?bc', 'ABBC', '0', ascii('ABBC')),1867 ('(?i)ab+bc', 'ABC', '', ascii(None)),1868 ('(?i)ab+bc', 'ABQ', '', ascii(None)),1869 ('(?i)ab{1,}bc', 'ABQ', '', ascii(None)),1870 ('(?i)ab+bc', 'ABBBBC', '0', ascii('ABBBBC')),1871 ('(?i)ab{1,}?bc', 'ABBBBC', '0', ascii('ABBBBC')),1872 ('(?i)ab{1,3}?bc', 'ABBBBC', '0', ascii('ABBBBC')),1873 ('(?i)ab{3,4}?bc', 'ABBBBC', '0', ascii('ABBBBC')),1874 ('(?i)ab{4,5}?bc', 'ABBBBC', '', ascii(None)),1875 ('(?i)ab??bc', 'ABBC', '0', ascii('ABBC')),1876 ('(?i)ab??bc', 'ABC', '0', ascii('ABC')),1877 ('(?i)ab{0,1}?bc', 'ABC', '0', ascii('ABC')),1878 ('(?i)ab??bc', 'ABBBBC', '', ascii(None)),1879 ('(?i)ab??c', 'ABC', '0', ascii('ABC')),1880 ('(?i)ab{0,1}?c', 'ABC', '0', ascii('ABC')),1881 ('(?i)^abc$', 'ABC', '0', ascii('ABC')),1882 ('(?i)^abc$', 'ABCC', '', ascii(None)),1883 ('(?i)^abc', 'ABCC', '0', ascii('ABC')),1884 ('(?i)^abc$', 'AABC', '', ascii(None)),1885 ('(?i)abc$', 'AABC', '0', ascii('ABC')),1886 ('(?i)^', 'ABC', '0', ascii('')),1887 ('(?i)$', 'ABC', '0', ascii('')),1888 ('(?i)a.c', 'ABC', '0', ascii('ABC')),1889 ('(?i)a.c', 'AXC', '0', ascii('AXC')),1890 ('(?i)a.*?c', 'AXYZC', '0', ascii('AXYZC')),1891 ('(?i)a.*c', 'AXYZD', '', ascii(None)),1892 ('(?i)a[bc]d', 'ABC', '', ascii(None)),1893 ('(?i)a[bc]d', 'ABD', '0', ascii('ABD')),1894 ('(?i)a[b-d]e', 'ABD', '', ascii(None)),1895 ('(?i)a[b-d]e', 'ACE', '0', ascii('ACE')),1896 ('(?i)a[b-d]', 'AAC', '0', ascii('AC')),1897 ('(?i)a[-b]', 'A-', '0', ascii('A-')),1898 ('(?i)a[b-]', 'A-', '0', ascii('A-')),1899 ('(?i)a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE),1900 ('(?i)a[]b', '-', '', regex.error, self.BAD_SET),1901 ('(?i)a[', '-', '', regex.error, self.BAD_SET),1902 ('(?i)a]', 'A]', '0', ascii('A]')),1903 ('(?i)a[]]b', 'A]B', 
'0', ascii('A]B')),1904 ('(?i)a[^bc]d', 'AED', '0', ascii('AED')),1905 ('(?i)a[^bc]d', 'ABD', '', ascii(None)),1906 ('(?i)a[^-b]c', 'ADC', '0', ascii('ADC')),1907 ('(?i)a[^-b]c', 'A-C', '', ascii(None)),1908 ('(?i)a[^]b]c', 'A]C', '', ascii(None)),1909 ('(?i)a[^]b]c', 'ADC', '0', ascii('ADC')),1910 ('(?i)ab|cd', 'ABC', '0', ascii('AB')),1911 ('(?i)ab|cd', 'ABCD', '0', ascii('AB')),1912 ('(?i)()ef', 'DEF', '0,1', ascii(('EF', ''))),1913 ('(?i)*a', '-', '', regex.error, self.NOTHING_TO_REPEAT),1914 ('(?i)(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT),1915 ('(?i)$b', 'B', '', ascii(None)),1916 ('(?i)a\\', '-', '', regex.error, self.BAD_ESCAPE),1917 ('(?i)a\\(b', 'A(B', '', ascii(('A(B',))),1918 ('(?i)a\\(*b', 'AB', '0', ascii('AB')),1919 ('(?i)a\\(*b', 'A((B', '0', ascii('A((B')),1920 ('(?i)a\\\\b', 'A\\B', '0', ascii('A\\B')),1921 ('(?i)abc)', '-', '', regex.error, self.TRAILING_CHARS),1922 ('(?i)(abc', '-', '', regex.error, self.MISSING_RPAREN),1923 ('(?i)((a))', 'ABC', '0,1,2', ascii(('A', 'A', 'A'))),1924 ('(?i)(a)b(c)', 'ABC', '0,1,2', ascii(('ABC', 'A', 'C'))),1925 ('(?i)a+b+c', 'AABBABC', '0', ascii('ABC')),1926 ('(?i)a{1,}b{1,}c', 'AABBABC', '0', ascii('ABC')),1927 ('(?i)a**', '-', '', regex.error, self.MULTIPLE_REPEAT),1928 ('(?i)a.+?c', 'ABCABC', '0', ascii('ABC')),1929 ('(?i)a.*?c', 'ABCABC', '0', ascii('ABC')),1930 ('(?i)a.{0,5}?c', 'ABCABC', '0', ascii('ABC')),1931 ('(?i)(a+|b)*', 'AB', '0,1', ascii(('AB', 'B'))),1932 ('(?i)(a+|b){0,}', 'AB', '0,1', ascii(('AB', 'B'))),1933 ('(?i)(a+|b)+', 'AB', '0,1', ascii(('AB', 'B'))),1934 ('(?i)(a+|b){1,}', 'AB', '0,1', ascii(('AB', 'B'))),1935 ('(?i)(a+|b)?', 'AB', '0,1', ascii(('A', 'A'))),1936 ('(?i)(a+|b){0,1}', 'AB', '0,1', ascii(('A', 'A'))),1937 ('(?i)(a+|b){0,1}?', 'AB', '0,1', ascii(('', None))),1938 ('(?i))(', '-', '', regex.error, self.TRAILING_CHARS),1939 ('(?i)[^ab]*', 'CDE', '0', ascii('CDE')),1940 ('(?i)abc', '', '', ascii(None)),1941 ('(?i)a*', '', '0', ascii('')),1942 ('(?i)([abc])*d', 
'ABBBCD', '0,1', ascii(('ABBBCD', 'C'))),1943 ('(?i)([abc])*bcd', 'ABCD', '0,1', ascii(('ABCD', 'A'))),1944 ('(?i)a|b|c|d|e', 'E', '0', ascii('E')),1945 ('(?i)(a|b|c|d|e)f', 'EF', '0,1', ascii(('EF', 'E'))),1946 ('(?i)abcd*efg', 'ABCDEFG', '0', ascii('ABCDEFG')),1947 ('(?i)ab*', 'XABYABBBZ', '0', ascii('AB')),1948 ('(?i)ab*', 'XAYABBBZ', '0', ascii('A')),1949 ('(?i)(ab|cd)e', 'ABCDE', '0,1', ascii(('CDE', 'CD'))),1950 ('(?i)[abhgefdc]ij', 'HIJ', '0', ascii('HIJ')),1951 ('(?i)^(ab|cd)e', 'ABCDE', '', ascii(None)),1952 ('(?i)(abc|)ef', 'ABCDEF', '0,1', ascii(('EF', ''))),1953 ('(?i)(a|b)c*d', 'ABCD', '0,1', ascii(('BCD', 'B'))),1954 ('(?i)(ab|ab*)bc', 'ABC', '0,1', ascii(('ABC', 'A'))),1955 ('(?i)a([bc]*)c*', 'ABC', '0,1', ascii(('ABC', 'BC'))),1956 ('(?i)a([bc]*)(c*d)', 'ABCD', '0,1,2', ascii(('ABCD', 'BC', 'D'))),1957 ('(?i)a([bc]+)(c*d)', 'ABCD', '0,1,2', ascii(('ABCD', 'BC', 'D'))),1958 ('(?i)a([bc]*)(c+d)', 'ABCD', '0,1,2', ascii(('ABCD', 'B', 'CD'))),1959 ('(?i)a[bcd]*dcdcde', 'ADCDCDE', '0', ascii('ADCDCDE')),1960 ('(?i)a[bcd]+dcdcde', 'ADCDCDE', '', ascii(None)),1961 ('(?i)(ab|a)b*c', 'ABC', '0,1', ascii(('ABC', 'AB'))),1962 ('(?i)((a)(b)c)(d)', 'ABCD', '1,2,3,4', ascii(('ABC', 'A', 'B',1963 'D'))),1964 ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', '0', ascii('ALPHA')),1965 ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', '0,1', ascii(('BH', None))),1966 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', '0,1,2', ascii(('EFFGZ',1967 'EFFGZ', None))),1968 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', '0,1,2', ascii(('IJ', 'IJ',1969 'J'))),1970 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', '', ascii(None)),1971 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', '', ascii(None)),1972 ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', '0,1,2', ascii(('EFFGZ',1973 'EFFGZ', None))),1974 ('(?i)((((((((((a))))))))))', 'A', '10', ascii('A')),1975 ('(?i)((((((((((a))))))))))\\10', 'AA', '0', ascii('AA')),1976 #('(?i)((((((((((a))))))))))\\41', 'AA', '', ascii(None)),1977 #('(?i)((((((((((a))))))))))\\41', 'A!', '0', ascii('A!')),1978 
('(?i)(((((((((a)))))))))', 'A', '0', ascii('A')),1979 ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', '1',1980 ascii('A')),1981 ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', '1',1982 ascii('C')),1983 ('(?i)multiple words of text', 'UH-UH', '', ascii(None)),1984 ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', '0',1985 ascii('MULTIPLE WORDS')),1986 ('(?i)(.*)c(.*)', 'ABCDE', '0,1,2', ascii(('ABCDE', 'AB', 'DE'))),1987 ('(?i)\\((.*), (.*)\\)', '(A, B)', '2,1', ascii(('B', 'A'))),1988 ('(?i)[k]', 'AB', '', ascii(None)),1989 # ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', ascii(ABCD-$&-\\ABCD)),1990 # ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', ascii(BC-$1-\\BC)),1991 ('(?i)a[-]?c', 'AC', '0', ascii('AC')),1992 ('(?i)(abc)\\1', 'ABCABC', '1', ascii('ABC')),1993 ('(?i)([a-c]*)\\1', 'ABCABC', '1', ascii('ABC')),1994 ('a(?!b).', 'abad', '0', ascii('ad')),1995 ('a(?=d).', 'abad', '0', ascii('ad')),1996 ('a(?=c|d).', 'abad', '0', ascii('ad')),1997 ('a(?:b|c|d)(.)', 'ace', '1', ascii('e')),1998 ('a(?:b|c|d)*(.)', 'ace', '1', ascii('e')),1999 ('a(?:b|c|d)+?(.)', 'ace', '1', ascii('e')),2000 ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', '1,2', ascii(('c', 'e'))),2001 # Lookbehind: split by : but not if it is escaped by -.2002 ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', '1', ascii('bc-:de')),2003 # Escaping with \ as we know it.2004 ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', '1', ascii('bc\\:de')),2005 # Terminating with ' and escaping with ? as in edifact.2006 ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", '1', ascii("bc?'de")),2007 # Comments using the (?#...) 
syntax.2008 ('w(?# comment', 'w', '', regex.error, self.MISSING_RPAREN),2009 ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', '0', ascii('wxyz')),2010 # Check odd placement of embedded pattern modifiers.2011 # Not an error under PCRE/PRE:2012 # When the new behaviour is turned on positional inline flags affect2013 # only what follows.2014 ('w(?i)', 'W', '0', ascii('W')),2015 ('w(?iV1)', 'W', '0', ascii(None)),2016 ('w(?i)', 'w', '0', ascii('w')),2017 ('w(?iV1)', 'w', '0', ascii('w')),2018 ('(?i)w', 'W', '0', ascii('W')),2019 ('(?iV1)w', 'W', '0', ascii('W')),2020 # Comments using the x embedded pattern modifier.2021 ("""(?x)w# comment 12022x y2023# comment 22024z""", 'wxyz', '0', ascii('wxyz')),2025 # Using the m embedded pattern modifier.2026 ('^abc', """jkl2027abc2028xyz""", '', ascii(None)),2029 ('(?m)^abc', """jkl2030abc2031xyz""", '0', ascii('abc')),2032 ('(?m)abc$', """jkl2033xyzabc2034123""", '0', ascii('abc')),2035 # Using the s embedded pattern modifier.2036 ('a.b', 'a\nb', '', ascii(None)),2037 ('(?s)a.b', 'a\nb', '0', ascii('a\nb')),2038 # Test \w, etc. 
both inside and outside character classes.2039 ('\\w+', '--ab_cd0123--', '0', ascii('ab_cd0123')),2040 ('[\\w]+', '--ab_cd0123--', '0', ascii('ab_cd0123')),2041 ('\\D+', '1234abc5678', '0', ascii('abc')),2042 ('[\\D]+', '1234abc5678', '0', ascii('abc')),2043 ('[\\da-fA-F]+', '123abc', '0', ascii('123abc')),2044 # Not an error under PCRE/PRE:2045 # ('[\\d-x]', '-', '', regex.error, self.BAD_CHAR_RANGE),2046 (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', '3,2,1', ascii(('',2047 'testing!1972', ' '))),2048 (r'(\s*)(\S*)(\s*)', ' testing!1972', '3,2,1', ascii(('',2049 'testing!1972', ' '))),2050 #2051 # Post-1.5.2 additions.2052 # xmllib problem.2053 (r'(([a-z]+):)?([a-z]+)$', 'smil', '1,2,3', ascii((None, None,2054 'smil'))),2055 # Bug 110866: reference to undefined group.2056 (r'((.)\1+)', '', '', regex.error, self.OPEN_GROUP),2057 # Bug 111869: search (PRE/PCRE fails on this one, SRE doesn't).2058 (r'.*d', 'abc\nabd', '0', ascii('abd')),2059 # Bug 112468: various expected syntax errors.2060 (r'(', '', '', regex.error, self.MISSING_RPAREN),2061 (r'[\41]', '!', '0', ascii('!')),2062 # Bug 114033: nothing to repeat.2063 (r'(x?)?', 'x', '0', ascii('x')),2064 # Bug 115040: rescan if flags are modified inside pattern.2065 # If the new behaviour is turned on then positional inline flags2066 # affect only what follows.2067 (r' (?x)foo ', 'foo', '0', ascii('foo')),2068 (r' (?V1x)foo ', 'foo', '0', ascii(None)),2069 (r'(?x) foo ', 'foo', '0', ascii('foo')),2070 (r'(?V1x) foo ', 'foo', '0', ascii('foo')),2071 (r'(?x)foo ', 'foo', '0', ascii('foo')),2072 (r'(?V1x)foo ', 'foo', '0', ascii('foo')),2073 # Bug 115618: negative lookahead.2074 (r'(?<!abc)(d.f)', 'abcdefdof', '0', ascii('dof')),2075 # Bug 116251: character class bug.2076 (r'[\w-]+', 'laser_beam', '0', ascii('laser_beam')),2077 # Bug 123769+127259: non-greedy backtracking bug.2078 (r'.*?\S *:', 'xx:', '0', ascii('xx:')),2079 (r'a[ ]*?\ (\d+).*', 'a 10', '0', ascii('a 10')),2080 (r'a[ ]*?\ (\d+).*', 'a 10', '0', ascii('a 
10')),2081 # Bug 127259: \Z shouldn't depend on multiline mode.2082 (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', '1', ascii('')),2083 # Bug 128899: uppercase literals under the ignorecase flag.2084 (r'(?i)M+', 'MMM', '0', ascii('MMM')),2085 (r'(?i)m+', 'MMM', '0', ascii('MMM')),2086 (r'(?i)[M]+', 'MMM', '0', ascii('MMM')),2087 (r'(?i)[m]+', 'MMM', '0', ascii('MMM')),2088 # Bug 130748: ^* should be an error (nothing to repeat).2089 # In 'regex' we won't bother to complain about this.2090 # (r'^*', '', '', regex.error, self.NOTHING_TO_REPEAT),2091 # Bug 133283: minimizing repeat problem.2092 (r'"(?:\\"|[^"])*?"', r'"\""', '0', ascii(r'"\""')),2093 # Bug 477728: minimizing repeat problem.2094 (r'^.*?$', 'one\ntwo\nthree\n', '', ascii(None)),2095 # Bug 483789: minimizing repeat problem.2096 (r'a[^>]*?b', 'a>b', '', ascii(None)),2097 # Bug 490573: minimizing repeat problem.2098 (r'^a*?$', 'foo', '', ascii(None)),2099 # Bug 470582: nested groups problem.2100 (r'^((a)c)?(ab)$', 'ab', '1,2,3', ascii((None, None, 'ab'))),2101 # Another minimizing repeat problem (capturing groups in assertions).2102 ('^([ab]*?)(?=(b)?)c', 'abc', '1,2', ascii(('ab', None))),2103 ('^([ab]*?)(?!(b))c', 'abc', '1,2', ascii(('ab', None))),2104 ('^([ab]*?)(?<!(a))c', 'abc', '1,2', ascii(('ab', None))),2105 # Bug 410271: \b broken under locales.2106 (r'\b.\b', 'a', '0', ascii('a')),2107 (r'\b.\b', '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}', '0',2108 ascii('\xc4')),2109 (r'\w', '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}', '0',2110 ascii('\xc4')),2111 ]2112 for t in tests:2113 excval = None2114 try:2115 if len(t) == 4:2116 pattern, string, groups, expected = t2117 else:2118 pattern, string, groups, expected, excval = t2119 except ValueError:2120 fields = ", ".join([ascii(f) for f in t[ : 3]] + ["..."])2121 self.fail("Incorrect number of test fields: ({})".format(fields))2122 else:2123 group_list = []2124 if groups:2125 for group in groups.split(","):2126 try:2127 group_list.append(int(group))2128 except 
ValueError:2129 group_list.append(group)2130 if excval is not None:2131 if (sys.version_info.major, sys.version_info.minor) >= (3, 4):2132 with self.subTest(pattern=pattern, string=string):2133 self.assertRaisesRegex(expected, excval, regex.search,2134 pattern, string)2135 else:2136 m = regex.search(pattern, string)2137 if m:2138 if group_list:2139 actual = ascii(m.group(*group_list))2140 else:2141 actual = ascii(m[:])2142 else:2143 actual = ascii(m)2144 self.assertEqual(actual, expected)2145 def test_replacement(self):2146 self.assertEqual(regex.sub("test\?", "result\?\.\a\q\m\n", "test?"),2147 "result\?\.\a\q\m\n")2148 self.assertEqual(regex.sub(r"test\?", "result\?\.\a\q\m\n", "test?"),2149 "result\?\.\a\q\m\n")2150 self.assertEqual(regex.sub('(.)', r"\1\1", 'x'), 'xx')2151 self.assertEqual(regex.sub('(.)', regex.escape(r"\1\1"), 'x'), r"\1\1")2152 self.assertEqual(regex.sub('(.)', r"\\1\\1", 'x'), r"\1\1")2153 self.assertEqual(regex.sub('(.)', lambda m: r"\1\1", 'x'), r"\1\1")2154 def test_common_prefix(self):2155 # Very long common prefix2156 all = string.ascii_lowercase + string.digits + string.ascii_uppercase2157 side = all * 42158 regexp = '(' + side + '|' + side + ')'2159 self.assertEqual(repr(type(regex.compile(regexp))), self.PATTERN_CLASS)2160 def test_captures(self):2161 self.assertEqual(regex.search(r"(\w)+", "abc").captures(1), ['a', 'b',2162 'c'])2163 self.assertEqual(regex.search(r"(\w{3})+", "abcdef").captures(0, 1),2164 (['abcdef'], ['abc', 'def']))2165 self.assertEqual(regex.search(r"^(\d{1,3})(?:\.(\d{1,3})){3}$",2166 "192.168.0.1").captures(1, 2), (['192', ], ['168', '0', '1']))2167 self.assertEqual(regex.match(r"^([0-9A-F]{2}){4} ([a-z]\d){5}$",2168 "3FB52A0C a2c4g3k9d3").captures(1, 2), (['3F', 'B5', '2A', '0C'],2169 ['a2', 'c4', 'g3', 'k9', 'd3']))2170 self.assertEqual(regex.match("([a-z]W)([a-z]X)+([a-z]Y)",2171 "aWbXcXdXeXfY").captures(1, 2, 3), (['aW'], ['bX', 'cX', 'dX', 'eX'],2172 ['fY']))2173 
self.assertEqual(regex.search(r".*?(?=(.)+)b", "ab").captures(1),2174 ['b'])2175 self.assertEqual(regex.search(r".*?(?>(.){0,2})d", "abcd").captures(1),2176 ['b', 'c'])2177 self.assertEqual(regex.search(r"(.)+", "a").captures(1), ['a'])2178 def test_guards(self):2179 m = regex.search(r"(X.*?Y\s*){3}(X\s*)+AB:",2180 "XY\nX Y\nX Y\nXY\nXX AB:")2181 self.assertEqual(m.span(0, 1, 2), ((3, 21), (12, 15), (16, 18)))2182 m = regex.search(r"(X.*?Y\s*){3,}(X\s*)+AB:",2183 "XY\nX Y\nX Y\nXY\nXX AB:")2184 self.assertEqual(m.span(0, 1, 2), ((0, 21), (12, 15), (16, 18)))2185 m = regex.search(r'\d{4}(\s*\w)?\W*((?!\d)\w){2}', "9999XX")2186 self.assertEqual(m.span(0, 1, 2), ((0, 6), (-1, -1), (5, 6)))2187 m = regex.search(r'A\s*?.*?(\n+.*?\s*?){0,2}\(X', 'A\n1\nS\n1 (X')2188 self.assertEqual(m.span(0, 1), ((0, 10), (5, 8)))2189 m = regex.search('Derde\s*:', 'aaaaaa:\nDerde:')2190 self.assertEqual(m.span(), (8, 14))2191 m = regex.search('Derde\s*:', 'aaaaa:\nDerde:')2192 self.assertEqual(m.span(), (7, 13))2193 def test_turkic(self):2194 # Turkish has dotted and dotless I/i.2195 pairs = "I=i;I=\u0131;i=\u0130"2196 all_chars = set()2197 matching = set()2198 for pair in pairs.split(";"):2199 ch1, ch2 = pair.split("=")2200 all_chars.update((ch1, ch2))2201 matching.add((ch1, ch1))2202 matching.add((ch1, ch2))2203 matching.add((ch2, ch1))2204 matching.add((ch2, ch2))2205 for ch1 in all_chars:2206 for ch2 in all_chars:2207 m = regex.match(r"(?i)\A" + ch1 + r"\Z", ch2)2208 if m:2209 if (ch1, ch2) not in matching:2210 self.fail("{} matching {}".format(ascii(ch1),2211 ascii(ch2)))2212 else:2213 if (ch1, ch2) in matching:2214 self.fail("{} not matching {}".format(ascii(ch1),2215 ascii(ch2)))2216 def test_named_lists(self):2217 options = ["one", "two", "three"]2218 self.assertEqual(regex.match(r"333\L<bar>444", "333one444",2219 bar=options).group(), "333one444")2220 self.assertEqual(regex.match(r"(?i)333\L<bar>444", "333TWO444",2221 bar=options).group(), "333TWO444")2222 
self.assertEqual(regex.match(r"333\L<bar>444", "333four444",2223 bar=options), None)2224 options = [b"one", b"two", b"three"]2225 self.assertEqual(regex.match(br"333\L<bar>444", b"333one444",2226 bar=options).group(), b"333one444")2227 self.assertEqual(regex.match(br"(?i)333\L<bar>444", b"333TWO444",2228 bar=options).group(), b"333TWO444")2229 self.assertEqual(regex.match(br"333\L<bar>444", b"333four444",2230 bar=options), None)2231 self.assertEqual(repr(type(regex.compile(r"3\L<bar>4\L<bar>+5",2232 bar=["one", "two", "three"]))), self.PATTERN_CLASS)2233 self.assertEqual(regex.findall(r"^\L<options>", "solid QWERT",2234 options=set(['good', 'brilliant', '+s\\ol[i}d'])), [])2235 self.assertEqual(regex.findall(r"^\L<options>", "+solid QWERT",2236 options=set(['good', 'brilliant', '+solid'])), ['+solid'])2237 options = ["STRASSE"]2238 self.assertEqual(regex.match(r"(?fi)\L<words>",2239 "stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0,2240 6))2241 options = ["STRASSE", "stress"]2242 self.assertEqual(regex.match(r"(?fi)\L<words>",2243 "stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0,2244 6))2245 options = ["stra\N{LATIN SMALL LETTER SHARP S}e"]2246 self.assertEqual(regex.match(r"(?fi)\L<words>", "STRASSE",2247 words=options).span(), (0, 7))2248 options = ["kit"]2249 self.assertEqual(regex.search(r"(?i)\L<words>", "SKITS",2250 words=options).span(), (1, 4))2251 self.assertEqual(regex.search(r"(?i)\L<words>",2252 "SK\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}TS",2253 words=options).span(), (1, 4))2254 self.assertEqual(regex.search(r"(?fi)\b(\w+) +\1\b",2255 " stra\N{LATIN SMALL LETTER SHARP S}e STRASSE ").span(), (1, 15))2256 self.assertEqual(regex.search(r"(?fi)\b(\w+) +\1\b",2257 " STRASSE stra\N{LATIN SMALL LETTER SHARP S}e ").span(), (1, 15))2258 self.assertEqual(regex.search(r"^\L<options>$", "", options=[]).span(),2259 (0, 0))2260 def test_fuzzy(self):2261 # Some tests borrowed from TRE library tests.2262 
self.assertEqual(repr(type(regex.compile('(fou){s,e<=1}'))),2263 self.PATTERN_CLASS)2264 self.assertEqual(repr(type(regex.compile('(fuu){s}'))),2265 self.PATTERN_CLASS)2266 self.assertEqual(repr(type(regex.compile('(fuu){s,e}'))),2267 self.PATTERN_CLASS)2268 self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1}'))),2269 self.PATTERN_CLASS)2270 self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1,e<=10}'))),2271 self.PATTERN_CLASS)2272 self.assertEqual(repr(type(regex.compile('(anaconda){s<=1,e<=1,1i+1d<1}'))),2273 self.PATTERN_CLASS)2274 text = 'molasses anaconda foo bar baz smith anderson '2275 self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<1}', text),2276 None)2277 self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<2}',2278 text).span(0, 1), ((9, 17), (9, 17)))2279 self.assertEqual(regex.search('(ananda){1i+1d<2}', text), None)2280 self.assertEqual(regex.search(r"(?:\bznacnda){e<=2}", text)[0],2281 "anaconda")2282 self.assertEqual(regex.search(r"(?:\bnacnda){e<=2}", text)[0],2283 "anaconda")2284 text = 'anaconda foo bar baz smith anderson'2285 self.assertEqual(regex.search('(fuu){i<=3,d<=3,e<=5}', text).span(0,2286 1), ((0, 0), (0, 0)))2287 self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e<=5}',2288 text).span(0, 1), ((9, 10), (9, 10)))2289 self.assertEqual(regex.search('(fuu){i<=2,d<=2,e<=5}', text).span(0,2290 1), ((7, 10), (7, 10)))2291 self.assertEqual(regex.search('(?e)(fuu){i<=2,d<=2,e<=5}',2292 text).span(0, 1), ((9, 10), (9, 10)))2293 self.assertEqual(regex.search('(fuu){i<=3,d<=3,e}', text).span(0, 1),2294 ((0, 0), (0, 0)))2295 self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e}', text).span(0,2296 1), ((9, 10), (9, 10)))2297 self.assertEqual(repr(type(regex.compile('(approximate){s<=3,1i+1d<3}'))),2298 self.PATTERN_CLASS)2299 # No cost limit.2300 self.assertEqual(regex.search('(foobar){e}',2301 'xirefoabralfobarxie').span(0, 1), ((0, 6), (0, 6)))2302 self.assertEqual(regex.search('(?e)(foobar){e}',2303 
'xirefoabralfobarxie').span(0, 1), ((0, 3), (0, 3)))2304 self.assertEqual(regex.search('(?b)(foobar){e}',2305 'xirefoabralfobarxie').span(0, 1), ((11, 16), (11, 16)))2306 # At most two errors.2307 self.assertEqual(regex.search('(foobar){e<=2}',2308 'xirefoabrzlfd').span(0, 1), ((4, 9), (4, 9)))2309 self.assertEqual(regex.search('(foobar){e<=2}', 'xirefoabzlfd'), None)2310 # At most two inserts or substitutions and max two errors total.2311 self.assertEqual(regex.search('(foobar){i<=2,s<=2,e<=2}',2312 'oobargoobaploowap').span(0, 1), ((5, 11), (5, 11)))2313 # Find best whole word match for "foobar".2314 self.assertEqual(regex.search('\\b(foobar){e}\\b', 'zfoobarz').span(0,2315 1), ((0, 8), (0, 8)))2316 self.assertEqual(regex.search('\\b(foobar){e}\\b',2317 'boing zfoobarz goobar woop').span(0, 1), ((0, 6), (0, 6)))2318 self.assertEqual(regex.search('(?b)\\b(foobar){e}\\b',2319 'boing zfoobarz goobar woop').span(0, 1), ((15, 21), (15, 21)))2320 # Match whole string, allow only 1 error.2321 self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobar').span(0, 1),2322 ((0, 6), (0, 6)))2323 self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobar').span(0,2324 1), ((0, 7), (0, 7)))2325 self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarx').span(0,2326 1), ((0, 7), (0, 7)))2327 self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooxbar').span(0,2328 1), ((0, 7), (0, 7)))2329 self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbar').span(0, 1),2330 ((0, 6), (0, 6)))2331 self.assertEqual(regex.search('^(foobar){e<=1}$', 'xoobar').span(0, 1),2332 ((0, 6), (0, 6)))2333 self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobax').span(0, 1),2334 ((0, 6), (0, 6)))2335 self.assertEqual(regex.search('^(foobar){e<=1}$', 'oobar').span(0, 1),2336 ((0, 5), (0, 5)))2337 self.assertEqual(regex.search('^(foobar){e<=1}$', 'fobar').span(0, 1),2338 ((0, 5), (0, 5)))2339 self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooba').span(0, 1),2340 ((0, 5), (0, 5)))2341 
self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobarx'), None)2342 self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarxx'), None)2343 self.assertEqual(regex.search('^(foobar){e<=1}$', 'xxfoobar'), None)2344 self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoxbar'), None)2345 self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbarx'), None)2346 # At most one insert, two deletes, and three substitutions.2347 # Additionally, deletes cost two and substitutes one, and total2348 # cost must be less than 4.2349 self.assertEqual(regex.search('(foobar){i<=1,d<=2,s<=3,2d+1s<4}',2350 '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((6, 13), (6,2351 13)))2352 self.assertEqual(regex.search('(?b)(foobar){i<=1,d<=2,s<=3,2d+1s<4}',2353 '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((34, 39),2354 (34, 39)))2355 # Partially fuzzy matches.2356 self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobarzap').span(0,2357 1), ((0, 9), (3, 6)))2358 self.assertEqual(regex.search('foo(bar){e<=1}zap', 'fobarzap'), None)2359 self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobrzap').span(0,2360 1), ((0, 8), (3, 5)))2361 text = ('www.cnn.com 64.236.16.20\nwww.slashdot.org 66.35.250.150\n'2362 'For useful information, use www.slashdot.org\nthis is demo data!\n')2363 self.assertEqual(regex.search(r'(?s)^.*(dot.org){e}.*$', text).span(0,2364 1), ((0, 120), (120, 120)))2365 self.assertEqual(regex.search(r'(?es)^.*(dot.org){e}.*$', text).span(0,2366 1), ((0, 120), (93, 100)))2367 self.assertEqual(regex.search(r'^.*(dot.org){e}.*$', text).span(0, 1),2368 ((0, 119), (24, 101)))2369 # Behaviour is unexpected, but arguably not wrong. 
It first finds the2370 # best match, then the best in what follows, etc.2371 self.assertEqual(regex.findall(r"\b\L<words>{e<=1}\b",2372 " book cot dog desk ", words="cat dog".split()), ["cot", "dog"])2373 self.assertEqual(regex.findall(r"\b\L<words>{e<=1}\b",2374 " book dog cot desk ", words="cat dog".split()), [" dog", "cot"])2375 self.assertEqual(regex.findall(r"(?e)\b\L<words>{e<=1}\b",2376 " book dog cot desk ", words="cat dog".split()), ["dog", "cot"])2377 self.assertEqual(regex.findall(r"(?r)\b\L<words>{e<=1}\b",2378 " book cot dog desk ", words="cat dog".split()), ["dog ", "cot"])2379 self.assertEqual(regex.findall(r"(?er)\b\L<words>{e<=1}\b",2380 " book cot dog desk ", words="cat dog".split()), ["dog", "cot"])2381 self.assertEqual(regex.findall(r"(?r)\b\L<words>{e<=1}\b",2382 " book dog cot desk ", words="cat dog".split()), ["cot", "dog"])2383 self.assertEqual(regex.findall(br"\b\L<words>{e<=1}\b",2384 b" book cot dog desk ", words=b"cat dog".split()), [b"cot", b"dog"])2385 self.assertEqual(regex.findall(br"\b\L<words>{e<=1}\b",2386 b" book dog cot desk ", words=b"cat dog".split()), [b" dog", b"cot"])2387 self.assertEqual(regex.findall(br"(?e)\b\L<words>{e<=1}\b",2388 b" book dog cot desk ", words=b"cat dog".split()), [b"dog", b"cot"])2389 self.assertEqual(regex.findall(br"(?r)\b\L<words>{e<=1}\b",2390 b" book cot dog desk ", words=b"cat dog".split()), [b"dog ", b"cot"])2391 self.assertEqual(regex.findall(br"(?er)\b\L<words>{e<=1}\b",2392 b" book cot dog desk ", words=b"cat dog".split()), [b"dog", b"cot"])2393 self.assertEqual(regex.findall(br"(?r)\b\L<words>{e<=1}\b",2394 b" book dog cot desk ", words=b"cat dog".split()), [b"cot", b"dog"])2395 self.assertEqual(regex.search(r"(\w+) (\1{e<=1})", "foo fou").groups(),2396 ("foo", "fou"))2397 self.assertEqual(regex.search(r"(?r)(\2{e<=1}) (\w+)",2398 "foo fou").groups(), ("foo", "fou"))2399 self.assertEqual(regex.search(br"(\w+) (\1{e<=1})",2400 b"foo fou").groups(), (b"foo", b"fou"))2401 
self.assertEqual(regex.findall(r"(?:(?:QR)+){e}","abcde"), ["abcde",2402 ""])2403 self.assertEqual(regex.findall(r"(?:Q+){e}","abc"), ["abc", ""])2404 # Hg issue 41: = for fuzzy matches2405 self.assertEqual(regex.match(r"(?:service detection){0<e<5}",2406 "servic detection").span(), (0, 16))2407 self.assertEqual(regex.match(r"(?:service detection){0<e<5}",2408 "service detect").span(), (0, 14))2409 self.assertEqual(regex.match(r"(?:service detection){0<e<5}",2410 "service detecti").span(), (0, 15))2411 self.assertEqual(regex.match(r"(?:service detection){0<e<5}",2412 "service detection"), None)2413 self.assertEqual(regex.match(r"(?:service detection){0<e<5}",2414 "in service detection").span(), (0, 20))2415 # Hg issue 109: Edit distance of fuzzy match2416 self.assertEqual(regex.fullmatch(r"(?:cats|cat){e<=1}",2417 "cat").fuzzy_counts, (0, 0, 1))2418 self.assertEqual(regex.fullmatch(r"(?e)(?:cats|cat){e<=1}",2419 "cat").fuzzy_counts, (0, 0, 0))2420 self.assertEqual(regex.fullmatch(r"(?:cat|cats){e<=1}",2421 "cats").fuzzy_counts, (0, 1, 0))2422 self.assertEqual(regex.fullmatch(r"(?e)(?:cat|cats){e<=1}",2423 "cats").fuzzy_counts, (0, 0, 0))2424 self.assertEqual(regex.fullmatch(r"(?:cat){e<=1} (?:cat){e<=1}",2425 "cat cot").fuzzy_counts, (1, 0, 0))2426 def test_recursive(self):2427 self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "xx")[ : ],2428 ("xx", "x", ""))2429 self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "aba")[ : ],2430 ("aba", "a", "b"))2431 self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "abba")[ : ],2432 ("abba", "a", None))2433 self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "kayak")[ : ],2434 ("kayak", "k", None))2435 self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "paper")[ : ],2436 ("pap", "p", "a"))2437 self.assertEqual(regex.search(r"(\w)(?:(?R)|(\w?))\1", "dontmatchme"),2438 None)2439 self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)", "xx")[ : ],2440 ("xx", "", "x"))2441 
self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)", "aba")[ : ],2442 ("aba", "b", "a"))2443 self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)", "abba")[ :2444 ], ("abba", None, "a"))2445 self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)", "kayak")[ :2446 ], ("kayak", None, "k"))2447 self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)", "paper")[ :2448 ], ("pap", "a", "p"))2449 self.assertEqual(regex.search(r"(?r)\2(?:(\w?)|(?R))(\w)",2450 "dontmatchme"), None)2451 self.assertEqual(regex.search(r"\(((?>[^()]+)|(?R))*\)", "(ab(cd)ef)")[2452 : ], ("(ab(cd)ef)", "ef"))2453 self.assertEqual(regex.search(r"\(((?>[^()]+)|(?R))*\)",2454 "(ab(cd)ef)").captures(1), ["ab", "cd", "(cd)", "ef"])2455 self.assertEqual(regex.search(r"(?r)\(((?R)|(?>[^()]+))*\)",2456 "(ab(cd)ef)")[ : ], ("(ab(cd)ef)", "ab"))2457 self.assertEqual(regex.search(r"(?r)\(((?R)|(?>[^()]+))*\)",2458 "(ab(cd)ef)").captures(1), ["ef", "cd", "(cd)", "ab"])2459 self.assertEqual(regex.search(r"\(([^()]+|(?R))*\)",2460 "some text (a(b(c)d)e) more text")[ : ], ("(a(b(c)d)e)", "e"))2461 self.assertEqual(regex.search(r"(?r)\(((?R)|[^()]+)*\)",2462 "some text (a(b(c)d)e) more text")[ : ], ("(a(b(c)d)e)", "a"))2463 self.assertEqual(regex.search(r"(foo(\(((?:(?>[^()]+)|(?2))*)\)))",2464 "foo(bar(baz)+baz(bop))")[ : ], ("foo(bar(baz)+baz(bop))",2465 "foo(bar(baz)+baz(bop))", "(bar(baz)+baz(bop))",2466 "bar(baz)+baz(bop)"))2467 self.assertEqual(regex.search(r"(?r)(foo(\(((?:(?2)|(?>[^()]+))*)\)))",2468 "foo(bar(baz)+baz(bop))")[ : ], ("foo(bar(baz)+baz(bop))",2469 "foo(bar(baz)+baz(bop))", "(bar(baz)+baz(bop))",2470 "bar(baz)+baz(bop)"))2471 rgx = regex.compile(r"""^\s*(<\s*([a-zA-Z:]+)(?:\s*[a-zA-Z:]*\s*=\s*(?:'[^']*'|"[^"]*"))*\s*(/\s*)?>(?:[^<>]*|(?1))*(?(3)|<\s*/\s*\2\s*>))\s*$""")2472 self.assertEqual(bool(rgx.search('<foo><bar></bar></foo>')), True)2473 self.assertEqual(bool(rgx.search('<foo><bar></foo></bar>')), False)2474 self.assertEqual(bool(rgx.search('<foo><bar/></foo>')), 
True)2475 self.assertEqual(bool(rgx.search('<foo><bar></foo>')), False)2476 self.assertEqual(bool(rgx.search('<foo bar=baz/>')), False)2477 self.assertEqual(bool(rgx.search('<foo bar="baz">')), False)2478 self.assertEqual(bool(rgx.search('<foo bar="baz"/>')), True)2479 self.assertEqual(bool(rgx.search('< fooo / >')), True)2480 # The next regex should and does match. Perl 5.14 agrees.2481 #self.assertEqual(bool(rgx.search('<foo/>foo')), False)2482 self.assertEqual(bool(rgx.search('foo<foo/>')), False)2483 self.assertEqual(bool(rgx.search('<foo>foo</foo>')), True)2484 self.assertEqual(bool(rgx.search('<foo><bar/>foo</foo>')), True)2485 self.assertEqual(bool(rgx.search('<a><b><c></c></b></a>')), True)2486 def test_copy(self):2487 # PatternObjects are immutable, therefore there's no need to clone them.2488 r = regex.compile("a")2489 self.assert_(copy.copy(r) is r)2490 self.assert_(copy.deepcopy(r) is r)2491 # MatchObjects are normally mutable because the target string can be2492 # detached. 
However, after the target string has been detached, a2493 # MatchObject becomes immutable, so there's no need to clone it.2494 m = r.match("a")2495 self.assert_(copy.copy(m) is not m)2496 self.assert_(copy.deepcopy(m) is not m)2497 self.assert_(m.string is not None)2498 m2 = copy.copy(m)2499 m2.detach_string()2500 self.assert_(m.string is not None)2501 self.assert_(m2.string is None)2502 # The following behaviour matches that of the re module.2503 it = regex.finditer(".", "ab")2504 it2 = copy.copy(it)2505 self.assertEqual(next(it).group(), "a")2506 self.assertEqual(next(it2).group(), "b")2507 # The following behaviour matches that of the re module.2508 it = regex.finditer(".", "ab")2509 it2 = copy.deepcopy(it)2510 self.assertEqual(next(it).group(), "a")2511 self.assertEqual(next(it2).group(), "b")2512 # The following behaviour is designed to match that of copying 'finditer'.2513 it = regex.splititer(" ", "a b")2514 it2 = copy.copy(it)2515 self.assertEqual(next(it), "a")2516 self.assertEqual(next(it2), "b")2517 # The following behaviour is designed to match that of copying 'finditer'.2518 it = regex.splititer(" ", "a b")2519 it2 = copy.deepcopy(it)2520 self.assertEqual(next(it), "a")2521 self.assertEqual(next(it2), "b")2522 def test_format(self):2523 self.assertEqual(regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}",2524 "foo bar"), "foo bar => bar foo")2525 self.assertEqual(regex.subf(r"(?<word1>\w+) (?<word2>\w+)",2526 "{word2} {word1}", "foo bar"), "bar foo")2527 self.assertEqual(regex.subfn(r"(\w+) (\w+)", "{0} => {2} {1}",2528 "foo bar"), ("foo bar => bar foo", 1))2529 self.assertEqual(regex.subfn(r"(?<word1>\w+) (?<word2>\w+)",2530 "{word2} {word1}", "foo bar"), ("bar foo", 1))2531 self.assertEqual(regex.match(r"(\w+) (\w+)",2532 "foo bar").expandf("{0} => {2} {1}"), "foo bar => bar foo")2533 def test_fullmatch(self):2534 self.assertEqual(bool(regex.fullmatch(r"abc", "abc")), True)2535 self.assertEqual(bool(regex.fullmatch(r"abc", "abcx")), False)2536 
self.assertEqual(bool(regex.fullmatch(r"abc", "abcx", endpos=3)), True)2537 self.assertEqual(bool(regex.fullmatch(r"abc", "xabc", pos=1)), True)2538 self.assertEqual(bool(regex.fullmatch(r"abc", "xabcy", pos=1)), False)2539 self.assertEqual(bool(regex.fullmatch(r"abc", "xabcy", pos=1,2540 endpos=4)), True)2541 self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abc")), True)2542 self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abcx")), False)2543 self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abcx", endpos=3)),2544 True)2545 self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabc", pos=1)),2546 True)2547 self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabcy", pos=1)),2548 False)2549 self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabcy", pos=1,2550 endpos=4)), True)2551 def test_issue_18468(self):2552 # Applies only after Python 3.4 for compatibility with re.2553 if (sys.version_info.major, sys.version_info.minor) < (3, 4):2554 return2555 self.assertTypedEqual(regex.sub('y', 'a', 'xyz'), 'xaz')2556 self.assertTypedEqual(regex.sub('y', StrSubclass('a'),2557 StrSubclass('xyz')), 'xaz')2558 self.assertTypedEqual(regex.sub(b'y', b'a', b'xyz'), b'xaz')2559 self.assertTypedEqual(regex.sub(b'y', BytesSubclass(b'a'),2560 BytesSubclass(b'xyz')), b'xaz')2561 self.assertTypedEqual(regex.sub(b'y', bytearray(b'a'),2562 bytearray(b'xyz')), b'xaz')2563 self.assertTypedEqual(regex.sub(b'y', memoryview(b'a'),2564 memoryview(b'xyz')), b'xaz')2565 for string in ":a:b::c", StrSubclass(":a:b::c"):2566 self.assertTypedEqual(regex.split(":", string), ['', 'a', 'b', '',2567 'c'])2568 self.assertTypedEqual(regex.split(":*", string), ['', 'a', 'b',2569 'c'])2570 self.assertTypedEqual(regex.split("(:*)", string), ['', ':', 'a',2571 ':', 'b', '::', 'c'])2572 for string in (b":a:b::c", BytesSubclass(b":a:b::c"),2573 bytearray(b":a:b::c"), memoryview(b":a:b::c")):2574 self.assertTypedEqual(regex.split(b":", string), [b'', b'a', b'b',2575 b'', b'c'])2576 
self.assertTypedEqual(regex.split(b":*", string), [b'', b'a', b'b',2577 b'c'])2578 self.assertTypedEqual(regex.split(b"(:*)", string), [b'', b':',2579 b'a', b':', b'b', b'::', b'c'])2580 for string in "a:b::c:::d", StrSubclass("a:b::c:::d"):2581 self.assertTypedEqual(regex.findall(":+", string), [":", "::",2582 ":::"])2583 self.assertTypedEqual(regex.findall("(:+)", string), [":", "::",2584 ":::"])2585 self.assertTypedEqual(regex.findall("(:)(:*)", string), [(":", ""),2586 (":", ":"), (":", "::")])2587 for string in (b"a:b::c:::d", BytesSubclass(b"a:b::c:::d"),2588 bytearray(b"a:b::c:::d"), memoryview(b"a:b::c:::d")):2589 self.assertTypedEqual(regex.findall(b":+", string), [b":", b"::",2590 b":::"])2591 self.assertTypedEqual(regex.findall(b"(:+)", string), [b":", b"::",2592 b":::"])2593 self.assertTypedEqual(regex.findall(b"(:)(:*)", string), [(b":",2594 b""), (b":", b":"), (b":", b"::")])2595 for string in 'a', StrSubclass('a'):2596 self.assertEqual(regex.match('a', string).groups(), ())2597 self.assertEqual(regex.match('(a)', string).groups(), ('a',))2598 self.assertEqual(regex.match('(a)', string).group(0), 'a')2599 self.assertEqual(regex.match('(a)', string).group(1), 'a')2600 self.assertEqual(regex.match('(a)', string).group(1, 1), ('a',2601 'a'))2602 for string in (b'a', BytesSubclass(b'a'), bytearray(b'a'),2603 memoryview(b'a')):2604 self.assertEqual(regex.match(b'a', string).groups(), ())2605 self.assertEqual(regex.match(b'(a)', string).groups(), (b'a',))2606 self.assertEqual(regex.match(b'(a)', string).group(0), b'a')2607 self.assertEqual(regex.match(b'(a)', string).group(1), b'a')2608 self.assertEqual(regex.match(b'(a)', string).group(1, 1), (b'a',2609 b'a'))2610 def test_partial(self):2611 self.assertEqual(regex.match('ab', 'a', partial=True).partial, True)2612 self.assertEqual(regex.match('ab', 'a', partial=True).span(), (0, 1))2613 self.assertEqual(regex.match(r'cats', 'cat', partial=True).partial,2614 True)2615 self.assertEqual(regex.match(r'cats', 
'cat', partial=True).span(), (0,2616 3))2617 self.assertEqual(regex.match(r'cats', 'catch', partial=True), None)2618 self.assertEqual(regex.match(r'abc\w{3}', 'abcdef',2619 partial=True).partial, False)2620 self.assertEqual(regex.match(r'abc\w{3}', 'abcdef',2621 partial=True).span(), (0, 6))2622 self.assertEqual(regex.match(r'abc\w{3}', 'abcde',2623 partial=True).partial, True)2624 self.assertEqual(regex.match(r'abc\w{3}', 'abcde',2625 partial=True).span(), (0, 5))2626 self.assertEqual(regex.match(r'\d{4}$', '1234', partial=True).partial,2627 False)2628 self.assertEqual(regex.match(r'\L<words>', 'post', partial=True,2629 words=['post']).partial, False)2630 self.assertEqual(regex.match(r'\L<words>', 'post', partial=True,2631 words=['post']).span(), (0, 4))2632 self.assertEqual(regex.match(r'\L<words>', 'pos', partial=True,2633 words=['post']).partial, True)2634 self.assertEqual(regex.match(r'\L<words>', 'pos', partial=True,2635 words=['post']).span(), (0, 3))2636 self.assertEqual(regex.match(r'(?fi)\L<words>', 'POST', partial=True,2637 words=['po\uFB06']).partial, False)2638 self.assertEqual(regex.match(r'(?fi)\L<words>', 'POST', partial=True,2639 words=['po\uFB06']).span(), (0, 4))2640 self.assertEqual(regex.match(r'(?fi)\L<words>', 'POS', partial=True,2641 words=['po\uFB06']).partial, True)2642 self.assertEqual(regex.match(r'(?fi)\L<words>', 'POS', partial=True,2643 words=['po\uFB06']).span(), (0, 3))2644 self.assertEqual(regex.match(r'(?fi)\L<words>', 'po\uFB06',2645 partial=True, words=['POS']), None)2646 self.assertEqual(regex.match(r'[a-z]*4R$', 'a', partial=True).span(),2647 (0, 1))2648 self.assertEqual(regex.match(r'[a-z]*4R$', 'ab', partial=True).span(),2649 (0, 2))2650 self.assertEqual(regex.match(r'[a-z]*4R$', 'ab4', partial=True).span(),2651 (0, 3))2652 self.assertEqual(regex.match(r'[a-z]*4R$', 'a4', partial=True).span(),2653 (0, 2))2654 self.assertEqual(regex.match(r'[a-z]*4R$', 'a4R', partial=True).span(),2655 (0, 3))2656 
self.assertEqual(regex.match(r'[a-z]*4R$', '4a', partial=True), None)2657 self.assertEqual(regex.match(r'[a-z]*4R$', 'a44', partial=True), None)2658 def test_hg_bugs(self):2659 # Hg issue 28: regex.compile("(?>b)") causes "TypeError: 'Character'2660 # object is not subscriptable"2661 self.assertEqual(bool(regex.compile("(?>b)", flags=regex.V1)), True)2662 # Hg issue 29: regex.compile("^((?>\w+)|(?>\s+))*$") causes2663 # "TypeError: 'GreedyRepeat' object is not iterable"2664 self.assertEqual(bool(regex.compile(r"^((?>\w+)|(?>\s+))*$",2665 flags=regex.V1)), True)2666 # Hg issue 31: atomic and normal groups in recursive patterns2667 self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)",2668 "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)'])2669 self.assertEqual(regex.findall(r"\((?:(?:[^()]+)|(?R))*\)",2670 "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)'])2671 self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)",2672 "a(b(cd)e)f)g)h"), ['(b(cd)e)'])2673 self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)",2674 "a(bc(d(e)f)gh"), ['(d(e)f)'])2675 self.assertEqual(regex.findall(r"(?r)\((?:(?>[^()]+)|(?R))*\)",2676 "a(bc(d(e)f)gh"), ['(d(e)f)'])2677 self.assertEqual([m.group() for m in2678 regex.finditer(r"\((?:[^()]*+|(?0))*\)", "a(b(c(de)fg)h")],2679 ['(c(de)fg)'])2680 # Hg issue 32: regex.search("a(bc)d", "abcd", regex.I|regex.V1) returns2681 # None2682 self.assertEqual(regex.search("a(bc)d", "abcd", regex.I |2683 regex.V1).group(0), "abcd")2684 # Hg issue 33: regex.search("([\da-f:]+)$", "E", regex.I|regex.V1)2685 # returns None2686 self.assertEqual(regex.search("([\da-f:]+)$", "E", regex.I |2687 regex.V1).group(0), "E")2688 self.assertEqual(regex.search("([\da-f:]+)$", "e", regex.I |2689 regex.V1).group(0), "e")2690 # Hg issue 34: regex.search("^(?=ab(de))(abd)(e)", "abde").groups()2691 # returns (None, 'abd', 'e') instead of ('de', 'abd', 'e')2692 self.assertEqual(regex.search("^(?=ab(de))(abd)(e)", "abde").groups(),2693 ('de', 'abd', 'e'))2694 # Hg issue 35: 
regex.compile("\ ", regex.X) causes "_regex_core.error:2695 # bad escape"2696 self.assertEqual(bool(regex.match(r"\ ", " ", flags=regex.X)), True)2697 # Hg issue 36: regex.search("^(a|)\1{2}b", "b") returns None2698 self.assertEqual(regex.search(r"^(a|)\1{2}b", "b").group(0, 1), ('b',2699 ''))2700 # Hg issue 37: regex.search("^(a){0,0}", "abc").group(0,1) returns2701 # ('a', 'a') instead of ('', None)2702 self.assertEqual(regex.search("^(a){0,0}", "abc").group(0, 1), ('',2703 None))2704 # Hg issue 38: regex.search("(?>.*/)b", "a/b") returns None2705 self.assertEqual(regex.search("(?>.*/)b", "a/b").group(0), "a/b")2706 # Hg issue 39: regex.search("((?i)blah)\\s+\\1", "blah BLAH") doesn't2707 # return None2708 self.assertEqual(regex.search(r"(?V0)((?i)blah)\s+\1",2709 "blah BLAH").group(0, 1), ("blah BLAH", "blah"))2710 self.assertEqual(regex.search(r"(?V1)((?i)blah)\s+\1", "blah BLAH"),2711 None)2712 # Hg issue 40: regex.search("(\()?[^()]+(?(1)\)|)", "(abcd").group(0)2713 # returns "bcd" instead of "abcd"2714 self.assertEqual(regex.search(r"(\()?[^()]+(?(1)\)|)",2715 "(abcd").group(0), "abcd")2716 # Hg issue 42: regex.search("(a*)*", "a", flags=regex.V1).span(1)2717 # returns (0, 1) instead of (1, 1)2718 self.assertEqual(regex.search("(a*)*", "a").span(1), (1, 1))2719 self.assertEqual(regex.search("(a*)*", "aa").span(1), (2, 2))2720 self.assertEqual(regex.search("(a*)*", "aaa").span(1), (3, 3))2721 # Hg issue 43: regex.compile("a(?#xxx)*") causes "_regex_core.error:2722 # nothing to repeat"2723 self.assertEqual(regex.search("a(?#xxx)*", "aaa").group(), "aaa")2724 # Hg issue 44: regex.compile("(?=abc){3}abc") causes2725 # "_regex_core.error: nothing to repeat"2726 self.assertEqual(regex.search("(?=abc){3}abc", "abcabcabc").span(), (0,2727 3))2728 # Hg issue 45: regex.compile("^(?:a(?:(?:))+)+") causes2729 # "_regex_core.error: nothing to repeat"2730 self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "a").span(), (0, 1))2731 
self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "aa").span(), (0, 2))2732 # Hg issue 46: regex.compile("a(?x: b c )d") causes2733 # "_regex_core.error: missing )"2734 self.assertEqual(regex.search("a(?x: b c )d", "abcd").group(0), "abcd")2735 # Hg issue 47: regex.compile("a#comment\n*", flags=regex.X) causes2736 # "_regex_core.error: nothing to repeat"2737 self.assertEqual(regex.search("a#comment\n*", "aaa",2738 flags=regex.X).group(0), "aaa")2739 # Hg issue 48: regex.search("(a(?(1)\\1)){4}", "a"*10,2740 # flags=regex.V1).group(0,1) returns ('aaaaa', 'a') instead of ('aaaaaaaaaa', 'aaaa')2741 self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){1}",2742 "aaaaaaaaaa").span(0, 1), ((0, 1), (0, 1)))2743 self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){2}",2744 "aaaaaaaaaa").span(0, 1), ((0, 3), (1, 3)))2745 self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){3}",2746 "aaaaaaaaaa").span(0, 1), ((0, 6), (3, 6)))2747 self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){4}",2748 "aaaaaaaaaa").span(0, 1), ((0, 10), (6, 10)))2749 # Hg issue 49: regex.search("(a)(?<=b(?1))", "baz", regex.V1) returns2750 # None incorrectly2751 self.assertEqual(regex.search("(?V1)(a)(?<=b(?1))", "baz").group(0),2752 "a")2753 # Hg issue 50: not all keywords are found by named list with2754 # overlapping keywords when full Unicode casefolding is required2755 self.assertEqual(regex.findall(r'(?fi)\L<keywords>',2756 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05',2757 keywords=['post','pos']), ['POST', 'Post', 'post', 'po\u017Ft',2758 'po\uFB06', 'po\uFB05'])2759 self.assertEqual(regex.findall(r'(?fi)pos|post',2760 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POS',2761 'Pos', 'pos', 'po\u017F', 'po\uFB06', 'po\uFB05'])2762 self.assertEqual(regex.findall(r'(?fi)post|pos',2763 'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), ['POST',2764 'Post', 'post', 'po\u017Ft', 'po\uFB06', 'po\uFB05'])2765 self.assertEqual(regex.findall(r'(?fi)post|another',2766 'POST, Post, post, po\u017Ft, 
po\uFB06, and po\uFB05'), ['POST',2767 'Post', 'post', 'po\u017Ft', 'po\uFB06', 'po\uFB05'])2768 # Hg issue 51: regex.search("((a)(?1)|(?2))", "a", flags=regex.V1)2769 # returns None incorrectly2770 self.assertEqual(regex.search("(?V1)((a)(?1)|(?2))", "a").group(0, 1,2771 2), ('a', 'a', None))2772 # Hg issue 52: regex.search("(\\1xx|){6}", "xx",2773 # flags=regex.V1).span(0,1) returns incorrect value2774 self.assertEqual(regex.search(r"(?V1)(\1xx|){6}", "xx").span(0, 1),2775 ((0, 2), (2, 2)))2776 # Hg issue 53: regex.search("(a|)+", "a") causes MemoryError2777 self.assertEqual(regex.search("(a|)+", "a").group(0, 1), ("a", ""))2778 # Hg issue 54: regex.search("(a|)*\\d", "a"*80) causes MemoryError2779 self.assertEqual(regex.search(r"(a|)*\d", "a" * 80), None)2780 # Hg issue 55: regex.search("^(?:a?b?)*$", "ac") take a very long time.2781 self.assertEqual(regex.search("^(?:a?b?)*$", "ac"), None)2782 # Hg issue 58: bad named character escape sequences like "\\N{1}"2783 # treats as "N"2784 self.assertRaisesRegex(regex.error, self.UNDEF_CHAR_NAME, lambda:2785 regex.compile("\\N{1}"))2786 # Hg issue 59: regex.search("\\Z", "a\na\n") returns None incorrectly2787 self.assertEqual(regex.search("\\Z", "a\na\n").span(0), (4, 4))2788 # Hg issue 60: regex.search("(q1|.)*(q2|.)*(x(a|bc)*y){2,}", "xayxay")2789 # returns None incorrectly2790 self.assertEqual(regex.search("(q1|.)*(q2|.)*(x(a|bc)*y){2,}",2791 "xayxay").group(0), "xayxay")2792 # Hg issue 61: regex.search("[^a]", "A", regex.I).group(0) returns ''2793 # incorrectly2794 self.assertEqual(regex.search("(?i)[^a]", "A"), None)2795 # Hg issue 63: regex.search("[[:ascii:]]", "\N{KELVIN SIGN}",2796 # flags=regex.I|regex.V1) doesn't return None2797 self.assertEqual(regex.search("(?i)[[:ascii:]]", "\N{KELVIN SIGN}"),2798 None)2799 # Hg issue 66: regex.search("((a|b(?1)c){3,5})", "baaaaca",2800 # flags=regex.V1).groups() returns ('baaaac', 'baaaac') instead of ('aaaa', 'a')2801 self.assertEqual(regex.search("((a|b(?1)c){3,5})", 
"baaaaca").group(0,2802 1, 2), ('aaaa', 'aaaa', 'a'))2803 # Hg issue 71: non-greedy quantifier in lookbehind2804 self.assertEqual(regex.findall(r"(?<=:\S+ )\w+", ":9 abc :10 def"),2805 ['abc', 'def'])2806 self.assertEqual(regex.findall(r"(?<=:\S* )\w+", ":9 abc :10 def"),2807 ['abc', 'def'])2808 self.assertEqual(regex.findall(r"(?<=:\S+? )\w+", ":9 abc :10 def"),2809 ['abc', 'def'])2810 self.assertEqual(regex.findall(r"(?<=:\S*? )\w+", ":9 abc :10 def"),2811 ['abc', 'def'])2812 # Hg issue 73: conditional patterns2813 self.assertEqual(regex.search(r"(?:fe)?male", "female").group(),2814 "female")2815 self.assertEqual([m.group() for m in2816 regex.finditer(r"(fe)?male: h(?(1)(er)|(is)) (\w+)",2817 "female: her dog; male: his cat. asdsasda")], ['female: her dog',2818 'male: his cat'])2819 # Hg issue 78: "Captures"doesn't work for recursive calls2820 self.assertEqual(regex.search(r'(?<rec>\((?:[^()]++|(?&rec))*\))',2821 'aaa(((1+0)+1)+1)bbb').captures('rec'), ['(1+0)', '((1+0)+1)',2822 '(((1+0)+1)+1)'])2823 # Hg issue 80: Escape characters throws an exception2824 self.assertRaisesRegex(regex.error, self.BAD_ESCAPE, lambda:2825 regex.sub('x', '\\', 'x'), )2826 # Hg issue 82: error range does not work2827 fz = "(CAGCCTCCCATTTCAGAATATACATCC){1<e<=2}"2828 seq = "tcagacgagtgcgttgtaaaacgacggccagtCAGCCTCCCATTCAGAATATACATCCcgacggccagttaaaaacaatgccaaggaggtcatagctgtttcctgccagttaaaaacaatgccaaggaggtcatagctgtttcctgacgcactcgtctgagcgggctggcaagg"2829 self.assertEqual(regex.search(fz, seq, regex.BESTMATCH)[0],2830 "tCAGCCTCCCATTCAGAATATACATCC")2831 # Hg issue 83: slash handling in presence of a quantifier2832 self.assertEqual(regex.findall(r"c..+/c", "cA/c\ncAb/c"), ['cAb/c'])2833 # Hg issue 85: Non-conformance to Unicode UAX#29 re: ZWJ / ZWNJ2834 self.assertEqual(ascii(regex.sub(r"(\w+)", r"[\1]",2835 '\u0905\u0928\u094d\u200d\u0928 \u0d28\u0d4d\u200d \u0915\u093f\u0928',2836 regex.WORD)),2837 ascii('[\u0905\u0928\u094d\u200d\u0928] [\u0d28\u0d4d\u200d] [\u0915\u093f\u0928]'))2838 # Hg 
issue 88: regex.match() hangs2839 self.assertEqual(regex.match(r".*a.*ba.*aa", "ababba"), None)2840 # Hg issue 87: Allow duplicate names of groups2841 self.assertEqual(regex.match(r'(?<x>a(?<x>b))', "ab").spans("x"), [(1,2842 2), (0, 2)])2843 # Hg issue 91: match.expand is extremely slow2844 # Check that the replacement cache works.2845 self.assertEqual(regex.sub(r'(-)', lambda m: m.expand(r'x'), 'a-b-c'),2846 'axbxc')2847 # Hg issue 94: Python crashes when executing regex updates2848 # pattern.findall2849 rx = regex.compile(r'\bt(est){i<2}', flags=regex.V1)2850 self.assertEqual(rx.search("Some text"), None)2851 self.assertEqual(rx.findall("Some text"), [])2852 # Hg issue 95: 'pos' for regex.error2853 self.assertRaisesRegex(regex.error, self.MULTIPLE_REPEAT, lambda:2854 regex.compile(r'.???'))2855 # Hg issue 97: behaviour of regex.escape's special_only is wrong2856 #2857 # Hg issue 244: Make `special_only=True` the default in2858 # `regex.escape()`2859 self.assertEqual(regex.escape('foo!?', special_only=False), 'foo\\!\\?')2860 self.assertEqual(regex.escape('foo!?', special_only=True), 'foo!\\?')2861 self.assertEqual(regex.escape('foo!?'), 'foo!\\?')2862 self.assertEqual(regex.escape(b'foo!?', special_only=False), b'foo\\!\\?')2863 self.assertEqual(regex.escape(b'foo!?', special_only=True),2864 b'foo!\\?')2865 self.assertEqual(regex.escape(b'foo!?'), b'foo!\\?')2866 # Hg issue 100: strange results from regex.search2867 self.assertEqual(regex.search('^([^z]*(?:WWWi|W))?$',2868 'WWWi').groups(), ('WWWi', ))2869 self.assertEqual(regex.search('^([^z]*(?:WWWi|w))?$',2870 'WWWi').groups(), ('WWWi', ))2871 self.assertEqual(regex.search('^([^z]*?(?:WWWi|W))?$',2872 'WWWi').groups(), ('WWWi', ))2873 # Hg issue 101: findall() broken (seems like memory corruption)2874 pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.UNICODE)2875 self.assertEqual([x.group() for x in pat.finditer('yxxx')], ['xxx'])2876 self.assertEqual(pat.findall('yxxx'), ['xxx'])2877 raw = 'yxxx'2878 
self.assertEqual([x.group() for x in pat.finditer(raw)], ['xxx'])2879 self.assertEqual(pat.findall(raw), ['xxx'])2880 pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.IGNORECASE |2881 regex.UNICODE)2882 self.assertEqual([x.group() for x in pat.finditer('yxxx')], ['xxx'])2883 self.assertEqual(pat.findall('yxxx'), ['xxx'])2884 raw = 'yxxx'2885 self.assertEqual([x.group() for x in pat.finditer(raw)], ['xxx'])2886 self.assertEqual(pat.findall(raw), ['xxx'])2887 # Hg issue 106: * operator not working correctly with sub()2888 self.assertEqual(regex.sub('(?V0).*', 'x', 'test'), 'x')2889 self.assertEqual(regex.sub('(?V1).*', 'x', 'test'), 'xx')2890 self.assertEqual(regex.sub('(?V0).*?', '|', 'test'), '|t|e|s|t|')2891 self.assertEqual(regex.sub('(?V1).*?', '|', 'test'), '|||||||||')2892 # Hg issue 112: re: OK, but regex: SystemError2893 self.assertEqual(regex.sub(r'^(@)\n(?!.*?@)(.*)',2894 r'\1\n==========\n\2', '@\n', flags=regex.DOTALL), '@\n==========\n')2895 # Hg issue 109: Edit distance of fuzzy match2896 self.assertEqual(regex.match(r'(?:cats|cat){e<=1}',2897 'caz').fuzzy_counts, (1, 0, 0))2898 self.assertEqual(regex.match(r'(?e)(?:cats|cat){e<=1}',2899 'caz').fuzzy_counts, (1, 0, 0))2900 self.assertEqual(regex.match(r'(?b)(?:cats|cat){e<=1}',2901 'caz').fuzzy_counts, (1, 0, 0))2902 self.assertEqual(regex.match(r'(?:cat){e<=1}', 'caz').fuzzy_counts,2903 (1, 0, 0))2904 self.assertEqual(regex.match(r'(?e)(?:cat){e<=1}',2905 'caz').fuzzy_counts, (1, 0, 0))2906 self.assertEqual(regex.match(r'(?b)(?:cat){e<=1}',2907 'caz').fuzzy_counts, (1, 0, 0))2908 self.assertEqual(regex.match(r'(?:cats){e<=2}', 'c ats').fuzzy_counts,2909 (1, 1, 0))2910 self.assertEqual(regex.match(r'(?e)(?:cats){e<=2}',2911 'c ats').fuzzy_counts, (0, 1, 0))2912 self.assertEqual(regex.match(r'(?b)(?:cats){e<=2}',2913 'c ats').fuzzy_counts, (0, 1, 0))2914 self.assertEqual(regex.match(r'(?:cats){e<=2}',2915 'c a ts').fuzzy_counts, (0, 2, 0))2916 
self.assertEqual(regex.match(r'(?e)(?:cats){e<=2}',2917 'c a ts').fuzzy_counts, (0, 2, 0))2918 self.assertEqual(regex.match(r'(?b)(?:cats){e<=2}',2919 'c a ts').fuzzy_counts, (0, 2, 0))2920 self.assertEqual(regex.match(r'(?:cats){e<=1}', 'c ats').fuzzy_counts,2921 (0, 1, 0))2922 self.assertEqual(regex.match(r'(?e)(?:cats){e<=1}',2923 'c ats').fuzzy_counts, (0, 1, 0))2924 self.assertEqual(regex.match(r'(?b)(?:cats){e<=1}',2925 'c ats').fuzzy_counts, (0, 1, 0))2926 # Hg issue 115: Infinite loop when processing backreferences2927 self.assertEqual(regex.findall(r'\bof ([a-z]+) of \1\b',2928 'To make use of one of these modules'), [])2929 # Hg issue 125: Reference to entire match (\g&lt;0&gt;) in2930 # Pattern.sub() doesn't work as of 2014.09.22 release.2931 self.assertEqual(regex.sub(r'x', r'\g<0>', 'x'), 'x')2932 # Unreported issue: no such builtin as 'ascii' in Python 2.2933 self.assertEqual(bool(regex.match(r'a', 'a', regex.DEBUG)), True)2934 # Hg issue 131: nested sets behaviour2935 self.assertEqual(regex.findall(r'(?V1)[[b-e]--cd]', 'abcdef'), ['b',2936 'e'])2937 self.assertEqual(regex.findall(r'(?V1)[b-e--cd]', 'abcdef'), ['b',2938 'e'])2939 self.assertEqual(regex.findall(r'(?V1)[[bcde]--cd]', 'abcdef'), ['b',2940 'e'])2941 self.assertEqual(regex.findall(r'(?V1)[bcde--cd]', 'abcdef'), ['b',2942 'e'])2943 # Hg issue 132: index out of range on null property \p{}2944 self.assertRaisesRegex(regex.error, '^unknown property at position 4$',2945 lambda: regex.compile(r'\p{}'))2946 # Issue 23692.2947 self.assertEqual(regex.match('(?:()|(?(1)()|z)){2}(?(2)a|z)',2948 'a').group(0, 1, 2), ('a', '', ''))2949 self.assertEqual(regex.match('(?:()|(?(1)()|z)){0,2}(?(2)a|z)',2950 'a').group(0, 1, 2), ('a', '', ''))2951 # Hg issue 137: Posix character class :punct: does not seem to be2952 # supported.2953 # Posix compatibility as recommended here:2954 # http://www.unicode.org/reports/tr18/#Compatibility_Properties2955 # Posix in Unicode.2956 chars = ''.join(chr(c) for c in 
range(0x10000))2957 self.assertEqual(ascii(''.join(regex.findall(r'''[[:alnum:]]+''',2958 chars))), ascii(''.join(regex.findall(r'''[\p{Alpha}\p{PosixDigit}]+''',2959 chars))))2960 self.assertEqual(ascii(''.join(regex.findall(r'''[[:alpha:]]+''',2961 chars))), ascii(''.join(regex.findall(r'''\p{Alpha}+''',2962 chars))))2963 self.assertEqual(ascii(''.join(regex.findall(r'''[[:ascii:]]+''',2964 chars))), ascii(''.join(regex.findall(r'''[\p{InBasicLatin}]+''',2965 chars))))2966 self.assertEqual(ascii(''.join(regex.findall(r'''[[:blank:]]+''',2967 chars))), ascii(''.join(regex.findall(r'''[\p{gc=Space_Separator}\t]+''',2968 chars))))2969 self.assertEqual(ascii(''.join(regex.findall(r'''[[:cntrl:]]+''',2970 chars))), ascii(''.join(regex.findall(r'''\p{gc=Control}+''', chars))))2971 self.assertEqual(ascii(''.join(regex.findall(r'''[[:digit:]]+''',2972 chars))), ascii(''.join(regex.findall(r'''[0-9]+''', chars))))2973 self.assertEqual(ascii(''.join(regex.findall(r'''[[:graph:]]+''',2974 chars))), ascii(''.join(regex.findall(r'''[^\p{Space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]+''',2975 chars))))2976 self.assertEqual(ascii(''.join(regex.findall(r'''[[:lower:]]+''',2977 chars))), ascii(''.join(regex.findall(r'''\p{Lower}+''',2978 chars))))2979 self.assertEqual(ascii(''.join(regex.findall(r'''[[:print:]]+''',2980 chars))), ascii(''.join(regex.findall(r'''(?V1)[\p{Graph}\p{Blank}--\p{Cntrl}]+''', chars))))2981 self.assertEqual(ascii(''.join(regex.findall(r'''[[:punct:]]+''',2982 chars))),2983 ascii(''.join(regex.findall(r'''(?V1)[\p{gc=Punctuation}\p{gc=Symbol}--\p{Alpha}]+''',2984 chars))))2985 self.assertEqual(ascii(''.join(regex.findall(r'''[[:space:]]+''',2986 chars))), ascii(''.join(regex.findall(r'''\p{Whitespace}+''',2987 chars))))2988 self.assertEqual(ascii(''.join(regex.findall(r'''[[:upper:]]+''',2989 chars))), ascii(''.join(regex.findall(r'''\p{Upper}+''',2990 chars))))2991 self.assertEqual(ascii(''.join(regex.findall(r'''[[:word:]]+''',2992 chars))), 
ascii(''.join(regex.findall(r'''[\p{Alpha}\p{gc=Mark}\p{Digit}\p{gc=Connector_Punctuation}\p{Join_Control}]+''',2993 chars))))2994 self.assertEqual(ascii(''.join(regex.findall(r'''[[:xdigit:]]+''',2995 chars))), ascii(''.join(regex.findall(r'''[0-9A-Fa-f]+''',2996 chars))))2997 # Posix in ASCII.2998 chars = bytes(range(0x100))2999 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:alnum:]]+''',3000 chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{Alpha}\p{PosixDigit}]+''',3001 chars))))3002 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:alpha:]]+''',3003 chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Alpha}+''', chars))))3004 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:ascii:]]+''',3005 chars))), ascii(b''.join(regex.findall(br'''(?a)[\x00-\x7F]+''', chars))))3006 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:blank:]]+''',3007 chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{gc=Space_Separator}\t]+''',3008 chars))))3009 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:cntrl:]]+''',3010 chars))), ascii(b''.join(regex.findall(br'''(?a)\p{gc=Control}+''',3011 chars))))3012 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:digit:]]+''',3013 chars))), ascii(b''.join(regex.findall(br'''(?a)[0-9]+''', chars))))3014 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:graph:]]+''',3015 chars))), ascii(b''.join(regex.findall(br'''(?a)[^\p{Space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]+''', chars))))3016 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:lower:]]+''',3017 chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Lower}+''', chars))))3018 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:print:]]+''',3019 chars))), ascii(b''.join(regex.findall(br'''(?aV1)[\p{Graph}\p{Blank}--\p{Cntrl}]+''', chars))))3020 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:punct:]]+''',3021 chars))), 
ascii(b''.join(regex.findall(br'''(?aV1)[\p{gc=Punctuation}\p{gc=Symbol}--\p{Alpha}]+''',3022 chars))))3023 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:space:]]+''',3024 chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Whitespace}+''', chars))))3025 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:upper:]]+''',3026 chars))), ascii(b''.join(regex.findall(br'''(?a)\p{Upper}+''', chars))))3027 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:word:]]+''',3028 chars))), ascii(b''.join(regex.findall(br'''(?a)[\p{Alpha}\p{gc=Mark}\p{Digit}\p{gc=Connector_Punctuation}\p{Join_Control}]+''', chars))))3029 self.assertEqual(ascii(b''.join(regex.findall(br'''(?a)[[:xdigit:]]+''',3030 chars))), ascii(b''.join(regex.findall(br'''(?a)[0-9A-Fa-f]+''', chars))))3031 # Hg issue 138: grapheme anchored search not working properly.3032 self.assertEqual(ascii(regex.search(r'\X$', 'ab\u2103').group()),3033 ascii('\u2103'))3034 # Hg issue 139: Regular expression with multiple wildcards where first3035 # should match empty string does not always work.3036 self.assertEqual(regex.search("([^L]*)([^R]*R)", "LtR").groups(), ('',3037 'LtR'))3038 # Hg issue 140: Replace with REVERSE and groups has unexpected3039 # behavior.3040 self.assertEqual(regex.sub(r'(.)', r'x\1y', 'ab'), 'xayxby')3041 self.assertEqual(regex.sub(r'(?r)(.)', r'x\1y', 'ab'), 'xayxby')3042 self.assertEqual(regex.subf(r'(.)', 'x{1}y', 'ab'), 'xayxby')3043 self.assertEqual(regex.subf(r'(?r)(.)', 'x{1}y', 'ab'), 'xayxby')3044 # Hg issue 141: Crash on a certain partial match.3045 self.assertEqual(regex.fullmatch('(a)*abc', 'ab',3046 partial=True).span(), (0, 2))3047 self.assertEqual(regex.fullmatch('(a)*abc', 'ab',3048 partial=True).partial, True)3049 # Hg issue 143: Partial matches have incorrect span if prefix is '.'3050 # wildcard.3051 self.assertEqual(regex.search('OXRG', 'OOGOX', partial=True).span(),3052 (3, 5))3053 self.assertEqual(regex.search('.XRG', 'OOGOX', partial=True).span(),3054 (3, 
5))3055 self.assertEqual(regex.search('.{1,3}XRG', 'OOGOX',3056 partial=True).span(), (1, 5))3057 # Hg issue 144: Latest version problem with matching 'R|R'.3058 self.assertEqual(regex.match('R|R', 'R').span(), (0, 1))3059 # Hg issue 146: Forced-fail (?!) works improperly in conditional.3060 self.assertEqual(regex.match(r'(.)(?(1)(?!))', 'xy'), None)3061 # Groups cleared after failure.3062 self.assertEqual(regex.findall(r'(y)?(\d)(?(1)\b\B)', 'ax1y2z3b'),3063 [('', '1'), ('', '2'), ('', '3')])3064 self.assertEqual(regex.findall(r'(y)?+(\d)(?(1)\b\B)', 'ax1y2z3b'),3065 [('', '1'), ('', '2'), ('', '3')])3066 # Hg issue 147: Fuzzy match can return match points beyond buffer end.3067 self.assertEqual([m.span() for m in3068 regex.finditer(r'(?i)(?:error){e}', 'regex failure')], [(0, 5), (5,3069 10), (10, 13), (13, 13)])3070 self.assertEqual([m.span() for m in3071 regex.finditer(r'(?fi)(?:error){e}', 'regex failure')], [(0, 5), (5,3072 10), (10, 13), (13, 13)])3073 # Hg issue 150: Have an option for POSIX-compatible longest match of3074 # alternates.3075 self.assertEqual(regex.search(r'(?p)\d+(\w(\d*)?|[eE]([+-]\d+))',3076 '10b12')[0], '10b12')3077 self.assertEqual(regex.search(r'(?p)\d+(\w(\d*)?|[eE]([+-]\d+))',3078 '10E+12')[0], '10E+12')3079 self.assertEqual(regex.search(r'(?p)(\w|ae|oe|ue|ss)', 'ae')[0], 'ae')3080 self.assertEqual(regex.search(r'(?p)one(self)?(selfsufficient)?',3081 'oneselfsufficient')[0], 'oneselfsufficient')3082 # Hg issue 151: Request: \K.3083 self.assertEqual(regex.search(r'(ab\Kcd)', 'abcd').group(0, 1), ('cd',3084 'abcd'))3085 self.assertEqual(regex.findall(r'\w\w\K\w\w', 'abcdefgh'), ['cd',3086 'gh'])3087 self.assertEqual(regex.findall(r'(\w\w\K\w\w)', 'abcdefgh'), ['abcd',3088 'efgh'])3089 self.assertEqual(regex.search(r'(?r)(ab\Kcd)', 'abcd').group(0, 1),3090 ('ab', 'abcd'))3091 self.assertEqual(regex.findall(r'(?r)\w\w\K\w\w', 'abcdefgh'), ['ef',3092 'ab'])3093 self.assertEqual(regex.findall(r'(?r)(\w\w\K\w\w)', 'abcdefgh'),3094 ['efgh', 
'abcd'])3095 # Hg issue 152: Request: Request: (?(DEFINE)...).3096 self.assertEqual(regex.search(r'(?(DEFINE)(?<quant>\d+)(?<item>\w+))(?&quant) (?&item)',3097 '5 elephants')[0], '5 elephants')3098 # Hg issue 153: Request: (*SKIP).3099 self.assertEqual(regex.search(r'12(*FAIL)|3', '123')[0], '3')3100 self.assertEqual(regex.search(r'(?r)12(*FAIL)|3', '123')[0], '3')3101 self.assertEqual(regex.search(r'\d+(*PRUNE)\d', '123'), None)3102 self.assertEqual(regex.search(r'\d+(?=(*PRUNE))\d', '123')[0], '123')3103 self.assertEqual(regex.search(r'\d+(*PRUNE)bcd|[3d]', '123bcd')[0],3104 '123bcd')3105 self.assertEqual(regex.search(r'\d+(*PRUNE)bcd|[3d]', '123zzd')[0],3106 'd')3107 self.assertEqual(regex.search(r'\d+?(*PRUNE)bcd|[3d]', '123bcd')[0],3108 '3bcd')3109 self.assertEqual(regex.search(r'\d+?(*PRUNE)bcd|[3d]', '123zzd')[0],3110 'd')3111 self.assertEqual(regex.search(r'\d++(?<=3(*PRUNE))zzd|[4d]$',3112 '123zzd')[0], '123zzd')3113 self.assertEqual(regex.search(r'\d++(?<=3(*PRUNE))zzd|[4d]$',3114 '124zzd')[0], 'd')3115 self.assertEqual(regex.search(r'\d++(?<=(*PRUNE)3)zzd|[4d]$',3116 '124zzd')[0], 'd')3117 self.assertEqual(regex.search(r'\d++(?<=2(*PRUNE)3)zzd|[3d]$',3118 '124zzd')[0], 'd')3119 self.assertEqual(regex.search(r'(?r)\d(*PRUNE)\d+', '123'), None)3120 self.assertEqual(regex.search(r'(?r)\d(?<=(*PRUNE))\d+', '123')[0],3121 '123')3122 self.assertEqual(regex.search(r'(?r)\d+(*PRUNE)bcd|[3d]',3123 '123bcd')[0], '123bcd')3124 self.assertEqual(regex.search(r'(?r)\d+(*PRUNE)bcd|[3d]',3125 '123zzd')[0], 'd')3126 self.assertEqual(regex.search(r'(?r)\d++(?<=3(*PRUNE))zzd|[4d]$',3127 '123zzd')[0], '123zzd')3128 self.assertEqual(regex.search(r'(?r)\d++(?<=3(*PRUNE))zzd|[4d]$',3129 '124zzd')[0], 'd')3130 self.assertEqual(regex.search(r'(?r)\d++(?<=(*PRUNE)3)zzd|[4d]$',3131 '124zzd')[0], 'd')3132 self.assertEqual(regex.search(r'(?r)\d++(?<=2(*PRUNE)3)zzd|[3d]$',3133 '124zzd')[0], 'd')3134 self.assertEqual(regex.search(r'\d+(*SKIP)bcd|[3d]', '123bcd')[0],3135 '123bcd')3136 
self.assertEqual(regex.search(r'\d+(*SKIP)bcd|[3d]', '123zzd')[0],3137 'd')3138 self.assertEqual(regex.search(r'\d+?(*SKIP)bcd|[3d]', '123bcd')[0],3139 '3bcd')3140 self.assertEqual(regex.search(r'\d+?(*SKIP)bcd|[3d]', '123zzd')[0],3141 'd')3142 self.assertEqual(regex.search(r'\d++(?<=3(*SKIP))zzd|[4d]$',3143 '123zzd')[0], '123zzd')3144 self.assertEqual(regex.search(r'\d++(?<=3(*SKIP))zzd|[4d]$',3145 '124zzd')[0], 'd')3146 self.assertEqual(regex.search(r'\d++(?<=(*SKIP)3)zzd|[4d]$',3147 '124zzd')[0], 'd')3148 self.assertEqual(regex.search(r'\d++(?<=2(*SKIP)3)zzd|[3d]$',3149 '124zzd')[0], 'd')3150 self.assertEqual(regex.search(r'(?r)\d+(*SKIP)bcd|[3d]', '123bcd')[0],3151 '123bcd')3152 self.assertEqual(regex.search(r'(?r)\d+(*SKIP)bcd|[3d]', '123zzd')[0],3153 'd')3154 self.assertEqual(regex.search(r'(?r)\d++(?<=3(*SKIP))zzd|[4d]$',3155 '123zzd')[0], '123zzd')3156 self.assertEqual(regex.search(r'(?r)\d++(?<=3(*SKIP))zzd|[4d]$',3157 '124zzd')[0], 'd')3158 self.assertEqual(regex.search(r'(?r)\d++(?<=(*SKIP)3)zzd|[4d]$',3159 '124zzd')[0], 'd')3160 self.assertEqual(regex.search(r'(?r)\d++(?<=2(*SKIP)3)zzd|[3d]$',3161 '124zzd')[0], 'd')3162 # Hg issue 154: Segmentation fault 11 when working with an atomic group3163 text = """June 30, December 31, 2013 20123164some words follow:3165more words and numbers 1,234,567 9,876,5423166more words and numbers 1,234,567 9,876,542"""3167 self.assertEqual(len(regex.findall(r'(?<!\d)(?>2014|2013 ?2012)', text)), 1)3168 # Hg issue 156: regression on atomic grouping3169 self.assertEqual(regex.match('1(?>2)', '12').span(), (0, 2))3170 # Hg issue 157: regression: segfault on complex lookaround3171 self.assertEqual(regex.match(r'(?V1w)(?=(?=[^A-Z]*+[A-Z])(?=[^a-z]*+[a-z]))(?=\D*+\d)(?=\p{Alphanumeric}*+\P{Alphanumeric})\A(?s:.){8,255}+\Z',3172 'AAaa11!!')[0], 'AAaa11!!')3173 # Hg issue 158: Group issue with (?(DEFINE)...)3174 TEST_REGEX = regex.compile(r'''(?smx)3175(?(DEFINE)3176 (?<subcat>3177 ^,[^,]+,3178 )3179)3180# Group 2 is defined on 
this line3181^,([^,]+),3182(?:(?!(?&subcat)[\r\n]+(?&subcat)).)+3183''')3184 TEST_DATA = '''3185,Cat 1,3186,Brand 1,3187some3188thing3189,Brand 2,3190other3191things3192,Cat 2,3193,Brand,3194Some3195thing3196'''3197 self.assertEqual([m.span(1, 2) for m in3198 TEST_REGEX.finditer(TEST_DATA)], [((-1, -1), (2, 7)), ((-1, -1), (54,3199 59))])3200 # Hg issue 161: Unexpected fuzzy match results3201 self.assertEqual(regex.search('(abcdefgh){e}',3202 '******abcdefghijklmnopqrtuvwxyz', regex.BESTMATCH).span(), (6, 14))3203 self.assertEqual(regex.search('(abcdefghi){e}',3204 '******abcdefghijklmnopqrtuvwxyz', regex.BESTMATCH).span(), (6, 15))3205 # Hg issue 163: allow lookarounds in conditionals.3206 self.assertEqual(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc').span(),3207 (0, 6))3208 self.assertEqual(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'), None)3209 self.assertEqual(regex.search(r'(?(?<=love\s)you|(?<=hate\s)her)',3210 "I love you").span(), (7, 10))3211 self.assertEqual(regex.findall(r'(?(?<=love\s)you|(?<=hate\s)her)',3212 "I love you but I don't hate her either"), ['you', 'her'])3213 # Hg issue 180: bug of POSIX matching.3214 self.assertEqual(regex.search(r'(?p)a*(.*?)', 'aaabbb').group(0, 1),3215 ('aaabbb', 'bbb'))3216 self.assertEqual(regex.search(r'(?p)a*(.*)', 'aaabbb').group(0, 1),3217 ('aaabbb', 'bbb'))3218 self.assertEqual(regex.sub(r'(?p)a*(.*?)', r'\1', 'aaabbb'), 'bbb')3219 self.assertEqual(regex.sub(r'(?p)a*(.*)', r'\1', 'aaabbb'), 'bbb')3220 # Hg issue 192: Named lists reverse matching doesn't work with3221 # IGNORECASE and V13222 self.assertEqual(regex.match(r'(?irV0)\L<kw>', '21', kw=['1']).span(),3223 (1, 2))3224 self.assertEqual(regex.match(r'(?irV1)\L<kw>', '21', kw=['1']).span(),3225 (1, 2))3226 # Hg issue 193: Alternation and .REVERSE flag.3227 self.assertEqual(regex.search('a|b', '111a222').span(), (3, 4))3228 self.assertEqual(regex.search('(?r)a|b', '111a222').span(), (3, 4))3229 # Hg issue 194: .FULLCASE and Backreference3230 
self.assertEqual(regex.search(r'(?if)<(CLI)><\1>',3231 '<cli><cli>').span(), (0, 10))3232 self.assertEqual(regex.search(r'(?if)<(CLI)><\1>',3233 '<cli><clI>').span(), (0, 10))3234 self.assertEqual(regex.search(r'(?ifr)<\1><(CLI)>',3235 '<cli><clI>').span(), (0, 10))3236 # Hg issue 195: Pickle (or otherwise serial) the compiled regex3237 r = regex.compile(r'\L<options>', options=['foo', 'bar'])3238 p = pickle.dumps(r)3239 r = pickle.loads(p)3240 self.assertEqual(r.match('foo').span(), (0, 3))3241 # Hg issue 196: Fuzzy matching on repeated regex not working as3242 # expected3243 self.assertEqual(regex.match('(x{6}){e<=1}', 'xxxxxx',3244 flags=regex.BESTMATCH).span(), (0, 6))3245 self.assertEqual(regex.match('(x{6}){e<=1}', 'xxxxx',3246 flags=regex.BESTMATCH).span(), (0, 5))3247 self.assertEqual(regex.match('(x{6}){e<=1}', 'x',3248 flags=regex.BESTMATCH), None)3249 self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'xxxxxx',3250 flags=regex.BESTMATCH).span(), (0, 6))3251 self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'xxxxx',3252 flags=regex.BESTMATCH).span(), (0, 5))3253 self.assertEqual(regex.match('(?r)(x{6}){e<=1}', 'x',3254 flags=regex.BESTMATCH), None)3255 # Hg issue 197: ValueError in regex.compile3256 self.assertRaises(regex.error, lambda:3257 regex.compile(b'00000\\0\\00\^\50\\00\U05000000'))3258 # Hg issue 198: ValueError in regex.compile3259 self.assertRaises(regex.error, lambda: regex.compile(b"{e<l"))3260 # Hg issue 199: Segfault in re.compile3261 self.assertEquals(bool(regex.compile('((?0)){e}')), True)3262 # Hg issue 200: AttributeError in regex.compile with latest regex3263 self.assertEquals(bool(regex.compile('\x00?(?0){e}')), True)3264 # Hg issue 201: ENHANCEMATCH crashes interpreter3265 self.assertEquals(regex.findall(r'((brown)|(lazy)){1<=e<=3} ((dog)|(fox)){1<=e<=3}',3266 'The quick borwn fax jumped over the lzy hog', regex.ENHANCEMATCH),3267 [('borwn', 'borwn', '', 'fax', '', 'fax'), ('lzy', '', 'lzy', 'hog',3268 'hog', '')])3269 # Hg issue 203: 
partial matching bug3270 self.assertEquals(regex.search(r'\d\d\d-\d\d-\d\d\d\d',3271 "My SSN is 999-89-76, but don't tell.", partial=True).span(), (36,3272 36))3273 # Hg issue 204: confusion of (?aif) flags3274 upper_i = '\N{CYRILLIC CAPITAL LETTER SHORT I}'3275 lower_i = '\N{CYRILLIC SMALL LETTER SHORT I}'3276 self.assertEquals(bool(regex.match(r'(?ui)' + upper_i,3277 lower_i)), True)3278 self.assertEquals(bool(regex.match(r'(?ui)' + lower_i,3279 upper_i)), True)3280 self.assertEquals(bool(regex.match(r'(?ai)' + upper_i,3281 lower_i)), False)3282 self.assertEquals(bool(regex.match(r'(?ai)' + lower_i,3283 upper_i)), False)3284 self.assertEquals(bool(regex.match(r'(?afi)' + upper_i,3285 lower_i)), False)3286 self.assertEquals(bool(regex.match(r'(?afi)' + lower_i,3287 upper_i)), False)3288 # Hg issue 205: Named list and (?ri) flags3289 self.assertEquals(bool(regex.search(r'(?i)\L<aa>', '22', aa=['121',3290 '22'])), True)3291 self.assertEquals(bool(regex.search(r'(?ri)\L<aa>', '22', aa=['121',3292 '22'])), True)3293 self.assertEquals(bool(regex.search(r'(?fi)\L<aa>', '22', aa=['121',3294 '22'])), True)3295 self.assertEquals(bool(regex.search(r'(?fri)\L<aa>', '22', aa=['121',3296 '22'])), True)3297 # Hg issue 208: Named list, (?ri) flags, Backreference3298 self.assertEquals(regex.search(r'(?r)\1dog..(?<=(\L<aa>))$', 'ccdogcc',3299 aa=['bcb', 'cc']). span(), (0, 7))3300 self.assertEquals(regex.search(r'(?ir)\1dog..(?<=(\L<aa>))$',3301 'ccdogcc', aa=['bcb', 'cc']). 
span(), (0, 7))3302 # Hg issue 210: Fuzzy matching and Backreference3303 self.assertEquals(regex.search(r'(2)(?:\1{5}){e<=1}',3304 '3222212').span(), (1, 7))3305 self.assertEquals(regex.search(r'(\d)(?:\1{5}){e<=1}',3306 '3222212').span(), (1, 7))3307 # Hg issue 211: Segmentation fault with recursive matches and atomic3308 # groups3309 self.assertEquals(regex.match(r'''\A(?P<whole>(?>\((?&whole)\)|[+\-]))\Z''',3310 '((-))').span(), (0, 5))3311 self.assertEquals(regex.match(r'''\A(?P<whole>(?>\((?&whole)\)|[+\-]))\Z''',3312 '((-)+)'), None)3313 # Hg issue 212: Unexpected matching difference with .*? between re and3314 # regex3315 self.assertEquals(regex.match(r"x.*? (.).*\1(.*)\1",3316 'x |y| z|').span(), (0, 9))3317 self.assertEquals(regex.match(r"\.sr (.*?) (.)(.*)\2(.*)\2(.*)",3318 r'.sr h |<nw>|<span class="locked">|').span(), (0, 35))3319 # Hg issue 213: Segmentation Fault3320 a = '"\\xF9\\x80\\xAEqdz\\x95L\\xA7\\x89[\\xFE \\x91)\\xF9]\\xDB\'\\x99\\x09=\\x00\\xFD\\x98\\x22\\xDD\\xF1\\xB6\\xC3 Z\\xB6gv\\xA5x\\x93P\\xE1r\\x14\\x8Cv\\x0C\\xC0w\\x15r\\xFFc%" '3321 py_regex_pattern = r'''(?P<http_referer>((?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)))) (?P<useragent>((?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))))'''3322 self.assertEqual(bool(regex.search(py_regex_pattern, a)), False)3323 # Hg Issue 216: Invalid match when using negative lookbehind and pipe3324 self.assertEqual(bool(regex.match('foo(?<=foo)', 'foo')), True)3325 self.assertEqual(bool(regex.match('foo(?<!foo)', 'foo')), False)3326 self.assertEqual(bool(regex.match('foo(?<=foo|x)', 'foo')), True)3327 self.assertEqual(bool(regex.match('foo(?<!foo|x)', 'foo')), False)3328 # Hg issue 217: Core dump in conditional ahead match and matching \!3329 # character3330 self.assertEqual(bool(regex.match(r'(?(?=.*\!.*)(?P<true>.*\!\w*\:.*)|(?P<false>.*))',3331 '!')), False)3332 # Hg issue 220: Misbehavior of group capture with OR 
operand3333 self.assertEqual(regex.match(r'\w*(ea)\w*|\w*e(?!a)\w*',3334 'easier').groups(), ('ea', ))3335 # Hg issue 225: BESTMATCH in fuzzy match not working3336 self.assertEqual(regex.search('(^1234$){i,d}', '12234',3337 regex.BESTMATCH).span(), (0, 5))3338 self.assertEqual(regex.search('(^1234$){i,d}', '12234',3339 regex.BESTMATCH).fuzzy_counts, (0, 1, 0))3340 self.assertEqual(regex.search('(^1234$){s,i,d}', '12234',3341 regex.BESTMATCH).span(), (0, 5))3342 self.assertEqual(regex.search('(^1234$){s,i,d}', '12234',3343 regex.BESTMATCH).fuzzy_counts, (0, 1, 0))3344 # Hg issue 226: Error matching at start of string3345 self.assertEqual(regex.search('(^123$){s,i,d}', 'xxxxxxxx123',3346 regex.BESTMATCH).span(), (0, 11))3347 self.assertEqual(regex.search('(^123$){s,i,d}', 'xxxxxxxx123',3348 regex.BESTMATCH).fuzzy_counts, (0, 8, 0))3349 # Hg issue 227: Incorrect behavior for ? operator with UNICODE +3350 # IGNORECASE3351 self.assertEqual(regex.search(r'a?yz', 'xxxxyz', flags=regex.FULLCASE |3352 regex.IGNORECASE).span(), (4, 6))3353 # Hg issue 230: Is it a bug of (?(DEFINE)...)3354 self.assertEqual(regex.findall(r'(?:(?![a-d]).)+', 'abcdefgh'),3355 ['efgh'])3356 self.assertEqual(regex.findall(r'''(?(DEFINE)(?P<mydef>(?:(?![a-d]).)))(?&mydef)+''',3357 'abcdefgh'), ['efgh'])3358 # Hg issue 238: Not fully re backward compatible3359 self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){1,3}',3360 '"Erm....yes. T..T...Thank you for that."'), [('Erm....', 'Erm',3361 '....'), ('T...', 'T', '...')])3362 self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){3}',3363 '"Erm....yes. T..T...Thank you for that."'), [])3364 self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){2}',3365 '"Erm....yes. T..T...Thank you for that."'), [('T...', 'T', '...')])3366 self.assertEqual(regex.findall(r'((\w{1,3})(\.{2,10})){1}',3367 '"Erm....yes. 
T..T...Thank you for that."'), [('Erm....', 'Erm',3368 '....'), ('T..', 'T', '..'), ('T...', 'T', '...')])3369 # Hg issue 247: Unexpected result with fuzzy matching and lookahead3370 # expression3371 self.assertEqual(regex.search(r'(?:ESTONIA(?!\w)){e<=1}',3372 'ESTONIAN WORKERS').group(), 'ESTONIAN')3373 self.assertEqual(regex.search(r'(?:ESTONIA(?=\W)){e<=1}',3374 'ESTONIAN WORKERS').group(), 'ESTONIAN')3375 self.assertEqual(regex.search(r'(?:(?<!\w)ESTONIA){e<=1}',3376 'BLUB NESTONIA').group(), 'NESTONIA')3377 self.assertEqual(regex.search(r'(?:(?<=\W)ESTONIA){e<=1}',3378 'BLUB NESTONIA').group(), 'NESTONIA')3379 self.assertEqual(regex.search(r'(?r)(?:ESTONIA(?!\w)){e<=1}',3380 'ESTONIAN WORKERS').group(), 'ESTONIAN')3381 self.assertEqual(regex.search(r'(?r)(?:ESTONIA(?=\W)){e<=1}',3382 'ESTONIAN WORKERS').group(), 'ESTONIAN')3383 self.assertEqual(regex.search(r'(?r)(?:(?<!\w)ESTONIA){e<=1}',3384 'BLUB NESTONIA').group(), 'NESTONIA')3385 self.assertEqual(regex.search(r'(?r)(?:(?<=\W)ESTONIA){e<=1}',3386 'BLUB NESTONIA').group(), 'NESTONIA')3387 # Hg issue 248: Unexpected result with fuzzy matching and more than one3388 # non-greedy quantifier3389 self.assertEquals(regex.search(r'(?:A.*B.*CDE){e<=2}',3390 'A B CYZ').group(), 'A B CYZ')3391 self.assertEquals(regex.search(r'(?:A.*B.*?CDE){e<=2}',3392 'A B CYZ').group(), 'A B CYZ')3393 self.assertEquals(regex.search(r'(?:A.*?B.*CDE){e<=2}',3394 'A B CYZ').group(), 'A B CYZ')3395 self.assertEquals(regex.search(r'(?:A.*?B.*?CDE){e<=2}',3396 'A B CYZ').group(), 'A B CYZ')3397 # Hg issue 249: Add an option to regex.escape() to not escape spaces3398 self.assertEquals(regex.escape(' ,0A[', special_only=False, literal_spaces=False), '\\ \\,0A\\[')3399 self.assertEquals(regex.escape(' ,0A[', special_only=False, literal_spaces=True), ' \\,0A\\[')3400 self.assertEquals(regex.escape(' ,0A[', special_only=True, literal_spaces=False), '\\ ,0A\\[')3401 self.assertEquals(regex.escape(' ,0A[', special_only=True, 
literal_spaces=True), ' ,0A\\[')3402 self.assertEquals(regex.escape(' ,0A['), '\\ ,0A\\[')3403 # Hg issue 251: Segfault with a particular expression3404 self.assertEquals(regex.search(r'(?(?=A)A|B)', 'A').span(), (0, 1))3405 self.assertEquals(regex.search(r'(?(?=A)A|B)', 'B').span(), (0, 1))3406 self.assertEquals(regex.search(r'(?(?=A)A|)', 'B').span(), (0, 0))3407 self.assertEquals(regex.search(r'(?(?=X)X|)', '').span(), (0, 0))3408 self.assertEquals(regex.search(r'(?(?=X))', '').span(), (0, 0))3409 # Hg issue 252: Empty capture strings when using DEFINE group reference3410 # within look-behind expression3411 self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?&func)',3412 'abc').groups(), (None, ))3413 self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?&func)',3414 'abc').groupdict(), {'func': None})3415 self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?&func)',3416 'abc').capturesdict(), {'func': ['a']})3417 self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?=(?&func))',3418 'abc').groups(), (None, ))3419 self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?=(?&func))',3420 'abc').groupdict(), {'func': None})3421 self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.))(?=(?&func))',3422 'abc').capturesdict(), {'func': ['a']})3423 self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.)).(?<=(?&func))',3424 'abc').groups(), (None, ))3425 self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.)).(?<=(?&func))',3426 'abc').groupdict(), {'func': None})3427 self.assertEquals(regex.search(r'(?(DEFINE)(?<func>.)).(?<=(?&func))',3428 'abc').capturesdict(), {'func': ['a']})3429 def test_subscripted_captures(self):3430 self.assertEqual(regex.match(r'(?P<x>.)+',3431 'abc').expandf('{0} {0[0]} {0[-1]}'), 'abc abc abc')3432 self.assertEqual(regex.match(r'(?P<x>.)+',3433 'abc').expandf('{1} {1[0]} {1[1]} {1[2]} {1[-1]} {1[-2]} {1[-3]}'),3434 'c a b c c b a')3435 self.assertEqual(regex.match(r'(?P<x>.)+',3436 'abc').expandf('{x} {x[0]} {x[1]} {x[2]} {x[-1]} 
{x[-2]} {x[-3]}'),3437 'c a b c c b a')3438 self.assertEqual(regex.subf(r'(?P<x>.)+', r'{0} {0[0]} {0[-1]}',3439 'abc'), 'abc abc abc')3440 self.assertEqual(regex.subf(r'(?P<x>.)+',3441 '{1} {1[0]} {1[1]} {1[2]} {1[-1]} {1[-2]} {1[-3]}', 'abc'),3442 'c a b c c b a')3443 self.assertEqual(regex.subf(r'(?P<x>.)+',3444 '{x} {x[0]} {x[1]} {x[2]} {x[-1]} {x[-2]} {x[-3]}', 'abc'),3445 'c a b c c b a')3446if sys.version_info < (3, 2, 0):3447 # In Python 3.1 it's called assertRaisesRegexp.3448 RegexTests.assertRaisesRegex = RegexTests.assertRaisesRegexp3449def test_main():3450 run_unittest(RegexTests)3451if __name__ == "__main__":...

Full Screen

Full Screen

mode-kotlin.js

Source:mode-kotlin.js Github

copy

Full Screen

1define("ace/mode/kotlin_highlight_rules",["require","exports","module","ace/lib/oop","ace/mode/text_highlight_rules"], function(require, exports, module) {2"use strict";3var oop = require("../lib/oop");4var TextHighlightRules = require("./text_highlight_rules").TextHighlightRules;5var KotlinHighlightRules = function() {6 this.$rules = {7 start: [{8 include: "#comments"9 }, {10 token: [11 "text",12 "keyword.other.kotlin",13 "text",14 "entity.name.package.kotlin",15 "text"16 ],17 regex: /^(\s*)(package)\b(?:(\s*)([^ ;$]+)(\s*))?/18 }, {19 include: "#imports"20 }, {21 include: "#statements"22 }],23 "#classes": [{24 token: "text",25 regex: /(?=\s*(?:companion|class|object|interface))/,26 push: [{27 token: "text",28 regex: /}|(?=$)/,29 next: "pop"30 }, {31 token: ["keyword.other.kotlin", "text"],32 regex: /\b((?:companion\s*)?)(class|object|interface)\b/,33 push: [{34 token: "text",35 regex: /(?=<|{|\(|:)/,36 next: "pop"37 }, {38 token: "keyword.other.kotlin",39 regex: /\bobject\b/40 }, {41 token: "entity.name.type.class.kotlin",42 regex: /\w+/43 }]44 }, {45 token: "text",46 regex: /</,47 push: [{48 token: "text",49 regex: />/,50 next: "pop"51 }, {52 include: "#generics"53 }]54 }, {55 token: "text",56 regex: /\(/,57 push: [{58 token: "text",59 regex: /\)/,60 next: "pop"61 }, {62 include: "#parameters"63 }]64 }, {65 token: "keyword.operator.declaration.kotlin",66 regex: /:/,67 push: [{68 token: "text",69 regex: /(?={|$)/,70 next: "pop"71 }, {72 token: "entity.other.inherited-class.kotlin",73 regex: /\w+/74 }, {75 token: "text",76 regex: /\(/,77 push: [{78 token: "text",79 regex: /\)/,80 next: "pop"81 }, {82 include: "#expressions"83 }]84 }]85 }, {86 token: "text",87 regex: /\{/,88 push: [{89 token: "text",90 regex: /\}/,91 next: "pop"92 }, {93 include: "#statements"94 }]95 }]96 }],97 "#comments": [{98 token: "punctuation.definition.comment.kotlin",99 regex: /\/\*/,100 push: [{101 token: "punctuation.definition.comment.kotlin",102 regex: /\*\//,103 next: "pop"104 }, {105 
defaultToken: "comment.block.kotlin"106 }]107 }, {108 token: [109 "text",110 "punctuation.definition.comment.kotlin",111 "comment.line.double-slash.kotlin"112 ],113 regex: /(\s*)(\/\/)(.*$)/114 }],115 "#constants": [{116 token: "constant.language.kotlin",117 regex: /\b(?:true|false|null|this|super)\b/118 }, {119 token: "constant.numeric.kotlin",120 regex: /\b(?:0(?:x|X)[0-9a-fA-F]*|(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:(?:e|E)(?:\+|-)?[0-9]+)?)(?:[LlFfUuDd]|UL|ul)?\b/121 }, {122 token: "constant.other.kotlin",123 regex: /\b[A-Z][A-Z0-9_]+\b/124 }],125 "#expressions": [{126 token: "text",127 regex: /\(/,128 push: [{129 token: "text",130 regex: /\)/,131 next: "pop"132 }, {133 include: "#expressions"134 }]135 }, {136 include: "#types"137 }, {138 include: "#strings"139 }, {140 include: "#constants"141 }, {142 include: "#comments"143 }, {144 include: "#keywords"145 }],146 "#functions": [{147 token: "text",148 regex: /(?=\s*fun)/,149 push: [{150 token: "text",151 regex: /}|(?=$)/,152 next: "pop"153 }, {154 token: "keyword.other.kotlin",155 regex: /\bfun\b/,156 push: [{157 token: "text",158 regex: /(?=\()/,159 next: "pop"160 }, {161 token: "text",162 regex: /</,163 push: [{164 token: "text",165 regex: />/,166 next: "pop"167 }, {168 include: "#generics"169 }]170 }, {171 token: ["text", "entity.name.function.kotlin"],172 regex: /((?:[\.<\?>\w]+\.)?)(\w+)/173 }]174 }, {175 token: "text",176 regex: /\(/,177 push: [{178 token: "text",179 regex: /\)/,180 next: "pop"181 }, {182 include: "#parameters"183 }]184 }, {185 token: "keyword.operator.declaration.kotlin",186 regex: /:/,187 push: [{188 token: "text",189 regex: /(?={|=|$)/,190 next: "pop"191 }, {192 include: "#types"193 }]194 }, {195 token: "text",196 regex: /\{/,197 push: [{198 token: "text",199 regex: /(?=\})/,200 next: "pop"201 }, {202 include: "#statements"203 }]204 }, {205 token: "keyword.operator.assignment.kotlin",206 regex: /=/,207 push: [{208 token: "text",209 regex: /(?=$)/,210 next: "pop"211 }, {212 include: 
"#expressions"213 }]214 }]215 }],216 "#generics": [{217 token: "keyword.operator.declaration.kotlin",218 regex: /:/,219 push: [{220 token: "text",221 regex: /(?=,|>)/,222 next: "pop"223 }, {224 include: "#types"225 }]226 }, {227 include: "#keywords"228 }, {229 token: "storage.type.generic.kotlin",230 regex: /\w+/231 }],232 "#getters-and-setters": [{233 token: ["entity.name.function.kotlin", "text"],234 regex: /\b(get)\b(\s*\(\s*\))/,235 push: [{236 token: "text",237 regex: /\}|(?=\bset\b)|$/,238 next: "pop"239 }, {240 token: "keyword.operator.assignment.kotlin",241 regex: /=/,242 push: [{243 token: "text",244 regex: /(?=$|\bset\b)/,245 next: "pop"246 }, {247 include: "#expressions"248 }]249 }, {250 token: "text",251 regex: /\{/,252 push: [{253 token: "text",254 regex: /\}/,255 next: "pop"256 }, {257 include: "#expressions"258 }]259 }]260 }, {261 token: ["entity.name.function.kotlin", "text"],262 regex: /\b(set)\b(\s*)(?=\()/,263 push: [{264 token: "text",265 regex: /\}|(?=\bget\b)|$/,266 next: "pop"267 }, {268 token: "text",269 regex: /\(/,270 push: [{271 token: "text",272 regex: /\)/,273 next: "pop"274 }, {275 include: "#parameters"276 }]277 }, {278 token: "keyword.operator.assignment.kotlin",279 regex: /=/,280 push: [{281 token: "text",282 regex: /(?=$|\bset\b)/,283 next: "pop"284 }, {285 include: "#expressions"286 }]287 }, {288 token: "text",289 regex: /\{/,290 push: [{291 token: "text",292 regex: /\}/,293 next: "pop"294 }, {295 include: "#expressions"296 }]297 }]298 }],299 "#imports": [{300 token: [301 "text",302 "keyword.other.kotlin",303 "text",304 "keyword.other.kotlin"305 ],306 regex: /^(\s*)(import)(\s+[^ $]+\s+)((?:as)?)/307 }],308 "#keywords": [{309 token: "storage.modifier.kotlin",310 regex: /\b(?:var|val|public|private|protected|abstract|final|enum|open|attribute|annotation|override|inline|var|val|vararg|lazy|in|out|internal|data|tailrec|operator|infix|const|yield|typealias|typeof)\b/311 }, {312 token: "keyword.control.catch-exception.kotlin",313 
regex: /\b(?:try|catch|finally|throw)\b/314 }, {315 token: "keyword.control.kotlin",316 regex: /\b(?:if|else|while|for|do|return|when|where|break|continue)\b/317 }, {318 token: "keyword.operator.kotlin",319 regex: /\b(?:in|is|as|assert)\b/320 }, {321 token: "keyword.operator.comparison.kotlin",322 regex: /==|!=|===|!==|<=|>=|<|>/323 }, {324 token: "keyword.operator.assignment.kotlin",325 regex: /=/326 }, {327 token: "keyword.operator.declaration.kotlin",328 regex: /:/329 }, {330 token: "keyword.operator.dot.kotlin",331 regex: /\./332 }, {333 token: "keyword.operator.increment-decrement.kotlin",334 regex: /\-\-|\+\+/335 }, {336 token: "keyword.operator.arithmetic.kotlin",337 regex: /\-|\+|\*|\/|%/338 }, {339 token: "keyword.operator.arithmetic.assign.kotlin",340 regex: /\+=|\-=|\*=|\/=/341 }, {342 token: "keyword.operator.logical.kotlin",343 regex: /!|&&|\|\|/344 }, {345 token: "keyword.operator.range.kotlin",346 regex: /\.\./347 }, {348 token: "punctuation.terminator.kotlin",349 regex: /;/350 }],351 "#namespaces": [{352 token: "keyword.other.kotlin",353 regex: /\bnamespace\b/354 }, {355 token: "text",356 regex: /\{/,357 push: [{358 token: "text",359 regex: /\}/,360 next: "pop"361 }, {362 include: "#statements"363 }]364 }],365 "#parameters": [{366 token: "keyword.operator.declaration.kotlin",367 regex: /:/,368 push: [{369 token: "text",370 regex: /(?=,|\)|=)/,371 next: "pop"372 }, {373 include: "#types"374 }]375 }, {376 token: "keyword.operator.declaration.kotlin",377 regex: /=/,378 push: [{379 token: "text",380 regex: /(?=,|\))/,381 next: "pop"382 }, {383 include: "#expressions"384 }]385 }, {386 include: "#keywords"387 }, {388 token: "variable.parameter.function.kotlin",389 regex: /\w+/390 }],391 "#statements": [{392 include: "#namespaces"393 }, {394 include: "#typedefs"395 }, {396 include: "#classes"397 }, {398 include: "#functions"399 }, {400 include: "#variables"401 }, {402 include: "#getters-and-setters"403 }, {404 include: "#expressions"405 }],406 "#strings": 
[{407 token: "punctuation.definition.string.begin.kotlin",408 regex: /"""/,409 push: [{410 token: "punctuation.definition.string.end.kotlin",411 regex: /"""/,412 next: "pop"413 }, {414 token: "variable.parameter.template.kotlin",415 regex: /\$\w+|\$\{[^\}]+\}/416 }, {417 token: "constant.character.escape.kotlin",418 regex: /\\./419 }, {420 defaultToken: "string.quoted.third.kotlin"421 }]422 }, {423 token: "punctuation.definition.string.begin.kotlin",424 regex: /"/,425 push: [{426 token: "punctuation.definition.string.end.kotlin",427 regex: /"/,428 next: "pop"429 }, {430 token: "variable.parameter.template.kotlin",431 regex: /\$\w+|\$\{[^\}]+\}/432 }, {433 token: "constant.character.escape.kotlin",434 regex: /\\./435 }, {436 defaultToken: "string.quoted.double.kotlin"437 }]438 }, {439 token: "punctuation.definition.string.begin.kotlin",440 regex: /'/,441 push: [{442 token: "punctuation.definition.string.end.kotlin",443 regex: /'/,444 next: "pop"445 }, {446 token: "constant.character.escape.kotlin",447 regex: /\\./448 }, {449 defaultToken: "string.quoted.single.kotlin"450 }]451 }, {452 token: "punctuation.definition.string.begin.kotlin",453 regex: /`/,454 push: [{455 token: "punctuation.definition.string.end.kotlin",456 regex: /`/,457 next: "pop"458 }, {459 defaultToken: "string.quoted.single.kotlin"460 }]461 }],462 "#typedefs": [{463 token: "text",464 regex: /(?=\s*type)/,465 push: [{466 token: "text",467 regex: /(?=$)/,468 next: "pop"469 }, {470 token: "keyword.other.kotlin",471 regex: /\btype\b/472 }, {473 token: "text",474 regex: /</,475 push: [{476 token: "text",477 regex: />/,478 next: "pop"479 }, {480 include: "#generics"481 }]482 }, {483 include: "#expressions"484 }]485 }],486 "#types": [{487 token: "storage.type.buildin.kotlin",488 regex: /\b(?:Any|Unit|String|Int|Boolean|Char|Long|Double|Float|Short|Byte|dynamic)\b/489 }, {490 token: "storage.type.buildin.array.kotlin",491 regex: 
/\b(?:IntArray|BooleanArray|CharArray|LongArray|DoubleArray|FloatArray|ShortArray|ByteArray)\b/492 }, {493 token: [494 "storage.type.buildin.collection.kotlin",495 "text"496 ],497 regex: /\b(Array|List|Map)(<\b)/,498 push: [{499 token: "text",500 regex: />/,501 next: "pop"502 }, {503 include: "#types"504 }, {505 include: "#keywords"506 }]507 }, {508 token: "text",509 regex: /\w+</,510 push: [{511 token: "text",512 regex: />/,513 next: "pop"514 }, {515 include: "#types"516 }, {517 include: "#keywords"518 }]519 }, {520 token: ["keyword.operator.tuple.kotlin", "text"],521 regex: /(#)(\()/,522 push: [{523 token: "text",524 regex: /\)/,525 next: "pop"526 }, {527 include: "#expressions"528 }]529 }, {530 token: "text",531 regex: /\{/,532 push: [{533 token: "text",534 regex: /\}/,535 next: "pop"536 }, {537 include: "#statements"538 }]539 }, {540 token: "text",541 regex: /\(/,542 push: [{543 token: "text",544 regex: /\)/,545 next: "pop"546 }, {547 include: "#types"548 }]549 }, {550 token: "keyword.operator.declaration.kotlin",551 regex: /->/552 }],553 "#variables": [{554 token: "text",555 regex: /(?=\s*(?:var|val))/,556 push: [{557 token: "text",558 regex: /(?=:|=|$)/,559 next: "pop"560 }, {561 token: "keyword.other.kotlin",562 regex: /\b(?:var|val)\b/,563 push: [{564 token: "text",565 regex: /(?=:|=|$)/,566 next: "pop"567 }, {568 token: "text",569 regex: /</,570 push: [{571 token: "text",572 regex: />/,573 next: "pop"574 }, {575 include: "#generics"576 }]577 }, {578 token: ["text", "entity.name.variable.kotlin"],579 regex: /((?:[\.<\?>\w]+\.)?)(\w+)/580 }]581 }, {582 token: "keyword.operator.declaration.kotlin",583 regex: /:/,584 push: [{585 token: "text",586 regex: /(?==|$)/,587 next: "pop"588 }, {589 include: "#types"590 }, {591 include: "#getters-and-setters"592 }]593 }, {594 token: "keyword.operator.assignment.kotlin",595 regex: /=/,596 push: [{597 token: "text",598 regex: /(?=$)/,599 next: "pop"600 }, {601 include: "#expressions"602 }, {603 include: 
"#getters-and-setters"604 }]605 }]606 }]607 }608 609 this.normalizeRules();610};611KotlinHighlightRules.metaData = {612 fileTypes: ["kt", "kts"],613 name: "Kotlin",614 scopeName: "source.Kotlin"615}616oop.inherits(KotlinHighlightRules, TextHighlightRules);617exports.KotlinHighlightRules = KotlinHighlightRules;618});619define("ace/mode/folding/cstyle",["require","exports","module","ace/lib/oop","ace/range","ace/mode/folding/fold_mode"], function(require, exports, module) {620"use strict";621var oop = require("../../lib/oop");622var Range = require("../../range").Range;623var BaseFoldMode = require("./fold_mode").FoldMode;624var FoldMode = exports.FoldMode = function(commentRegex) {625 if (commentRegex) {626 this.foldingStartMarker = new RegExp(627 this.foldingStartMarker.source.replace(/\|[^|]*?$/, "|" + commentRegex.start)628 );629 this.foldingStopMarker = new RegExp(630 this.foldingStopMarker.source.replace(/\|[^|]*?$/, "|" + commentRegex.end)631 );632 }633};634oop.inherits(FoldMode, BaseFoldMode);635(function() {636 637 this.foldingStartMarker = /(\{|\[)[^\}\]]*$|^\s*(\/\*)/;638 this.foldingStopMarker = /^[^\[\{]*(\}|\])|^[\s\*]*(\*\/)/;639 this.singleLineBlockCommentRe= /^\s*(\/\*).*\*\/\s*$/;640 this.tripleStarBlockCommentRe = /^\s*(\/\*\*\*).*\*\/\s*$/;641 this.startRegionRe = /^\s*(\/\*|\/\/)#?region\b/;642 this._getFoldWidgetBase = this.getFoldWidget;643 this.getFoldWidget = function(session, foldStyle, row) {644 var line = session.getLine(row);645 646 if (this.singleLineBlockCommentRe.test(line)) {647 if (!this.startRegionRe.test(line) && !this.tripleStarBlockCommentRe.test(line))648 return "";649 }650 651 var fw = this._getFoldWidgetBase(session, foldStyle, row);652 653 if (!fw && this.startRegionRe.test(line))654 return "start"; // lineCommentRegionStart655 656 return fw;657 };658 this.getFoldWidgetRange = function(session, foldStyle, row, forceMultiline) {659 var line = session.getLine(row);660 661 if (this.startRegionRe.test(line))662 return 
this.getCommentRegionBlock(session, line, row);663 664 var match = line.match(this.foldingStartMarker);665 if (match) {666 var i = match.index;667 if (match[1])668 return this.openingBracketBlock(session, match[1], row, i);669 670 var range = session.getCommentFoldRange(row, i + match[0].length, 1);671 672 if (range && !range.isMultiLine()) {673 if (forceMultiline) {674 range = this.getSectionRange(session, row);675 } else if (foldStyle != "all")676 range = null;677 }678 679 return range;680 }681 if (foldStyle === "markbegin")682 return;683 var match = line.match(this.foldingStopMarker);684 if (match) {685 var i = match.index + match[0].length;686 if (match[1])687 return this.closingBracketBlock(session, match[1], row, i);688 return session.getCommentFoldRange(row, i, -1);689 }690 };691 692 this.getSectionRange = function(session, row) {693 var line = session.getLine(row);694 var startIndent = line.search(/\S/);695 var startRow = row;696 var startColumn = line.length;697 row = row + 1;698 var endRow = row;699 var maxRow = session.getLength();700 while (++row < maxRow) {701 line = session.getLine(row);702 var indent = line.search(/\S/);703 if (indent === -1)704 continue;705 if (startIndent > indent)706 break;707 var subRange = this.getFoldWidgetRange(session, "all", row);708 709 if (subRange) {710 if (subRange.start.row <= startRow) {711 break;712 } else if (subRange.isMultiLine()) {713 row = subRange.end.row;714 } else if (startIndent == indent) {715 break;716 }717 }718 endRow = row;719 }720 721 return new Range(startRow, startColumn, endRow, session.getLine(endRow).length);722 };723 this.getCommentRegionBlock = function(session, line, row) {724 var startColumn = line.search(/\s*$/);725 var maxRow = session.getLength();726 var startRow = row;727 728 var re = /^\s*(?:\/\*|\/\/|--)#?(end)?region\b/;729 var depth = 1;730 while (++row < maxRow) {731 line = session.getLine(row);732 var m = re.exec(line);733 if (!m) continue;734 if (m[1]) depth--;735 else depth++;736 if 
(!depth) break;737 }738 var endRow = row;739 if (endRow > startRow) {740 return new Range(startRow, startColumn, endRow, line.length);741 }742 };743}).call(FoldMode.prototype);744});745define("ace/mode/kotlin",["require","exports","module","ace/lib/oop","ace/mode/text","ace/mode/kotlin_highlight_rules","ace/mode/folding/cstyle"], function(require, exports, module) {746"use strict";747var oop = require("../lib/oop");748var TextMode = require("./text").Mode;749var KotlinHighlightRules = require("./kotlin_highlight_rules").KotlinHighlightRules;750var FoldMode = require("./folding/cstyle").FoldMode;751var Mode = function() {752 this.HighlightRules = KotlinHighlightRules;753 this.foldingRules = new FoldMode();754};755oop.inherits(Mode, TextMode);756(function() {757 this.$id = "ace/mode/kotlin"758}).call(Mode.prototype);759exports.Mode = Mode;...

Full Screen

Full Screen

kotlin_highlight_rules.js

Source:kotlin_highlight_rules.js Github

copy

Full Screen

1/* ***** BEGIN LICENSE BLOCK *****2 * Distributed under the BSD license:3 *4 * Copyright (c) 2012, Ajax.org B.V.5 * All rights reserved.6 *7 * Redistribution and use in source and binary forms, with or without8 * modification, are permitted provided that the following conditions are met:9 * * Redistributions of source code must retain the above copyright10 * notice, this list of conditions and the following disclaimer.11 * * Redistributions in binary form must reproduce the above copyright12 * notice, this list of conditions and the following disclaimer in the13 * documentation and/or other materials provided with the distribution.14 * * Neither the name of Ajax.org B.V. nor the15 * names of its contributors may be used to endorse or promote products16 * derived from this software without specific prior written permission.17 *18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE21 * DISCLAIMED. IN NO EVENT SHALL AJAX.ORG B.V. BE LIABLE FOR ANY22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.28 *29 * ***** END LICENSE BLOCK ***** */30/* This file was autogenerated from Kotlin.tmLanguage (uuid: ) */31/****************************************************************************************32 * IT MIGHT NOT BE PERFECT ...But it's a good start from an existing *.tmlanguage file. 
*33 * fileTypes *34 ****************************************************************************************/35define(function(require, exports, module) {36"use strict";37var oop = require("../lib/oop");38var TextHighlightRules = require("./text_highlight_rules").TextHighlightRules;39var KotlinHighlightRules = function() {40 // regexp must not have capturing parentheses. Use (?:) instead.41 // regexps are ordered -> the first match is used42 this.$rules = {43 start: [{44 include: "#comments"45 }, {46 token: [47 "text",48 "keyword.other.kotlin",49 "text",50 "entity.name.package.kotlin",51 "text"52 ],53 regex: /^(\s*)(package)\b(?:(\s*)([^ ;$]+)(\s*))?/54 }, {55 include: "#imports"56 }, {57 include: "#statements"58 }],59 "#classes": [{60 token: "text",61 regex: /(?=\s*(?:companion|class|object|interface))/,62 push: [{63 token: "text",64 regex: /}|(?=$)/,65 next: "pop"66 }, {67 token: ["keyword.other.kotlin", "text"],68 regex: /\b((?:companion\s*)?)(class|object|interface)\b/,69 push: [{70 token: "text",71 regex: /(?=<|{|\(|:)/,72 next: "pop"73 }, {74 token: "keyword.other.kotlin",75 regex: /\bobject\b/76 }, {77 token: "entity.name.type.class.kotlin",78 regex: /\w+/79 }]80 }, {81 token: "text",82 regex: /</,83 push: [{84 token: "text",85 regex: />/,86 next: "pop"87 }, {88 include: "#generics"89 }]90 }, {91 token: "text",92 regex: /\(/,93 push: [{94 token: "text",95 regex: /\)/,96 next: "pop"97 }, {98 include: "#parameters"99 }]100 }, {101 token: "keyword.operator.declaration.kotlin",102 regex: /:/,103 push: [{104 token: "text",105 regex: /(?={|$)/,106 next: "pop"107 }, {108 token: "entity.other.inherited-class.kotlin",109 regex: /\w+/110 }, {111 token: "text",112 regex: /\(/,113 push: [{114 token: "text",115 regex: /\)/,116 next: "pop"117 }, {118 include: "#expressions"119 }]120 }]121 }, {122 token: "text",123 regex: /\{/,124 push: [{125 token: "text",126 regex: /\}/,127 next: "pop"128 }, {129 include: "#statements"130 }]131 }]132 }],133 "#comments": [{134 token: 
"punctuation.definition.comment.kotlin",135 regex: /\/\*/,136 push: [{137 token: "punctuation.definition.comment.kotlin",138 regex: /\*\//,139 next: "pop"140 }, {141 defaultToken: "comment.block.kotlin"142 }]143 }, {144 token: [145 "text",146 "punctuation.definition.comment.kotlin",147 "comment.line.double-slash.kotlin"148 ],149 regex: /(\s*)(\/\/)(.*$)/150 }],151 "#constants": [{152 token: "constant.language.kotlin",153 regex: /\b(?:true|false|null|this|super)\b/154 }, {155 token: "constant.numeric.kotlin",156 regex: /\b(?:0(?:x|X)[0-9a-fA-F]*|(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:(?:e|E)(?:\+|-)?[0-9]+)?)(?:[LlFfUuDd]|UL|ul)?\b/157 }, {158 token: "constant.other.kotlin",159 regex: /\b[A-Z][A-Z0-9_]+\b/160 }],161 "#expressions": [{162 token: "text",163 regex: /\(/,164 push: [{165 token: "text",166 regex: /\)/,167 next: "pop"168 }, {169 include: "#expressions"170 }]171 }, {172 include: "#types"173 }, {174 include: "#strings"175 }, {176 include: "#constants"177 }, {178 include: "#comments"179 }, {180 include: "#keywords"181 }],182 "#functions": [{183 token: "text",184 regex: /(?=\s*fun)/,185 push: [{186 token: "text",187 regex: /}|(?=$)/,188 next: "pop"189 }, {190 token: "keyword.other.kotlin",191 regex: /\bfun\b/,192 push: [{193 token: "text",194 regex: /(?=\()/,195 next: "pop"196 }, {197 token: "text",198 regex: /</,199 push: [{200 token: "text",201 regex: />/,202 next: "pop"203 }, {204 include: "#generics"205 }]206 }, {207 token: ["text", "entity.name.function.kotlin"],208 regex: /((?:[\.<\?>\w]+\.)?)(\w+)/209 }]210 }, {211 token: "text",212 regex: /\(/,213 push: [{214 token: "text",215 regex: /\)/,216 next: "pop"217 }, {218 include: "#parameters"219 }]220 }, {221 token: "keyword.operator.declaration.kotlin",222 regex: /:/,223 push: [{224 token: "text",225 regex: /(?={|=|$)/,226 next: "pop"227 }, {228 include: "#types"229 }]230 }, {231 token: "text",232 regex: /\{/,233 push: [{234 token: "text",235 regex: /(?=\})/,236 next: "pop"237 }, {238 include: "#statements"239 
}]240 }, {241 token: "keyword.operator.assignment.kotlin",242 regex: /=/,243 push: [{244 token: "text",245 regex: /(?=$)/,246 next: "pop"247 }, {248 include: "#expressions"249 }]250 }]251 }],252 "#generics": [{253 token: "keyword.operator.declaration.kotlin",254 regex: /:/,255 push: [{256 token: "text",257 regex: /(?=,|>)/,258 next: "pop"259 }, {260 include: "#types"261 }]262 }, {263 include: "#keywords"264 }, {265 token: "storage.type.generic.kotlin",266 regex: /\w+/267 }],268 "#getters-and-setters": [{269 token: ["entity.name.function.kotlin", "text"],270 regex: /\b(get)\b(\s*\(\s*\))/,271 push: [{272 token: "text",273 regex: /\}|(?=\bset\b)|$/,274 next: "pop"275 }, {276 token: "keyword.operator.assignment.kotlin",277 regex: /=/,278 push: [{279 token: "text",280 regex: /(?=$|\bset\b)/,281 next: "pop"282 }, {283 include: "#expressions"284 }]285 }, {286 token: "text",287 regex: /\{/,288 push: [{289 token: "text",290 regex: /\}/,291 next: "pop"292 }, {293 include: "#expressions"294 }]295 }]296 }, {297 token: ["entity.name.function.kotlin", "text"],298 regex: /\b(set)\b(\s*)(?=\()/,299 push: [{300 token: "text",301 regex: /\}|(?=\bget\b)|$/,302 next: "pop"303 }, {304 token: "text",305 regex: /\(/,306 push: [{307 token: "text",308 regex: /\)/,309 next: "pop"310 }, {311 include: "#parameters"312 }]313 }, {314 token: "keyword.operator.assignment.kotlin",315 regex: /=/,316 push: [{317 token: "text",318 regex: /(?=$|\bset\b)/,319 next: "pop"320 }, {321 include: "#expressions"322 }]323 }, {324 token: "text",325 regex: /\{/,326 push: [{327 token: "text",328 regex: /\}/,329 next: "pop"330 }, {331 include: "#expressions"332 }]333 }]334 }],335 "#imports": [{336 token: [337 "text",338 "keyword.other.kotlin",339 "text",340 "keyword.other.kotlin"341 ],342 regex: /^(\s*)(import)(\s+[^ $]+\s+)((?:as)?)/343 }],344 "#keywords": [{345 token: "storage.modifier.kotlin",346 regex: 
/\b(?:var|val|public|private|protected|abstract|final|enum|open|attribute|annotation|override|inline|var|val|vararg|lazy|in|out|internal|data|tailrec|operator|infix|const|yield|typealias|typeof)\b/347 }, {348 token: "keyword.control.catch-exception.kotlin",349 regex: /\b(?:try|catch|finally|throw)\b/350 }, {351 token: "keyword.control.kotlin",352 regex: /\b(?:if|else|while|for|do|return|when|where|break|continue)\b/353 }, {354 token: "keyword.operator.kotlin",355 regex: /\b(?:in|is|as|assert)\b/356 }, {357 token: "keyword.operator.comparison.kotlin",358 regex: /==|!=|===|!==|<=|>=|<|>/359 }, {360 token: "keyword.operator.assignment.kotlin",361 regex: /=/362 }, {363 token: "keyword.operator.declaration.kotlin",364 regex: /:/365 }, {366 token: "keyword.operator.dot.kotlin",367 regex: /\./368 }, {369 token: "keyword.operator.increment-decrement.kotlin",370 regex: /\-\-|\+\+/371 }, {372 token: "keyword.operator.arithmetic.kotlin",373 regex: /\-|\+|\*|\/|%/374 }, {375 token: "keyword.operator.arithmetic.assign.kotlin",376 regex: /\+=|\-=|\*=|\/=/377 }, {378 token: "keyword.operator.logical.kotlin",379 regex: /!|&&|\|\|/380 }, {381 token: "keyword.operator.range.kotlin",382 regex: /\.\./383 }, {384 token: "punctuation.terminator.kotlin",385 regex: /;/386 }],387 "#namespaces": [{388 token: "keyword.other.kotlin",389