Skip to content

[3.11] gh-106052: Fix bug in the matching of possessive quantifiers (gh-106515) #107795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Lib/re/_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ def _compile(code, pattern, flags):
emit(ANY_ALL)
else:
emit(ANY)
elif op is POSSESSIVE_REPEAT:
# gh-106052: Possessive quantifiers do not work when the
# subpattern contains backtracking, e.g. "(?:ab?c)*+".
# Implement it as the equivalent greedy quantifier in an atomic group.
p = [(MAX_REPEAT, av)]
p = [(ATOMIC_GROUP, p)]
_compile(code, p, flags)
elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE:
raise error("internal: unsupported template operator %r" % (op,))
Expand Down
12 changes: 12 additions & 0 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -2396,6 +2396,16 @@ def test_template_function_and_flag_is_deprecated(self):
self.assertTrue(template_re1.match('ahoy'))
self.assertFalse(template_re1.match('nope'))

def test_bug_gh106052(self):
# Regression test for gh-106052.  The assertions come in pairs: the first
# of each pair wraps a greedy repeat in an explicit atomic group,
# "(?>(?:...)+)", and the second uses the corresponding possessive
# quantifier, "(?:...)++".  Both forms are expected to yield the same span.
# The repeated subpattern "ab?c" contains an optional "b", so it can
# backtrack internally.
# "+" on "aca": one "ac" is consumed; the trailing "a" has no "c" after it,
# so a second repetition fails and the match ends at index 2.
self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2))
self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2))
# "*" on "aca": same input, same expected span as for "+".
self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2))
self.assertEqual(re.match("(?:ab?c)*+", "aca").span(), (0, 2))
# "?" on "a": the subpattern cannot complete (no "c"), so zero repetitions
# are taken and an empty match at position 0 is expected.
self.assertEqual(re.match("(?>(?:ab?c)?)", "a").span(), (0, 0))
self.assertEqual(re.match("(?:ab?c)?+", "a").span(), (0, 0))
# "{1,3}" on "aca": bounded repeat, same expected span as for "+".
self.assertEqual(re.match("(?>(?:ab?c){1,3})", "aca").span(), (0, 2))
self.assertEqual(re.match("(?:ab?c){1,3}+", "aca").span(), (0, 2))

@unittest.skipIf(multiprocessing is None, 'test requires multiprocessing')
def test_regression_gh94675(self):
pattern = re.compile(r'(?<=[({}])(((//[^\n]*)?[\n])([\000-\040])*)*'
Expand Down Expand Up @@ -2492,6 +2502,7 @@ def test_atomic_group(self):
17: SUCCESS
''')

@unittest.expectedFailure # gh-106052
def test_possesive_repeat_one(self):
self.assertEqual(get_debug_out(r'a?+'), '''\
POSSESSIVE_REPEAT 0 1
Expand All @@ -2504,6 +2515,7 @@ def test_possesive_repeat_one(self):
12: SUCCESS
''')

@unittest.expectedFailure # gh-106052
def test_possesive_repeat(self):
self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\
POSSESSIVE_REPEAT 0 1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:mod:`re` module: fix the matching of possessive quantifiers in the case of
a subpattern containing backtracking.