Skip to content

Commit 6e3eee5

Browse files
author
Ma Lin
authored
bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure (GH-32283)
1 parent b82cdd1 commit 6e3eee5

File tree

9 files changed

+151
-71
lines changed

9 files changed

+151
-71
lines changed

Lib/re/_compiler.py

+39-22
Original file line number | Diff line number | Diff line change
@@ -67,14 +67,21 @@
6767
_ignorecase_fixes = {i: tuple(j for j in t if i != j)
6868
for t in _equivalences for i in t}
6969

70+
class _CompileData:
71+
__slots__ = ('code', 'repeat_count')
72+
def __init__(self):
73+
self.code = []
74+
self.repeat_count = 0
75+
7076
def _combine_flags(flags, add_flags, del_flags,
7177
TYPE_FLAGS=_parser.TYPE_FLAGS):
7278
if add_flags & TYPE_FLAGS:
7379
flags &= ~TYPE_FLAGS
7480
return (flags | add_flags) & ~del_flags
7581

76-
def _compile(code, pattern, flags):
82+
def _compile(data, pattern, flags):
7783
# internal: compile a (sub)pattern
84+
code = data.code
7885
emit = code.append
7986
_len = len
8087
LITERAL_CODES = _LITERAL_CODES
@@ -147,15 +154,19 @@ def _compile(code, pattern, flags):
147154
skip = _len(code); emit(0)
148155
emit(av[0])
149156
emit(av[1])
150-
_compile(code, av[2], flags)
157+
_compile(data, av[2], flags)
151158
emit(SUCCESS)
152159
code[skip] = _len(code) - skip
153160
else:
154161
emit(REPEATING_CODES[op][0])
155162
skip = _len(code); emit(0)
156163
emit(av[0])
157164
emit(av[1])
158-
_compile(code, av[2], flags)
165+
# now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT)
166+
if op != POSSESSIVE_REPEAT:
167+
emit(data.repeat_count)
168+
data.repeat_count += 1
169+
_compile(data, av[2], flags)
159170
code[skip] = _len(code) - skip
160171
emit(REPEATING_CODES[op][1])
161172
elif op is SUBPATTERN:
@@ -164,7 +175,7 @@ def _compile(code, pattern, flags):
164175
emit(MARK)
165176
emit((group-1)*2)
166177
# _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
167-
_compile(code, p, _combine_flags(flags, add_flags, del_flags))
178+
_compile(data, p, _combine_flags(flags, add_flags, del_flags))
168179
if group:
169180
emit(MARK)
170181
emit((group-1)*2+1)
@@ -176,7 +187,7 @@ def _compile(code, pattern, flags):
176187
# pop their stack if they reach it
177188
emit(ATOMIC_GROUP)
178189
skip = _len(code); emit(0)
179-
_compile(code, av, flags)
190+
_compile(data, av, flags)
180191
emit(SUCCESS)
181192
code[skip] = _len(code) - skip
182193
elif op in SUCCESS_CODES:
@@ -191,13 +202,13 @@ def _compile(code, pattern, flags):
191202
if lo != hi:
192203
raise error("look-behind requires fixed-width pattern")
193204
emit(lo) # look behind
194-
_compile(code, av[1], flags)
205+
_compile(data, av[1], flags)
195206
emit(SUCCESS)
196207
code[skip] = _len(code) - skip
197208
elif op is CALL:
198209
emit(op)
199210
skip = _len(code); emit(0)
200-
_compile(code, av, flags)
211+
_compile(data, av, flags)
201212
emit(SUCCESS)
202213
code[skip] = _len(code) - skip
203214
elif op is AT:
@@ -216,7 +227,7 @@ def _compile(code, pattern, flags):
216227
for av in av[1]:
217228
skip = _len(code); emit(0)
218229
# _compile_info(code, av, flags)
219-
_compile(code, av, flags)
230+
_compile(data, av, flags)
220231
emit(JUMP)
221232
tailappend(_len(code)); emit(0)
222233
code[skip] = _len(code) - skip
@@ -244,12 +255,12 @@ def _compile(code, pattern, flags):
244255
emit(op)
245256
emit(av[0]-1)
246257
skipyes = _len(code); emit(0)
247-
_compile(code, av[1], flags)
258+
_compile(data, av[1], flags)
248259
if av[2]:
249260
emit(JUMP)
250261
skipno = _len(code); emit(0)
251262
code[skipyes] = _len(code) - skipyes + 1
252-
_compile(code, av[2], flags)
263+
_compile(data, av[2], flags)
253264
code[skipno] = _len(code) - skipno
254265
else:
255266
code[skipyes] = _len(code) - skipyes + 1
@@ -608,17 +619,17 @@ def isstring(obj):
608619
def _code(p, flags):
609620

610621
flags = p.state.flags | flags
611-
code = []
622+
data = _CompileData()
612623

613624
# compile info block
614-
_compile_info(code, p, flags)
625+
_compile_info(data.code, p, flags)
615626

616627
# compile the pattern
617-
_compile(code, p.data, flags)
628+
_compile(data, p.data, flags)
618629

619-
code.append(SUCCESS)
630+
data.code.append(SUCCESS)
620631

621-
return code
632+
return data
622633

623634
def _hex_code(code):
624635
return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)
@@ -719,14 +730,21 @@ def print_2(*args):
719730
else:
720731
print_(FAILURE)
721732
i += 1
722-
elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
733+
elif op in (REPEAT_ONE, MIN_REPEAT_ONE,
723734
POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
724735
skip, min, max = code[i: i+3]
725736
if max == MAXREPEAT:
726737
max = 'MAXREPEAT'
727738
print_(op, skip, min, max, to=i+skip)
728739
dis_(i+3, i+skip)
729740
i += skip
741+
elif op is REPEAT:
742+
skip, min, max, repeat_index = code[i: i+4]
743+
if max == MAXREPEAT:
744+
max = 'MAXREPEAT'
745+
print_(op, skip, min, max, repeat_index, to=i+skip)
746+
dis_(i+4, i+skip)
747+
i += skip
730748
elif op is GROUPREF_EXISTS:
731749
arg, skip = code[i: i+2]
732750
print_(op, arg, skip, to=i+skip)
@@ -781,11 +799,11 @@ def compile(p, flags=0):
781799
else:
782800
pattern = None
783801

784-
code = _code(p, flags)
802+
data = _code(p, flags)
785803

786804
if flags & SRE_FLAG_DEBUG:
787805
print()
788-
dis(code)
806+
dis(data.code)
789807

790808
# map in either direction
791809
groupindex = p.state.groupdict
@@ -794,7 +812,6 @@ def compile(p, flags=0):
794812
indexgroup[i] = k
795813

796814
return _sre.compile(
797-
pattern, flags | p.state.flags, code,
798-
p.state.groups-1,
799-
groupindex, tuple(indexgroup)
800-
)
815+
pattern, flags | p.state.flags, data.code,
816+
p.state.groups-1, groupindex, tuple(indexgroup),
817+
data.repeat_count)

Lib/re/_constants.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313

1414
# update when constants are added or removed
1515

16-
MAGIC = 20220318
16+
MAGIC = 20220402
1717

1818
from _sre import MAXREPEAT, MAXGROUPS
1919

Lib/test/test_re.py

+26-2
Original file line number | Diff line number | Diff line change
@@ -1643,9 +1643,12 @@ def test_dealloc(self):
16431643
long_overflow = 2**128
16441644
self.assertRaises(TypeError, re.finditer, "a", {})
16451645
with self.assertRaises(OverflowError):
1646-
_sre.compile("abc", 0, [long_overflow], 0, {}, ())
1646+
_sre.compile("abc", 0, [long_overflow], 0, {}, (), 0)
16471647
with self.assertRaises(TypeError):
1648-
_sre.compile({}, 0, [], 0, [], [])
1648+
_sre.compile({}, 0, [], 0, [], [], 0)
1649+
with self.assertRaises(RuntimeError):
1650+
# invalid repeat_count -1
1651+
_sre.compile("abc", 0, [1], 0, {}, (), -1)
16491652

16501653
def test_search_dot_unicode(self):
16511654
self.assertTrue(re.search("123.*-", '123abc-'))
@@ -2334,6 +2337,27 @@ def test_possesive_repeat(self):
23342337
14. SUCCESS
23352338
''')
23362339

2340+
def test_repeat_index(self):
2341+
self.assertEqual(get_debug_out(r'(?:ab)*?(?:cd)*'), '''\
2342+
MIN_REPEAT 0 MAXREPEAT
2343+
LITERAL 97
2344+
LITERAL 98
2345+
MAX_REPEAT 0 MAXREPEAT
2346+
LITERAL 99
2347+
LITERAL 100
2348+
2349+
0. INFO 4 0b0 0 MAXREPEAT (to 5)
2350+
5: REPEAT 8 0 MAXREPEAT 0 (to 14)
2351+
10. LITERAL 0x61 ('a')
2352+
12. LITERAL 0x62 ('b')
2353+
14: MIN_UNTIL
2354+
15. REPEAT 8 0 MAXREPEAT 1 (to 24)
2355+
20. LITERAL 0x63 ('c')
2356+
22. LITERAL 0x64 ('d')
2357+
24: MAX_UNTIL
2358+
25. SUCCESS
2359+
''')
2360+
23372361

23382362
class PatternReprTests(unittest.TestCase):
23392363
def check(self, pattern, expected):
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
:mod:`re` module: fix memory leak when a match is terminated by a signal or
2+
memory allocation failure. Patch by Ma Lin.

0 commit comments

Comments
 (0)