Skip to content

Commit d5ba4a1

Browse files
committed
httputil: Fix quadratic performance of cookie parsing
Maliciously-crafted cookies can cause Tornado to spend an unreasonable amount of CPU time and block the event loop. This change replaces the quadratic algorithm with a more efficient one. The implementation is copied from the Python 3.13 standard library (the previous one was from Python 3.5). Fixes CVE-2024-52804 See CVE-2024-7592 for a similar vulnerability in cpython. Thanks to github.com/kexinoh for the report.
1 parent 2a0e1d1 commit d5ba4a1

File tree

2 files changed

+56
-28
lines changed

2 files changed

+56
-28
lines changed

tornado/httputil.py

Lines changed: 10 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,15 +1057,20 @@ def qs_to_qsl(qs: Dict[str, List[AnyStr]]) -> Iterable[Tuple[str, AnyStr]]:
10571057
yield (k, v)
10581058

10591059

1060-
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
1061-
_QuotePatt = re.compile(r"[\\].")
1062-
_nulljoin = "".join
1060+
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub
1061+
1062+
1063+
def _unquote_replace(m: re.Match) -> str:
1064+
if m[1]:
1065+
return chr(int(m[1], 8))
1066+
else:
1067+
return m[2]
10631068

10641069

10651070
def _unquote_cookie(s: str) -> str:
10661071
"""Handle double quotes and escaping in cookie values.
10671072
1068-
This method is copied verbatim from the Python 3.5 standard
1073+
This method is copied verbatim from the Python 3.13 standard
10691074
library (http.cookies._unquote) so we don't have to depend on
10701075
non-public interfaces.
10711076
"""
@@ -1086,30 +1091,7 @@ def _unquote_cookie(s: str) -> str:
10861091
# \012 --> \n
10871092
# \" --> "
10881093
#
1089-
i = 0
1090-
n = len(s)
1091-
res = []
1092-
while 0 <= i < n:
1093-
o_match = _OctalPatt.search(s, i)
1094-
q_match = _QuotePatt.search(s, i)
1095-
if not o_match and not q_match: # Neither matched
1096-
res.append(s[i:])
1097-
break
1098-
# else:
1099-
j = k = -1
1100-
if o_match:
1101-
j = o_match.start(0)
1102-
if q_match:
1103-
k = q_match.start(0)
1104-
if q_match and (not o_match or k < j): # QuotePatt matched
1105-
res.append(s[i:k])
1106-
res.append(s[k + 1])
1107-
i = k + 2
1108-
else: # OctalPatt matched
1109-
res.append(s[i:j])
1110-
res.append(chr(int(s[j + 1 : j + 4], 8)))
1111-
i = j + 4
1112-
return _nulljoin(res)
1094+
return _unquote_sub(_unquote_replace, s)
11131095

11141096

11151097
def parse_cookie(cookie: str) -> Dict[str, str]:

tornado/test/httputil_test.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,3 +560,49 @@ def test_invalid_cookies(self):
560560
self.assertEqual(
561561
parse_cookie(" = b ; ; = ; c = ; "), {"": "b", "c": ""}
562562
)
563+
564+
def test_unquote(self):
565+
# Copied from
566+
# https://github.com/python/cpython/blob/dc7a2b6522ec7af41282bc34f405bee9b306d611/Lib/test/test_http_cookies.py#L62
567+
cases = [
568+
(r'a="b=\""', 'b="'),
569+
(r'a="b=\\"', "b=\\"),
570+
(r'a="b=\="', "b=="),
571+
(r'a="b=\n"', "b=n"),
572+
(r'a="b=\042"', 'b="'),
573+
(r'a="b=\134"', "b=\\"),
574+
(r'a="b=\377"', "b=\xff"),
575+
(r'a="b=\400"', "b=400"),
576+
(r'a="b=\42"', "b=42"),
577+
(r'a="b=\\042"', "b=\\042"),
578+
(r'a="b=\\134"', "b=\\134"),
579+
(r'a="b=\\\""', 'b=\\"'),
580+
(r'a="b=\\\042"', 'b=\\"'),
581+
(r'a="b=\134\""', 'b=\\"'),
582+
(r'a="b=\134\042"', 'b=\\"'),
583+
]
584+
for encoded, decoded in cases:
585+
with self.subTest(encoded):
586+
c = parse_cookie(encoded)
587+
self.assertEqual(c["a"], decoded)
588+
589+
def test_unquote_large(self):
590+
# Adapted from
591+
# https://github.com/python/cpython/blob/dc7a2b6522ec7af41282bc34f405bee9b306d611/Lib/test/test_http_cookies.py#L87
592+
# Modified from that test because we handle semicolons differently from the stdlib.
593+
#
594+
# This is a performance regression test: prior to improvements in Tornado 6.4.2, this test
595+
# would take over a minute with n= 100k. Now it runs in tens of milliseconds.
596+
n = 100000
597+
for encoded in r"\\", r"\134":
598+
with self.subTest(encoded):
599+
start = time.time()
600+
data = 'a="b=' + encoded * n + '"'
601+
value = parse_cookie(data)["a"]
602+
end = time.time()
603+
self.assertEqual(value[:3], "b=\\")
604+
self.assertEqual(value[-3:], "\\\\\\")
605+
self.assertEqual(len(value), n + 2)
606+
607+
# Very loose performance check to avoid false positives
608+
self.assertLess(end - start, 1, "Test took too long")

0 commit comments

Comments
 (0)