Skip to content

Commit 7ebbd27

Browse files
gh-130631: Make join_header_words() more similar to the original Perl version (GH-130632)
* Always quote strings with non-ASCII characters. * Allow some non-separator and non-control characters (like "." or "-") to be unquoted. * Always quote strings that end with "\n". * Use the fullmatch() method for clarity and optimization.
1 parent 16dcb57 commit 7ebbd27

File tree

3 files changed

+23
-9
lines changed

3 files changed

+23
-9
lines changed

Lib/http/cookiejar.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -430,25 +430,26 @@ def split_header_words(header_values):
430430
if pairs: result.append(pairs)
431431
return result
432432

433+
HEADER_JOIN_TOKEN_RE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
433434
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
434435
def join_header_words(lists):
435436
"""Do the inverse (almost) of the conversion done by split_header_words.
436437
437438
Takes a list of lists of (key, value) pairs and produces a single header
438439
value. Attribute values are quoted if needed.
439440
440-
>>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
441-
'text/plain; charset="iso-8859-1"'
442-
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
443-
'text/plain, charset="iso-8859-1"'
441+
>>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
442+
'text/plain; charset="iso-8859/1"'
443+
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
444+
'text/plain, charset="iso-8859/1"'
444445
445446
"""
446447
headers = []
447448
for pairs in lists:
448449
attr = []
449450
for k, v in pairs:
450451
if v is not None:
451-
if not re.search(r"^\w+$", v):
452+
if not HEADER_JOIN_TOKEN_RE.fullmatch(v):
452453
v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \
453454
v = '"%s"' % v
454455
k = "%s=%s" % (k, v)

Lib/test/test_http_cookiejar.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -285,20 +285,30 @@ def test_roundtrip(self):
285285
("foo=bar;bar=baz", "foo=bar; bar=baz"),
286286
('foo bar baz', "foo; bar; baz"),
287287
(r'foo="\"" bar="\\"', r'foo="\""; bar="\\"'),
288+
("föo=bär", 'föo="bär"'),
288289
('foo,,,bar', 'foo, bar'),
289290
('foo=bar,bar=baz', 'foo=bar, bar=baz'),
291+
("foo=\n", 'foo=""'),
292+
('foo="\n"', 'foo="\n"'),
293+
('foo=bar\n', 'foo=bar'),
294+
('foo="bar\n"', 'foo="bar\n"'),
295+
('foo=bar\nbaz', 'foo=bar; baz'),
296+
('foo="bar\nbaz"', 'foo="bar\nbaz"'),
290297

291298
('text/html; charset=iso-8859-1',
292-
'text/html; charset="iso-8859-1"'),
299+
'text/html; charset=iso-8859-1'),
300+
301+
('text/html; charset="iso-8859/1"',
302+
'text/html; charset="iso-8859/1"'),
293303

294304
('foo="bar"; port="80,81"; discard, bar=baz',
295305
'foo=bar; port="80,81"; discard, bar=baz'),
296306

297307
(r'Basic realm="\"foo\\\\bar\""',
298308
r'Basic; realm="\"foo\\\\bar\""'),
299309

300-
('n; foo="foo;_", bar=foo!_',
301-
'n; foo="foo;_", bar="foo!_"'),
310+
('n; foo="foo;_", bar="foo,_"',
311+
'n; foo="foo;_", bar="foo,_"'),
302312
]
303313

304314
for arg, expect in tests:
@@ -553,7 +563,7 @@ def test_missing_value(self):
553563
self.assertIsNone(cookie.value)
554564
self.assertEqual(cookie.name, '"spam"')
555565
self.assertEqual(lwp_cookie_str(cookie), (
556-
r'"spam"; path="/foo/"; domain="www.acme.com"; '
566+
r'"spam"; path="/foo/"; domain=www.acme.com; '
557567
'path_spec; discard; version=0'))
558568
old_str = repr(c)
559569
c.save(ignore_expires=True, ignore_discard=True)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`!http.cookiejar.join_header_words` is now more similar to the original
2+
Perl version. It now quotes the same set of characters and always quotes
3+
values that end with ``"\n"``.

0 commit comments

Comments
 (0)