Skip to content

Commit 8e97bd7

Browse files
[3.13] gh-130631: Make join_header_words() more similar to the original Perl version (GH-130632) (GH-132303)
* Always quote strings with non-ASCII characters. * Allow some non-separator and non-control characters (like "." or "-") to be unquoted. * Always quote strings that end with "\n". * Use the fullmatch() method for clarity and optimization. (cherry picked from commit 7ebbd27)
1 parent 099b301 commit 8e97bd7

File tree

3 files changed

+25
-8
lines changed

3 files changed

+25
-8
lines changed

Lib/http/cookiejar.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -430,25 +430,26 @@ def split_header_words(header_values):
430430
if pairs: result.append(pairs)
431431
return result
432432

433+
HEADER_JOIN_TOKEN_RE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
433434
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
434435
def join_header_words(lists):
435436
"""Do the inverse (almost) of the conversion done by split_header_words.
436437
437438
Takes a list of lists of (key, value) pairs and produces a single header
438439
value. Attribute values are quoted if needed.
439440
440-
>>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
441-
'text/plain; charset="iso-8859-1"'
442-
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
443-
'text/plain, charset="iso-8859-1"'
441+
>>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
442+
'text/plain; charset="iso-8859/1"'
443+
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
444+
'text/plain, charset="iso-8859/1"'
444445
445446
"""
446447
headers = []
447448
for pairs in lists:
448449
attr = []
449450
for k, v in pairs:
450451
if v is not None:
451-
if not re.search(r"^\w+$", v):
452+
if not HEADER_JOIN_TOKEN_RE.fullmatch(v):
452453
v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \
453454
v = '"%s"' % v
454455
k = "%s=%s" % (k, v)

Lib/test/test_http_cookiejar.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,17 +276,30 @@ def test_roundtrip(self):
276276
("foo=bar;bar=baz", "foo=bar; bar=baz"),
277277
('foo bar baz', "foo; bar; baz"),
278278
(r'foo="\"" bar="\\"', r'foo="\""; bar="\\"'),
279+
("föo=bär", 'föo="bär"'),
279280
('foo,,,bar', 'foo, bar'),
280281
('foo=bar,bar=baz', 'foo=bar, bar=baz'),
282+
("foo=\n", 'foo=""'),
283+
('foo="\n"', 'foo="\n"'),
284+
('foo=bar\n', 'foo=bar'),
285+
('foo="bar\n"', 'foo="bar\n"'),
286+
('foo=bar\nbaz', 'foo=bar; baz'),
287+
('foo="bar\nbaz"', 'foo="bar\nbaz"'),
281288

282289
('text/html; charset=iso-8859-1',
283-
'text/html; charset="iso-8859-1"'),
290+
'text/html; charset=iso-8859-1'),
291+
292+
('text/html; charset="iso-8859/1"',
293+
'text/html; charset="iso-8859/1"'),
284294

285295
('foo="bar"; port="80,81"; discard, bar=baz',
286296
'foo=bar; port="80,81"; discard, bar=baz'),
287297

288298
(r'Basic realm="\"foo\\\\bar\""',
289-
r'Basic; realm="\"foo\\\\bar\""')
299+
r'Basic; realm="\"foo\\\\bar\""'),
300+
301+
('n; foo="foo;_", bar="foo,_"',
302+
'n; foo="foo;_", bar="foo,_"'),
290303
]
291304

292305
for arg, expect in tests:
@@ -541,7 +554,7 @@ def test_missing_value(self):
541554
self.assertIsNone(cookie.value)
542555
self.assertEqual(cookie.name, '"spam"')
543556
self.assertEqual(lwp_cookie_str(cookie), (
544-
r'"spam"; path="/foo/"; domain="www.acme.com"; '
557+
r'"spam"; path="/foo/"; domain=www.acme.com; '
545558
'path_spec; discard; version=0'))
546559
old_str = repr(c)
547560
c.save(ignore_expires=True, ignore_discard=True)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`!http.cookiejar.join_header_words` is now more similar to the original
2+
Perl version. It now quotes the same set of characters and always quotes
3+
values that end with ``"\n"``.

0 commit comments

Comments
 (0)