Skip to content

Commit 41597ad

Browse files
authored
Move remaining utility functions from _utils.py to _models.py (#3387)
1 parent 6212e8f commit 41597ad

File tree

5 files changed

+118
-124
lines changed

5 files changed

+118
-124
lines changed

httpx/_models.py

+74-10
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from __future__ import annotations
22

3+
import codecs
34
import datetime
45
import email.message
56
import json as jsonlib
7+
import re
68
import typing
79
import urllib.request
810
from collections.abc import Mapping
@@ -44,15 +46,23 @@
4446
SyncByteStream,
4547
)
4648
from ._urls import URL
47-
from ._utils import (
48-
is_known_encoding,
49-
obfuscate_sensitive_headers,
50-
parse_content_type_charset,
51-
parse_header_links,
52-
)
49+
from ._utils import to_bytes_or_str, to_str
5350

5451
__all__ = ["Cookies", "Headers", "Request", "Response"]
5552

53+
SENSITIVE_HEADERS = {"authorization", "proxy-authorization"}
54+
55+
56+
def _is_known_encoding(encoding: str) -> bool:
57+
"""
58+
Return `True` if `encoding` is a known codec.
59+
"""
60+
try:
61+
codecs.lookup(encoding)
62+
except LookupError:
63+
return False
64+
return True
65+
5666

5767
def _normalize_header_key(key: str | bytes, encoding: str | None = None) -> bytes:
5868
"""
@@ -72,6 +82,60 @@ def _normalize_header_value(value: str | bytes, encoding: str | None = None) ->
7282
return value.encode(encoding or "ascii")
7383

7484

85+
def _parse_content_type_charset(content_type: str) -> str | None:
86+
# We used to use `cgi.parse_header()` here, but `cgi` became a dead battery.
87+
# See: https://peps.python.org/pep-0594/#cgi
88+
msg = email.message.Message()
89+
msg["content-type"] = content_type
90+
return msg.get_content_charset(failobj=None)
91+
92+
93+
def _parse_header_links(value: str) -> list[dict[str, str]]:
94+
"""
95+
Returns a list of parsed link headers, for more info see:
96+
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Link
97+
The generic syntax of those is:
98+
Link: < uri-reference >; param1=value1; param2="value2"
99+
So for instance:
100+
Link; '<http:/.../front.jpeg>; type="image/jpeg",<http://.../back.jpeg>;'
101+
would return
102+
[
103+
{"url": "http:/.../front.jpeg", "type": "image/jpeg"},
104+
{"url": "http://.../back.jpeg"},
105+
]
106+
:param value: HTTP Link entity-header field
107+
:return: list of parsed link headers
108+
"""
109+
links: list[dict[str, str]] = []
110+
replace_chars = " '\""
111+
value = value.strip(replace_chars)
112+
if not value:
113+
return links
114+
for val in re.split(", *<", value):
115+
try:
116+
url, params = val.split(";", 1)
117+
except ValueError:
118+
url, params = val, ""
119+
link = {"url": url.strip("<> '\"")}
120+
for param in params.split(";"):
121+
try:
122+
key, value = param.split("=")
123+
except ValueError:
124+
break
125+
link[key.strip(replace_chars)] = value.strip(replace_chars)
126+
links.append(link)
127+
return links
128+
129+
130+
def _obfuscate_sensitive_headers(
    items: typing.Iterable[tuple[typing.AnyStr, typing.AnyStr]],
) -> typing.Iterator[tuple[typing.AnyStr, typing.AnyStr]]:
    """
    Yield every `(name, value)` pair from `items`, replacing the value with a
    "[secure]" placeholder when the lower-cased name is in `SENSITIVE_HEADERS`.

    The placeholder is produced by `to_bytes_or_str(..., match_type_of=value)`.
    """
    for name, header_value in items:
        is_sensitive = to_str(name.lower()) in SENSITIVE_HEADERS
        if is_sensitive:
            yield name, to_bytes_or_str("[secure]", match_type_of=header_value)
        else:
            yield name, header_value
137+
138+
75139
class Headers(typing.MutableMapping[str, str]):
76140
"""
77141
HTTP headers, as a case-insensitive multi-dict.
@@ -306,7 +370,7 @@ def __repr__(self) -> str:
306370
if self.encoding != "ascii":
307371
encoding_str = f", encoding={self.encoding!r}"
308372

309-
as_list = list(obfuscate_sensitive_headers(self.multi_items()))
373+
as_list = list(_obfuscate_sensitive_headers(self.multi_items()))
310374
as_dict = dict(as_list)
311375

312376
no_duplicate_keys = len(as_dict) == len(as_list)
@@ -599,7 +663,7 @@ def encoding(self) -> str | None:
599663
"""
600664
if not hasattr(self, "_encoding"):
601665
encoding = self.charset_encoding
602-
if encoding is None or not is_known_encoding(encoding):
666+
if encoding is None or not _is_known_encoding(encoding):
603667
if isinstance(self.default_encoding, str):
604668
encoding = self.default_encoding
605669
elif hasattr(self, "_content"):
@@ -630,7 +694,7 @@ def charset_encoding(self) -> str | None:
630694
if content_type is None:
631695
return None
632696

633-
return parse_content_type_charset(content_type)
697+
return _parse_content_type_charset(content_type)
634698

635699
def _get_content_decoder(self) -> ContentDecoder:
636700
"""
@@ -785,7 +849,7 @@ def links(self) -> dict[str | None, dict[str, str]]:
785849

786850
return {
787851
(link.get("rel") or link.get("url")): link
788-
for link in parse_header_links(header)
852+
for link in _parse_header_links(header)
789853
}
790854

791855
@property

httpx/_utils.py

-70
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
from __future__ import annotations
22

3-
import codecs
4-
import email.message
53
import ipaddress
64
import os
75
import re
@@ -29,74 +27,6 @@ def primitive_value_to_str(value: PrimitiveData) -> str:
2927
return str(value)
3028

3129

32-
def is_known_encoding(encoding: str) -> bool:
33-
"""
34-
Return `True` if `encoding` is a known codec.
35-
"""
36-
try:
37-
codecs.lookup(encoding)
38-
except LookupError:
39-
return False
40-
return True
41-
42-
43-
def parse_header_links(value: str) -> list[dict[str, str]]:
44-
"""
45-
Returns a list of parsed link headers, for more info see:
46-
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Link
47-
The generic syntax of those is:
48-
Link: < uri-reference >; param1=value1; param2="value2"
49-
So for instance:
50-
Link; '<http:/.../front.jpeg>; type="image/jpeg",<http://.../back.jpeg>;'
51-
would return
52-
[
53-
{"url": "http:/.../front.jpeg", "type": "image/jpeg"},
54-
{"url": "http://.../back.jpeg"},
55-
]
56-
:param value: HTTP Link entity-header field
57-
:return: list of parsed link headers
58-
"""
59-
links: list[dict[str, str]] = []
60-
replace_chars = " '\""
61-
value = value.strip(replace_chars)
62-
if not value:
63-
return links
64-
for val in re.split(", *<", value):
65-
try:
66-
url, params = val.split(";", 1)
67-
except ValueError:
68-
url, params = val, ""
69-
link = {"url": url.strip("<> '\"")}
70-
for param in params.split(";"):
71-
try:
72-
key, value = param.split("=")
73-
except ValueError:
74-
break
75-
link[key.strip(replace_chars)] = value.strip(replace_chars)
76-
links.append(link)
77-
return links
78-
79-
80-
def parse_content_type_charset(content_type: str) -> str | None:
81-
# We used to use `cgi.parse_header()` here, but `cgi` became a dead battery.
82-
# See: https://peps.python.org/pep-0594/#cgi
83-
msg = email.message.Message()
84-
msg["content-type"] = content_type
85-
return msg.get_content_charset(failobj=None)
86-
87-
88-
SENSITIVE_HEADERS = {"authorization", "proxy-authorization"}
89-
90-
91-
def obfuscate_sensitive_headers(
92-
items: typing.Iterable[tuple[typing.AnyStr, typing.AnyStr]],
93-
) -> typing.Iterator[tuple[typing.AnyStr, typing.AnyStr]]:
94-
for k, v in items:
95-
if to_str(k.lower()) in SENSITIVE_HEADERS:
96-
v = to_bytes_or_str("[secure]", match_type_of=v)
97-
yield k, v
98-
99-
10030
def port_or_default(url: URL) -> int | None:
10131
if url.port is not None:
10232
return url.port

scripts/lint

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ export SOURCE_FILES="httpx tests"
88

99
set -x
1010

11-
${PREFIX}ruff --fix $SOURCE_FILES
11+
${PREFIX}ruff check --fix $SOURCE_FILES
1212
${PREFIX}ruff format $SOURCE_FILES

tests/models/test_headers.py

+43
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,46 @@ def test_sensitive_headers(header):
174174
value = "s3kr3t"
175175
h = httpx.Headers({header: value})
176176
assert repr(h) == "Headers({'%s': '[secure]'})" % header
177+
178+
179+
@pytest.mark.parametrize(
    "headers, output",
    [
        ([("content-type", "text/html")], [("content-type", "text/html")]),
        ([("authorization", "s3kr3t")], [("authorization", "[secure]")]),
        ([("proxy-authorization", "s3kr3t")], [("proxy-authorization", "[secure]")]),
    ],
)
def test_obfuscate_sensitive_headers(headers, output):
    # The repr of `Headers` must show the obfuscated values from `output`.
    expected_mapping = dict(output)
    headers_class = httpx.Headers(dict(headers))
    expected_repr = f"Headers({expected_mapping!r})"
    assert repr(headers_class) == expected_repr
191+
192+
193+
@pytest.mark.parametrize(
    "value, expected",
    (
        (
            '<http:/.../front.jpeg>; rel=front; type="image/jpeg"',
            [{"url": "http:/.../front.jpeg", "rel": "front", "type": "image/jpeg"}],
        ),
        ("<http:/.../front.jpeg>", [{"url": "http:/.../front.jpeg"}]),
        ("<http:/.../front.jpeg>;", [{"url": "http:/.../front.jpeg"}]),
        (
            '<http:/.../front.jpeg>; type="image/jpeg",<http://.../back.jpeg>;',
            [
                {"url": "http:/.../front.jpeg", "type": "image/jpeg"},
                {"url": "http://.../back.jpeg"},
            ],
        ),
        ("", []),
    ),
)
def test_parse_header_links(value, expected):
    # Compare the full list rather than checking containment one way only:
    # a containment check passes vacuously when `expected` is empty and would
    # not notice spurious extra links in the parsed result.
    all_links = list(httpx.Response(200, headers={"link": value}).links.values())
    assert all_links == expected
215+
216+
217+
def test_parse_header_links_no_link():
    # A response built without a Link header must expose an empty `links` mapping.
    response = httpx.Response(200)
    assert response.links == {}
assert all_links == {}

tests/test_utils.py

-43
Original file line numberDiff line numberDiff line change
@@ -53,35 +53,6 @@ def test_guess_by_bom(encoding, expected):
5353
assert response.json() == {"abc": 123}
5454

5555

56-
@pytest.mark.parametrize(
57-
"value, expected",
58-
(
59-
(
60-
'<http:/.../front.jpeg>; rel=front; type="image/jpeg"',
61-
[{"url": "http:/.../front.jpeg", "rel": "front", "type": "image/jpeg"}],
62-
),
63-
("<http:/.../front.jpeg>", [{"url": "http:/.../front.jpeg"}]),
64-
("<http:/.../front.jpeg>;", [{"url": "http:/.../front.jpeg"}]),
65-
(
66-
'<http:/.../front.jpeg>; type="image/jpeg",<http://.../back.jpeg>;',
67-
[
68-
{"url": "http:/.../front.jpeg", "type": "image/jpeg"},
69-
{"url": "http://.../back.jpeg"},
70-
],
71-
),
72-
("", []),
73-
),
74-
)
75-
def test_parse_header_links(value, expected):
76-
all_links = httpx.Response(200, headers={"link": value}).links.values()
77-
assert all(link in all_links for link in expected)
78-
79-
80-
def test_parse_header_links_no_link():
81-
all_links = httpx.Response(200).links
82-
assert all_links == {}
83-
84-
8556
def test_logging_request(server, caplog):
8657
caplog.set_level(logging.INFO)
8758
with httpx.Client() as client:
@@ -144,20 +115,6 @@ def test_get_environment_proxies(environment, proxies):
144115
assert get_environment_proxies() == proxies
145116

146117

147-
@pytest.mark.parametrize(
148-
"headers, output",
149-
[
150-
([("content-type", "text/html")], [("content-type", "text/html")]),
151-
([("authorization", "s3kr3t")], [("authorization", "[secure]")]),
152-
([("proxy-authorization", "s3kr3t")], [("proxy-authorization", "[secure]")]),
153-
],
154-
)
155-
def test_obfuscate_sensitive_headers(headers, output):
156-
as_dict = {k: v for k, v in output}
157-
headers_class = httpx.Headers({k: v for k, v in headers})
158-
assert repr(headers_class) == f"Headers({as_dict!r})"
159-
160-
161118
def test_same_origin():
162119
origin = httpx.URL("https://example.com")
163120
request = httpx.Request("GET", "HTTPS://EXAMPLE.COM:443")

0 commit comments

Comments
 (0)