Skip to content

Commit 0a8b44e

Browse files
authored
Perform port normalization for http, https, ws, wss, and ftp schemes (#1603)
1 parent c927f3e commit 0a8b44e

File tree

3 files changed

+41
-17
lines changed

3 files changed

+41
-17
lines changed

httpx/_models.py

+36 -11
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,13 @@ class URL:
100100
url = httpx.URL("http://xn--fiqs8s.icom.museum")
101101
assert url.raw_host == b"xn--fiqs8s.icom.museum"
102102
103+
* `url.port` is either None or an integer. URLs that include the default port for
104+
"http", "https", "ws", "wss", and "ftp" schemes have their port normalized to `None`.
105+
106+
assert httpx.URL("http://example.com") == httpx.URL("http://example.com:80")
107+
assert httpx.URL("http://example.com").port is None
108+
assert httpx.URL("http://example.com:80").port is None
109+
103110
* `url.userinfo` is raw bytes, without URL escaping. Usually you'll want to work with
104111
`url.username` and `url.password` instead, which handle the URL escaping.
105112
@@ -144,6 +151,24 @@ def __init__(
144151
f"Invalid type for url. Expected str or httpx.URL, got {type(url)}: {url!r}"
145152
)
146153

154+
# Perform port normalization, following the WHATWG spec for default ports.
155+
#
156+
# See:
157+
# * https://tools.ietf.org/html/rfc3986#section-3.2.3
158+
# * https://url.spec.whatwg.org/#url-miscellaneous
159+
# * https://url.spec.whatwg.org/#scheme-state
160+
default_port = {
161+
"ftp": ":21",
162+
"http": ":80",
163+
"https": ":443",
164+
"ws": ":80",
165+
"wss": ":443",
166+
}.get(self._uri_reference.scheme, "")
167+
authority = self._uri_reference.authority or ""
168+
if default_port and authority.endswith(default_port):
169+
authority = authority[: -len(default_port)]
170+
self._uri_reference = self._uri_reference.copy_with(authority=authority)
171+
147172
if kwargs:
148173
self._uri_reference = self.copy_with(**kwargs)._uri_reference
149174

@@ -253,6 +278,15 @@ def raw_host(self) -> bytes:
253278
def port(self) -> typing.Optional[int]:
254279
"""
255280
The URL port as an integer.
281+
282+
Note that the URL class performs port normalization as per the WHATWG spec.
283+
Default ports for "http", "https", "ws", "wss", and "ftp" schemes are always
284+
treated as `None`.
285+
286+
For example:
287+
288+
assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80")
289+
assert httpx.URL("http://www.example.com:80").port is None
256290
"""
257291
port = self._uri_reference.port
258292
return int(port) if port else None
@@ -263,13 +297,8 @@ def netloc(self) -> bytes:
263297
Either `<host>` or `<host>:<port>` as bytes.
264298
Always normalized to lowercase, and IDNA encoded.
265299
266-
The port component is not included if it is the default for an
267-
"http://" or "https://" URL.
268-
269300
This property may be used for generating the value of a request
270301
"Host" header.
271-
272-
See: https://tools.ietf.org/html/rfc3986#section-3.2.3
273302
"""
274303
host = self._uri_reference.host or ""
275304
port = self._uri_reference.port
@@ -547,7 +576,7 @@ def __hash__(self) -> int:
547576
return hash(str(self))
548577

549578
def __eq__(self, other: typing.Any) -> bool:
550-
return isinstance(other, (URL, str)) and str(self) == str(other)
579+
return isinstance(other, (URL, str)) and str(self) == str(URL(other))
551580

552581
def __str__(self) -> str:
553582
return self._uri_reference.unsplit()
@@ -1099,11 +1128,7 @@ def _prepare(self, default_headers: typing.Dict[str, str]) -> None:
10991128
)
11001129

11011130
if not has_host and self.url.host:
1102-
default_port = {"http": b":80", "https": b":443"}.get(self.url.scheme, b"")
1103-
host_header = self.url.netloc
1104-
if host_header.endswith(default_port):
1105-
host_header = host_header[: -len(default_port)]
1106-
auto_headers.append((b"Host", host_header))
1131+
auto_headers.append((b"Host", self.url.netloc))
11071132
if not has_content_length and self.method in ("POST", "PUT", "PATCH"):
11081133
auto_headers.append((b"Content-Length", b"0"))
11091134

tests/client/test_proxies.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -79,9 +79,8 @@ def test_proxies_parameter(proxies, expected_proxies):
7979
("http://example.com", {"all://": PROXY_URL, "http://example.com": None}, None),
8080
("http://example.com", {"http://": PROXY_URL}, PROXY_URL),
8181
("http://example.com", {"all://example.com": PROXY_URL}, PROXY_URL),
82-
("http://example.com", {"all://example.com:80": PROXY_URL}, None),
8382
("http://example.com", {"http://example.com": PROXY_URL}, PROXY_URL),
84-
("http://example.com", {"http://example.com:80": PROXY_URL}, None),
83+
("http://example.com", {"http://example.com:80": PROXY_URL}, PROXY_URL),
8584
("http://example.com:8080", {"http://example.com:8080": PROXY_URL}, PROXY_URL),
8685
("http://example.com:8080", {"http://example.com": PROXY_URL}, PROXY_URL),
8786
(

tests/models/test_url.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
"中国.icom.museum",
1313
b"xn--fiqs8s.icom.museum",
1414
"http",
15-
80,
15+
None,
1616
),
1717
(
1818
"http://Königsgäßchen.de",
@@ -36,7 +36,7 @@
3636
"βόλος.com",
3737
b"xn--nxasmm1c.com",
3838
"https",
39-
443,
39+
None,
4040
),
4141
(
4242
"http://ශ්‍රී.com:444",
@@ -374,5 +374,5 @@ def test_ipv6_url_from_raw_url(host):
374374
url = httpx.URL(raw_url)
375375

376376
assert url.host == "::ffff:192.168.0.1"
377-
assert url.netloc == b"[::ffff:192.168.0.1]:443"
378-
assert str(url) == "https://[::ffff:192.168.0.1]:443/"
377+
assert url.netloc == b"[::ffff:192.168.0.1]"
378+
assert str(url) == "https://[::ffff:192.168.0.1]/"

0 commit comments

Comments
 (0)