Skip to content

gh-88037: Move port validation logic to parsing time #25774

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Lib/test/test_urllib2.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,12 +808,12 @@ def test_file(self):
self.assertEqual(respurl, url)

for url in [
"file://localhost:80%s" % urlpath,
"file://localhost:80/%s" % urlpath,
"file:///file_does_not_exist.txt",
"file://not-a-local-host.com//dir/file.txt",
"file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
"file://%s:80/%s/%s" % (socket.gethostbyname('localhost'),
os.getcwd(), TESTFN),
"file://somerandomhost.ontheinternet.com%s/%s" %
"file://somerandomhost.ontheinternet.com/%s/%s" %
(os.getcwd(), TESTFN),
]:
try:
Expand Down
13 changes: 4 additions & 9 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,9 +608,8 @@ def test_urlsplit_attributes(self):

# Verify an illegal port raises ValueError
url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
p = urllib.parse.urlsplit(url)
with self.assertRaisesRegex(ValueError, "out of range"):
p.port
urllib.parse.urlsplit(url)

def test_urlsplit_remove_unsafe_bytes(self):
# Remove ASCII tabs and newlines from input
Expand Down Expand Up @@ -660,10 +659,8 @@ def test_attributes_bad_port(self):
if bytes:
netloc = netloc.encode("ascii")
url = url.encode("ascii")
p = parse(url)
self.assertEqual(p.netloc, netloc)
with self.assertRaises(ValueError):
p.port
parse(url)

def test_attributes_without_netloc(self):
# This example is straight from RFC 3261. It looks like it
Expand Down Expand Up @@ -1014,13 +1011,11 @@ def test_issue14072(self):

def test_port_casting_failure_message(self):
message = "Port could not be cast to integer value as 'oracle'"
p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
with self.assertRaisesRegex(ValueError, message):
p1.port
urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')

p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
with self.assertRaisesRegex(ValueError, message):
p2.port
urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')

def test_telurl_params(self):
p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
Expand Down
54 changes: 34 additions & 20 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import re
import sys
import types
import collections
import warnings

__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
Expand Down Expand Up @@ -123,6 +124,18 @@ def _coerce_args(*args):
return args + (_noop,)
return _decode_args(args) + (_encode_result,)

def _parse_hostinfo(netloc):
_, _, hostinfo = netloc.rpartition('@')
_, have_open_br, bracketed = hostinfo.partition('[')
if have_open_br:
hostname, _, port = bracketed.partition(']')
_, _, port = port.partition(':')
else:
hostname, _, port = hostinfo.partition(':')
if not port:
port = None
return hostname, port

# Result objects are more helpful than simple tuples
class _ResultMixinStr(object):
"""Standard approach to encoding parsed results from str to bytes"""
Expand Down Expand Up @@ -167,13 +180,7 @@ def hostname(self):
def port(self):
port = self._hostinfo[1]
if port is not None:
try:
port = int(port, 10)
except ValueError:
message = f'Port could not be cast to integer value as {port!r}'
raise ValueError(message) from None
if not ( 0 <= port <= 65535):
raise ValueError("Port out of range 0-65535")
port = _validate_port(port)
return port

__class_getitem__ = classmethod(types.GenericAlias)
Expand All @@ -197,16 +204,7 @@ def _userinfo(self):
@property
def _hostinfo(self):
netloc = self.netloc
_, _, hostinfo = netloc.rpartition('@')
_, have_open_br, bracketed = hostinfo.partition('[')
if have_open_br:
hostname, _, port = bracketed.partition(']')
_, _, port = port.partition(':')
else:
hostname, _, port = hostinfo.partition(':')
if not port:
port = None
return hostname, port
return _parse_hostinfo(netloc)


class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
Expand Down Expand Up @@ -402,6 +400,16 @@ def _splitparams(url):
i = url.find(';')
return url[:i], url[i+1:]

def _validate_port(port_str):
try:
port = int(port_str, 10)
except ValueError:
message = f'Port could not be cast to integer value as {port_str!r}'
raise ValueError(message) from None
if not (0 <= port <= 65535):
raise ValueError("Port out of range 0-65535: %r" % port_str)
return port

def _splitnetloc(url, start=0):
delim = len(url) # position of end of domain part of url, default is end
for c in '/?#': # look for delimiters; the order is NOT important
Expand All @@ -410,9 +418,7 @@ def _splitnetloc(url, start=0):
delim = min(delim, wdelim) # use earliest delim position
return url[start:delim], url[delim:] # return (domain, rest)

def _checknetloc(netloc):
if not netloc or netloc.isascii():
return
def _checknetloc_nfkc(netloc):
# looking for characters like \u2100 that expand to 'a/c'
# IDNA uses NFKC equivalence, so normalize for this check
import unicodedata
Expand All @@ -428,6 +434,14 @@ def _checknetloc(netloc):
raise ValueError("netloc '" + netloc + "' contains invalid " +
"characters under NFKC normalization")

def _checknetloc(netloc):
if netloc and not netloc.isascii():
_checknetloc_nfkc(netloc)

_, port = _parse_hostinfo(netloc)
if port is not None:
_validate_port(port)

# typed=True avoids BytesWarnings being emitted during cache key
# comparison since this API supports both bytes and str input.
@functools.lru_cache(typed=True)
Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ Tom Bridgman
Anthony Briggs
Keith Briggs
Tobias Brink
Miguel Brito
Dillon Brock
Richard Brodie
Michael Broghton
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Move port validation logic to parsing time. Patch by Miguel Brito.