Skip to content

Commit 343b299

Browse files
committed
Move port validation logic to parsing time
1 parent a42d98e commit 343b299

File tree

5 files changed

+43
-32
lines changed

5 files changed

+43
-32
lines changed

Lib/test/test_urllib2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -808,12 +808,12 @@ def test_file(self):
808808
self.assertEqual(respurl, url)
809809

810810
for url in [
811-
"file://localhost:80%s" % urlpath,
811+
"file://localhost:80/%s" % urlpath,
812812
"file:///file_does_not_exist.txt",
813813
"file://not-a-local-host.com//dir/file.txt",
814-
"file://%s:80%s/%s" % (socket.gethostbyname('localhost'),
814+
"file://%s:80/%s/%s" % (socket.gethostbyname('localhost'),
815815
os.getcwd(), TESTFN),
816-
"file://somerandomhost.ontheinternet.com%s/%s" %
816+
"file://somerandomhost.ontheinternet.com/%s/%s" %
817817
(os.getcwd(), TESTFN),
818818
]:
819819
try:

Lib/test/test_urlparse.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -608,9 +608,8 @@ def test_urlsplit_attributes(self):
608608

609609
# Verify an illegal port raises ValueError
610610
url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
611-
p = urllib.parse.urlsplit(url)
612611
with self.assertRaisesRegex(ValueError, "out of range"):
613-
p.port
612+
urllib.parse.urlsplit(url)
614613

615614
def test_urlsplit_remove_unsafe_bytes(self):
616615
# Remove ASCII tabs and newlines from input
@@ -660,10 +659,8 @@ def test_attributes_bad_port(self):
660659
if bytes:
661660
netloc = netloc.encode("ascii")
662661
url = url.encode("ascii")
663-
p = parse(url)
664-
self.assertEqual(p.netloc, netloc)
665662
with self.assertRaises(ValueError):
666-
p.port
663+
parse(url)
667664

668665
def test_attributes_without_netloc(self):
669666
# This example is straight from RFC 3261. It looks like it
@@ -1014,13 +1011,11 @@ def test_issue14072(self):
10141011

10151012
def test_port_casting_failure_message(self):
10161013
message = "Port could not be cast to integer value as 'oracle'"
1017-
p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
10181014
with self.assertRaisesRegex(ValueError, message):
1019-
p1.port
1015+
urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
10201016

1021-
p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
10221017
with self.assertRaisesRegex(ValueError, message):
1023-
p2.port
1018+
urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
10241019

10251020
def test_telurl_params(self):
10261021
p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')

Lib/urllib/parse.py

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import re
3333
import sys
3434
import types
35+
import collections
3536
import warnings
3637

3738
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
@@ -123,6 +124,18 @@ def _coerce_args(*args):
123124
return args + (_noop,)
124125
return _decode_args(args) + (_encode_result,)
125126

127+
def _parse_hostinfo(netloc):
128+
_, _, hostinfo = netloc.rpartition('@')
129+
_, have_open_br, bracketed = hostinfo.partition('[')
130+
if have_open_br:
131+
hostname, _, port = bracketed.partition(']')
132+
_, _, port = port.partition(':')
133+
else:
134+
hostname, _, port = hostinfo.partition(':')
135+
if not port:
136+
port = None
137+
return hostname, port
138+
126139
# Result objects are more helpful than simple tuples
127140
class _ResultMixinStr(object):
128141
"""Standard approach to encoding parsed results from str to bytes"""
@@ -167,13 +180,7 @@ def hostname(self):
167180
def port(self):
168181
port = self._hostinfo[1]
169182
if port is not None:
170-
try:
171-
port = int(port, 10)
172-
except ValueError:
173-
message = f'Port could not be cast to integer value as {port!r}'
174-
raise ValueError(message) from None
175-
if not ( 0 <= port <= 65535):
176-
raise ValueError("Port out of range 0-65535")
183+
port = _validate_port(port)
177184
return port
178185

179186
__class_getitem__ = classmethod(types.GenericAlias)
@@ -197,16 +204,7 @@ def _userinfo(self):
197204
@property
198205
def _hostinfo(self):
199206
netloc = self.netloc
200-
_, _, hostinfo = netloc.rpartition('@')
201-
_, have_open_br, bracketed = hostinfo.partition('[')
202-
if have_open_br:
203-
hostname, _, port = bracketed.partition(']')
204-
_, _, port = port.partition(':')
205-
else:
206-
hostname, _, port = hostinfo.partition(':')
207-
if not port:
208-
port = None
209-
return hostname, port
207+
return _parse_hostinfo(netloc)
210208

211209

212210
class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
@@ -402,6 +400,16 @@ def _splitparams(url):
402400
i = url.find(';')
403401
return url[:i], url[i+1:]
404402

403+
def _validate_port(port_str):
404+
try:
405+
port = int(port_str, 10)
406+
except ValueError:
407+
message = f'Port could not be cast to integer value as {port_str!r}'
408+
raise ValueError(message) from None
409+
if not (0 <= port <= 65535):
410+
raise ValueError("Port out of range 0-65535: %r" % port_str)
411+
return port
412+
405413
def _splitnetloc(url, start=0):
406414
delim = len(url) # position of end of domain part of url, default is end
407415
for c in '/?#': # look for delimiters; the order is NOT important
@@ -410,9 +418,7 @@ def _splitnetloc(url, start=0):
410418
delim = min(delim, wdelim) # use earliest delim position
411419
return url[start:delim], url[delim:] # return (domain, rest)
412420

413-
def _checknetloc(netloc):
414-
if not netloc or netloc.isascii():
415-
return
421+
def _checknetloc_nfkc(netloc):
416422
# looking for characters like \u2100 that expand to 'a/c'
417423
# IDNA uses NFKC equivalence, so normalize for this check
418424
import unicodedata
@@ -428,6 +434,14 @@ def _checknetloc(netloc):
428434
raise ValueError("netloc '" + netloc + "' contains invalid " +
429435
"characters under NFKC normalization")
430436

437+
def _checknetloc(netloc):
    """Validate *netloc*, raising ValueError if it is unacceptable.

    Two checks are made: a non-empty, non-ASCII netloc is passed to
    _checknetloc_nfkc(), and any port found by _parse_hostinfo() is
    passed to _validate_port().  Nothing is returned.
    """
    # The NFKC check is only needed when non-ASCII characters are present.
    needs_nfkc_check = bool(netloc) and not netloc.isascii()
    if needs_nfkc_check:
        _checknetloc_nfkc(netloc)

    port = _parse_hostinfo(netloc)[1]
    if port is None:
        return
    # Called for its ValueError on a bad port; the int result is unused.
    _validate_port(port)
444+
431445
# typed=True avoids BytesWarnings being emitted during cache key
432446
# comparison since this API supports both bytes and str input.
433447
@functools.lru_cache(typed=True)

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ Tom Bridgman
227227
Anthony Briggs
228228
Keith Briggs
229229
Tobias Brink
230+
Miguel Brito
230231
Dillon Brock
231232
Richard Brodie
232233
Michael Broghton
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Move port validation logic to parsing time. Patch by Miguel Brito.

0 commit comments

Comments
 (0)