Skip to content

Commit dd6bbe0

Browse files
Fix parse_url (#2161)
Fix URL parsing. --------- Co-authored-by: Anton Pirker <[email protected]>
1 parent 8a6c19c commit dd6bbe0

File tree

2 files changed

+41
-17
lines changed

2 files changed

+41
-17
lines changed

Diff for: sentry_sdk/utils.py

+23-17
Original file line numberDiff line numberDiff line change
@@ -1353,8 +1353,8 @@ def from_base64(base64_string):
13531353
Components = namedtuple("Components", ["scheme", "netloc", "path", "query", "fragment"])
13541354

13551355

1356-
def sanitize_url(url, remove_authority=True, remove_query_values=True):
1357-
# type: (str, bool, bool) -> str
1356+
def sanitize_url(url, remove_authority=True, remove_query_values=True, split=False):
1357+
# type: (str, bool, bool, bool) -> Union[str, Components]
13581358
"""
13591359
Removes the authority and query parameter values from a given URL.
13601360
"""
@@ -1383,17 +1383,18 @@ def sanitize_url(url, remove_authority=True, remove_query_values=True):
13831383
else:
13841384
query_string = parsed_url.query
13851385

1386-
safe_url = urlunsplit(
1387-
Components(
1388-
scheme=parsed_url.scheme,
1389-
netloc=netloc,
1390-
query=query_string,
1391-
path=parsed_url.path,
1392-
fragment=parsed_url.fragment,
1393-
)
1386+
components = Components(
1387+
scheme=parsed_url.scheme,
1388+
netloc=netloc,
1389+
query=query_string,
1390+
path=parsed_url.path,
1391+
fragment=parsed_url.fragment,
13941392
)
13951393

1396-
return safe_url
1394+
if split:
1395+
return components
1396+
else:
1397+
return urlunsplit(components)
13971398

13981399

13991400
ParsedUrl = namedtuple("ParsedUrl", ["url", "query", "fragment"])
@@ -1406,20 +1407,25 @@ def parse_url(url, sanitize=True):
14061407
parameters will be sanitized to remove sensitive data. The authority (username and password)
14071408
in the URL will always be removed.
14081409
"""
1409-
url = sanitize_url(url, remove_authority=True, remove_query_values=sanitize)
1410+
parsed_url = sanitize_url(
1411+
url, remove_authority=True, remove_query_values=sanitize, split=True
1412+
)
14101413

1411-
parsed_url = urlsplit(url)
14121414
base_url = urlunsplit(
14131415
Components(
1414-
scheme=parsed_url.scheme,
1415-
netloc=parsed_url.netloc,
1416+
scheme=parsed_url.scheme, # type: ignore
1417+
netloc=parsed_url.netloc, # type: ignore
14161418
query="",
1417-
path=parsed_url.path,
1419+
path=parsed_url.path, # type: ignore
14181420
fragment="",
14191421
)
14201422
)
14211423

1422-
return ParsedUrl(url=base_url, query=parsed_url.query, fragment=parsed_url.fragment)
1424+
return ParsedUrl(
1425+
url=base_url,
1426+
query=parsed_url.query, # type: ignore
1427+
fragment=parsed_url.fragment, # type: ignore
1428+
)
14231429

14241430

14251431
def is_valid_sample_rate(rate, source):

Diff for: tests/test_utils.py

+18
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,24 @@ def test_sanitize_url(url, expected_result):
6969
assert parts == expected_parts
7070

7171

72+
def test_sanitize_url_and_split():
73+
parts = sanitize_url(
74+
"https://username:password@example.com?token=abc&sessionid=123&save=true",
75+
split=True,
76+
)
77+
78+
expected_query = sorted(
79+
"token=[Filtered]&sessionid=[Filtered]&save=[Filtered]".split("&")
80+
)
81+
query = sorted(parts.query.split("&"))
82+
83+
assert parts.scheme == "https"
84+
assert parts.netloc == "[Filtered]:[Filtered]@example.com"
85+
assert query == expected_query
86+
assert parts.path == ""
87+
assert parts.fragment == ""
88+
89+
7290
@pytest.mark.parametrize(
7391
("url", "sanitize", "expected_url", "expected_query", "expected_fragment"),
7492
[

0 commit comments

Comments
 (0)