Skip to content

Commit 74b8694

Browse files
authored
Fix anarchist URL where path starts with // (#817)
* Handle null host with leading empty path segment
* Disable failing test
* Fix invariants of anarchist URLs + empty segment
* Handle empty leading segment in relative paths
* Handle case where input starts with a slash
* Unify handling of leading empty path segment
* Avoid casting back and forth between u32 and usize
* Fix use of .get() that should never fail
1 parent edeaea7 commit 74b8694

File tree

5 files changed

+69
-16
lines changed

5 files changed

+69
-16
lines changed

url/src/lib.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -683,7 +683,14 @@ impl Url {
683683
assert_eq!(self.host_end, self.scheme_end + 1);
684684
assert_eq!(self.host, HostInternal::None);
685685
assert_eq!(self.port, None);
686-
assert_eq!(self.path_start, self.scheme_end + 1);
686+
if self.path().starts_with("//") {
687+
// special case when first path segment is empty
688+
assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
689+
assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
690+
assert_eq!(self.path_start, self.scheme_end + 3);
691+
} else {
692+
assert_eq!(self.path_start, self.scheme_end + 1);
693+
}
687694
}
688695
if let Some(start) = self.query_start {
689696
assert!(start >= self.path_start);

url/src/parser.rs

+43-3
Original file line numberDiff line numberDiff line change
@@ -474,9 +474,8 @@ impl<'a> Parser<'a> {
474474
let host = HostInternal::None;
475475
let port = None;
476476
let remaining = if let Some(input) = input.split_prefix('/') {
477-
let path_start = self.serialization.len();
478477
self.serialization.push('/');
479-
self.parse_path(scheme_type, &mut false, path_start, input)
478+
self.parse_path(scheme_type, &mut false, path_start as usize, input)
480479
} else {
481480
self.parse_cannot_be_a_base_path(input)
482481
};
@@ -1354,9 +1353,50 @@ impl<'a> Parser<'a> {
13541353
host_end: u32,
13551354
host: HostInternal,
13561355
port: Option<u16>,
1357-
path_start: u32,
1356+
mut path_start: u32,
13581357
remaining: Input<'_>,
13591358
) -> ParseResult<Url> {
1359+
// Special case for anarchist URLs with a leading empty path segment
1360+
// This prevents web+demo:/.//not-a-host/ or web+demo:/path/..//not-a-host/,
1361+
// when parsed and then serialized, from ending up as web+demo://not-a-host/
1362+
// (they end up as web+demo:/.//not-a-host/).
1363+
//
1364+
// If url’s host is null, url does not have an opaque path,
1365+
// url’s path’s size is greater than 1, and url’s path[0] is the empty string,
1366+
// then append U+002F (/) followed by U+002E (.) to output.
1367+
let scheme_end_as_usize = scheme_end as usize;
1368+
let path_start_as_usize = path_start as usize;
1369+
if path_start_as_usize == scheme_end_as_usize + 1 {
1370+
// Anarchist URL
1371+
if self.serialization[path_start_as_usize..].starts_with("//") {
1372+
// Case 1: The base URL did not have an empty path segment, but the resulting one does
1373+
// Insert the "/." prefix
1374+
self.serialization.insert_str(path_start_as_usize, "/.");
1375+
path_start += 2;
1376+
}
1377+
assert!(!self.serialization[scheme_end_as_usize..].starts_with("://"));
1378+
} else if path_start_as_usize == scheme_end_as_usize + 3
1379+
&& &self.serialization[scheme_end_as_usize..path_start_as_usize] == ":/."
1380+
{
1381+
// Anarchist URL with leading empty path segment
1382+
// The base URL has a "/." between the host and the path
1383+
assert_eq!(self.serialization.as_bytes()[path_start_as_usize], b'/');
1384+
if self
1385+
.serialization
1386+
.as_bytes()
1387+
.get(path_start_as_usize + 1)
1388+
.copied()
1389+
!= Some(b'/')
1390+
{
1391+
// Case 2: The base URL had an empty path segment, but the resulting one does not
1392+
// Remove the "/." prefix
1393+
self.serialization
1394+
.replace_range(scheme_end_as_usize..path_start_as_usize, ":");
1395+
path_start -= 2;
1396+
}
1397+
assert!(!self.serialization[scheme_end_as_usize..].starts_with("://"));
1398+
}
1399+
13601400
let (query_start, fragment_start) =
13611401
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
13621402
Ok(Url {

url/src/slicing.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,14 @@ impl Url {
149149
}
150150
}
151151

152-
Position::AfterPort => self.path_start as usize,
152+
Position::AfterPort => {
153+
if let Some(port) = self.port {
154+
debug_assert!(self.byte_at(self.host_end) == b':');
155+
self.host_end as usize + ":".len() + port.to_string().len()
156+
} else {
157+
self.host_end as usize
158+
}
159+
}
153160

154161
Position::BeforePath => self.path_start as usize,
155162

url/tests/unit.rs

+10
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,16 @@ fn no_panic() {
954954
url::quirks::set_hostname(&mut url, "//eom/datcom/\\\\t\\://eom/data.cs").unwrap();
955955
}
956956

957+
#[test]
958+
fn test_null_host_with_leading_empty_path_segment() {
959+
// See the Note in item 3 of URL serializing in the URL Standard:
960+
// https://url.spec.whatwg.org/#url-serializing
961+
let url = Url::parse("m:/.//\\").unwrap();
962+
let encoded = url.as_str();
963+
let reparsed = Url::parse(encoded).unwrap();
964+
assert_eq!(reparsed, url);
965+
}
966+
957967
#[test]
958968
fn pop_if_empty_in_bounds() {
959969
let mut url = Url::parse("m://").unwrap();

url/tests/urltestdata.json

-11
Original file line numberDiff line numberDiff line change
@@ -7487,7 +7487,6 @@
74877487
"hash": ""
74887488
},
74897489
"Serialize /. in path",
7490-
"skip next",
74917490
{
74927491
"input": "non-spec:/.//",
74937492
"base": "about:blank",
@@ -7502,7 +7501,6 @@
75027501
"search": "",
75037502
"hash": ""
75047503
},
7505-
"skip next",
75067504
{
75077505
"input": "non-spec:/..//",
75087506
"base": "about:blank",
@@ -7517,7 +7515,6 @@
75177515
"search": "",
75187516
"hash": ""
75197517
},
7520-
"skip next",
75217518
{
75227519
"input": "non-spec:/a/..//",
75237520
"base": "about:blank",
@@ -7532,7 +7529,6 @@
75327529
"search": "",
75337530
"hash": ""
75347531
},
7535-
"skip next",
75367532
{
75377533
"input": "non-spec:/.//path",
75387534
"base": "about:blank",
@@ -7547,7 +7543,6 @@
75477543
"search": "",
75487544
"hash": ""
75497545
},
7550-
"skip next",
75517546
{
75527547
"input": "non-spec:/..//path",
75537548
"base": "about:blank",
@@ -7562,7 +7557,6 @@
75627557
"search": "",
75637558
"hash": ""
75647559
},
7565-
"skip next",
75667560
{
75677561
"input": "non-spec:/a/..//path",
75687562
"base": "about:blank",
@@ -7592,7 +7586,6 @@
75927586
"search": "",
75937587
"hash": ""
75947588
},
7595-
"skip next",
75967589
{
75977590
"input": "/..//path",
75987591
"base": "non-spec:/p",
@@ -7607,7 +7600,6 @@
76077600
"search": "",
76087601
"hash": ""
76097602
},
7610-
"skip next",
76117603
{
76127604
"input": "..//path",
76137605
"base": "non-spec:/p",
@@ -7622,7 +7614,6 @@
76227614
"search": "",
76237615
"hash": ""
76247616
},
7625-
"skip next",
76267617
{
76277618
"input": "a/..//path",
76287619
"base": "non-spec:/p",
@@ -7637,7 +7628,6 @@
76377628
"search": "",
76387629
"hash": ""
76397630
},
7640-
"skip next",
76417631
{
76427632
"input": "",
76437633
"base": "non-spec:/..//p",
@@ -7652,7 +7642,6 @@
76527642
"search": "",
76537643
"hash": ""
76547644
},
7655-
"skip next",
76567645
{
76577646
"input": "path",
76587647
"base": "non-spec:/..//p",

0 commit comments

Comments
 (0)