diff --git a/src/parser.cpp b/src/parser.cpp index f7354169e..ed1d4ac9f 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -619,10 +619,18 @@ result_type parse_url_impl(std::string_view user_input, input_position = input_size + 1; } url.has_opaque_path = true; + // This is a really unlikely scenario in real world. We should not seek // to optimize it. - url.update_base_pathname(unicode::percent_encode( - view, character_sets::C0_CONTROL_PERCENT_ENCODE)); + if (view.ends_with(' ')) { + std::string modified_view = + std::string(view.begin(), view.end() - 1) + "%20"; + url.update_base_pathname(unicode::percent_encode( + modified_view, character_sets::C0_CONTROL_PERCENT_ENCODE)); + } else { + url.update_base_pathname(unicode::percent_encode( + view, character_sets::C0_CONTROL_PERCENT_ENCODE)); + } break; } case state::PORT: { diff --git a/tests/basic_tests.cpp b/tests/basic_tests.cpp index 74046ae6f..7b69b90a7 100644 --- a/tests/basic_tests.cpp +++ b/tests/basic_tests.cpp @@ -196,8 +196,8 @@ TYPED_TEST(basic_tests, nodejs2) { ASSERT_EQ(url->get_search(), "?test"); url->set_search(""); ASSERT_EQ(url->get_search(), ""); - ASSERT_EQ(url->get_pathname(), "space"); - ASSERT_EQ(url->get_href(), "data:space"); + ASSERT_EQ(url->get_pathname(), "space %20"); + ASSERT_EQ(url->get_href(), "data:space %20"); SUCCEED(); } @@ -206,8 +206,8 @@ TYPED_TEST(basic_tests, nodejs3) { ASSERT_EQ(url->get_search(), "?test"); url->set_search(""); ASSERT_EQ(url->get_search(), ""); - ASSERT_EQ(url->get_pathname(), "space "); - ASSERT_EQ(url->get_href(), "data:space #test"); + ASSERT_EQ(url->get_pathname(), "space %20"); + ASSERT_EQ(url->get_href(), "data:space %20#test"); SUCCEED(); } diff --git a/tests/wpt/ada_extra_setters_tests.json b/tests/wpt/ada_extra_setters_tests.json index 9ba7523af..8d573409c 100644 --- a/tests/wpt/ada_extra_setters_tests.json +++ b/tests/wpt/ada_extra_setters_tests.json @@ -127,7 +127,7 @@ "new_value": "", "expected": { "search": "", - "pathname": "space" + "pathname": "space %20" } } ] diff --git a/tests/wpt/ada_extra_urltestdata.json b/tests/wpt/ada_extra_urltestdata.json index d6c162b3f..51e4be79a 100644 --- a/tests/wpt/ada_extra_urltestdata.json +++ b/tests/wpt/ada_extra_urltestdata.json @@ -224,7 +224,7 @@ { "input": "data:space ?test#test", "base": "about:blank", - "href": "data:space ?test#test", + "href": "data:space %20?test#test", "origin": "null", "protocol": "data:", "username": "", @@ -232,7 +232,7 @@ "host": "", "hostname": "", "port": "", - "pathname": "space ", + "pathname": "space %20", "search": "?test", "hash": "#test" }, diff --git a/tests/wpt/setters_tests.json b/tests/wpt/setters_tests.json index c47797c4d..e7072cd6a 100644 --- a/tests/wpt/setters_tests.json +++ b/tests/wpt/setters_tests.json @@ -1177,6 +1177,24 @@ "host": "test.invalid", "hostname": "test.invalid" } + }, + { + "href": "https://test.invalid/", + "new_value": "test/@aaa", + "expected": { + "href": "https://test/", + "host": "test", + "hostname": "test" + } + }, + { + "href": "https://test.invalid/", + "new_value": "test/:aaa", + "expected": { + "href": "https://test/", + "host": "test", + "hostname": "test" + } } ], "hostname": [ @@ -1624,6 +1642,24 @@ "host": "test.invalid", "hostname": "test.invalid" } + }, + { + "href": "https://test.invalid/", + "new_value": "test/@aaa", + "expected": { + "href": "https://test/", + "host": "test", + "hostname": "test" + } + }, + { + "href": "https://test.invalid/", + "new_value": "test/:aaa", + "expected": { + "href": "https://test/", + "host": "test", + "hostname": "test" + } } ], "port": [ @@ -2241,12 +2277,12 @@ } }, { - "comment": "Drop trailing spaces from trailing opaque paths", + "comment": "Trailing spaces and opaque paths", "href": "data:space ?query", "new_value": "", "expected": { - "href": "data:space", - "pathname": "space", + "href": "data:space%20", + "pathname": "space%20", "search": "" } }, @@ -2254,17 +2290,17 @@ "href": "sc:space ?query", "new_value": "", "expected": { - "href": "sc:space", - "pathname": "space", + "href": "sc:space%20", + "pathname": "space%20", "search": "" } }, { - "comment": "Do not drop trailing spaces from non-trailing opaque paths", + "comment": "Trailing spaces and opaque paths", "href": "data:space ?query#fragment", "new_value": "", "expected": { - "href": "data:space #fragment", + "href": "data:space %20#fragment", "search": "" } }, @@ -2272,7 +2308,7 @@ "href": "sc:space ?query#fragment", "new_value": "", "expected": { - "href": "sc:space #fragment", + "href": "sc:space %20#fragment", "search": "" } }, @@ -2429,12 +2465,12 @@ } }, { - "comment": "Drop trailing spaces from trailing opaque paths", + "comment": "Trailing spaces and opaque paths", "href": "data:space #fragment", "new_value": "", "expected": { - "href": "data:space", - "pathname": "space", + "href": "data:space %20", + "pathname": "space %20", "hash": "" } }, @@ -2442,17 +2478,17 @@ "href": "sc:space #fragment", "new_value": "", "expected": { - "href": "sc:space", - "pathname": "space", + "href": "sc:space %20", + "pathname": "space %20", "hash": "" } }, { - "comment": "Do not drop trailing spaces from non-trailing opaque paths", + "comment": "Trailing spaces and opaque paths", "href": "data:space ?query#fragment", "new_value": "", "expected": { - "href": "data:space ?query", + "href": "data:space %20?query", "hash": "" } }, @@ -2460,7 +2496,7 @@ "href": "sc:space ?query#fragment", "new_value": "", "expected": { - "href": "sc:space ?query", + "href": "sc:space %20?query", "hash": "" } }, diff --git a/tests/wpt/toascii.json b/tests/wpt/toascii.json index 02291962e..588ef150f 100644 --- a/tests/wpt/toascii.json +++ b/tests/wpt/toascii.json @@ -1,6 +1,9 @@ [ "This contains assorted IDNA tests that IdnaTestV2 might not cover.", "Feel free to deduplicate with a clear commit message.", + "", + "If the test only applies to the URL Standard's 'domain to ASCII', ", + "and not to TR46's ToASCII, then tag it with `urlStandardOnly`", { "comment": "Label with hyphens in 3rd and 4th position", "input": "aa--", @@ -239,11 +242,13 @@ }, { "input": "www.lookout.net\u2A7480", - "output": null + "output": null, + "urlStandardOnly": true }, { "input": "www\u00A0.lookout.net", - "output": null + "output": null, + "urlStandardOnly": true }, { "input": "\u1680lookout.net", @@ -251,7 +256,8 @@ }, { "input": "\u001flookout.net", - "output": null + "output": null, + "urlStandardOnly": true }, { "input": "look\u06DDout.net", diff --git a/tests/wpt/urltestdata.json b/tests/wpt/urltestdata.json index 214ed0852..d1a06f631 100644 --- a/tests/wpt/urltestdata.json +++ b/tests/wpt/urltestdata.json @@ -3778,6 +3778,126 @@ "search": "", "hash": "" }, + { + "input": "non-special:opaque ", + "base": null, + "href": "non-special:opaque", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque", + "search": "", + "hash": "" + }, + { + "input": "non-special:opaque ?hi", + "base": null, + "href": "non-special:opaque %20?hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque %20", + "search": "?hi", + "hash": "" + }, + { + "input": "non-special:opaque #hi", + "base": null, + "href": "non-special:opaque %20#hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque %20", + "search": "", + "hash": "#hi" + }, + { + "input": "non-special:opaque x?hi", + "base": null, + "href": "non-special:opaque x?hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque x", + "search": "?hi", + "hash": "" + }, + { + "input": "non-special:opaque x#hi", + "base": null, + "href": "non-special:opaque x#hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque x", + "search": "", + "hash": "#hi" + }, + { + "input": "non-special:opaque \t\t \t#hi", + "base": null, + "href": "non-special:opaque %20#hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque %20", + "search": "", + "hash": "#hi" + }, + { + "input": "non-special:opaque \t\t #hi", + "base": null, + "href": "non-special:opaque %20#hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque %20", + "search": "", + "hash": "#hi" + }, + { + "input": "non-special:opaque\t\t \r #hi", + "base": null, + "href": "non-special:opaque %20#hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque %20", + "search": "", + "hash": "#hi" + }, "Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)", { "input": "http://www.foo怂bar.com",