From 5f98a7f00e338c0985e7743be5b23dbd8f1039b3 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 3 Feb 2023 17:36:27 -0700 Subject: [PATCH 1/2] rustdoc: use the same URL escape rules for fragments as for examples --- src/librustdoc/html/markdown.rs | 43 +---------- src/librustdoc/html/render/mod.rs | 75 +++++++++++++------ .../const-generics/const-generics-docs.rs | 6 +- tests/rustdoc/const-generics/const-impl.rs | 10 +-- tests/rustdoc/double-quote-escape.rs | 2 +- tests/rustdoc/primitive-tuple-variadic.rs | 4 +- .../rustdoc/sidebar-links-to-foreign-impl.rs | 4 +- tests/rustdoc/where-clause-order.rs | 2 +- 8 files changed, 68 insertions(+), 78 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 00e3f859bfcb3..03382aeeb737d 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -46,6 +46,7 @@ use crate::html::escape::Escape; use crate::html::format::Buffer; use crate::html::highlight; use crate::html::length_limit::HtmlWithLimit; +use crate::html::render::small_url_encode; use crate::html::toc::TocBuilder; use pulldown_cmark::{ @@ -294,47 +295,7 @@ impl<'a, I: Iterator>> Iterator for CodeBlocks<'_, 'a, I> { doctest::make_test(&test, krate, false, &Default::default(), edition, None); let channel = if test.contains("#![feature(") { "&version=nightly" } else { "" }; - // These characters don't need to be escaped in a URI. - // See https://url.spec.whatwg.org/#query-percent-encode-set - // and https://url.spec.whatwg.org/#urlencoded-parsing - // and https://url.spec.whatwg.org/#url-code-points - fn dont_escape(c: u8) -> bool { - (b'a' <= c && c <= b'z') - || (b'A' <= c && c <= b'Z') - || (b'0' <= c && c <= b'9') - || c == b'-' - || c == b'_' - || c == b'.' - || c == b',' - || c == b'~' - || c == b'!' - || c == b'\'' - || c == b'(' - || c == b')' - || c == b'*' - || c == b'/' - || c == b';' - || c == b':' - || c == b'?' 
- // As described in urlencoded-parsing, the - // first `=` is the one that separates key from - // value. Following `=`s are part of the value. - || c == b'=' - } - let mut test_escaped = String::new(); - for b in test.bytes() { - if dont_escape(b) { - test_escaped.push(char::from(b)); - } else if b == b' ' { - // URL queries are decoded with + replaced with SP - test_escaped.push('+'); - } else if b == b'%' { - test_escaped.push('%'); - test_escaped.push('%'); - } else { - write!(test_escaped, "%{:02X}", b).unwrap(); - } - } + let test_escaped = small_url_encode(test); Some(format!( r#"Run"#, url, test_escaped, channel, edition, diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 816a8f4e274ce..fa22c46120517 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -38,7 +38,7 @@ pub(crate) use self::span_map::{collect_spans_and_sources, LinkFromSrc}; use std::collections::VecDeque; use std::default::Default; -use std::fmt; +use std::fmt::{self, Write}; use std::fs; use std::iter::Peekable; use std::path::PathBuf; @@ -2020,31 +2020,60 @@ fn get_associated_constants( .collect::>() } -// The point is to url encode any potential character from a type with genericity. -fn small_url_encode(s: String) -> String { +pub(crate) fn small_url_encode(s: String) -> String { + // These characters don't need to be escaped in a URI. + // See https://url.spec.whatwg.org/#query-percent-encode-set + // and https://url.spec.whatwg.org/#urlencoded-parsing + // and https://url.spec.whatwg.org/#url-code-points + fn dont_escape(c: u8) -> bool { + (b'a' <= c && c <= b'z') + || (b'A' <= c && c <= b'Z') + || (b'0' <= c && c <= b'9') + || c == b'-' + || c == b'_' + || c == b'.' + || c == b',' + || c == b'~' + || c == b'!' + || c == b'\'' + || c == b'(' + || c == b')' + || c == b'*' + || c == b'/' + || c == b';' + || c == b':' + || c == b'?' 
+ // As described in urlencoded-parsing, the + // first `=` is the one that separates key from + // value. Following `=`s are part of the value. + || c == b'=' + } let mut st = String::new(); let mut last_match = 0; - for (idx, c) in s.char_indices() { - let escaped = match c { - '<' => "%3C", - '>' => "%3E", - ' ' => "%20", - '?' => "%3F", - '\'' => "%27", - '&' => "%26", - ',' => "%2C", - ':' => "%3A", - ';' => "%3B", - '[' => "%5B", - ']' => "%5D", - '"' => "%22", - _ => continue, - }; + for (idx, b) in s.bytes().enumerate() { + if dont_escape(b) { + continue; + } - st += &s[last_match..idx]; - st += escaped; - // NOTE: we only expect single byte characters here - which is fine as long as we - // only match single byte characters + if last_match != idx { + // Invariant: `idx` must be the first byte in a character at this point. + st += &s[last_match..idx]; + } + if b == b' ' { + // URL queries are decoded with + replaced with SP. + // While the same is not true for hashes, rustdoc only needs to be + // consistent with itself when encoding them. + st += "+"; + } else if b == b'%' { + st += "%%"; + } else { + write!(st, "%{:02X}", b).unwrap(); + } + // Invariant: if the current byte is not at the start of a multi-byte character, + // we need to get down here so that when the next turn of the loop comes around, + // last_match winds up equalling idx. + // + // In other words, dont_escape must always return `false` for every byte of a multi-byte character. 
last_match = idx + 1; } diff --git a/tests/rustdoc/const-generics/const-generics-docs.rs b/tests/rustdoc/const-generics/const-generics-docs.rs index ade70bbe80d92..7e27ef8d8e5b8 100644 --- a/tests/rustdoc/const-generics/const-generics-docs.rs +++ b/tests/rustdoc/const-generics/const-generics-docs.rs @@ -21,8 +21,8 @@ pub use extern_crate::WTrait; // 'pub trait Trait' // @has - '//*[@id="impl-Trait%3C1%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<1> for u8' // @has - '//*[@id="impl-Trait%3C2%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<2> for u8' -// @has - '//*[@id="impl-Trait%3C{1%20+%202}%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<{1 + 2}> for u8' -// @has - '//*[@id="impl-Trait%3CN%3E-for-%5Bu8%3B%20N%5D"]//h3[@class="code-header"]' \ +// @has - '//*[@id="impl-Trait%3C%7B1+%2B+2%7D%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<{1 + 2}> for u8' +// @has - '//*[@id="impl-Trait%3CN%3E-for-%5Bu8;+N%5D"]//h3[@class="code-header"]' \ // 'impl Trait for [u8; N]' pub trait Trait {} impl Trait<1> for u8 {} @@ -47,7 +47,7 @@ impl Foo where u8: Trait { } } -// @has foo/struct.Bar.html '//*[@id="impl-Bar%3Cu8%2C%20M%3E"]/h3[@class="code-header"]' 'impl Bar' +// @has foo/struct.Bar.html '//*[@id="impl-Bar%3Cu8,+M%3E"]/h3[@class="code-header"]' 'impl Bar' impl Bar { // @has - '//*[@id="method.hey"]' \ // 'pub fn hey(&self) -> Foowhere u8: Trait' diff --git a/tests/rustdoc/const-generics/const-impl.rs b/tests/rustdoc/const-generics/const-impl.rs index 91866b7d890c7..152b643bf4bd8 100644 --- a/tests/rustdoc/const-generics/const-impl.rs +++ b/tests/rustdoc/const-generics/const-impl.rs @@ -9,20 +9,20 @@ pub enum Order { } // @has foo/struct.VSet.html '//pre[@class="rust item-decl"]' 'pub struct VSet' -// @has foo/struct.VSet.html '//*[@id="impl-Send-for-VSet%3CT%2C%20ORDER%3E"]/h3[@class="code-header"]' 'impl Send for VSet' -// @has foo/struct.VSet.html '//*[@id="impl-Sync-for-VSet%3CT%2C%20ORDER%3E"]/h3[@class="code-header"]' 'impl Sync for VSet' +// 
@has foo/struct.VSet.html '//*[@id="impl-Send-for-VSet%3CT,+ORDER%3E"]/h3[@class="code-header"]' 'impl Send for VSet' +// @has foo/struct.VSet.html '//*[@id="impl-Sync-for-VSet%3CT,+ORDER%3E"]/h3[@class="code-header"]' 'impl Sync for VSet' pub struct VSet { inner: Vec, } -// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT%2C%20{%20Order%3A%3ASorted%20}%3E"]/h3[@class="code-header"]' 'impl VSet' +// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT,+%7B+Order::Sorted+%7D%3E"]/h3[@class="code-header"]' 'impl VSet' impl VSet { pub fn new() -> Self { Self { inner: Vec::new() } } } -// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT%2C%20{%20Order%3A%3AUnsorted%20}%3E"]/h3[@class="code-header"]' 'impl VSet' +// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT,+%7B+Order::Unsorted+%7D%3E"]/h3[@class="code-header"]' 'impl VSet' impl VSet { pub fn new() -> Self { Self { inner: Vec::new() } @@ -31,7 +31,7 @@ impl VSet { pub struct Escape; -// @has foo/struct.Escape.html '//*[@id="impl-Escape%3Cr#%22%3Cscript%3Ealert(%22Escape%22)%3B%3C/script%3E%22#%3E"]/h3[@class="code-header"]' 'impl Escapealert("Escape");"#>' +// @has foo/struct.Escape.html '//*[@id="impl-Escape%3Cr%23%22%3Cscript%3Ealert(%22Escape%22);%3C/script%3E%22%23%3E"]/h3[@class="code-header"]' 'impl Escapealert("Escape");"#>' impl Escapealert("Escape");"#> { pub fn f() {} } diff --git a/tests/rustdoc/double-quote-escape.rs b/tests/rustdoc/double-quote-escape.rs index 350c897417d1f..4f4436377a07b 100644 --- a/tests/rustdoc/double-quote-escape.rs +++ b/tests/rustdoc/double-quote-escape.rs @@ -7,5 +7,5 @@ pub trait Foo { pub struct Bar; // @has foo/struct.Bar.html -// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo%3Cunsafe%20extern%20%22C%22%20fn()%3E-for-Bar"]' 'Foo' +// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo%3Cunsafe+extern+%22C%22+fn()%3E-for-Bar"]' 'Foo' impl Foo for Bar {} diff --git a/tests/rustdoc/primitive-tuple-variadic.rs 
b/tests/rustdoc/primitive-tuple-variadic.rs index db7cfd60c71a6..846028bbb1906 100644 --- a/tests/rustdoc/primitive-tuple-variadic.rs +++ b/tests/rustdoc/primitive-tuple-variadic.rs @@ -6,13 +6,13 @@ pub trait Foo {} // @has foo/trait.Foo.html -// @has - '//section[@id="impl-Foo-for-(T%2C)"]/h3' 'impl Foo for (T₁, T₂, …, Tₙ)' +// @has - '//section[@id="impl-Foo-for-(T,)"]/h3' 'impl Foo for (T₁, T₂, …, Tₙ)' #[doc(fake_variadic)] impl Foo for (T,) {} pub trait Bar {} // @has foo/trait.Bar.html -// @has - '//section[@id="impl-Bar-for-(U%2C)"]/h3' 'impl Bar for (U₁, U₂, …, Uₙ)' +// @has - '//section[@id="impl-Bar-for-(U,)"]/h3' 'impl Bar for (U₁, U₂, …, Uₙ)' #[doc(fake_variadic)] impl Bar for (U,) {} diff --git a/tests/rustdoc/sidebar-links-to-foreign-impl.rs b/tests/rustdoc/sidebar-links-to-foreign-impl.rs index 11e946948026d..caa17dfbb1c73 100644 --- a/tests/rustdoc/sidebar-links-to-foreign-impl.rs +++ b/tests/rustdoc/sidebar-links-to-foreign-impl.rs @@ -7,8 +7,8 @@ // @has - '//h2[@id="foreign-impls"]' 'Implementations on Foreign Types' // @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo-for-u32"]' 'u32' // @has - '//*[@id="impl-Foo-for-u32"]//h3[@class="code-header"]' 'impl Foo for u32' -// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo-for-%26%27a%20str"]' "&'a str" -// @has - '//*[@id="impl-Foo-for-%26%27a%20str"]//h3[@class="code-header"]' "impl<'a> Foo for &'a str" +// @has - "//*[@class=\"sidebar-elems\"]//section//a[@href=\"#impl-Foo-for-%26'a+str\"]" "&'a str" +// @has - "//*[@id=\"impl-Foo-for-%26'a+str\"]//h3[@class=\"code-header\"]" "impl<'a> Foo for &'a str" pub trait Foo {} impl Foo for u32 {} diff --git a/tests/rustdoc/where-clause-order.rs b/tests/rustdoc/where-clause-order.rs index b8502e10a48c4..b10f8f6856e8c 100644 --- a/tests/rustdoc/where-clause-order.rs +++ b/tests/rustdoc/where-clause-order.rs @@ -7,7 +7,7 @@ where } // @has 'foo/trait.SomeTrait.html' -// @has - 
"//*[@id='impl-SomeTrait%3C(A%2C%20B%2C%20C%2C%20D%2C%20E)%3E-for-(A%2C%20B%2C%20C%2C%20D%2C%20E)']/h3" "impl SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)where A: PartialOrd + PartialEq, B: PartialOrd + PartialEq, C: PartialOrd + PartialEq, D: PartialOrd + PartialEq, E: PartialOrd + PartialEq + ?Sized, " +// @has - "//*[@id='impl-SomeTrait%3C(A,+B,+C,+D,+E)%3E-for-(A,+B,+C,+D,+E)']/h3" "impl SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)where A: PartialOrd + PartialEq, B: PartialOrd + PartialEq, C: PartialOrd + PartialEq, D: PartialOrd + PartialEq, E: PartialOrd + PartialEq + ?Sized, " impl SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E) where A: PartialOrd + PartialEq, From fa6c3a2d2aab19fa95c8612c53ac87ac4c8c64dc Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 3 Feb 2023 19:02:20 -0700 Subject: [PATCH 2/2] docs: update fragment for Result impls --- library/core/src/result.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/library/core/src/result.rs b/library/core/src/result.rs index f00c40f35d584..7596e9cc005e9 100644 --- a/library/core/src/result.rs +++ b/library/core/src/result.rs @@ -458,7 +458,7 @@ //! [`Result`] of a collection of each contained value of the original //! [`Result`] values, or [`Err`] if any of the elements was [`Err`]. //! -//! [impl-FromIterator]: Result#impl-FromIterator%3CResult%3CA%2C%20E%3E%3E-for-Result%3CV%2C%20E%3E +//! [impl-FromIterator]: Result#impl-FromIterator%3CResult%3CA,+E%3E%3E-for-Result%3CV,+E%3E //! //! ``` //! let v = [Ok(2), Ok(4), Err("err!"), Ok(8)]; @@ -474,8 +474,8 @@ //! to provide the [`product`][Iterator::product] and //! [`sum`][Iterator::sum] methods. //! -//! [impl-Product]: Result#impl-Product%3CResult%3CU%2C%20E%3E%3E-for-Result%3CT%2C%20E%3E -//! [impl-Sum]: Result#impl-Sum%3CResult%3CU%2C%20E%3E%3E-for-Result%3CT%2C%20E%3E +//! [impl-Product]: Result#impl-Product%3CResult%3CU,+E%3E%3E-for-Result%3CT,+E%3E +//! 
[impl-Sum]: Result#impl-Sum%3CResult%3CU,+E%3E%3E-for-Result%3CT,+E%3E //! //! ``` //! let v = [Err("error!"), Ok(1), Ok(2), Ok(3), Err("foo")];