Skip to content

Commit 64f99b4

Browse files
committed
Auto merge of #74627 - petrochenkov:docbeauty2, r=Aaron1011
rustc_ast: Stop using "string typing" for doc comment tokens. Explicitly store their kind and style, as determined during lexing, in `token::DocComment`. Also, don't "beautify" doc comments before converting them to `#[doc]` attributes when passing them to macros (both declarative and procedural). Trimming empty lines, lines containing only `*`s, etc. is purely rustdoc's job, as part of presenting doc strings to users; rustc must not do this and must pass tokens along as precisely as possible internally.
2 parents 1e0e618 + a7eabec commit 64f99b4

File tree

25 files changed

+312
-240
lines changed

25 files changed

+312
-240
lines changed

src/librustc_ast/ast.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ pub use GenericArgs::*;
2323
pub use UnsafeSource::*;
2424

2525
use crate::ptr::P;
26-
use crate::token::{self, DelimToken};
26+
use crate::token::{self, CommentKind, DelimToken};
2727
use crate::tokenstream::{DelimSpan, TokenStream, TokenTree};
2828

2929
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
@@ -2365,7 +2365,7 @@ pub enum AttrKind {
23652365
/// A doc comment (e.g. `/// ...`, `//! ...`, `/** ... */`, `/*! ... */`).
23662366
/// Doc attributes (e.g. `#[doc="..."]`) are represented with the `Normal`
23672367
/// variant (which is much less compact and thus more expensive).
2368-
DocComment(Symbol),
2368+
DocComment(CommentKind, Symbol),
23692369
}
23702370

23712371
/// `TraitRef`s appear in impls.

src/librustc_ast/attr/mod.rs

+15-10
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::ast::{MacArgs, MacDelimiter, MetaItem, MetaItemKind, NestedMetaItem};
77
use crate::ast::{Path, PathSegment};
88
use crate::mut_visit::visit_clobber;
99
use crate::ptr::P;
10-
use crate::token::{self, Token};
10+
use crate::token::{self, CommentKind, Token};
1111
use crate::tokenstream::{DelimSpan, TokenStream, TokenTree, TreeAndJoint};
1212

1313
use rustc_data_structures::sync::Lock;
@@ -169,7 +169,7 @@ impl Attribute {
169169
pub fn has_name(&self, name: Symbol) -> bool {
170170
match self.kind {
171171
AttrKind::Normal(ref item) => item.path == name,
172-
AttrKind::DocComment(_) => false,
172+
AttrKind::DocComment(..) => false,
173173
}
174174
}
175175

@@ -198,7 +198,7 @@ impl Attribute {
198198
None
199199
}
200200
}
201-
AttrKind::DocComment(_) => None,
201+
AttrKind::DocComment(..) => None,
202202
}
203203
}
204204
pub fn name_or_empty(&self) -> Symbol {
@@ -218,7 +218,7 @@ impl Attribute {
218218
Some(MetaItem { kind: MetaItemKind::List(list), .. }) => Some(list),
219219
_ => None,
220220
},
221-
AttrKind::DocComment(_) => None,
221+
AttrKind::DocComment(..) => None,
222222
}
223223
}
224224

@@ -314,13 +314,13 @@ impl Attribute {
314314
pub fn is_doc_comment(&self) -> bool {
315315
match self.kind {
316316
AttrKind::Normal(_) => false,
317-
AttrKind::DocComment(_) => true,
317+
AttrKind::DocComment(..) => true,
318318
}
319319
}
320320

321321
pub fn doc_str(&self) -> Option<Symbol> {
322322
match self.kind {
323-
AttrKind::DocComment(symbol) => Some(symbol),
323+
AttrKind::DocComment(.., data) => Some(data),
324324
AttrKind::Normal(ref item) if item.path == sym::doc => {
325325
item.meta(self.span).and_then(|meta| meta.value_str())
326326
}
@@ -331,14 +331,14 @@ impl Attribute {
331331
pub fn get_normal_item(&self) -> &AttrItem {
332332
match self.kind {
333333
AttrKind::Normal(ref item) => item,
334-
AttrKind::DocComment(_) => panic!("unexpected doc comment"),
334+
AttrKind::DocComment(..) => panic!("unexpected doc comment"),
335335
}
336336
}
337337

338338
pub fn unwrap_normal_item(self) -> AttrItem {
339339
match self.kind {
340340
AttrKind::Normal(item) => item,
341-
AttrKind::DocComment(_) => panic!("unexpected doc comment"),
341+
AttrKind::DocComment(..) => panic!("unexpected doc comment"),
342342
}
343343
}
344344

@@ -405,8 +405,13 @@ pub fn mk_attr_outer(item: MetaItem) -> Attribute {
405405
mk_attr(AttrStyle::Outer, item.path, item.kind.mac_args(item.span), item.span)
406406
}
407407

408-
pub fn mk_doc_comment(style: AttrStyle, comment: Symbol, span: Span) -> Attribute {
409-
Attribute { kind: AttrKind::DocComment(comment), id: mk_attr_id(), style, span }
408+
pub fn mk_doc_comment(
409+
comment_kind: CommentKind,
410+
style: AttrStyle,
411+
data: Symbol,
412+
span: Span,
413+
) -> Attribute {
414+
Attribute { kind: AttrKind::DocComment(comment_kind, data), id: mk_attr_id(), style, span }
410415
}
411416

412417
pub fn list_contains_name(items: &[NestedMetaItem], name: Symbol) -> bool {

src/librustc_ast/mut_visit.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,7 @@ pub fn noop_visit_attribute<T: MutVisitor>(attr: &mut Attribute, vis: &mut T) {
582582
vis.visit_path(path);
583583
visit_mac_args(args, vis);
584584
}
585-
AttrKind::DocComment(_) => {}
585+
AttrKind::DocComment(..) => {}
586586
}
587587
vis.visit_span(span);
588588
}

src/librustc_ast/token.rs

+10-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ use rustc_span::{self, Span, DUMMY_SP};
1717
use std::borrow::Cow;
1818
use std::{fmt, mem};
1919

20+
#[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug, HashStable_Generic)]
21+
pub enum CommentKind {
22+
Line,
23+
Block,
24+
}
25+
2026
#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
2127
#[derive(HashStable_Generic)]
2228
pub enum BinOpToken {
@@ -238,9 +244,10 @@ pub enum TokenKind {
238244

239245
Interpolated(Lrc<Nonterminal>),
240246

241-
// Can be expanded into several tokens.
242-
/// A doc comment.
243-
DocComment(Symbol),
247+
/// A doc comment token.
248+
/// `Symbol` is the doc comment's data excluding its "quotes" (`///`, `/**`, etc)
249+
/// similarly to symbols in string literal tokens.
250+
DocComment(CommentKind, ast::AttrStyle, Symbol),
244251

245252
// Junk. These carry no data because we don't really care about the data
246253
// they *would* carry, and don't really want to allocate a new ident for

src/librustc_ast/util/comments.rs

+57-63
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
1-
pub use CommentStyle::*;
2-
3-
use crate::ast;
1+
use crate::ast::AttrStyle;
42
use rustc_span::source_map::SourceMap;
53
use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};
64

7-
use log::debug;
8-
95
#[cfg(test)]
106
mod tests;
117

@@ -28,43 +24,48 @@ pub struct Comment {
2824
pub pos: BytePos,
2925
}
3026

31-
pub fn is_line_doc_comment(s: &str) -> bool {
32-
let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/')
33-
|| s.starts_with("//!");
34-
debug!("is {:?} a doc comment? {}", s, res);
35-
res
36-
}
37-
38-
pub fn is_block_doc_comment(s: &str) -> bool {
39-
// Prevent `/**/` from being parsed as a doc comment
40-
let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*')
41-
|| s.starts_with("/*!"))
42-
&& s.len() >= 5;
43-
debug!("is {:?} a doc comment? {}", s, res);
44-
res
45-
}
46-
47-
// FIXME(#64197): Try to privatize this again.
48-
pub fn is_doc_comment(s: &str) -> bool {
49-
(s.starts_with("///") && is_line_doc_comment(s))
50-
|| s.starts_with("//!")
51-
|| (s.starts_with("/**") && is_block_doc_comment(s))
52-
|| s.starts_with("/*!")
27+
/// For a full line comment string returns its doc comment style if it's a doc comment
28+
/// and returns `None` if it's a regular comment.
29+
pub fn line_doc_comment_style(line_comment: &str) -> Option<AttrStyle> {
30+
let line_comment = line_comment.as_bytes();
31+
assert!(line_comment.starts_with(b"//"));
32+
match line_comment.get(2) {
33+
// `//!` is an inner line doc comment.
34+
Some(b'!') => Some(AttrStyle::Inner),
35+
Some(b'/') => match line_comment.get(3) {
36+
// `////` (more than 3 slashes) is not considered a doc comment.
37+
Some(b'/') => None,
38+
// Otherwise `///` is an outer line doc comment.
39+
_ => Some(AttrStyle::Outer),
40+
},
41+
_ => None,
42+
}
5343
}
5444

55-
pub fn doc_comment_style(comment: Symbol) -> ast::AttrStyle {
56-
let comment = &comment.as_str();
57-
assert!(is_doc_comment(comment));
58-
if comment.starts_with("//!") || comment.starts_with("/*!") {
59-
ast::AttrStyle::Inner
60-
} else {
61-
ast::AttrStyle::Outer
45+
/// For a full block comment string returns its doc comment style if it's a doc comment
46+
/// and returns `None` if it's a regular comment.
47+
pub fn block_doc_comment_style(block_comment: &str, terminated: bool) -> Option<AttrStyle> {
48+
let block_comment = block_comment.as_bytes();
49+
assert!(block_comment.starts_with(b"/*"));
50+
assert!(!terminated || block_comment.ends_with(b"*/"));
51+
match block_comment.get(2) {
52+
// `/*!` is an inner block doc comment.
53+
Some(b'!') => Some(AttrStyle::Inner),
54+
Some(b'*') => match block_comment.get(3) {
55+
// `/***` (more than 2 stars) is not considered a doc comment.
56+
Some(b'*') => None,
57+
// `/**/` is not considered a doc comment.
58+
Some(b'/') if block_comment.len() == 4 => None,
59+
// Otherwise `/**` is an outer block doc comment.
60+
_ => Some(AttrStyle::Outer),
61+
},
62+
_ => None,
6263
}
6364
}
6465

65-
pub fn strip_doc_comment_decoration(comment: Symbol) -> String {
66-
let comment = &comment.as_str();
67-
66+
/// Makes a doc string more presentable to users.
67+
/// Used by rustdoc and perhaps other tools, but not by rustc.
68+
pub fn beautify_doc_string(data: Symbol) -> String {
6869
/// remove whitespace-only lines from the start/end of lines
6970
fn vertical_trim(lines: Vec<String>) -> Vec<String> {
7071
let mut i = 0;
@@ -126,26 +127,15 @@ pub fn strip_doc_comment_decoration(comment: Symbol) -> String {
126127
}
127128
}
128129

129-
// one-line comments lose their prefix
130-
const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];
131-
132-
for prefix in ONELINERS {
133-
if comment.starts_with(*prefix) {
134-
return (&comment[prefix.len()..]).to_string();
135-
}
136-
}
137-
138-
if comment.starts_with("/*") {
139-
let lines =
140-
comment[3..comment.len() - 2].lines().map(|s| s.to_string()).collect::<Vec<String>>();
141-
130+
let data = data.as_str();
131+
if data.contains('\n') {
132+
let lines = data.lines().map(|s| s.to_string()).collect::<Vec<String>>();
142133
let lines = vertical_trim(lines);
143134
let lines = horizontal_trim(lines);
144-
145-
return lines.join("\n");
135+
lines.join("\n")
136+
} else {
137+
data.to_string()
146138
}
147-
148-
panic!("not a doc-comment: {}", comment);
149139
}
150140

151141
/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
@@ -203,7 +193,7 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
203193

204194
if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
205195
comments.push(Comment {
206-
style: Isolated,
196+
style: CommentStyle::Isolated,
207197
lines: vec![text[..shebang_len].to_string()],
208198
pos: start_bpos,
209199
});
@@ -219,23 +209,23 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
219209
while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
220210
idx = idx + 1 + next_newline;
221211
comments.push(Comment {
222-
style: BlankLine,
212+
style: CommentStyle::BlankLine,
223213
lines: vec![],
224214
pos: start_bpos + BytePos((pos + idx) as u32),
225215
});
226216
}
227217
}
228218
}
229-
rustc_lexer::TokenKind::BlockComment { terminated: _ } => {
230-
if !is_block_doc_comment(token_text) {
219+
rustc_lexer::TokenKind::BlockComment { terminated } => {
220+
if block_doc_comment_style(token_text, terminated).is_none() {
231221
let code_to_the_right = match text[pos + token.len..].chars().next() {
232222
Some('\r' | '\n') => false,
233223
_ => true,
234224
};
235225
let style = match (code_to_the_left, code_to_the_right) {
236-
(_, true) => Mixed,
237-
(false, false) => Isolated,
238-
(true, false) => Trailing,
226+
(_, true) => CommentStyle::Mixed,
227+
(false, false) => CommentStyle::Isolated,
228+
(true, false) => CommentStyle::Trailing,
239229
};
240230

241231
// Count the number of chars since the start of the line by rescanning.
@@ -249,9 +239,13 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
249239
}
250240
}
251241
rustc_lexer::TokenKind::LineComment => {
252-
if !is_doc_comment(token_text) {
242+
if line_doc_comment_style(token_text).is_none() {
253243
comments.push(Comment {
254-
style: if code_to_the_left { Trailing } else { Isolated },
244+
style: if code_to_the_left {
245+
CommentStyle::Trailing
246+
} else {
247+
CommentStyle::Isolated
248+
},
255249
lines: vec![token_text.to_string()],
256250
pos: start_bpos + BytePos(pos as u32),
257251
})
+19-27
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,50 @@
11
use super::*;
22
use crate::with_default_session_globals;
33

4+
#[test]
5+
fn line_doc_comments() {
6+
assert!(line_doc_comment_style("///").is_some());
7+
assert!(line_doc_comment_style("/// blah").is_some());
8+
assert!(line_doc_comment_style("////").is_none());
9+
}
10+
411
#[test]
512
fn test_block_doc_comment_1() {
613
with_default_session_globals(|| {
7-
let comment = "/**\n * Test \n ** Test\n * Test\n*/";
8-
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
14+
let comment = "\n * Test \n ** Test\n * Test\n";
15+
let stripped = beautify_doc_string(Symbol::intern(comment));
916
assert_eq!(stripped, " Test \n* Test\n Test");
1017
})
1118
}
1219

1320
#[test]
1421
fn test_block_doc_comment_2() {
1522
with_default_session_globals(|| {
16-
let comment = "/**\n * Test\n * Test\n*/";
17-
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
23+
let comment = "\n * Test\n * Test\n";
24+
let stripped = beautify_doc_string(Symbol::intern(comment));
1825
assert_eq!(stripped, " Test\n Test");
1926
})
2027
}
2128

2229
#[test]
2330
fn test_block_doc_comment_3() {
2431
with_default_session_globals(|| {
25-
let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
26-
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
32+
let comment = "\n let a: *i32;\n *a = 5;\n";
33+
let stripped = beautify_doc_string(Symbol::intern(comment));
2734
assert_eq!(stripped, " let a: *i32;\n *a = 5;");
2835
})
2936
}
3037

31-
#[test]
32-
fn test_block_doc_comment_4() {
33-
with_default_session_globals(|| {
34-
let comment = "/*******************\n test\n *********************/";
35-
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
36-
assert_eq!(stripped, " test");
37-
})
38-
}
39-
4038
#[test]
4139
fn test_line_doc_comment() {
4240
with_default_session_globals(|| {
43-
let stripped = strip_doc_comment_decoration(Symbol::intern("/// test"));
44-
assert_eq!(stripped, " test");
45-
let stripped = strip_doc_comment_decoration(Symbol::intern("///! test"));
46-
assert_eq!(stripped, " test");
47-
let stripped = strip_doc_comment_decoration(Symbol::intern("// test"));
41+
let stripped = beautify_doc_string(Symbol::intern(" test"));
4842
assert_eq!(stripped, " test");
49-
let stripped = strip_doc_comment_decoration(Symbol::intern("// test"));
50-
assert_eq!(stripped, " test");
51-
let stripped = strip_doc_comment_decoration(Symbol::intern("///test"));
52-
assert_eq!(stripped, "test");
53-
let stripped = strip_doc_comment_decoration(Symbol::intern("///!test"));
54-
assert_eq!(stripped, "test");
55-
let stripped = strip_doc_comment_decoration(Symbol::intern("//test"));
43+
let stripped = beautify_doc_string(Symbol::intern("! test"));
44+
assert_eq!(stripped, "! test");
45+
let stripped = beautify_doc_string(Symbol::intern("test"));
5646
assert_eq!(stripped, "test");
47+
let stripped = beautify_doc_string(Symbol::intern("!test"));
48+
assert_eq!(stripped, "!test");
5749
})
5850
}

0 commit comments

Comments
 (0)