Skip to content

Commit db64c83

Browse files
author
Lukasz Anforowicz
committed
Handling of numbered markdown lists.
Fixes issue rust-lang#5416
1 parent ef91154 commit db64c83

File tree

5 files changed

+288
-38
lines changed

5 files changed

+288
-38
lines changed

src/comment.rs

+122-34
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,9 @@ impl CodeBlockAttribute {
432432

433433
/// Block that is formatted as an item.
434434
///
435-
/// An item starts with either a star `*` a dash `-` or a greater-than `>`.
435+
/// An item starts with either a star `*`, a dash `-`, a greater-than `>`,
436+
/// or a number `12.` or `34)` (with at most 2 digits).
437+
///
436438
/// Different levels of indentation are handled by shrinking the shape accordingly.
437439
struct ItemizedBlock {
438440
/// the lines that are identified as part of an itemized block
@@ -446,36 +448,47 @@ struct ItemizedBlock {
446448
}
447449

448450
impl ItemizedBlock {
449-
/// Returns `true` if the line is formatted as an item
450-
fn is_itemized_line(line: &str) -> bool {
451-
let trimmed = line.trim_start();
452-
trimmed.starts_with("* ") || trimmed.starts_with("- ") || trimmed.starts_with("> ")
453-
}
454-
455-
/// Creates a new ItemizedBlock described with the given line.
456-
/// The `is_itemized_line` needs to be called first.
457-
fn new(line: &str) -> ItemizedBlock {
458-
let space_to_sigil = line.chars().take_while(|c| c.is_whitespace()).count();
459-
// +2 = '* ', which will add the appropriate amount of whitespace to keep itemized
460-
// content formatted correctly.
461-
let mut indent = space_to_sigil + 2;
462-
let mut line_start = " ".repeat(indent);
463-
464-
// Markdown blockquote start with a "> "
465-
if line.trim_start().starts_with(">") {
466-
// remove the original +2 indent because there might be multiple nested block quotes
467-
// and it's easier to reason about the final indent by just taking the length
468-
// of th new line_start. We update the indent because it effects the max width
469-
// of each formatted line.
470-
line_start = itemized_block_quote_start(line, line_start, 2);
471-
indent = line_start.len();
451+
/// Returns the sigil's (e.g. "- ", "* ", or "1. ") length or `None` if there is no sigil.
///
/// `trimmed` is the candidate line with its leading whitespace already removed.
///
/// Recognized sigils are:
/// * `* `, `- ` and `> ` (always 2 bytes long), and
/// * one or two ASCII digits followed by `. ` or `) ` (e.g. `1. ` or `34) `).
///
/// A bare `. ` or `) ` without any digit in front of it is *not* a sigil.
fn get_sigil_length(trimmed: &str) -> Option<usize> {
    if trimmed.starts_with("* ") || trimmed.starts_with("- ") || trimmed.starts_with("> ") {
        return Some(2);
    }

    for suffix in [". ", ") "] {
        if let Some((prefix, _)) = trimmed.split_once(suffix) {
            // Require at least one digit: `all` is vacuously true for an empty prefix, which
            // would otherwise classify lines such as ". foo" or ") foo" as numbered items.
            // More than 2 digits are rejected to avoid treating e.g. "1868. He was..." as an
            // item.
            let is_short_number =
                (1..=2).contains(&prefix.len()) && prefix.bytes().all(|b| b.is_ascii_digit());
            if is_short_number {
                return Some(prefix.len() + suffix.len());
            }
        }
    }

    None
}
467+
468+
/// Creates a new ItemizedBlock described with the given `line`
469+
/// or None if `line` doesn't start an item.
470+
fn new(line: &str) -> Option<ItemizedBlock> {
471+
ItemizedBlock::get_sigil_length(line.trim_start()).map(|sigil_length| {
472+
let space_to_sigil = line.chars().take_while(|c| c.is_whitespace()).count();
473+
let mut indent = space_to_sigil + sigil_length;
474+
let mut line_start = " ".repeat(indent);
475+
476+
// Markdown blockquote start with a "> "
477+
if line.trim_start().starts_with(">") {
478+
// remove the original +2 indent because there might be multiple nested block quotes
479+
// and it's easier to reason about the final indent by just taking the length
480+
// of the new line_start. We update the indent because it effects the max width
481+
// of each formatted line.
482+
line_start = itemized_block_quote_start(line, line_start, 2);
483+
indent = line_start.len();
484+
}
485+
ItemizedBlock {
486+
lines: vec![line[indent..].to_string()],
487+
indent,
488+
opener: line[..indent].to_string(),
489+
line_start,
490+
}
491+
})
479492
}
480493

481494
/// Returns a `StringFormat` used for formatting the content of an item.
@@ -494,7 +507,7 @@ impl ItemizedBlock {
494507
/// Returns `true` if the line is part of the current itemized block.
495508
/// If it is, then it is added to the internal lines list.
496509
fn add_line(&mut self, line: &str) -> bool {
497-
if !ItemizedBlock::is_itemized_line(line)
510+
if ItemizedBlock::get_sigil_length(line.trim_start()).is_none()
498511
&& self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
499512
{
500513
self.lines.push(line.to_string());
@@ -765,10 +778,11 @@ impl<'a> CommentRewrite<'a> {
765778
self.item_block = None;
766779
if let Some(stripped) = line.strip_prefix("```") {
767780
self.code_block_attr = Some(CodeBlockAttribute::new(stripped))
768-
} else if self.fmt.config.wrap_comments() && ItemizedBlock::is_itemized_line(line) {
769-
let ib = ItemizedBlock::new(line);
770-
self.item_block = Some(ib);
771-
return false;
781+
} else if self.fmt.config.wrap_comments() {
782+
if let Some(ib) = ItemizedBlock::new(line) {
783+
self.item_block = Some(ib);
784+
return false;
785+
}
772786
}
773787

774788
if self.result == self.opener {
@@ -2004,4 +2018,78 @@ fn main() {
20042018
"#;
20052019
assert_eq!(s, filter_normal_code(s_with_comment));
20062020
}
2021+
2022+
#[test]
fn test_itemized_block_first_line_handling() {
    /// Creates an `ItemizedBlock` from `test_input` and checks each of its fields.
    ///
    /// `expected_opener` and `expected_line_start` must both be `expected_indent` bytes long:
    /// the opener is `line[..indent]` and, except for block quotes, the line start is
    /// `indent` spaces.
    fn run_test(
        test_input: &str,
        expected_line: &str,
        expected_indent: usize,
        expected_opener: &str,
        expected_line_start: &str,
    ) {
        let block = ItemizedBlock::new(test_input).unwrap();
        assert_eq!(1, block.lines.len(), "test_input: {:?}", test_input);
        assert_eq!(
            expected_line, &block.lines[0],
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_indent, block.indent,
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_opener, &block.opener,
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_line_start, &block.line_start,
            "test_input: {:?}",
            test_input
        );
    }

    // Bullets and block quotes: the sigil is 2 bytes long.
    run_test("- foo", "foo", 2, "- ", "  ");
    run_test("* foo", "foo", 2, "* ", "  ");
    run_test("> foo", "foo", 2, "> ", "> ");

    // Numbered items: the indent grows with the number of digits.
    run_test("1. foo", "foo", 3, "1. ", "   ");
    run_test("12. foo", "foo", 4, "12. ", "    ");

    // Leading whitespace (4 spaces) is counted as part of the indent and of the opener.
    run_test("    - foo", "foo", 6, "    - ", "      ");
}
2064+
2065+
#[test]
fn test_itemized_block_nonobvious_sigils_are_rejected() {
    // Every entry below looks a bit like a list item, but must not be treated as one.
    let rejected_lines = [
        // Alphabetic or roman-numeral sigils (e.g. `a.` or `iv.`) are unsupported: ordinary
        // words could too easily be mistaken for them. See also the discussion in
        // https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
        "word. rest of the paragraph.",
        "a. maybe this is a list item? maybe not?",
        "iv. maybe this is a list item? maybe not?",
        // Numbers of 3 or more digits are not sigils; otherwise the second line of the
        // following example would be formatted as a list item:
        //
        // ```
        // The Captain died in
        // 1868. He was buried in...
        // ```
        "123. only 2-digit numbers are recognized as sigils.",
        // The same rules apply to the `)` suffix.
        "123) giving some coverage to parens as well.",
        "a) giving some coverage to parens as well.",
    ];

    for &line in rejected_lines.iter() {
        assert!(
            ItemizedBlock::new(line).is_none(),
            "The following line shouldn't be classified as a list item: {}",
            line
        );
    }
}
20072095
}

tests/source/itemized-blocks/no_wrap.rs

+35-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// rustfmt-normalize_comments: true
22
// rustfmt-format_code_in_doc_comments: true
33

4-
//! This is a list:
4+
//! This is an itemized markdown list (see also issue #3224):
55
//! * Outer
66
//! * Outer
77
//! * Inner
@@ -13,6 +13,40 @@
1313
//! - when the log level is info, the level name is green and the rest of the line is white
1414
//! - when the log level is debug, the whole line is white
1515
//! - when the log level is trace, the whole line is gray ("bright black")
16+
//!
17+
//! This is a numbered markdown list (see also issue #5416):
18+
//! 1. Long long long long long long long long long long long long long long long long long line
19+
//! 2. Another very long long long long long long long long long long long long long long long line
20+
//! 3. Nested list
21+
//! 1. Long long long long long long long long long long long long long long long long line
22+
//! 2. Another very long long long long long long long long long long long long long long line
23+
//! 4. Last item
24+
//!
25+
//! Using the ')' instead of '.' character after the number:
26+
//! 1) Long long long long long long long long long long long long long long long long long line
27+
//! 2) Another very long long long long long long long long long long long long long long long line
28+
//!
29+
//! Deep list that mixes various bullet and number formats:
30+
//! 1. First level with a long long long long long long long long long long long long long long
31+
//! long long long line
32+
//! 2. First level with another very long long long long long long long long long long long long
33+
//! long long long line
34+
//! * Second level with a long long long long long long long long long long long long long
35+
//! long long long line
36+
//! * Second level with another very long long long long long long long long long long long
37+
//! long long long line
38+
//! 1) Third level with a long long long long long long long long long long long long long
39+
//! long long long line
40+
//! 2) Third level with another very long long long long long long long long long long
41+
//! long long long long line
42+
//! - Forth level with a long long long long long long long long long long long long
43+
//! long long long long line
44+
//! - Forth level with another very long long long long long long long long long long
45+
//! long long long long line
46+
//! 3) One more item at the third level
47+
//! 4) Last item of the third level
48+
//! * Last item of second level
49+
//! 3. Last item of first level
1650
1751
/// All the parameters ***except for `from_theater`*** should be inserted as sent by the remote
1852
/// theater, i.e., as passed to [`Theater::send`] on the remote actor:

tests/source/itemized-blocks/wrap.rs

+35-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// rustfmt-format_code_in_doc_comments: true
33
// rustfmt-max_width: 50
44

5-
//! This is a list:
5+
//! This is an itemized markdown list (see also issue #3224):
66
//! * Outer
77
//! * Outer
88
//! * Inner
@@ -14,6 +14,40 @@
1414
//! - when the log level is info, the level name is green and the rest of the line is white
1515
//! - when the log level is debug, the whole line is white
1616
//! - when the log level is trace, the whole line is gray ("bright black")
17+
//!
18+
//! This is a numbered markdown list (see also issue #5416):
19+
//! 1. Long long long long long long long long long long long long long long long long long line
20+
//! 2. Another very long long long long long long long long long long long long long long long line
21+
//! 3. Nested list
22+
//! 1. Long long long long long long long long long long long long long long long long line
23+
//! 2. Another very long long long long long long long long long long long long long long line
24+
//! 4. Last item
25+
//!
26+
//! Using the ')' instead of '.' character after the number:
27+
//! 1) Long long long long long long long long long long long long long long long long long line
28+
//! 2) Another very long long long long long long long long long long long long long long long line
29+
//!
30+
//! Deep list that mixes various bullet and number formats:
31+
//! 1. First level with a long long long long long long long long long long long long long long
32+
//! long long long line
33+
//! 2. First level with another very long long long long long long long long long long long long
34+
//! long long long line
35+
//! * Second level with a long long long long long long long long long long long long long
36+
//! long long long line
37+
//! * Second level with another very long long long long long long long long long long long
38+
//! long long long line
39+
//! 1) Third level with a long long long long long long long long long long long long long
40+
//! long long long line
41+
//! 2) Third level with another very long long long long long long long long long long
42+
//! long long long long line
43+
//! - Forth level with a long long long long long long long long long long long long
44+
//! long long long long line
45+
//! - Forth level with another very long long long long long long long long long long
46+
//! long long long long line
47+
//! 3) One more item at the third level
48+
//! 4) Last item of the third level
49+
//! * Last item of second level
50+
//! 3. Last item of first level
1751
1852
// This example shows how to configure fern to output really nicely colored logs
1953
// - when the log level is error, the whole line is red

tests/target/itemized-blocks/no_wrap.rs

+35-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// rustfmt-normalize_comments: true
22
// rustfmt-format_code_in_doc_comments: true
33

4-
//! This is a list:
4+
//! This is an itemized markdown list (see also issue #3224):
55
//! * Outer
66
//! * Outer
77
//! * Inner
@@ -13,6 +13,40 @@
1313
//! - when the log level is info, the level name is green and the rest of the line is white
1414
//! - when the log level is debug, the whole line is white
1515
//! - when the log level is trace, the whole line is gray ("bright black")
16+
//!
17+
//! This is a numbered markdown list (see also issue #5416):
18+
//! 1. Long long long long long long long long long long long long long long long long long line
19+
//! 2. Another very long long long long long long long long long long long long long long long line
20+
//! 3. Nested list
21+
//! 1. Long long long long long long long long long long long long long long long long line
22+
//! 2. Another very long long long long long long long long long long long long long long line
23+
//! 4. Last item
24+
//!
25+
//! Using the ')' instead of '.' character after the number:
26+
//! 1) Long long long long long long long long long long long long long long long long long line
27+
//! 2) Another very long long long long long long long long long long long long long long long line
28+
//!
29+
//! Deep list that mixes various bullet and number formats:
30+
//! 1. First level with a long long long long long long long long long long long long long long
31+
//! long long long line
32+
//! 2. First level with another very long long long long long long long long long long long long
33+
//! long long long line
34+
//! * Second level with a long long long long long long long long long long long long long
35+
//! long long long line
36+
//! * Second level with another very long long long long long long long long long long long
37+
//! long long long line
38+
//! 1) Third level with a long long long long long long long long long long long long long
39+
//! long long long line
40+
//! 2) Third level with another very long long long long long long long long long long
41+
//! long long long long line
42+
//! - Forth level with a long long long long long long long long long long long long
43+
//! long long long long line
44+
//! - Forth level with another very long long long long long long long long long long
45+
//! long long long long line
46+
//! 3) One more item at the third level
47+
//! 4) Last item of the third level
48+
//! * Last item of second level
49+
//! 3. Last item of first level
1650
1751
/// All the parameters ***except for `from_theater`*** should be inserted as sent by the remote
1852
/// theater, i.e., as passed to [`Theater::send`] on the remote actor:

0 commit comments

Comments
 (0)