@@ -432,12 +432,18 @@ impl CodeBlockAttribute {
432
432
433
433
/// Block that is formatted as an item.
434
434
///
435
- /// An item starts with either a star `*` a dash `-` a greater-than `>` or a plus '+'.
435
+ /// An item starts with either a star `*`, a dash `-`, a greater-than `>`, a plus `+`, or a number
436
+ /// `12.` or `34)` (with at most 2 digits). An item represents CommonMark's ["list
437
+ /// items"](https://spec.commonmark.org/0.30/#list-items) and/or ["block
438
+ /// quotes"](https://spec.commonmark.org/0.30/#block-quotes), but note that only a subset of
439
+ /// CommonMark is recognized - see the doc comment of [`ItemizedBlock::get_marker_length`] for more
440
+ /// details.
441
+ ///
436
442
/// Different levels of indentation are handled by shrinking the shape accordingly.
437
443
struct ItemizedBlock {
438
444
/// the lines that are identified as part of an itemized block
439
445
lines : Vec < String > ,
440
- /// the number of characters (typically whitespaces) up to the item sigil
446
+ /// the number of characters (typically whitespaces) up to the item marker
441
447
indent : usize ,
442
448
/// the string that marks the start of an item
443
449
opener : String ,
@@ -446,37 +452,70 @@ struct ItemizedBlock {
446
452
}
447
453
448
454
impl ItemizedBlock {
449
- /// Returns `true` if the line is formatted as an item
450
- fn is_itemized_line ( line : & str ) -> bool {
451
- let trimmed = line. trim_start ( ) ;
455
/// Checks whether the `trimmed` line starts with an item marker.
///
/// Returns `None` if there is no marker. Otherwise returns the length of the marker (in bytes).
/// Note that the length includes the space that follows the marker, for example the marker in
/// `"* list item"` has the length of 2 and the marker in `"12. list item"` has the length of 4.
///
/// `trimmed` is expected to have its leading whitespace already removed; the marker is only
/// looked for at the very start of the string.
///
/// This function recognizes item markers that correspond to CommonMark's
/// ["bullet list marker"](https://spec.commonmark.org/0.30/#bullet-list-marker),
/// ["block quote marker"](https://spec.commonmark.org/0.30/#block-quote-marker), and/or
/// ["ordered list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker).
///
/// Compared to CommonMark specification, the number of digits that are allowed in an ["ordered
/// list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker) is more limited (1 or 2
/// digits). Limiting the length of the marker helps reduce the risk of recognizing
/// arbitrary numbers as markers. See also
/// <https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990> which gives the
/// following example where a number (i.e. "1868") doesn't signify an ordered list:
/// ```md
/// The Captain died in
/// 1868. He was buried in...
/// ```
fn get_marker_length(trimmed: &str) -> Option<usize> {
    // https://spec.commonmark.org/0.30/#bullet-list-marker or
    // https://spec.commonmark.org/0.30/#block-quote-marker
    let itemized_start = ["* ", "- ", "> ", "+ "];
    if itemized_start.iter().any(|s| trimmed.starts_with(s)) {
        return Some(2); // All items in `itemized_start` have length 2.
    }

    // https://spec.commonmark.org/0.30/#ordered-list-marker, where at most 2 digits are
    // allowed.
    for suffix in [". ", ") "] {
        if let Some((prefix, _)) = trimmed.split_once(suffix) {
            // At least one digit is required: `all` is vacuously true for an empty prefix,
            // so without the lower bound a line such as ". foo" would be treated as a marker.
            let has_leading_digits =
                (1..=2).contains(&prefix.len()) && prefix.chars().all(|c| c.is_ascii_digit());
            if has_leading_digits {
                return Some(prefix.len() + suffix.len());
            }
        }
    }

    None // No markers found.
}
455
495
456
- /// Creates a new ItemizedBlock described with the given line.
457
- /// The `is_itemized_line` needs to be called first.
458
- fn new ( line : & str ) -> ItemizedBlock {
459
- let space_to_sigil = line. chars ( ) . take_while ( |c| c. is_whitespace ( ) ) . count ( ) ;
460
- // +2 = '* ', which will add the appropriate amount of whitespace to keep itemized
461
- // content formatted correctly.
462
- let mut indent = space_to_sigil + 2 ;
496
+ /// Creates a new `ItemizedBlock` described with the given `line`.
497
+ /// Returns `None` if `line` doesn't start an item.
498
+ fn new ( line : & str ) -> Option < ItemizedBlock > {
499
+ let marker_length = ItemizedBlock :: get_marker_length ( line. trim_start ( ) ) ?;
500
+ let space_to_marker = line. chars ( ) . take_while ( |c| c. is_whitespace ( ) ) . count ( ) ;
501
+ let mut indent = space_to_marker + marker_length;
463
502
let mut line_start = " " . repeat ( indent) ;
464
503
465
504
// Markdown blockquote start with a "> "
466
505
if line. trim_start ( ) . starts_with ( ">" ) {
467
506
// remove the original +2 indent because there might be multiple nested block quotes
468
507
// and it's easier to reason about the final indent by just taking the length
469
- // of th new line_start. We update the indent because it effects the max width
508
+ // of the new line_start. We update the indent because it effects the max width
470
509
// of each formatted line.
471
510
line_start = itemized_block_quote_start ( line, line_start, 2 ) ;
472
511
indent = line_start. len ( ) ;
473
512
}
474
- ItemizedBlock {
513
+ Some ( ItemizedBlock {
475
514
lines : vec ! [ line[ indent..] . to_string( ) ] ,
476
515
indent,
477
516
opener : line[ ..indent] . to_string ( ) ,
478
517
line_start,
479
- }
518
+ } )
480
519
}
481
520
482
521
/// Returns a `StringFormat` used for formatting the content of an item.
@@ -495,7 +534,7 @@ impl ItemizedBlock {
495
534
/// Returns `true` if the line is part of the current itemized block.
496
535
/// If it is, then it is added to the internal lines list.
497
536
fn add_line ( & mut self , line : & str ) -> bool {
498
- if ! ItemizedBlock :: is_itemized_line ( line)
537
+ if ItemizedBlock :: get_marker_length ( line. trim_start ( ) ) . is_none ( )
499
538
&& self . indent <= line. chars ( ) . take_while ( |c| c. is_whitespace ( ) ) . count ( )
500
539
{
501
540
self . lines . push ( line. to_string ( ) ) ;
@@ -766,10 +805,11 @@ impl<'a> CommentRewrite<'a> {
766
805
self . item_block = None ;
767
806
if let Some ( stripped) = line. strip_prefix ( "```" ) {
768
807
self . code_block_attr = Some ( CodeBlockAttribute :: new ( stripped) )
769
- } else if self . fmt . config . wrap_comments ( ) && ItemizedBlock :: is_itemized_line ( line) {
770
- let ib = ItemizedBlock :: new ( line) ;
771
- self . item_block = Some ( ib) ;
772
- return false ;
808
+ } else if self . fmt . config . wrap_comments ( ) {
809
+ if let Some ( ib) = ItemizedBlock :: new ( line) {
810
+ self . item_block = Some ( ib) ;
811
+ return false ;
812
+ }
773
813
}
774
814
775
815
if self . result == self . opener {
@@ -2020,4 +2060,96 @@ fn main() {
2020
2060
"# ;
2021
2061
assert_eq ! ( s, filter_normal_code( s_with_comment) ) ;
2022
2062
}
2063
+
2064
/// Verifies how `ItemizedBlock::new` splits the first line of an item into the opener, the
/// remaining content, the indent, and the prefix used for continuation lines.
#[test]
fn test_itemized_block_first_line_handling() {
    fn run_test(
        test_input: &str,
        expected_line: &str,
        expected_indent: usize,
        expected_opener: &str,
        expected_line_start: &str,
    ) {
        let block = ItemizedBlock::new(test_input).unwrap();
        assert_eq!(1, block.lines.len(), "test_input: {:?}", test_input);
        assert_eq!(
            expected_line, &block.lines[0],
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_indent, block.indent,
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_opener, &block.opener,
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_line_start, &block.line_start,
            "test_input: {:?}",
            test_input
        );
    }

    // Bullet list and block quote markers. Except for block quotes, the expected line start
    // consists of exactly `expected_indent` spaces.
    run_test("- foo", "foo", 2, "- ", "  ");
    run_test("* foo", "foo", 2, "* ", "  ");
    run_test("+ foo", "foo", 2, "+ ", "  ");
    run_test("> foo", "foo", 2, "> ", "> ");

    // Ordered list markers with one or two digits.
    run_test("1. foo", "foo", 3, "1. ", "   ");
    run_test("12. foo", "foo", 4, "12. ", "    ");
    run_test("1) foo", "foo", 3, "1) ", "   ");
    run_test("12) foo", "foo", 4, "12) ", "    ");

    // Leading whitespace is counted as part of the indent and kept in the opener.
    run_test("    - foo", "foo", 6, "    - ", "      ");

    // https://spec.commonmark.org/0.30 says: "A start number may begin with 0s":
    run_test("0. foo", "foo", 3, "0. ", "   ");
    run_test("01. foo", "foo", 4, "01. ", "    ");
}
2112
+
2113
/// Verifies that lines which merely resemble list items are not turned into an `ItemizedBlock`.
#[test]
fn test_itemized_block_nonobvious_markers_are_rejected() {
    let rejected_lines = [
        // Non-numeric item markers (e.g. `a.` or `iv.`) are not allowed by
        // https://spec.commonmark.org/0.30/#ordered-list-marker. We also note that allowing
        // them would risk misidentifying regular words as item markers. See also the
        // discussion in https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
        "word. rest of the paragraph.",
        "a. maybe this is a list item? maybe not?",
        "iv. maybe this is a list item? maybe not?",
        // Numbers with 3 or more digits are not recognized as item markers, to avoid
        // formatting the following example as a list:
        //
        // ```
        // The Captain died in
        // 1868. He was buried in...
        // ```
        "123. only 2-digit numbers are recognized as item markers.",
        // Parens:
        "123) giving some coverage to parens as well.",
        "a) giving some coverage to parens as well.",
        // https://spec.commonmark.org/0.30 says that "at least one space or tab is needed
        // between the list marker and any following content":
        "1.Not a list item.",
        "1.2.3. Not a list item.",
        "1)Not a list item.",
        "-Not a list item.",
        "+Not a list item.",
        "+1 not a list item.",
        // https://spec.commonmark.org/0.30 says: "A start number may not be negative":
        "-1. Not a list item.",
        "-1 Not a list item.",
    ];

    // Every candidate above must fail to open an itemized block.
    for candidate in rejected_lines {
        assert!(
            ItemizedBlock::new(candidate).is_none(),
            "The following line shouldn't be classified as a list item: {}",
            candidate
        );
    }
}
2023
2155
}
0 commit comments