@@ -432,7 +432,9 @@ impl CodeBlockAttribute {
432
432
433
433
/// Block that is formatted as an item.
434
434
///
435
- /// An item starts with either a star `*` a dash `-` or a greater-than `>`.
435
+ /// An item starts with either a star `*`, a dash `-`, a greater-than `>`,
436
+ /// or a number `12.` or `34)` (with at most 2 digits).
437
+ ///
436
438
/// Different levels of indentation are handled by shrinking the shape accordingly.
437
439
struct ItemizedBlock {
438
440
/// the lines that are identified as part of an itemized block
@@ -446,36 +448,47 @@ struct ItemizedBlock {
446
448
}
447
449
448
450
impl ItemizedBlock {
449
- /// Returns `true` if the line is formatted as an item
450
- fn is_itemized_line ( line : & str ) -> bool {
451
- let trimmed = line. trim_start ( ) ;
452
- trimmed. starts_with ( "* " ) || trimmed. starts_with ( "- " ) || trimmed. starts_with ( "> " )
453
- }
454
-
455
- /// Creates a new ItemizedBlock described with the given line.
456
- /// The `is_itemized_line` needs to be called first.
457
- fn new ( line : & str ) -> ItemizedBlock {
458
- let space_to_sigil = line. chars ( ) . take_while ( |c| c. is_whitespace ( ) ) . count ( ) ;
459
- // +2 = '* ', which will add the appropriate amount of whitespace to keep itemized
460
- // content formatted correctly.
461
- let mut indent = space_to_sigil + 2 ;
462
- let mut line_start = " " . repeat ( indent) ;
463
-
464
- // Markdown blockquote start with a "> "
465
- if line. trim_start ( ) . starts_with ( ">" ) {
466
- // remove the original +2 indent because there might be multiple nested block quotes
467
- // and it's easier to reason about the final indent by just taking the length
468
- // of th new line_start. We update the indent because it effects the max width
469
- // of each formatted line.
470
- line_start = itemized_block_quote_start ( line, line_start, 2 ) ;
471
- indent = line_start. len ( ) ;
451
/// Returns the length in bytes of the item sigil that `trimmed` starts with, or `None` if
/// `trimmed` does not start with a sigil.
///
/// `trimmed` is expected to have its leading whitespace already removed. The returned length
/// includes the single space that follows the sigil, e.g. `2` for `"- "` and `4` for `"12. "`.
///
/// Recognized sigils are:
/// * `"* "`, `"- "` and `"> "`;
/// * one or two ASCII digits followed by `". "` or `") "`.
///
/// At least one digit is required, so a line such as `". foo"` or `") foo"` is not an item.
fn get_sigil_length(trimmed: &str) -> Option<usize> {
    if trimmed.starts_with("* ") || trimmed.starts_with("- ") || trimmed.starts_with("> ") {
        return Some(2);
    }

    // Numbered items: count the leading ASCII digits, then require `". "` or `") "` right
    // after them. ASCII digits are one byte each, so `digits` is a valid slice boundary.
    let digits = trimmed.bytes().take_while(u8::is_ascii_digit).count();
    if (1..=2).contains(&digits) {
        let rest = &trimmed[digits..];
        if rest.starts_with(". ") || rest.starts_with(") ") {
            // Both suffixes are 2 bytes long.
            return Some(digits + 2);
        }
    }

    None
}
467
+
468
+ /// Creates a new ItemizedBlock described with the given `line`
469
+ /// or None if `line` doesn't start an item.
470
+ fn new ( line : & str ) -> Option < ItemizedBlock > {
471
+ ItemizedBlock :: get_sigil_length ( line. trim_start ( ) ) . map ( |sigil_length| {
472
+ let space_to_sigil = line. chars ( ) . take_while ( |c| c. is_whitespace ( ) ) . count ( ) ;
473
+ let mut indent = space_to_sigil + sigil_length;
474
+ let mut line_start = " " . repeat ( indent) ;
475
+
476
+ // Markdown blockquote start with a "> "
477
+ if line. trim_start ( ) . starts_with ( ">" ) {
478
+ // remove the original +2 indent because there might be multiple nested block quotes
479
+ // and it's easier to reason about the final indent by just taking the length
480
+ // of the new line_start. We update the indent because it effects the max width
481
+ // of each formatted line.
482
+ line_start = itemized_block_quote_start ( line, line_start, 2 ) ;
483
+ indent = line_start. len ( ) ;
484
+ }
485
+ ItemizedBlock {
486
+ lines : vec ! [ line[ indent..] . to_string( ) ] ,
487
+ indent,
488
+ opener : line[ ..indent] . to_string ( ) ,
489
+ line_start,
490
+ }
491
+ } )
479
492
}
480
493
481
494
/// Returns a `StringFormat` used for formatting the content of an item.
@@ -494,7 +507,7 @@ impl ItemizedBlock {
494
507
/// Returns `true` if the line is part of the current itemized block.
495
508
/// If it is, then it is added to the internal lines list.
496
509
fn add_line ( & mut self , line : & str ) -> bool {
497
- if ! ItemizedBlock :: is_itemized_line ( line)
510
+ if ItemizedBlock :: get_sigil_length ( line. trim_start ( ) ) . is_none ( )
498
511
&& self . indent <= line. chars ( ) . take_while ( |c| c. is_whitespace ( ) ) . count ( )
499
512
{
500
513
self . lines . push ( line. to_string ( ) ) ;
@@ -765,10 +778,11 @@ impl<'a> CommentRewrite<'a> {
765
778
self . item_block = None ;
766
779
if let Some ( stripped) = line. strip_prefix ( "```" ) {
767
780
self . code_block_attr = Some ( CodeBlockAttribute :: new ( stripped) )
768
- } else if self . fmt . config . wrap_comments ( ) && ItemizedBlock :: is_itemized_line ( line) {
769
- let ib = ItemizedBlock :: new ( line) ;
770
- self . item_block = Some ( ib) ;
771
- return false ;
781
+ } else if self . fmt . config . wrap_comments ( ) {
782
+ if let Some ( ib) = ItemizedBlock :: new ( line) {
783
+ self . item_block = Some ( ib) ;
784
+ return false ;
785
+ }
772
786
}
773
787
774
788
if self . result == self . opener {
@@ -2004,4 +2018,78 @@ fn main() {
2004
2018
"# ;
2005
2019
assert_eq ! ( s, filter_normal_code( s_with_comment) ) ;
2006
2020
}
2021
+
2022
/// Checks the fields of the `ItemizedBlock` that `ItemizedBlock::new` builds from the first
/// line of an item: the remaining text, the indent, the opener and the continuation prefix.
#[test]
fn test_itemized_block_first_line_handling() {
    fn run_test(
        test_input: &str,
        expected_line: &str,
        expected_indent: usize,
        expected_opener: &str,
        expected_line_start: &str,
    ) {
        let block = ItemizedBlock::new(test_input).unwrap();
        assert_eq!(1, block.lines.len(), "test_input: {:?}", test_input);
        assert_eq!(
            expected_line, &block.lines[0],
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_indent, block.indent,
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_opener, &block.opener,
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_line_start, &block.line_start,
            "test_input: {:?}",
            test_input
        );
    }

    // Whitespace-only expectations are built with an explicit width so that the number of
    // spaces is unambiguous and always agrees with the expected indent.
    let spaces = |width: usize| " ".repeat(width);

    run_test("- foo", "foo", 2, "- ", &spaces(2));
    run_test("* foo", "foo", 2, "* ", &spaces(2));
    run_test("> foo", "foo", 2, "> ", "> ");

    run_test("1. foo", "foo", 3, "1. ", &spaces(3));
    run_test("12. foo", "foo", 4, "12. ", &spaces(4));
    run_test("1) foo", "foo", 3, "1) ", &spaces(3));
    run_test("12) foo", "foo", 4, "12) ", &spaces(4));

    // An item nested under 4 spaces of indentation: the opener keeps the original leading
    // whitespace plus the sigil, and continuation lines are padded to the same width.
    let nested_input = format!("{}- foo", spaces(4));
    let nested_opener = format!("{}- ", spaces(4));
    run_test(&nested_input, "foo", 6, &nested_opener, &spaces(6));
}
2064
+
2065
/// Lines that merely resemble list items must not be turned into an `ItemizedBlock`.
#[test]
fn test_itemized_block_nonobvious_sigils_are_rejected() {
    let rejected_lines = [
        // Non-numeric sigils (e.g. `a.` or `iv.`) are not supported, because of a risk of
        // misidentifying regular words as sigils. See also the discussion in
        // https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
        "word. rest of the paragraph.",
        "a. maybe this is a list item?  maybe not?",
        "iv. maybe this is a list item?  maybe not?",
        // Numbers with 3 or more digits are not recognized as sigils, to avoid
        // formatting the following example as a list:
        //
        // ```
        // The Captain died in
        // 1868. He was buried in...
        // ```
        "123. only 2-digit numbers are recognized as sigils.",
        // Parens.
        "123) giving some coverage to parens as well.",
        "a) giving some coverage to parens as well.",
    ];

    for &line in &rejected_lines {
        assert!(
            ItemizedBlock::new(line).is_none(),
            "The following line shouldn't be classified as a list item: {}",
            line
        );
    }
}
2007
2095
}
0 commit comments