@@ -197,7 +197,7 @@ impl StateBuilderEmpty {
197
197
}
198
198
199
199
/// Consumes this empty builder and returns a `StateBuilderMatches` wrapping
/// the same buffer after a zeroed, fixed-size header has been appended to it.
///
/// This change grows the appended header from 5 to 9 zero bytes. Going by the
/// layout documented on `StateBuilderNFA`, that is presumably one flag byte
/// followed by two 4-byte look-around sets (bytes 1..5 and bytes 5..9).
pub ( crate ) fn into_matches ( mut self ) -> StateBuilderMatches {
200
- self . 0 . extend_from_slice ( & [ 0 , 0 , 0 , 0 , 0 ] ) ;
200
+ self . 0 . extend_from_slice ( & [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ) ;
201
201
StateBuilderMatches ( self . 0 )
202
202
}
203
203
@@ -348,16 +348,17 @@ impl StateBuilderNFA {
348
348
/// generated by a transition over a "word" byte. (Callers may not always set
349
349
/// this. For example, if the NFA has no word boundary assertion, then needing
350
350
/// to track whether a state came from a word byte or not is superfluous and
351
- /// wasteful.)
351
+ /// wasteful.) Bit 3 is set to 1 if the state was generated by a transition
352
+ /// from a `\r` (forward search) or a `\n` (reverse search) when CRLF mode is
353
+ /// enabled.
352
354
///
353
- /// Byte 1 corresponds to the look-behind assertions that were satisfied by
354
- /// the transition that created this state. This generally only includes the
355
- /// StartLF and Start assertions. (Look-ahead assertions are not tracked as
356
- /// part of states. Instead, these are applied by re-computing the epsilon
357
- /// closure of a state when computing the transition function. See `next` in
358
- /// the parent module.)
355
+ /// Bytes 1..5 correspond to the look-behind assertions that were satisfied
356
+ /// by the transition that created this state. (Look-ahead assertions are not
357
+ /// tracked as part of states. Instead, these are applied by re-computing the
358
+ /// epsilon closure of a state when computing the transition function. See
359
+ /// `next` in the parent module.)
359
360
///
360
- /// Byte 2 corresponds to the set of look-around assertions (including both
361
+ /// Bytes 5..9 correspond to the set of look-around assertions (including both
361
362
/// look-behind and look-ahead) that appear somewhere in this state's set of
362
363
/// NFA state IDs. This is used to determine whether this state's epsilon
363
364
/// closure should be re-computed when computing the transition function.
@@ -366,7 +367,7 @@ impl StateBuilderNFA {
366
367
/// function, we should only re-compute the epsilon closure if those new
367
368
/// assertions are relevant to this particular state.
368
369
///
369
- /// Bytes 3..7 correspond to a 32-bit native-endian encoded integer
370
+ /// Bytes 9..13 correspond to a 32-bit native-endian encoded integer
370
371
/// corresponding to the number of patterns encoded in this state. If the state
371
372
/// is not a match state (byte 0 bit 0 is 0) or if its only pattern ID is
372
373
/// PatternID::ZERO, then no integer is encoded at this position. Instead, byte
@@ -452,7 +453,7 @@ impl<'a> Repr<'a> {
452
453
/// state has no conditional epsilon transitions, then there is no need
453
454
/// to re-compute the epsilon closure.
454
455
fn look_need ( & self ) -> LookSet {
// Decodes the look-need set from the bytes starting at a fixed offset in this
// state's representation. This change moves that offset from 3 to 5, in line
// with the look-behind set being documented as occupying bytes 1..5.
455
- LookSet :: read_repr ( & self . 0 [ 3 ..] )
456
+ LookSet :: read_repr ( & self . 0 [ 5 ..] )
456
457
}
457
458
458
459
/// Returns the total number of match pattern IDs in this state.
@@ -476,7 +477,7 @@ impl<'a> Repr<'a> {
476
477
if !self . has_pattern_ids ( ) {
477
478
PatternID :: ZERO
478
479
} else {
479
- let offset = 9 + index * PatternID :: SIZE ;
480
+ let offset = 13 + index * PatternID :: SIZE ;
480
481
// This is OK since we only ever serialize valid PatternIDs to
481
482
// states.
482
483
wire:: read_pattern_id_unchecked ( & self . 0 [ offset..] ) . 0
@@ -507,7 +508,7 @@ impl<'a> Repr<'a> {
507
508
f ( PatternID :: ZERO ) ;
508
509
return ;
509
510
}
510
- let mut pids = & self . 0 [ 9 ..self . pattern_offset_end ( ) ] ;
511
+ let mut pids = & self . 0 [ 13 ..self . pattern_offset_end ( ) ] ;
511
512
while !pids. is_empty ( ) {
512
513
let pid = wire:: read_u32 ( pids) ;
513
514
pids = & pids[ PatternID :: SIZE ..] ;
@@ -539,11 +540,11 @@ impl<'a> Repr<'a> {
539
540
fn pattern_offset_end ( & self ) -> usize {
// Computes the offset in this state's representation at which the encoded
// pattern IDs end. Elsewhere in this file the pattern IDs are sliced out as
// `13..pattern_offset_end()`.
540
541
let encoded = self . encoded_pattern_len ( ) ;
541
542
if encoded == 0 {
// No pattern IDs are encoded, so the result is just the end of the fixed
// header (byte 9 after this change, previously byte 5).
542
- return 5 ;
543
+ return 9 ;
543
544
}
544
545
// This arithmetic is OK since we were able to address this many bytes
545
546
// when writing to the state, thus, it must fit into a usize.
//
// The result is 4 bytes per encoded pattern ID plus everything that comes
// before the first pattern ID: 13 bytes after this change (previously 9),
// i.e. the fixed header plus the 4-byte pattern count stored at 9..13.
546
- encoded. checked_mul ( 4 ) . unwrap ( ) . checked_add ( 9 ) . unwrap ( )
547
+ encoded. checked_mul ( 4 ) . unwrap ( ) . checked_add ( 13 ) . unwrap ( )
547
548
}
548
549
549
550
/// Returns the total number of *encoded* pattern IDs in this state.
@@ -557,7 +558,7 @@ impl<'a> Repr<'a> {
557
558
}
558
559
// This unwrap is OK since the total number of patterns is always
559
560
// guaranteed to fit into a usize.
560
- usize:: try_from ( wire:: read_u32 ( & self . 0 [ 5 .. 9 ] ) ) . unwrap ( )
561
+ usize:: try_from ( wire:: read_u32 ( & self . 0 [ 9 .. 13 ] ) ) . unwrap ( )
561
562
}
562
563
}
563
564
@@ -643,7 +644,7 @@ impl<'a> ReprVec<'a> {
643
644
/// Mutate the set of look-around (both behind and ahead) assertions that
644
645
/// appear at least once in this state's set of NFA states.
645
646
fn set_look_need ( & mut self , mut set : impl FnMut ( LookSet ) -> LookSet ) {
// Reads the current look-need set, passes it through `set`, and writes the
// returned set back into the representation at a fixed offset. This change
// moves the write offset from 3 to 5.
// NOTE(review): the write offset must stay equal to the offset that
// `look_need` reads from — confirm both were updated together.
646
- set ( self . look_need ( ) ) . write_repr ( & mut self . 0 [ 3 ..] ) ;
647
+ set ( self . look_need ( ) ) . write_repr ( & mut self . 0 [ 5 ..] ) ;
647
648
}
648
649
649
650
/// Add a pattern ID to this state. All match states must have at least
@@ -703,14 +704,14 @@ impl<'a> ReprVec<'a> {
703
704
return ;
704
705
}
705
706
let patsize = PatternID :: SIZE ;
706
- let pattern_bytes = self . 0 . len ( ) - 9 ;
707
+ let pattern_bytes = self . 0 . len ( ) - 13 ;
707
708
// Every pattern ID uses 4 bytes, so number of bytes should be
708
709
// divisible by 4.
709
710
assert_eq ! ( pattern_bytes % patsize, 0 ) ;
710
711
// This unwrap is OK since we are guaranteed that the maximum number
711
712
// of possible patterns fits into a u32.
712
713
let count32 = u32:: try_from ( pattern_bytes / patsize) . unwrap ( ) ;
713
- wire:: NE :: write_u32 ( count32, & mut self . 0 [ 5 .. 9 ] ) ;
714
+ wire:: NE :: write_u32 ( count32, & mut self . 0 [ 9 .. 13 ] ) ;
714
715
}
715
716
716
717
/// Add an NFA state ID to this state. The order in which NFA states are
0 commit comments