10
10
11
11
use std:: cell:: RefCell ;
12
12
use std:: collections:: HashMap ;
13
- use std:: cmp;
14
13
use std:: sync:: Arc ;
15
14
16
15
use aho_corasick:: { AhoCorasick , AhoCorasickBuilder , MatchKind } ;
@@ -589,7 +588,8 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
589
588
match self . ro . match_type {
590
589
MatchType :: Literal ( ty) => {
591
590
self . find_literals ( ty, text, start) . and_then ( |( s, e) | {
592
- self . captures_nfa_with_match ( slots, text, s, e)
591
+ self . captures_nfa_type (
592
+ MatchNfaType :: Auto , slots, text, s, e)
593
593
} )
594
594
}
595
595
MatchType :: Dfa => {
@@ -598,17 +598,21 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
598
598
} else {
599
599
match self . find_dfa_forward ( text, start) {
600
600
dfa:: Result :: Match ( ( s, e) ) => {
601
- self . captures_nfa_with_match ( slots, text, s, e)
601
+ self . captures_nfa_type (
602
+ MatchNfaType :: Auto , slots, text, s, e)
602
603
}
603
604
dfa:: Result :: NoMatch ( _) => None ,
604
- dfa:: Result :: Quit => self . captures_nfa ( slots, text, start) ,
605
+ dfa:: Result :: Quit => {
606
+ self . captures_nfa ( slots, text, start)
607
+ }
605
608
}
606
609
}
607
610
}
608
611
MatchType :: DfaAnchoredReverse => {
609
612
match self . find_dfa_anchored_reverse ( text, start) {
610
613
dfa:: Result :: Match ( ( s, e) ) => {
611
- self . captures_nfa_with_match ( slots, text, s, e)
614
+ self . captures_nfa_type (
615
+ MatchNfaType :: Auto , slots, text, s, e)
612
616
}
613
617
dfa:: Result :: NoMatch ( _) => None ,
614
618
dfa:: Result :: Quit => self . captures_nfa ( slots, text, start) ,
@@ -617,14 +621,15 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
617
621
MatchType :: DfaSuffix => {
618
622
match self . find_dfa_reverse_suffix ( text, start) {
619
623
dfa:: Result :: Match ( ( s, e) ) => {
620
- self . captures_nfa_with_match ( slots, text, s, e)
624
+ self . captures_nfa_type (
625
+ MatchNfaType :: Auto , slots, text, s, e)
621
626
}
622
627
dfa:: Result :: NoMatch ( _) => None ,
623
628
dfa:: Result :: Quit => self . captures_nfa ( slots, text, start) ,
624
629
}
625
630
}
626
631
MatchType :: Nfa ( ty) => {
627
- self . captures_nfa_type ( ty, slots, text, start)
632
+ self . captures_nfa_type ( ty, slots, text, start, text . len ( ) )
628
633
}
629
634
MatchType :: Nothing => None ,
630
635
MatchType :: DfaMany => {
@@ -867,7 +872,7 @@ impl<'c> ExecNoSync<'c> {
867
872
text : & [ u8 ] ,
868
873
start : usize ,
869
874
) -> bool {
870
- self . exec_nfa ( ty, & mut [ false ] , & mut [ ] , true , text, start)
875
+ self . exec_nfa ( ty, & mut [ false ] , & mut [ ] , true , text, start, text . len ( ) )
871
876
}
872
877
873
878
/// Finds the shortest match using an NFA.
@@ -883,7 +888,15 @@ impl<'c> ExecNoSync<'c> {
883
888
start : usize ,
884
889
) -> Option < usize > {
885
890
let mut slots = [ None , None ] ;
886
- if self . exec_nfa ( ty, & mut [ false ] , & mut slots, true , text, start) {
891
+ if self . exec_nfa (
892
+ ty,
893
+ & mut [ false ] ,
894
+ & mut slots,
895
+ true ,
896
+ text,
897
+ start,
898
+ text. len ( )
899
+ ) {
887
900
slots[ 1 ]
888
901
} else {
889
902
None
@@ -898,7 +911,15 @@ impl<'c> ExecNoSync<'c> {
898
911
start : usize ,
899
912
) -> Option < ( usize , usize ) > {
900
913
let mut slots = [ None , None ] ;
901
- if self . exec_nfa ( ty, & mut [ false ] , & mut slots, false , text, start) {
914
+ if self . exec_nfa (
915
+ ty,
916
+ & mut [ false ] ,
917
+ & mut slots,
918
+ false ,
919
+ text,
920
+ start,
921
+ text. len ( )
922
+ ) {
902
923
match ( slots[ 0 ] , slots[ 1 ] ) {
903
924
( Some ( s) , Some ( e) ) => Some ( ( s, e) ) ,
904
925
_ => None ,
@@ -908,26 +929,6 @@ impl<'c> ExecNoSync<'c> {
908
929
}
909
930
}
910
931
911
- /// Like find_nfa, but fills in captures and restricts the search space
912
- /// using previously found match information.
913
- ///
914
- /// `slots` should have length equal to `2 * nfa.captures.len()`.
915
- fn captures_nfa_with_match (
916
- & self ,
917
- slots : & mut [ Slot ] ,
918
- text : & [ u8 ] ,
919
- match_start : usize ,
920
- match_end : usize ,
921
- ) -> Option < ( usize , usize ) > {
922
- // We can't use match_end directly, because we may need to examine one
923
- // "character" after the end of a match for lookahead operators. We
924
- // need to move two characters beyond the end, since some look-around
925
- // operations may falsely assume a premature end of text otherwise.
926
- let e = cmp:: min (
927
- next_utf8 ( text, next_utf8 ( text, match_end) ) , text. len ( ) ) ;
928
- self . captures_nfa ( slots, & text[ ..e] , match_start)
929
- }
930
-
931
932
/// Like find_nfa, but fills in captures.
932
933
///
933
934
/// `slots` should have length equal to `2 * nfa.captures.len()`.
@@ -937,7 +938,8 @@ impl<'c> ExecNoSync<'c> {
937
938
text : & [ u8 ] ,
938
939
start : usize ,
939
940
) -> Option < ( usize , usize ) > {
940
- self . captures_nfa_type ( MatchNfaType :: Auto , slots, text, start)
941
+ self . captures_nfa_type (
942
+ MatchNfaType :: Auto , slots, text, start, text. len ( ) )
941
943
}
942
944
943
945
/// Like captures_nfa, but allows specification of type of NFA engine.
@@ -947,8 +949,9 @@ impl<'c> ExecNoSync<'c> {
947
949
slots : & mut [ Slot ] ,
948
950
text : & [ u8 ] ,
949
951
start : usize ,
952
+ end : usize ,
950
953
) -> Option < ( usize , usize ) > {
951
- if self . exec_nfa ( ty, & mut [ false ] , slots, false , text, start) {
954
+ if self . exec_nfa ( ty, & mut [ false ] , slots, false , text, start, end ) {
952
955
match ( slots[ 0 ] , slots[ 1 ] ) {
953
956
( Some ( s) , Some ( e) ) => Some ( ( s, e) ) ,
954
957
_ => None ,
@@ -966,6 +969,7 @@ impl<'c> ExecNoSync<'c> {
966
969
quit_after_match : bool ,
967
970
text : & [ u8 ] ,
968
971
start : usize ,
972
+ end : usize ,
969
973
) -> bool {
970
974
use self :: MatchNfaType :: * ;
971
975
if let Auto = ty {
@@ -977,10 +981,10 @@ impl<'c> ExecNoSync<'c> {
977
981
}
978
982
match ty {
979
983
Auto => unreachable ! ( ) ,
980
- Backtrack => self . exec_backtrack ( matches, slots, text, start) ,
984
+ Backtrack => self . exec_backtrack ( matches, slots, text, start, end ) ,
981
985
PikeVM => {
982
986
self . exec_pikevm (
983
- matches, slots, quit_after_match, text, start)
987
+ matches, slots, quit_after_match, text, start, end )
984
988
}
985
989
}
986
990
}
@@ -993,6 +997,7 @@ impl<'c> ExecNoSync<'c> {
993
997
quit_after_match : bool ,
994
998
text : & [ u8 ] ,
995
999
start : usize ,
1000
+ end : usize ,
996
1001
) -> bool {
997
1002
if self . ro . nfa . uses_bytes ( ) {
998
1003
pikevm:: Fsm :: exec (
@@ -1002,7 +1007,8 @@ impl<'c> ExecNoSync<'c> {
1002
1007
slots,
1003
1008
quit_after_match,
1004
1009
ByteInput :: new ( text, self . ro . nfa . only_utf8 ) ,
1005
- start)
1010
+ start,
1011
+ end)
1006
1012
} else {
1007
1013
pikevm:: Fsm :: exec (
1008
1014
& self . ro . nfa ,
@@ -1011,7 +1017,8 @@ impl<'c> ExecNoSync<'c> {
1011
1017
slots,
1012
1018
quit_after_match,
1013
1019
CharInput :: new ( text) ,
1014
- start)
1020
+ start,
1021
+ end)
1015
1022
}
1016
1023
}
1017
1024
@@ -1022,6 +1029,7 @@ impl<'c> ExecNoSync<'c> {
1022
1029
slots : & mut [ Slot ] ,
1023
1030
text : & [ u8 ] ,
1024
1031
start : usize ,
1032
+ end : usize ,
1025
1033
) -> bool {
1026
1034
if self . ro . nfa . uses_bytes ( ) {
1027
1035
backtrack:: Bounded :: exec (
@@ -1030,15 +1038,17 @@ impl<'c> ExecNoSync<'c> {
1030
1038
matches,
1031
1039
slots,
1032
1040
ByteInput :: new ( text, self . ro . nfa . only_utf8 ) ,
1033
- start)
1041
+ start,
1042
+ end)
1034
1043
} else {
1035
1044
backtrack:: Bounded :: exec (
1036
1045
& self . ro . nfa ,
1037
1046
self . cache ,
1038
1047
matches,
1039
1048
slots,
1040
1049
CharInput :: new ( text) ,
1041
- start)
1050
+ start,
1051
+ end)
1042
1052
}
1043
1053
}
1044
1054
@@ -1082,11 +1092,15 @@ impl<'c> ExecNoSync<'c> {
1082
1092
& mut [ ] ,
1083
1093
false ,
1084
1094
text,
1085
- start)
1095
+ start,
1096
+ text. len ( ) )
1086
1097
}
1087
1098
}
1088
1099
}
1089
- Nfa ( ty) => self . exec_nfa ( ty, matches, & mut [ ] , false , text, start) ,
1100
+ Nfa ( ty) => {
1101
+ self . exec_nfa (
1102
+ ty, matches, & mut [ ] , false , text, start, text. len ( ) )
1103
+ }
1090
1104
Nothing => false ,
1091
1105
}
1092
1106
}
@@ -1118,7 +1132,9 @@ impl Exec {
1118
1132
/// Get a searcher that isn't Sync.
1119
1133
#[ inline( always) ] // reduces constant overhead
1120
1134
pub fn searcher ( & self ) -> ExecNoSync {
1121
- let create = || Box :: new ( RefCell :: new ( ProgramCacheInner :: new ( & self . ro ) ) ) ;
1135
+ let create = || {
1136
+ Box :: new ( RefCell :: new ( ProgramCacheInner :: new ( & self . ro ) ) )
1137
+ } ;
1122
1138
ExecNoSync {
1123
1139
ro : & self . ro , // a clone is too expensive here! (and not needed)
1124
1140
cache : self . cache . get_or ( create) ,
0 commit comments