@@ -15,6 +15,7 @@ use memchr::{memchr, memchr2, memchr3};
15
15
use syntax;
16
16
17
17
use freqs:: BYTE_FREQUENCIES ;
18
+ use simd_accel:: teddy128:: Teddy ;
18
19
19
20
/// A prefix extracted from a compiled regular expression.
20
21
///
@@ -51,6 +52,8 @@ enum Matcher {
51
52
Single ( SingleSearch ) ,
52
53
/// An Aho-Corasick automaton.
53
54
AC ( FullAcAutomaton < syntax:: Lit > ) ,
55
+ /// A simd accelerated multiple string matcher.
56
+ Teddy128 ( Teddy ) ,
54
57
}
55
58
56
59
impl LiteralSearcher {
@@ -100,6 +103,7 @@ impl LiteralSearcher {
100
103
Bytes ( ref sset) => sset. find ( haystack) . map ( |i| ( i, i + 1 ) ) ,
101
104
Single ( ref s) => s. find ( haystack) . map ( |i| ( i, i + s. len ( ) ) ) ,
102
105
AC ( ref aut) => aut. find ( haystack) . next ( ) . map ( |m| ( m. start , m. end ) ) ,
106
+ Teddy128 ( ref ted) => ted. find ( haystack) . map ( |m| ( m. start , m. end ) ) ,
103
107
}
104
108
}
105
109
@@ -136,6 +140,9 @@ impl LiteralSearcher {
136
140
Matcher :: Bytes ( ref sset) => LiteralIter :: Bytes ( & sset. dense ) ,
137
141
Matcher :: Single ( ref s) => LiteralIter :: Single ( & s. pat ) ,
138
142
Matcher :: AC ( ref ac) => LiteralIter :: AC ( ac. patterns ( ) ) ,
143
+ Matcher :: Teddy128 ( ref ted) => {
144
+ LiteralIter :: Teddy128 ( ted. patterns ( ) )
145
+ }
139
146
}
140
147
}
141
148
@@ -162,6 +169,7 @@ impl LiteralSearcher {
162
169
Bytes ( ref sset) => sset. dense . len ( ) ,
163
170
Single ( _) => 1 ,
164
171
AC ( ref aut) => aut. len ( ) ,
172
+ Teddy128 ( ref ted) => ted. len ( ) ,
165
173
}
166
174
}
167
175
@@ -173,6 +181,7 @@ impl LiteralSearcher {
173
181
Bytes ( ref sset) => sset. approximate_size ( ) ,
174
182
Single ( ref single) => single. approximate_size ( ) ,
175
183
AC ( ref aut) => aut. heap_bytes ( ) ,
184
+ Teddy128 ( ref ted) => ted. approximate_size ( ) ,
176
185
}
177
186
}
178
187
}
@@ -190,23 +199,34 @@ impl Matcher {
190
199
191
200
fn new ( lits : & syntax:: Literals , sset : SingleByteSet ) -> Self {
192
201
if lits. literals ( ) . is_empty ( ) {
193
- Matcher :: Empty
194
- } else if sset. dense . len ( ) >= 26 {
202
+ return Matcher :: Empty ;
203
+ }
204
+ if sset. dense . len ( ) >= 26 {
195
205
// Avoid trying to match a large number of single bytes.
196
206
// This is *very* sensitive to a frequency analysis comparison
197
207
// between the bytes in sset and the composition of the haystack.
198
208
// No matter the size of sset, if its members all are rare in the
199
209
// haystack, then it'd be worth using it. How to tune this... IDK.
200
210
// ---AG
201
- Matcher :: Empty
202
- } else if sset . complete {
203
- Matcher :: Bytes ( sset)
204
- } else if lits . literals ( ) . len ( ) == 1 {
205
- Matcher :: Single ( SingleSearch :: new ( lits . literals ( ) [ 0 ] . to_vec ( ) ) )
206
- } else {
207
- let pats = lits. literals ( ) . to_owned ( ) ;
208
- Matcher :: AC ( AcAutomaton :: new ( pats ) . into_full ( ) )
211
+ return Matcher :: Empty ;
212
+ }
213
+ if sset. complete {
214
+ return Matcher :: Bytes ( sset ) ;
215
+ }
216
+ if lits . literals ( ) . len ( ) == 1 {
217
+ let lit = lits. literals ( ) [ 0 ] . to_vec ( ) ;
218
+ return Matcher :: Single ( SingleSearch :: new ( lit ) ) ;
209
219
}
220
+ // Only try Teddy if Aho-Corasick can't use memchr.
221
+ // Also, in its current form, Teddy doesn't scale well to lots of
222
+ // literals.
223
+ if sset. dense . len ( ) > 1 && lits. literals ( ) . len ( ) <= 32 {
224
+ if let Some ( ted) = Teddy :: new ( lits) {
225
+ return Matcher :: Teddy128 ( ted) ;
226
+ }
227
+ }
228
+ let pats = lits. literals ( ) . to_owned ( ) ;
229
+ Matcher :: AC ( AcAutomaton :: new ( pats) . into_full ( ) )
210
230
}
211
231
}
212
232
@@ -215,6 +235,7 @@ pub enum LiteralIter<'a> {
215
235
Bytes ( & ' a [ u8 ] ) ,
216
236
Single ( & ' a [ u8 ] ) ,
217
237
AC ( & ' a [ syntax:: Lit ] ) ,
238
+ Teddy128 ( & ' a [ Vec < u8 > ] ) ,
218
239
}
219
240
220
241
impl < ' a > Iterator for LiteralIter < ' a > {
@@ -250,6 +271,15 @@ impl<'a> Iterator for LiteralIter<'a> {
250
271
Some ( & * * next)
251
272
}
252
273
}
274
+ LiteralIter :: Teddy128 ( ref mut lits) => {
275
+ if lits. is_empty ( ) {
276
+ None
277
+ } else {
278
+ let next = & lits[ 0 ] ;
279
+ * lits = & lits[ 1 ..] ;
280
+ Some ( & * * next)
281
+ }
282
+ }
253
283
}
254
284
}
255
285
}
0 commit comments