36
36
import org .apache .lucene .util .automaton .Operations ;
37
37
import org .apache .lucene .util .automaton .RegExp ;
38
38
import org .elasticsearch .ElasticsearchParseException ;
39
+ import org .elasticsearch .Version ;
39
40
import org .elasticsearch .common .ParseField ;
40
41
import org .elasticsearch .common .io .stream .StreamInput ;
41
42
import org .elasticsearch .common .io .stream .StreamOutput ;
@@ -78,17 +79,8 @@ public static IncludeExclude merge(IncludeExclude include, IncludeExclude exclud
78
79
if (include .isPartitionBased ()) {
79
80
throw new IllegalArgumentException ("Cannot specify any excludes when using a partition-based include" );
80
81
}
81
- String includeMethod = include .isRegexBased () ? "regex" : "set" ;
82
- String excludeMethod = exclude .isRegexBased () ? "regex" : "set" ;
83
- if (includeMethod .equals (excludeMethod ) == false ) {
84
- throw new IllegalArgumentException ("Cannot mix a " + includeMethod + "-based include with a "
85
- + excludeMethod + "-based method" );
86
- }
87
- if (include .isRegexBased ()) {
88
- return new IncludeExclude (include .include , exclude .exclude );
89
- } else {
90
- return new IncludeExclude (include .includeValues , exclude .excludeValues );
91
- }
82
+
83
+ return new IncludeExclude (include .include , exclude .exclude , include .includeValues , exclude .excludeValues );
92
84
}
93
85
94
86
public static IncludeExclude parseInclude (XContentParser parser ) throws IOException {
@@ -196,46 +188,39 @@ public boolean accept(BytesRef value) {
196
188
}
197
189
}
198
190
199
- static class AutomatonBackedStringFilter extends StringFilter {
191
+ class SetAndRegexStringFilter extends StringFilter {
200
192
201
193
private final ByteRunAutomaton runAutomaton ;
202
-
203
- private AutomatonBackedStringFilter (Automaton automaton ) {
204
- this .runAutomaton = new ByteRunAutomaton (automaton );
205
- }
206
-
207
- /**
208
- * Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
209
- */
210
- @ Override
211
- public boolean accept (BytesRef value ) {
212
- return runAutomaton .run (value .bytes , value .offset , value .length );
213
- }
214
- }
215
-
216
- static class TermListBackedStringFilter extends StringFilter {
217
-
218
194
private final Set <BytesRef > valids ;
219
195
private final Set <BytesRef > invalids ;
220
196
221
- TermListBackedStringFilter (Set <BytesRef > includeValues , Set <BytesRef > excludeValues ) {
222
- this .valids = includeValues ;
223
- this .invalids = excludeValues ;
197
+ private SetAndRegexStringFilter (DocValueFormat format ) {
198
+ Automaton automaton = toAutomaton ();
199
+ this .runAutomaton = automaton == null ? null : new ByteRunAutomaton (automaton );
200
+ this .valids = parseForDocValues (includeValues , format );
201
+ this .invalids = parseForDocValues (excludeValues , format );
224
202
}
225
203
226
204
/**
227
- * Returns whether the given value is accepted based on the
228
- * {@code include} & {@code exclude} sets .
205
+ * Returns whether the given value is accepted based on the {@code includeValues} & {@code excludeValues}
206
+ * sets, as well as the {@code include} & {@code exclude} patterns .
229
207
*/
230
208
@ Override
231
209
public boolean accept (BytesRef value ) {
232
- return ((valids == null ) || (valids .contains (value ))) && ((invalids == null ) || (!invalids .contains (value )));
210
+ if (valids != null && valids .contains (value ) == false ) {
211
+ return false ;
212
+ }
213
+
214
+ if (runAutomaton != null && runAutomaton .run (value .bytes , value .offset , value .length ) == false ) {
215
+ return false ;
216
+ }
217
+
218
+ return invalids == null || invalids .contains (value ) == false ;
233
219
}
234
220
}
235
221
236
222
public abstract static class OrdinalsFilter extends Filter {
237
223
public abstract LongBitSet acceptedGlobalOrdinals (SortedSetDocValues globalOrdinals ) throws IOException ;
238
-
239
224
}
240
225
241
226
class PartitionedOrdinalsFilter extends OrdinalsFilter {
@@ -258,59 +243,64 @@ public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) thro
258
243
}
259
244
}
260
245
261
- static class AutomatonBackedOrdinalsFilter extends OrdinalsFilter {
246
+ class SetAndRegexOrdinalsFilter extends OrdinalsFilter {
262
247
263
248
private final CompiledAutomaton compiled ;
249
+ private final SortedSet <BytesRef > valids ;
250
+ private final SortedSet <BytesRef > invalids ;
264
251
265
- private AutomatonBackedOrdinalsFilter (Automaton automaton ) {
266
- this .compiled = new CompiledAutomaton (automaton );
252
+ private SetAndRegexOrdinalsFilter (DocValueFormat format ) {
253
+ Automaton automaton = toAutomaton ();
254
+ this .compiled = automaton == null ? null : new CompiledAutomaton (automaton );
255
+ this .valids = parseForDocValues (includeValues , format );
256
+ this .invalids = parseForDocValues (excludeValues , format );
267
257
}
268
258
269
259
/**
270
- * Computes which global ordinals are accepted by this IncludeExclude instance.
271
- *
260
+ * Computes which global ordinals are accepted by this IncludeExclude instance, based on the combination of
261
+ * the {@code includeValues} & {@code excludeValues} sets, as well as the {@code include} &
262
+ * {@code exclude} patterns.
272
263
*/
273
264
@ Override
274
265
public LongBitSet acceptedGlobalOrdinals (SortedSetDocValues globalOrdinals ) throws IOException {
275
- LongBitSet acceptedGlobalOrdinals = new LongBitSet (globalOrdinals .getValueCount ());
276
- TermsEnum globalTermsEnum ;
277
- Terms globalTerms = new DocValuesTerms (globalOrdinals );
278
- // TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can avoid i/o and just set bits.
279
- globalTermsEnum = compiled .getTermsEnum (globalTerms );
280
- for (BytesRef term = globalTermsEnum .next (); term != null ; term = globalTermsEnum .next ()) {
281
- acceptedGlobalOrdinals .set (globalTermsEnum .ord ());
282
- }
283
- return acceptedGlobalOrdinals ;
284
- }
285
-
286
- }
287
-
288
- static class TermListBackedOrdinalsFilter extends OrdinalsFilter {
289
-
290
- private final SortedSet <BytesRef > includeValues ;
291
- private final SortedSet <BytesRef > excludeValues ;
292
-
293
- TermListBackedOrdinalsFilter (SortedSet <BytesRef > includeValues , SortedSet <BytesRef > excludeValues ) {
294
- this .includeValues = includeValues ;
295
- this .excludeValues = excludeValues ;
296
- }
297
-
298
- @ Override
299
- public LongBitSet acceptedGlobalOrdinals (SortedSetDocValues globalOrdinals ) throws IOException {
300
- LongBitSet acceptedGlobalOrdinals = new LongBitSet (globalOrdinals .getValueCount ());
301
- if (includeValues != null ) {
302
- for (BytesRef term : includeValues ) {
266
+ LongBitSet acceptedGlobalOrdinals = null ;
267
+ if (valids != null ) {
268
+ acceptedGlobalOrdinals = new LongBitSet (globalOrdinals .getValueCount ());
269
+ for (BytesRef term : valids ) {
303
270
long ord = globalOrdinals .lookupTerm (term );
304
271
if (ord >= 0 ) {
305
272
acceptedGlobalOrdinals .set (ord );
306
273
}
307
274
}
308
- } else if (acceptedGlobalOrdinals .length () > 0 ) {
309
- // default to all terms being acceptable
310
- acceptedGlobalOrdinals .set (0 , acceptedGlobalOrdinals .length ());
311
275
}
312
- if (excludeValues != null ) {
313
- for (BytesRef term : excludeValues ) {
276
+
277
+ if (compiled != null ) {
278
+ LongBitSet automatonGlobalOrdinals = new LongBitSet (globalOrdinals .getValueCount ());
279
+ TermsEnum globalTermsEnum ;
280
+ Terms globalTerms = new DocValuesTerms (globalOrdinals );
281
+ // TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can avoid i/o and just set bits.
282
+ globalTermsEnum = compiled .getTermsEnum (globalTerms );
283
+ for (BytesRef term = globalTermsEnum .next (); term != null ; term = globalTermsEnum .next ()) {
284
+ automatonGlobalOrdinals .set (globalTermsEnum .ord ());
285
+ }
286
+
287
+ if (acceptedGlobalOrdinals == null ) {
288
+ acceptedGlobalOrdinals = automatonGlobalOrdinals ;
289
+ } else {
290
+ acceptedGlobalOrdinals .and (automatonGlobalOrdinals );
291
+ }
292
+ }
293
+
294
+ if (acceptedGlobalOrdinals == null ) {
295
+ acceptedGlobalOrdinals = new LongBitSet (globalOrdinals .getValueCount ());
296
+ if (acceptedGlobalOrdinals .length () > 0 ) {
297
+ // default to all terms being acceptable
298
+ acceptedGlobalOrdinals .set (0 , acceptedGlobalOrdinals .length ());
299
+ }
300
+ }
301
+
302
+ if (invalids != null ) {
303
+ for (BytesRef term : invalids ) {
314
304
long ord = globalOrdinals .lookupTerm (term );
315
305
if (ord >= 0 ) {
316
306
acceptedGlobalOrdinals .clear (ord );
@@ -319,9 +309,9 @@ public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) thro
319
309
}
320
310
return acceptedGlobalOrdinals ;
321
311
}
322
-
323
312
}
324
313
314
+
325
315
private final RegExp include , exclude ;
326
316
private final SortedSet <BytesRef > includeValues , excludeValues ;
327
317
private final int incZeroBasedPartition ;
@@ -332,17 +322,36 @@ public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) thro
332
322
* @param exclude The regular expression pattern for the terms to be excluded
333
323
*/
334
324
public IncludeExclude (RegExp include , RegExp exclude ) {
335
- if (include == null && exclude == null ) {
325
+ this (include , exclude , null , null );
326
+ }
327
+
328
+ public IncludeExclude (RegExp include , RegExp exclude , SortedSet <BytesRef > includeValues , SortedSet <BytesRef > excludeValues ) {
329
+ if (include == null && exclude == null && includeValues == null && excludeValues == null ) {
330
+ throw new IllegalArgumentException ();
331
+ }
332
+ if (include != null && includeValues != null ) {
333
+ throw new IllegalArgumentException ();
334
+ }
335
+ if (exclude != null && excludeValues != null ) {
336
336
throw new IllegalArgumentException ();
337
337
}
338
338
this .include = include ;
339
339
this .exclude = exclude ;
340
- this .includeValues = null ;
341
- this .excludeValues = null ;
340
+ this .includeValues = includeValues ;
341
+ this .excludeValues = excludeValues ;
342
342
this .incZeroBasedPartition = 0 ;
343
343
this .incNumPartitions = 0 ;
344
344
}
345
345
346
+ public IncludeExclude (String include , String exclude , String [] includeValues , String [] excludeValues ) {
347
+ this (
348
+ include == null ? null : new RegExp (include ),
349
+ exclude == null ? null : new RegExp (exclude ),
350
+ convertToBytesRefSet (includeValues ),
351
+ convertToBytesRefSet (excludeValues )
352
+ );
353
+ }
354
+
346
355
public IncludeExclude (String include , String exclude ) {
347
356
this (include == null ? null : new RegExp (include ), exclude == null ? null : new RegExp (exclude ));
348
357
}
@@ -352,15 +361,7 @@ public IncludeExclude(String include, String exclude) {
352
361
* @param excludeValues The terms to be excluded
353
362
*/
354
363
public IncludeExclude (SortedSet <BytesRef > includeValues , SortedSet <BytesRef > excludeValues ) {
355
- if (includeValues == null && excludeValues == null ) {
356
- throw new IllegalArgumentException ();
357
- }
358
- this .include = null ;
359
- this .exclude = null ;
360
- this .incZeroBasedPartition = 0 ;
361
- this .incNumPartitions = 0 ;
362
- this .includeValues = includeValues ;
363
- this .excludeValues = excludeValues ;
364
+ this (null , null , includeValues , excludeValues );
364
365
}
365
366
366
367
public IncludeExclude (String [] includeValues , String [] excludeValues ) {
@@ -395,18 +396,21 @@ public IncludeExclude(int partition, int numPartitions) {
395
396
*/
396
397
public IncludeExclude (StreamInput in ) throws IOException {
397
398
if (in .readBoolean ()) {
398
- includeValues = null ;
399
- excludeValues = null ;
400
- incZeroBasedPartition = 0 ;
401
- incNumPartitions = 0 ;
402
399
String includeString = in .readOptionalString ();
403
400
include = includeString == null ? null : new RegExp (includeString );
404
401
String excludeString = in .readOptionalString ();
405
402
exclude = excludeString == null ? null : new RegExp (excludeString );
406
- return ;
403
+ if (in .getVersion ().before (Version .V_7_11_0 )) {
404
+ incZeroBasedPartition = 0 ;
405
+ incNumPartitions = 0 ;
406
+ includeValues = null ;
407
+ excludeValues = null ;
408
+ return ;
409
+ }
410
+ } else {
411
+ include = null ;
412
+ exclude = null ;
407
413
}
408
- include = null ;
409
- exclude = null ;
410
414
if (in .readBoolean ()) {
411
415
int size = in .readVInt ();
412
416
includeValues = new TreeSet <>();
@@ -436,26 +440,28 @@ public void writeTo(StreamOutput out) throws IOException {
436
440
if (regexBased ) {
437
441
out .writeOptionalString (include == null ? null : include .getOriginalString ());
438
442
out .writeOptionalString (exclude == null ? null : exclude .getOriginalString ());
439
- } else {
440
- boolean hasIncludes = includeValues != null ;
441
- out .writeBoolean (hasIncludes );
442
- if (hasIncludes ) {
443
- out .writeVInt (includeValues .size ());
444
- for (BytesRef value : includeValues ) {
445
- out .writeBytesRef (value );
446
- }
443
+ if (out .getVersion ().before (Version .V_7_11_0 )) {
444
+ return ;
447
445
}
448
- boolean hasExcludes = excludeValues != null ;
449
- out . writeBoolean ( hasExcludes ) ;
450
- if ( hasExcludes ) {
451
- out . writeVInt ( excludeValues . size ());
452
- for ( BytesRef value : excludeValues ) {
453
- out . writeBytesRef ( value );
454
- }
446
+ }
447
+ boolean hasIncludes = includeValues != null ;
448
+ out . writeBoolean ( hasIncludes );
449
+ if ( hasIncludes ) {
450
+ out . writeVInt ( includeValues . size ());
451
+ for ( BytesRef value : includeValues ) {
452
+ out . writeBytesRef ( value );
455
453
}
456
- out .writeVInt (incNumPartitions );
457
- out .writeVInt (incZeroBasedPartition );
458
454
}
455
+ boolean hasExcludes = excludeValues != null ;
456
+ out .writeBoolean (hasExcludes );
457
+ if (hasExcludes ) {
458
+ out .writeVInt (excludeValues .size ());
459
+ for (BytesRef value : excludeValues ) {
460
+ out .writeBytesRef (value );
461
+ }
462
+ }
463
+ out .writeVInt (incNumPartitions );
464
+ out .writeVInt (incZeroBasedPartition );
459
465
}
460
466
461
467
private static SortedSet <BytesRef > convertToBytesRefSet (String [] values ) {
@@ -573,29 +579,25 @@ public boolean isPartitionBased() {
573
579
574
580
private Automaton toAutomaton () {
575
581
Automaton a = null ;
582
+ if (include == null && exclude == null ) {
583
+ return a ;
584
+ }
576
585
if (include != null ) {
577
586
a = include .toAutomaton ();
578
- } else if (includeValues != null ) {
579
- a = Automata .makeStringUnion (includeValues );
580
587
} else {
581
588
a = Automata .makeAnyString ();
582
589
}
583
590
if (exclude != null ) {
584
591
a = Operations .minus (a , exclude .toAutomaton (), Operations .DEFAULT_MAX_DETERMINIZED_STATES );
585
- } else if (excludeValues != null ) {
586
- a = Operations .minus (a , Automata .makeStringUnion (excludeValues ), Operations .DEFAULT_MAX_DETERMINIZED_STATES );
587
592
}
588
593
return a ;
589
594
}
590
595
591
596
public StringFilter convertToStringFilter (DocValueFormat format ) {
592
- if (isRegexBased ()) {
593
- return new AutomatonBackedStringFilter (toAutomaton ());
594
- }
595
597
if (isPartitionBased ()){
596
598
return new PartitionedStringFilter ();
597
599
}
598
- return new TermListBackedStringFilter ( parseForDocValues ( includeValues , format ), parseForDocValues ( excludeValues , format ) );
600
+ return new SetAndRegexStringFilter ( format );
599
601
}
600
602
601
603
private static SortedSet <BytesRef > parseForDocValues (SortedSet <BytesRef > endUserFormattedValues , DocValueFormat format ) {
@@ -612,15 +614,11 @@ private static SortedSet<BytesRef> parseForDocValues(SortedSet<BytesRef> endUser
612
614
}
613
615
614
616
public OrdinalsFilter convertToOrdinalsFilter (DocValueFormat format ) {
615
-
616
- if (isRegexBased ()) {
617
- return new AutomatonBackedOrdinalsFilter (toAutomaton ());
618
- }
619
617
if (isPartitionBased ()){
620
618
return new PartitionedOrdinalsFilter ();
621
619
}
622
620
623
- return new TermListBackedOrdinalsFilter ( parseForDocValues ( includeValues , format ), parseForDocValues ( excludeValues , format ) );
621
+ return new SetAndRegexOrdinalsFilter ( format );
624
622
}
625
623
626
624
public LongFilter convertToLongFilter (DocValueFormat format ) {
0 commit comments