Skip to content

Commit 98c379c

Browse files
authored
Merge remaining sig_terms into terms (#57397) (#57687)
Merges the remaining implementation of `significant_terms` into `terms` so that we can more easilly make them work properly without `asMultiBucketAggregator` which *should* save memory and speed them up. Relates #56487
1 parent 76ee1aa commit 98c379c

File tree

8 files changed

+521
-343
lines changed

8 files changed

+521
-343
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ setup:
779779
body: { "size" : 0, "aggs" : { "no_field_terms" : { "terms" : { "size": 1 } } } }
780780

781781
---
782-
"global ords profiler":
782+
"string profiler":
783783
- skip:
784784
version: " - 7.8.99"
785785
reason: debug information added in 7.9.0
@@ -831,6 +831,36 @@ setup:
831831
- match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
832832
- match: { profile.shards.0.aggregations.0.children.0.description: max_number }
833833

834+
- do:
835+
search:
836+
index: test_1
837+
body:
838+
profile: true
839+
size: 0
840+
aggs:
841+
str_terms:
842+
terms:
843+
field: str
844+
collect_mode: breadth_first
845+
execution_hint: map
846+
aggs:
847+
max_number:
848+
max:
849+
field: number
850+
- match: { aggregations.str_terms.buckets.0.key: sheep }
851+
- match: { aggregations.str_terms.buckets.0.max_number.value: 3 }
852+
- match: { aggregations.str_terms.buckets.1.key: cow }
853+
- match: { aggregations.str_terms.buckets.1.max_number.value: 1 }
854+
- match: { aggregations.str_terms.buckets.2.key: pig }
855+
- match: { aggregations.str_terms.buckets.2.max_number.value: 1 }
856+
- match: { profile.shards.0.aggregations.0.type: MapStringTermsAggregator }
857+
- match: { profile.shards.0.aggregations.0.description: str_terms }
858+
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
859+
- match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
860+
- match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
861+
- match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
862+
- match: { profile.shards.0.aggregations.0.children.0.description: max_number }
863+
834864
- do:
835865
indices.create:
836866
index: test_3

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -551,11 +551,12 @@ public void accept(long globalOrd, long bucketOrd, long docCount) throws IOExcep
551551
B[] topBuckets = buildBuckets(ordered.size());
552552
for (int i = ordered.size() - 1; i >= 0; --i) {
553553
topBuckets[i] = convertTempBucketToRealBucket(ordered.pop());
554+
otherDocCount[0] -= topBuckets[i].getDocCount();
554555
}
555556
buildSubAggs(topBuckets);
556557

557558
return new InternalAggregation[] {
558-
buildResult(topBuckets)
559+
buildResult(topBuckets, otherDocCount[0])
559560
};
560561
}
561562

@@ -608,7 +609,7 @@ public void accept(long globalOrd, long bucketOrd, long docCount) throws IOExcep
608609
/**
609610
* Turn the buckets into an aggregation result.
610611
*/
611-
abstract R buildResult(B[] topBuckets);
612+
abstract R buildResult(B[] topBuckets, long otherDocCount);
612613

613614
/**
614615
* Build an "empty" result. Only called if there isn't any data on this
@@ -621,8 +622,6 @@ public void accept(long globalOrd, long bucketOrd, long docCount) throws IOExcep
621622
* Builds results for the standard {@code terms} aggregation.
622623
*/
623624
class StandardTermsResults extends ResultStrategy<StringTerms, StringTerms.Bucket, OrdBucket> {
624-
private long otherDocCount;
625-
626625
@Override
627626
String describe() {
628627
return "terms";
@@ -648,7 +647,6 @@ void updateBucket(OrdBucket spare, long globalOrd, long bucketOrd, long docCount
648647
spare.globalOrd = globalOrd;
649648
spare.bucketOrd = bucketOrd;
650649
spare.docCount = docCount;
651-
otherDocCount += docCount;
652650
}
653651

654652
@Override
@@ -660,7 +658,6 @@ StringTerms.Bucket convertTempBucketToRealBucket(OrdBucket temp) throws IOExcept
660658
BytesRef term = BytesRef.deepCopyOf(lookupGlobalOrd.apply(temp.globalOrd));
661659
StringTerms.Bucket result = new StringTerms.Bucket(term, temp.docCount, null, showTermDocCountError, 0, format);
662660
result.bucketOrd = temp.bucketOrd;
663-
otherDocCount -= temp.docCount;
664661
result.docCountError = 0;
665662
return result;
666663
}
@@ -671,7 +668,7 @@ void buildSubAggs(StringTerms.Bucket[] topBuckets) throws IOException {
671668
}
672669

673670
@Override
674-
StringTerms buildResult(StringTerms.Bucket[] topBuckets) {
671+
StringTerms buildResult(StringTerms.Bucket[] topBuckets, long otherDocCount) {
675672
return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
676673
metadata(), format, bucketCountThresholds.getShardSize(), showTermDocCountError,
677674
otherDocCount, Arrays.asList(topBuckets), 0);
@@ -707,7 +704,7 @@ class SignificantTermsResults extends ResultStrategy<
707704

708705
@Override
709706
String describe() {
710-
return "terms";
707+
return "significant_terms";
711708
}
712709

713710
@Override
@@ -763,7 +760,7 @@ void buildSubAggs(SignificantStringTerms.Bucket[] topBuckets) throws IOException
763760
}
764761

765762
@Override
766-
SignificantStringTerms buildResult(SignificantStringTerms.Bucket[] topBuckets) {
763+
SignificantStringTerms buildResult(SignificantStringTerms.Bucket[] topBuckets, long otherDocCount) {
767764
return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
768765
metadata(), format, subsetSize, termsAggFactory.getSupersetNumDocs(), significanceHeuristic, Arrays.asList(topBuckets));
769766
}

0 commit comments

Comments
 (0)