Skip to content

Commit 1104d65

Browse files
authored
Fix bug with terms' min_doc_count (#62130) (#62177)
The `global_ordinals` implementation of `terms` had a bug when `min_doc_count: 0` that'd cause sub-aggregations to have array index out of bounds exceptions. Ooops. My fault. This fixes the bug by assigning ordinals to those buckets. Closes #62084
1 parent 885051f commit 1104d65

File tree

2 files changed

+142
-2
lines changed

2 files changed

+142
-2
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,3 +1001,130 @@ setup:
10011001
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
10021002
- match: { profile.shards.0.aggregations.0.debug.result_strategy: long_terms }
10031003
- match: { profile.shards.0.aggregations.0.debug.total_buckets: 2 }
1004+
1005+
---
1006+
"min_doc_count":
1007+
- skip:
1008+
version: " - 7.99.99"
1009+
reason: broken in 7.9.1, not yet backported
1010+
1011+
- do:
1012+
bulk:
1013+
index: test_1
1014+
refresh: true
1015+
body: |
1016+
{ "index": {} }
1017+
{ "str": "sheep", "number": 1 }
1018+
{ "index": {} }
1019+
{ "str": "sheep", "number": 3 }
1020+
{ "index": {} }
1021+
{ "str": "cow", "number": 1 }
1022+
{ "index": {} }
1023+
{ "str": "pig", "number": 1 }
1024+
1025+
- do:
1026+
search:
1027+
index: test_1
1028+
body:
1029+
size: 0
1030+
query:
1031+
simple_query_string:
1032+
fields: [str]
1033+
query: sheep cow
1034+
minimum_should_match: 1
1035+
aggs:
1036+
str_terms:
1037+
terms:
1038+
field: str
1039+
min_doc_count: 2
1040+
aggs:
1041+
max_number:
1042+
max:
1043+
field: number
1044+
- length: { aggregations.str_terms.buckets: 1 }
1045+
- match: { aggregations.str_terms.buckets.0.key: sheep }
1046+
- match: { aggregations.str_terms.buckets.0.doc_count: 2 }
1047+
- match: { aggregations.str_terms.buckets.0.max_number.value: 3 }
1048+
1049+
- do:
1050+
search:
1051+
index: test_1
1052+
body:
1053+
size: 0
1054+
query:
1055+
simple_query_string:
1056+
fields: [str]
1057+
query: sheep cow
1058+
minimum_should_match: 1
1059+
aggs:
1060+
str_terms:
1061+
terms:
1062+
field: str
1063+
min_doc_count: 1
1064+
aggs:
1065+
max_number:
1066+
max:
1067+
field: number
1068+
- length: { aggregations.str_terms.buckets: 2 }
1069+
- match: { aggregations.str_terms.buckets.0.key: sheep }
1070+
- match: { aggregations.str_terms.buckets.0.doc_count: 2 }
1071+
- match: { aggregations.str_terms.buckets.0.max_number.value: 3 }
1072+
- match: { aggregations.str_terms.buckets.1.key: cow }
1073+
- match: { aggregations.str_terms.buckets.1.doc_count: 1 }
1074+
- match: { aggregations.str_terms.buckets.1.max_number.value: 1 }
1075+
1076+
- do:
1077+
search:
1078+
index: test_1
1079+
body:
1080+
size: 0
1081+
query:
1082+
simple_query_string:
1083+
fields: [str]
1084+
query: sheep cow
1085+
minimum_should_match: 1
1086+
aggs:
1087+
str_terms:
1088+
terms:
1089+
field: str
1090+
aggs:
1091+
max_number:
1092+
max:
1093+
field: number
1094+
- length: { aggregations.str_terms.buckets: 2 }
1095+
- match: { aggregations.str_terms.buckets.0.key: sheep }
1096+
- match: { aggregations.str_terms.buckets.0.doc_count: 2 }
1097+
- match: { aggregations.str_terms.buckets.0.max_number.value: 3 }
1098+
- match: { aggregations.str_terms.buckets.1.key: cow }
1099+
- match: { aggregations.str_terms.buckets.1.doc_count: 1 }
1100+
- match: { aggregations.str_terms.buckets.1.max_number.value: 1 }
1101+
1102+
- do:
1103+
search:
1104+
index: test_1
1105+
body:
1106+
size: 0
1107+
query:
1108+
simple_query_string:
1109+
fields: [str]
1110+
query: sheep cow
1111+
minimum_should_match: 1
1112+
aggs:
1113+
str_terms:
1114+
terms:
1115+
field: str
1116+
min_doc_count: 0
1117+
aggs:
1118+
max_number:
1119+
max:
1120+
field: number
1121+
- length: { aggregations.str_terms.buckets: 3 }
1122+
- match: { aggregations.str_terms.buckets.0.key: sheep }
1123+
- match: { aggregations.str_terms.buckets.0.doc_count: 2 }
1124+
- match: { aggregations.str_terms.buckets.0.max_number.value: 3 }
1125+
- match: { aggregations.str_terms.buckets.1.key: cow }
1126+
- match: { aggregations.str_terms.buckets.1.doc_count: 1 }
1127+
- match: { aggregations.str_terms.buckets.1.max_number.value: 1.0 }
1128+
- match: { aggregations.str_terms.buckets.2.key: pig }
1129+
- match: { aggregations.str_terms.buckets.2.doc_count: 0 }
1130+
- match: { aggregations.str_terms.buckets.2.max_number.value: null }

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -513,8 +513,21 @@ void forEach(long owningBucketOrd, BucketInfoConsumer consumer) throws IOExcepti
513513
if (false == acceptedGlobalOrdinals.test(globalOrd)) {
514514
continue;
515515
}
516-
long bucketOrd = bucketOrds.find(owningBucketOrd, globalOrd);
517-
long docCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
516+
/*
517+
* Use `add` instead of `find` here to assign an ordinal
518+
* even if the global ord wasn't found so we can build
519+
* sub-aggregations without trouble even though we haven't
520+
* hit any documents for them. This is wasteful, but
521+
* settings minDocCount == 0 is wasteful in general.....
522+
*/
523+
long bucketOrd = bucketOrds.add(owningBucketOrd, globalOrd);
524+
long docCount;
525+
if (bucketOrd < 0) {
526+
bucketOrd = -1 - bucketOrd;
527+
docCount = bucketDocCount(bucketOrd);
528+
} else {
529+
docCount = 0;
530+
}
518531
consumer.accept(globalOrd, bucketOrd, docCount);
519532
}
520533
} else {

0 commit comments

Comments
 (0)