Skip to content

Commit 818bad8

Browse files
authored
Remove Lucene's PackedInt dependency from Cuckoo filter (#74946)
Forks the Lucene data structure into a class called PackedArray that can be serialised using Elasticsearch streams.
1 parent d603790 commit 818bad8

File tree

2 files changed

+326
-24
lines changed

2 files changed

+326
-24
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.backwards;
10+
11+
import org.elasticsearch.Version;
12+
import org.elasticsearch.client.Request;
13+
import org.elasticsearch.client.Response;
14+
import org.elasticsearch.cluster.metadata.IndexMetadata;
15+
import org.elasticsearch.common.settings.Settings;
16+
import org.elasticsearch.common.xcontent.support.XContentMapValues;
17+
import org.elasticsearch.test.rest.ESRestTestCase;
18+
import org.hamcrest.Matchers;
19+
20+
import java.io.IOException;
21+
import java.util.List;
22+
23+
/**
24+
* Test that index enough data to trigger the creation of Cuckoo filters.
25+
*/
26+
public class RareTermsIT extends ESRestTestCase {
27+
28+
private static final String index = "idx";
29+
30+
private void setupMaxBuckets() throws Exception {
31+
// increases the max bucket limit for this test
32+
final Request request = new Request("PUT", "_cluster/settings");
33+
request.setJsonEntity("{ \"transient\" : { \"search.max_buckets\" : 65356 } }");
34+
assertOK(client().performRequest(request));
35+
}
36+
37+
private int indexDocs(int numDocs, int id) throws Exception {
38+
final Request request = new Request("POST", "/_bulk");
39+
final StringBuilder builder = new StringBuilder();
40+
for (int i = 0; i < numDocs; ++i) {
41+
builder.append("{ \"index\" : { \"_index\" : \"" + index + "\", \"_id\": \"" + id++ + "\" } }\n");
42+
builder.append("{\"str_value\" : \"s" + i + "\"}\n");
43+
}
44+
request.setJsonEntity(builder.toString());
45+
assertOK(client().performRequest(request));
46+
return id;
47+
}
48+
49+
public void testSingleValuedString() throws Exception {
50+
IndexingIT.Nodes nodes = IndexingIT.buildNodeAndVersions(client());
51+
Version version = nodes.getBWCVersion();
52+
// rare_terms was introduced in version 7.3.0
53+
assumeTrue("Version too old", version.onOrAfter(Version.V_7_3_0));
54+
// increase max buckets
55+
setupMaxBuckets();
56+
final Settings.Builder settings = Settings.builder()
57+
.put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 2)
58+
.put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0);
59+
createIndex(index, settings.build());
60+
// We want to trigger the usage of cuckoo filters that happen only when there are
61+
// more than 10k distinct values in one shard.
62+
final int numDocs = randomIntBetween(12000, 17000);
63+
int id = 1;
64+
// Index every value 5 times
65+
for (int i = 0; i < 5; i++) {
66+
id = indexDocs(numDocs, id);
67+
refreshAllIndices();
68+
}
69+
// There are no rare terms that only appear in one document
70+
assertNumRareTerms(1, 0);
71+
// All terms have a cardinality lower than 10
72+
assertNumRareTerms(10, numDocs);
73+
}
74+
75+
private void assertNumRareTerms(int maxDocs, int rareTerms) throws IOException {
76+
final Request request = new Request("POST", index + "/_search");
77+
request.setJsonEntity(
78+
"{\"aggs\" : {\"rareTerms\" : {\"rare_terms\" : {\"field\" : \"str_value.keyword\", \"max_doc_count\" : " + maxDocs + "}}}}"
79+
);
80+
final Response response = client().performRequest(request);
81+
assertOK(response);
82+
final Object o = XContentMapValues.extractValue("aggregations.rareTerms.buckets", responseAsMap(response));
83+
assertThat(o, Matchers.instanceOf(List.class));
84+
assertThat(((List<?>) o).size(), Matchers.equalTo(rareTerms));
85+
}
86+
}

0 commit comments

Comments
 (0)