Skip to content

Commit 720756c

Browse files
Intern IndexFieldCapabilities Type String on Read (#76405) (#77754)
When handling a large number of these messages, i.e. when fetching field caps for many indices (and/or when those indices contain lots of fields), the type string is repeated many times over. As these strings are already interned because they are constants, taking the performance hit of interning them on deserialization seems a reasonable trade-off. The benefit is saving a non-trivial amount of memory for large clusters, as well as speeding up `org.elasticsearch.action.fieldcaps.TransportFieldCapabilitiesAction#merge`, which uses these strings in map lookups and will run significantly faster with interned strings than with fresh strings that do not have their hash values cached yet.
1 parent 787465c commit 720756c

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

server/src/main/java/org/elasticsearch/action/fieldcaps/IndexFieldCapabilities.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.common.io.stream.StreamInput;
1313
import org.elasticsearch.common.io.stream.StreamOutput;
1414
import org.elasticsearch.common.io.stream.Writeable;
15+
import org.elasticsearch.common.util.StringLiteralDeduplicator;
1516

1617
import java.io.IOException;
1718
import java.util.Collections;
@@ -25,6 +26,8 @@
2526
*/
2627
public class IndexFieldCapabilities implements Writeable {
2728

29+
private static final StringLiteralDeduplicator typeStringDeduplicator = new StringLiteralDeduplicator();
30+
2831
private final String name;
2932
private final String type;
3033
private final boolean isMetadatafield;
@@ -55,7 +58,7 @@ public class IndexFieldCapabilities implements Writeable {
5558
IndexFieldCapabilities(StreamInput in) throws IOException {
5659
if (in.getVersion().onOrAfter(Version.V_7_7_0)) {
5760
this.name = in.readString();
58-
this.type = in.readString();
61+
this.type = typeStringDeduplicator.deduplicate(in.readString());
5962
this.isMetadatafield = in.getVersion().onOrAfter(Version.V_7_13_0) ? in.readBoolean() : false;
6063
this.isSearchable = in.readBoolean();
6164
this.isAggregatable = in.readBoolean();
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
package org.elasticsearch.common.util;
9+
10+
import org.apache.logging.log4j.LogManager;
11+
import org.apache.logging.log4j.Logger;
12+
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
13+
14+
import java.util.Map;
15+
16+
/**
17+
* A cache in front of Java's string interning. This method assumes that it is only called with strings that are already part of the
18+
* JVM's string pool so that interning them does not grow the pool. Calling it with strings not in the interned string pool is not
19+
* advisable as its performance may deteriorate to slower than outright calls to {@link String#intern()}.
20+
*/
21+
public final class StringLiteralDeduplicator {
22+
23+
private static final Logger logger = LogManager.getLogger(StringLiteralDeduplicator.class);
24+
25+
private static final int MAX_SIZE = 1000;
26+
27+
private final Map<String, String> map = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency();
28+
29+
public StringLiteralDeduplicator() {
30+
}
31+
32+
public String deduplicate(String string) {
33+
final String res = map.get(string);
34+
if (res != null) {
35+
return res;
36+
}
37+
final String interned = string.intern();
38+
if (map.size() > MAX_SIZE) {
39+
map.clear();
40+
logger.debug("clearing intern cache");
41+
}
42+
map.put(interned, interned);
43+
return interned;
44+
}
45+
}

0 commit comments

Comments
 (0)