Skip to content

Commit 2d08ef7

Browse files
Deduplicate Strings in REST Bulk Request Parsing (#56506) (#56568)
We can save a little memory here since these strings might live for quite a while on the coordinating node.
1 parent 5c0f26d commit 2d08ef7

File tree

2 files changed

+31
-4
lines changed

2 files changed

+31
-4
lines changed

server/src/main/java/org/elasticsearch/action/bulk/BulkRequestParser.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,10 @@
4040

4141
import java.io.IOException;
4242
import java.io.InputStream;
43+
import java.util.HashMap;
44+
import java.util.Map;
4345
import java.util.function.Consumer;
46+
import java.util.function.Function;
4447

4548
import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
4649

@@ -139,6 +142,10 @@ public void parse(
139142
int from = 0;
140143
byte marker = xContent.streamSeparator();
141144
boolean typesDeprecationLogged = false;
145+
// Bulk requests can contain a lot of repeated strings for the index, pipeline and routing parameters. This map is used to
146+
// deduplicate duplicate strings parsed for these parameters. While it does not prevent instantiating the duplicate strings, it
147+
// reduces their lifetime to the lifetime of this parse call instead of the lifetime of the full bulk request.
148+
final Map<String, String> stringDeduplicator = new HashMap<>();
142149
while (true) {
143150
int nextMarker = findNextMarker(marker, from, data);
144151
if (nextMarker == -1) {
@@ -198,17 +205,17 @@ public void parse(
198205
if (!allowExplicitIndex) {
199206
throw new IllegalArgumentException("explicit index in bulk is not allowed");
200207
}
201-
index = parser.text();
208+
index = stringDeduplicator.computeIfAbsent(parser.text(), Function.identity());
202209
} else if (TYPE.match(currentFieldName, parser.getDeprecationHandler())) {
203210
if (warnOnTypeUsage && typesDeprecationLogged == false) {
204211
deprecationLogger.deprecatedAndMaybeLog("bulk_with_types", RestBulkAction.TYPES_DEPRECATION_MESSAGE);
205212
typesDeprecationLogged = true;
206213
}
207-
type = parser.text();
214+
type = stringDeduplicator.computeIfAbsent(parser.text(), Function.identity());
208215
} else if (ID.match(currentFieldName, parser.getDeprecationHandler())) {
209216
id = parser.text();
210217
} else if (ROUTING.match(currentFieldName, parser.getDeprecationHandler())) {
211-
routing = parser.text();
218+
routing = stringDeduplicator.computeIfAbsent(parser.text(), Function.identity());
212219
} else if (OP_TYPE.match(currentFieldName, parser.getDeprecationHandler())) {
213220
opType = parser.text();
214221
} else if (VERSION.match(currentFieldName, parser.getDeprecationHandler())) {
@@ -222,7 +229,7 @@ public void parse(
222229
} else if (RETRY_ON_CONFLICT.match(currentFieldName, parser.getDeprecationHandler())) {
223230
retryOnConflict = parser.intValue();
224231
} else if (PIPELINE.match(currentFieldName, parser.getDeprecationHandler())) {
225-
pipeline = parser.text();
232+
pipeline = stringDeduplicator.computeIfAbsent(parser.text(), Function.identity());
226233
} else if (SOURCE.match(currentFieldName, parser.getDeprecationHandler())) {
227234
fetchSourceContext = FetchSourceContext.fromXContent(parser);
228235
} else {

server/src/test/java/org/elasticsearch/action/bulk/BulkRequestParserTests.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,16 @@
1919

2020
package org.elasticsearch.action.bulk;
2121

22+
import org.elasticsearch.action.index.IndexRequest;
2223
import org.elasticsearch.common.bytes.BytesArray;
2324
import org.elasticsearch.common.xcontent.XContentType;
2425
import org.elasticsearch.rest.action.document.RestBulkAction;
2526
import org.elasticsearch.test.ESTestCase;
27+
import org.hamcrest.Matchers;
2628

2729
import java.io.IOException;
30+
import java.util.ArrayList;
31+
import java.util.List;
2832
import java.util.concurrent.atomic.AtomicBoolean;
2933

3034
public class BulkRequestParserTests extends ESTestCase {
@@ -111,4 +115,20 @@ public void testTypeWarning() throws IOException {
111115
assertWarnings(RestBulkAction.TYPES_DEPRECATION_MESSAGE);
112116
}
113117

118+
public void testParseDeduplicatesParameterStrings() throws IOException {
119+
BytesArray request = new BytesArray(
120+
"{ \"index\":{ \"_index\": \"bar\", \"pipeline\": \"foo\", \"routing\": \"blub\"} }\n{}\n"
121+
+ "{ \"index\":{ \"_index\": \"bar\", \"pipeline\": \"foo\", \"routing\": \"blub\" } }\n{}\n");
122+
BulkRequestParser parser = new BulkRequestParser(randomBoolean());
123+
final List<IndexRequest> indexRequests = new ArrayList<>();
124+
parser.parse(request, null, null, null, null, true, XContentType.JSON,
125+
indexRequests::add,
126+
req -> fail(), req -> fail());
127+
assertThat(indexRequests, Matchers.hasSize(2));
128+
final IndexRequest first = indexRequests.get(0);
129+
final IndexRequest second = indexRequests.get(1);
130+
assertSame(first.index(), second.index());
131+
assertSame(first.getPipeline(), second.getPipeline());
132+
assertSame(first.routing(), second.routing());
133+
}
114134
}

0 commit comments

Comments
 (0)