37
37
38
38
import java .io .IOException ;
39
39
import java .io .InputStream ;
40
+ import java .util .HashMap ;
41
+ import java .util .Map ;
40
42
import java .util .function .BiConsumer ;
41
43
import java .util .function .Consumer ;
44
+ import java .util .function .Function ;
42
45
43
46
import static org .elasticsearch .index .seqno .SequenceNumbers .UNASSIGNED_PRIMARY_TERM ;
44
47
@@ -115,6 +118,10 @@ public void parse(
115
118
int line = 0 ;
116
119
int from = 0 ;
117
120
byte marker = xContent .streamSeparator ();
121
+ // Bulk requests can contain a lot of repeated strings for the index, type, pipeline and routing parameters. This map is used to
122
+ // deduplicate the strings parsed for these parameters. While it does not prevent instantiating the duplicate strings, it
123
+ // reduces their lifetime to the lifetime of this parse call instead of the lifetime of the full bulk request.
124
+ final Map <String , String > stringDeduplicator = new HashMap <>();
118
125
while (true ) {
119
126
int nextMarker = findNextMarker (marker , from , data );
120
127
if (nextMarker == -1 ) {
@@ -174,17 +181,17 @@ public void parse(
174
181
if (!allowExplicitIndex ) {
175
182
throw new IllegalArgumentException ("explicit index in bulk is not allowed" );
176
183
}
177
- index = parser .text ();
184
+ index = stringDeduplicator . computeIfAbsent ( parser .text (), Function . identity () );
178
185
} else if (TYPE .match (currentFieldName , parser .getDeprecationHandler ())) {
179
186
if (errorOnType ) {
180
187
throw new IllegalArgumentException ("Action/metadata line [" + line + "] contains an unknown parameter ["
181
188
+ currentFieldName + "]" );
182
189
}
183
- type = parser .text ();
190
+ type = stringDeduplicator . computeIfAbsent ( parser .text (), Function . identity () );
184
191
} else if (ID .match (currentFieldName , parser .getDeprecationHandler ())) {
185
192
id = parser .text ();
186
193
} else if (ROUTING .match (currentFieldName , parser .getDeprecationHandler ())) {
187
- routing = parser .text ();
194
+ routing = stringDeduplicator . computeIfAbsent ( parser .text (), Function . identity () );
188
195
} else if (OP_TYPE .match (currentFieldName , parser .getDeprecationHandler ())) {
189
196
opType = parser .text ();
190
197
} else if (VERSION .match (currentFieldName , parser .getDeprecationHandler ())) {
@@ -198,7 +205,7 @@ public void parse(
198
205
} else if (RETRY_ON_CONFLICT .match (currentFieldName , parser .getDeprecationHandler ())) {
199
206
retryOnConflict = parser .intValue ();
200
207
} else if (PIPELINE .match (currentFieldName , parser .getDeprecationHandler ())) {
201
- pipeline = parser .text ();
208
+ pipeline = stringDeduplicator . computeIfAbsent ( parser .text (), Function . identity () );
202
209
} else if (SOURCE .match (currentFieldName , parser .getDeprecationHandler ())) {
203
210
fetchSourceContext = FetchSourceContext .fromXContent (parser );
204
211
} else {
0 commit comments