
Commit dea935a

Reindex max_docs parameter name (#42942)
Previously, a reindex request had two different size specifications in the body:

* at the outer level, determining the maximum number of documents to process;
* inside the source element, determining the scroll/batch size.

The outer-level size has now been renamed to max_docs to avoid confusion and clarify its semantics, with backwards compatibility and deprecation warnings for using size. Similarly, the size parameter has been renamed to max_docs for update-by-query and delete-by-query, keeping the three interfaces consistent. Finally, all three endpoints now accept max_docs in both the body and the URL; a client-side sketch follows below.

Relates #24344
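
Below is a minimal sketch, not part of this commit, of what the renamed parameter looks like from a client's point of view, using the low-level Java REST client. The index names, the local endpoint, and the chosen values are assumptions for illustration only.

import org.apache.http.HttpHost;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

public class MaxDocsRestSketch {
    public static void main(String[] args) throws Exception {
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
            // max_docs in the request body: the outer-level limit formerly called "size"
            Request reindex = new Request("POST", "/_reindex");
            reindex.setJsonEntity(
                "{ \"max_docs\": 10,"
                    + " \"source\": { \"index\": \"twitter\" },"
                    + " \"dest\": { \"index\": \"new_twitter\" } }");
            Response reindexResponse = client.performRequest(reindex);

            // max_docs as a URL parameter, now accepted by all three endpoints
            Request deleteByQuery = new Request("POST", "/twitter/_delete_by_query");
            deleteByQuery.addParameter("max_docs", "10");
            deleteByQuery.setJsonEntity("{ \"query\": { \"match_all\": {} } }");
            Response deleteResponse = client.performRequest(deleteByQuery);
        }
    }
}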
1 parent 5929803 commit dea935a

34 files changed (+824 −90 lines)

client/rest-high-level/src/main/java/org/elasticsearch/client/RequestConverters.java

+4 −4

@@ -585,8 +585,8 @@ static Request updateByQuery(UpdateByQueryRequest updateByQueryRequest) throws I
         if (updateByQueryRequest.getScrollTime() != AbstractBulkByScrollRequest.DEFAULT_SCROLL_TIMEOUT) {
             params.putParam("scroll", updateByQueryRequest.getScrollTime());
         }
-        if (updateByQueryRequest.getSize() > 0) {
-            params.putParam("size", Integer.toString(updateByQueryRequest.getSize()));
+        if (updateByQueryRequest.getMaxDocs() > 0) {
+            params.putParam("max_docs", Integer.toString(updateByQueryRequest.getMaxDocs()));
         }
         request.addParameters(params.asMap());
         request.setEntity(createEntity(updateByQueryRequest, REQUEST_BODY_CONTENT_TYPE));
@@ -613,8 +613,8 @@ static Request deleteByQuery(DeleteByQueryRequest deleteByQueryRequest) throws I
         if (deleteByQueryRequest.getScrollTime() != AbstractBulkByScrollRequest.DEFAULT_SCROLL_TIMEOUT) {
             params.putParam("scroll", deleteByQueryRequest.getScrollTime());
         }
-        if (deleteByQueryRequest.getSize() > 0) {
-            params.putParam("size", Integer.toString(deleteByQueryRequest.getSize()));
+        if (deleteByQueryRequest.getMaxDocs() > 0) {
+            params.putParam("max_docs", Integer.toString(deleteByQueryRequest.getMaxDocs()));
         }
         request.addParameters(params.asMap());
         request.setEntity(createEntity(deleteByQueryRequest, REQUEST_BODY_CONTENT_TYPE));
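
The converter change above means a high-level client request built with setMaxDocs is sent as the max_docs URL parameter. Here is a minimal sketch of such a call, assuming a local cluster and a "twitter" index (both assumptions, not part of the commit):

import org.apache.http.HttpHost;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.reindex.BulkByScrollResponse;
import org.elasticsearch.index.reindex.DeleteByQueryRequest;

public class MaxDocsHighLevelClientSketch {
    public static void main(String[] args) throws Exception {
        try (RestHighLevelClient client = new RestHighLevelClient(
                RestClient.builder(new HttpHost("localhost", 9200, "http")))) {
            DeleteByQueryRequest request = new DeleteByQueryRequest("twitter");
            request.setQuery(new TermQueryBuilder("user", "kimchy"));
            request.setMaxDocs(10); // serialized by RequestConverters as the max_docs URL parameter
            BulkByScrollResponse response = client.deleteByQuery(request, RequestOptions.DEFAULT);
            System.out.println("deleted: " + response.getDeleted());
        }
    }
}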

client/rest-high-level/src/test/java/org/elasticsearch/client/RequestConvertersTests.java

+17 −5

@@ -436,7 +436,11 @@ public void testReindex() throws IOException {
             reindexRequest.setDestRouting("=cat");
         }
         if (randomBoolean()) {
-            reindexRequest.setSize(randomIntBetween(100, 1000));
+            if (randomBoolean()) {
+                reindexRequest.setMaxDocs(randomIntBetween(100, 1000));
+            } else {
+                reindexRequest.setSize(randomIntBetween(100, 1000));
+            }
         }
         if (randomBoolean()) {
             reindexRequest.setAbortOnVersionConflict(false);
@@ -488,8 +492,12 @@ public void testUpdateByQuery() throws IOException {
         }
         if (randomBoolean()) {
             int size = randomIntBetween(100, 1000);
-            updateByQueryRequest.setSize(size);
-            expectedParams.put("size", Integer.toString(size));
+            if (randomBoolean()) {
+                updateByQueryRequest.setMaxDocs(size);
+            } else {
+                updateByQueryRequest.setSize(size);
+            }
+            expectedParams.put("max_docs", Integer.toString(size));
         }
         if (randomBoolean()) {
             updateByQueryRequest.setAbortOnVersionConflict(false);
@@ -538,8 +546,12 @@ public void testDeleteByQuery() throws IOException {
         }
         if (randomBoolean()) {
             int size = randomIntBetween(100, 1000);
-            deleteByQueryRequest.setSize(size);
-            expectedParams.put("size", Integer.toString(size));
+            if (randomBoolean()) {
+                deleteByQueryRequest.setMaxDocs(size);
+            } else {
+                deleteByQueryRequest.setSize(size);
+            }
+            expectedParams.put("max_docs", Integer.toString(size));
         }
         if (randomBoolean()) {
             deleteByQueryRequest.setAbortOnVersionConflict(false);

client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/CRUDDocumentationIT.java

+9 −9

@@ -824,9 +824,9 @@ public void testReindex() throws Exception {
         // tag::reindex-request-conflicts
         request.setConflicts("proceed"); // <1>
         // end::reindex-request-conflicts
-        // tag::reindex-request-size
-        request.setSize(10); // <1>
-        // end::reindex-request-size
+        // tag::reindex-request-maxDocs
+        request.setMaxDocs(10); // <1>
+        // end::reindex-request-maxDocs
         // tag::reindex-request-sourceSize
         request.setSourceBatchSize(100); // <1>
         // end::reindex-request-sourceSize
@@ -1026,9 +1026,9 @@ public void testUpdateByQuery() throws Exception {
         // tag::update-by-query-request-query
         request.setQuery(new TermQueryBuilder("user", "kimchy")); // <1>
         // end::update-by-query-request-query
-        // tag::update-by-query-request-size
-        request.setSize(10); // <1>
-        // end::update-by-query-request-size
+        // tag::update-by-query-request-maxDocs
+        request.setMaxDocs(10); // <1>
+        // end::update-by-query-request-maxDocs
         // tag::update-by-query-request-scrollSize
         request.setBatchSize(100); // <1>
         // end::update-by-query-request-scrollSize
@@ -1148,9 +1148,9 @@ public void testDeleteByQuery() throws Exception {
         // tag::delete-by-query-request-query
         request.setQuery(new TermQueryBuilder("user", "kimchy")); // <1>
         // end::delete-by-query-request-query
-        // tag::delete-by-query-request-size
-        request.setSize(10); // <1>
-        // end::delete-by-query-request-size
+        // tag::delete-by-query-request-maxDocs
+        request.setMaxDocs(10); // <1>
+        // end::delete-by-query-request-maxDocs
         // tag::delete-by-query-request-scrollSize
         request.setBatchSize(100); // <1>
         // end::delete-by-query-request-scrollSize

docs/java-api/docs/update-by-query.asciidoc

+1 −1

@@ -51,7 +51,7 @@ otherwise modify the request for matching documents.
 include-tagged::{client-reindex-tests}/ReindexDocumentationIT.java[update-by-query-size]
 --------------------------------------------------
 
-You can also combine `size` with sorting to limit the documents updated:
+You can also combine `maxDocs` with sorting to limit the documents updated:
 
 ["source","java",subs="attributes,callouts,macros"]
 --------------------------------------------------

docs/java-rest/high-level/document/delete-by-query.asciidoc

+2 −2

@@ -39,11 +39,11 @@ include-tagged::{doc-tests-file}[{api}-request-query]
 --------------------------------------------------
 <1> Only copy documents which have field `user` set to `kimchy`
 
-It’s also possible to limit the number of processed documents by setting size.
+It’s also possible to limit the number of processed documents by setting `maxDocs`.
 
 ["source","java",subs="attributes,callouts,macros"]
 --------------------------------------------------
-include-tagged::{doc-tests-file}[{api}-request-size]
+include-tagged::{doc-tests-file}[{api}-request-maxDocs]
 --------------------------------------------------
 <1> Only copy 10 documents

docs/java-rest/high-level/document/reindex.asciidoc

+3 −3

@@ -65,11 +65,11 @@ include-tagged::{doc-tests-file}[{api}-request-query]
 --------------------------------------------------
 <1> Only copy documents which have field `user` set to `kimchy`
 
-It’s also possible to limit the number of processed documents by setting size.
+It’s also possible to limit the number of processed documents by setting `maxDocs`.
 
 ["source","java",subs="attributes,callouts,macros"]
 --------------------------------------------------
-include-tagged::{doc-tests-file}[{api}-request-size]
+include-tagged::{doc-tests-file}[{api}-request-maxDocs]
 --------------------------------------------------
 <1> Only copy 10 documents
 
@@ -90,7 +90,7 @@ include-tagged::{doc-tests-file}[{api}-request-pipeline]
 <1> set pipeline to `my_pipeline`
 
 If you want a particular set of documents from the source index you’ll need to use sort. If possible, prefer a more
-selective query to size and sort.
+selective query to maxDocs and sort.
 
 ["source","java",subs="attributes,callouts,macros"]
 --------------------------------------------------
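
A minimal sketch of the pattern the paragraph above describes for the high-level client, combining a sort with maxDocs; the index names and the "date" field are assumptions:

import org.elasticsearch.index.reindex.ReindexRequest;
import org.elasticsearch.search.sort.SortOrder;

public class SortedReindexSketch {
    // Copy only the 10 most recent documents from "twitter" to "new_twitter" (names assumed).
    static ReindexRequest newestTen() {
        ReindexRequest request = new ReindexRequest();
        request.setSourceIndices("twitter");
        request.setDestIndex("new_twitter");
        request.addSortField("date", SortOrder.DESC); // newest documents first
        request.setMaxDocs(10);                       // stop after 10 documents
        return request;
    }
}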

docs/java-rest/high-level/document/update-by-query.asciidoc

+2 −2

@@ -40,11 +40,11 @@ include-tagged::{doc-tests-file}[{api}-request-query]
 --------------------------------------------------
 <1> Only copy documents which have field `user` set to `kimchy`
 
-It’s also possible to limit the number of processed documents by setting size.
+It’s also possible to limit the number of processed documents by setting `maxDocs`.
 
 ["source","java",subs="attributes,callouts,macros"]
 --------------------------------------------------
-include-tagged::{doc-tests-file}[{api}-request-size]
+include-tagged::{doc-tests-file}[{api}-request-maxDocs]
 --------------------------------------------------
 <1> Only copy 10 documents

docs/reference/docs/delete-by-query.asciidoc

+5 −5

@@ -571,11 +571,11 @@ sub-request proportionally.
 * Due to the nature of `slices` each sub-request won't get a perfectly even
 portion of the documents. All documents will be addressed, but some slices may
 be larger than others. Expect larger slices to have a more even distribution.
-* Parameters like `requests_per_second` and `size` on a request with `slices`
-are distributed proportionally to each sub-request. Combine that with the point
-above about distribution being uneven and you should conclude that the using
-`size` with `slices` might not result in exactly `size` documents being
-deleted.
+* Parameters like `requests_per_second` and `max_docs` on a request with
+`slices` are distributed proportionally to each sub-request. Combine that with
+the point above about distribution being uneven and you should conclude that
+using `max_docs` with `slices` might not result in exactly `max_docs` documents
+being deleted.
 * Each sub-request gets a slightly different snapshot of the source index
 though these are all taken at approximately the same time.
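
To make the note about proportional distribution concrete, here is an illustrative arithmetic sketch only, not the server's slicing code: a max_docs budget split across slices, combined with uneven slice contents, is why the total may fall short of max_docs.

import java.util.Arrays;

public class MaxDocsSlicesSketch {
    // Hypothetical even split of a max_docs budget across slices, remainder spread over the first slices.
    static int[] splitProportionally(int maxDocs, int slices) {
        int[] perSlice = new int[slices];
        for (int i = 0; i < slices; i++) {
            perSlice[i] = maxDocs / slices + (i < maxDocs % slices ? 1 : 0);
        }
        return perSlice;
    }

    public static void main(String[] args) {
        // max_docs=10 over 3 slices gives per-slice limits [4, 3, 3]; a slice that matches
        // fewer documents than its limit simply deletes fewer, so the grand total can be < 10.
        System.out.println(Arrays.toString(splitProportionally(10, 3)));
    }
}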

docs/reference/docs/reindex.asciidoc

+10 −10

@@ -190,14 +190,14 @@ not a good idea to rely on this behavior. Instead, make sure that IDs are unique
 using a script.
 
 It's also possible to limit the number of processed documents by setting
-`size`. This will only copy a single document from `twitter` to
+`max_docs`. This will only copy a single document from `twitter` to
 `new_twitter`:
 
 [source,js]
 --------------------------------------------------
 POST _reindex
 {
-  "size": 1,
+  "max_docs": 1,
   "source": {
     "index": "twitter"
   },
@@ -211,14 +211,14 @@ POST _reindex
 
 If you want a particular set of documents from the `twitter` index you'll
 need to use `sort`. Sorting makes the scroll less efficient but in some contexts
-it's worth it. If possible, prefer a more selective query to `size` and `sort`.
+it's worth it. If possible, prefer a more selective query to `max_docs` and `sort`.
 This will copy 10000 documents from `twitter` into `new_twitter`:
 
 [source,js]
 --------------------------------------------------
 POST _reindex
 {
-  "size": 10000,
+  "max_docs": 10000,
   "source": {
     "index": "twitter",
     "sort": { "date": "desc" }
@@ -1111,11 +1111,11 @@ sub-request proportionally.
 * Due to the nature of `slices` each sub-request won't get a perfectly even
 portion of the documents. All documents will be addressed, but some slices may
 be larger than others. Expect larger slices to have a more even distribution.
-* Parameters like `requests_per_second` and `size` on a request with `slices`
-are distributed proportionally to each sub-request. Combine that with the point
-above about distribution being uneven and you should conclude that the using
-`size` with `slices` might not result in exactly `size` documents being
-reindexed.
+* Parameters like `requests_per_second` and `max_docs` on a request with
+`slices` are distributed proportionally to each sub-request. Combine that with
+the point above about distribution being uneven and you should conclude that
+using `max_docs` with `slices` might not result in exactly `max_docs` documents
+being reindexed.
 * Each sub-request gets a slightly different snapshot of the source index,
 though these are all taken at approximately the same time.
 
@@ -1232,7 +1232,7 @@ to load only the existing data into the new index and rename any fields if neede
 ----------------------------------------------------------------
 POST _reindex
 {
-  "size": 10,
+  "max_docs": 10,
   "source": {
     "index": "twitter",
     "query": {

docs/reference/docs/update-by-query.asciidoc

+5 −5

@@ -602,11 +602,11 @@ sub-request proportionally.
 * Due to the nature of `slices` each sub-request won't get a perfectly even
 portion of the documents. All documents will be addressed, but some slices may
 be larger than others. Expect larger slices to have a more even distribution.
-* Parameters like `requests_per_second` and `size` on a request with `slices`
-are distributed proportionally to each sub-request. Combine that with the point
-above about distribution being uneven and you should conclude that the using
-`size` with `slices` might not result in exactly `size` documents being
-updated.
+* Parameters like `requests_per_second` and `max_docs` on a request with
+`slices` are distributed proportionally to each sub-request. Combine that with
+the point above about distribution being uneven and you should conclude that
+using `max_docs` with `slices` might not result in exactly `max_docs` documents
+being updated.
 * Each sub-request gets a slightly different snapshot of the source index
 though these are all taken at approximately the same time.

modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractAsyncBulkByScrollAction.java

+7 −7

@@ -75,7 +75,7 @@
 import static java.util.Collections.unmodifiableList;
 import static org.elasticsearch.action.bulk.BackoffPolicy.exponentialBackoff;
 import static org.elasticsearch.common.unit.TimeValue.timeValueNanos;
-import static org.elasticsearch.index.reindex.AbstractBulkByScrollRequest.SIZE_ALL_MATCHES;
+import static org.elasticsearch.index.reindex.AbstractBulkByScrollRequest.MAX_DOCS_ALL_MATCHES;
 import static org.elasticsearch.rest.RestStatus.CONFLICT;
 import static org.elasticsearch.search.sort.SortBuilders.fieldSort;
 
@@ -263,8 +263,8 @@ void onScrollResponse(TimeValue lastBatchStartTime, int lastBatchSize, Scrollabl
             return;
         }
         long total = response.getTotalHits();
-        if (mainRequest.getSize() > 0) {
-            total = min(total, mainRequest.getSize());
+        if (mainRequest.getMaxDocs() > 0) {
+            total = min(total, mainRequest.getMaxDocs());
         }
         worker.setTotal(total);
         AbstractRunnable prepareBulkRequestRunnable = new AbstractRunnable() {
@@ -304,9 +304,9 @@ void prepareBulkRequest(TimeValue thisBatchStartTime, ScrollableHitSource.Respon
         }
         worker.countBatch();
         List<? extends ScrollableHitSource.Hit> hits = response.getHits();
-        if (mainRequest.getSize() != SIZE_ALL_MATCHES) {
-            // Truncate the hits if we have more than the request size
-            long remaining = max(0, mainRequest.getSize() - worker.getSuccessfullyProcessed());
+        if (mainRequest.getMaxDocs() != MAX_DOCS_ALL_MATCHES) {
+            // Truncate the hits if we have more than the request max docs
+            long remaining = max(0, mainRequest.getMaxDocs() - worker.getSuccessfullyProcessed());
             if (remaining < hits.size()) {
                 hits = hits.subList(0, (int) remaining);
             }
@@ -395,7 +395,7 @@ void onBulkResponse(TimeValue thisBatchStartTime, BulkResponse response) {
             return;
         }
 
-        if (mainRequest.getSize() != SIZE_ALL_MATCHES && worker.getSuccessfullyProcessed() >= mainRequest.getSize()) {
+        if (mainRequest.getMaxDocs() != MAX_DOCS_ALL_MATCHES && worker.getSuccessfullyProcessed() >= mainRequest.getMaxDocs()) {
             // We've processed all the requested docs.
             refreshAndFinish(emptyList(), emptyList(), false);
             return;

modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractBaseReindexRestHandler.java

+15 −1

@@ -19,8 +19,8 @@
 
 package org.elasticsearch.index.reindex;
 
-import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.Action;
+import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.client.node.NodeClient;
 import org.elasticsearch.common.settings.Settings;
@@ -105,6 +105,11 @@ protected Request setCommonOptions(RestRequest restRequest, Request request) {
         if (requestsPerSecond != null) {
            request.setRequestsPerSecond(requestsPerSecond);
         }
+
+        if (restRequest.hasParam("max_docs")) {
+            setMaxDocsValidateIdentical(request, restRequest.paramAsInt("max_docs", -1));
+        }
+
         return request;
     }
 
@@ -170,4 +175,13 @@ public static Float parseRequestsPerSecond(RestRequest request) {
         }
         return requestsPerSecond;
     }
+
+    static void setMaxDocsValidateIdentical(AbstractBulkByScrollRequest<?> request, int maxDocs) {
+        if (request.getMaxDocs() != AbstractBulkByScrollRequest.MAX_DOCS_ALL_MATCHES && request.getMaxDocs() != maxDocs) {
+            throw new IllegalArgumentException("[max_docs] set to two different values [" + request.getMaxDocs() + "]" +
+                " and [" + maxDocs + "]");
+        } else {
+            request.setMaxDocs(maxDocs);
+        }
+    }
 }
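
A short hedged sketch of what the new validation means for callers: a request that sets max_docs to two different values, once in the URL and once in the body, is expected to be rejected with the "[max_docs] set to two different values" error. The endpoint, index name, and values below are assumptions.

import org.elasticsearch.client.Request;

public class ConflictingMaxDocsSketch {
    // Builds (but does not execute) a request that should now fail validation.
    static Request conflictingMaxDocs() {
        Request request = new Request("POST", "/twitter/_update_by_query");
        request.addParameter("max_docs", "5");         // URL parameter says 5
        request.setJsonEntity("{ \"max_docs\": 10 }"); // body says 10
        return request;
    }
}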

modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractBulkByQueryRestHandler.java

+7 −1

@@ -23,6 +23,7 @@
 import org.elasticsearch.action.search.SearchRequest;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentParser;
@@ -52,7 +53,7 @@ protected void parseInternalRequest(Request internal, RestRequest restRequest,
         SearchRequest searchRequest = internal.getSearchRequest();
 
         try (XContentParser parser = extractRequestSpecificFields(restRequest, bodyConsumers)) {
-            RestSearchAction.parseSearchRequest(searchRequest, restRequest, parser, internal::setSize);
+            RestSearchAction.parseSearchRequest(searchRequest, restRequest, parser, size -> setMaxDocsFromSearchSize(internal, size));
         }
 
         searchRequest.source().size(restRequest.paramAsInt("scroll_size", searchRequest.source().size()));
@@ -94,4 +95,9 @@ private XContentParser extractRequestSpecificFields(RestRequest restRequest,
             parser.getDeprecationHandler(), BytesReference.bytes(builder.map(body)).streamInput());
         }
     }
+
+    private void setMaxDocsFromSearchSize(Request request, int size) {
+        LoggingDeprecationHandler.INSTANCE.usedDeprecatedName("size", "max_docs");
+        setMaxDocsValidateIdentical(request, size);
+    }
 }
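
The handler change above keeps the old behavior working: a body that still uses the search-level "size" is routed into setMaxDocsValidateIdentical and a deprecation warning pointing at max_docs is logged. A hedged sketch of such a legacy request, with the index name and query as assumptions:

import org.elasticsearch.client.Request;

public class DeprecatedSizeSketch {
    // Legacy body using "size"; per the diff above it is treated as max_docs and triggers a deprecation warning.
    static Request legacySizeRequest() {
        Request request = new Request("POST", "/twitter/_delete_by_query");
        request.setJsonEntity("{ \"size\": 10, \"query\": { \"match_all\": {} } }");
        return request;
    }
}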

modules/reindex/src/main/java/org/elasticsearch/index/reindex/RestDeleteByQueryAction.java

+1

@@ -59,6 +59,7 @@ protected DeleteByQueryRequest buildRequest(RestRequest request) throws IOExcept
 
         Map<String, Consumer<Object>> consumers = new HashMap<>();
         consumers.put("conflicts", o -> internal.setConflicts((String) o));
+        consumers.put("max_docs", s -> setMaxDocsValidateIdentical(internal, ((Number) s).intValue()));
 
         parseInternalRequest(internal, request, consumers);

modules/reindex/src/main/java/org/elasticsearch/index/reindex/RestReindexAction.java

+1 −1

@@ -97,7 +97,7 @@ public class RestReindexAction extends AbstractBaseReindexRestHandler<ReindexReq
 
         PARSER.declareField(sourceParser::parse, new ParseField("source"), ValueType.OBJECT);
         PARSER.declareField((p, v, c) -> destParser.parse(p, v.getDestination(), c), new ParseField("dest"), ValueType.OBJECT);
-        PARSER.declareInt(ReindexRequest::setSize, new ParseField("size"));
+        PARSER.declareInt(RestReindexAction::setMaxDocsValidateIdentical, new ParseField("max_docs", "size"));
         PARSER.declareField((p, v, c) -> v.setScript(Script.parse(p)), new ParseField("script"),
             ValueType.OBJECT);
         PARSER.declareString(ReindexRequest::setConflicts, new ParseField("conflicts"));

modules/reindex/src/main/java/org/elasticsearch/index/reindex/RestUpdateByQueryAction.java

+1

@@ -67,6 +67,7 @@ protected UpdateByQueryRequest buildRequest(RestRequest request) throws IOExcept
         Map<String, Consumer<Object>> consumers = new HashMap<>();
         consumers.put("conflicts", o -> internal.setConflicts((String) o));
         consumers.put("script", o -> internal.setScript(parseScript(o)));
+        consumers.put("max_docs", s -> setMaxDocsValidateIdentical(internal, ((Number) s).intValue()));
 
         parseInternalRequest(internal, request, consumers);
