Skip to content

Commit 812e5eb

Browse files
Reindex search resiliency
Local reindex can now survive loosing data nodes that contain source data. The original query will be restarted with a filter for `_seq_no >= last_seq_no` when a failure is detected. Part of elastic#42612 and split out from elastic#43187
1 parent 96528f6 commit 812e5eb

17 files changed

+656
-107
lines changed

modules/reindex/src/main/java/org/elasticsearch/index/reindex/AbstractAsyncBulkByScrollAction.java

+8-4
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
119119
AbstractAsyncBulkByScrollAction(BulkByScrollTask task, boolean needsSourceDocumentVersions,
120120
boolean needsSourceDocumentSeqNoAndPrimaryTerm, Logger logger, ParentTaskAssigningClient client,
121121
ThreadPool threadPool, Request mainRequest, ActionListener<BulkByScrollResponse> listener,
122-
@Nullable ScriptService scriptService, @Nullable ReindexSslConfig sslConfig) {
122+
@Nullable ScriptService scriptService, @Nullable ReindexSslConfig sslConfig,
123+
@Nullable String restartFromField) {
123124
this.task = task;
124125
this.scriptService = scriptService;
125126
this.sslConfig = sslConfig;
@@ -135,7 +136,9 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
135136
this.listener = listener;
136137
BackoffPolicy backoffPolicy = buildBackoffPolicy();
137138
bulkRetry = new Retry(BackoffPolicy.wrap(backoffPolicy, worker::countBulkRetry), threadPool);
138-
scrollSource = buildScrollableResultSource(backoffPolicy);
139+
// todo: this is trappy, since if a subclass override relies on subclass fields, they are not initialized. We should fix
140+
// to simply pass in the hit-source.
141+
scrollSource = buildScrollableResultSource(backoffPolicy, restartFromField);
139142
scriptApplier = Objects.requireNonNull(buildScriptApplier(), "script applier must not be null");
140143
/*
141144
* Default to sorting by doc. We can't do this in the request itself because it is normal to *add* to the sorts rather than replace
@@ -213,10 +216,11 @@ private BulkRequest buildBulk(Iterable<? extends ScrollableHitSource.Hit> docs)
213216
return bulkRequest;
214217
}
215218

216-
protected ScrollableHitSource buildScrollableResultSource(BackoffPolicy backoffPolicy) {
219+
protected ScrollableHitSource buildScrollableResultSource(BackoffPolicy backoffPolicy,
220+
String restartFromField) {
217221
return new ClientScrollableHitSource(logger, backoffPolicy, threadPool, worker::countSearchRetry,
218222
this::onScrollResponse, this::finishHim, client,
219-
mainRequest.getSearchRequest());
223+
mainRequest.getSearchRequest(), restartFromField);
220224
}
221225

222226
/**

modules/reindex/src/main/java/org/elasticsearch/index/reindex/AsyncDeleteByQueryAction.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ public class AsyncDeleteByQueryAction extends AbstractAsyncBulkByScrollAction<De
3434
public AsyncDeleteByQueryAction(BulkByScrollTask task, Logger logger, ParentTaskAssigningClient client,
3535
ThreadPool threadPool, DeleteByQueryRequest request, ScriptService scriptService,
3636
ActionListener<BulkByScrollResponse> listener) {
37-
super(task, false, true, logger, client, threadPool, request, listener, scriptService, null);
37+
super(task, false, true, logger, client, threadPool, request, listener,
38+
scriptService, null, null);
3839
}
3940

4041
@Override

modules/reindex/src/main/java/org/elasticsearch/index/reindex/Reindexer.java

+36-5
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.elasticsearch.action.bulk.BackoffPolicy;
3434
import org.elasticsearch.action.bulk.BulkItemResponse;
3535
import org.elasticsearch.action.index.IndexRequest;
36+
import org.elasticsearch.action.search.SearchRequest;
3637
import org.elasticsearch.client.Client;
3738
import org.elasticsearch.client.ParentTaskAssigningClient;
3839
import org.elasticsearch.client.RestClient;
@@ -47,10 +48,14 @@
4748
import org.elasticsearch.common.xcontent.XContentParser;
4849
import org.elasticsearch.common.xcontent.XContentType;
4950
import org.elasticsearch.index.VersionType;
51+
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
5052
import org.elasticsearch.index.mapper.VersionFieldMapper;
5153
import org.elasticsearch.index.reindex.remote.RemoteScrollableHitSource;
5254
import org.elasticsearch.script.Script;
5355
import org.elasticsearch.script.ScriptService;
56+
import org.elasticsearch.search.sort.FieldSortBuilder;
57+
import org.elasticsearch.search.sort.SortBuilder;
58+
import org.elasticsearch.search.sort.SortOrder;
5459
import org.elasticsearch.threadpool.ThreadPool;
5560

5661
import java.io.IOException;
@@ -92,17 +97,41 @@ public void initTask(BulkByScrollTask task, ReindexRequest request, ActionListen
9297
}
9398

9499
public void execute(BulkByScrollTask task, ReindexRequest request, ActionListener<BulkByScrollResponse> listener) {
100+
request.getSearchRequest().allowPartialSearchResults(false);
101+
// Notice that this is called both on leader and workers when slicing.
102+
String resumableSortingField = request.getRemoteInfo() == null ? getOrAddRestartFromField(request.getSearchRequest()) : null;
103+
95104
BulkByScrollParallelizationHelper.executeSlicedAction(task, request, ReindexAction.INSTANCE, listener, client,
96105
clusterService.localNode(),
97106
() -> {
98107
ParentTaskAssigningClient assigningClient = new ParentTaskAssigningClient(client, clusterService.localNode(), task);
99108
AsyncIndexBySearchAction searchAction = new AsyncIndexBySearchAction(task, logger, assigningClient, threadPool,
100-
scriptService, reindexSslConfig, request, listener);
109+
scriptService, reindexSslConfig, request, resumableSortingField, listener);
101110
searchAction.start();
102111
});
103112

104113
}
105114

115+
private static String getOrAddRestartFromField(SearchRequest searchRequest) {
116+
// we keep with the tradition of modifying the input request, though this can lead to strange results (in transport clients).
117+
List<SortBuilder<?>> sorts = searchRequest.source().sorts();
118+
if (sorts != null && sorts.size() >= 1) {
119+
SortBuilder<?> firstSort = sorts.get(0);
120+
if (firstSort instanceof FieldSortBuilder) {
121+
FieldSortBuilder fieldSort = (FieldSortBuilder) firstSort;
122+
if (SeqNoFieldMapper.NAME.equals(fieldSort.getFieldName())
123+
&& fieldSort.order() == SortOrder.ASC) {
124+
return SeqNoFieldMapper.NAME;
125+
}
126+
// todo: support non seq_no fields and descending, but need to check field is numeric and handle missing values too then.
127+
}
128+
return null;
129+
}
130+
131+
searchRequest.source().sort(SeqNoFieldMapper.NAME);
132+
return SeqNoFieldMapper.NAME;
133+
}
134+
106135
/**
107136
* Build the {@link RestClient} used for reindexing from remote clusters.
108137
* @param remoteInfo connection information for the remote cluster
@@ -170,18 +199,20 @@ static class AsyncIndexBySearchAction extends AbstractAsyncBulkByScrollAction<Re
170199

171200
AsyncIndexBySearchAction(BulkByScrollTask task, Logger logger, ParentTaskAssigningClient client,
172201
ThreadPool threadPool, ScriptService scriptService, ReindexSslConfig sslConfig, ReindexRequest request,
173-
ActionListener<BulkByScrollResponse> listener) {
202+
String restartFromField, ActionListener<BulkByScrollResponse> listener) {
174203
super(task,
175204
/*
176205
* We only need the source version if we're going to use it when write and we only do that when the destination request uses
177206
* external versioning.
178207
*/
179208
request.getDestination().versionType() != VersionType.INTERNAL,
180-
false, logger, client, threadPool, request, listener, scriptService, sslConfig);
209+
SeqNoFieldMapper.NAME.equals(restartFromField), logger, client, threadPool, request, listener,
210+
scriptService, sslConfig, restartFromField);
181211
}
182212

183213
@Override
184-
protected ScrollableHitSource buildScrollableResultSource(BackoffPolicy backoffPolicy) {
214+
protected ScrollableHitSource buildScrollableResultSource(BackoffPolicy backoffPolicy,
215+
String restartFromField) {
185216
if (mainRequest.getRemoteInfo() != null) {
186217
RemoteInfo remoteInfo = mainRequest.getRemoteInfo();
187218
createdThreads = synchronizedList(new ArrayList<>());
@@ -191,7 +222,7 @@ protected ScrollableHitSource buildScrollableResultSource(BackoffPolicy backoffP
191222
this::onScrollResponse, this::finishHim,
192223
restClient, remoteInfo.getQuery(), mainRequest.getSearchRequest());
193224
}
194-
return super.buildScrollableResultSource(backoffPolicy);
225+
return super.buildScrollableResultSource(backoffPolicy, restartFromField);
195226
}
196227

197228
@Override

modules/reindex/src/main/java/org/elasticsearch/index/reindex/TransportUpdateByQueryAction.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ static class AsyncIndexBySearchAction extends AbstractAsyncBulkByScrollAction<Up
8787
super(task,
8888
// use sequence number powered optimistic concurrency control
8989
false, true,
90-
logger, client, threadPool, request, listener, scriptService, null);
90+
logger, client, threadPool, request, listener, scriptService, null, null);
9191
}
9292

9393
@Override

modules/reindex/src/main/java/org/elasticsearch/index/reindex/remote/RemoteScrollableHitSource.java

+18-2
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,14 @@ public class RemoteScrollableHitSource extends ScrollableHitSource {
6969
public RemoteScrollableHitSource(Logger logger, BackoffPolicy backoffPolicy, ThreadPool threadPool, Runnable countSearchRetry,
7070
Consumer<AsyncResponse> onResponse, Consumer<Exception> fail,
7171
RestClient client, BytesReference query, SearchRequest searchRequest) {
72-
super(logger, backoffPolicy, threadPool, countSearchRetry, onResponse, fail);
72+
super(logger, backoffPolicy, threadPool, countSearchRetry, onResponse, fail, null);// todo: handle resume or grace
7373
this.query = query;
7474
this.searchRequest = searchRequest;
7575
this.client = client;
7676
}
7777

7878
@Override
79-
protected void doStart(RejectAwareActionListener<Response> searchListener) {
79+
protected void doStart(TimeValue extraKeepAlive, RejectAwareActionListener<Response> searchListener) {
8080
lookupRemoteVersion(RejectAwareActionListener.withResponseHandler(searchListener, version -> {
8181
remoteVersion = version;
8282
execute(RemoteRequestBuilders.initialSearch(searchRequest, query, remoteVersion),
@@ -97,12 +97,28 @@ private void onStartResponse(RejectAwareActionListener<Response> searchListener,
9797
}
9898
}
9999

100+
@Override
101+
protected void doRestart(TimeValue extraKeepAlive, long restartFromValue, RejectAwareActionListener<Response> searchListener) {
102+
assert false;
103+
throw new UnsupportedOperationException("restart during remote reindex not supported yet");
104+
}
105+
100106
@Override
101107
protected void doStartNextScroll(String scrollId, TimeValue extraKeepAlive, RejectAwareActionListener<Response> searchListener) {
102108
TimeValue keepAlive = timeValueNanos(searchRequest.scroll().keepAlive().nanos() + extraKeepAlive.nanos());
103109
execute(RemoteRequestBuilders.scroll(scrollId, keepAlive, remoteVersion), RESPONSE_PARSER, searchListener);
104110
}
105111

112+
@Override
113+
protected boolean canRestart() {
114+
return false;
115+
}
116+
117+
@Override
118+
protected String[] indices() {
119+
return searchRequest.indices();
120+
}
121+
106122
@Override
107123
protected void clearScroll(String scrollId, Runnable onCompletion) {
108124
client.performRequestAsync(RemoteRequestBuilders.clearScroll(scrollId, remoteVersion), new ResponseListener() {

modules/reindex/src/test/java/org/elasticsearch/client/documentation/ReindexDocumentationIT.java

+4
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ public void setup() {
8686
@SuppressWarnings("unused")
8787
public void testReindex() {
8888
Client client = client();
89+
// todo: this is only necessary to ensure the seqno mapping is created.
90+
client().prepareIndex(INDEX_NAME, "_doc", "1").setSource("data", "x").get();
8991
// tag::reindex1
9092
BulkByScrollResponse response =
9193
new ReindexRequestBuilder(client, ReindexAction.INSTANCE)
@@ -94,6 +96,8 @@ public void testReindex() {
9496
.filter(QueryBuilders.matchQuery("category", "xzy")) // <1>
9597
.get();
9698
// end::reindex1
99+
100+
client().prepareDelete(INDEX_NAME, "_doc", "1").get();
97101
}
98102

99103
@SuppressWarnings("unused")

modules/reindex/src/test/java/org/elasticsearch/index/reindex/AsyncBulkByScrollActionTests.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ public void testStartNextScrollRetriesOnRejectionAndSucceeds() throws Exception
216216
ClientScrollableHitSource hitSource = new ClientScrollableHitSource(logger, buildTestBackoffPolicy(),
217217
threadPool,
218218
testTask.getWorkerState()::countSearchRetry, r -> fail(), ExceptionsHelper::reThrowIfNotNull,
219-
new ParentTaskAssigningClient(client, localNode, testTask), testRequest.getSearchRequest());
219+
new ParentTaskAssigningClient(client, localNode, testTask), testRequest.getSearchRequest(), null);
220220
hitSource.setScroll(scrollId());
221221
hitSource.startNextScroll(TimeValue.timeValueSeconds(0));
222222
assertBusy(() -> assertEquals(client.scrollsToReject + 1, client.scrollAttempts.get()));
@@ -240,7 +240,7 @@ public void testStartNextScrollRetriesOnRejectionButFailsOnTooManyRejections() t
240240
ClientScrollableHitSource hitSource = new ClientScrollableHitSource(logger, buildTestBackoffPolicy(),
241241
threadPool,
242242
testTask.getWorkerState()::countSearchRetry, r -> fail(), validingOnFail,
243-
new ParentTaskAssigningClient(client, localNode, testTask), testRequest.getSearchRequest());
243+
new ParentTaskAssigningClient(client, localNode, testTask), testRequest.getSearchRequest(), null);
244244
hitSource.setScroll(scrollId());
245245
hitSource.startNextScroll(TimeValue.timeValueSeconds(0));
246246
assertBusy(() -> assertEquals(testRequest.getMaxRetries() + 1, client.scrollAttempts.get()));
@@ -733,7 +733,8 @@ private class DummyAsyncBulkByScrollAction
733733
extends AbstractAsyncBulkByScrollAction<DummyAbstractBulkByScrollRequest, DummyTransportAsyncBulkByScrollAction> {
734734
DummyAsyncBulkByScrollAction() {
735735
super(testTask, randomBoolean(), randomBoolean(), AsyncBulkByScrollActionTests.this.logger,
736-
new ParentTaskAssigningClient(client, localNode, testTask), client.threadPool(), testRequest, listener, null, null);
736+
new ParentTaskAssigningClient(client, localNode, testTask), client.threadPool(), testRequest, listener,
737+
null, null, null);
737738
}
738739

739740
@Override

0 commit comments

Comments
 (0)