Skip to content

Commit dbc9d60

Browse files
Support for remote path in reindex api (#31290)
Support for remote path in reindex api Closes #22913
1 parent a705e1a commit dbc9d60

File tree

13 files changed

+136
-73
lines changed

13 files changed

+136
-73
lines changed

docs/reference/docs/reindex.asciidoc

+5-5
Original file line numberDiff line numberDiff line change
@@ -422,11 +422,11 @@ POST _reindex
422422
// TEST[s/"username": "user",//]
423423
// TEST[s/"password": "pass"//]
424424

425-
The `host` parameter must contain a scheme, host, and port (e.g.
426-
`https://otherhost:9200`). The `username` and `password` parameters are
427-
optional, and when they are present `_reindex` will connect to the remote
428-
Elasticsearch node using basic auth. Be sure to use `https` when using
429-
basic auth or the password will be sent in plain text.
425+
The `host` parameter must contain a scheme, host, and port (e.g.
426+
`https://otherhost:9200`), and may include an optional path (e.g. `https://otherhost:9200/proxy`).
427+
The `username` and `password` parameters are optional, and when they are present `_reindex`
428+
will connect to the remote Elasticsearch node using basic auth. Be sure to use `https` when
429+
using basic auth or the password will be sent in plain text.
430430

431431
Remote hosts have to be explicitly whitelisted in `elasticsearch.yml` using the
432432
`reindex.remote.whitelist` property. It can be set to a comma delimited list

modules/reindex/src/main/java/org/elasticsearch/index/reindex/RestReindexAction.java

+6-3
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
*/
5858
public class RestReindexAction extends AbstractBaseReindexRestHandler<ReindexRequest, ReindexAction> {
5959
static final ObjectParser<ReindexRequest, Void> PARSER = new ObjectParser<>("reindex");
60-
private static final Pattern HOST_PATTERN = Pattern.compile("(?<scheme>[^:]+)://(?<host>[^:]+):(?<port>\\d+)");
60+
private static final Pattern HOST_PATTERN = Pattern.compile("(?<scheme>[^:]+)://(?<host>[^:]+):(?<port>\\d+)(?<pathPrefix>/.*)?");
6161

6262
static {
6363
ObjectParser.Parser<ReindexRequest, Void> sourceParser = (parser, request, context) -> {
@@ -139,10 +139,12 @@ static RemoteInfo buildRemoteInfo(Map<String, Object> source) throws IOException
139139
String hostInRequest = requireNonNull(extractString(remote, "host"), "[host] must be specified to reindex from a remote cluster");
140140
Matcher hostMatcher = HOST_PATTERN.matcher(hostInRequest);
141141
if (false == hostMatcher.matches()) {
142-
throw new IllegalArgumentException("[host] must be of the form [scheme]://[host]:[port] but was [" + hostInRequest + "]");
142+
throw new IllegalArgumentException("[host] must be of the form [scheme]://[host]:[port](/[pathPrefix])? but was ["
143+
+ hostInRequest + "]");
143144
}
144145
String scheme = hostMatcher.group("scheme");
145146
String host = hostMatcher.group("host");
147+
String pathPrefix = hostMatcher.group("pathPrefix");
146148
int port = Integer.parseInt(hostMatcher.group("port"));
147149
Map<String, String> headers = extractStringStringMap(remote, "headers");
148150
TimeValue socketTimeout = extractTimeValue(remote, "socket_timeout", RemoteInfo.DEFAULT_SOCKET_TIMEOUT);
@@ -151,7 +153,8 @@ static RemoteInfo buildRemoteInfo(Map<String, Object> source) throws IOException
151153
throw new IllegalArgumentException(
152154
"Unsupported fields in [remote]: [" + Strings.collectionToCommaDelimitedString(remote.keySet()) + "]");
153155
}
154-
return new RemoteInfo(scheme, host, port, queryForRemote(source), username, password, headers, socketTimeout, connectTimeout);
156+
return new RemoteInfo(scheme, host, port, pathPrefix, queryForRemote(source),
157+
username, password, headers, socketTimeout, connectTimeout);
155158
}
156159

157160
/**

modules/reindex/src/main/java/org/elasticsearch/index/reindex/TransportReindexAction.java

+34-28
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.elasticsearch.action.ActionRequestValidationException;
3838
import org.elasticsearch.action.bulk.BackoffPolicy;
3939
import org.elasticsearch.action.bulk.BulkItemResponse.Failure;
40+
import org.elasticsearch.client.RestClientBuilder;
4041
import org.elasticsearch.common.bytes.BytesReference;
4142
import org.elasticsearch.common.xcontent.DeprecationHandler;
4243
import org.elasticsearch.index.reindex.ScrollableHitSource.SearchFailure;
@@ -206,34 +207,39 @@ static RestClient buildRestClient(RemoteInfo remoteInfo, long taskId, List<Threa
206207
for (Map.Entry<String, String> header : remoteInfo.getHeaders().entrySet()) {
207208
clientHeaders[i++] = new BasicHeader(header.getKey(), header.getValue());
208209
}
209-
return RestClient.builder(new HttpHost(remoteInfo.getHost(), remoteInfo.getPort(), remoteInfo.getScheme()))
210-
.setDefaultHeaders(clientHeaders)
211-
.setRequestConfigCallback(c -> {
212-
c.setConnectTimeout(Math.toIntExact(remoteInfo.getConnectTimeout().millis()));
213-
c.setSocketTimeout(Math.toIntExact(remoteInfo.getSocketTimeout().millis()));
214-
return c;
215-
})
216-
.setHttpClientConfigCallback(c -> {
217-
// Enable basic auth if it is configured
218-
if (remoteInfo.getUsername() != null) {
219-
UsernamePasswordCredentials creds = new UsernamePasswordCredentials(remoteInfo.getUsername(),
220-
remoteInfo.getPassword());
221-
CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
222-
credentialsProvider.setCredentials(AuthScope.ANY, creds);
223-
c.setDefaultCredentialsProvider(credentialsProvider);
224-
}
225-
// Stick the task id in the thread name so we can track down tasks from stack traces
226-
AtomicInteger threads = new AtomicInteger();
227-
c.setThreadFactory(r -> {
228-
String name = "es-client-" + taskId + "-" + threads.getAndIncrement();
229-
Thread t = new Thread(r, name);
230-
threadCollector.add(t);
231-
return t;
232-
});
233-
// Limit ourselves to one reactor thread because for now the search process is single threaded.
234-
c.setDefaultIOReactorConfig(IOReactorConfig.custom().setIoThreadCount(1).build());
235-
return c;
236-
}).build();
210+
final RestClientBuilder builder =
211+
RestClient.builder(new HttpHost(remoteInfo.getHost(), remoteInfo.getPort(), remoteInfo.getScheme()))
212+
.setDefaultHeaders(clientHeaders)
213+
.setRequestConfigCallback(c -> {
214+
c.setConnectTimeout(Math.toIntExact(remoteInfo.getConnectTimeout().millis()));
215+
c.setSocketTimeout(Math.toIntExact(remoteInfo.getSocketTimeout().millis()));
216+
return c;
217+
})
218+
.setHttpClientConfigCallback(c -> {
219+
// Enable basic auth if it is configured
220+
if (remoteInfo.getUsername() != null) {
221+
UsernamePasswordCredentials creds = new UsernamePasswordCredentials(remoteInfo.getUsername(),
222+
remoteInfo.getPassword());
223+
CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
224+
credentialsProvider.setCredentials(AuthScope.ANY, creds);
225+
c.setDefaultCredentialsProvider(credentialsProvider);
226+
}
227+
// Stick the task id in the thread name so we can track down tasks from stack traces
228+
AtomicInteger threads = new AtomicInteger();
229+
c.setThreadFactory(r -> {
230+
String name = "es-client-" + taskId + "-" + threads.getAndIncrement();
231+
Thread t = new Thread(r, name);
232+
threadCollector.add(t);
233+
return t;
234+
});
235+
// Limit ourselves to one reactor thread because for now the search process is single threaded.
236+
c.setDefaultIOReactorConfig(IOReactorConfig.custom().setIoThreadCount(1).build());
237+
return c;
238+
});
239+
if (Strings.hasLength(remoteInfo.getPathPrefix()) && "/".equals(remoteInfo.getPathPrefix()) == false) {
240+
builder.setPathPrefix(remoteInfo.getPathPrefix());
241+
}
242+
return builder.build();
237243
}
238244

239245
/**

modules/reindex/src/test/java/org/elasticsearch/index/reindex/ReindexFromRemoteBuildRestClientTests.java

+15-13
Original file line numberDiff line numberDiff line change
@@ -34,20 +34,22 @@
3434

3535
public class ReindexFromRemoteBuildRestClientTests extends RestClientBuilderTestCase {
3636
public void testBuildRestClient() throws Exception {
37-
RemoteInfo remoteInfo = new RemoteInfo("https", "localhost", 9200, new BytesArray("ignored"), null, null, emptyMap(),
37+
for(final String path: new String[]{"", null, "/", "path"}) {
38+
RemoteInfo remoteInfo = new RemoteInfo("https", "localhost", 9200, path, new BytesArray("ignored"), null, null, emptyMap(),
3839
RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT);
39-
long taskId = randomLong();
40-
List<Thread> threads = synchronizedList(new ArrayList<>());
41-
RestClient client = TransportReindexAction.buildRestClient(remoteInfo, taskId, threads);
42-
try {
43-
assertBusy(() -> assertThat(threads, hasSize(2)));
44-
int i = 0;
45-
for (Thread thread : threads) {
46-
assertEquals("es-client-" + taskId + "-" + i, thread.getName());
47-
i++;
40+
long taskId = randomLong();
41+
List<Thread> threads = synchronizedList(new ArrayList<>());
42+
RestClient client = TransportReindexAction.buildRestClient(remoteInfo, taskId, threads);
43+
try {
44+
assertBusy(() -> assertThat(threads, hasSize(2)));
45+
int i = 0;
46+
for (Thread thread : threads) {
47+
assertEquals("es-client-" + taskId + "-" + i, thread.getName());
48+
i++;
49+
}
50+
} finally {
51+
client.close();
4852
}
49-
} finally {
50-
client.close();
5153
}
5254
}
5355

@@ -57,7 +59,7 @@ public void testHeaders() throws Exception {
5759
for (int i = 0; i < numHeaders; ++i) {
5860
headers.put("header" + i, Integer.toString(i));
5961
}
60-
RemoteInfo remoteInfo = new RemoteInfo("https", "localhost", 9200, new BytesArray("ignored"), null, null,
62+
RemoteInfo remoteInfo = new RemoteInfo("https", "localhost", 9200, null, new BytesArray("ignored"), null, null,
6163
headers, RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT);
6264
long taskId = randomLong();
6365
List<Thread> threads = synchronizedList(new ArrayList<>());

modules/reindex/src/test/java/org/elasticsearch/index/reindex/ReindexFromRemoteWhitelistTests.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public void testLocalRequestWithWhitelist() {
4949
* Build a {@link RemoteInfo}, defaulting values that we don't care about in this test to values that don't hurt anything.
5050
*/
5151
private RemoteInfo newRemoteInfo(String host, int port) {
52-
return new RemoteInfo(randomAlphaOfLength(5), host, port, new BytesArray("test"), null, null, emptyMap(),
52+
return new RemoteInfo(randomAlphaOfLength(5), host, port, null, new BytesArray("test"), null, null, emptyMap(),
5353
RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT);
5454
}
5555

@@ -63,7 +63,7 @@ public void testWhitelistedRemote() {
6363

6464
public void testWhitelistedByPrefix() {
6565
checkRemoteWhitelist(buildRemoteWhitelist(singletonList("*.example.com:9200")),
66-
new RemoteInfo(randomAlphaOfLength(5), "es.example.com", 9200, new BytesArray("test"), null, null, emptyMap(),
66+
new RemoteInfo(randomAlphaOfLength(5), "es.example.com", 9200, null, new BytesArray("test"), null, null, emptyMap(),
6767
RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT));
6868
checkRemoteWhitelist(buildRemoteWhitelist(singletonList("*.example.com:9200")),
6969
newRemoteInfo("6e134134a1.us-east-1.aws.example.com", 9200));

modules/reindex/src/test/java/org/elasticsearch/index/reindex/ReindexFromRemoteWithAuthTests.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,9 @@ public void fetchTransportAddress() {
104104
* Build a {@link RemoteInfo}, defaulting values that we don't care about in this test to values that don't hurt anything.
105105
*/
106106
private RemoteInfo newRemoteInfo(String username, String password, Map<String, String> headers) {
107-
return new RemoteInfo("http", address.getAddress(), address.getPort(), new BytesArray("{\"match_all\":{}}"), username, password,
108-
headers, RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT);
107+
return new RemoteInfo("http", address.getAddress(), address.getPort(), null,
108+
new BytesArray("{\"match_all\":{}}"), username, password, headers,
109+
RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT);
109110
}
110111

111112
public void testReindexFromRemoteWithAuthentication() throws Exception {

modules/reindex/src/test/java/org/elasticsearch/index/reindex/ReindexSourceTargetValidationTests.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,10 @@ public void testTargetIsAlias() {
8888

8989
public void testRemoteInfoSkipsValidation() {
9090
// The index doesn't have to exist
91-
succeeds(new RemoteInfo(randomAlphaOfLength(5), "test", 9200, new BytesArray("test"), null, null, emptyMap(),
91+
succeeds(new RemoteInfo(randomAlphaOfLength(5), "test", 9200, null, new BytesArray("test"), null, null, emptyMap(),
9292
RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT), "does_not_exist", "target");
9393
// And it doesn't matter if they are the same index. They are considered to be different because the remote one is, well, remote.
94-
succeeds(new RemoteInfo(randomAlphaOfLength(5), "test", 9200, new BytesArray("test"), null, null, emptyMap(),
94+
succeeds(new RemoteInfo(randomAlphaOfLength(5), "test", 9200, null, new BytesArray("test"), null, null, emptyMap(),
9595
RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT), "target", "target");
9696
}
9797

modules/reindex/src/test/java/org/elasticsearch/index/reindex/RestReindexActionTests.java

+23
Original file line numberDiff line numberDiff line change
@@ -89,15 +89,38 @@ public void testBuildRemoteInfoWithAllHostParts() throws IOException {
8989
assertEquals("http", info.getScheme());
9090
assertEquals("example.com", info.getHost());
9191
assertEquals(9200, info.getPort());
92+
assertNull(info.getPathPrefix());
9293
assertEquals(RemoteInfo.DEFAULT_SOCKET_TIMEOUT, info.getSocketTimeout()); // Didn't set the timeout so we should get the default
9394
assertEquals(RemoteInfo.DEFAULT_CONNECT_TIMEOUT, info.getConnectTimeout()); // Didn't set the timeout so we should get the default
9495

9596
info = buildRemoteInfoHostTestCase("https://other.example.com:9201");
9697
assertEquals("https", info.getScheme());
9798
assertEquals("other.example.com", info.getHost());
9899
assertEquals(9201, info.getPort());
100+
assertNull(info.getPathPrefix());
99101
assertEquals(RemoteInfo.DEFAULT_SOCKET_TIMEOUT, info.getSocketTimeout());
100102
assertEquals(RemoteInfo.DEFAULT_CONNECT_TIMEOUT, info.getConnectTimeout());
103+
104+
info = buildRemoteInfoHostTestCase("https://other.example.com:9201/");
105+
assertEquals("https", info.getScheme());
106+
assertEquals("other.example.com", info.getHost());
107+
assertEquals(9201, info.getPort());
108+
assertEquals("/", info.getPathPrefix());
109+
assertEquals(RemoteInfo.DEFAULT_SOCKET_TIMEOUT, info.getSocketTimeout());
110+
assertEquals(RemoteInfo.DEFAULT_CONNECT_TIMEOUT, info.getConnectTimeout());
111+
112+
info = buildRemoteInfoHostTestCase("https://other.example.com:9201/proxy-path/");
113+
assertEquals("https", info.getScheme());
114+
assertEquals("other.example.com", info.getHost());
115+
assertEquals(9201, info.getPort());
116+
assertEquals("/proxy-path/", info.getPathPrefix());
117+
assertEquals(RemoteInfo.DEFAULT_SOCKET_TIMEOUT, info.getSocketTimeout());
118+
assertEquals(RemoteInfo.DEFAULT_CONNECT_TIMEOUT, info.getConnectTimeout());
119+
120+
final IllegalArgumentException exception = expectThrows(IllegalArgumentException.class,
121+
() -> buildRemoteInfoHostTestCase("https"));
122+
assertEquals("[host] must be of the form [scheme]://[host]:[port](/[pathPrefix])? but was [https]",
123+
exception.getMessage());
101124
}
102125

103126
public void testReindexFromRemoteRequestParsing() throws IOException {

modules/reindex/src/test/java/org/elasticsearch/index/reindex/RetryTests.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,10 @@ public void testReindexFromRemote() throws Exception {
124124
assertNotNull(masterNode);
125125

126126
TransportAddress address = masterNode.getHttp().getAddress().publishAddress();
127-
RemoteInfo remote = new RemoteInfo("http", address.getAddress(), address.getPort(), new BytesArray("{\"match_all\":{}}"), null,
128-
null, emptyMap(), RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT);
127+
RemoteInfo remote =
128+
new RemoteInfo("http", address.getAddress(), address.getPort(), null,
129+
new BytesArray("{\"match_all\":{}}"), null, null, emptyMap(),
130+
RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT);
129131
ReindexRequestBuilder request = new ReindexRequestBuilder(client, ReindexAction.INSTANCE).source("source").destination("dest")
130132
.setRemoteInfo(remote);
131133
return request;

modules/reindex/src/test/java/org/elasticsearch/index/reindex/RoundTripTests.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,9 @@ public void testReindexRequest() throws IOException {
6363
}
6464
TimeValue socketTimeout = parseTimeValue(randomPositiveTimeValue(), "socketTimeout");
6565
TimeValue connectTimeout = parseTimeValue(randomPositiveTimeValue(), "connectTimeout");
66-
reindex.setRemoteInfo(new RemoteInfo(randomAlphaOfLength(5), randomAlphaOfLength(5), port, query, username, password, headers,
67-
socketTimeout, connectTimeout));
66+
reindex.setRemoteInfo(
67+
new RemoteInfo(randomAlphaOfLength(5), randomAlphaOfLength(5), port, null,
68+
query, username, password, headers, socketTimeout, connectTimeout));
6869
}
6970
ReindexRequest tripped = new ReindexRequest();
7071
roundTrip(reindex, tripped);

modules/reindex/src/test/java/org/elasticsearch/index/reindex/remote/RemoteInfoTests.java

+10-6
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,21 @@
2626
import static java.util.Collections.emptyMap;
2727

2828
public class RemoteInfoTests extends ESTestCase {
29-
private RemoteInfo newRemoteInfo(String scheme, String username, String password) {
30-
return new RemoteInfo(scheme, "testhost", 12344, new BytesArray("testquery"), username, password, emptyMap(),
29+
private RemoteInfo newRemoteInfo(String scheme, String prefixPath, String username, String password) {
30+
return new RemoteInfo(scheme, "testhost", 12344, prefixPath, new BytesArray("testquery"), username, password, emptyMap(),
3131
RemoteInfo.DEFAULT_SOCKET_TIMEOUT, RemoteInfo.DEFAULT_CONNECT_TIMEOUT);
3232
}
3333

3434
public void testToString() {
35-
assertEquals("host=testhost port=12344 query=testquery", newRemoteInfo("http", null, null).toString());
36-
assertEquals("host=testhost port=12344 query=testquery username=testuser", newRemoteInfo("http", "testuser", null).toString());
35+
assertEquals("host=testhost port=12344 query=testquery",
36+
newRemoteInfo("http", null, null, null).toString());
37+
assertEquals("host=testhost port=12344 query=testquery username=testuser",
38+
newRemoteInfo("http", null, "testuser", null).toString());
3739
assertEquals("host=testhost port=12344 query=testquery username=testuser password=<<>>",
38-
newRemoteInfo("http", "testuser", "testpass").toString());
40+
newRemoteInfo("http", null, "testuser", "testpass").toString());
3941
assertEquals("scheme=https host=testhost port=12344 query=testquery username=testuser password=<<>>",
40-
newRemoteInfo("https", "testuser", "testpass").toString());
42+
newRemoteInfo("https", null, "testuser", "testpass").toString());
43+
assertEquals("scheme=https host=testhost port=12344 pathPrefix=prxy query=testquery username=testuser password=<<>>",
44+
newRemoteInfo("https", "prxy", "testuser", "testpass").toString());
4145
}
4246
}

0 commit comments

Comments
 (0)