Skip to content

Commit f907b60

Browse files
authored
Handle partial failure retrieving segments in SegmentCountStep (#46556)
Since the `IndicesSegmentsRequest` scatters to all shards for the index, it's possible that some of the shards may fail. This adds failure handling and logging (since this is a best-effort step in the first place) for this case.
1 parent 58a66e9 commit f907b60

File tree

2 files changed

+90
-12
lines changed

2 files changed

+90
-12
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SegmentCountStep.java

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.action.admin.indices.segments.IndexSegments;
1212
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentsRequest;
1313
import org.elasticsearch.action.admin.indices.segments.ShardSegments;
14+
import org.elasticsearch.action.support.DefaultShardOperationFailedException;
1415
import org.elasticsearch.client.Client;
1516
import org.elasticsearch.cluster.metadata.IndexMetaData;
1617
import org.elasticsearch.cluster.routing.ShardRouting;
@@ -50,19 +51,32 @@ public int getMaxNumSegments() {
5051
public void evaluateCondition(IndexMetaData indexMetaData, Listener listener) {
5152
getClient().admin().indices().segments(new IndicesSegmentsRequest(indexMetaData.getIndex().getName()),
5253
ActionListener.wrap(response -> {
53-
IndexSegments segments = response.getIndices().get(indexMetaData.getIndex().getName());
54-
List<ShardSegments> unmergedShards = segments.getShards().values().stream()
55-
.flatMap(iss -> Arrays.stream(iss.getShards()))
56-
.filter(shardSegments -> shardSegments.getSegments().size() > maxNumSegments)
57-
.collect(Collectors.toList());
58-
if (unmergedShards.size() > 0) {
59-
Map<ShardRouting, Integer> unmergedShardCounts = unmergedShards.stream()
60-
.collect(Collectors.toMap(ShardSegments::getShardRouting, ss -> ss.getSegments().size()));
61-
logger.info("[{}] best effort force merge to [{}] segments did not succeed for {} shards: {}",
62-
indexMetaData.getIndex().getName(), maxNumSegments, unmergedShards.size(), unmergedShardCounts);
54+
IndexSegments idxSegments = response.getIndices().get(indexMetaData.getIndex().getName());
55+
if (idxSegments == null || (response.getShardFailures() != null && response.getShardFailures().length > 0)) {
56+
final DefaultShardOperationFailedException[] failures = response.getShardFailures();
57+
logger.info("[{}] retrieval of segment counts after force merge did not succeed, " +
58+
"there were {} shard failures. " +
59+
"failures: {}",
60+
indexMetaData.getIndex().getName(),
61+
response.getFailedShards(),
62+
failures == null ? "n/a" : Strings.collectionToDelimitedString(Arrays.stream(failures)
63+
.map(Strings::toString)
64+
.collect(Collectors.toList()), ","));
65+
listener.onResponse(true, new Info(-1));
66+
} else {
67+
List<ShardSegments> unmergedShards = idxSegments.getShards().values().stream()
68+
.flatMap(iss -> Arrays.stream(iss.getShards()))
69+
.filter(shardSegments -> shardSegments.getSegments().size() > maxNumSegments)
70+
.collect(Collectors.toList());
71+
if (unmergedShards.size() > 0) {
72+
Map<ShardRouting, Integer> unmergedShardCounts = unmergedShards.stream()
73+
.collect(Collectors.toMap(ShardSegments::getShardRouting, ss -> ss.getSegments().size()));
74+
logger.info("[{}] best effort force merge to [{}] segments did not succeed for {} shards: {}",
75+
indexMetaData.getIndex().getName(), maxNumSegments, unmergedShards.size(), unmergedShardCounts);
76+
}
77+
// Force merging is best effort, so always return true that the condition has been met.
78+
listener.onResponse(true, new Info(unmergedShards.size()));
6379
}
64-
// Force merging is best effort, so always return true that the condition has been met.
65-
listener.onResponse(true, new Info(unmergedShards.size()));
6680
}, listener::onFailure));
6781
}
6882

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SegmentCountStepTests.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.action.admin.indices.segments.IndexShardSegments;
1313
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentResponse;
1414
import org.elasticsearch.action.admin.indices.segments.ShardSegments;
15+
import org.elasticsearch.action.support.DefaultShardOperationFailedException;
1516
import org.elasticsearch.client.AdminClient;
1617
import org.elasticsearch.client.Client;
1718
import org.elasticsearch.client.IndicesAdminClient;
@@ -130,6 +131,7 @@ public void onResponse(boolean conditionMet, ToXContentObject info) {
130131

131132
@Override
132133
public void onFailure(Exception e) {
134+
logger.warn("unexpected onFailure call", e);
133135
throw new AssertionError("unexpected method call");
134136
}
135137
});
@@ -187,6 +189,7 @@ public void onResponse(boolean conditionMet, ToXContentObject info) {
187189

188190
@Override
189191
public void onFailure(Exception e) {
192+
logger.warn("unexpected onFailure call", e);
190193
throw new AssertionError("unexpected method call");
191194
}
192195
});
@@ -195,6 +198,67 @@ public void onFailure(Exception e) {
195198
assertEquals(new SegmentCountStep.Info(0L), conditionInfo.get());
196199
}
197200

201+
public void testFailedToRetrieveSomeSegments() {
202+
int maxNumSegments = randomIntBetween(3, 10);
203+
Index index = new Index(randomAlphaOfLengthBetween(1, 20), randomAlphaOfLengthBetween(1, 20));
204+
Client client = Mockito.mock(Client.class);
205+
AdminClient adminClient = Mockito.mock(AdminClient.class);
206+
IndicesAdminClient indicesClient = Mockito.mock(IndicesAdminClient.class);
207+
IndicesSegmentResponse indicesSegmentResponse = Mockito.mock(IndicesSegmentResponse.class);
208+
IndexSegments indexSegments = Mockito.mock(IndexSegments.class);
209+
IndexShardSegments indexShardSegments = Mockito.mock(IndexShardSegments.class);
210+
Map<Integer, IndexShardSegments> indexShards = Collections.singletonMap(0, indexShardSegments);
211+
ShardSegments shardSegmentsOne = Mockito.mock(ShardSegments.class);
212+
ShardSegments[] shardSegmentsArray = new ShardSegments[] { shardSegmentsOne };
213+
Spliterator<IndexShardSegments> iss = indexShards.values().spliterator();
214+
List<Segment> segments = new ArrayList<>();
215+
for (int i = 0; i < maxNumSegments + randomIntBetween(1, 3); i++) {
216+
segments.add(null);
217+
}
218+
Mockito.when(indicesSegmentResponse.getStatus()).thenReturn(RestStatus.OK);
219+
Mockito.when(indicesSegmentResponse.getIndices()).thenReturn(Collections.singletonMap(index.getName(), null));
220+
Mockito.when(indicesSegmentResponse.getShardFailures())
221+
.thenReturn(new DefaultShardOperationFailedException[]{new DefaultShardOperationFailedException(index.getName(),
222+
0, new IllegalArgumentException("fake"))});
223+
Mockito.when(indexSegments.spliterator()).thenReturn(iss);
224+
Mockito.when(indexShardSegments.getShards()).thenReturn(shardSegmentsArray);
225+
Mockito.when(shardSegmentsOne.getSegments()).thenReturn(segments);
226+
227+
Mockito.when(client.admin()).thenReturn(adminClient);
228+
Mockito.when(adminClient.indices()).thenReturn(indicesClient);
229+
230+
Step.StepKey stepKey = randomStepKey();
231+
StepKey nextStepKey = randomStepKey();
232+
233+
Mockito.doAnswer(invocationOnMock -> {
234+
@SuppressWarnings("unchecked")
235+
ActionListener<IndicesSegmentResponse> listener = (ActionListener<IndicesSegmentResponse>) invocationOnMock.getArguments()[1];
236+
listener.onResponse(indicesSegmentResponse);
237+
return null;
238+
}).when(indicesClient).segments(any(), any());
239+
240+
SetOnce<Boolean> conditionMetResult = new SetOnce<>();
241+
SetOnce<ToXContentObject> conditionInfo = new SetOnce<>();
242+
243+
SegmentCountStep step = new SegmentCountStep(stepKey, nextStepKey, client, maxNumSegments);
244+
step.evaluateCondition(makeMeta(index), new AsyncWaitStep.Listener() {
245+
@Override
246+
public void onResponse(boolean conditionMet, ToXContentObject info) {
247+
conditionMetResult.set(conditionMet);
248+
conditionInfo.set(info);
249+
}
250+
251+
@Override
252+
public void onFailure(Exception e) {
253+
logger.warn("unexpected onFailure call", e);
254+
throw new AssertionError("unexpected method call: " + e);
255+
}
256+
});
257+
258+
assertTrue(conditionMetResult.get());
259+
assertEquals(new SegmentCountStep.Info(-1L), conditionInfo.get());
260+
}
261+
198262
public void testThrowsException() {
199263
Exception exception = new RuntimeException("error");
200264
Index index = new Index(randomAlphaOfLengthBetween(1, 20), randomAlphaOfLengthBetween(1, 20));

0 commit comments

Comments
 (0)