Skip to content

Commit 80a9a08

Browse files
committed
Fix leaking searcher when shards are removed or relocated (#52099)
We might leak a searcher if the target shard is removed (i.e., its index is deleted) or relocated while we are creating a SearchContext from a SearchRewriteContext. Relates #51708. Closes #52021. I labelled this as a non-issue because it fixes an unreleased bug introduced in #51708.
1 parent 79f67e7 commit 80a9a08

File tree

3 files changed

+59
-10
lines changed

3 files changed

+59
-10
lines changed

server/src/main/java/org/elasticsearch/search/SearchService.java

+9-8
Original file line numberDiff line numberDiff line change
@@ -694,23 +694,24 @@ public DefaultSearchContext createSearchContext(ShardSearchRequest request, Time
694694
}
695695

696696
private DefaultSearchContext createSearchContext(SearchRewriteContext rewriteContext, TimeValue timeout) {
697-
final ShardSearchRequest request = rewriteContext.request;
698-
final Engine.Searcher searcher = rewriteContext.searcher;
699-
IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
700-
IndexShard indexShard = indexService.getShard(request.shardId().getId());
701-
SearchShardTarget shardTarget = new SearchShardTarget(clusterService.localNode().getId(),
702-
indexShard.shardId(), request.getClusterAlias(), OriginalIndices.NONE);
703697
boolean success = false;
704698
try {
699+
final ShardSearchRequest request = rewriteContext.request;
700+
final Engine.Searcher searcher = rewriteContext.searcher;
701+
IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
702+
IndexShard indexShard = indexService.getShard(request.shardId().getId());
703+
SearchShardTarget shardTarget = new SearchShardTarget(clusterService.localNode().getId(),
704+
indexShard.shardId(), request.getClusterAlias(), OriginalIndices.NONE);
705705
DefaultSearchContext searchContext = new DefaultSearchContext(idGenerator.incrementAndGet(), request, shardTarget,
706706
searcher, clusterService, indexService, indexShard, bigArrays, threadPool::relativeTimeInMillis, timeout,
707707
fetchPhase, clusterService.state().nodes().getMinNodeVersion());
708708
success = true;
709709
return searchContext;
710710
} finally {
711711
if (success == false) {
712-
// we handle the case where the DefaultSearchContext constructor throws an exception since we would otherwise
713-
// leak a searcher and this can have severe implications (unable to obtain shard lock exceptions).
712+
// we handle the case where `IndicesService#indexServiceSafe`, `IndexService#getShard`, or the DefaultSearchContext
713+
// constructor throws an exception since we would otherwise leak a searcher and this can have severe implications
714+
// (unable to obtain shard lock exceptions).
714715
IOUtils.closeWhileHandlingException(rewriteContext.searcher);
715716
}
716717
}

server/src/test/java/org/elasticsearch/recovery/RelocationIT.java

+19-2
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
import java.util.concurrent.CountDownLatch;
8888
import java.util.concurrent.Semaphore;
8989
import java.util.concurrent.TimeUnit;
90+
import java.util.concurrent.atomic.AtomicBoolean;
9091
import java.util.stream.Collectors;
9192
import java.util.stream.Stream;
9293

@@ -457,7 +458,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
457458
}
458459
}
459460

460-
public void testIndexAndRelocateConcurrently() throws Exception {
461+
public void testIndexSearchAndRelocateConcurrently() throws Exception {
461462
int halfNodes = randomIntBetween(1, 3);
462463
Settings[] nodeSettings = Stream.concat(
463464
Stream.generate(() -> Settings.builder().put("node.attr.color", "blue").build()).limit(halfNodes),
@@ -474,8 +475,21 @@ public void testIndexAndRelocateConcurrently() throws Exception {
474475
.put("index.routing.allocation.exclude.color", "blue")
475476
.put(indexSettings())
476477
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(halfNodes - 1));
478+
if (randomBoolean()) {
479+
settings.put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), randomIntBetween(1, 10) + "s");
480+
}
477481
assertAcked(prepareCreate("test", settings));
478482
assertAllShardsOnNodes("test", redNodes);
483+
AtomicBoolean stopped = new AtomicBoolean(false);
484+
Thread[] searchThreads = randomBoolean() ? new Thread[0] : new Thread[randomIntBetween(1, 4)];
485+
for (int i = 0; i < searchThreads.length; i++) {
486+
searchThreads[i] = new Thread(() -> {
487+
while (stopped.get() == false) {
488+
assertNoFailures(client().prepareSearch("test").setRequestCache(false).get());
489+
}
490+
});
491+
searchThreads[i].start();
492+
}
479493
int numDocs = randomIntBetween(100, 150);
480494
ArrayList<String> ids = new ArrayList<>();
481495
logger.info(" --> indexing [{}] docs", numDocs);
@@ -513,7 +527,10 @@ public void testIndexAndRelocateConcurrently() throws Exception {
513527
assertNoFailures(afterRelocation);
514528
assertSearchHits(afterRelocation, ids.toArray(new String[ids.size()]));
515529
}
516-
530+
stopped.set(true);
531+
for (Thread searchThread : searchThreads) {
532+
searchThread.join();
533+
}
517534
}
518535

519536
public void testRelocateWhileWaitingForRefresh() {

server/src/test/java/org/elasticsearch/search/SearchServiceTests.java

+31
Original file line numberDiff line numberDiff line change
@@ -898,4 +898,35 @@ public void onFailure(Exception e) {
898898
latch.await();
899899
}
900900
}
901+
902+
public void testDeleteIndexWhileSearch() throws Exception {
903+
createIndex("test");
904+
int numDocs = randomIntBetween(1, 20);
905+
for (int i = 0; i < numDocs; i++) {
906+
client().prepareIndex("test", "_doc").setSource("f", "v").get();
907+
}
908+
client().admin().indices().prepareRefresh("test").get();
909+
AtomicBoolean stopped = new AtomicBoolean(false);
910+
Thread[] searchers = new Thread[randomIntBetween(1, 4)];
911+
CountDownLatch latch = new CountDownLatch(searchers.length);
912+
for (int i = 0; i < searchers.length; i++) {
913+
searchers[i] = new Thread(() -> {
914+
latch.countDown();
915+
while (stopped.get() == false) {
916+
try {
917+
client().prepareSearch("test").setRequestCache(false).get();
918+
} catch (Exception ignored) {
919+
return;
920+
}
921+
}
922+
});
923+
searchers[i].start();
924+
}
925+
latch.await();
926+
client().admin().indices().prepareDelete("test").get();
927+
stopped.set(true);
928+
for (Thread searcher : searchers) {
929+
searcher.join();
930+
}
931+
}
901932
}

0 commit comments

Comments
 (0)