Skip to content

Commit bf4861b

Browse files
Fix RareClusterStateIT (elastic#42430)
* With very low likelihood, the test could cancel a previous publication instead of the one triggered by the given request.
* Fixed by waiting until no publication is in progress before executing the request.
* Also added debug logging that would have identified this problem.
* Closes elastic#36813
1 parent c7448b1 commit bf4861b

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,9 +1198,13 @@ public Iterable<DiscoveryNode> getFoundPeers() {
11981198
*/
11991199
boolean cancelCommittedPublication() {
12001200
synchronized (mutex) {
1201-
if (currentPublication.isPresent() && currentPublication.get().isCommitted()) {
1202-
currentPublication.get().cancel("cancelCommittedPublication");
1203-
return true;
1201+
if (currentPublication.isPresent()) {
1202+
final CoordinatorPublication publication = currentPublication.get();
1203+
if (publication.isCommitted()) {
1204+
publication.cancel("cancelCommittedPublication");
1205+
logger.debug("Cancelled publication of [{}].", publication);
1206+
return true;
1207+
}
12041208
}
12051209
return false;
12061210
}

server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ public void onFailure(String source, Exception e) {
141141

142142
private <Req extends ActionRequest, Res extends ActionResponse> ActionFuture<Res> executeAndCancelCommittedPublication(
143143
ActionRequestBuilder<Req, Res> req) throws Exception {
144+
// Wait for no publication in progress to not accidentally cancel a publication different from the one triggered by the given
145+
// request.
146+
assertBusy(
147+
() -> assertFalse(((Coordinator) internalCluster().getCurrentMasterNodeInstance(Discovery.class)).publicationInProgress()));
144148
ActionFuture<Res> future = req.execute();
145149
assertBusy(
146150
() -> assertTrue(((Coordinator)internalCluster().getCurrentMasterNodeInstance(Discovery.class)).cancelCommittedPublication()));

0 commit comments

Comments
 (0)