-
Notifications
You must be signed in to change notification settings - Fork 25.2k
Port Primary Terms to master #17044
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Port Primary Terms to master #17044
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,7 +52,6 @@ | |
import org.elasticsearch.common.util.concurrent.AbstractRunnable; | ||
import org.elasticsearch.common.util.concurrent.ConcurrentCollections; | ||
import org.elasticsearch.common.util.concurrent.ThreadContext; | ||
import org.elasticsearch.index.Index; | ||
import org.elasticsearch.index.IndexService; | ||
import org.elasticsearch.index.engine.VersionConflictEngineException; | ||
import org.elasticsearch.index.shard.IndexShard; | ||
|
@@ -359,32 +358,7 @@ public void onTimeout(TimeValue timeout) { | |
} | ||
}); | ||
} else { | ||
try { | ||
failReplicaIfNeeded(t); | ||
} catch (Throwable unexpected) { | ||
logger.error("{} unexpected error while failing replica", unexpected, request.shardId().id()); | ||
} finally { | ||
responseWithFailure(t); | ||
} | ||
} | ||
} | ||
|
||
private void failReplicaIfNeeded(Throwable t) { | ||
Index index = request.shardId().getIndex(); | ||
int shardId = request.shardId().id(); | ||
logger.trace("failure on replica [{}][{}], action [{}], request [{}]", t, index, shardId, actionName, request); | ||
if (ignoreReplicaException(t) == false) { | ||
IndexService indexService = indicesService.indexService(index); | ||
if (indexService == null) { | ||
logger.debug("ignoring failed replica {}[{}] because index was already removed.", index, shardId); | ||
return; | ||
} | ||
IndexShard indexShard = indexService.getShardOrNull(shardId); | ||
if (indexShard == null) { | ||
logger.debug("ignoring failed replica {}[{}] because index was already removed.", index, shardId); | ||
return; | ||
} | ||
indexShard.failShard(actionName + " failed on replica", t); | ||
} | ||
} | ||
|
||
|
@@ -401,7 +375,7 @@ protected void responseWithFailure(Throwable t) { | |
protected void doRun() throws Exception { | ||
setPhase(task, "replica"); | ||
assert request.shardId() != null : "request shardId must be set"; | ||
try (Releasable ignored = getIndexShardReferenceOnReplica(request.shardId())) { | ||
try (Releasable ignored = getIndexShardReferenceOnReplica(request.shardId(), request.primaryTerm())) { | ||
shardOperationOnReplica(request); | ||
if (logger.isTraceEnabled()) { | ||
logger.trace("action [{}] completed on shard [{}] for request [{}]", transportReplicaAction, request.shardId(), request); | ||
|
@@ -707,7 +681,6 @@ protected void doRun() throws Exception { | |
indexShardReference = getIndexShardReferenceOnPrimary(shardId); | ||
if (indexShardReference.isRelocated() == false) { | ||
executeLocally(); | ||
|
||
} else { | ||
executeRemotely(); | ||
} | ||
|
@@ -716,6 +689,7 @@ protected void doRun() throws Exception { | |
private void executeLocally() throws Exception { | ||
// execute locally | ||
Tuple<Response, ReplicaRequest> primaryResponse = shardOperationOnPrimary(state.metaData(), request); | ||
primaryResponse.v2().primaryTerm(indexShardReference.opPrimaryTerm()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if the primary term should instead be returned by the call to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My goal here is not to force a subclass to worry about these things. As you noted later on - the primary term of a specific primary shard can not change. |
||
if (logger.isTraceEnabled()) { | ||
logger.trace("action [{}] completed on shard [{}] for request [{}] with cluster state version [{}]", transportPrimaryAction, shardId, request, state.version()); | ||
} | ||
|
@@ -825,17 +799,17 @@ void finishBecauseUnavailable(ShardId shardId, String message) { | |
protected IndexShardReference getIndexShardReferenceOnPrimary(ShardId shardId) { | ||
IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex()); | ||
IndexShard indexShard = indexService.getShard(shardId.id()); | ||
return new IndexShardReferenceImpl(indexShard, true); | ||
return IndexShardReferenceImpl.createOnPrimary(indexShard); | ||
} | ||
|
||
/** | ||
* returns a new reference to {@link IndexShard} on a node that the request is replicated to. The reference is closed as soon as | ||
* replication is completed on the node. | ||
*/ | ||
protected IndexShardReference getIndexShardReferenceOnReplica(ShardId shardId) { | ||
protected IndexShardReference getIndexShardReferenceOnReplica(ShardId shardId, long primaryTerm) { | ||
IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex()); | ||
IndexShard indexShard = indexService.getShard(shardId.id()); | ||
return new IndexShardReferenceImpl(indexShard, false); | ||
return IndexShardReferenceImpl.createOnReplica(indexShard, primaryTerm); | ||
} | ||
|
||
/** | ||
|
@@ -1098,9 +1072,13 @@ private void doFinish() { | |
totalShards, | ||
success.get(), | ||
failuresArray | ||
|
||
) | ||
); | ||
if (logger.isTraceEnabled()) { | ||
logger.trace("finished replicating action [{}], request [{}], shardInfo [{}]", actionName, replicaRequest, | ||
finalResponse.getShardInfo()); | ||
} | ||
|
||
try { | ||
channel.sendResponse(finalResponse); | ||
} catch (IOException responseException) { | ||
|
@@ -1125,22 +1103,33 @@ interface IndexShardReference extends Releasable { | |
boolean isRelocated(); | ||
void failShard(String reason, @Nullable Throwable e); | ||
ShardRouting routingEntry(); | ||
|
||
/** returns the primary term of the current operation */ | ||
long opPrimaryTerm(); | ||
} | ||
|
||
static final class IndexShardReferenceImpl implements IndexShardReference { | ||
|
||
private final IndexShard indexShard; | ||
private final Releasable operationLock; | ||
|
||
IndexShardReferenceImpl(IndexShard indexShard, boolean primaryAction) { | ||
private IndexShardReferenceImpl(IndexShard indexShard, long primaryTerm) { | ||
this.indexShard = indexShard; | ||
if (primaryAction) { | ||
if (primaryTerm < 0) { | ||
operationLock = indexShard.acquirePrimaryOperationLock(); | ||
} else { | ||
operationLock = indexShard.acquireReplicaOperationLock(); | ||
operationLock = indexShard.acquireReplicaOperationLock(primaryTerm); | ||
} | ||
} | ||
|
||
static IndexShardReferenceImpl createOnPrimary(IndexShard indexShard) { | ||
return new IndexShardReferenceImpl(indexShard, -1); | ||
} | ||
|
||
static IndexShardReferenceImpl createOnReplica(IndexShard indexShard, long primaryTerm) { | ||
return new IndexShardReferenceImpl(indexShard, primaryTerm); | ||
} | ||
|
||
@Override | ||
public void close() { | ||
operationLock.close(); | ||
|
@@ -1160,6 +1149,11 @@ public void failShard(String reason, @Nullable Throwable e) { | |
public ShardRouting routingEntry() { | ||
return indexShard.routingEntry(); | ||
} | ||
|
||
@Override | ||
public long opPrimaryTerm() { | ||
return indexShard.getPrimaryTerm(); | ||
} | ||
} | ||
|
||
protected final void processAfterWrite(boolean refresh, IndexShard indexShard, Translog.Location location) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this can be reused to replace
routedBasedOnClusterVersion
(#16274). Yay 👍There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
interesting idea - it's currently not incremented when relocating a primary though... requires more thought.