@@ -1158,11 +1158,9 @@ public Store.MetadataSnapshot snapshotStoreMetadata() throws IOException {
1158
1158
synchronized (engineMutex ) {
1159
1159
// if the engine is not running, we can access the store directly, but we need to make sure no one starts
1160
1160
// the engine on us. If the engine is running, we can get a snapshot via the deletion policy of the engine.
1161
- synchronized (mutex ) {
1162
- final Engine engine = getEngineOrNull ();
1163
- if (engine != null ) {
1164
- indexCommit = engine .acquireLastIndexCommit (false );
1165
- }
1161
+ final Engine engine = getEngineOrNull ();
1162
+ if (engine != null ) {
1163
+ indexCommit = engine .acquireLastIndexCommit (false );
1166
1164
}
1167
1165
if (indexCommit == null ) {
1168
1166
return store .getMetadata (null , true );
@@ -1286,9 +1284,11 @@ public CacheHelper getReaderCacheHelper() {
1286
1284
}
1287
1285
1288
1286
public void close (String reason , boolean flushEngine ) throws IOException {
1289
- synchronized (mutex ) {
1287
+ synchronized (engineMutex ) {
1290
1288
try {
1291
- changeState (IndexShardState .CLOSED , reason );
1289
+ synchronized (mutex ) {
1290
+ changeState (IndexShardState .CLOSED , reason );
1291
+ }
1292
1292
} finally {
1293
1293
final Engine engine = this .currentEngineReference .getAndSet (null );
1294
1294
try {
@@ -1343,6 +1343,7 @@ public void prepareForIndexRecovery() {
1343
1343
* This is the first operation after the local checkpoint of the safe commit if exists.
1344
1344
*/
1345
1345
public long recoverLocallyUpToGlobalCheckpoint () {
1346
+ assert Thread .holdsLock (mutex ) == false : "recover locally under mutex" ;
1346
1347
if (state != IndexShardState .RECOVERING ) {
1347
1348
throw new IndexShardNotRecoveringException (shardId , state );
1348
1349
}
@@ -1394,7 +1395,7 @@ public long recoverLocallyUpToGlobalCheckpoint() {
1394
1395
getEngine ().recoverFromTranslog (translogRecoveryRunner , globalCheckpoint );
1395
1396
logger .trace ("shard locally recovered up to {}" , getEngine ().getSeqNoStats (globalCheckpoint ));
1396
1397
} finally {
1397
- synchronized (mutex ) {
1398
+ synchronized (engineMutex ) {
1398
1399
IOUtils .close (currentEngineReference .getAndSet (null ));
1399
1400
}
1400
1401
}
@@ -1569,23 +1570,15 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier) t
1569
1570
: "expected empty set of retention leases with recovery source [" + recoveryState .getRecoverySource ()
1570
1571
+ "] but got " + getRetentionLeases ();
1571
1572
synchronized (engineMutex ) {
1573
+ assert currentEngineReference .get () == null : "engine is running" ;
1574
+ verifyNotClosed ();
1572
1575
// we must create a new engine under engineMutex (see IndexShard#snapshotStoreMetadata).
1573
1576
final Engine newEngine = engineFactory .newReadWriteEngine (config );
1574
- synchronized (mutex ) {
1575
- try {
1576
- verifyNotClosed ();
1577
- assert currentEngineReference .get () == null : "engine is running" ;
1578
- onNewEngine (newEngine );
1579
- currentEngineReference .set (newEngine );
1580
- // We set active because we are now writing operations to the engine; this way,
1581
- // if we go idle after some time and become inactive, we still give sync'd flush a chance to run.
1582
- active .set (true );
1583
- } finally {
1584
- if (currentEngineReference .get () != newEngine ) {
1585
- newEngine .close ();
1586
- }
1587
- }
1588
- }
1577
+ onNewEngine (newEngine );
1578
+ currentEngineReference .set (newEngine );
1579
+ // We set active because we are now writing operations to the engine; this way,
1580
+ // if we go idle after some time and become inactive, we still give sync'd flush a chance to run.
1581
+ active .set (true );
1589
1582
}
1590
1583
// time elapses after the engine is created above (pulling the config settings) until we set the engine reference, during
1591
1584
// which settings changes could possibly have happened, so here we forcefully push any config changes to the new engine.
@@ -1616,7 +1609,8 @@ private void onNewEngine(Engine newEngine) {
1616
1609
* Called if recovery has to be restarted after a network error or delay.
1617
1610
*/
1618
1611
public void performRecoveryRestart () throws IOException {
1619
- synchronized (mutex ) {
1612
+ assert Thread .holdsLock (mutex ) == false : "restart recovery under mutex" ;
1613
+ synchronized (engineMutex ) {
1620
1614
assert refreshListeners .pendingCount () == 0 : "we can't restart with pending listeners" ;
1621
1615
IOUtils .close (currentEngineReference .getAndSet (null ));
1622
1616
resetRecoveryStage ();
@@ -3288,7 +3282,7 @@ public ParsedDocument newNoopTombstoneDoc(String reason) {
3288
3282
* Rollback the current engine to the safe commit, then replay local translog up to the global checkpoint.
3289
3283
*/
3290
3284
void resetEngineToGlobalCheckpoint () throws IOException {
3291
- assert Thread .holdsLock (engineMutex ) == false : "resetting engine under mutex" ;
3285
+ assert Thread .holdsLock (mutex ) == false : "resetting engine under mutex" ;
3292
3286
assert getActiveOperationsCount () == OPERATIONS_BLOCKED
3293
3287
: "resetting engine without blocking operations; active operations are [" + getActiveOperations () + ']' ;
3294
3288
sync (); // persist the global checkpoint to disk
@@ -3301,6 +3295,7 @@ assert getActiveOperationsCount() == OPERATIONS_BLOCKED
3301
3295
final long globalCheckpoint = getLastKnownGlobalCheckpoint ();
3302
3296
assert globalCheckpoint == getLastSyncedGlobalCheckpoint ();
3303
3297
synchronized (engineMutex ) {
3298
+ verifyNotClosed ();
3304
3299
// we must create both a new read-only engine and a new read-write engine under engineMutex to ensure snapshotStoreMetadata,
3305
3300
// acquireXXXCommit and close work.
3306
3301
final Engine readOnlyEngine =
@@ -3328,7 +3323,7 @@ public IndexCommitRef acquireSafeIndexCommit() {
3328
3323
3329
3324
@ Override
3330
3325
public void close () throws IOException {
3331
- assert Thread .holdsLock (mutex );
3326
+ assert Thread .holdsLock (engineMutex );
3332
3327
3333
3328
Engine newEngine = newEngineReference .get ();
3334
3329
if (newEngine == currentEngineReference .get ()) {
@@ -3338,36 +3333,17 @@ public void close() throws IOException {
3338
3333
IOUtils .close (super ::close , newEngine );
3339
3334
}
3340
3335
};
3341
- synchronized (mutex ) {
3342
- try {
3343
- verifyNotClosed ();
3344
- IOUtils .close (currentEngineReference .getAndSet (readOnlyEngine ));
3345
- } finally {
3346
- if (currentEngineReference .get () != readOnlyEngine ) {
3347
- readOnlyEngine .close ();
3348
- }
3349
- }
3350
- }
3351
- final Engine newReadWriteEngine = engineFactory .newReadWriteEngine (newEngineConfig (replicationTracker ));
3352
- synchronized (mutex ) {
3353
- try {
3354
- verifyNotClosed ();
3355
- newEngineReference .set (newReadWriteEngine );
3356
- onNewEngine (newReadWriteEngine );
3357
- } finally {
3358
- if (newEngineReference .get () != newReadWriteEngine ) {
3359
- newReadWriteEngine .close (); // shard was closed
3360
- }
3361
- }
3362
- }
3336
+ IOUtils .close (currentEngineReference .getAndSet (readOnlyEngine ));
3337
+ newEngineReference .set (engineFactory .newReadWriteEngine (newEngineConfig (replicationTracker )));
3338
+ onNewEngine (newEngineReference .get ());
3363
3339
}
3364
3340
final Engine .TranslogRecoveryRunner translogRunner = (engine , snapshot ) -> runTranslogRecovery (
3365
3341
engine , snapshot , Engine .Operation .Origin .LOCAL_RESET , () -> {
3366
3342
// TODO: add dedicated recovery stats for the reset translog
3367
3343
});
3368
3344
newEngineReference .get ().recoverFromTranslog (translogRunner , globalCheckpoint );
3369
3345
newEngineReference .get ().refresh ("reset_engine" );
3370
- synchronized (mutex ) {
3346
+ synchronized (engineMutex ) {
3371
3347
verifyNotClosed ();
3372
3348
IOUtils .close (currentEngineReference .getAndSet (newEngineReference .get ()));
3373
3349
// We set active because we are now writing operations to the engine; this way,
0 commit comments