@@ -1190,11 +1190,9 @@ public Store.MetadataSnapshot snapshotStoreMetadata() throws IOException {
1190
1190
synchronized (engineMutex ) {
1191
1191
// if the engine is not running, we can access the store directly, but we need to make sure no one starts
1192
1192
// the engine on us. If the engine is running, we can get a snapshot via the deletion policy of the engine.
1193
- synchronized (mutex ) {
1194
- final Engine engine = getEngineOrNull ();
1195
- if (engine != null ) {
1196
- indexCommit = engine .acquireLastIndexCommit (false );
1197
- }
1193
+ final Engine engine = getEngineOrNull ();
1194
+ if (engine != null ) {
1195
+ indexCommit = engine .acquireLastIndexCommit (false );
1198
1196
}
1199
1197
if (indexCommit == null ) {
1200
1198
return store .getMetadata (null , true );
@@ -1318,9 +1316,11 @@ public CacheHelper getReaderCacheHelper() {
1318
1316
}
1319
1317
1320
1318
public void close (String reason , boolean flushEngine ) throws IOException {
1321
- synchronized (mutex ) {
1319
+ synchronized (engineMutex ) {
1322
1320
try {
1323
- changeState (IndexShardState .CLOSED , reason );
1321
+ synchronized (mutex ) {
1322
+ changeState (IndexShardState .CLOSED , reason );
1323
+ }
1324
1324
} finally {
1325
1325
final Engine engine = this .currentEngineReference .getAndSet (null );
1326
1326
try {
@@ -1375,6 +1375,7 @@ public void prepareForIndexRecovery() {
1375
1375
* This is the first operation after the local checkpoint of the safe commit if exists.
1376
1376
*/
1377
1377
public long recoverLocallyUpToGlobalCheckpoint () {
1378
+ assert Thread .holdsLock (mutex ) == false : "recover locally under mutex" ;
1378
1379
if (state != IndexShardState .RECOVERING ) {
1379
1380
throw new IndexShardNotRecoveringException (shardId , state );
1380
1381
}
@@ -1426,7 +1427,7 @@ public long recoverLocallyUpToGlobalCheckpoint() {
1426
1427
getEngine ().recoverFromTranslog (translogRecoveryRunner , globalCheckpoint );
1427
1428
logger .trace ("shard locally recovered up to {}" , getEngine ().getSeqNoStats (globalCheckpoint ));
1428
1429
} finally {
1429
- synchronized (mutex ) {
1430
+ synchronized (engineMutex ) {
1430
1431
IOUtils .close (currentEngineReference .getAndSet (null ));
1431
1432
}
1432
1433
}
@@ -1601,23 +1602,15 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier) t
1601
1602
: "expected empty set of retention leases with recovery source [" + recoveryState .getRecoverySource ()
1602
1603
+ "] but got " + getRetentionLeases ();
1603
1604
synchronized (engineMutex ) {
1605
+ assert currentEngineReference .get () == null : "engine is running" ;
1606
+ verifyNotClosed ();
1604
1607
// we must create a new engine under engineMutex (see IndexShard#snapshotStoreMetadata).
1605
1608
final Engine newEngine = engineFactory .newReadWriteEngine (config );
1606
- synchronized (mutex ) {
1607
- try {
1608
- verifyNotClosed ();
1609
- assert currentEngineReference .get () == null : "engine is running" ;
1610
- onNewEngine (newEngine );
1611
- currentEngineReference .set (newEngine );
1612
- // We set active because we are now writing operations to the engine; this way,
1613
- // if we go idle after some time and become inactive, we still give sync'd flush a chance to run.
1614
- active .set (true );
1615
- } finally {
1616
- if (currentEngineReference .get () != newEngine ) {
1617
- newEngine .close ();
1618
- }
1619
- }
1620
- }
1609
+ onNewEngine (newEngine );
1610
+ currentEngineReference .set (newEngine );
1611
+ // We set active because we are now writing operations to the engine; this way,
1612
+ // if we go idle after some time and become inactive, we still give sync'd flush a chance to run.
1613
+ active .set (true );
1621
1614
}
1622
1615
// time elapses after the engine is created above (pulling the config settings) until we set the engine reference, during
1623
1616
// which settings changes could possibly have happened, so here we forcefully push any config changes to the new engine.
@@ -1648,7 +1641,8 @@ private void onNewEngine(Engine newEngine) {
1648
1641
* called if recovery has to be restarted after network error / delay **
1649
1642
*/
1650
1643
public void performRecoveryRestart () throws IOException {
1651
- synchronized (mutex ) {
1644
+ assert Thread .holdsLock (mutex ) == false : "restart recovery under mutex" ;
1645
+ synchronized (engineMutex ) {
1652
1646
assert refreshListeners .pendingCount () == 0 : "we can't restart with pending listeners" ;
1653
1647
IOUtils .close (currentEngineReference .getAndSet (null ));
1654
1648
resetRecoveryStage ();
@@ -3333,7 +3327,7 @@ public ParsedDocument newNoopTombstoneDoc(String reason) {
3333
3327
* Rollback the current engine to the safe commit, then replay local translog up to the global checkpoint.
3334
3328
*/
3335
3329
void resetEngineToGlobalCheckpoint () throws IOException {
3336
- assert Thread .holdsLock (engineMutex ) == false : "resetting engine under mutex" ;
3330
+ assert Thread .holdsLock (mutex ) == false : "resetting engine under mutex" ;
3337
3331
assert getActiveOperationsCount () == OPERATIONS_BLOCKED
3338
3332
: "resetting engine without blocking operations; active operations are [" + getActiveOperations () + ']' ;
3339
3333
sync (); // persist the global checkpoint to disk
@@ -3346,6 +3340,7 @@ assert getActiveOperationsCount() == OPERATIONS_BLOCKED
3346
3340
final long globalCheckpoint = getLastKnownGlobalCheckpoint ();
3347
3341
assert globalCheckpoint == getLastSyncedGlobalCheckpoint ();
3348
3342
synchronized (engineMutex ) {
3343
+ verifyNotClosed ();
3349
3344
// we must create both new read-only engine and new read-write engine under engineMutex to ensure snapshotStoreMetadata,
3350
3345
// acquireXXXCommit and close work.
3351
3346
final Engine readOnlyEngine =
@@ -3373,7 +3368,7 @@ public IndexCommitRef acquireSafeIndexCommit() {
3373
3368
3374
3369
@ Override
3375
3370
public void close () throws IOException {
3376
- assert Thread .holdsLock (mutex );
3371
+ assert Thread .holdsLock (engineMutex );
3377
3372
3378
3373
Engine newEngine = newEngineReference .get ();
3379
3374
if (newEngine == currentEngineReference .get ()) {
@@ -3383,36 +3378,17 @@ public void close() throws IOException {
3383
3378
IOUtils .close (super ::close , newEngine );
3384
3379
}
3385
3380
};
3386
- synchronized (mutex ) {
3387
- try {
3388
- verifyNotClosed ();
3389
- IOUtils .close (currentEngineReference .getAndSet (readOnlyEngine ));
3390
- } finally {
3391
- if (currentEngineReference .get () != readOnlyEngine ) {
3392
- readOnlyEngine .close ();
3393
- }
3394
- }
3395
- }
3396
- final Engine newReadWriteEngine = engineFactory .newReadWriteEngine (newEngineConfig (replicationTracker ));
3397
- synchronized (mutex ) {
3398
- try {
3399
- verifyNotClosed ();
3400
- newEngineReference .set (newReadWriteEngine );
3401
- onNewEngine (newReadWriteEngine );
3402
- } finally {
3403
- if (newEngineReference .get () != newReadWriteEngine ) {
3404
- newReadWriteEngine .close (); // shard was closed
3405
- }
3406
- }
3407
- }
3381
+ IOUtils .close (currentEngineReference .getAndSet (readOnlyEngine ));
3382
+ newEngineReference .set (engineFactory .newReadWriteEngine (newEngineConfig (replicationTracker )));
3383
+ onNewEngine (newEngineReference .get ());
3408
3384
}
3409
3385
final Engine .TranslogRecoveryRunner translogRunner = (engine , snapshot ) -> runTranslogRecovery (
3410
3386
engine , snapshot , Engine .Operation .Origin .LOCAL_RESET , () -> {
3411
3387
// TODO: add a dedicate recovery stats for the reset translog
3412
3388
});
3413
3389
newEngineReference .get ().recoverFromTranslog (translogRunner , globalCheckpoint );
3414
3390
newEngineReference .get ().refresh ("reset_engine" );
3415
- synchronized (mutex ) {
3391
+ synchronized (engineMutex ) {
3416
3392
verifyNotClosed ();
3417
3393
IOUtils .close (currentEngineReference .getAndSet (newEngineReference .get ()));
3418
3394
// We set active because we are now writing operations to the engine; this way,
0 commit comments