|
125 | 125 | import org.elasticsearch.test.DummyShardLock;
|
126 | 126 | import org.elasticsearch.test.FieldMaskingReader;
|
127 | 127 | import org.elasticsearch.test.VersionUtils;
|
| 128 | +import org.elasticsearch.test.store.MockFSDirectoryService; |
128 | 129 | import org.elasticsearch.threadpool.ThreadPool;
|
129 | 130 | import org.junit.Assert;
|
130 | 131 |
|
@@ -3697,6 +3698,122 @@ public void testResetEngine() throws Exception {
|
3697 | 3698 | closeShard(shard, false);
|
3698 | 3699 | }
|
3699 | 3700 |
|
| 3701 | + /** |
| 3702 | + * Simulates a scenario seen rarely in ConcurrentSeqNoVersioningIT: closing a shard while the engine is inside |
| 3703 | + * resetEngineToGlobalCheckpoint, which can lead to a check index failure in integration tests. The engine's overridden recoverFromTranslog signals a second thread and then waits for it; that thread closes the shard and runs checkIndex on its store. |
| 3704 | + */ |
| 3705 | + public void testCloseShardWhileResettingEngine() throws Exception { |
| 3706 | + CountDownLatch readyToCloseLatch = new CountDownLatch(1); |
| 3707 | + CountDownLatch closeDoneLatch = new CountDownLatch(1); |
| 3708 | + IndexShard shard = newStartedShard(false, Settings.EMPTY, config -> new InternalEngine(config) { |
| 3709 | + @Override |
| 3710 | + public InternalEngine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, |
| 3711 | + long recoverUpToSeqNo) throws IOException { |
| 3712 | + readyToCloseLatch.countDown(); |
| 3713 | + try { |
| 3714 | + closeDoneLatch.await(); |
| 3715 | + } catch (InterruptedException e) { |
| 3716 | + throw new AssertionError(e); |
| 3717 | + } |
| 3718 | + return super.recoverFromTranslog(translogRecoveryRunner, recoverUpToSeqNo); |
| 3719 | + } |
| 3720 | + }); |
| 3721 | + |
| 3722 | + Thread closeShardThread = new Thread(() -> { |
| 3723 | + try { |
| 3724 | + readyToCloseLatch.await(); |
| 3725 | + shard.close("testing", false); |
| 3726 | + // In integration tests this check is done by a listener on IndexService; here it is invoked directly after the close. |
| 3727 | + MockFSDirectoryService.checkIndex(logger, shard.store(), shard.shardId); |
| 3728 | + } catch (InterruptedException | IOException e) { |
| 3729 | + throw new AssertionError(e); |
| 3730 | + } finally { |
| 3731 | + closeDoneLatch.countDown(); |
| 3732 | + } |
| 3733 | + }); |
| 3734 | + |
| 3735 | + closeShardThread.start(); |
| 3736 | + |
| 3737 | + final CountDownLatch engineResetLatch = new CountDownLatch(1); |
| 3738 | + shard.acquireAllReplicaOperationsPermits(shard.getOperationPrimaryTerm(), shard.getGlobalCheckpoint(), 0L, |
| 3739 | + ActionListener.wrap(r -> { |
| 3740 | + try (Releasable dummy = r) { |
| 3741 | + shard.resetEngineToGlobalCheckpoint(); |
| 3742 | + } finally { |
| 3743 | + engineResetLatch.countDown(); |
| 3744 | + } |
| 3745 | + }, Assert::assertNotNull), TimeValue.timeValueMinutes(1L)); |
| 3746 | + |
| 3747 | + engineResetLatch.await(); |
| 3748 | + |
| 3749 | + closeShardThread.join(); |
| 3750 | + |
| 3751 | + // Close the store; shard.close was already called on closeShardThread above. |
| 3752 | + closeShard(shard, false); |
| 3753 | + } |
| 3754 | + |
| 3755 | + /** |
| 3756 | + * Simulates a scenario seen rarely in ConcurrentSeqNoVersioningIT: while the engine is inside |
| 3757 | + * resetEngineToGlobalCheckpoint, snapshotting store metadata could fail. Here the overridden recoverFromTranslog, after delegating to super, waits while a second thread calls snapshotStoreMetadata and reads store metadata from the last and the safe index commits. |
| 3758 | + */ |
| 3759 | + public void testSnapshotWhileResettingEngine() throws Exception { |
| 3760 | + CountDownLatch readyToSnapshotLatch = new CountDownLatch(1); |
| 3761 | + CountDownLatch snapshotDoneLatch = new CountDownLatch(1); |
| 3762 | + IndexShard shard = newStartedShard(false, Settings.EMPTY, config -> new InternalEngine(config) { |
| 3763 | + @Override |
| 3764 | + public InternalEngine recoverFromTranslog(TranslogRecoveryRunner translogRecoveryRunner, |
| 3765 | + long recoverUpToSeqNo) throws IOException { |
| 3766 | + InternalEngine internalEngine = super.recoverFromTranslog(translogRecoveryRunner, recoverUpToSeqNo); |
| 3767 | + readyToSnapshotLatch.countDown(); |
| 3768 | + try { |
| 3769 | + snapshotDoneLatch.await(); |
| 3770 | + } catch (InterruptedException e) { |
| 3771 | + throw new AssertionError(e); |
| 3772 | + } |
| 3773 | + return internalEngine; |
| 3774 | + } |
| 3775 | + }); |
| 3776 | + |
| 3777 | + indexOnReplicaWithGaps(shard, between(0, 1000), Math.toIntExact(shard.getLocalCheckpoint())); |
| 3778 | + final long globalCheckpoint = randomLongBetween(shard.getGlobalCheckpoint(), shard.getLocalCheckpoint()); |
| 3779 | + shard.updateGlobalCheckpointOnReplica(globalCheckpoint, "test"); |
| 3780 | + |
| 3781 | + Thread snapshotThread = new Thread(() -> { |
| 3782 | + try { |
| 3783 | + readyToSnapshotLatch.await(); |
| 3784 | + shard.snapshotStoreMetadata(); |
| 3785 | + try (Engine.IndexCommitRef indexCommitRef = shard.acquireLastIndexCommit(randomBoolean())) { |
| 3786 | + shard.store().getMetadata(indexCommitRef.getIndexCommit()); |
| 3787 | + } |
| 3788 | + try (Engine.IndexCommitRef indexCommitRef = shard.acquireSafeIndexCommit()) { |
| 3789 | + shard.store().getMetadata(indexCommitRef.getIndexCommit()); |
| 3790 | + } |
| 3791 | + } catch (InterruptedException | IOException e) { |
| 3792 | + throw new AssertionError(e); |
| 3793 | + } finally { |
| 3794 | + snapshotDoneLatch.countDown(); |
| 3795 | + } |
| 3796 | + }); |
| 3797 | + |
| 3798 | + snapshotThread.start(); |
| 3799 | + |
| 3800 | + final CountDownLatch engineResetLatch = new CountDownLatch(1); |
| 3801 | + shard.acquireAllReplicaOperationsPermits(shard.getOperationPrimaryTerm(), shard.getGlobalCheckpoint(), 0L, |
| 3802 | + ActionListener.wrap(r -> { |
| 3803 | + try (Releasable dummy = r) { |
| 3804 | + shard.resetEngineToGlobalCheckpoint(); |
| 3805 | + } finally { |
| 3806 | + engineResetLatch.countDown(); |
| 3807 | + } |
| 3808 | + }, Assert::assertNotNull), TimeValue.timeValueMinutes(1L)); |
| 3809 | + |
| 3810 | + engineResetLatch.await(); |
| 3811 | + |
| 3812 | + snapshotThread.join(); |
| 3813 | + |
| 3814 | + closeShard(shard, false); |
| 3815 | + } |
| 3816 | + |
3700 | 3817 | public void testConcurrentAcquireAllReplicaOperationsPermitsWithPrimaryTermUpdate() throws Exception {
|
3701 | 3818 | final IndexShard replica = newStartedShard(false);
|
3702 | 3819 | indexOnReplicaWithGaps(replica, between(0, 1000), Math.toIntExact(replica.getLocalCheckpoint()));
|
|
0 commit comments