23
23
import org .apache .lucene .index .DirectoryReader ;
24
24
import org .apache .lucene .index .IndexCommit ;
25
25
import org .apache .lucene .index .IndexableField ;
26
+ import org .apache .lucene .index .IndexWriter ;
26
27
import org .apache .lucene .index .Term ;
27
28
import org .apache .lucene .search .IndexSearcher ;
28
29
import org .apache .lucene .search .TermQuery ;
118
119
import org .elasticsearch .snapshots .SnapshotId ;
119
120
import org .elasticsearch .snapshots .SnapshotInfo ;
120
121
import org .elasticsearch .snapshots .SnapshotShardFailure ;
122
+ import org .elasticsearch .test .CorruptionUtils ;
121
123
import org .elasticsearch .test .DummyShardLock ;
122
124
import org .elasticsearch .test .FieldMaskingReader ;
123
125
import org .elasticsearch .test .VersionUtils ;
126
128
127
129
import java .io .IOException ;
128
130
import java .nio .charset .Charset ;
131
+ import java .nio .file .FileVisitResult ;
132
+ import java .nio .file .Files ;
129
133
import java .nio .file .Path ;
134
+ import java .nio .file .SimpleFileVisitor ;
135
+ import java .nio .file .attribute .BasicFileAttributes ;
130
136
import java .util .ArrayList ;
131
137
import java .util .Arrays ;
132
138
import java .util .Collections ;
@@ -1239,7 +1245,7 @@ public String[] listAll() throws IOException {
1239
1245
};
1240
1246
1241
1247
try (Store store = createStore (shardId , new IndexSettings (metaData , Settings .EMPTY ), directory )) {
1242
- IndexShard shard = newShard (shardRouting , shardPath , metaData , store ,
1248
+ IndexShard shard = newShard (shardRouting , shardPath , metaData , i -> store ,
1243
1249
null , new InternalEngineFactory (), () -> {
1244
1250
}, EMPTY_EVENT_LISTENER );
1245
1251
AtomicBoolean failureCallbackTriggered = new AtomicBoolean (false );
@@ -2590,6 +2596,143 @@ public void testReadSnapshotConcurrently() throws IOException, InterruptedExcept
2590
2596
closeShards (newShard );
2591
2597
}
2592
2598
2599
+ public void testIndexCheckOnStartup () throws Exception {
2600
+ final IndexShard indexShard = newStartedShard (true );
2601
+
2602
+ final long numDocs = between (10 , 100 );
2603
+ for (long i = 0 ; i < numDocs ; i ++) {
2604
+ indexDoc (indexShard , "_doc" , Long .toString (i ), "{}" );
2605
+ }
2606
+ indexShard .flush (new FlushRequest ());
2607
+ closeShards (indexShard );
2608
+
2609
+ final ShardPath shardPath = indexShard .shardPath ();
2610
+
2611
+ final Path indexPath = corruptIndexFile (shardPath );
2612
+
2613
+ final AtomicInteger corruptedMarkerCount = new AtomicInteger ();
2614
+ final SimpleFileVisitor <Path > corruptedVisitor = new SimpleFileVisitor <Path >() {
2615
+ @ Override
2616
+ public FileVisitResult visitFile (Path file , BasicFileAttributes attrs ) throws IOException {
2617
+ if (Files .isRegularFile (file ) && file .getFileName ().toString ().startsWith (Store .CORRUPTED )) {
2618
+ corruptedMarkerCount .incrementAndGet ();
2619
+ }
2620
+ return FileVisitResult .CONTINUE ;
2621
+ }
2622
+ };
2623
+ Files .walkFileTree (indexPath , corruptedVisitor );
2624
+
2625
+ assertThat ("corruption marker should not be there" , corruptedMarkerCount .get (), equalTo (0 ));
2626
+
2627
+ final ShardRouting shardRouting = ShardRoutingHelper .initWithSameId (indexShard .routingEntry (),
2628
+ RecoverySource .StoreRecoverySource .EXISTING_STORE_INSTANCE
2629
+ );
2630
+ // start shard and perform index check on startup. It enforce shard to fail due to corrupted index files
2631
+ final IndexMetaData indexMetaData = IndexMetaData .builder (indexShard .indexSettings ().getIndexMetaData ())
2632
+ .settings (Settings .builder ()
2633
+ .put (indexShard .indexSettings .getSettings ())
2634
+ .put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), randomFrom ("true" , "checksum" )))
2635
+ .build ();
2636
+
2637
+ IndexShard corruptedShard = newShard (shardRouting , shardPath , indexMetaData ,
2638
+ null , null , indexShard .engineFactory ,
2639
+ indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
2640
+
2641
+ final IndexShardRecoveryException indexShardRecoveryException =
2642
+ expectThrows (IndexShardRecoveryException .class , () -> newStartedShard (p -> corruptedShard , true ));
2643
+ assertThat (indexShardRecoveryException .getMessage (), equalTo ("failed recovery" ));
2644
+
2645
+ // check that corrupt marker is there
2646
+ Files .walkFileTree (indexPath , corruptedVisitor );
2647
+ assertThat ("store has to be marked as corrupted" , corruptedMarkerCount .get (), equalTo (1 ));
2648
+
2649
+ try {
2650
+ closeShards (corruptedShard );
2651
+ } catch (RuntimeException e ) {
2652
+ assertThat (e .getMessage (), equalTo ("CheckIndex failed" ));
2653
+ }
2654
+ }
2655
+
2656
+ public void testShardDoesNotStartIfCorruptedMarkerIsPresent () throws Exception {
2657
+ final IndexShard indexShard = newStartedShard (true );
2658
+
2659
+ final long numDocs = between (10 , 100 );
2660
+ for (long i = 0 ; i < numDocs ; i ++) {
2661
+ indexDoc (indexShard , "_doc" , Long .toString (i ), "{}" );
2662
+ }
2663
+ indexShard .flush (new FlushRequest ());
2664
+ closeShards (indexShard );
2665
+
2666
+ final ShardPath shardPath = indexShard .shardPath ();
2667
+
2668
+ final ShardRouting shardRouting = ShardRoutingHelper .initWithSameId (indexShard .routingEntry (),
2669
+ RecoverySource .StoreRecoverySource .EXISTING_STORE_INSTANCE
2670
+ );
2671
+ final IndexMetaData indexMetaData = indexShard .indexSettings ().getIndexMetaData ();
2672
+
2673
+ final Path indexPath = shardPath .getDataPath ().resolve (ShardPath .INDEX_FOLDER_NAME );
2674
+
2675
+ // create corrupted marker
2676
+ final String corruptionMessage = "fake ioexception" ;
2677
+ try (Store store = createStore (indexShard .indexSettings (), shardPath )) {
2678
+ store .markStoreCorrupted (new IOException (corruptionMessage ));
2679
+ }
2680
+
2681
+ // try to start shard on corrupted files
2682
+ final IndexShard corruptedShard = newShard (shardRouting , shardPath , indexMetaData ,
2683
+ null , null , indexShard .engineFactory ,
2684
+ indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
2685
+
2686
+ final IndexShardRecoveryException exception1 = expectThrows (IndexShardRecoveryException .class ,
2687
+ () -> newStartedShard (p -> corruptedShard , true ));
2688
+ assertThat (exception1 .getCause ().getMessage (), equalTo (corruptionMessage + " (resource=preexisting_corruption)" ));
2689
+ closeShards (corruptedShard );
2690
+
2691
+ final AtomicInteger corruptedMarkerCount = new AtomicInteger ();
2692
+ final SimpleFileVisitor <Path > corruptedVisitor = new SimpleFileVisitor <Path >() {
2693
+ @ Override
2694
+ public FileVisitResult visitFile (Path file , BasicFileAttributes attrs ) throws IOException {
2695
+ if (Files .isRegularFile (file ) && file .getFileName ().toString ().startsWith (Store .CORRUPTED )) {
2696
+ corruptedMarkerCount .incrementAndGet ();
2697
+ }
2698
+ return FileVisitResult .CONTINUE ;
2699
+ }
2700
+ };
2701
+ Files .walkFileTree (indexPath , corruptedVisitor );
2702
+ assertThat ("store has to be marked as corrupted" , corruptedMarkerCount .get (), equalTo (1 ));
2703
+
2704
+ // try to start another time shard on corrupted files
2705
+ final IndexShard corruptedShard2 = newShard (shardRouting , shardPath , indexMetaData ,
2706
+ null , null , indexShard .engineFactory ,
2707
+ indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
2708
+
2709
+ final IndexShardRecoveryException exception2 = expectThrows (IndexShardRecoveryException .class ,
2710
+ () -> newStartedShard (p -> corruptedShard2 , true ));
2711
+ assertThat (exception2 .getCause ().getMessage (), equalTo (corruptionMessage + " (resource=preexisting_corruption)" ));
2712
+ closeShards (corruptedShard2 );
2713
+
2714
+ // check that corrupt marker is there
2715
+ corruptedMarkerCount .set (0 );
2716
+ Files .walkFileTree (indexPath , corruptedVisitor );
2717
+ assertThat ("store still has a single corrupt marker" , corruptedMarkerCount .get (), equalTo (1 ));
2718
+ }
2719
+
2720
+ private Path corruptIndexFile (ShardPath shardPath ) throws IOException {
2721
+ final Path indexPath = shardPath .getDataPath ().resolve (ShardPath .INDEX_FOLDER_NAME );
2722
+ final Path [] filesToCorrupt =
2723
+ Files .walk (indexPath )
2724
+ .filter (p -> {
2725
+ final String name = p .getFileName ().toString ();
2726
+ return Files .isRegularFile (p )
2727
+ && name .startsWith ("extra" ) == false // Skip files added by Lucene's ExtrasFS
2728
+ && IndexWriter .WRITE_LOCK_NAME .equals (name ) == false
2729
+ && name .startsWith ("segments_" ) == false && name .endsWith (".si" ) == false ;
2730
+ })
2731
+ .toArray (Path []::new );
2732
+ CorruptionUtils .corruptFile (random (), filesToCorrupt );
2733
+ return indexPath ;
2734
+ }
2735
+
2593
2736
/**
2594
2737
* Simulates a scenario that happens when we are async fetching snapshot metadata from GatewayService
2595
2738
* and checking index concurrently. This should always be possible without any exception.
@@ -2613,7 +2756,7 @@ public void testReadSnapshotAndCheckIndexConcurrently() throws Exception {
2613
2756
final IndexMetaData indexMetaData = IndexMetaData .builder (indexShard .indexSettings ().getIndexMetaData ())
2614
2757
.settings (Settings .builder ()
2615
2758
.put (indexShard .indexSettings .getSettings ())
2616
- .put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), randomFrom ("false" , "true" , "checksum" , "fix" )))
2759
+ .put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), randomFrom ("false" , "true" , "checksum" )))
2617
2760
.build ();
2618
2761
final IndexShard newShard = newShard (shardRouting , indexShard .shardPath (), indexMetaData ,
2619
2762
null , null , indexShard .engineFactory , indexShard .getGlobalCheckpointSyncer (), EMPTY_EVENT_LISTENER );
@@ -2655,6 +2798,16 @@ public void testReadSnapshotAndCheckIndexConcurrently() throws Exception {
2655
2798
closeShards (newShard );
2656
2799
}
2657
2800
2801
+ public void testCheckOnStartupDeprecatedValue () throws Exception {
2802
+ final Settings settings = Settings .builder ().put (IndexSettings .INDEX_CHECK_ON_STARTUP .getKey (), "fix" ).build ();
2803
+
2804
+ final IndexShard newShard = newShard (true , settings );
2805
+ closeShards (newShard );
2806
+
2807
+ assertWarnings ("Setting [index.shard.check_on_startup] is set to deprecated value [fix], "
2808
+ + "which has no effect and will not be accepted in future" );
2809
+ }
2810
+
2658
2811
class Result {
2659
2812
private final int localCheckpoint ;
2660
2813
private final int maxSeqNo ;
0 commit comments