@@ -598,11 +598,49 @@ public static PartitionWriter createWriter(Settings settings, long currentSplit,
598
598
IndexExtractor iformat = ObjectUtils .instantiate (settings .getMappingIndexExtractorClassName (), settings );
599
599
iformat .compile (resource .toString ());
600
600
601
- RestRepository repository = (iformat .hasPattern () ? initMultiIndices (settings , currentSplit , resource , log ) : initSingleIndex (settings , currentSplit , resource , log ));
602
-
601
+ // Create the partition writer and its client
602
+ RestRepository repository ;
603
+ if (iformat .hasPattern ()) {
604
+ // Can't be sure if a multi-index pattern will resolve to indices or aliases
605
+ // during the job. It's better to trust the user and discover any issues the
606
+ // hard way at runtime.
607
+ repository = initMultiIndices (settings , currentSplit , resource , log );
608
+ } else {
609
+ // Determine if the configured index is an alias.
610
+ RestClient bootstrap = new RestClient (settings );
611
+ GetAliasesRequestBuilder .Response response = null ;
612
+ try {
613
+ response = new GetAliasesRequestBuilder (bootstrap ).aliases (resource .index ()).execute ();
614
+ } catch (EsHadoopInvalidRequest remoteException ) {
615
+ // For now, the get alias call throws if it does not find an alias that matches. Just log and continue.
616
+ if (log .isDebugEnabled ()) {
617
+ log .debug (String .format ("Provided index name [%s] is not an alias. Reason: [%s]" ,
618
+ resource .index (), remoteException .getMessage ()));
619
+ }
620
+ } finally {
621
+ bootstrap .close ();
622
+ }
623
+ // Validate the alias for writing, or pin to a single index shard.
624
+ if (response != null && response .hasAliases ()) {
625
+ repository = initAliasWrite (response , settings , currentSplit , resource , log );
626
+ } else {
627
+ repository = initSingleIndex (settings , currentSplit , resource , log );
628
+ }
629
+ }
603
630
return new PartitionWriter (settings , currentSplit , totalSplits , repository );
604
631
}
605
632
633
+ /**
634
+ * Validate and configure a rest repository for writing to an index.
635
+ * The index is potentially created if it does not exist, and the
636
+ * client is pinned to a node that hosts one of the index's primary
637
+ * shards based on its currentInstance number.
638
+ * @param settings Job settings
639
+ * @param currentInstance Partition number
640
+ * @param resource Configured write resource
641
+ * @param log Logger to use
642
+ * @return The RestRepository to be used by the partition writer
643
+ */
606
644
private static RestRepository initSingleIndex (Settings settings , long currentInstance , Resource resource , Log log ) {
607
645
if (log .isDebugEnabled ()) {
608
646
log .debug (String .format ("Resource [%s] resolves as a single index" , resource ));
@@ -638,9 +676,7 @@ private static RestRepository initSingleIndex(Settings settings, long currentIns
638
676
}
639
677
640
678
// no routing necessary; select the relevant target shard/node
641
- Map <ShardInfo , NodeInfo > targetShards = Collections .emptyMap ();
642
-
643
- targetShards = repository .getWriteTargetPrimaryShards (settings .getNodesClientOnly ());
679
+ Map <ShardInfo , NodeInfo > targetShards = repository .getWriteTargetPrimaryShards (settings .getNodesClientOnly ());
644
680
repository .close ();
645
681
646
682
Assert .isTrue (!targetShards .isEmpty (),
@@ -675,6 +711,15 @@ private static RestRepository initSingleIndex(Settings settings, long currentIns
675
711
return repository ;
676
712
}
677
713
714
+ /**
715
+ * Creates a RestRepository for use with a multi-index resource pattern. The client is left pinned
716
+ * to the original node that it was pinned to since the shard locations cannot be determined at all.
717
+ * @param settings Job settings
718
+ * @param currentInstance Partition number
719
+ * @param resource Configured write resource
720
+ * @param log Logger to use
721
+ * @return The RestRepository to be used by the partition writer
722
+ */
678
723
private static RestRepository initMultiIndices (Settings settings , long currentInstance , Resource resource , Log log ) {
679
724
if (log .isDebugEnabled ()) {
680
725
log .debug (String .format ("Resource [%s] resolves as an index pattern" , resource ));
@@ -688,4 +733,66 @@ private static RestRepository initMultiIndices(Settings settings, long currentIn
688
733
689
734
return new RestRepository (settings );
690
735
}
736
+
737
+ /**
738
+ * Validate and configure a rest repository for writing to an alias backed by a valid write-index.
739
+ * This validation only checks that an alias is valid at time of job start, and makes no guarantees
740
+ * about the alias changing during the execution.
741
+ * @param response Response from the get alias call
742
+ * @param settings Job settings
743
+ * @param currentInstance Partition number
744
+ * @param resource Configured write resource
745
+ * @param log Logger to use
746
+ * @return The RestRepository to be used by the partition writer
747
+ */
748
+ private static RestRepository initAliasWrite (GetAliasesRequestBuilder .Response response , Settings settings , long currentInstance ,
749
+ Resource resource , Log log ) {
750
+ if (log .isDebugEnabled ()) {
751
+ log .debug (String .format ("Resource [%s] resolves as an index alias" , resource ));
752
+ }
753
+
754
+ // indexName -> aliasName -> alias definition
755
+ Map <String , Map <String , IndicesAliases .Alias >> indexAliasTable = response .getIndices ().getAll ();
756
+
757
+ if (indexAliasTable .size () < 1 ) {
758
+ // Sanity check
759
+ throw new EsHadoopIllegalArgumentException ("Cannot initialize alias write resource [" + resource .index () +
760
+ "] if it does not have any alias entries." );
761
+ } else if (indexAliasTable .size () > 1 ) {
762
+ // Multiple indices, validate that one index-alias relation has its write index flag set
763
+ String currentWriteIndex = null ;
764
+ for (Map .Entry <String , Map <String , IndicesAliases .Alias >> indexRow : indexAliasTable .entrySet ()) {
765
+ String indexName = indexRow .getKey ();
766
+ Map <String , IndicesAliases .Alias > aliases = indexRow .getValue ();
767
+ IndicesAliases .Alias aliasInfo = aliases .get (resource .index ());
768
+ if (aliasInfo .isWriteIndex ()) {
769
+ currentWriteIndex = indexName ;
770
+ break ;
771
+ }
772
+ }
773
+ if (currentWriteIndex == null ) {
774
+ throw new EsHadoopIllegalArgumentException ("Attempting to write to alias [" + resource .index () + "], " +
775
+ "but detected multiple indices [" + indexAliasTable .size () + "] with no write index selected. " +
776
+ "Bailing out..." );
777
+ } else {
778
+ if (log .isDebugEnabled ()) {
779
+ log .debug (String .format ("Writing to currently configured write-index [%s]" , currentWriteIndex ));
780
+ }
781
+ }
782
+ } else {
783
+ // Single index in the alias, but we should still not pin the nodes
784
+ if (log .isDebugEnabled ()) {
785
+ log .debug (String .format ("Writing to the alias's single configured index [%s]" , indexAliasTable .keySet ().iterator ().next ()));
786
+ }
787
+ }
788
+
789
+ // alias-index write - since we don't know beforehand what concrete index will be used at any
790
+ // given time during the job, use an already selected node
791
+ String node = SettingsUtils .getPinnedNode (settings );
792
+ if (log .isDebugEnabled ()) {
793
+ log .debug (String .format ("Partition writer instance [%s] assigned to [%s]" , currentInstance , node ));
794
+ }
795
+
796
+ return new RestRepository (settings );
797
+ }
691
798
}
0 commit comments