@@ -753,65 +753,70 @@ void PassManagerBuilder::populateModulePassManager(
 
   addExtensionsToPM(EP_VectorizerStart, MPM);
 
-  // Re-rotate loops in all our loop nests. These may have fallout out of
-  // rotated form due to GVN or other transformations, and the vectorizer relies
-  // on the rotated form. Disable header duplication at -Oz.
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
-
-  // Distribute loops to allow partial vectorization. I.e. isolate dependences
-  // into separate loop that would otherwise inhibit vectorization. This is
-  // currently only performed for loops marked with the metadata
-  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
-  MPM.add(createLoopDistributePass());
-
-  MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
-
-  // Eliminate loads by forwarding stores from the previous iteration to loads
-  // of the current iteration.
-  MPM.add(createLoopLoadEliminationPass());
-
-  // FIXME: Because of #pragma vectorize enable, the passes below are always
-  // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
-  // on -O1 and no #pragma is found). Would be good to have these two passes
-  // as function calls, so that we can only pass them when the vectorizer
-  // changed the code.
-  MPM.add(createInstructionCombiningPass());
-  if (OptLevel > 1 && ExtraVectorizerPasses) {
-    // At higher optimization levels, try to clean up any runtime overlap and
-    // alignment checks inserted by the vectorizer. We want to track correllated
-    // runtime checks for two inner loops in the same outer loop, fold any
-    // common computations, hoist loop-invariant aspects out of any outer loop,
-    // and unswitch the runtime checks if possible. Once hoisted, we may have
-    // dead (or speculatable) control flows or more combining opportunities.
-    MPM.add(createEarlyCSEPass());
-    MPM.add(createCorrelatedValuePropagationPass());
-    MPM.add(createInstructionCombiningPass());
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
-    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
-    MPM.add(createCFGSimplificationPass());
-    MPM.add(createInstructionCombiningPass());
-  }
+  if (!SYCLOptimizationMode) {
+    // Re-rotate loops in all our loop nests. These may have fallout out of
+    // rotated form due to GVN or other transformations, and the vectorizer
+    // relies on the rotated form. Disable header duplication at -Oz.
+    MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+
+    // Distribute loops to allow partial vectorization. I.e. isolate
+    // dependences into separate loop that would otherwise inhibit
+    // vectorization. This is currently only performed for loops marked with
+    // the metadata llvm.loop.distribute=true or when -enable-loop-distribute is
+    // specified.
+    MPM.add(createLoopDistributePass());
+
+    MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
+
+    // Eliminate loads by forwarding stores from the previous iteration to loads
+    // of the current iteration.
+    MPM.add(createLoopLoadEliminationPass());
 
-  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
-  // GVN, loop transforms, and others have already run, so it's now better to
-  // convert to more optimized IR using more aggressive simplify CFG options.
-  // The extra sinking transform can create larger basic blocks, so do this
-  // before SLP vectorization.
-  MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
-                                          .forwardSwitchCondToPhi(true)
-                                          .convertSwitchToLookupTable(true)
-                                          .needCanonicalLoops(false)
-                                          .sinkCommonInsts(true)));
-
-  if (SLPVectorize) {
-    MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+    // FIXME: Because of #pragma vectorize enable, the passes below are always
+    // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
+    // on -O1 and no #pragma is found). Would be good to have these two passes
+    // as function calls, so that we can only pass them when the vectorizer
+    // changed the code.
+    MPM.add(createInstructionCombiningPass());
     if (OptLevel > 1 && ExtraVectorizerPasses) {
+      // At higher optimization levels, try to clean up any runtime overlap and
+      // alignment checks inserted by the vectorizer. We want to track
+      // correllated runtime checks for two inner loops in the same outer loop,
+      // fold any common computations, hoist loop-invariant aspects out of any
+      // outer loop, and unswitch the runtime checks if possible. Once hoisted,
+      // we may have dead (or speculatable) control flows or more combining
+      // opportunities.
       MPM.add(createEarlyCSEPass());
+      MPM.add(createCorrelatedValuePropagationPass());
+      MPM.add(createInstructionCombiningPass());
+      MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+      MPM.add(
+          createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+      MPM.add(createCFGSimplificationPass());
+      MPM.add(createInstructionCombiningPass());
     }
-  }
 
-  // Enhance/cleanup vector code.
-  MPM.add(createVectorCombinePass());
+    // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+    // GVN, loop transforms, and others have already run, so it's now better to
+    // convert to more optimized IR using more aggressive simplify CFG options.
+    // The extra sinking transform can create larger basic blocks, so do this
+    // before SLP vectorization.
+    MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
+                                            .forwardSwitchCondToPhi(true)
+                                            .convertSwitchToLookupTable(true)
+                                            .needCanonicalLoops(false)
+                                            .sinkCommonInsts(true)));
+
+    if (SLPVectorize) {
+      MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+      if (OptLevel > 1 && ExtraVectorizerPasses) {
+        MPM.add(createEarlyCSEPass());
+      }
+    }
+
+    // Enhance/cleanup vector code.
+    MPM.add(createVectorCombinePass());
+  }
 
   addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createInstructionCombiningPass());
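
For context, here is a minimal sketch of how a driver might route device compilation through the flag this hunk reads. It is not part of the patch: the hunk only shows SYCLOptimizationMode being tested inside populateModulePassManager, so the sketch assumes it is exposed as a PassManagerBuilder member (it could equally be a file-local flag in the fork); the remaining fields are stock legacy pass-manager setup.

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    using namespace llvm;

    // Build a legacy module pipeline; when IsSYCLDevice is true the guarded
    // vectorization block above (loop rotate/distribute, loop and SLP
    // vectorizers, and their cleanup passes) is skipped entirely.
    static void populateDevicePipeline(legacy::PassManager &MPM,
                                       bool IsSYCLDevice) {
      PassManagerBuilder Builder;
      Builder.OptLevel = 2;         // -O2
      Builder.SizeLevel = 0;        // not -Os / -Oz
      Builder.LoopVectorize = true; // moot when the guard is taken
      Builder.SLPVectorize = true;  // likewise skipped in SYCL mode
      // Assumed intel/llvm extension member; not in upstream PassManagerBuilder.
      Builder.SYCLOptimizationMode = IsSYCLDevice;
      Builder.populateModulePassManager(MPM);
    }

Guarding the whole block rather than each pass keeps the SYCL device pipeline free of the vectorizer and its follow-up cleanup in one place, while the EP_Peephole extensions and the trailing instruction combining still run unconditionally.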