Disable more vectorization passes in SYCL optimization mode

bader · bader · commit dec884d192d3 · 2020-09-10T17:06:02.000+03:00
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -753,65 +753,70 @@ void PassManagerBuilder::populateModulePassManager(
 
   addExtensionsToPM(EP_VectorizerStart, MPM);
 
-  // Re-rotate loops in all our loop nests. These may have fallout out of
-  // rotated form due to GVN or other transformations, and the vectorizer relies
-  // on the rotated form. Disable header duplication at -Oz.
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
-
-  // Distribute loops to allow partial vectorization.  I.e. isolate dependences
-  // into separate loop that would otherwise inhibit vectorization.  This is
-  // currently only performed for loops marked with the metadata
-  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
-  MPM.add(createLoopDistributePass());
-
-  MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
-
-  // Eliminate loads by forwarding stores from the previous iteration to loads
-  // of the current iteration.
-  MPM.add(createLoopLoadEliminationPass());
-
-  // FIXME: Because of #pragma vectorize enable, the passes below are always
-  // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
-  // on -O1 and no #pragma is found). Would be good to have these two passes
-  // as function calls, so that we can only pass them when the vectorizer
-  // changed the code.
-  MPM.add(createInstructionCombiningPass());
-  if (OptLevel > 1 && ExtraVectorizerPasses) {
-    // At higher optimization levels, try to clean up any runtime overlap and
-    // alignment checks inserted by the vectorizer. We want to track correllated
-    // runtime checks for two inner loops in the same outer loop, fold any
-    // common computations, hoist loop-invariant aspects out of any outer loop,
-    // and unswitch the runtime checks if possible. Once hoisted, we may have
-    // dead (or speculatable) control flows or more combining opportunities.
-    MPM.add(createEarlyCSEPass());
-    MPM.add(createCorrelatedValuePropagationPass());
-    MPM.add(createInstructionCombiningPass());
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
-    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
-    MPM.add(createCFGSimplificationPass());
-    MPM.add(createInstructionCombiningPass());
-  }
+  if (!SYCLOptimizationMode) {
+    // Re-rotate loops in all our loop nests. These may have fallen out of
+    // rotated form due to GVN or other transformations, and the vectorizer
+    // relies on the rotated form. Disable header duplication at -Oz.
+    MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
+
+    // Distribute loops to allow partial vectorization, i.e. isolate
+    // dependences that would otherwise inhibit vectorization into a separate
+    // loop.  This is currently only performed for loops marked with the
+    // metadata llvm.loop.distribute=true or when -enable-loop-distribute is
+    // specified.
+    MPM.add(createLoopDistributePass());
+
+    MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
+
+    // Eliminate loads by forwarding stores from the previous iteration to loads
+    // of the current iteration.
+    MPM.add(createLoopLoadEliminationPass());
 
-  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
-  // GVN, loop transforms, and others have already run, so it's now better to
-  // convert to more optimized IR using more aggressive simplify CFG options.
-  // The extra sinking transform can create larger basic blocks, so do this
-  // before SLP vectorization.
-  MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
-                                          .forwardSwitchCondToPhi(true)
-                                          .convertSwitchToLookupTable(true)
-                                          .needCanonicalLoops(false)
-                                          .sinkCommonInsts(true)));
-
-  if (SLPVectorize) {
-    MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+    // FIXME: Because of #pragma vectorize enable, the passes below are always
+    // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
+    // on -O1 and no #pragma is found). Would be good to have these two passes
+    // as function calls, so that we can only pass them when the vectorizer
+    // changed the code.
+    MPM.add(createInstructionCombiningPass());
     if (OptLevel > 1 && ExtraVectorizerPasses) {
+      // At higher optimization levels, try to clean up any runtime overlap and
+      // alignment checks inserted by the vectorizer. We want to track
+      // correlated runtime checks for two inner loops in the same outer loop,
+      // fold any common computations, hoist loop-invariant aspects out of any
+      // outer loop, and unswitch the runtime checks if possible. Once hoisted,
+      // we may have dead (or speculatable) control flows or more combining
+      // opportunities.
       MPM.add(createEarlyCSEPass());
+      MPM.add(createCorrelatedValuePropagationPass());
+      MPM.add(createInstructionCombiningPass());
+      MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+      MPM.add(
+          createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+      MPM.add(createCFGSimplificationPass());
+      MPM.add(createInstructionCombiningPass());
     }
-  }
 
-  // Enhance/cleanup vector code.
-  MPM.add(createVectorCombinePass());
+    // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+    // GVN, loop transforms, and others have already run, so it's now better to
+    // convert to more optimized IR using more aggressive simplify CFG options.
+    // The extra sinking transform can create larger basic blocks, so do this
+    // before SLP vectorization.
+    MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
+                                            .forwardSwitchCondToPhi(true)
+                                            .convertSwitchToLookupTable(true)
+                                            .needCanonicalLoops(false)
+                                            .sinkCommonInsts(true)));
+
+    if (SLPVectorize) {
+      MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+      if (OptLevel > 1 && ExtraVectorizerPasses) {
+        MPM.add(createEarlyCSEPass());
+      }
+    }
+
+    // Enhance/cleanup vector code.
+    MPM.add(createVectorCombinePass());
+  }
 
   addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createInstructionCombiningPass());