Skip to content

Commit dec884d

Browse files
committed
Disable more vectorization passes
1 parent ab30c86 commit dec884d

File tree

1 file changed

+59
-54
lines changed

1 file changed

+59
-54
lines changed

llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

+59-54
Original file line number | Diff line number | Diff line change
@@ -753,65 +753,70 @@ void PassManagerBuilder::populateModulePassManager(
753753

754754
addExtensionsToPM(EP_VectorizerStart, MPM);
755755

756-
// Re-rotate loops in all our loop nests. These may have fallen out of
757-
// rotated form due to GVN or other transformations, and the vectorizer relies
758-
// on the rotated form. Disable header duplication at -Oz.
759-
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
760-
761-
// Distribute loops to allow partial vectorization. I.e. isolate dependences
762-
// into a separate loop that would otherwise inhibit vectorization. This is
763-
// currently only performed for loops marked with the metadata
764-
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
765-
MPM.add(createLoopDistributePass());
766-
767-
MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
768-
769-
// Eliminate loads by forwarding stores from the previous iteration to loads
770-
// of the current iteration.
771-
MPM.add(createLoopLoadEliminationPass());
772-
773-
// FIXME: Because of #pragma vectorize enable, the passes below are always
774-
// inserted in the pipeline, even when the vectorizer doesn't run (e.g. when
775-
// on -O1 and no #pragma is found). Would be good to have these two passes
776-
// as function calls, so that we can only pass them when the vectorizer
777-
// changed the code.
778-
MPM.add(createInstructionCombiningPass());
779-
if (OptLevel > 1 && ExtraVectorizerPasses) {
780-
// At higher optimization levels, try to clean up any runtime overlap and
781-
// alignment checks inserted by the vectorizer. We want to track correlated
782-
// runtime checks for two inner loops in the same outer loop, fold any
783-
// common computations, hoist loop-invariant aspects out of any outer loop,
784-
// and unswitch the runtime checks if possible. Once hoisted, we may have
785-
// dead (or speculatable) control flows or more combining opportunities.
786-
MPM.add(createEarlyCSEPass());
787-
MPM.add(createCorrelatedValuePropagationPass());
788-
MPM.add(createInstructionCombiningPass());
789-
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
790-
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
791-
MPM.add(createCFGSimplificationPass());
792-
MPM.add(createInstructionCombiningPass());
793-
}
756+
if (!SYCLOptimizationMode) {
757+
// Re-rotate loops in all our loop nests. These may have fallen out of
758+
// rotated form due to GVN or other transformations, and the vectorizer
759+
// relies on the rotated form. Disable header duplication at -Oz.
760+
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
761+
762+
// Distribute loops to allow partial vectorization. I.e. isolate
763+
// dependences into a separate loop that would otherwise inhibit
764+
// vectorization. This is currently only performed for loops marked with
765+
// the metadata llvm.loop.distribute=true or when -enable-loop-distribute is
766+
// specified.
767+
MPM.add(createLoopDistributePass());
768+
769+
MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
770+
771+
// Eliminate loads by forwarding stores from the previous iteration to loads
772+
// of the current iteration.
773+
MPM.add(createLoopLoadEliminationPass());
794774

795-
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
796-
// GVN, loop transforms, and others have already run, so it's now better to
797-
// convert to more optimized IR using more aggressive simplify CFG options.
798-
// The extra sinking transform can create larger basic blocks, so do this
799-
// before SLP vectorization.
800-
MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
801-
.forwardSwitchCondToPhi(true)
802-
.convertSwitchToLookupTable(true)
803-
.needCanonicalLoops(false)
804-
.sinkCommonInsts(true)));
805-
806-
if (SLPVectorize) {
807-
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
775+
// FIXME: Because of #pragma vectorize enable, the passes below are always
776+
// inserted in the pipeline, even when the vectorizer doesn't run (e.g. when
777+
// on -O1 and no #pragma is found). Would be good to have these two passes
778+
// as function calls, so that we can only pass them when the vectorizer
779+
// changed the code.
780+
MPM.add(createInstructionCombiningPass());
808781
if (OptLevel > 1 && ExtraVectorizerPasses) {
782+
// At higher optimization levels, try to clean up any runtime overlap and
783+
// alignment checks inserted by the vectorizer. We want to track
784+
// correlated runtime checks for two inner loops in the same outer loop,
785+
// fold any common computations, hoist loop-invariant aspects out of any
786+
// outer loop, and unswitch the runtime checks if possible. Once hoisted,
787+
// we may have dead (or speculatable) control flows or more combining
788+
// opportunities.
809789
MPM.add(createEarlyCSEPass());
790+
MPM.add(createCorrelatedValuePropagationPass());
791+
MPM.add(createInstructionCombiningPass());
792+
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
793+
MPM.add(
794+
createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
795+
MPM.add(createCFGSimplificationPass());
796+
MPM.add(createInstructionCombiningPass());
810797
}
811-
}
812798

813-
// Enhance/cleanup vector code.
814-
MPM.add(createVectorCombinePass());
799+
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
800+
// GVN, loop transforms, and others have already run, so it's now better to
801+
// convert to more optimized IR using more aggressive simplify CFG options.
802+
// The extra sinking transform can create larger basic blocks, so do this
803+
// before SLP vectorization.
804+
MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
805+
.forwardSwitchCondToPhi(true)
806+
.convertSwitchToLookupTable(true)
807+
.needCanonicalLoops(false)
808+
.sinkCommonInsts(true)));
809+
810+
if (SLPVectorize) {
811+
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
812+
if (OptLevel > 1 && ExtraVectorizerPasses) {
813+
MPM.add(createEarlyCSEPass());
814+
}
815+
}
816+
817+
// Enhance/cleanup vector code.
818+
MPM.add(createVectorCombinePass());
819+
}
815820

816821
addExtensionsToPM(EP_Peephole, MPM);
817822
MPM.add(createInstructionCombiningPass());

0 commit comments

Comments
 (0)