Skip to content

Commit ab30c86

Browse files
committed
Disable the loop pass pipeline in SYCL optimization mode.
This change appears to hide the issues with the sub-group broadcast tests on CPU, so their `XFAIL: cpu` markers are removed.
1 parent 587514e commit ab30c86

File tree

3 files changed

+41
-40
lines changed

3 files changed

+41
-40
lines changed

llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

+41-38
Original file line numberDiff line numberDiff line change
@@ -411,47 +411,50 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
411411
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
412412
MPM.add(createReassociatePass()); // Reassociate expressions
413413

414-
// Begin the loop pass pipeline.
415-
if (EnableSimpleLoopUnswitch) {
416-
// The simple loop unswitch pass relies on separate cleanup passes. Schedule
417-
// them first so when we re-process a loop they run before other loop
418-
// passes.
419-
MPM.add(createLoopInstSimplifyPass());
420-
MPM.add(createLoopSimplifyCFGPass());
421-
}
422-
// Rotate Loop - disable header duplication at -Oz
423-
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
424-
// TODO: Investigate promotion cap for O1.
425-
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
426-
if (EnableSimpleLoopUnswitch)
427-
MPM.add(createSimpleLoopUnswitchLegacyPass());
428-
else
429-
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
430-
// FIXME: We break the loop pass pipeline here in order to do full
431-
// simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
432-
// need for this.
433-
MPM.add(createCFGSimplificationPass());
434-
MPM.add(createInstructionCombiningPass());
435-
// We resume loop passes creating a second loop pipeline here.
436-
// TODO: this pass hurts performance due to promotions of induction variables
437-
// from 32-bit value to 64-bit values. I assume it's because SPIR is a virtual
438-
// target with unlimited # of registers and pass doesn't take into account
439-
// that on real HW this promotion is not beneficial.
440-
if (!SYCLOptimizationMode)
441-
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
442-
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
443-
addExtensionsToPM(EP_LateLoopOptimizations, MPM);
444-
MPM.add(createLoopDeletionPass()); // Delete dead loops
445-
446-
if (EnableLoopInterchange)
447-
MPM.add(createLoopInterchangePass()); // Interchange loops
414+
// Do not run the loop pass pipeline in "SYCL Optimization Mode". Loop
415+
// optimizations rely on TTI, which is not accurate for the SPIR target.
416+
if (!SYCLOptimizationMode) {
417+
// Begin the loop pass pipeline.
418+
if (EnableSimpleLoopUnswitch) {
419+
// The simple loop unswitch pass relies on separate cleanup passes.
420+
// Schedule them first so when we re-process a loop they run before other
421+
// loop passes.
422+
MPM.add(createLoopInstSimplifyPass());
423+
MPM.add(createLoopSimplifyCFGPass());
424+
}
425+
// Rotate Loop - disable header duplication at -Oz
426+
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
427+
// TODO: Investigate promotion cap for O1.
428+
MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
429+
if (EnableSimpleLoopUnswitch)
430+
MPM.add(createSimpleLoopUnswitchLegacyPass());
431+
else
432+
MPM.add(
433+
createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
434+
// FIXME: We break the loop pass pipeline here in order to do full
435+
// simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace
436+
// the need for this.
437+
MPM.add(createCFGSimplificationPass());
438+
MPM.add(createInstructionCombiningPass());
439+
// We resume loop passes creating a second loop pipeline here.
440+
// TODO: this pass hurts performance due to promotions of induction
441+
// variables from 32-bit to 64-bit values. I assume it's because SPIR
442+
// is a virtual target with unlimited # of registers and pass doesn't take
443+
// into account that on real HW this promotion is not beneficial.
444+
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
445+
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
446+
addExtensionsToPM(EP_LateLoopOptimizations, MPM);
447+
MPM.add(createLoopDeletionPass()); // Delete dead loops
448+
449+
if (EnableLoopInterchange)
450+
MPM.add(createLoopInterchangePass()); // Interchange loops
448451

449-
// Unroll small loops
450-
if (!SYCLOptimizationMode) // TODO: disable the whole loop pass pipeline?
452+
// Unroll small loops
451453
MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
452454
ForgetAllSCEVInLoopUnroll));
453-
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
454-
// This ends the loop pass pipelines.
455+
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
456+
// This ends the loop pass pipelines.
457+
}
455458

456459
if (OptLevel > 1) {
457460
MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds

sycl/test/sub_group/broadcast.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
// XFAIL: cpu
21
// UNSUPPORTED: cuda
32
// CUDA compilation and runtime do not yet support sub-groups.
43

sycl/test/sub_group/broadcast_fp64.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
// XFAIL: cpu
21
// UNSUPPORTED: cuda
32
// CUDA compilation and runtime do not yet support sub-groups.
43

0 commit comments

Comments
 (0)