From 32c3c2ab629ef4cefe2119775a88e960872c209f Mon Sep 17 00:00:00 2001
From: Alexey Bader
Date: Fri, 17 Apr 2020 14:35:01 +0300
Subject: [PATCH 1/6] [SYCL][Experimental] Reduce the set of optimizations for SYCL device

This patch limits the set of optimizations, aiming to reduce the size of the
generated device module. Optimizations are currently disabled by default as
they cause several kinds of issues; some of these are addressed within this
patch, but not all of them. Optimizations can be enabled with the
`-fsycl-enable-optimizations` front-end option (or the
`-Xclang -fsycl-enable-optimizations` driver option).

Signed-off-by: Alexey Bader
---
 clang/lib/Basic/Targets/SPIR.h                |  4 +-
 clang/lib/CodeGen/BackendUtil.cpp             | 56 ++++++++++++-------
 clang/test/CodeGen/target-data.c              |  4 +-
 clang/test/CodeGenOpenCL/convergent.cl        |  2 +-
 clang/test/CodeGenSYCL/address-space-swap.cpp |  2 +-
 .../CodeGenSYCL/debug-info-srcpos-kernel.cpp  |  2 +-
 llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp    | 20 ++++---
 llvm-spirv/lib/SPIRV/SPIRVWriter.cpp          |  5 ++
 llvm-spirv/test/transcoding/llvm.memmove.ll   | 46 +++++++++++++--
 9 files changed, 103 insertions(+), 38 deletions(-)

diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index b24d0107d51a0..f31deef140580 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -136,7 +136,7 @@ class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo {
     SizeType = TargetInfo::UnsignedInt;
     PtrDiffType = IntPtrType = TargetInfo::SignedInt;
     resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
-                    "v96:128-v192:256-v256:256-v512:512-v1024:1024");
+                    "v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64");
   }
 
   void getTargetDefines(const LangOptions &Opts,
@@ -152,7 +152,7 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo {
     PtrDiffType = IntPtrType = TargetInfo::SignedLong;
 
     resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
-                    "v96:128-v192:256-v256:256-v512:512-v1024:1024");
+                    "v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64");
   }
 
   void getTargetDefines(const LangOptions &Opts,
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index b8a59bcdab391..9164d1613bbfd 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -599,19 +599,38 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
                                       CodeGenOpts.PrepareForThinLTO));
   }
 
-  PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel;
-  PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
-  PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
-  PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
-
-  PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
-  // Loop interleaving in the loop vectorizer has historically been set to be
-  // enabled when loop unrolling is enabled.
- PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; - PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; - PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; - PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; - PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; + if (LangOpts.SYCLIsDevice) { + PMBuilder.OptLevel = 1; + PMBuilder.SizeLevel = 2; + PMBuilder.SLPVectorize = false; + PMBuilder.LoopVectorize = false; + PMBuilder.DivergentTarget = true; + PMBuilder.DisableGVNLoadPRE = true; + PMBuilder.ForgetAllSCEVInLoopUnroll = true; + + PMBuilder.DisableUnrollLoops = true; + // Loop interleaving in the loop vectorizer has historically been set to be + // enabled when loop unrolling is enabled. + PMBuilder.LoopsInterleaved = false; + PMBuilder.MergeFunctions = false; + PMBuilder.PrepareForThinLTO = false; + PMBuilder.PrepareForLTO = false; + PMBuilder.RerollLoops = false; + } else { + PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; + PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; + PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; + PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; + + PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; + // Loop interleaving in the loop vectorizer has historically been set to be + // enabled when loop unrolling is enabled. + PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; + PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; + PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; + PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; + PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; + } MPM.add(new TargetLibraryInfoWrapperPass(*TLII)); @@ -865,14 +884,16 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, std::unique_ptr ThinLinkOS, DwoOS; + // Clean-up SYCL device code if LLVM passes are disabled + if (LangOpts.SYCLIsDevice && CodeGenOpts.DisableLLVMPasses) { + PerModulePasses.add(createDeadCodeEliminationPass()); + } + switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - if (LangOpts.SYCLIsDevice) { - PerModulePasses.add(createDeadCodeEliminationPass()); - } if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); @@ -1346,9 +1367,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( break; case Backend_EmitBC: - if (LangOpts.SYCLIsDevice) { - CodeGenPasses.add(createDeadCodeEliminationPass()); - } if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index e49f8453e360e..7eca3ebc33478 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -237,11 +237,11 @@ // RUN: %clang_cc1 -triple spir-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=SPIR -// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" // RUN: %clang_cc1 -triple spir64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=SPIR64 -// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +// SPIR64: target datalayout = 
"e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" // RUN: %clang_cc1 -triple bpfel -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=BPFEL diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl index 193d391ced207..01495a28598d9 100644 --- a/clang/test/CodeGenOpenCL/convergent.cl +++ b/clang/test/CodeGenOpenCL/convergent.cl @@ -121,7 +121,7 @@ void test_unroll() { // The new PM produces a slightly different IR for the loop from the legacy PM, // but the test still checks that the loop is not unrolled. -// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] +// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_cond_cleanup]], label %[[for_body]] // CHECK-NEW: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]] // CHECK-NEW: [[for_body_crit_edge]]: diff --git a/clang/test/CodeGenSYCL/address-space-swap.cpp b/clang/test/CodeGenSYCL/address-space-swap.cpp index 45ca6bb59d509..e361a63a8d3bf 100644 --- a/clang/test/CodeGenSYCL/address-space-swap.cpp +++ b/clang/test/CodeGenSYCL/address-space-swap.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -fsycl-device-only -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -fsycl-device-only -S -Xclang -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s #include void test() { diff --git a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp index 13cb265f0663f..fcff6aae4f763 100644 --- a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp +++ b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -fsycl-device-only %s -S -I %S/Inputs -emit-llvm -g -o - | FileCheck %s +// RUN: %clang -fsycl-device-only %s -S -emit-llvm -O0 -I %S/Inputs -g -o - | FileCheck %s // // Verify the SYCL kernel routine is marked artificial and has no source // correlation. 
diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp b/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp
index 45dc71ab1aa52..f84d384fd09ba 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp
@@ -75,10 +75,12 @@ class SPIRVLowerMemmove : public ModulePass,
       report_fatal_error("llvm.memmove of non-constant length not supported",
                          false);
     auto *Length = cast<ConstantInt>(I.getLength());
-    if (isa<BitCastInst>(Src))
-      // The source could be bit-cast from another type,
-      // need the original type for the allocation of the temporary variable
-      SrcTy = cast<BitCastInst>(Src)->getOperand(0)->getType();
+    auto *S = Src;
+    // The source could be bit-cast or addrspacecast from another type,
+    // need the original type for the allocation of the temporary variable
+    while (isa<BitCastInst>(S) || isa<AddrSpaceCastInst>(S))
+      S = cast<CastInst>(S)->getOperand(0);
+    SrcTy = S->getType();
     MaybeAlign Align = I.getSourceAlign();
     auto Volatile = I.isVolatile();
     Value *NumElements = nullptr;
@@ -87,9 +89,13 @@ class SPIRVLowerMemmove : public ModulePass,
       NumElements = Builder.getInt32(SrcTy->getArrayNumElements());
       ElementsCount = SrcTy->getArrayNumElements();
     }
-    if (Mod->getDataLayout().getTypeSizeInBits(SrcTy->getPointerElementType()) *
-            ElementsCount !=
-        Length->getZExtValue() * 8)
+    if (((ElementsCount > 1) && (Mod->getDataLayout().getTypeSizeInBits(
+                                     SrcTy->getPointerElementType()) *
+                                     ElementsCount !=
+                                 Length->getZExtValue() * 8)) ||
+        ((ElementsCount == 1) &&
+         (Mod->getDataLayout().getTypeSizeInBits(
+              SrcTy->getPointerElementType()) < Length->getZExtValue() * 8)))
       report_fatal_error("Size of the memcpy should match the allocated memory",
                          false);
 
diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
index 8b355974259eb..298699af9e9f6 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
@@ -1310,6 +1310,10 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V,
   if (CallInst *CI = dyn_cast<CallInst>(V))
     return mapValue(V, transCallInst(CI, BB));
 
+  // FIXME: this is not a valid translation of the freeze instruction
+  if (FreezeInst *FI = dyn_cast<FreezeInst>(V))
+    return mapValue(V, transValue(FI->getOperand(0), BB));
+
   llvm_unreachable("Not implemented");
   return nullptr;
 }
@@ -1825,6 +1829,7 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II,
   case Intrinsic::invariant_start:
   case Intrinsic::invariant_end:
   case Intrinsic::dbg_label:
+  case Intrinsic::assume:
     return nullptr;
   default:
     if (SPIRVAllowUnknownIntrinsics)
diff --git a/llvm-spirv/test/transcoding/llvm.memmove.ll b/llvm-spirv/test/transcoding/llvm.memmove.ll
index dfb129e4d923c..867cd3a548547 100644
--- a/llvm-spirv/test/transcoding/llvm.memmove.ll
+++ b/llvm-spirv/test/transcoding/llvm.memmove.ll
@@ -17,18 +17,46 @@
 ; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp3:[0-9]+]] [[mem]]
 ; CHECK-SPIRV: LifetimeStop [[tmp3]] [[size]]
 
+; CHECK-SPIRV: GenericCastToPtr {{[0-9]+}} [[out:[0-9]+]]
+; CHECK-SPIRV: Variable {{[0-9]+}} [[mem:[0-9]+]] 7
+; CHECK-SPIRV: Bitcast [[i8Ty:[0-9]+]] [[tmp0:[0-9]+]] [[mem]]
+; CHECK-SPIRV: LifetimeStart [[tmp0]] [[size:[0-9]+]]
+; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp1:[0-9]+]] [[mem]]
+; CHECK-SPIRV: CopyMemorySized [[tmp1]] {{[0-9]+}} {{[0-9]+}}
+; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp2:[0-9]+]] [[mem]]
+; CHECK-SPIRV: CopyMemorySized [[out]] [[tmp2]] {{[0-9]+}}
+; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp3:[0-9]+]] [[mem]]
+; CHECK-SPIRV: LifetimeStop [[tmp3]] [[size]]
+
 ; CHECK-LLVM-NOT: llvm.memmove
 
+; CHECK-LLVM-LABEL: @test_struct
+
 ; CHECK-LLVM: [[local:%[0-9]+]] = alloca %struct.SomeStruct
 ;
CHECK-LLVM: [[tmp1:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type:i[0-9]+\*]] ; CHECK-LLVM: call void @llvm.lifetime.start.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp1]]) ; CHECK-LLVM: [[tmp2:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] -; CHECK-LLVM: call void @llvm.memcpy -; CHECK-LLVM: ([[type]] align 64 [[tmp2]], -; CHECK-LLVM: {{i[0-9]+}} [[size:[0-9]+]] +; CHECK-LLVM: call void @llvm.memcpy.p0i8.p1i8.i32 +; CHECK-LLVM-SAME: ([[type]] align 64 [[tmp2]], +; CHECK-LLVM-SAME: {{i[0-9]+}} [[size:[0-9]+]] ; CHECK-LLVM: [[tmp3:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] -; CHECK-LLVM: call void @llvm.memcpy -; CHECK-LLVM: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]] +; CHECK-LLVM: call void @llvm.memcpy.p1i8.p0i8.i32 +; CHECK-LLVM-SAME: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]] +; CHECK-LLVM: [[tmp4:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] +; CHECK-LLVM: call void @llvm.lifetime.end.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp4]]) + +; CHECK-LLVM-LABEL: @copy_struct +; CHECK-LLVM: [[out:%[0-9]+]] = addrspacecast i8 addrspace(4)* %2 to i8 addrspace(1)* +; CHECK-LLVM: [[local:%[0-9]+]] = alloca %struct.SomeStruct +; CHECK-LLVM: [[tmp1:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type:i[0-9]+\*]] +; CHECK-LLVM: call void @llvm.lifetime.start.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp1]]) +; CHECK-LLVM: [[tmp2:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] +; CHECK-LLVM: call void @llvm.memcpy.p0i8.p1i8.i32 +; CHECK-LLVM-SAME: ([[type]] align 64 [[tmp2]], +; CHECK-LLVM-SAME: {{i[0-9]+}} [[size:[0-9]+]] +; CHECK-LLVM: [[tmp3:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] +; CHECK-LLVM: call void @llvm.memcpy.p1i8.p0i8.i32 +; CHECK-LLVM-SAME: align 64 [[out]] +; CHECK-LLVM-SAME: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]] ; CHECK-LLVM: [[tmp4:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] ; CHECK-LLVM: call void @llvm.lifetime.end.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp4]]) @@ -45,6 +73,14 @@ define spir_kernel void @test_struct(%struct.SomeStruct addrspace(1)* nocapture ret void } +define spir_func void @copy_struct(%struct.SomeStruct addrspace(1)* nocapture readonly %in, %struct.SomeStruct addrspace(4)* nocapture %out) { + %1 = bitcast %struct.SomeStruct addrspace(1)* %in to i8 addrspace(1)* + %2 = bitcast %struct.SomeStruct addrspace(4)* %out to i8 addrspace(4)* + %3 = addrspacecast i8 addrspace(4)* %2 to i8 addrspace(1)* + call void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)* align 64 %3, i8 addrspace(1)* align 64 %1, i32 68, i1 false) + ret void +} + ; Function Attrs: nounwind declare void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) #1 From 9be423ee58bd4b7404e4cbe630f11bbba23298d3 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sat, 18 Apr 2020 09:37:28 +0300 Subject: [PATCH 2/6] Fix formatting. 
Signed-off-by: Alexey Bader --- clang/lib/Basic/Targets/SPIR.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index f31deef140580..2fc79c951e93a 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -135,8 +135,9 @@ class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo { PointerWidth = PointerAlign = 32; SizeType = TargetInfo::UnsignedInt; PtrDiffType = IntPtrType = TargetInfo::SignedInt; - resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"); + resetDataLayout( + "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" + "v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"); } void getTargetDefines(const LangOptions &Opts, @@ -151,8 +152,9 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo { SizeType = TargetInfo::UnsignedLong; PtrDiffType = IntPtrType = TargetInfo::SignedLong; - resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"); + resetDataLayout( + "e-i64:64-v16:16-v24:32-v32:32-v48:64-" + "v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"); } void getTargetDefines(const LangOptions &Opts, From 716d023e9cf01ae1d81109d21797c090fcd000dd Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sat, 18 Apr 2020 14:19:06 +0300 Subject: [PATCH 3/6] Add code comments Signed-off-by: Alexey Bader --- clang/lib/CodeGen/BackendUtil.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 9164d1613bbfd..65a848582efc3 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -599,6 +599,19 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, CodeGenOpts.PrepareForThinLTO)); } + // FIXME: This code is a workaround for a number of problems with optimized + // SYCL code for SPIR target. This change trying to balance between doing too + // little and too much optimizations. Current approach is to disable as much + // as possible just to keep the compiler functional. Eventually we can + // consider allowing -On option to configure optimization set for the FE + // device compiler as well, but before that we must fix all the functional and + // performance issues caused by LLVM transformantions. + // E.g. LLVM optimizations make use of llvm intrinsics, instructions, data + // types, etc., which are not supported by the SPIR-V translator (current + // "back-end" for SYCL device compiler). + // NOTE: We use "normal" inliner (i.e. from O2/O3), but limit the rest of + // optimization pipeline. Inliner is a must for enabling size reduction + // optimizations. if (LangOpts.SYCLIsDevice) { PMBuilder.OptLevel = 1; PMBuilder.SizeLevel = 2; From 810327a7d86be559891c224cc299908ceff635eb Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sat, 18 Apr 2020 14:32:16 +0300 Subject: [PATCH 4/6] Fix grammar errors. 
Signed-off-by: Alexey Bader --- clang/lib/CodeGen/BackendUtil.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 65a848582efc3..309cd554845a5 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -600,10 +600,10 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, } // FIXME: This code is a workaround for a number of problems with optimized - // SYCL code for SPIR target. This change trying to balance between doing too - // little and too much optimizations. Current approach is to disable as much - // as possible just to keep the compiler functional. Eventually we can - // consider allowing -On option to configure optimization set for the FE + // SYCL code for the SPIR target. This change trying to balance between doing + // too few and too many optimizations. The current approach is to disable as + // much as possible just to keep the compiler functional. Eventually we can + // consider allowing -On option to configure the optimization set for the FE // device compiler as well, but before that we must fix all the functional and // performance issues caused by LLVM transformantions. // E.g. LLVM optimizations make use of llvm intrinsics, instructions, data From 5a41ab22d496aa09a0835e99286647d2a5e84d6d Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 20 Apr 2020 08:18:13 +0300 Subject: [PATCH 5/6] Apply review comment. Signed-off-by: Alexey Bader --- clang/lib/CodeGen/BackendUtil.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 309cd554845a5..cf31c6376285a 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -898,9 +898,8 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, std::unique_ptr ThinLinkOS, DwoOS; // Clean-up SYCL device code if LLVM passes are disabled - if (LangOpts.SYCLIsDevice && CodeGenOpts.DisableLLVMPasses) { + if (LangOpts.SYCLIsDevice && CodeGenOpts.DisableLLVMPasses) PerModulePasses.add(createDeadCodeEliminationPass()); - } switch (Action) { case Backend_EmitNothing: From 7c8bf8f4d2a894909f33938329f36610cd5a8760 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 20 Apr 2020 13:40:28 +0300 Subject: [PATCH 6/6] Alter SYCL device compiler optimization pipeline for SPIR target only. Signed-off-by: Alexey Bader --- clang/lib/CodeGen/BackendUtil.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index cf31c6376285a..7601ccad011de 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -612,7 +612,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, // NOTE: We use "normal" inliner (i.e. from O2/O3), but limit the rest of // optimization pipeline. Inliner is a must for enabling size reduction // optimizations. - if (LangOpts.SYCLIsDevice) { + if (LangOpts.SYCLIsDevice && TargetTriple.isSPIR()) { PMBuilder.OptLevel = 1; PMBuilder.SizeLevel = 2; PMBuilder.SLPVectorize = false;
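
The final patch narrows the workaround so that the reduced pipeline is
selected only when compiling SYCL device code for a SPIR target; other
targets keep the configuration derived from the -O options. A rough
standalone equivalent of that gating condition, for illustration only (the
helper name is not from the patch):

  #include "llvm/ADT/Triple.h"
  #include "llvm/IR/Module.h"

  // Illustrative only: true when a module targets 32- or 64-bit SPIR, i.e.
  // when the reduced SYCL device pipeline above would apply.
  static bool isSpirDeviceModule(const llvm::Module &M) {
    llvm::Triple TT(M.getTargetTriple());
    return TT.getArch() == llvm::Triple::spir ||
           TT.getArch() == llvm::Triple::spir64;
  }

Together with LangOpts.SYCLIsDevice, this mirrors the
`LangOpts.SYCLIsDevice && TargetTriple.isSPIR()` check added to
BackendUtil.cpp above.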