diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index b24d0107d51a0..2fc79c951e93a 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -135,8 +135,9 @@ class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo { PointerWidth = PointerAlign = 32; SizeType = TargetInfo::UnsignedInt; PtrDiffType = IntPtrType = TargetInfo::SignedInt; - resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024"); + resetDataLayout( + "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" + "v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"); } void getTargetDefines(const LangOptions &Opts, @@ -151,8 +152,9 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo { SizeType = TargetInfo::UnsignedLong; PtrDiffType = IntPtrType = TargetInfo::SignedLong; - resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024"); + resetDataLayout( + "e-i64:64-v16:16-v24:32-v32:32-v48:64-" + "v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"); } void getTargetDefines(const LangOptions &Opts, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index b8a59bcdab391..7601ccad011de 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -599,19 +599,51 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, CodeGenOpts.PrepareForThinLTO)); } - PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; - PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; - PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; - PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; - - PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; - // Loop interleaving in the loop vectorizer has historically been set to be - // enabled when loop unrolling is enabled. 
- PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; - PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; - PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; - PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; - PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; + // FIXME: This code is a workaround for a number of problems with optimized + // SYCL code for the SPIR target. This change is trying to balance between + // too few and too many optimizations. The current approach is to disable as + // much as possible just to keep the compiler functional. Eventually we can + // consider allowing -On option to configure the optimization set for the FE + // device compiler as well, but before that we must fix all the functional and + // performance issues caused by LLVM transformations. + // E.g. LLVM optimizations make use of llvm intrinsics, instructions, data + // types, etc., which are not supported by the SPIR-V translator (current + // "back-end" for SYCL device compiler). + // NOTE: We use "normal" inliner (i.e. from O2/O3), but limit the rest of + // optimization pipeline. Inliner is a must for enabling size reduction + // optimizations. + if (LangOpts.SYCLIsDevice && TargetTriple.isSPIR()) { + PMBuilder.OptLevel = 1; + PMBuilder.SizeLevel = 2; + PMBuilder.SLPVectorize = false; + PMBuilder.LoopVectorize = false; + PMBuilder.DivergentTarget = true; + PMBuilder.DisableGVNLoadPRE = true; + PMBuilder.ForgetAllSCEVInLoopUnroll = true; + + PMBuilder.DisableUnrollLoops = true; + // Loop interleaving in the loop vectorizer has historically been set to be + // enabled when loop unrolling is enabled. 
+ PMBuilder.LoopsInterleaved = false; + PMBuilder.MergeFunctions = false; + PMBuilder.PrepareForThinLTO = false; + PMBuilder.PrepareForLTO = false; + PMBuilder.RerollLoops = false; + } else { + PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; + PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; + PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; + PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; + + PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; + // Loop interleaving in the loop vectorizer has historically been set to be + // enabled when loop unrolling is enabled. + PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; + PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; + PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; + PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; + PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; + } MPM.add(new TargetLibraryInfoWrapperPass(*TLII)); @@ -865,14 +897,15 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, std::unique_ptr ThinLinkOS, DwoOS; + // Clean-up SYCL device code if LLVM passes are disabled + if (LangOpts.SYCLIsDevice && CodeGenOpts.DisableLLVMPasses) + PerModulePasses.add(createDeadCodeEliminationPass()); + switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - if (LangOpts.SYCLIsDevice) { - PerModulePasses.add(createDeadCodeEliminationPass()); - } if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); @@ -1346,9 +1379,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( break; case Backend_EmitBC: - if (LangOpts.SYCLIsDevice) { - CodeGenPasses.add(createDeadCodeEliminationPass()); - } if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); diff --git a/clang/test/CodeGen/target-data.c 
b/clang/test/CodeGen/target-data.c index e49f8453e360e..7eca3ebc33478 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -237,11 +237,11 @@ // RUN: %clang_cc1 -triple spir-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=SPIR -// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" // RUN: %clang_cc1 -triple spir64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=SPIR64 -// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" // RUN: %clang_cc1 -triple bpfel -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=BPFEL diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl index 193d391ced207..01495a28598d9 100644 --- a/clang/test/CodeGenOpenCL/convergent.cl +++ b/clang/test/CodeGenOpenCL/convergent.cl @@ -121,7 +121,7 @@ void test_unroll() { // The new PM produces a slightly different IR for the loop from the legacy PM, // but the test still checks that the loop is not unrolled. 
-// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] +// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_cond_cleanup]], label %[[for_body]] // CHECK-NEW: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]] // CHECK-NEW: [[for_body_crit_edge]]: diff --git a/clang/test/CodeGenSYCL/address-space-swap.cpp b/clang/test/CodeGenSYCL/address-space-swap.cpp index 45ca6bb59d509..e361a63a8d3bf 100644 --- a/clang/test/CodeGenSYCL/address-space-swap.cpp +++ b/clang/test/CodeGenSYCL/address-space-swap.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -fsycl-device-only -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -fsycl-device-only -S -Xclang -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s #include void test() { diff --git a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp index 13cb265f0663f..fcff6aae4f763 100644 --- a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp +++ b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -fsycl-device-only %s -S -I %S/Inputs -emit-llvm -g -o - | FileCheck %s +// RUN: %clang -fsycl-device-only %s -S -emit-llvm -O0 -I %S/Inputs -g -o - | FileCheck %s // // Verify the SYCL kernel routine is marked artificial and has no source // correlation. 
diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp b/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp index 45dc71ab1aa52..f84d384fd09ba 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp @@ -75,10 +75,12 @@ class SPIRVLowerMemmove : public ModulePass, report_fatal_error("llvm.memmove of non-constant length not supported", false); auto *Length = cast(I.getLength()); - if (isa(Src)) - // The source could be bit-cast from another type, - // need the original type for the allocation of the temporary variable - SrcTy = cast(Src)->getOperand(0)->getType(); + auto *S = Src; + // The source could be bit-cast or addrspacecast from another type, + // need the original type for the allocation of the temporary variable + while (isa(S) || isa(S)) + S = cast(S)->getOperand(0); + SrcTy = S->getType(); MaybeAlign Align = I.getSourceAlign(); auto Volatile = I.isVolatile(); Value *NumElements = nullptr; @@ -87,9 +89,13 @@ class SPIRVLowerMemmove : public ModulePass, NumElements = Builder.getInt32(SrcTy->getArrayNumElements()); ElementsCount = SrcTy->getArrayNumElements(); } - if (Mod->getDataLayout().getTypeSizeInBits(SrcTy->getPointerElementType()) * - ElementsCount != - Length->getZExtValue() * 8) + if (((ElementsCount > 1) && (Mod->getDataLayout().getTypeSizeInBits( + SrcTy->getPointerElementType()) * + ElementsCount != + Length->getZExtValue() * 8)) || + ((ElementsCount == 1) && + (Mod->getDataLayout().getTypeSizeInBits( + SrcTy->getPointerElementType()) < Length->getZExtValue() * 8))) report_fatal_error("Size of the memcpy should match the allocated memory", false); diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 8b355974259eb..298699af9e9f6 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -1310,6 +1310,10 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V, if (CallInst *CI = dyn_cast(V)) return mapValue(V, 
transCallInst(CI, BB)); + // FIXME: this is not valid translation of freeze instruction + if (FreezeInst *FI = dyn_cast(V)) + return mapValue(V, transValue(FI->getOperand(0), BB)); + llvm_unreachable("Not implemented"); return nullptr; } @@ -1825,6 +1829,7 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::dbg_label: + case Intrinsic::assume: return nullptr; default: if (SPIRVAllowUnknownIntrinsics) diff --git a/llvm-spirv/test/transcoding/llvm.memmove.ll b/llvm-spirv/test/transcoding/llvm.memmove.ll index dfb129e4d923c..867cd3a548547 100644 --- a/llvm-spirv/test/transcoding/llvm.memmove.ll +++ b/llvm-spirv/test/transcoding/llvm.memmove.ll @@ -17,18 +17,46 @@ ; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp3:[0-9]+]] [[mem]] ; CHECK-SPIRV: LifetimeStop [[tmp3]] [[size]] +; CHECK-SPIRV: GenericCastToPtr {{[0-9]+}} [[out:[0-9]+]] +; CHECK-SPIRV: Variable {{[0-9]+}} [[mem:[0-9]+]] 7 +; CHECK-SPIRV: Bitcast [[i8Ty:[0-9]+]] [[tmp0:[0-9]+]] [[mem]] +; CHECK-SPIRV: LifetimeStart [[tmp0]] [[size:[0-9]+]] +; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp1:[0-9]+]] [[mem]] +; CHECK-SPIRV: CopyMemorySized [[tmp1]] {{[0-9]+}} {{[0-9]+}} +; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp2:[0-9]+]] [[mem]] +; CHECK-SPIRV: CopyMemorySized [[out]] [[tmp2]] {{[0-9]+}} +; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp3:[0-9]+]] [[mem]] +; CHECK-SPIRV: LifetimeStop [[tmp3]] [[size]] + ; CHECK-LLVM-NOT: llvm.memmove +; CHECK-LLVM-LABEL: @test_struct ; CHECK-LLVM: [[local:%[0-9]+]] = alloca %struct.SomeStruct ; CHECK-LLVM: [[tmp1:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type:i[0-9]+\*]] ; CHECK-LLVM: call void @llvm.lifetime.start.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp1]]) ; CHECK-LLVM: [[tmp2:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] -; CHECK-LLVM: call void @llvm.memcpy -; CHECK-LLVM: ([[type]] align 64 [[tmp2]], -; CHECK-LLVM: {{i[0-9]+}} [[size:[0-9]+]] +; CHECK-LLVM: call void 
@llvm.memcpy.p0i8.p1i8.i32 +; CHECK-LLVM-SAME: ([[type]] align 64 [[tmp2]], +; CHECK-LLVM-SAME: {{i[0-9]+}} [[size:[0-9]+]] ; CHECK-LLVM: [[tmp3:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] -; CHECK-LLVM: call void @llvm.memcpy -; CHECK-LLVM: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]] +; CHECK-LLVM: call void @llvm.memcpy.p1i8.p0i8.i32 +; CHECK-LLVM-SAME: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]] +; CHECK-LLVM: [[tmp4:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] +; CHECK-LLVM: call void @llvm.lifetime.end.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp4]]) + +; CHECK-LLVM-LABEL: @copy_struct +; CHECK-LLVM: [[out:%[0-9]+]] = addrspacecast i8 addrspace(4)* %2 to i8 addrspace(1)* +; CHECK-LLVM: [[local:%[0-9]+]] = alloca %struct.SomeStruct +; CHECK-LLVM: [[tmp1:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type:i[0-9]+\*]] +; CHECK-LLVM: call void @llvm.lifetime.start.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp1]]) +; CHECK-LLVM: [[tmp2:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] +; CHECK-LLVM: call void @llvm.memcpy.p0i8.p1i8.i32 +; CHECK-LLVM-SAME: ([[type]] align 64 [[tmp2]], +; CHECK-LLVM-SAME: {{i[0-9]+}} [[size:[0-9]+]] +; CHECK-LLVM: [[tmp3:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] +; CHECK-LLVM: call void @llvm.memcpy.p1i8.p0i8.i32 +; CHECK-LLVM-SAME: align 64 [[out]] +; CHECK-LLVM-SAME: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]] ; CHECK-LLVM: [[tmp4:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] ; CHECK-LLVM: call void @llvm.lifetime.end.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp4]]) @@ -45,6 +73,14 @@ define spir_kernel void @test_struct(%struct.SomeStruct addrspace(1)* nocapture ret void } +define spir_func void @copy_struct(%struct.SomeStruct addrspace(1)* nocapture readonly %in, %struct.SomeStruct addrspace(4)* nocapture %out) { + %1 = bitcast %struct.SomeStruct addrspace(1)* %in to i8 addrspace(1)* + %2 = bitcast %struct.SomeStruct 
addrspace(4)* %out to i8 addrspace(4)* + %3 = addrspacecast i8 addrspace(4)* %2 to i8 addrspace(1)* + call void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)* align 64 %3, i8 addrspace(1)* align 64 %1, i32 68, i1 false) + ret void +} + ; Function Attrs: nounwind declare void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) #1