From 00563d66b4369aec4658c7047ff000961f2ba648 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Wed, 27 Mar 2024 15:13:32 +0000 Subject: [PATCH 1/6] [SYCL][SCLA] Add `sycl::aspect::ext_oneapi_private_alloca` Add aspect to check for `sycl_ext_oneapi_private_alloca` extension support. Only SPIR-V (OpenCL and Level Zero) devices have this aspect. In CodeGen, propagate this aspect when creating the `llvm.sycl.alloca.*` intrinsic. In `sycl-post-link`, do not assert on unsupported targets. Instead, assume the default value for the size specialization constant is used and handle the error at runtime. Signed-off-by: Victor Perez --- clang/lib/CodeGen/CGBuiltin.cpp | 17 +++++++- .../CodeGenSYCL/Inputs/private_alloca.hpp | 1 + clang/test/CodeGenSYCL/Inputs/sycl.hpp | 1 + clang/test/CodeGenSYCL/aspect_enum.cpp | 3 +- clang/test/CodeGenSYCL/builtin-alloca.cpp | 9 +++++ .../spec-constants/SYCL-alloca.ll | 20 ++++++++-- .../default-value/SYCL-alloca.ll | 32 +++++++++++++++ llvm/tools/sycl-post-link/SpecConstants.cpp | 28 +++++++------ sycl/include/sycl/device_aspect_macros.hpp | 10 +++++ .../sycl/ext/oneapi/experimental/alloca.hpp | 7 +++- sycl/include/sycl/info/aspects.def | 1 + sycl/source/detail/device_impl.cpp | 6 +++ .../exception_unsupported_backend.cpp | 40 +++++++++++++++++++ .../UnsupportedDevice/lit.local.cfg | 1 + .../Inputs/private_alloca_test.hpp | 3 +- .../PrivateAlloca/ValidUsage/lit.local.cfg | 1 + .../private_alloca_bool_size.cpp | 1 - .../private_alloca_decorated.cpp | 1 - .../private_alloca_legacy.cpp | 1 - .../private_alloca_multiple.cpp | 1 - .../{ => ValidUsage}/private_alloca_raw.cpp | 3 +- sycl/test-e2e/PrivateAlloca/device_query.cpp | 22 ++++++++++ .../PrivateAlloca/private_alloca_host.cpp | 3 +- .../private_alloca.cpp | 31 ++++++++++++++ 24 files changed, 216 insertions(+), 27 deletions(-) create mode 100644 llvm/test/tools/sycl-post-link/spec-constants/default-value/SYCL-alloca.ll create mode 100644 sycl/test-e2e/PrivateAlloca/UnsupportedDevice/exception_unsupported_backend.cpp create mode 100644 sycl/test-e2e/PrivateAlloca/UnsupportedDevice/lit.local.cfg rename sycl/test-e2e/PrivateAlloca/{ => ValidUsage}/Inputs/private_alloca_test.hpp (96%) create mode 100644 sycl/test-e2e/PrivateAlloca/ValidUsage/lit.local.cfg rename sycl/test-e2e/PrivateAlloca/{ => ValidUsage}/private_alloca_bool_size.cpp (92%) rename sycl/test-e2e/PrivateAlloca/{ => ValidUsage}/private_alloca_decorated.cpp (93%) rename sycl/test-e2e/PrivateAlloca/{ => ValidUsage}/private_alloca_legacy.cpp (93%) rename sycl/test-e2e/PrivateAlloca/{ => ValidUsage}/private_alloca_multiple.cpp (95%) rename sycl/test-e2e/PrivateAlloca/{ => ValidUsage}/private_alloca_raw.cpp (98%) create mode 100644 sycl/test-e2e/PrivateAlloca/device_query.cpp create mode 100644 sycl/test/optional_kernel_features/private_alloca.cpp diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9628c16eba6cf..449738b0c4c31 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -23838,9 +23838,24 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, // an `alloca` or an equivalent construct in later compilation stages. IRBuilderBase::InsertPointGuard IPG(Builder); Builder.SetInsertPoint(AllocaInsertPt); - return Builder.CreateIntrinsic( + llvm::CallInst *CI = Builder.CreateIntrinsic( AllocaTy, Intrinsic::sycl_alloca, {UID, SpecConstPtr, RTBufferPtr, EltTyConst, Align}, nullptr, "alloca"); + + // Propagate function used aspects. + llvm::Function *F = CI->getCalledFunction(); + constexpr llvm::StringLiteral MDName = "sycl_used_aspects"; + if (!F->getMetadata(MDName)) { + auto *AspectAttr = FD->getAttr(); + assert(AspectAttr && AspectAttr->aspects_size() == 1 && + "Expecting a single aspect"); + llvm::APSInt AspectInt = + (*AspectAttr->aspects_begin())->EvaluateKnownConstInt(getContext()); + llvm::Constant *C = Builder.getInt32(static_cast(AspectInt.getZExtValue())); + llvm::Metadata *AspectMD = llvm::ConstantAsMetadata::get(C); + F->setMetadata(MDName, llvm::MDNode::get(Builder.getContext(), AspectMD)); + } + return CI; }(); // Perform AS cast if needed. diff --git a/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp b/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp index 94eb6c007a01d..f1d486304e127 100644 --- a/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp +++ b/clang/test/CodeGenSYCL/Inputs/private_alloca.hpp @@ -10,6 +10,7 @@ namespace experimental { template __SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) +[[__sycl_detail__::__uses_aspects__(sycl::aspect::ext_oneapi_private_alloca)]] multi_ptr private_alloca(kernel_handler &h); diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 9d05b0645778c..8b42f38c9b659 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -101,6 +101,7 @@ enum class __SYCL_TYPE(aspect) aspect { // #AspectEnum custom = 4, fp16 = 5, fp64 = 6, + ext_oneapi_private_alloca = 7, }; using access::target; diff --git a/clang/test/CodeGenSYCL/aspect_enum.cpp b/clang/test/CodeGenSYCL/aspect_enum.cpp index d2449d2fce25f..30a950b33cfa9 100644 --- a/clang/test/CodeGenSYCL/aspect_enum.cpp +++ b/clang/test/CodeGenSYCL/aspect_enum.cpp @@ -3,7 +3,7 @@ // Tests for IR of [[__sycl_detail__::sycl_type(aspect)]] enum. #include "sycl.hpp" -// CHECK: !sycl_aspects = !{![[HOST:[0-9]+]], ![[CPU:[0-9]+]], ![[GPU:[0-9]+]], ![[ACC:[0-9]+]], ![[CUSTOM:[0-9]+]], ![[FP16:[0-9]+]], ![[FP64:[0-9]+]]} +// CHECK: !sycl_aspects = !{![[HOST:[0-9]+]], ![[CPU:[0-9]+]], ![[GPU:[0-9]+]], ![[ACC:[0-9]+]], ![[CUSTOM:[0-9]+]], ![[FP16:[0-9]+]], ![[FP64:[0-9]+]], ![[PRIVATE_ALLOCA:[0-9]+]]} // CHECK: ![[HOST]] = !{!"host", i32 0} // CHECK: ![[CPU]] = !{!"cpu", i32 1} // CHECK: ![[GPU]] = !{!"gpu", i32 2} @@ -11,3 +11,4 @@ // CHECK: ![[CUSTOM]] = !{!"custom", i32 4} // CHECK: ![[FP16]] = !{!"fp16", i32 5} // CHECK: ![[FP64]] = !{!"fp64", i32 6} +// CHECK: ![[PRIVATE_ALLOCA]] = !{!"ext_oneapi_private_alloca", i32 7} diff --git a/clang/test/CodeGenSYCL/builtin-alloca.cpp b/clang/test/CodeGenSYCL/builtin-alloca.cpp index 30aa2df30f38c..10efb76d5552f 100644 --- a/clang/test/CodeGenSYCL/builtin-alloca.cpp +++ b/clang/test/CodeGenSYCL/builtin-alloca.cpp @@ -46,3 +46,12 @@ SYCL_EXTERNAL void test(sycl::kernel_handler &kh) { auto ptr1 = sycl::ext::oneapi::experimental::private_alloca(kh); auto ptr2 = sycl::ext::oneapi::experimental::private_alloca(kh); } + +// CHECK: declare !sycl_used_aspects ![[#USED_ASPECTS:]] ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64 + +// CHECK: declare !sycl_used_aspects ![[#USED_ASPECTS]] ptr @llvm.sycl.alloca.p0.p4.p4.p4.i32 + +// CHECK: declare !sycl_used_aspects ![[#USED_ASPECTS]] ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_struct.myStructs + +// CHECK-DAG: ![[#USED_ASPECTS]] = !{i32 [[#PRIVATE_ALLOCA_ASPECT:]]} +// CHECK-DAG: !{!"ext_oneapi_private_alloca", i32 [[#PRIVATE_ALLOCA_ASPECT]]} diff --git a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca.ll b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca.ll index 0c5af50cadcf8..2bceb13b43843 100644 --- a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca.ll +++ b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca.ll @@ -1,7 +1,10 @@ ; RUN: sycl-post-link -spec-const=native < %s -S -o %t.table -; RUN: FileCheck %s -check-prefixes=CHECK-RT < %t_0.ll +; RUN: FileCheck %s -check-prefixes=CHECK,CHECK-RT < %t_0.ll ; RUN: FileCheck %s --check-prefixes=CHECK-PROPS < %t_0.prop +; RUN: sycl-post-link -spec-const=emulation < %s -S -o %t.table +; RUN: FileCheck %s -check-prefixes=CHECK,CHECK-EMULATION < %t_0.ll + ; This test checks that the post link tool is able to correctly transform ; SYCL alloca intrinsics in SPIR-V devices. @@ -10,9 +13,9 @@ %"class.sycl::_V1::specialization_id.1" = type { i16 } %my_range = type { ptr addrspace(4), ptr addrspace(4) } -@size_i64 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id" { i64 10 }, align 8 -@size_i32 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id.0" { i32 120 }, align 4 -@size_i16 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id.1" { i16 1 }, align 2 +@size_i64 = addrspace(1) constant %"class.sycl::_V1::specialization_id" { i64 10 }, align 8 +@size_i32 = addrspace(1) constant %"class.sycl::_V1::specialization_id.0" { i32 120 }, align 4 +@size_i16 = addrspace(1) constant %"class.sycl::_V1::specialization_id.1" { i16 1 }, align 2 ; Check that the following globals are preserved: even though they are not used ; in the module anymore, they could still be referenced by debug info metadata @@ -30,10 +33,19 @@ define dso_local void @private_alloca() { ; CHECK-RT: [[LENGTH:%.*]] = call i32 @_Z20__spirv_SpecConstantii(i32 1, i32 120) ; CHECK-RT: {{.*}} = alloca double, i32 [[LENGTH]], align 8 + +; CHECK-EMULATION: alloca double, i32 120, align 8 call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr @size_i32_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i32 to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 8) ; CHECK-RT: [[LENGTH:%.*]] = call i64 @_Z20__spirv_SpecConstantix(i32 0, i64 10) ; CHECK-RT: {{.*}} = alloca float, i64 [[LENGTH]], align 8 + +; CHECK-EMULATION: alloca float, i64 10, align 8 call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f32(ptr addrspace(4) addrspacecast (ptr @size_i64_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i64 to ptr addrspace(4)), ptr addrspace(4) null, float 0.000000e+00, i64 8) + +; CHECK-RT: %[[LENGTH:.*]] = call i16 @_Z20__spirv_SpecConstantis(i32 2, i16 1) +; CHECK-RT: {{.*}} = alloca %my_range, i16 %[[LENGTH]], align 64 + +; CHECK-EMULATION: alloca %my_range, i16 1, align 64 call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_my_range(ptr addrspace(4) addrspacecast (ptr @size_i16_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i16 to ptr addrspace(4)), ptr addrspace(4) null, %my_range zeroinitializer, i64 64) ret void } diff --git a/llvm/test/tools/sycl-post-link/spec-constants/default-value/SYCL-alloca.ll b/llvm/test/tools/sycl-post-link/spec-constants/default-value/SYCL-alloca.ll new file mode 100644 index 0000000000000..c1fa304fbf159 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/spec-constants/default-value/SYCL-alloca.ll @@ -0,0 +1,32 @@ +; RUN: sycl-post-link -split=auto -spec-const=native -S -o %t.table %s -generate-device-image-default-spec-consts +; RUN: FileCheck %s -input-file %t_1.ll --implicit-check-not="SpecConst" + +; This test checks that the post link tool is able to correctly transform +; SYCL alloca intrinsics in SPIR-V devices when using default values. + +%"class.sycl::_V1::specialization_id" = type { i64 } +%"class.sycl::_V1::specialization_id.0" = type { i32 } +%"class.sycl::_V1::specialization_id.1" = type { i16 } +%my_range = type { ptr addrspace(4), ptr addrspace(4) } + +@size_i64 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id" { i64 10 }, align 8 +@size_i32 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id.0" { i32 120 }, align 4 +@size_i16 = internal addrspace(1) constant %"class.sycl::_V1::specialization_id.1" { i16 1 }, align 2 + +@size_i64_stable_name = private unnamed_addr constant [36 x i8] c"_ZTS14name_generatorIL_Z8size_i64EE\00", align 1 +@size_i32_stable_name = private unnamed_addr constant [36 x i8] c"_ZTS14name_generatorIL_Z8size_i32EE\00", align 1 +@size_i16_stable_name = private unnamed_addr constant [36 x i8] c"_ZTS14name_generatorIL_Z8size_i16EE\00", align 1 + +define dso_local void @private_alloca() { +; CHECK: alloca double, i32 120, align 8 + call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr @size_i32_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i32 to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 8) +; CHECK: alloca float, i64 10, align 8 + call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f32(ptr addrspace(4) addrspacecast (ptr @size_i64_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i64 to ptr addrspace(4)), ptr addrspace(4) null, float 0.000000e+00, i64 8) +; CHECK: alloca %my_range, i16 1, align 64 + call ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_my_range(ptr addrspace(4) addrspacecast (ptr @size_i16_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i16 to ptr addrspace(4)), ptr addrspace(4) null, %my_range zeroinitializer, i64 64) + ret void +} + +declare ptr @llvm.sycl.alloca.p0.p4.p4.p4.f32(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), float, i64) +declare ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), double, i64) +declare ptr @llvm.sycl.alloca.p0.p4.p4.p4.s_my_range(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), %my_range, i64) diff --git a/llvm/tools/sycl-post-link/SpecConstants.cpp b/llvm/tools/sycl-post-link/SpecConstants.cpp index 34243a1e22163..8cd7412235de2 100644 --- a/llvm/tools/sycl-post-link/SpecConstants.cpp +++ b/llvm/tools/sycl-post-link/SpecConstants.cpp @@ -894,17 +894,6 @@ PreservedAnalyses SpecConstantsPass::run(Module &M, // 3. Transform to spirv intrinsic _Z*__spirv_SpecConstant* or // _Z*__spirv_SpecConstantComposite Replacement = emitSpecConstantRecursive(SCTy, CI, IDs, DefaultValue); - if (IsSYCLAlloca) { - // In case this is a 'sycl.llvm.alloca' intrinsic, use the emitted - // specialization constant as the allocation size. - auto *Intr = cast(CI); - Value *ArraySize = Replacement; - assert(ArraySize->getType()->isIntegerTy() && - "Expecting integer type"); - Replacement = - new AllocaInst(Intr->getAllocatedType(), Intr->getAddressSpace(), - ArraySize, Intr->getAlign(), "alloca", CI); - } if (IsNewSpecConstant) { // emitSpecConstantRecursive might emit more than one spec constant // (because of composite types) and therefore, we need to adjust @@ -917,8 +906,6 @@ PreservedAnalyses SpecConstantsPass::run(Module &M, M, SymID, SCTy, IDs, /* is native spec constant */ true); } } else if (Mode == HandlingMode::emulation) { - assert(!IsSYCLAlloca && "sycl_ext_oneapi_private_alloca not yet " - "supported in emulation mode"); // 2a. Spec constant will be passed as kernel argument; // Replace it with a load from the pointer to the specialization @@ -982,6 +969,21 @@ PreservedAnalyses SpecConstantsPass::run(Module &M, generateSpecConstDefaultValueMetadata(DefaultValue)); } + if (IsSYCLAlloca) { + // In case this is a 'sycl.llvm.alloca' intrinsic, use the emitted + // specialization constant as the allocation size. + auto *Intr = cast(CI); + // For emulation mode, use the default value for now. This code should + // never be run, as the runtime should throw a 'kernel_not_supported' + // exception. + Value *ArraySize = + Mode == HandlingMode::emulation ? DefaultValue : Replacement; + assert(ArraySize->getType()->isIntegerTy() && "Expecting integer type"); + Replacement = + new AllocaInst(Intr->getAllocatedType(), Intr->getAddressSpace(), + ArraySize, Intr->getAlign(), "alloca", CI); + } + if (HasSretParameter) createStoreInstructionIntoSpecConstValue(CI->getArgOperand(0), Replacement, CI); diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index 93eb8947ee5b5..7a0b02bf143ce 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -328,6 +328,11 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_limited_graph__ 0 #endif +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_private_alloca__ +// __SYCL_ASPECT(ext_oneapi_private_alloca, 64) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_private_alloca__ 0 +#endif + #ifndef __SYCL_ANY_DEVICE_HAS_host__ // __SYCL_ASPECT(host, 0) #define __SYCL_ANY_DEVICE_HAS_host__ 0 @@ -647,3 +652,8 @@ // __SYCL_ASPECT(ext_oneapi_limited_graph, 63) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_limited_graph__ 0 #endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_private_alloca__ +// __SYCL_ASPECT(ext_oneapi_private_alloca, 64) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_private_alloca__ 1 +#endif diff --git a/sycl/include/sycl/ext/oneapi/experimental/alloca.hpp b/sycl/include/sycl/ext/oneapi/experimental/alloca.hpp index c1c8537750b79..7314cb5807cec 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/alloca.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/alloca.hpp @@ -12,6 +12,10 @@ #include "sycl/kernel_handler.hpp" #include "sycl/multi_ptr.hpp" +#ifdef __SYCL_DEVICE_ONLY__ +#include "sycl/aspects.hpp" +#endif + namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { @@ -31,7 +35,8 @@ namespace ext::oneapi::experimental { template __SYCL_BUILTIN_ALIAS(__builtin_intel_sycl_alloca) -private_ptr private_alloca(kernel_handler &kh); +[[__sycl_detail__::__uses_aspects__(aspect::ext_oneapi_private_alloca)]] private_ptr< + ElementType, DecorateAddress> private_alloca(kernel_handler &kh); #else diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index bf8b3c020d4ad..37b7b6bbe7e57 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -58,3 +58,4 @@ __SYCL_ASPECT(ext_oneapi_is_component, 60) __SYCL_ASPECT(ext_oneapi_graph, 61) __SYCL_ASPECT(ext_intel_fpga_task_sequence, 62) __SYCL_ASPECT(ext_oneapi_limited_graph, 63) +__SYCL_ASPECT(ext_oneapi_private_alloca, 64) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 72f0fd1aa8cbd..4494e790507d3 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -667,6 +667,12 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_intel_fpga_task_sequence: { return is_accelerator(); } + case aspect::ext_oneapi_private_alloca: { + // Extension only supported on SPIR-V targets. + backend be = getBackend(); + return be == sycl::backend::ext_oneapi_level_zero || + be == sycl::backend::opencl; + } } throw runtime_error("This device aspect has not been implemented yet.", PI_ERROR_INVALID_DEVICE); diff --git a/sycl/test-e2e/PrivateAlloca/UnsupportedDevice/exception_unsupported_backend.cpp b/sycl/test-e2e/PrivateAlloca/UnsupportedDevice/exception_unsupported_backend.cpp new file mode 100644 index 0000000000000..5f350e2712be8 --- /dev/null +++ b/sycl/test-e2e/PrivateAlloca/UnsupportedDevice/exception_unsupported_backend.cpp @@ -0,0 +1,40 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// Check that an exception with an exception with the `errc::invalid` error code +// thrown when trying to use `sycl_ext_oneapi_private_alloca` and no device +// supports the aspect. + +#include + +#include +#include +#include + +class Kernel; + +constexpr sycl::specialization_id Size(10); + +static std::error_code test() { + sycl::queue Queue; + sycl::buffer B(10); + + try { + Queue.submit([&](sycl::handler &Cgh) { + sycl::accessor Acc(B, Cgh, sycl::write_only, sycl::no_init); + Cgh.parallel_for(10, [=](sycl::id<1>, sycl::kernel_handler Kh) { + sycl::ext::oneapi::experimental::private_alloca< + int, Size, sycl::access::decorated::no>(Kh); + }); + }); + } catch (sycl::exception &Exception) { + return Exception.code(); + } + assert(false && "Exception not thrown"); +} + +int main() { + assert(test() == sycl::errc::invalid && "Unexpected error code"); + + return 0; +} diff --git a/sycl/test-e2e/PrivateAlloca/UnsupportedDevice/lit.local.cfg b/sycl/test-e2e/PrivateAlloca/UnsupportedDevice/lit.local.cfg new file mode 100644 index 0000000000000..3107c2663ff27 --- /dev/null +++ b/sycl/test-e2e/PrivateAlloca/UnsupportedDevice/lit.local.cfg @@ -0,0 +1 @@ +config.required_features += ['!aspect-ext_oneapi_private_alloca'] diff --git a/sycl/test-e2e/PrivateAlloca/Inputs/private_alloca_test.hpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/Inputs/private_alloca_test.hpp similarity index 96% rename from sycl/test-e2e/PrivateAlloca/Inputs/private_alloca_test.hpp rename to sycl/test-e2e/PrivateAlloca/ValidUsage/Inputs/private_alloca_test.hpp index d3af55c41aa21..ebacb016adff5 100644 --- a/sycl/test-e2e/PrivateAlloca/Inputs/private_alloca_test.hpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/Inputs/private_alloca_test.hpp @@ -2,9 +2,10 @@ // Template for private alloca tests. -#include +#include #include +#include template diff --git a/sycl/test-e2e/PrivateAlloca/ValidUsage/lit.local.cfg b/sycl/test-e2e/PrivateAlloca/ValidUsage/lit.local.cfg new file mode 100644 index 0000000000000..8a9c07782b906 --- /dev/null +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/lit.local.cfg @@ -0,0 +1 @@ +config.required_features += ['aspect-ext_oneapi_private_alloca'] diff --git a/sycl/test-e2e/PrivateAlloca/private_alloca_bool_size.cpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_bool_size.cpp similarity index 92% rename from sycl/test-e2e/PrivateAlloca/private_alloca_bool_size.cpp rename to sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_bool_size.cpp index 7c65f9684364a..e427307918f73 100644 --- a/sycl/test-e2e/PrivateAlloca/private_alloca_bool_size.cpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_bool_size.cpp @@ -1,6 +1,5 @@ // RUN: %{build} -w -o %t.out // RUN: echo 1 | %{run} %t.out -// UNSUPPORTED: cuda || hip // Test checking size of 'bool' type. This is not expected to be ever used, but, // as 'bool' is an integral type, it is a possible scenario. diff --git a/sycl/test-e2e/PrivateAlloca/private_alloca_decorated.cpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_decorated.cpp similarity index 93% rename from sycl/test-e2e/PrivateAlloca/private_alloca_decorated.cpp rename to sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_decorated.cpp index c9a37d3bd47ec..4420a44626df8 100644 --- a/sycl/test-e2e/PrivateAlloca/private_alloca_decorated.cpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_decorated.cpp @@ -3,7 +3,6 @@ // RUN: echo 10 | %{run} %t.out // RUN: echo 20 | %{run} %t.out // RUN: echo 30 | %{run} %t.out -// UNSUPPORTED: cuda || hip // Simple test filling a SYCL private alloca and copying it back to an output // accessor using a decorated multi_ptr. diff --git a/sycl/test-e2e/PrivateAlloca/private_alloca_legacy.cpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_legacy.cpp similarity index 93% rename from sycl/test-e2e/PrivateAlloca/private_alloca_legacy.cpp rename to sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_legacy.cpp index ba23db06d8df3..e22237ff58022 100644 --- a/sycl/test-e2e/PrivateAlloca/private_alloca_legacy.cpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_legacy.cpp @@ -3,7 +3,6 @@ // RUN: echo 10 | %{run} %t.out // RUN: echo 20 | %{run} %t.out // RUN: echo 30 | %{run} %t.out -// UNSUPPORTED: cuda || hip // Simple test filling a private alloca and copying it back to an output // accessor using a legacy multi_ptr. diff --git a/sycl/test-e2e/PrivateAlloca/private_alloca_multiple.cpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_multiple.cpp similarity index 95% rename from sycl/test-e2e/PrivateAlloca/private_alloca_multiple.cpp rename to sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_multiple.cpp index 23453209ab67a..9663d1f215301 100644 --- a/sycl/test-e2e/PrivateAlloca/private_alloca_multiple.cpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_multiple.cpp @@ -1,6 +1,5 @@ // RUN: %{build} -w -o %t.out // RUN: echo 10 20 30 | %{run} %t.out -// UNSUPPORTED: cuda || hip // Chain of private_alloca test to check runtime support for compilation when // the default size is to be used. diff --git a/sycl/test-e2e/PrivateAlloca/private_alloca_raw.cpp b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_raw.cpp similarity index 98% rename from sycl/test-e2e/PrivateAlloca/private_alloca_raw.cpp rename to sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_raw.cpp index 4b92e1585f740..cc6eb7a57c86d 100644 --- a/sycl/test-e2e/PrivateAlloca/private_alloca_raw.cpp +++ b/sycl/test-e2e/PrivateAlloca/ValidUsage/private_alloca_raw.cpp @@ -3,13 +3,14 @@ // RUN: echo 10 | %{run} %t.out // RUN: echo 20 | %{run} %t.out // RUN: echo 30 | %{run} %t.out -// UNSUPPORTED: cuda || hip // Simple test filling a private alloca and copying it back to an output // accessor using a raw multi_ptr. This pointer checks struct allocation. #include "Inputs/private_alloca_test.hpp" +#include + constexpr sycl::specialization_id size(10); class value_and_sign { diff --git a/sycl/test-e2e/PrivateAlloca/device_query.cpp b/sycl/test-e2e/PrivateAlloca/device_query.cpp new file mode 100644 index 0000000000000..47890b82d9676 --- /dev/null +++ b/sycl/test-e2e/PrivateAlloca/device_query.cpp @@ -0,0 +1,22 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// Tests the using device query for 'sycl_ext_oneapi_private_alloca' extension +// support, and that the return value matches expectations. + +#include + +int main() { + sycl::queue Queue; + + sycl::device Device = Queue.get_device(); + bool SupportsPrivateAlloca = + Device.has(sycl::aspect::ext_oneapi_private_alloca); + sycl::backend Backend = Device.get_backend(); + bool ShouldSupportPrivateAlloca = + Backend == sycl::backend::opencl || + Backend == sycl::backend::ext_oneapi_level_zero; + + assert(SupportsPrivateAlloca == ShouldSupportPrivateAlloca && + "Unexpected support value"); +} diff --git a/sycl/test-e2e/PrivateAlloca/private_alloca_host.cpp b/sycl/test-e2e/PrivateAlloca/private_alloca_host.cpp index aaeaf14e02e2d..5040bc2c4e3e5 100644 --- a/sycl/test-e2e/PrivateAlloca/private_alloca_host.cpp +++ b/sycl/test-e2e/PrivateAlloca/private_alloca_host.cpp @@ -4,9 +4,10 @@ // Simple test checking calling private_alloca on the host leads to an exception // being thrown. -#include +#include #include +#include constexpr sycl::specialization_id size(10); diff --git a/sycl/test/optional_kernel_features/private_alloca.cpp b/sycl/test/optional_kernel_features/private_alloca.cpp new file mode 100644 index 0000000000000..0bdc0121e2f80 --- /dev/null +++ b/sycl/test/optional_kernel_features/private_alloca.cpp @@ -0,0 +1,31 @@ +// RUN: %clangxx %s -fsycl -fsycl-device-only -S -emit-llvm -o - | FileCheck %s + +// Check the 'ext_oneapi_private_alloca' aspect is listed in the list of used +// aspects. + +#include + +#include + +class Kernel; + +// CHECK-LABEL: spir_kernel void @_ZTS6Kernel +// CHECK-SAME: !sycl_used_aspects ![[#USED_ASPECTS:]] + +// CHECK-DAG: ![[#USED_ASPECTS]] = !{i32 64} + +constexpr static sycl::specialization_id size(10); + +SYCL_EXTERNAL void foo(sycl::id<1> i, int *a, + sycl::decorated_private_ptr tmp); + +void test(sycl::queue q, sycl::range<1> r, int *a, int s) { + q.submit([&](sycl::handler &cgh) { + cgh.set_specialization_constant(s); + cgh.parallel_for(r, [=](sycl::id<1> i, sycl::kernel_handler kh) { + foo(i, a, + sycl::ext::oneapi::experimental::private_alloca< + int, size, sycl::access::decorated::yes>(kh)); + }); + }); +} From d52807838909f492a1786b15bf4d2ab612fbdfd4 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Wed, 27 Mar 2024 16:16:08 +0000 Subject: [PATCH 2/6] Format --- clang/lib/CodeGen/CGBuiltin.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 449738b0c4c31..b0b7ff3059f28 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -23851,7 +23851,8 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, "Expecting a single aspect"); llvm::APSInt AspectInt = (*AspectAttr->aspects_begin())->EvaluateKnownConstInt(getContext()); - llvm::Constant *C = Builder.getInt32(static_cast(AspectInt.getZExtValue())); + llvm::Constant *C = + Builder.getInt32(static_cast(AspectInt.getZExtValue())); llvm::Metadata *AspectMD = llvm::ConstantAsMetadata::get(C); F->setMetadata(MDName, llvm::MDNode::get(Builder.getContext(), AspectMD)); } From 25865608cd396b40ca9cfddcd392bb9b47c58a1f Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Thu, 28 Mar 2024 11:32:42 +0000 Subject: [PATCH 3/6] Add to DeviceConfigFile --- llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index d98572b4d7a7f..aaa55d3686ac5 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -72,6 +72,7 @@ def AspectExt_oneapi_is_component : Aspect<"ext_oneapi_is_component">; def AspectExt_oneapi_graph : Aspect<"ext_oneapi_graph">; def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">; def AspectExt_oneapi_limited_graph : Aspect<"ext_oneapi_limited_graph">; +def AspectExt_oneapi_private_alloca : Aspect<"ext_oneapi_private_alloca">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">; @@ -123,7 +124,8 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_mipmap, AspectExt_oneapi_mipmap_anisotropy, AspectExt_oneapi_mipmap_level_reference, AspectExt_intel_esimd, AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component, - AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_limited_graph], + AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_limited_graph, + AspectExt_oneapi_private_alloca], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. From b1e9fffe0e5c68300b51a887b77691a5524ecea2 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Fri, 29 Mar 2024 09:20:09 +0000 Subject: [PATCH 4/6] Drop DAG --- sycl/test/optional_kernel_features/private_alloca.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/optional_kernel_features/private_alloca.cpp b/sycl/test/optional_kernel_features/private_alloca.cpp index 0bdc0121e2f80..041b9e95378c3 100644 --- a/sycl/test/optional_kernel_features/private_alloca.cpp +++ b/sycl/test/optional_kernel_features/private_alloca.cpp @@ -12,7 +12,7 @@ class Kernel; // CHECK-LABEL: spir_kernel void @_ZTS6Kernel // CHECK-SAME: !sycl_used_aspects ![[#USED_ASPECTS:]] -// CHECK-DAG: ![[#USED_ASPECTS]] = !{i32 64} +// CHECK: ![[#USED_ASPECTS]] = !{i32 64} constexpr static sycl::specialization_id size(10); From 02fff2b8c2c400195a97b115f83d547584eb2221 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Fri, 29 Mar 2024 10:02:36 +0000 Subject: [PATCH 5/6] Change to getIntegerValue --- clang/lib/CodeGen/CGBuiltin.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b0b7ff3059f28..34b77ba118a37 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -23851,8 +23851,8 @@ CodeGenFunction::EmitIntelSYCLAllocaBuiltin(const CallExpr *E, "Expecting a single aspect"); llvm::APSInt AspectInt = (*AspectAttr->aspects_begin())->EvaluateKnownConstInt(getContext()); - llvm::Constant *C = - Builder.getInt32(static_cast(AspectInt.getZExtValue())); + llvm::Type *I32Ty = Builder.getInt32Ty(); + llvm::Constant *C = llvm::Constant::getIntegerValue(I32Ty, AspectInt); llvm::Metadata *AspectMD = llvm::ConstantAsMetadata::get(C); F->setMetadata(MDName, llvm::MDNode::get(Builder.getContext(), AspectMD)); } From cb3022f891c0529f308f6f43db521c13fc8551a9 Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Wed, 3 Apr 2024 13:06:43 +0100 Subject: [PATCH 6/6] Handle SPIR AOT --- .../spec-constants/SYCL-alloca-error.ll | 24 +++++++++++++++++++ llvm/tools/sycl-post-link/SpecConstants.cpp | 7 ++++++ 2 files changed, 31 insertions(+) create mode 100644 llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca-error.ll diff --git a/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca-error.ll b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca-error.ll new file mode 100644 index 0000000000000..07f74450d0375 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/spec-constants/SYCL-alloca-error.ll @@ -0,0 +1,24 @@ +; RUN: sycl-post-link -spec-const=emulation %s 2>&1 | FileCheck %s + +; This test checks the `-spec-const` pass on SPIR-V targets and emulation mode, +; i.e., on AOT SPIR-V targets. In this scenario, 'llvm.sycl.alloca' intrinsics +; must be left unmodified. + +; Note that coming from clang this case should never be reached. + +; CHECK: sycl-post-link NOTE: no modifications to the input LLVM IR have been made + +target triple = "spir64_x86_64" + +%"class.sycl::_V1::specialization_id" = type { i64 } + +@size_i64 = addrspace(1) constant %"class.sycl::_V1::specialization_id" { i64 10 }, align 8 + +@size_i64_stable_name = private unnamed_addr constant [36 x i8] c"_ZTS14name_generatorIL_Z8size_i64EE\00", align 1 + +define dso_local void @private_alloca() { + call ptr @llvm.sycl.alloca.p0.p4.p4.p4.f64(ptr addrspace(4) addrspacecast (ptr @size_i64_stable_name to ptr addrspace(4)), ptr addrspace(4) addrspacecast (ptr addrspace(1) @size_i64 to ptr addrspace(4)), ptr addrspace(4) null, double 0.000000e+00, i64 8) + ret void +} + +declare ptr @llvm.sycl.alloca.p0.p4.p4.p4.f32(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4), float, i64) diff --git a/llvm/tools/sycl-post-link/SpecConstants.cpp b/llvm/tools/sycl-post-link/SpecConstants.cpp index 8cd7412235de2..3ef8d15d338bd 100644 --- a/llvm/tools/sycl-post-link/SpecConstants.cpp +++ b/llvm/tools/sycl-post-link/SpecConstants.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/TargetParser/Triple.h" #include @@ -815,12 +816,18 @@ PreservedAnalyses SpecConstantsPass::run(Module &M, // intrinsic to find its calls and lower them depending on the HandlingMode. bool IRModified = false; LLVMContext &Ctx = M.getContext(); + bool IsSPIREmulated = + Triple(M.getTargetTriple()).isSPIR() && Mode == HandlingMode::emulation; for (Function &F : M) { if (!F.isDeclaration()) continue; const bool IsSYCLAlloca = F.getIntrinsicID() == Intrinsic::sycl_alloca; + // 'llvm.sycl.alloca' is not supported in emulation mode on SPIR-V targets. + if (IsSPIREmulated && IsSYCLAlloca) + continue; + if (!F.getName().starts_with(SYCL_GET_SCALAR_2020_SPEC_CONST_VAL) && !F.getName().starts_with(SYCL_GET_COMPOSITE_2020_SPEC_CONST_VAL) && !IsSYCLAlloca)