Skip to content

Commit eb4b933

Browse files
authored
fix work_group_scrach_memory (#16325)
Fixes #16285, local testing was done using arc only initially and didn't exibit any of the seen behaviour elsewhere. Tested locally with OpenCL CPU and OpenCL gpu (PVC). also partially addresses #16072 --------- Signed-off-by: Victor Lomuller <[email protected]>
1 parent 83fe1c1 commit eb4b933

File tree

6 files changed

+12
-11
lines changed

6 files changed

+12
-11
lines changed

llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ static bool dynamicWGLocalMemory(Module &M) {
205205
GlobalVariable::NotThreadLocal, // ThreadLocalMode
206206
LocalAS // AddressSpace
207207
);
208+
LocalMemArrayGV->setUnnamedAddr(GlobalVariable::UnnamedAddr::Local);
208209
constexpr int DefaultMaxAlignment = 128;
209210
if (!TT.isSPIROrSPIRV())
210211
LocalMemArrayGV->setAlignment(Align{DefaultMaxAlignment});

llvm/test/SYCLLowerIR/work_group_static.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
77
target triple = "spir64-unknown-unknown"
88

9-
; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = linkonce_odr addrspace(3) global ptr addrspace(3) undef
9+
; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = linkonce_odr local_unnamed_addr addrspace(3) global ptr addrspace(3) undef
1010

1111
; Function Attrs: convergent norecurse
1212
; CHECK: @_ZTS7KernelA(ptr addrspace(1) %0, ptr addrspace(3) noalias "sycl-implicit-local-arg" %[[IMPLICT_ARG:[a-zA-Z0-9]+]]{{.*}} !kernel_arg_addr_space ![[ADDR_SPACE_MD:[0-9]+]]

llvm/test/SYCLLowerIR/work_group_static_nv.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
target triple = "nvptx64-nvidia-cuda"
77

8-
; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = external addrspace(3) global [0 x i8], align 128
8+
; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = external local_unnamed_addr addrspace(3) global [0 x i8], align 128
99

1010
; Function Attrs: convergent norecurse
1111
; CHECK: @_ZTS7KernelA(ptr addrspace(1) %0)

sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_local_accessor.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: %{run} %t.out
33
//
44

5-
// UNSUPPORTED: gpu-intel-gen12, cpu
5+
// UNSUPPORTED: gpu-intel-gen12
66
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072
77

88
// Test work_group_dynamic extension with allocation size specified at runtime
@@ -35,8 +35,8 @@ int main() {
3535
sycl_ext::properties properties{static_size};
3636
auto LocalAccessor =
3737
sycl::local_accessor<int>(WgSize * RepeatWG * sizeof(int), Cgh);
38-
Cgh.parallel_for(nd_range<1>(range<1>(Size), range<1>(WgSize)), properties,
39-
[=](nd_item<1> Item) {
38+
Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)),
39+
properties, [=](nd_item<1> Item) {
4040
int *Ptr = reinterpret_cast<int *>(
4141
sycl_ext::get_work_group_scratch_memory());
4242
size_t GroupOffset =

sycl/test-e2e/WorkGroupScratchMemory/dynamic_alloc_ptr_alias.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: %{run} %t.out
33
//
44

5-
// UNSUPPORTED: gpu-intel-gen12, cpu
5+
// UNSUPPORTED: gpu-intel-gen12
66
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072
77

88
// Test work_group_dynamic extension with allocation size specified at runtime
@@ -33,8 +33,8 @@ int main() {
3333
sycl_ext::work_group_scratch_size static_size(WgSize * RepeatWG *
3434
sizeof(int));
3535
sycl_ext::properties properties{static_size};
36-
Cgh.parallel_for(nd_range<1>(range<1>(Size), range<1>(WgSize)), properties,
37-
[=](nd_item<1> Item) {
36+
Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)),
37+
properties, [=](nd_item<1> Item) {
3838
int *Ptr = reinterpret_cast<int *>(
3939
sycl_ext::get_work_group_scratch_memory());
4040
size_t GroupOffset =

sycl/test-e2e/WorkGroupScratchMemory/dynamic_allocation.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// RUN: %{run} %t.out
33
//
44

5-
// UNSUPPORTED: gpu-intel-gen12, cpu
5+
// UNSUPPORTED: gpu-intel-gen12
66
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072
77

88
// Test work_group_dynamic extension with allocation size specified at runtime.
@@ -32,8 +32,8 @@ int main() {
3232
sycl_ext::work_group_scratch_size static_size(WgSize * RepeatWG *
3333
sizeof(int));
3434
sycl_ext::properties properties{static_size};
35-
Cgh.parallel_for(nd_range<1>(range<1>(Size), range<1>(WgSize)), properties,
36-
[=](nd_item<1> Item) {
35+
Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)),
36+
properties, [=](nd_item<1> Item) {
3737
int *Ptr = reinterpret_cast<int *>(
3838
sycl_ext::get_work_group_scratch_memory());
3939
size_t GroupOffset =

0 commit comments

Comments
 (0)