Skip to content

Commit 437d5c8

Browse files
committed
[SYCL] Add command line option for local accessor to shared mem pass
1 parent 82dbbdb commit 437d5c8

18 files changed

+69
-26
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5787,6 +5787,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
57875787
CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
57885788
}
57895789

5790+
// Enable local accessor to shared memory pass for SYCL.
5791+
if (isa<BackendJobAction>(JA) && IsSYCL) {
5792+
CmdArgs.push_back("-mllvm");
5793+
CmdArgs.push_back("-sycl-enable-local-accessor");
5794+
}
57905795
// These two are potentially updated by AddClangCLArgs.
57915796
codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
57925797
bool EmitCodeView = false;

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,12 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
7878
const llvm::opt::ArgList &Args) const {
7979
// Construct lld command.
8080
// The output from ld.lld is an HSA code object file.
81-
ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared",
82-
"-plugin-opt=-amdgpu-internalize-symbols"};
81+
ArgStringList LldArgs{"-flavor",
82+
"gnu",
83+
"--no-undefined",
84+
"-shared",
85+
"-plugin-opt=-amdgpu-internalize-symbols",
86+
"-plugin-opt=-sycl-enable-local-accessor"};
8387

8488
auto &TC = getToolChain();
8589
auto &D = TC.getDriver();

llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,20 @@
1818
#include "llvm/IR/GlobalValue.h"
1919
#include "llvm/IR/Instructions.h"
2020
#include "llvm/IR/PassManager.h"
21+
#include "llvm/Support/CommandLine.h"
2122
#include "llvm/Transforms/IPO.h"
2223

2324
using namespace llvm;
2425

2526
#define DEBUG_TYPE "localaccessortosharedmemory"
2627

28+
static bool EnableLocalAccessor;
29+
30+
static cl::opt<bool, true> EnableLocalAccessorFlag(
31+
"sycl-enable-local-accessor", cl::Hidden,
32+
cl::desc("Enable local accessor to shared memory optimisation."),
33+
cl::location(EnableLocalAccessor), cl::init(false));
34+
2735
namespace llvm {
2836
void initializeLocalAccessorToSharedMemoryPass(PassRegistry &);
2937
} // namespace llvm
@@ -48,6 +56,9 @@ class LocalAccessorToSharedMemory : public ModulePass {
4856
LocalAccessorToSharedMemory() : ModulePass(ID) {}
4957

5058
bool runOnModule(Module &M) override {
59+
if (!EnableLocalAccessor)
60+
return false;
61+
5162
auto AT = StringSwitch<ArchType>(M.getTargetTriple().c_str())
5263
.Case("nvptx64-nvidia-cuda", ArchType::Cuda)
5364
.Case("nvptx-nvidia-cuda", ArchType::Cuda)
@@ -57,7 +68,9 @@ class LocalAccessorToSharedMemory : public ModulePass {
5768
// Invariant: This pass is only intended to operate on SYCL kernels being
5869
// compiled to either `nvptx{,64}-nvidia-cuda`, or `amdgcn-amd-amdhsa`
5970
// triples.
60-
assert(AT != ArchType::Unsupported && "Only AMGHSA or CUDA supported.");
71+
if (ArchType::Unsupported == AT)
72+
return false;
73+
6174
if (skipModule(M))
6275
return false;
6376

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
; GCN-O0-NEXT: Expand vector predication intrinsics
5151
; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics
5252
; GCN-O0-NEXT: Expand reduction intrinsics
53+
; GCN-O0-NEXT: SYCL Local Accessor to Shared Memory
5354
; GCN-O0-NEXT: AMDGPU Attributor
5455
; GCN-O0-NEXT: CallGraph Construction
5556
; GCN-O0-NEXT: Call Graph SCC Pass Manager
@@ -216,6 +217,7 @@
216217
; GCN-O1-NEXT: Expand vector predication intrinsics
217218
; GCN-O1-NEXT: Scalarize Masked Memory Intrinsics
218219
; GCN-O1-NEXT: Expand reduction intrinsics
220+
; GCN-O1-NEXT: SYCL Local Accessor to Shared Memory
219221
; GCN-O1-NEXT: AMDGPU Attributor
220222
; GCN-O1-NEXT: CallGraph Construction
221223
; GCN-O1-NEXT: Call Graph SCC Pass Manager
@@ -487,6 +489,7 @@
487489
; GCN-O1-OPTS-NEXT: Scalarize Masked Memory Intrinsics
488490
; GCN-O1-OPTS-NEXT: Expand reduction intrinsics
489491
; GCN-O1-OPTS-NEXT: Early CSE
492+
; GCN-O1-OPTS-NEXT: SYCL Local Accessor to Shared Memory
490493
; GCN-O1-OPTS-NEXT: AMDGPU Attributor
491494
; GCN-O1-OPTS-NEXT: CallGraph Construction
492495
; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager
@@ -772,6 +775,7 @@
772775
; GCN-O2-NEXT: Scalarize Masked Memory Intrinsics
773776
; GCN-O2-NEXT: Expand reduction intrinsics
774777
; GCN-O2-NEXT: Early CSE
778+
; GCN-O2-NEXT: SYCL Local Accessor to Shared Memory
775779
; GCN-O2-NEXT: AMDGPU Attributor
776780
; GCN-O2-NEXT: CallGraph Construction
777781
; GCN-O2-NEXT: Call Graph SCC Pass Manager
@@ -1072,6 +1076,7 @@
10721076
; GCN-O3-NEXT: Lazy Block Frequency Analysis
10731077
; GCN-O3-NEXT: Optimization Remark Emitter
10741078
; GCN-O3-NEXT: Global Value Numbering
1079+
; GCN-O3-NEXT: SYCL Local Accessor to Shared Memory
10751080
; GCN-O3-NEXT: AMDGPU Attributor
10761081
; GCN-O3-NEXT: CallGraph Construction
10771082
; GCN-O3-NEXT: Call Graph SCC Pass Manager

llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-basic-transformation.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory %s -S -o - | FileCheck %s
1+
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
22
; ModuleID = 'basic-transformation.bc'
33
source_filename = "basic-transformation.ll"
44
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"

llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-invalid-triple.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; This test checks that the Local Accessor to Shared Memory pass does not run with the
22
; `amdgcn-amd-amdpal` triple.
3-
; RUN: llc -mtriple=amdgcn-amd-amdpal < %s | FileCheck %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdpal -sycl-enable-local-accessor < %s | FileCheck %s
44

55
; ModuleID = 'local-accessor-to-shared-memory-invalid-triple.ll'
66
source_filename = "local-accessor-to-shared-memory-invalid-triple.ll"

llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-multiple-functions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory %s -S -o - | FileCheck %s
1+
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
22
; ModuleID = 'multiple-functions.bc'
33
source_filename = "multiple-functions.ll"
44
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"

llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-no-entry-points.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory %s -S -o - | FileCheck %s
1+
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
22
; ModuleID = 'no-entry-points.bc'
33
source_filename = "no-entry-points.ll"
44
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"

llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-preserves-types.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory %s -S -o - | FileCheck %s
1+
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
22
; ModuleID = 'bitcasts.bc'
33
source_filename = "bitcasts.ll"
44
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"

llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; This test checks that the Local Accessor to Shared Memory pass runs with the
22
; `amdgcn-amd-amdhsa` triple, but not with `amdgcn-amd-amdpal`.
3-
; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=CHECK-VALID %s
4-
; RUN: llc -mtriple=amdgcn-amd-amdpal < %s | FileCheck --check-prefix=CHECK-INVALID %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -sycl-enable-local-accessor < %s | FileCheck --check-prefix=CHECK-VALID %s
4+
; RUN: llc -mtriple=amdgcn-amd-amdpal -sycl-enable-local-accessor < %s | FileCheck --check-prefix=CHECK-INVALID %s
55

66
; ModuleID = 'local-accessor-to-shared-memory-valid-triple.ll'
77
source_filename = "local-accessor-to-shared-memory-valid-triple.ll"

llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,29 @@
11
; This test checks that the Local Accessor to Shared Memory pass runs with the
2-
; `amdgcn-amd-amdhsa` triple.
3-
; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck %s
2+
; `amdgcn-amd-amdhsa` triple, and does not run if the option is not present.
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -sycl-enable-local-accessor < %s | FileCheck --check-prefix=CHECK-OPT %s
4+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -sycl-enable-local-accessor=true < %s | FileCheck --check-prefix=CHECK-OPT %s
5+
; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=CHECK-NO-OPT %s
6+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -sycl-enable-local-accessor=false < %s | FileCheck --check-prefix=CHECK-NO-OPT %s
47

58
; ModuleID = 'local-accessor-to-shared-memory-valid-triple.ll'
69
source_filename = "local-accessor-to-shared-memory-valid-triple.ll"
710
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
811
target triple = "amdgcn-amd-amdhsa"
912

10-
; CHECK: .globl _ZTS14example_kernel
11-
; CHECK: - .args:
12-
; CHECK-NOT: .address_space: local
13-
; CHECK-NEXT: .offset: 0
14-
; CHECK-NEXT: .size: 4
13+
; CHECK-OPT: .globl _ZTS14example_kernel
14+
; CHECK-OPT: - .args:
15+
; CHECK-OPT-NOT: .address_space: local
16+
; CHECK-OPT-NEXT: .offset: 0
17+
; CHECK-OPT-NEXT: .size: 4
18+
; CHECK-OPT-NEXT: .value_kind: by_value
19+
; CHECK-NO-OPT: .globl _ZTS14example_kernel
20+
; CHECK-NO-OPT: - .args:
21+
; CHECK-NO-OPT-NEXT: .address_space: local
22+
; CHECK-NO-OPT-NEXT: .name: a
23+
; CHECK-NO-OPT-NEXT: .offset: 0
24+
; CHECK-NO-OPT-NEXT: .pointee_align: 4
25+
; CHECK-NO-OPT-NEXT: .size: 4
26+
; CHECK-NO-OPT-NEXT: .value_kind: dynamic_shared_pointer
1527
; Function Attrs: noinline
1628
define amdgpu_kernel void @_ZTS14example_kernel(i32 addrspace(3)* %a) {
1729
entry:

llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-basic-transformation.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory %s -S -o - | FileCheck %s
1+
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
22
; ModuleID = 'basic-transformation.bc'
33
source_filename = "basic-transformation.ll"
44
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"

llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-invalid-triple.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; This test checks that the Local Accessor to Shared Memory pass does not run with the
22
; `nvptx64-nvidia-nvcl` triple.
3-
; RUN: llc -march=nvptx64 -mcpu=sm_20 < %s | FileCheck %s
3+
; RUN: llc -march=nvptx64 -mcpu=sm_20 -sycl-enable-local-accessor < %s | FileCheck %s
44
; CHECK: .param .u64 .ptr .shared .align 4 _ZTS14example_kernel_param_0
55

66
; ModuleID = 'local-accessor-to-shared-memory-invalid-triple.ll'

llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-functions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory %s -S -o - | FileCheck %s
1+
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
22
; ModuleID = 'multiple-functions.bc'
33
source_filename = "multiple-functions.ll"
44
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"

llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-no-entry-points.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory %s -S -o - | FileCheck %s
1+
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
22
; ModuleID = 'no-entry-points.bc'
33
source_filename = "no-entry-points.ll"
44
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"

llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-preserves-types.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory %s -S -o - | FileCheck %s
1+
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
22
; ModuleID = 'bitcasts.bc'
33
source_filename = "bitcasts.ll"
44
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"

llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-triple.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; This test checks that the Local Accessor to Shared Memory pass runs with the
22
; `nvptx64-nvidia-cuda` triple.
3-
; RUN: llc -mtriple=nvptx64-nvidia-cuda < %s | FileCheck --check-prefix=CHECK-VALID %s
4-
; RUN: llc -mtriple=nvptx64-nvidia-nvcl < %s | FileCheck --check-prefix=CHECK-INVALID %s
3+
; RUN: llc -mtriple=nvptx64-nvidia-cuda -sycl-enable-local-accessor < %s | FileCheck --check-prefix=CHECK-VALID %s
4+
; RUN: llc -mtriple=nvptx64-nvidia-nvcl -sycl-enable-local-accessor < %s | FileCheck --check-prefix=CHECK-INVALID %s
55
; CHECK-VALID: .param .u32 _ZTS14example_kernel_param_0
66
; CHECK-INVALID: .param .u64 .ptr .shared .align 4 _ZTS14example_kernel_param_0
77

llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-valid-triple.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
; This test checks that the Local Accessor to Shared Memory pass runs with the
22
; `nvptx64-nvidia-cuda` triple.
3-
; RUN: llc -march=nvptx64 -mcpu=sm_20 < %s | FileCheck %s
4-
; CHECK: .param .u32 _ZTS14example_kernel_param_0
3+
; RUN: llc -march=nvptx64 -mcpu=sm_20 -sycl-enable-local-accessor < %s | FileCheck --check-prefix=CHECK-OPT %s
4+
; RUN: llc -march=nvptx64 -mcpu=sm_20 -sycl-enable-local-accessor=true < %s | FileCheck --check-prefix=CHECK-OPT %s
5+
; RUN: llc -march=nvptx64 -mcpu=sm_20 < %s | FileCheck --check-prefix=CHECK-NO-OPT %s
6+
; RUN: llc -march=nvptx64 -mcpu=sm_20 -sycl-enable-local-accessor=false < %s | FileCheck --check-prefix=CHECK-NO-OPT %s
7+
; CHECK-OPT: .param .u32 _ZTS14example_kernel_param_0
8+
; CHECK-NO-OPT-NOT: .param .u32 _ZTS14example_kernel_param_0
59

610
; ModuleID = 'local-accessor-to-shared-memory-valid-triple.ll'
711
source_filename = "local-accessor-to-shared-memory-valid-triple.ll"

0 commit comments

Comments
 (0)