Skip to content

Commit 33d2a24

Browse files
jayfoad authored and GeorgeARM committed
[AMDGPU] Check for nonnull loads feeding addrspacecast (llvm#138184)
Handle nonnull loads just like nonnull arguments when checking for addrspacecasts that are known never null.
1 parent 9cd6a6e commit 33d2a24

File tree

2 files changed

+125
-4
lines changed

2 files changed

+125
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2130,6 +2130,11 @@ static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
21302130
if (const auto *Arg = dyn_cast<Argument>(V); Arg && Arg->hasNonNullAttr())
21312131
return true;
21322132

2133+
// Check nonnull loads.
2134+
if (const auto *Load = dyn_cast<LoadInst>(V);
2135+
Load && Load->hasMetadata(LLVMContext::MD_nonnull))
2136+
return true;
2137+
21332138
// getUnderlyingObject may have looked through another addrspacecast, although
21342139
// the optimizable situations most likely folded out by now.
21352140
if (AS != cast<PointerType>(V->getType())->getAddressSpace())

llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll

Lines changed: 120 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,122 @@ define void @flat_to_private_nonnull_arg(ptr nonnull %ptr) {
8888
ret void
8989
}
9090

91+
define void @local_to_flat_nonnull_load(ptr %p) {
92+
; OPT-LABEL: define void @local_to_flat_nonnull_load(
93+
; OPT-SAME: ptr [[P:%.*]]) {
94+
; OPT-NEXT: [[PTR:%.*]] = load ptr addrspace(3), ptr [[P]], align 4, !nonnull [[META0:![0-9]+]]
95+
; OPT-NEXT: [[X:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
96+
; OPT-NEXT: store volatile i32 7, ptr [[X]], align 4
97+
; OPT-NEXT: ret void
98+
;
99+
; ASM-LABEL: local_to_flat_nonnull_load:
100+
; ASM: ; %bb.0:
101+
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; ASM-NEXT: flat_load_dword v0, v[0:1]
103+
; ASM-NEXT: s_mov_b64 s[4:5], src_shared_base
104+
; ASM-NEXT: v_mov_b32_e32 v1, s5
105+
; ASM-NEXT: v_mov_b32_e32 v2, 7
106+
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
107+
; ASM-NEXT: flat_store_dword v[0:1], v2
108+
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
109+
; ASM-NEXT: s_setpc_b64 s[30:31]
110+
%ptr = load ptr addrspace(3), ptr %p, !nonnull !{}
111+
%x = addrspacecast ptr addrspace(3) %ptr to ptr
112+
store volatile i32 7, ptr %x
113+
ret void
114+
}
115+
116+
define void @private_to_flat_nonnull_load(ptr %p) {
117+
; OPT-LABEL: define void @private_to_flat_nonnull_load(
118+
; OPT-SAME: ptr [[P:%.*]]) {
119+
; OPT-NEXT: [[PTR:%.*]] = load ptr addrspace(5), ptr [[P]], align 4, !nonnull [[META0]]
120+
; OPT-NEXT: [[X:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PTR]])
121+
; OPT-NEXT: store volatile i32 7, ptr [[X]], align 4
122+
; OPT-NEXT: ret void
123+
;
124+
; ASM-LABEL: private_to_flat_nonnull_load:
125+
; ASM: ; %bb.0:
126+
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127+
; ASM-NEXT: flat_load_dword v0, v[0:1]
128+
; ASM-NEXT: s_mov_b64 s[4:5], src_private_base
129+
; ASM-NEXT: v_mov_b32_e32 v1, s5
130+
; ASM-NEXT: v_mov_b32_e32 v2, 7
131+
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
132+
; ASM-NEXT: flat_store_dword v[0:1], v2
133+
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
134+
; ASM-NEXT: s_setpc_b64 s[30:31]
135+
%ptr = load ptr addrspace(5), ptr %p, !nonnull !{}
136+
%x = addrspacecast ptr addrspace(5) %ptr to ptr
137+
store volatile i32 7, ptr %x
138+
ret void
139+
}
140+
141+
define void @flat_to_local_nonnull_load(ptr %p) {
142+
; OPT-LABEL: define void @flat_to_local_nonnull_load(
143+
; OPT-SAME: ptr [[P:%.*]]) {
144+
; OPT-NEXT: [[PTR:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]]
145+
; OPT-NEXT: [[X:%.*]] = call ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr [[PTR]])
146+
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) [[X]], align 4
147+
; OPT-NEXT: ret void
148+
;
149+
; DAGISEL-ASM-LABEL: flat_to_local_nonnull_load:
150+
; DAGISEL-ASM: ; %bb.0:
151+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152+
; DAGISEL-ASM-NEXT: flat_load_dword v0, v[0:1]
153+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, 7
154+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
155+
; DAGISEL-ASM-NEXT: ds_write_b32 v0, v1
156+
; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
157+
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
158+
;
159+
; GISEL-ASM-LABEL: flat_to_local_nonnull_load:
160+
; GISEL-ASM: ; %bb.0:
161+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162+
; GISEL-ASM-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
163+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
164+
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, 7
165+
; GISEL-ASM-NEXT: ds_write_b32 v0, v1
166+
; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
167+
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
168+
%ptr = load ptr, ptr %p, !nonnull !{}
169+
%x = addrspacecast ptr %ptr to ptr addrspace(3)
170+
store volatile i32 7, ptr addrspace(3) %x
171+
ret void
172+
}
173+
174+
define void @flat_to_private_nonnull_load(ptr %p) {
175+
; OPT-LABEL: define void @flat_to_private_nonnull_load(
176+
; OPT-SAME: ptr [[P:%.*]]) {
177+
; OPT-NEXT: [[PTR:%.*]] = load ptr, ptr [[P]], align 8, !nonnull [[META0]]
178+
; OPT-NEXT: [[X:%.*]] = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr [[PTR]])
179+
; OPT-NEXT: store volatile i32 7, ptr addrspace(5) [[X]], align 4
180+
; OPT-NEXT: ret void
181+
;
182+
; DAGISEL-ASM-LABEL: flat_to_private_nonnull_load:
183+
; DAGISEL-ASM: ; %bb.0:
184+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185+
; DAGISEL-ASM-NEXT: flat_load_dword v0, v[0:1]
186+
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, 7
187+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
188+
; DAGISEL-ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
189+
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0)
190+
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
191+
;
192+
; GISEL-ASM-LABEL: flat_to_private_nonnull_load:
193+
; GISEL-ASM: ; %bb.0:
194+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195+
; GISEL-ASM-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
196+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
197+
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, 7
198+
; GISEL-ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
199+
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0)
200+
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
201+
%ptr = load ptr, ptr %p, !nonnull !{}
202+
%x = addrspacecast ptr %ptr to ptr addrspace(5)
203+
store volatile i32 7, ptr addrspace(5) %x
204+
ret void
205+
}
206+
91207
define void @private_alloca_to_flat(ptr %ptr) {
92208
; OPT-LABEL: define void @private_alloca_to_flat(
93209
; OPT-SAME: ptr [[PTR:%.*]]) {
@@ -218,15 +334,15 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
218334
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], 0
219335
; DAGISEL-ASM-NEXT: s_mov_b64 s[8:9], src_private_base
220336
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
221-
; DAGISEL-ASM-NEXT: .LBB7_3: ; %finally
337+
; DAGISEL-ASM-NEXT: .LBB11_3: ; %finally
222338
; DAGISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
223339
; DAGISEL-ASM-NEXT: s_and_b64 s[10:11], exec, s[6:7]
224340
; DAGISEL-ASM-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
225341
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s9
226342
; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2
227343
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0)
228344
; DAGISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[4:5]
229-
; DAGISEL-ASM-NEXT: s_cbranch_execnz .LBB7_3
345+
; DAGISEL-ASM-NEXT: s_cbranch_execnz .LBB11_3
230346
; DAGISEL-ASM-NEXT: ; %bb.4: ; %end
231347
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[4:5]
232348
; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
@@ -249,14 +365,14 @@ define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
249365
; GISEL-ASM-NEXT: s_mov_b64 s[6:7], 0
250366
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s9
251367
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
252-
; GISEL-ASM-NEXT: .LBB7_3: ; %finally
368+
; GISEL-ASM-NEXT: .LBB11_3: ; %finally
253369
; GISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
254370
; GISEL-ASM-NEXT: s_and_b64 s[8:9], exec, s[4:5]
255371
; GISEL-ASM-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
256372
; GISEL-ASM-NEXT: flat_store_dword v[0:1], v2
257373
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0)
258374
; GISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[6:7]
259-
; GISEL-ASM-NEXT: s_cbranch_execnz .LBB7_3
375+
; GISEL-ASM-NEXT: s_cbranch_execnz .LBB11_3
260376
; GISEL-ASM-NEXT: ; %bb.4: ; %end
261377
; GISEL-ASM-NEXT: s_or_b64 exec, exec, s[6:7]
262378
; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)

0 commit comments

Comments
 (0)