Commit ddb75ca

[AMDGPU] Utilities to asan instrument memory instructions. (llvm#98863)
This change adds the utilities required to ASan-instrument memory instructions. In the "amdgpu-sw-lower-lds" pass (llvm#87265), lowering LDS accesses to global memory creates new global-memory instructions that need to be ASan-instrumented.
1 parent 455990d commit ddb75ca
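For orientation, here is a minimal sketch of how a lowering pass could drive the two new utilities on an instruction it has just created. This is not part of the commit; the helper name, the fixed Scale/Offset values of 3/0, and the surrounding setup are assumptions for illustration only.

// Hypothetical usage sketch -- not code from this commit. Assumes the usual
// LLVM includes plus AMDGPUAsanInstrumentation.h, and the default ASan shadow
// parameters (Scale = 3, Offset = 0); a real pass may obtain these elsewhere.
static void instrumentNewGlobalAccess(Module &M, Instruction *NewI) {
  SmallVector<InterestingMemoryOperand, 4> OperandsToInstrument;
  // Collect the memory operands of the freshly created instruction.
  AMDGPU::getInterestingMemoryOperands(M, NewI, OperandsToInstrument);
  for (InterestingMemoryOperand &Op : OperandsToInstrument) {
    IRBuilder<> IRB(Op.getInsn());
    // Emit the shadow check and report block in front of the access.
    AMDGPU::instrumentAddress(M, IRB, Op.getInsn(), Op.getInsn(), Op.getPtr(),
                              Op.Alignment, Op.TypeStoreSize.getFixedValue(),
                              Op.IsWrite, /*SizeArgument=*/nullptr,
                              /*UseCalls=*/false, /*Recover=*/false,
                              /*Scale=*/3, /*Offset=*/0);
  }
}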

File tree: 3 files changed (+393, -0)
  llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp (+332)
  llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h (+60)
  llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt (+1)
Diff for: llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp (+332)

@@ -0,0 +1,332 @@
//===AMDGPUAsanInstrumentation.cpp - ASAN related helper functions===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-------------------------------------------------------------===//

#include "AMDGPUAsanInstrumentation.h"

#define DEBUG_TYPE "amdgpu-asan-instrumentation"

using namespace llvm;

namespace llvm {
namespace AMDGPU {

static uint64_t getRedzoneSizeForScale(int AsanScale) {
  // Redzone used for stack and globals is at least 32 bytes.
  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
  return std::max(32U, 1U << AsanScale);
}

static uint64_t getMinRedzoneSizeForGlobal(int AsanScale) {
  return getRedzoneSizeForScale(AsanScale);
}

uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes) {
  constexpr uint64_t kMaxRZ = 1 << 18;
  const uint64_t MinRZ = getMinRedzoneSizeForGlobal(AsanScale);

  uint64_t RZ = 0;
  if (SizeInBytes <= MinRZ / 2) {
    // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
    // at least 32 bytes, optimize when SizeInBytes is less than or equal to
    // half of MinRZ.
    RZ = MinRZ - SizeInBytes;
  } else {
    // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
    RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);

    // Round up to multiple of MinRZ.
    if (SizeInBytes % MinRZ)
      RZ += MinRZ - (SizeInBytes % MinRZ);
  }

  assert((RZ + SizeInBytes) % MinRZ == 0);

  return RZ;
}

static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
  size_t Res = llvm::countr_zero(TypeSize / 8);
  return Res;
}

static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
                                         Value *Cond, bool Recover) {
  Value *ReportCond = Cond;
  if (!Recover) {
    auto *Ballot =
        IRB.CreateIntrinsic(Intrinsic::amdgcn_ballot, IRB.getInt64Ty(), {Cond});
    ReportCond = IRB.CreateIsNotNull(Ballot);
  }

  auto *Trm = SplitBlockAndInsertIfThen(
      ReportCond, &*IRB.GetInsertPoint(), false,
      MDBuilder(M.getContext()).createUnlikelyBranchWeights());
  Trm->getParent()->setName("asan.report");

  if (Recover)
    return Trm;

  Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
  IRB.SetInsertPoint(Trm);
  return IRB.CreateIntrinsic(Intrinsic::amdgcn_unreachable, {}, {});
}

static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
                                Value *AddrLong, Value *ShadowValue,
                                uint32_t TypeStoreSize, int AsanScale) {
  uint64_t Granularity = static_cast<uint64_t>(1) << AsanScale;
  // Addr & (Granularity - 1)
  Value *LastAccessedByte =
      IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
  // (Addr & (Granularity - 1)) + size - 1
  if (TypeStoreSize / 8 > 1)
    LastAccessedByte = IRB.CreateAdd(
        LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
  // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
  LastAccessedByte =
      IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
  // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
  return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}

static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
                                      Type *IntptrTy, Instruction *InsertBefore,
                                      Value *Addr, bool IsWrite,
                                      size_t AccessSizeIndex,
                                      Value *SizeArgument, bool Recover) {
  IRB.SetInsertPoint(InsertBefore);
  CallInst *Call = nullptr;
  SmallString<128> kAsanReportErrorTemplate{"__asan_report_"};
  SmallString<64> TypeStr{IsWrite ? "store" : "load"};
  SmallString<64> EndingStr{Recover ? "_noabort" : ""};

  SmallString<128> AsanErrorCallbackSizedString;
  raw_svector_ostream AsanErrorCallbackSizedOS(AsanErrorCallbackSizedString);
  AsanErrorCallbackSizedOS << kAsanReportErrorTemplate << TypeStr << "_n"
                           << EndingStr;

  SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
  AttributeList AL2;
  FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction(
      AsanErrorCallbackSizedOS.str(),
      FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
  SmallVector<Type *, 2> Args1{1, IntptrTy};
  AttributeList AL1;

  SmallString<128> AsanErrorCallbackString;
  raw_svector_ostream AsanErrorCallbackOS(AsanErrorCallbackString);
  AsanErrorCallbackOS << kAsanReportErrorTemplate << TypeStr
                      << (1ULL << AccessSizeIndex) << EndingStr;

  FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
      AsanErrorCallbackOS.str(),
      FunctionType::get(IRB.getVoidTy(), Args1, false), AL1);
  if (SizeArgument) {
    Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument});
  } else {
    Call = IRB.CreateCall(AsanErrorCallback, Addr);
  }

  Call->setCannotMerge();
  return Call;
}

static Value *memToShadow(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
                          Value *Shadow, int AsanScale, uint32_t AsanOffset) {
  // Shadow >> scale
  Shadow = IRB.CreateLShr(Shadow, AsanScale);
  if (AsanOffset == 0)
    return Shadow;
  // (Shadow >> scale) | offset
  Value *ShadowBase = ConstantInt::get(IntptrTy, AsanOffset);
  return IRB.CreateAdd(Shadow, ShadowBase);
}

void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
                       Instruction *InsertBefore, Value *Addr,
                       MaybeAlign Alignment, uint32_t TypeStoreSize,
                       bool IsWrite, Value *SizeArgument, bool UseCalls,
                       bool Recover, int AsanScale, int AsanOffset) {
  Type *AddrTy = Addr->getType();
  Type *IntptrTy = M.getDataLayout().getIntPtrType(
      M.getContext(), AddrTy->getPointerAddressSpace());
  IRB.SetInsertPoint(InsertBefore);
  size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize);
  Type *ShadowTy = IntegerType::get(M.getContext(),
                                    std::max(8U, TypeStoreSize >> AsanScale));
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
  Value *ShadowPtr =
      memToShadow(M, IRB, IntptrTy, AddrLong, AsanScale, AsanOffset);
  const uint64_t ShadowAlign =
      std::max<uint64_t>(Alignment.valueOrOne().value() >> AsanScale, 1);
  Value *ShadowValue = IRB.CreateAlignedLoad(
      ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign));
  Value *Cmp = IRB.CreateIsNotNull(ShadowValue);
  auto *Cmp2 = createSlowPathCmp(M, IRB, IntptrTy, AddrLong, ShadowValue,
                                 TypeStoreSize, AsanScale);
  Cmp = IRB.CreateAnd(Cmp, Cmp2);
  Instruction *CrashTerm = genAMDGPUReportBlock(M, IRB, Cmp, Recover);
  Instruction *Crash =
      generateCrashCode(M, IRB, IntptrTy, CrashTerm, AddrLong, IsWrite,
                        AccessSizeIndex, SizeArgument, Recover);
  Crash->setDebugLoc(OrigIns->getDebugLoc());
  return;
}

void getInterestingMemoryOperands(
    Module &M, Instruction *I,
    SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
  const DataLayout &DL = M.getDataLayout();
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
                             LI->getType(), LI->getAlign());
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
                             SI->getValueOperand()->getType(), SI->getAlign());
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
                             RMW->getValOperand()->getType(), std::nullopt);
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
                             XCHG->getCompareOperand()->getType(),
                             std::nullopt);
  } else if (auto CI = dyn_cast<CallInst>(I)) {
    switch (CI->getIntrinsicID()) {
    case Intrinsic::masked_load:
    case Intrinsic::masked_store:
    case Intrinsic::masked_gather:
    case Intrinsic::masked_scatter: {
      bool IsWrite = CI->getType()->isVoidTy();
      // Masked store has an initial operand for the value.
      unsigned OpOffset = IsWrite ? 1 : 0;
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      MaybeAlign Alignment = Align(1);
      // Otherwise no alignment guarantees. We probably got Undef.
      if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
        Alignment = Op->getMaybeAlignValue();
      Value *Mask = CI->getOperand(2 + OpOffset);
      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
      break;
    }
    case Intrinsic::masked_expandload:
    case Intrinsic::masked_compressstore: {
      bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
      unsigned OpOffset = IsWrite ? 1 : 0;
      auto BasePtr = CI->getOperand(OpOffset);
      MaybeAlign Alignment = BasePtr->getPointerAlignment(DL);
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      IRBuilder<> IB(I);
      Value *Mask = CI->getOperand(1 + OpOffset);
      Type *IntptrTy = M.getDataLayout().getIntPtrType(
          M.getContext(), BasePtr->getType()->getPointerAddressSpace());
      // Use the popcount of Mask as the effective vector length.
      Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
      Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
      Value *EVL = IB.CreateAddReduce(ExtMask);
      Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
                               EVL);
      break;
    }
    case Intrinsic::vp_load:
    case Intrinsic::vp_store:
    case Intrinsic::experimental_vp_strided_load:
    case Intrinsic::experimental_vp_strided_store: {
      auto *VPI = cast<VPIntrinsic>(CI);
      unsigned IID = CI->getIntrinsicID();
      bool IsWrite = CI->getType()->isVoidTy();
      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(DL);
      Value *Stride = nullptr;
      if (IID == Intrinsic::experimental_vp_strided_store ||
          IID == Intrinsic::experimental_vp_strided_load) {
        Stride = VPI->getOperand(PtrOpNo + 1);
        // Use the pointer alignment as the element alignment if the stride is
        // a multiple of the pointer alignment. Otherwise, the element
        // alignment should be Align(1).
        unsigned PointerAlign = Alignment.valueOrOne().value();
        if (!isa<ConstantInt>(Stride) ||
            cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
          Alignment = Align(1);
      }
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
                               VPI->getMaskParam(), VPI->getVectorLengthParam(),
                               Stride);
      break;
    }
    case Intrinsic::vp_gather:
    case Intrinsic::vp_scatter: {
      auto *VPI = cast<VPIntrinsic>(CI);
      unsigned IID = CI->getIntrinsicID();
      bool IsWrite = IID == Intrinsic::vp_scatter;
      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
      MaybeAlign Alignment = VPI->getPointerAlignment();
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
                               VPI->getMaskParam(),
                               VPI->getVectorLengthParam());
      break;
    }
    case Intrinsic::amdgcn_raw_buffer_load:
    case Intrinsic::amdgcn_raw_ptr_buffer_load:
    case Intrinsic::amdgcn_raw_buffer_load_format:
    case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
    case Intrinsic::amdgcn_raw_tbuffer_load:
    case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
    case Intrinsic::amdgcn_struct_buffer_load:
    case Intrinsic::amdgcn_struct_ptr_buffer_load:
    case Intrinsic::amdgcn_struct_buffer_load_format:
    case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
    case Intrinsic::amdgcn_struct_tbuffer_load:
    case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
    case Intrinsic::amdgcn_s_buffer_load:
    case Intrinsic::amdgcn_global_load_tr_b64:
    case Intrinsic::amdgcn_global_load_tr_b128: {
      unsigned PtrOpNo = 0;
      bool IsWrite = false;
      Type *Ty = CI->getType();
      Value *Ptr = CI->getArgOperand(PtrOpNo);
      MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
      break;
    }
    case Intrinsic::amdgcn_raw_tbuffer_store:
    case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
    case Intrinsic::amdgcn_raw_buffer_store:
    case Intrinsic::amdgcn_raw_ptr_buffer_store:
    case Intrinsic::amdgcn_raw_buffer_store_format:
    case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
    case Intrinsic::amdgcn_struct_buffer_store:
    case Intrinsic::amdgcn_struct_ptr_buffer_store:
    case Intrinsic::amdgcn_struct_buffer_store_format:
    case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
    case Intrinsic::amdgcn_struct_tbuffer_store:
    case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
      unsigned PtrOpNo = 1;
      bool IsWrite = true;
      Value *Ptr = CI->getArgOperand(PtrOpNo);
      Type *Ty = Ptr->getType();
      MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
      break;
    }
    default:
      for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
        if (Type *Ty = CI->getParamByRefType(ArgNo)) {
          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
        } else if (Type *Ty = CI->getParamByValType(ArgNo)) {
          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
        }
      }
    }
  }
}
} // end namespace AMDGPU
} // end namespace llvm
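As a quick worked example of the redzone sizing above (editor's illustration, not part of the commit), here is what getRedzoneSizeForGlobal produces at the default shadow scale of 3, where MinRZ = 32:

// Illustrative values, assuming AsanScale = 3 (MinRZ = 32, kMaxRZ = 1 << 18):
//   SizeInBytes = 4   -> small-object path: RZ = 32 - 4 = 28        (4 + 28 = 32)
//   SizeInBytes = 100 -> RZ = clamp((100 / 32 / 4) * 32, 32, 1 << 18) = 32,
//                        then rounded up by 32 - (100 % 32) = 28, so RZ = 60
//                        (100 + 60 = 160, a multiple of 32)
assert(AMDGPU::getRedzoneSizeForGlobal(/*AsanScale=*/3, 4) == 28);
assert(AMDGPU::getRedzoneSizeForGlobal(/*AsanScale=*/3, 100) == 60);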
Diff for: llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h (+60)

@@ -0,0 +1,60 @@
//===AMDGPUAsanInstrumentation.h - ASAN helper functions -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===--------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H

#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "AMDGPUMemoryUtils.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

namespace llvm {
namespace AMDGPU {

/// Given the size in bytes of the value to be instrumented,
/// returns the corresponding redzone size.
uint64_t getRedzoneSizeForGlobal(int Scale, uint64_t SizeInBytes);

/// Instrument the memory operand Addr.
/// Generates report blocks that catch the addressing errors.
void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
                       Instruction *InsertBefore, Value *Addr,
                       MaybeAlign Alignment, uint32_t TypeStoreSize,
                       bool IsWrite, Value *SizeArgument, bool UseCalls,
                       bool Recover, int Scale, int Offset);

/// Get all the memory operands from the instruction
/// that need to be instrumented.
void getInterestingMemoryOperands(
    Module &M, Instruction *I,
    SmallVectorImpl<InterestingMemoryOperand> &Interesting);

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
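For reference, a scalar model of the per-access check that instrumentAddress emits (editor's illustration under the usual ASan shadow encoding at scale 3; not code from this commit):

// Hypothetical scalar model of the emitted check, assuming scale 3 (8-byte
// shadow granules) and the standard encoding: shadow byte 0 means the whole
// granule is addressable, a nonzero value k means only its first k bytes are.
#include <cstdint>
static bool isPoisonedAccess(uint64_t Addr, uint32_t AccessSize,
                             int8_t ShadowByte) {
  if (ShadowByte == 0)
    return false;                        // fast path: granule fully addressable
  int64_t LastAccessedByte = (Addr & 7) + AccessSize - 1;
  return LastAccessedByte >= ShadowByte; // slow path, cf. createSlowPathCmp
}
// e.g. isPoisonedAccess(0x1004, 4, 6) == true: (0x1004 & 7) + 4 - 1 = 7 >= 6.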

Diff for: llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt (+1)

@@ -1,4 +1,5 @@
 add_llvm_component_library(LLVMAMDGPUUtils
+  AMDGPUAsanInstrumentation.cpp
   AMDGPUAsmUtils.cpp
   AMDGPUBaseInfo.cpp
   AMDGPUDelayedMCExpr.cpp
