Skip to content

Commit 97d7eec

Browse files
Artem Gindinsonvmaksimo
Artem Gindinson
authored andcommitted
Translate the llvm.fshl intrinsic function
"Funnel shift left" has no analogue in the OpenCL extended instruction set, so each `llvm.fshl.i<n>(i<n>, i<n>, i<n>)` call is unrolled into a small generated function that performs the actual funnel shift. A detailed description of FSHL can be found at https://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic. Signed-off-by: Artem Gindinson <[email protected]>
1 parent b8e0e3c commit 97d7eec

File tree

2 files changed

+161
-7
lines changed

2 files changed

+161
-7
lines changed

llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp

+98-7
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,20 @@ class SPIRVRegularizeLLVM : public ModulePass {
8585
/// @spirv.llvm_memset_* and replace it with @llvm.memset.
8686
void lowerMemset(MemSetInst *MSI);
8787

88+
/// No SPIR-V counterpart for @llvm.fshl.i* intrinsic. It will be lowered
89+
/// to a newly generated @spirv.llvm_fshl_i* function.
90+
/// Conceptually, FSHL:
91+
/// 1. concatenates the ints, the first one being the more significant;
92+
/// 2. performs a left shift on the resulting double-width int;
93+
/// 3. returns the most significant bits of the shift result,
94+
/// the number of bits being equal to the size of the original integers.
95+
/// The actual implementation algorithm will be slightly different to speed
96+
/// things up.
97+
void lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic);
98+
void buildFunnelShiftLeftFunc(Function *FSHLFunc);
99+
100+
static std::string lowerLLVMIntrinsicName(IntrinsicInst *II);
101+
88102
static char ID;
89103

90104
private:
@@ -94,17 +108,22 @@ class SPIRVRegularizeLLVM : public ModulePass {
94108

95109
char SPIRVRegularizeLLVM::ID = 0;
96110

97-
void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) {
98-
if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength()))
99-
return; // To be handled in LLVMToSPIRV::transIntrinsicInst
100-
Function *IntrinsicFunc = MSI->getCalledFunction();
111+
/// Derives the name of the SPIR-V-side replacement function for the intrinsic
/// called by \p II: every '.' in the callee's name becomes '_' and the result
/// is prefixed with "spirv." (e.g. "llvm.fshl.i32" -> "spirv.llvm_fshl_i32").
std::string SPIRVRegularizeLLVM::lowerLLVMIntrinsicName(IntrinsicInst *II) {
  Function *Callee = II->getCalledFunction();
  assert(Callee && "Missing function");

  std::string Lowered = Callee->getName().str();
  for (char &Ch : Lowered) {
    if (Ch == '.')
      Ch = '_';
  }
  return "spirv." + Lowered;
}
119+
120+
void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) {
121+
if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength()))
122+
return; // To be handled in LLVMToSPIRV::transIntrinsicInst
123+
124+
std::string FuncName = lowerLLVMIntrinsicName(MSI);
105125
if (MSI->isVolatile())
106126
FuncName += ".volatile";
107-
108127
// Redirect @llvm.memset.* call to @spirv.llvm_memset_*
109128
Function *F = M->getFunction(FuncName);
110129
if (F) {
@@ -137,6 +156,75 @@ void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) {
137156
return;
138157
}
139158

159+
/// Emits the body of \p FSHLFunc, the generated stand-in for a scalar
/// @llvm.fshl.i<n> call. Does nothing if the function already has a body.
///
/// With Hi = arg 0, Lo = arg 1, Rotate = arg 2 and N = the integer bit width,
/// the generated code computes:
///   R = Rotate urem N
///   result = (R == 0) ? Hi : (Hi << R) | (Lo >> (N - R))
///
/// \param FSHLFunc function taking three integers of one type and returning
///        that same integer type.
void SPIRVRegularizeLLVM::buildFunnelShiftLeftFunc(Function *FSHLFunc) {
  // The body only needs to be generated once per function.
  if (!FSHLFunc->empty())
    return;

  auto *IntTy = dyn_cast<IntegerType>(FSHLFunc->getReturnType());
  assert(IntTy && "llvm.fshl: expected an integer return type");
  assert(FSHLFunc->arg_size() == 3 && "llvm.fshl: expected 3 arguments");
  for (Argument &Arg : FSHLFunc->args()) {
    // Keep the loop variable "used" when asserts are compiled out.
    (void)Arg;
    // Compare the type objects themselves: comparing only the TypeID would
    // accept any integer type, whatever its bit width.
    assert(Arg.getType() == IntTy &&
           "llvm.fshl: mismatched return type and argument types");
  }

  // Three basic blocks are needed:
  //  - "cond" decides whether any shifting is required at all;
  //  - "rotate" holds the actual funnel shift logic;
  //  - "phi" merges both paths and returns the result.
  auto *CondBB = BasicBlock::Create(M->getContext(), "cond", FSHLFunc);
  auto *RotateBB = BasicBlock::Create(M->getContext(), "rotate", FSHLFunc);
  auto *PhiBB = BasicBlock::Create(M->getContext(), "phi", FSHLFunc);

  IRBuilder<> Builder(CondBB);
  // If the shift amount is a multiple of the bit width, no shifting is
  // needed and the main logic is bypassed. The bypass also keeps the
  // right shift below from ever shifting by the full bit width.
  unsigned BitWidth = IntTy->getIntegerBitWidth();
  ConstantInt *BitWidthConstant = Builder.getInt({BitWidth, BitWidth});
  auto *RotateModVal =
      Builder.CreateURem(/*Rotate*/ FSHLFunc->getArg(2), BitWidthConstant);
  ConstantInt *ZeroConstant = Builder.getInt({BitWidth, 0});
  auto *CheckRotateModIfZero = Builder.CreateICmpEQ(RotateModVal, ZeroConstant);
  Builder.CreateCondBr(CheckRotateModIfZero, /*True*/ PhiBB,
                       /*False*/ RotateBB);

  // Build the actual funnel shift logic.
  Builder.SetInsertPoint(RotateBB);
  // Shift the more significant int left; its "rotate" lowest bits become
  // zero as a result of this regular shift.
  auto *ShiftLeft = Builder.CreateShl(FSHLFunc->getArg(0), RotateModVal);
  // The "rotate" most significant bits of the second int must fill that
  // zeroed space. Therefore, subtract the "rotate" number from the bit
  // width...
  auto *SubRotateVal = Builder.CreateSub(BitWidthConstant, RotateModVal);
  // ...and right-shift the second int by this number, zero-filling the MSBs.
  auto *ShiftRight = Builder.CreateLShr(FSHLFunc->getArg(1), SubRotateVal);
  // The two shifted values have no set bits in common, so a bitwise OR
  // combines them into the final result.
  auto *FunnelShiftRes = Builder.CreateOr(ShiftLeft, ShiftRight);
  Builder.CreateBr(PhiBB);

  // PHI basic block. If no actual shift was required, return the first, more
  // significant int. E.g. for 32-bit integers, that is equivalent to
  // concatenating the 2 ints and taking the 32 MSBs.
  Builder.SetInsertPoint(PhiBB);
  // Exactly two incoming edges: one from "rotate", one from "cond".
  PHINode *Phi = Builder.CreatePHI(IntTy, 2);
  Phi->addIncoming(FunnelShiftRes, RotateBB);
  Phi->addIncoming(FSHLFunc->getArg(0), CondBB);
  Builder.CreateRet(Phi);
}
214+
215+
/// Redirects the @llvm.fshl.* call \p FSHLIntrinsic to a function that
/// implements the funnel shift with regular instructions.
///
/// A separate function is used so that the CFG of the caller does not have to
/// be reworked; the call instruction itself is kept and only its callee is
/// swapped.
void SPIRVRegularizeLLVM::lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic) {
  // The replacement shares the intrinsic's signature.
  FunctionType *IntrinsicTy = FSHLIntrinsic->getFunctionType();
  const std::string LoweredName = lowerLLVMIntrinsicName(FSHLIntrinsic);

  Function *Replacement = getOrCreateFunction(
      M, IntrinsicTy->getReturnType(), IntrinsicTy->params(), LoweredName);
  // Fill in the body (a no-op if it has already been generated).
  buildFunnelShiftLeftFunc(Replacement);

  FSHLIntrinsic->setCalledFunction(Replacement);
}
227+
140228
bool SPIRVRegularizeLLVM::runOnModule(Module &Module) {
141229
M = &Module;
142230
Ctx = &M->getContext();
@@ -170,8 +258,11 @@ bool SPIRVRegularizeLLVM::regularize() {
170258
Function *CF = Call->getCalledFunction();
171259
if (CF && CF->isIntrinsic()) {
172260
removeFnAttr(Call, Attribute::NoUnwind);
173-
if (auto *MSI = dyn_cast<MemSetInst>(Call))
261+
auto *II = cast<IntrinsicInst>(Call);
262+
if (auto *MSI = dyn_cast<MemSetInst>(II))
174263
lowerMemset(MSI);
264+
else if (II->getIntrinsicID() == Intrinsic::fshl)
265+
lowerFunnelShiftLeft(II);
175266
}
176267
}
177268

@@ -254,7 +345,7 @@ bool SPIRVRegularizeLLVM::regularize() {
254345
}
255346
}
256347
for (Instruction *V : ToErase) {
257-
assert(V->user_empty());
348+
assert(V->user_empty() && "User non-empty\n");
258349
V->eraseFromParent();
259350
}
260351
}

llvm-spirv/test/llvm.fshl.ll

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; RUN: llvm-as %s -o %t.bc
2+
; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s
3+
; RUN: llvm-spirv %t.bc -o %t.spv
4+
; RUN: spirv-val %t.spv
5+
6+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
7+
target triple = "spir64-unknown-unknown"
8+
9+
; Function Attrs: nounwind readnone
10+
; Calls @llvm.fshl.i32 on %x and %y with a constant shift amount of 8 and
; returns the result; the CHECK lines below match the translated output.
define spir_func i32 @Test(i32 %x, i32 %y) local_unnamed_addr #0 {
11+
entry:
12+
%0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 8)
13+
ret i32 %0
14+
}
15+
16+
; CHECK: TypeInt [[TYPE_INT:[0-9]+]] 32 0
17+
; CHECK-DAG: Constant [[TYPE_INT]] [[CONST_ROTATE:[0-9]+]] 8
18+
; CHECK-DAG: Constant [[TYPE_INT]] [[CONST_TYPE_SIZE:[0-9]+]] 32
19+
; CHECK-DAG: Constant [[TYPE_INT]] [[CONST_0:[0-9]+]] 0
20+
; CHECK: TypeFunction [[TYPE_ORIG_FUNC:[0-9]+]] [[TYPE_INT]] [[TYPE_INT]] [[TYPE_INT]]
21+
; CHECK: TypeFunction [[TYPE_FSHL_FUNC:[0-9]+]] [[TYPE_INT]] [[TYPE_INT]] [[TYPE_INT]] [[TYPE_INT]]
22+
; CHECK: TypeBool [[TYPE_BOOL:[0-9]+]]
23+
24+
; CHECK: Function [[TYPE_INT]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC]]
25+
; CHECK: FunctionParameter [[TYPE_INT]] [[X:[0-9]+]]
26+
; CHECK: FunctionParameter [[TYPE_INT]] [[Y:[0-9]+]]
27+
; CHECK: FunctionCall [[TYPE_INT]] [[CALL:[0-9]+]] [[FSHL_FUNC:[0-9]+]] [[X]] [[Y]] [[CONST_ROTATE]]
28+
; CHECK: ReturnValue [[CALL]]
29+
30+
; CHECK: Function [[TYPE_INT]] [[FSHL_FUNC]] {{[0-9]+}} [[TYPE_FSHL_FUNC]]
31+
; CHECK: FunctionParameter [[TYPE_INT]] [[X_FSHL:[0-9]+]]
32+
; CHECK: FunctionParameter [[TYPE_INT]] [[Y_FSHL:[0-9]+]]
33+
; CHECK: FunctionParameter [[TYPE_INT]] [[ROT:[0-9]+]]
34+
35+
; CHECK: Label [[MAIN_BB:[0-9]+]]
36+
; CHECK: UMod [[TYPE_INT]] [[ROTATE_MOD_SIZE:[0-9]+]] [[ROT]] [[CONST_TYPE_SIZE]]
37+
; CHECK: IEqual [[TYPE_BOOL]] [[ZERO_COND:[0-9]+]] [[ROTATE_MOD_SIZE]] [[CONST_0]]
38+
; CHECK: BranchConditional [[ZERO_COND]] [[PHI_BB:[0-9]+]] [[ROTATE_BB:[0-9]+]]
39+
40+
; CHECK: Label [[ROTATE_BB]]
41+
; CHECK: ShiftLeftLogical [[TYPE_INT]] [[X_SHIFT_LEFT:[0-9]+]] [[X_FSHL]] [[ROTATE_MOD_SIZE]]
42+
; CHECK: ISub [[TYPE_INT]] [[NEG_ROTATE:[0-9]+]] [[CONST_TYPE_SIZE]] [[ROTATE_MOD_SIZE]]
43+
; CHECK: ShiftRightLogical [[TYPE_INT]] [[Y_SHIFT_RIGHT:[0-9]+]] [[Y_FSHL]] [[NEG_ROTATE]]
44+
; CHECK: BitwiseOr [[TYPE_INT]] [[FSHL_RESULT:[0-9]+]] [[X_SHIFT_LEFT]] [[Y_SHIFT_RIGHT]]
45+
; CHECK: Branch [[PHI_BB]]
46+
47+
; CHECK: Label [[PHI_BB]]
48+
; CHECK: Phi [[TYPE_INT]] [[PHI_INST:[0-9]+]] [[FSHL_RESULT]] [[ROTATE_BB]] [[X_FSHL]] [[MAIN_BB]]
49+
; CHECK: ReturnValue [[PHI_INST]]
50+
51+
; Function Attrs: nounwind readnone speculatable willreturn
52+
declare i32 @llvm.fshl.i32(i32, i32, i32) #1
53+
54+
attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
55+
attributes #1 = { nounwind readnone speculatable willreturn }
56+
57+
!llvm.module.flags = !{!0}
58+
!opencl.ocl.version = !{!1}
59+
!opencl.spir.version = !{!2}
60+
61+
!0 = !{i32 1, !"wchar_size", i32 4}
62+
!1 = !{i32 1, i32 0}
63+
!2 = !{i32 1, i32 2}

0 commit comments

Comments
 (0)