Skip to content

Commit 97d7eec

Browse files
Artem Gindinson authored and vmaksimo committed
Translate the llvm.fshl intrinsic function
"Funnel shift left" doesn't have an analogue in the OpenCL ExtInst set. We unroll `llvm.fshl.i<n>(i<n>, i<n>, i<n>)` into a small algorithm that performs the actual funnel shift. A detailed description of FSHL can be found at https://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic

Signed-off-by: Artem Gindinson <artem.gindinson@intel.com>
1 parent b8e0e3c commit 97d7eec

File tree

2 files changed

+161
-7
lines changed

2 files changed

+161
-7
lines changed

llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp

Lines changed: 98 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,20 @@ class SPIRVRegularizeLLVM : public ModulePass {
8585
/// @spirv.llvm_memset_* and replace it with @llvm.memset.
8686
void lowerMemset(MemSetInst *MSI);
8787

88+
/// No SPIR-V counterpart for @llvm.fshl.i* intrinsic. It will be lowered
89+
/// to a newly generated @spirv.llvm_fshl_i* function.
90+
/// Conceptually, FSHL:
91+
/// 1. concatenates the ints, the first one being the more significant;
92+
/// 2. left-shifts the double-width result by (shift amount % bit width);
93+
/// 3. returns the most significant bits of the shift-rotate result,
94+
/// the number of bits being equal to the size of the original integers.
95+
/// The actual implementation algorithm will be slightly different to speed
96+
/// things up.
97+
void lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic);
98+
void buildFunnelShiftLeftFunc(Function *FSHLFunc);
99+
100+
static std::string lowerLLVMIntrinsicName(IntrinsicInst *II);
101+
88102
static char ID;
89103

90104
private:
@@ -94,17 +108,22 @@ class SPIRVRegularizeLLVM : public ModulePass {
94108

95109
char SPIRVRegularizeLLVM::ID = 0;
96110

97-
void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) {
98-
if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength()))
99-
return; // To be handled in LLVMToSPIRV::transIntrinsicInst
100-
Function *IntrinsicFunc = MSI->getCalledFunction();
111+
/// Derive the name of the SPIR-V-side replacement function for the intrinsic
/// called by \p II: every '.' in the intrinsic's name becomes '_' and the
/// result is prefixed with "spirv." (e.g. "llvm.fshl.i32" yields
/// "spirv.llvm_fshl_i32").
std::string SPIRVRegularizeLLVM::lowerLLVMIntrinsicName(IntrinsicInst *II) {
  Function *Callee = II->getCalledFunction();
  assert(Callee && "Missing function");

  // Build the prefixed name in a single pass over the original name.
  std::string Lowered = "spirv.";
  for (char C : Callee->getName())
    Lowered += (C == '.') ? '_' : C;
  return Lowered;
}
119+
120+
void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) {
121+
if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength()))
122+
return; // To be handled in LLVMToSPIRV::transIntrinsicInst
123+
124+
std::string FuncName = lowerLLVMIntrinsicName(MSI);
105125
if (MSI->isVolatile())
106126
FuncName += ".volatile";
107-
108127
// Redirect @llvm.memset.* call to @spirv.llvm_memset_*
109128
Function *F = M->getFunction(FuncName);
110129
if (F) {
@@ -137,6 +156,75 @@ void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) {
137156
return;
138157
}
139158

159+
/// Give \p FSHLFunc (the replacement for a scalar @llvm.fshl.i<n>) a body that
/// computes the funnel shift left of its three integer arguments: the more
/// significant int, the less significant int and the shift amount.
/// Does nothing if \p FSHLFunc already has a body.
///
/// Generated control flow:
///   cond:   Amt = Shift urem BitWidth; jump straight to "phi" if Amt == 0.
///   rotate: Res = (Hi << Amt) | (Lo >> (BitWidth - Amt)).
///   phi:    return Hi when coming from "cond", Res when coming from "rotate".
///
/// The zero-amount bypass is required for correctness, not only for speed:
/// with Amt == 0 the right shift amount would equal BitWidth, and LLVM defines
/// a shift by the full bit width as poison.
void SPIRVRegularizeLLVM::buildFunnelShiftLeftFunc(Function *FSHLFunc) {
  if (!FSHLFunc->empty())
    return;

  auto *IntTy = dyn_cast<IntegerType>(FSHLFunc->getReturnType());
  assert(IntTy && "llvm.fshl: expected an integer return type");
  assert(FSHLFunc->arg_size() == 3 && "llvm.fshl: expected 3 arguments");
  for (Argument &Arg : FSHLFunc->args()) {
    // Arg is only read by the assert below; keep release builds warning-free.
    (void)Arg;
    // Compare the types themselves rather than their type IDs: every integer
    // type shares one type ID, so an ID comparison would accept an argument
    // whose bit width differs from the return type.
    assert(Arg.getType() == IntTy &&
           "llvm.fshl: mismatched return type and argument types");
  }

  Value *HighInt = FSHLFunc->getArg(0);
  Value *LowInt = FSHLFunc->getArg(1);
  Value *ShiftAmount = FSHLFunc->getArg(2);

  // Our function will require 3 basic blocks; the purpose of each is
  // clarified below.
  LLVMContext &Context = M->getContext();
  auto *CondBB = BasicBlock::Create(Context, "cond", FSHLFunc);
  auto *RotateBB = BasicBlock::Create(Context, "rotate", FSHLFunc); // Main logic
  auto *PhiBB = BasicBlock::Create(Context, "phi", FSHLFunc);

  IRBuilder<> Builder(CondBB);
  // The shift amount is taken modulo the bit width. If it is divisible by the
  // bit width, nothing is shifted in from the second int and the main logic
  // must be bypassed (see the function comment).
  unsigned BitWidth = IntTy->getIntegerBitWidth();
  ConstantInt *BitWidthConstant = ConstantInt::get(IntTy, BitWidth);
  auto *RotateModVal = Builder.CreateURem(ShiftAmount, BitWidthConstant);
  ConstantInt *ZeroConstant = ConstantInt::get(IntTy, 0);
  auto *CheckRotateModIfZero = Builder.CreateICmpEQ(RotateModVal, ZeroConstant);
  Builder.CreateCondBr(CheckRotateModIfZero, /*True*/ PhiBB,
                       /*False*/ RotateBB);

  // Build the actual funnel shift logic.
  Builder.SetInsertPoint(RotateBB);
  // Shift the more significant int left; its "shift amount" least significant
  // bits are zero-filled as a result of this regular shift.
  auto *ShiftLeft = Builder.CreateShl(HighInt, RotateModVal);
  // The "shift amount" most significant bits of the second int must occupy
  // the zero-filled space left by the previous operation. Therefore,
  // subtract the shift amount from the integer bit width...
  auto *SubRotateVal = Builder.CreateSub(BitWidthConstant, RotateModVal);
  // ...and right-shift the second int by this number, zero-filling the MSBs.
  auto *ShiftRight = Builder.CreateLShr(LowInt, SubRotateVal);
  // The two shifted ints occupy disjoint bit ranges, so a bitwise OR combines
  // them into the final result.
  auto *FunnelShiftRes = Builder.CreateOr(ShiftLeft, ShiftRight);
  Builder.CreateBr(PhiBB);

  // PHI basic block. If no actual shift was required, return the first, more
  // significant int. E.g. for 32-bit integers, it's equivalent to concatenating
  // the 2 ints and taking the 32 MSBs.
  Builder.SetInsertPoint(PhiBB);
  PHINode *Phi = Builder.CreatePHI(IntTy, /*NumReservedValues=*/2);
  Phi->addIncoming(FunnelShiftRes, RotateBB);
  Phi->addIncoming(HighInt, CondBB);
  Builder.CreateRet(Phi);
}
214+
215+
/// Redirect the @llvm.fshl.i<n> call \p FSHLIntrinsic to a regular function
/// with the same signature, named by lowerLLVMIntrinsicName(), whose body is
/// generated by buildFunnelShiftLeftFunc().
///
/// A separate function is used because the generated code needs its own
/// basic blocks; emitting it inline would require reworking the CFG of the
/// calling function.
void SPIRVRegularizeLLVM::lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic) {
  FunctionType *FSHLFuncTy = FSHLIntrinsic->getFunctionType();
  Type *FSHLRetTy = FSHLFuncTy->getReturnType();

  // llvm.fshl is also defined for vectors of integers, but the generated body
  // only supports scalar integers. Leave any other call untouched instead of
  // creating a function that buildFunnelShiftLeftFunc() cannot populate.
  // NOTE(review): confirm that later translation stages diagnose an
  // unlowered vector llvm.fshl as expected.
  if (!FSHLRetTy->isIntegerTy())
    return;

  const std::string FuncName = lowerLLVMIntrinsicName(FSHLIntrinsic);
  Function *FSHLFunc =
      getOrCreateFunction(M, FSHLRetTy, FSHLFuncTy->params(), FuncName);
  // No-op when the function was already populated by an earlier call site.
  buildFunnelShiftLeftFunc(FSHLFunc);
  FSHLIntrinsic->setCalledFunction(FSHLFunc);
}
227+
140228
bool SPIRVRegularizeLLVM::runOnModule(Module &Module) {
141229
M = &Module;
142230
Ctx = &M->getContext();
@@ -170,8 +258,11 @@ bool SPIRVRegularizeLLVM::regularize() {
170258
Function *CF = Call->getCalledFunction();
171259
if (CF && CF->isIntrinsic()) {
172260
removeFnAttr(Call, Attribute::NoUnwind);
173-
if (auto *MSI = dyn_cast<MemSetInst>(Call))
261+
auto *II = cast<IntrinsicInst>(Call);
262+
if (auto *MSI = dyn_cast<MemSetInst>(II))
174263
lowerMemset(MSI);
264+
else if (II->getIntrinsicID() == Intrinsic::fshl)
265+
lowerFunnelShiftLeft(II);
175266
}
176267
}
177268

@@ -254,7 +345,7 @@ bool SPIRVRegularizeLLVM::regularize() {
254345
}
255346
}
256347
for (Instruction *V : ToErase) {
257-
assert(V->user_empty());
348+
assert(V->user_empty() && "User non-empty\n");
258349
V->eraseFromParent();
259350
}
260351
}

llvm-spirv/test/llvm.fshl.ll

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; RUN: llvm-as %s -o %t.bc
2+
; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s
3+
; RUN: llvm-spirv %t.bc -o %t.spv
4+
; RUN: spirv-val %t.spv
5+
6+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
7+
target triple = "spir64-unknown-unknown"
8+
9+
; Function Attrs: nounwind readnone
10+
define spir_func i32 @Test(i32 %x, i32 %y) local_unnamed_addr #0 {
11+
entry:
12+
%0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 8)
13+
ret i32 %0
14+
}
15+
16+
; CHECK: TypeInt [[TYPE_INT:[0-9]+]] 32 0
17+
; CHECK-DAG: Constant [[TYPE_INT]] [[CONST_ROTATE:[0-9]+]] 8
18+
; CHECK-DAG: Constant [[TYPE_INT]] [[CONST_TYPE_SIZE:[0-9]+]] 32
19+
; CHECK-DAG: Constant [[TYPE_INT]] [[CONST_0:[0-9]+]] 0
20+
; CHECK: TypeFunction [[TYPE_ORIG_FUNC:[0-9]+]] [[TYPE_INT]] [[TYPE_INT]] [[TYPE_INT]]
21+
; CHECK: TypeFunction [[TYPE_FSHL_FUNC:[0-9]+]] [[TYPE_INT]] [[TYPE_INT]] [[TYPE_INT]] [[TYPE_INT]]
22+
; CHECK: TypeBool [[TYPE_BOOL:[0-9]+]]
23+
24+
; CHECK: Function [[TYPE_INT]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC]]
25+
; CHECK: FunctionParameter [[TYPE_INT]] [[X:[0-9]+]]
26+
; CHECK: FunctionParameter [[TYPE_INT]] [[Y:[0-9]+]]
27+
; CHECK: FunctionCall [[TYPE_INT]] [[CALL:[0-9]+]] [[FSHL_FUNC:[0-9]+]] [[X]] [[Y]] [[CONST_ROTATE]]
28+
; CHECK: ReturnValue [[CALL]]
29+
30+
; CHECK: Function [[TYPE_INT]] [[FSHL_FUNC]] {{[0-9]+}} [[TYPE_FSHL_FUNC]]
31+
; CHECK: FunctionParameter [[TYPE_INT]] [[X_FSHL:[0-9]+]]
32+
; CHECK: FunctionParameter [[TYPE_INT]] [[Y_FSHL:[0-9]+]]
33+
; CHECK: FunctionParameter [[TYPE_INT]] [[ROT:[0-9]+]]
34+
35+
; CHECK: Label [[MAIN_BB:[0-9]+]]
36+
; CHECK: UMod [[TYPE_INT]] [[ROTATE_MOD_SIZE:[0-9]+]] [[ROT]] [[CONST_TYPE_SIZE]]
37+
; CHECK: IEqual [[TYPE_BOOL]] [[ZERO_COND:[0-9]+]] [[ROTATE_MOD_SIZE]] [[CONST_0]]
38+
; CHECK: BranchConditional [[ZERO_COND]] [[PHI_BB:[0-9]+]] [[ROTATE_BB:[0-9]+]]
39+
40+
; CHECK: Label [[ROTATE_BB]]
41+
; CHECK: ShiftLeftLogical [[TYPE_INT]] [[X_SHIFT_LEFT:[0-9]+]] [[X_FSHL]] [[ROTATE_MOD_SIZE]]
42+
; CHECK: ISub [[TYPE_INT]] [[NEG_ROTATE:[0-9]+]] [[CONST_TYPE_SIZE]] [[ROTATE_MOD_SIZE]]
43+
; CHECK: ShiftRightLogical [[TYPE_INT]] [[Y_SHIFT_RIGHT:[0-9]+]] [[Y_FSHL]] [[NEG_ROTATE]]
44+
; CHECK: BitwiseOr [[TYPE_INT]] [[FSHL_RESULT:[0-9]+]] [[X_SHIFT_LEFT]] [[Y_SHIFT_RIGHT]]
45+
; CHECK: Branch [[PHI_BB]]
46+
47+
; CHECK: Label [[PHI_BB]]
48+
; CHECK: Phi [[TYPE_INT]] [[PHI_INST:[0-9]+]] [[FSHL_RESULT]] [[ROTATE_BB]] [[X_FSHL]] [[MAIN_BB]]
49+
; CHECK: ReturnValue [[PHI_INST]]
50+
51+
; Function Attrs: nounwind readnone speculatable willreturn
52+
declare i32 @llvm.fshl.i32(i32, i32, i32) #1
53+
54+
attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
55+
attributes #1 = { nounwind readnone speculatable willreturn }
56+
57+
!llvm.module.flags = !{!0}
58+
!opencl.ocl.version = !{!1}
59+
!opencl.spir.version = !{!2}
60+
61+
!0 = !{i32 1, !"wchar_size", i32 4}
62+
!1 = !{i32 1, i32 0}
63+
!2 = !{i32 1, i32 2}

0 commit comments

Comments
 (0)