Skip to content

Commit 0f0079c

Browse files
[X86][GlobalISel] Added support for llvm.get.rounding (#147716)
- This implementation is adapted from the SelectionDAG lowering in X86TargetLowering::LowerGET_ROUNDING.
- llvm.set.rounding will be added in a later change because it requires MXCSR updates, which are currently unsupported.
1 parent eee723f commit 0f0079c

File tree

12 files changed

+423
-137
lines changed

12 files changed

+423
-137
lines changed

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2424,6 +2424,11 @@ class LLVM_ABI MachineIRBuilder {
24242424
return buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {});
24252425
}
24262426

2427+
/// Build and insert \p Dst = G_GET_ROUNDING
///
/// G_GET_ROUNDING is built with no source operands; \p Dst is its only
/// operand and receives the result.
///
/// \return the MachineInstrBuilder produced by buildInstr.
2428+
MachineInstrBuilder buildGetRounding(const DstOp &Dst) {
2429+
return buildInstr(TargetOpcode::G_GET_ROUNDING, {Dst}, {});
2430+
}
2431+
24272432
virtual MachineInstrBuilder
24282433
buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, ArrayRef<SrcOp> SrcOps,
24292434
std::optional<unsigned> Flags = std::nullopt);

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,8 @@ HANDLE_TARGET_OPCODE(G_GET_FPMODE)
744744
HANDLE_TARGET_OPCODE(G_SET_FPMODE)
745745
HANDLE_TARGET_OPCODE(G_RESET_FPMODE)
746746

747+
HANDLE_TARGET_OPCODE(G_GET_ROUNDING)
748+
747749
/// Generic pointer offset
748750
HANDLE_TARGET_OPCODE(G_PTR_ADD)
749751

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,6 +1267,12 @@ def G_READSTEADYCOUNTER : GenericInstruction {
12671267
let hasSideEffects = true;
12681268
}
12691269

1270+
// Generic opcode emitted by the IRTranslator for the llvm.get.rounding
// intrinsic. It defines a single result ($dst) and takes no inputs.
// NOTE(review): hasSideEffects is presumably set so the read is not moved or
// merged across instructions that change the rounding mode - confirm.
def G_GET_ROUNDING : GenericInstruction {
1271+
let OutOperandList = (outs type0:$dst);
1272+
let InOperandList = (ins);
1273+
let hasSideEffects = true;
1274+
}
1275+
12701276
//------------------------------------------------------------------------------
12711277
// Memory ops
12721278
//------------------------------------------------------------------------------

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2593,6 +2593,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
25932593
case Intrinsic::reset_fpmode:
25942594
MIRBuilder.buildResetFPMode();
25952595
return true;
2596+
case Intrinsic::get_rounding:
2597+
MIRBuilder.buildGetRounding(getOrCreateVReg(CI));
2598+
return true;
25962599
case Intrinsic::vscale: {
25972600
MIRBuilder.buildVScale(getOrCreateVReg(CI), 1);
25982601
return true;

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
1818
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
1919
#include "llvm/CodeGen/MachineConstantPool.h"
20+
#include "llvm/CodeGen/MachineFrameInfo.h"
2021
#include "llvm/CodeGen/TargetOpcodes.h"
2122
#include "llvm/CodeGen/ValueTypes.h"
2223
#include "llvm/IR/DerivedTypes.h"
@@ -108,6 +109,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
108109
.legalFor(HasSSE2 || UseX87, {s64})
109110
.legalFor(UseX87, {s80});
110111

112+
getActionDefinitionsBuilder(G_GET_ROUNDING).customFor({s32});
113+
111114
// merge/unmerge
112115
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
113116
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
@@ -611,6 +614,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
611614
return legalizeSITOFP(MI, MRI, Helper);
612615
case TargetOpcode::G_FPTOSI:
613616
return legalizeFPTOSI(MI, MRI, Helper);
617+
case TargetOpcode::G_GET_ROUNDING:
618+
return legalizeGETROUNDING(MI, MRI, Helper);
614619
}
615620
llvm_unreachable("expected switch to return");
616621
}
@@ -777,6 +782,82 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
777782
return true;
778783
}
779784

785+
/// Custom legalization of G_GET_ROUNDING.
///
/// Replaces \p MI with a sequence that stores the FP control word to a stack
/// temporary (G_FNSTCW16), loads it back as an s16, and converts its
/// rounding-control bits to the GET_ROUNDING encoding through a packed lookup
/// table. The result is written to the destination register of \p MI, and
/// \p MI is erased.
///
/// \return true (this function has no failure path).
bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
786+
MachineRegisterInfo &MRI,
787+
LegalizerHelper &Helper) const {
788+
/*
789+
The rounding mode is in bits 11:10 of FPSR, and has the following
790+
settings:
791+
00 Round to nearest
792+
01 Round to -inf
793+
10 Round to +inf
794+
11 Round to 0
795+
796+
GET_ROUNDING, on the other hand, expects the following:
797+
-1 Undefined
798+
0 Round to 0
799+
1 Round to nearest
800+
2 Round to +inf
801+
3 Round to -inf
802+
803+
To perform the conversion, we use a packed lookup table of the four 2-bit
804+
values that we can index by FPSR[11:10]
805+
0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10]
806+
807+
(0x2d >> ((FPSR >> 9) & 6)) & 3

Note: "FPSR" in this comment refers to the 16-bit FP control word that
G_FNSTCW16 stores to the stack slot below.
808+
*/
809+
810+
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
811+
MachineFunction &MF = MIRBuilder.getMF();
812+
Register Dst = MI.getOperand(0).getReg();
813+
LLT DstTy = MRI.getType(Dst);
814+
const LLT s8 = LLT::scalar(8);
815+
const LLT s16 = LLT::scalar(16);
816+
const LLT s32 = LLT::scalar(32);
817+
818+
// Save FP Control Word to stack slot
// The slot is 2 bytes with 2-byte alignment, matching the s16 load below.
819+
int MemSize = 2;
820+
Align Alignment = Align(2);
821+
MachinePointerInfo PtrInfo;
822+
auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
823+
Alignment, PtrInfo);
824+
Register StackPtr = StackTemp.getReg(0);
825+
826+
auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
827+
MemSize, Alignment);
828+
829+
// Store FP Control Word to stack slot using G_FNSTCW16
830+
MIRBuilder.buildInstr(X86::G_FNSTCW16)
831+
.addUse(StackPtr)
832+
.addMemOperand(StoreMMO);
833+
834+
// Load FP Control Word from stack slot
835+
auto LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
836+
MemSize, Alignment);
837+
838+
// Compute the LUT bit offset: (CW >> 9) & 6 == 2 * CW[11:10], i.e. the
// position of the selected 2-bit entry inside the packed table. The
// intermediate value is narrowed to s8 for the mask and widened back to s32
// so it can be used as the shift amount of the s32 LUT.
auto CWD32 =
839+
MIRBuilder.buildZExt(s32, MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO));
840+
auto Shifted8 = MIRBuilder.buildTrunc(
841+
s8, MIRBuilder.buildLShr(s32, CWD32, MIRBuilder.buildConstant(s8, 9)));
842+
auto Masked32 = MIRBuilder.buildZExt(
843+
s32, MIRBuilder.buildAnd(s8, Shifted8, MIRBuilder.buildConstant(s8, 6)));
844+
845+
// LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding
846+
// mode (from bits 11:10 of the control word) to the values expected by
847+
// GET_ROUNDING. The mapping is performed by shifting LUT right by twice the
848+
// extracted rounding mode and masking the result with 3 to obtain the final
// GET_ROUNDING value.
849+
auto LUT = MIRBuilder.buildConstant(s32, 0x2d);
850+
auto LUTShifted = MIRBuilder.buildLShr(s32, LUT, Masked32);
851+
auto RetVal =
852+
MIRBuilder.buildAnd(s32, LUTShifted, MIRBuilder.buildConstant(s32, 3));
853+
// The arithmetic above is done in s32; adjust to the destination type
// before copying into Dst.
auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal);
854+
855+
MIRBuilder.buildCopy(Dst, RetValTrunc);
856+
857+
// The original G_GET_ROUNDING has been fully replaced by the sequence above.
MI.eraseFromParent();
858+
return true;
859+
}
860+
780861
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
781862
MachineInstr &MI) const {
782863
return true;

llvm/lib/Target/X86/GISel/X86LegalizerInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ class X86LegalizerInfo : public LegalizerInfo {
5454

5555
bool legalizeFPTOSI(MachineInstr &MI, MachineRegisterInfo &MRI,
5656
LegalizerHelper &Helper) const;
57+
58+
bool legalizeGETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI,
59+
LegalizerHelper &Helper) const;
5760
};
5861
} // namespace llvm
5962
#endif

llvm/lib/Target/X86/X86InstrGISel.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,13 @@ def G_FIST : X86GenericInstruction {
2727
let mayStore = true;
2828
}
2929

30+
// X86 generic instruction with no results and a single pointer operand
// ($dst); it is marked as storing to memory and as having side effects.
// The X86 legalizer uses it to store the FP control word to a stack slot
// when lowering G_GET_ROUNDING.
def G_FNSTCW16 : X86GenericInstruction {
31+
let OutOperandList = (outs);
32+
let InOperandList = (ins ptype0:$dst);
33+
let hasSideEffects = true;
34+
let mayStore = true;
35+
}
36+
3037
def : GINodeEquiv<G_FILD, X86fild>;
3138
def : GINodeEquiv<G_FIST, X86fp_to_mem>;
39+
def : GINodeEquiv<G_FNSTCW16, X86fp_cwd_get16>;

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,9 @@
636636
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
637637
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
638638
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
639+
# DEBUG-NEXT: G_GET_ROUNDING (opcode {{[0-9]+}}): 1 type index, 0 imm indices
640+
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
641+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
639642
# DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
640643
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
641644
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK

llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,9 @@
624624
# DEBUG-NEXT: G_RESET_FPMODE (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
625625
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
626626
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
627+
# DEBUG-NEXT: G_GET_ROUNDING (opcode {{[0-9]+}}): 1 type index, 0 imm indices
628+
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
629+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
627630
# DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
628631
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
629632
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK

0 commit comments

Comments
 (0)