Skip to content

[X86][GlobalISel] Added support for llvm.get.rounding #147716

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -2382,6 +2382,11 @@ class LLVM_ABI MachineIRBuilder {
return buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {});
}

/// Build and insert \p Dst = G_GET_ROUNDING
///
/// G_GET_ROUNDING is built with \p Dst as its only destination operand and
/// with no source operands.
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildGetRounding(const DstOp &Dst) {
return buildInstr(TargetOpcode::G_GET_ROUNDING, {Dst}, {});
}

virtual MachineInstrBuilder
buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, ArrayRef<SrcOp> SrcOps,
std::optional<unsigned> Flags = std::nullopt);
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Support/TargetOpcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,8 @@ HANDLE_TARGET_OPCODE(G_GET_FPMODE)
HANDLE_TARGET_OPCODE(G_SET_FPMODE)
HANDLE_TARGET_OPCODE(G_RESET_FPMODE)

/// Generic opcode that the llvm.get.rounding intrinsic is translated to.
HANDLE_TARGET_OPCODE(G_GET_ROUNDING)

/// Generic pointer offset
HANDLE_TARGET_OPCODE(G_PTR_ADD)

Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Target/GenericOpcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -1246,6 +1246,12 @@ def G_READSTEADYCOUNTER : GenericInstruction {
let hasSideEffects = true;
}

// Generic form of the llvm.get.rounding intrinsic: defines a single result
// ($dst) and takes no input operands.
// NOTE(review): hasSideEffects is presumably set so the read is not moved or
// merged across instructions that change the rounding mode - confirm.
def G_GET_ROUNDING : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins);
let hasSideEffects = true;
}

//------------------------------------------------------------------------------
// Memory ops
//------------------------------------------------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2593,6 +2593,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::reset_fpmode:
MIRBuilder.buildResetFPMode();
return true;
case Intrinsic::get_rounding:
// No call arguments are read here; the call's result vreg becomes the
// destination of a G_GET_ROUNDING.
MIRBuilder.buildGetRounding(getOrCreateVReg(CI));
return true;
case Intrinsic::vscale: {
MIRBuilder.buildVScale(getOrCreateVReg(CI), 1);
return true;
Expand Down
81 changes: 81 additions & 0 deletions llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
Expand Down Expand Up @@ -108,6 +109,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
.legalFor(HasSSE2 || UseX87, {s64})
.legalFor(UseX87, {s80});

// G_GET_ROUNDING producing s32 is custom-legalized (handled by
// legalizeGETROUNDING via legalizeCustom).
getActionDefinitionsBuilder(G_GET_ROUNDING).customFor({s32});

// merge/unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
Expand Down Expand Up @@ -611,6 +614,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
return legalizeSITOFP(MI, MRI, Helper);
case TargetOpcode::G_FPTOSI:
return legalizeFPTOSI(MI, MRI, Helper);
case TargetOpcode::G_GET_ROUNDING:
return legalizeGETROUNDING(MI, MRI, Helper);
}
llvm_unreachable("expected switch to return");
}
Expand Down Expand Up @@ -777,6 +782,82 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
return true;
}

/// Custom-legalize G_GET_ROUNDING for X86.
///
/// Stores the x87 FP control word to a 2-byte stack temporary with
/// G_FNSTCW16, reloads it, and converts its rounding-control field into the
/// GET_ROUNDING encoding using a packed lookup table. \p MI is erased once
/// the replacement sequence has been built.
///
/// \param MI the G_GET_ROUNDING instruction; operand 0 is the destination.
/// \param MRI used to query the type of the destination register.
/// \param Helper supplies the MachineIRBuilder and the stack temporary.
/// \return always true.
bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const {
/*
The rounding mode is in bits 11:10 of the x87 FP control word (FPCW), and
has the following settings:
00 Round to nearest
01 Round to -inf
10 Round to +inf
11 Round to 0

GET_ROUNDING, on the other hand, expects the following:
-1 Undefined
0 Round to 0
1 Round to nearest
2 Round to +inf
3 Round to -inf

To perform the conversion, we use a packed lookup table of the four 2-bit
values that we can index by FPCW[11:10]
0x2d --> (0b00,10,11,01) --> (0,2,3,1), entry 0 in the lowest two bits

(0x2d >> ((FPCW >> 9) & 6)) & 3

Shifting by 9 (rather than 10) and masking with 6 yields FPCW[11:10] * 2,
which is the bit offset of the selected 2-bit entry inside the table.
*/

MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
MachineFunction &MF = MIRBuilder.getMF();
Register Dst = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(Dst);
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);

// Create a 2-byte, 2-byte-aligned stack slot to hold the FP Control Word
int MemSize = 2;
Align Alignment = Align(2);
MachinePointerInfo PtrInfo;
auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
Alignment, PtrInfo);
Register StackPtr = StackTemp.getReg(0);

auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
MemSize, Alignment);

// Store FP Control Word to stack slot using G_FNSTCW16
MIRBuilder.buildInstr(X86::G_FNSTCW16)
.addUse(StackPtr)
.addMemOperand(StoreMMO);

// Load FP Control Word from stack slot
auto LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
MemSize, Alignment);

// Zero-extend the 16-bit control word to s32, then compute
// (CW >> 9) & 6 in s8 and widen the result back to s32 for use as the
// table shift amount.
auto CWD32 =
MIRBuilder.buildZExt(s32, MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO));
auto Shifted8 = MIRBuilder.buildTrunc(
s8, MIRBuilder.buildLShr(s32, CWD32, MIRBuilder.buildConstant(s8, 9)));
auto Masked32 = MIRBuilder.buildZExt(
s32, MIRBuilder.buildAnd(s8, Shifted8, MIRBuilder.buildConstant(s8, 6)));

// LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding
// mode (from bits 11:10 of the control word) to the values expected by
// GET_ROUNDING. The mapping is performed by shifting LUT right by twice the
// extracted rounding mode and masking the result with 3 to obtain the final
// GET_ROUNDING value.
auto LUT = MIRBuilder.buildConstant(s32, 0x2d);
auto LUTShifted = MIRBuilder.buildLShr(s32, LUT, Masked32);
auto RetVal =
MIRBuilder.buildAnd(s32, LUTShifted, MIRBuilder.buildConstant(s32, 3));
// Adjust the s32 result to the destination type before copying it into Dst.
auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal);

MIRBuilder.buildCopy(Dst, RetValTrunc);

// The original G_GET_ROUNDING has been fully replaced.
MI.eraseFromParent();
return true;
}

bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
return true;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ class X86LegalizerInfo : public LegalizerInfo {

bool legalizeFPTOSI(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;

/// Custom legalization of G_GET_ROUNDING: replaces \p MI with a sequence
/// that reads the x87 FP control word and converts its rounding field to the
/// GET_ROUNDING encoding. The definition always returns true.
bool legalizeGETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
};
} // namespace llvm
#endif
8 changes: 8 additions & 0 deletions llvm/lib/Target/X86/X86InstrGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,13 @@ def G_FIST : X86GenericInstruction {
let mayStore = true;
}

// X86 generic instruction used to store the FP control word to the memory
// location addressed by $dst. It has no register results; it is marked as
// storing to memory and as having side effects. Paired with the
// X86fp_cwd_get16 node through GINodeEquiv.
def G_FNSTCW16 : X86GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins ptype0:$dst);
let hasSideEffects = true;
let mayStore = true;
}

def : GINodeEquiv<G_FILD, X86fild>;
def : GINodeEquiv<G_FIST, X86fp_to_mem>;
def : GINodeEquiv<G_FNSTCW16, X86fp_cwd_get16>;
Loading
Loading