diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 25fef99699fdf..01b94ac7218a5 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -2382,6 +2382,11 @@ class LLVM_ABI MachineIRBuilder { return buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {}); } + /// Build and insert \p Dst = G_GET_ROUNDING (reads the FP rounding mode) + MachineInstrBuilder buildGetRounding(const DstOp &Dst) { + return buildInstr(TargetOpcode::G_GET_ROUNDING, {Dst}, {}); + } + virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef DstOps, ArrayRef SrcOps, std::optional Flags = std::nullopt); diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 92fd60e03112a..5dfa1d5095b86 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -735,6 +735,8 @@ HANDLE_TARGET_OPCODE(G_GET_FPMODE) HANDLE_TARGET_OPCODE(G_SET_FPMODE) HANDLE_TARGET_OPCODE(G_RESET_FPMODE) +HANDLE_TARGET_OPCODE(G_GET_ROUNDING) + /// Generic pointer offset HANDLE_TARGET_OPCODE(G_PTR_ADD) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index a462b07461b41..a4a23cde42335 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1246,6 +1246,12 @@ def G_READSTEADYCOUNTER : GenericInstruction { let hasSideEffects = true; } +def G_GET_ROUNDING : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins); + let hasSideEffects = true; +} + //------------------------------------------------------------------------------ // Memory ops //------------------------------------------------------------------------------ diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index ef39fc74554c9..d7280eaba2440 100644 --- 
a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2593,6 +2593,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::reset_fpmode: MIRBuilder.buildResetFPMode(); return true; + case Intrinsic::get_rounding: + MIRBuilder.buildGetRounding(getOrCreateVReg(CI)); + return true; case Intrinsic::vscale: { MIRBuilder.buildVScale(getOrCreateVReg(CI), 1); return true; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 8e304c07ed5cb..7fe58539cd4ec 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" @@ -108,6 +109,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, .legalFor(HasSSE2 || UseX87, {s64}) .legalFor(UseX87, {s80}); + getActionDefinitionsBuilder(G_GET_ROUNDING).customFor({s32}); + // merge/unmerge for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { unsigned BigTyIdx = Op == G_MERGE_VALUES ? 
0 : 1; @@ -611,6 +614,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, return legalizeSITOFP(MI, MRI, Helper); case TargetOpcode::G_FPTOSI: return legalizeFPTOSI(MI, MRI, Helper); + case TargetOpcode::G_GET_ROUNDING: + return legalizeGETROUNDING(MI, MRI, Helper); } llvm_unreachable("expected switch to return"); } @@ -777,6 +782,82 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI, return true; } +bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI, + MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const { + /* + The rounding mode is in bits 11:10 of FPSR, and has the following + settings: + 00 Round to nearest + 01 Round to -inf + 10 Round to +inf + 11 Round to 0 + + GET_ROUNDING, on the other hand, expects the following: + -1 Undefined + 0 Round to 0 + 1 Round to nearest + 2 Round to +inf + 3 Round to -inf + + To perform the conversion, we use a packed lookup table of the four 2-bit + values that we can index by FPSR[11:10] + 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10] + + (0x2d >> ((FPSR >> 9) & 6)) & 3 + */ + + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + MachineFunction &MF = MIRBuilder.getMF(); + Register Dst = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst); + const LLT s8 = LLT::scalar(8); + const LLT s16 = LLT::scalar(16); + const LLT s32 = LLT::scalar(32); + + // Save FP Control Word to stack slot + int MemSize = 2; + Align Alignment = Align(2); + MachinePointerInfo PtrInfo; + auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize), + Alignment, PtrInfo); + Register StackPtr = StackTemp.getReg(0); + + auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MemSize, Alignment); + + // Store FP Control Word to stack slot using G_FNSTCW16 + MIRBuilder.buildInstr(X86::G_FNSTCW16) + .addUse(StackPtr) + .addMemOperand(StoreMMO); + + // Load FP Control Word from stack slot + auto LoadMMO = MF.getMachineMemOperand(PtrInfo, 
MachineMemOperand::MOLoad, + MemSize, Alignment); + + auto CWD32 = + MIRBuilder.buildZExt(s32, MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO)); + auto Shifted8 = MIRBuilder.buildTrunc( + s8, MIRBuilder.buildLShr(s32, CWD32, MIRBuilder.buildConstant(s8, 9))); + auto Masked32 = MIRBuilder.buildZExt( + s32, MIRBuilder.buildAnd(s8, Shifted8, MIRBuilder.buildConstant(s8, 6))); + + // LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding + // mode (from bits 11:10 of the control word) to the values expected by + // GET_ROUNDING. The mapping is performed by shifting LUT right by twice the + // rounding mode and masking the result with 3 to obtain the final value. + auto LUT = MIRBuilder.buildConstant(s32, 0x2d); + auto LUTShifted = MIRBuilder.buildLShr(s32, LUT, Masked32); + auto RetVal = + MIRBuilder.buildAnd(s32, LUTShifted, MIRBuilder.buildConstant(s32, 3)); + auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal); + + MIRBuilder.buildCopy(Dst, RetValTrunc); + + MI.eraseFromParent(); + return true; +} + bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { return true; diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h index 1ba82674ed4c6..0003552d70ee0 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h @@ -54,6 +54,9 @@ class X86LegalizerInfo : public LegalizerInfo { bool legalizeFPTOSI(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + + bool legalizeGETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; }; } // namespace llvm #endif diff --git a/llvm/lib/Target/X86/X86InstrGISel.td b/llvm/lib/Target/X86/X86InstrGISel.td index f4fa33807cd9a..39198214037a3 100644 --- a/llvm/lib/Target/X86/X86InstrGISel.td +++ b/llvm/lib/Target/X86/X86InstrGISel.td @@ -27,5 +27,13 @@ def G_FIST : X86GenericInstruction { let mayStore = true; } +def 
G_FNSTCW16 : X86GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins ptype0:$dst); + let hasSideEffects = true; + let mayStore = true; +} + def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index a96ef2a0faab3..b123962f327e4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -627,6 +627,9 @@ # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_GET_ROUNDING (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index d41d3f76436cc..b5092a0122769 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -615,6 +615,9 @@ # DEBUG-NEXT: G_RESET_FPMODE (opcode {{[0-9]+}}): 0 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_GET_ROUNDING (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT:.. 
imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK diff --git a/llvm/test/CodeGen/X86/flt-rounds.ll b/llvm/test/CodeGen/X86/flt-rounds.ll index a5908978a5438..1d7a8d8456c27 100644 --- a/llvm/test/CodeGen/X86/flt-rounds.ll +++ b/llvm/test/CodeGen/X86/flt-rounds.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s --check-prefix=X86 -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse2 -verify-machineinstrs < %s | FileCheck %s --check-prefix=X86 -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=X64 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,SDAG-X86 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,SDAG-X86 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X64,SDAG-X64 +; RUN: llc -mtriple=i686-unknown-linux-gnu -global-isel=1 -global-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,GISEL-X86 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -global-isel=1 -global-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X64,GISEL-X64 declare i32 @llvm.get.rounding() @@ -37,139 +39,309 @@ define i32 @test_flt_rounds() nounwind { ; Make sure we preserve order with fesetround. 
define i32 @multiple_flt_rounds() nounwind { -; X86-LABEL: multiple_flt_rounds: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %esi -; X86-NEXT: subl $20, %esp -; X86-NEXT: movl $1024, (%esp) # imm = 0x400 -; X86-NEXT: calll fesetround -; X86-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $9, %ecx -; X86-NEXT: andb $6, %cl -; X86-NEXT: movl $45, %esi -; X86-NEXT: movl $45, %eax -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $3, %eax -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: cmpl $3, %eax -; X86-NEXT: setne %bl -; X86-NEXT: movl $0, (%esp) -; X86-NEXT: calll fesetround -; X86-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $9, %ecx -; X86-NEXT: andb $6, %cl -; X86-NEXT: movl $45, %eax -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $3, %eax -; X86-NEXT: cmpl $1, %eax -; X86-NEXT: je .LBB1_2 -; X86-NEXT: # %bb.1: # %entry -; X86-NEXT: incl %ebx -; X86-NEXT: .LBB1_2: # %entry -; X86-NEXT: movl $3072, (%esp) # imm = 0xC00 -; X86-NEXT: calll fesetround -; X86-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $9, %ecx -; X86-NEXT: andb $6, %cl -; X86-NEXT: movl $45, %eax -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrl %cl, %eax -; X86-NEXT: andl $3, %eax -; X86-NEXT: cmpl $1, %eax -; X86-NEXT: sbbl $-1, %ebx -; X86-NEXT: movl $2048, (%esp) # imm = 0x800 -; X86-NEXT: calll fesetround -; X86-NEXT: fnstcw {{[0-9]+}}(%esp) -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: shrl $9, %ecx -; X86-NEXT: andb $6, %cl -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrl %cl, %esi -; X86-NEXT: andl $3, %esi -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: cmpl $2, %esi -; X86-NEXT: setne %cl -; X86-NEXT: negl %ecx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %ecx, %ebx -; X86-NEXT: setne %al -; 
X86-NEXT: addl $20, %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %ebx -; X86-NEXT: retl +; SDAG-X86-LABEL: multiple_flt_rounds: +; SDAG-X86: # %bb.0: # %entry +; SDAG-X86-NEXT: pushl %ebx +; SDAG-X86-NEXT: pushl %esi +; SDAG-X86-NEXT: subl $20, %esp +; SDAG-X86-NEXT: movl $1024, (%esp) # imm = 0x400 +; SDAG-X86-NEXT: calll fesetround +; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SDAG-X86-NEXT: shrl $9, %ecx +; SDAG-X86-NEXT: andb $6, %cl +; SDAG-X86-NEXT: movl $45, %esi +; SDAG-X86-NEXT: movl $45, %eax +; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X86-NEXT: shrl %cl, %eax +; SDAG-X86-NEXT: andl $3, %eax +; SDAG-X86-NEXT: xorl %ebx, %ebx +; SDAG-X86-NEXT: cmpl $3, %eax +; SDAG-X86-NEXT: setne %bl +; SDAG-X86-NEXT: movl $0, (%esp) +; SDAG-X86-NEXT: calll fesetround +; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SDAG-X86-NEXT: shrl $9, %ecx +; SDAG-X86-NEXT: andb $6, %cl +; SDAG-X86-NEXT: movl $45, %eax +; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X86-NEXT: shrl %cl, %eax +; SDAG-X86-NEXT: andl $3, %eax +; SDAG-X86-NEXT: cmpl $1, %eax +; SDAG-X86-NEXT: je .LBB1_2 +; SDAG-X86-NEXT: # %bb.1: # %entry +; SDAG-X86-NEXT: incl %ebx +; SDAG-X86-NEXT: .LBB1_2: # %entry +; SDAG-X86-NEXT: movl $3072, (%esp) # imm = 0xC00 +; SDAG-X86-NEXT: calll fesetround +; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SDAG-X86-NEXT: shrl $9, %ecx +; SDAG-X86-NEXT: andb $6, %cl +; SDAG-X86-NEXT: movl $45, %eax +; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X86-NEXT: shrl %cl, %eax +; SDAG-X86-NEXT: andl $3, %eax +; SDAG-X86-NEXT: cmpl $1, %eax +; SDAG-X86-NEXT: sbbl $-1, %ebx +; SDAG-X86-NEXT: movl $2048, (%esp) # imm = 0x800 +; SDAG-X86-NEXT: calll fesetround +; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; SDAG-X86-NEXT: shrl $9, %ecx +; SDAG-X86-NEXT: andb 
$6, %cl +; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X86-NEXT: shrl %cl, %esi +; SDAG-X86-NEXT: andl $3, %esi +; SDAG-X86-NEXT: xorl %ecx, %ecx +; SDAG-X86-NEXT: cmpl $2, %esi +; SDAG-X86-NEXT: setne %cl +; SDAG-X86-NEXT: negl %ecx +; SDAG-X86-NEXT: xorl %eax, %eax +; SDAG-X86-NEXT: cmpl %ecx, %ebx +; SDAG-X86-NEXT: setne %al +; SDAG-X86-NEXT: addl $20, %esp +; SDAG-X86-NEXT: popl %esi +; SDAG-X86-NEXT: popl %ebx +; SDAG-X86-NEXT: retl ; -; X64-LABEL: multiple_flt_rounds: -; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rbp -; X64-NEXT: pushq %r14 -; X64-NEXT: pushq %rbx -; X64-NEXT: subq $16, %rsp -; X64-NEXT: movl $1024, %edi # imm = 0x400 -; X64-NEXT: callq fesetround -; X64-NEXT: fnstcw {{[0-9]+}}(%rsp) -; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: shrl $9, %ecx -; X64-NEXT: andb $6, %cl -; X64-NEXT: movl $45, %ebx -; X64-NEXT: movl $45, %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %eax -; X64-NEXT: andl $3, %eax -; X64-NEXT: xorl %r14d, %r14d -; X64-NEXT: cmpl $3, %eax -; X64-NEXT: setne %r14b -; X64-NEXT: xorl %edi, %edi -; X64-NEXT: callq fesetround -; X64-NEXT: fnstcw {{[0-9]+}}(%rsp) -; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: shrl $9, %ecx -; X64-NEXT: andb $6, %cl -; X64-NEXT: movl $45, %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %eax -; X64-NEXT: andl $3, %eax -; X64-NEXT: leal 1(%r14), %ebp -; X64-NEXT: cmpl $1, %eax -; X64-NEXT: cmovel %r14d, %ebp -; X64-NEXT: movl $3072, %edi # imm = 0xC00 -; X64-NEXT: callq fesetround -; X64-NEXT: fnstcw {{[0-9]+}}(%rsp) -; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: shrl $9, %ecx -; X64-NEXT: andb $6, %cl -; X64-NEXT: movl $45, %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %eax -; X64-NEXT: andl $3, %eax -; X64-NEXT: cmpl $1, %eax -; X64-NEXT: sbbl $-1, %ebp -; X64-NEXT: movl $2048, %edi # imm = 0x800 -; X64-NEXT: callq fesetround -; X64-NEXT: fnstcw 
{{[0-9]+}}(%rsp) -; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: shrl $9, %ecx -; X64-NEXT: andb $6, %cl -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %ebx -; X64-NEXT: andl $3, %ebx -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: cmpl $2, %ebx -; X64-NEXT: setne %cl -; X64-NEXT: negl %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %ecx, %ebp -; X64-NEXT: setne %al -; X64-NEXT: addq $16, %rsp -; X64-NEXT: popq %rbx -; X64-NEXT: popq %r14 -; X64-NEXT: popq %rbp -; X64-NEXT: retq +; SDAG-X64-LABEL: multiple_flt_rounds: +; SDAG-X64: # %bb.0: # %entry +; SDAG-X64-NEXT: pushq %rbp +; SDAG-X64-NEXT: pushq %r14 +; SDAG-X64-NEXT: pushq %rbx +; SDAG-X64-NEXT: subq $16, %rsp +; SDAG-X64-NEXT: movl $1024, %edi # imm = 0x400 +; SDAG-X64-NEXT: callq fesetround +; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; SDAG-X64-NEXT: shrl $9, %ecx +; SDAG-X64-NEXT: andb $6, %cl +; SDAG-X64-NEXT: movl $45, %ebx +; SDAG-X64-NEXT: movl $45, %eax +; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X64-NEXT: shrl %cl, %eax +; SDAG-X64-NEXT: andl $3, %eax +; SDAG-X64-NEXT: xorl %r14d, %r14d +; SDAG-X64-NEXT: cmpl $3, %eax +; SDAG-X64-NEXT: setne %r14b +; SDAG-X64-NEXT: xorl %edi, %edi +; SDAG-X64-NEXT: callq fesetround +; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; SDAG-X64-NEXT: shrl $9, %ecx +; SDAG-X64-NEXT: andb $6, %cl +; SDAG-X64-NEXT: movl $45, %eax +; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X64-NEXT: shrl %cl, %eax +; SDAG-X64-NEXT: andl $3, %eax +; SDAG-X64-NEXT: leal 1(%r14), %ebp +; SDAG-X64-NEXT: cmpl $1, %eax +; SDAG-X64-NEXT: cmovel %r14d, %ebp +; SDAG-X64-NEXT: movl $3072, %edi # imm = 0xC00 +; SDAG-X64-NEXT: callq fesetround +; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; SDAG-X64-NEXT: shrl $9, %ecx +; SDAG-X64-NEXT: andb $6, %cl +; SDAG-X64-NEXT: movl $45, %eax +; 
SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X64-NEXT: shrl %cl, %eax +; SDAG-X64-NEXT: andl $3, %eax +; SDAG-X64-NEXT: cmpl $1, %eax +; SDAG-X64-NEXT: sbbl $-1, %ebp +; SDAG-X64-NEXT: movl $2048, %edi # imm = 0x800 +; SDAG-X64-NEXT: callq fesetround +; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; SDAG-X64-NEXT: shrl $9, %ecx +; SDAG-X64-NEXT: andb $6, %cl +; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; SDAG-X64-NEXT: shrl %cl, %ebx +; SDAG-X64-NEXT: andl $3, %ebx +; SDAG-X64-NEXT: xorl %ecx, %ecx +; SDAG-X64-NEXT: cmpl $2, %ebx +; SDAG-X64-NEXT: setne %cl +; SDAG-X64-NEXT: negl %ecx +; SDAG-X64-NEXT: xorl %eax, %eax +; SDAG-X64-NEXT: cmpl %ecx, %ebp +; SDAG-X64-NEXT: setne %al +; SDAG-X64-NEXT: addq $16, %rsp +; SDAG-X64-NEXT: popq %rbx +; SDAG-X64-NEXT: popq %r14 +; SDAG-X64-NEXT: popq %rbp +; SDAG-X64-NEXT: retq +; +; GISEL-X86-LABEL: multiple_flt_rounds: +; GISEL-X86: # %bb.0: # %entry +; GISEL-X86-NEXT: pushl %ebp +; GISEL-X86-NEXT: pushl %ebx +; GISEL-X86-NEXT: pushl %edi +; GISEL-X86-NEXT: pushl %esi +; GISEL-X86-NEXT: subl $12, %esp +; GISEL-X86-NEXT: movl $1, %ebp +; GISEL-X86-NEXT: movl $1024, (%esp) # imm = 0x400 +; GISEL-X86-NEXT: calll fesetround +; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: shrl $9, %ecx +; GISEL-X86-NEXT: andb $6, %cl +; GISEL-X86-NEXT: movl $45, %edi +; GISEL-X86-NEXT: movl $45, %eax +; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X86-NEXT: shrl %cl, %eax +; GISEL-X86-NEXT: andl $3, %eax +; GISEL-X86-NEXT: xorl %ebx, %ebx +; GISEL-X86-NEXT: cmpl $3, %eax +; GISEL-X86-NEXT: setne %bl +; GISEL-X86-NEXT: andl $1, %ebx +; GISEL-X86-NEXT: movl $0, (%esp) +; GISEL-X86-NEXT: calll fesetround +; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: shrl $9, %ecx +; GISEL-X86-NEXT: andb $6, %cl +; GISEL-X86-NEXT: movl $45, %edx +; 
GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X86-NEXT: shrl %cl, %edx +; GISEL-X86-NEXT: andl $3, %edx +; GISEL-X86-NEXT: xorl %eax, %eax +; GISEL-X86-NEXT: cmpl $1, %edx +; GISEL-X86-NEXT: sete %cl +; GISEL-X86-NEXT: testl %ebx, %ebx +; GISEL-X86-NEXT: je .LBB1_2 +; GISEL-X86-NEXT: # %bb.1: # %entry +; GISEL-X86-NEXT: movl $2, %ebp +; GISEL-X86-NEXT: .LBB1_2: # %entry +; GISEL-X86-NEXT: xorl %esi, %esi +; GISEL-X86-NEXT: movb %cl, %al +; GISEL-X86-NEXT: andl $1, %eax +; GISEL-X86-NEXT: je .LBB1_4 +; GISEL-X86-NEXT: # %bb.3: # %entry +; GISEL-X86-NEXT: movl %ebx, %ebp +; GISEL-X86-NEXT: .LBB1_4: # %entry +; GISEL-X86-NEXT: movl $3072, (%esp) # imm = 0xC00 +; GISEL-X86-NEXT: calll fesetround +; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: shrl $9, %ecx +; GISEL-X86-NEXT: andb $6, %cl +; GISEL-X86-NEXT: movl $45, %eax +; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X86-NEXT: shrl %cl, %eax +; GISEL-X86-NEXT: andl $3, %eax +; GISEL-X86-NEXT: xorl %ebx, %ebx +; GISEL-X86-NEXT: cmpl %esi, %eax +; GISEL-X86-NEXT: setne %bl +; GISEL-X86-NEXT: andl $1, %ebx +; GISEL-X86-NEXT: addl %ebp, %ebx +; GISEL-X86-NEXT: movl $2048, (%esp) # imm = 0x800 +; GISEL-X86-NEXT: calll fesetround +; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; GISEL-X86-NEXT: shrl $9, %ecx +; GISEL-X86-NEXT: andb $6, %cl +; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X86-NEXT: shrl %cl, %edi +; GISEL-X86-NEXT: andl $3, %edi +; GISEL-X86-NEXT: xorl %ecx, %ecx +; GISEL-X86-NEXT: movl $2, %eax +; GISEL-X86-NEXT: cmpl %eax, %edi +; GISEL-X86-NEXT: setne %cl +; GISEL-X86-NEXT: shll $31, %ecx +; GISEL-X86-NEXT: sarl $31, %ecx +; GISEL-X86-NEXT: xorl %eax, %eax +; GISEL-X86-NEXT: cmpl %ecx, %ebx +; GISEL-X86-NEXT: setne %al +; GISEL-X86-NEXT: andl $1, %eax +; GISEL-X86-NEXT: addl $12, %esp +; GISEL-X86-NEXT: popl %esi +; GISEL-X86-NEXT: popl %edi +; 
GISEL-X86-NEXT: popl %ebx +; GISEL-X86-NEXT: popl %ebp +; GISEL-X86-NEXT: retl +; +; GISEL-X64-LABEL: multiple_flt_rounds: +; GISEL-X64: # %bb.0: # %entry +; GISEL-X64-NEXT: pushq %rbp +; GISEL-X64-NEXT: pushq %r15 +; GISEL-X64-NEXT: pushq %r14 +; GISEL-X64-NEXT: pushq %rbx +; GISEL-X64-NEXT: pushq %rax +; GISEL-X64-NEXT: movl $1, %r14d +; GISEL-X64-NEXT: movl $2, %ebp +; GISEL-X64-NEXT: movl $1024, %edi # imm = 0x400 +; GISEL-X64-NEXT: callq fesetround +; GISEL-X64-NEXT: fnstcw (%rsp) +; GISEL-X64-NEXT: movzwl (%rsp), %ecx +; GISEL-X64-NEXT: shrl $9, %ecx +; GISEL-X64-NEXT: andb $6, %cl +; GISEL-X64-NEXT: movl $45, %ebx +; GISEL-X64-NEXT: movl $45, %eax +; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X64-NEXT: shrl %cl, %eax +; GISEL-X64-NEXT: andl $3, %eax +; GISEL-X64-NEXT: xorl %r15d, %r15d +; GISEL-X64-NEXT: cmpl $3, %eax +; GISEL-X64-NEXT: setne %r15b +; GISEL-X64-NEXT: andl $1, %r15d +; GISEL-X64-NEXT: xorl %edi, %edi +; GISEL-X64-NEXT: callq fesetround +; GISEL-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; GISEL-X64-NEXT: shrl $9, %ecx +; GISEL-X64-NEXT: andb $6, %cl +; GISEL-X64-NEXT: movl $45, %eax +; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X64-NEXT: shrl %cl, %eax +; GISEL-X64-NEXT: andl $3, %eax +; GISEL-X64-NEXT: xorl %ecx, %ecx +; GISEL-X64-NEXT: cmpl $1, %eax +; GISEL-X64-NEXT: sete %cl +; GISEL-X64-NEXT: testl %r15d, %r15d +; GISEL-X64-NEXT: cmovel %r14d, %ebp +; GISEL-X64-NEXT: andl $1, %ecx +; GISEL-X64-NEXT: cmovnel %r15d, %ebp +; GISEL-X64-NEXT: movl $3072, %edi # imm = 0xC00 +; GISEL-X64-NEXT: callq fesetround +; GISEL-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; GISEL-X64-NEXT: shrl $9, %ecx +; GISEL-X64-NEXT: andb $6, %cl +; GISEL-X64-NEXT: movl $45, %eax +; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X64-NEXT: shrl %cl, %eax +; GISEL-X64-NEXT: andl $3, %eax +; GISEL-X64-NEXT: xorl %r14d, %r14d +; 
GISEL-X64-NEXT: cmpl $0, %eax +; GISEL-X64-NEXT: setne %r14b +; GISEL-X64-NEXT: andl $1, %r14d +; GISEL-X64-NEXT: addl %ebp, %r14d +; GISEL-X64-NEXT: movl $2048, %edi # imm = 0x800 +; GISEL-X64-NEXT: callq fesetround +; GISEL-X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; GISEL-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; GISEL-X64-NEXT: shrl $9, %ecx +; GISEL-X64-NEXT: andb $6, %cl +; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx +; GISEL-X64-NEXT: shrl %cl, %ebx +; GISEL-X64-NEXT: andl $3, %ebx +; GISEL-X64-NEXT: xorl %ecx, %ecx +; GISEL-X64-NEXT: cmpl $2, %ebx +; GISEL-X64-NEXT: setne %cl +; GISEL-X64-NEXT: shll $31, %ecx +; GISEL-X64-NEXT: sarl $31, %ecx +; GISEL-X64-NEXT: xorl %eax, %eax +; GISEL-X64-NEXT: cmpl %ecx, %r14d +; GISEL-X64-NEXT: setne %al +; GISEL-X64-NEXT: andl $1, %eax +; GISEL-X64-NEXT: addq $8, %rsp +; GISEL-X64-NEXT: popq %rbx +; GISEL-X64-NEXT: popq %r14 +; GISEL-X64-NEXT: popq %r15 +; GISEL-X64-NEXT: popq %rbp +; GISEL-X64-NEXT: retq entry: %call = tail call i32 @fesetround(i32 1024) %0 = tail call i32 @llvm.get.rounding() diff --git a/llvm/test/MC/ELF/mc-dump.s b/llvm/test/MC/ELF/mc-dump.s index 3788eb093eef2..fb29fcd880866 100644 --- a/llvm/test/MC/ELF/mc-dump.s +++ b/llvm/test/MC/ELF/mc-dump.s @@ -12,7 +12,7 @@ # CHECK-NEXT:0 Data Size:0 [] # CHECK-NEXT: Symbol @0 _start # CHECK-NEXT:0 Org Offset:3 Value:0 -# CHECK-NEXT:3 Relaxable Size:2 > +# CHECK-NEXT:3 Relaxable Size:2 > # CHECK-NEXT: Fixup @1 Value:.Ltmp0 Kind:4001 # CHECK-NEXT:5 Data Size:16 [48,8b,04,25,00,00,00,00,48,8b,04,25,00,00,00,00] # CHECK-NEXT: Fixup @4 Value:f0@ Kind:4017 diff --git a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td index 6e5d7de0732f0..0d6d4a3a29274 100644 --- a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td +++ b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td @@ -535,7 +535,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3), // R00O-NEXT: GIM_Reject, // 
R00O: // Label [[DEFAULT_NUM]]: @[[DEFAULT]] // R00O-NEXT: GIM_Reject, -// R00O-NEXT: }; // Size: 1878 bytes +// R00O-NEXT: }; // Size: 1882 bytes def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4), [(set GPR32:$dst,