Skip to content

Commit 6b95aa6

Browse files
authored
[RISCV] Use QC.INSBI for OR with immediate when ORI isn't possible (#147349)
When the immediate to the ORI is a ShiftedMask_32 that does not fit in 12 bits, we can use the QC.INSBI instruction instead. We do not do this for cases where the ORI can be replaced with a BSETI, since these can be compressed when the Xqcibm extension (which QC.INSBI is a part of) is enabled.
1 parent 74995a6 commit 6b95aa6

File tree

4 files changed

+136
-3
lines changed

4 files changed

+136
-3
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,43 @@ bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
681681
return false;
682682
}
683683

684+
/// Try to select an OR-with-constant node as a single QC.INSBI.
///
/// Applies only when the Xqcibm vendor extension is available and operand 1 of
/// \p Node is a constant whose value, read as a 32-bit integer, is one
/// contiguous run of set bits that does not fit a signed 12-bit immediate. In
/// that case \p Node is replaced by a QC_INSBI machine node carrying the
/// immediate -1 together with the run's width and its starting bit position.
///
/// \returns true if \p Node was replaced, false if it was left untouched.
bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) {
  // QC.INSBI is provided by Xqcibm only (for now).
  if (!Subtarget->hasVendorXqcibm())
    return false;

  // The right-hand side of the OR has to be a constant.
  const auto *ImmNode = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (ImmNode == nullptr)
    return false;

  // Immediates that fit in 12 bits are left alone, and anything that is not a
  // single contiguous run of ones cannot be expressed as a bitfield insert.
  const int32_t Mask = ImmNode->getSExtValue();
  if (isInt<12>(Mask) || !isShiftedMask_32(Mask))
    return false;

  // Mask is a shifted mask that cannot be formed as an ORI, so describe it as
  // a bitfield insert of -1:
  //   (or x, Mask) -> (qc.insbi x, width, shift)
  const auto MaskBits = static_cast<uint32_t>(Mask);
  const unsigned HighZeros = llvm::countl_zero(MaskBits);
  const unsigned ShAmt = llvm::countr_zero(MaskBits);
  const unsigned Width = 32 - HighZeros - ShAmt;

  // With Zbs a single set bit is better served by BSETI, which can be
  // compressed to C_BSETI when Xqcibm is enabled.
  const bool IsSingleBit = Width == 1;
  if (IsSingleBit && Subtarget->hasStdExtZbs())
    return false;

  const SDLoc DL(Node);
  const MVT VT = Node->getSimpleValueType(0);

  // NOTE(review): the OR's non-constant input (operand 0, "x" above) is not
  // among the operands handed to the machine node. Confirm against the
  // QC_INSBI instruction definition that no tied source operand is expected
  // here; otherwise the value of x is dropped by this selection.
  SDValue InsertOps[] = {CurDAG->getSignedTargetConstant(-1, DL, VT),
                         CurDAG->getTargetConstant(Width, DL, VT),
                         CurDAG->getTargetConstant(ShAmt, DL, VT)};
  SDNode *Insert = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, InsertOps);
  ReplaceNode(Node, Insert);
  return true;
}
720+
684721
bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
685722
// Only supported with XAndesPerf at the moment.
686723
if (!Subtarget->hasVendorXAndesPerf())
@@ -1298,7 +1335,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
12981335
ReplaceNode(Node, SRAI);
12991336
return;
13001337
}
1301-
case ISD::OR:
1338+
case ISD::OR: {
1339+
if (trySignedBitfieldInsertInMask(Node))
1340+
return;
1341+
1342+
if (tryShrinkShlLogicImm(Node))
1343+
return;
1344+
1345+
break;
1346+
}
13021347
case ISD::XOR:
13031348
if (tryShrinkShlLogicImm(Node))
13041349
return;

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
7979
bool tryShrinkShlLogicImm(SDNode *Node);
8080
bool trySignedBitfieldExtract(SDNode *Node);
8181
bool trySignedBitfieldInsertInSign(SDNode *Node);
82+
bool trySignedBitfieldInsertInMask(SDNode *Node);
8283
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT,
8384
SDValue X, unsigned Msb, unsigned Lsb);
8485
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT,

llvm/test/CodeGen/RISCV/xqcibm-cto-clo-brev.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
105105
;
106106
; RV32ZBBXQCIBM-LABEL: test_cttz_i16:
107107
; RV32ZBBXQCIBM: # %bb.0:
108-
; RV32ZBBXQCIBM-NEXT: lui a1, 16
109-
; RV32ZBBXQCIBM-NEXT: orn a0, a1, a0
108+
; RV32ZBBXQCIBM-NEXT: qc.insbi a0, -1, 1, 16
110109
; RV32ZBBXQCIBM-NEXT: ctz a0, a0
111110
; RV32ZBBXQCIBM-NEXT: ret
112111
%1 = xor i16 %a, -1
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3+
; RUN: | FileCheck %s -check-prefixes=RV32I
4+
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcibm -verify-machineinstrs < %s \
5+
; RUN: | FileCheck %s -check-prefixes=RV32IXQCIBM
6+
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcibm,+zbs -verify-machineinstrs < %s \
7+
; RUN: | FileCheck %s -check-prefixes=RV32IXQCIBMZBS
8+
9+
10+
; OR with 511, an immediate that fits in 12 bits: every RUN configuration is
; expected to keep the plain ORI.
define i32 @test_ori(i32 %a) nounwind {
11+
; RV32I-LABEL: test_ori:
12+
; RV32I: # %bb.0:
13+
; RV32I-NEXT: ori a0, a0, 511
14+
; RV32I-NEXT: ret
15+
;
16+
; RV32IXQCIBM-LABEL: test_ori:
17+
; RV32IXQCIBM: # %bb.0:
18+
; RV32IXQCIBM-NEXT: ori a0, a0, 511
19+
; RV32IXQCIBM-NEXT: ret
20+
;
21+
; RV32IXQCIBMZBS-LABEL: test_ori:
22+
; RV32IXQCIBMZBS: # %bb.0:
23+
; RV32IXQCIBMZBS-NEXT: ori a0, a0, 511
24+
; RV32IXQCIBMZBS-NEXT: ret
25+
%or = or i32 %a, 511
26+
ret i32 %or
27+
}
28+
29+
; OR with 65535 (0xFFFF): 16 contiguous set bits starting at bit 0. The base
; ISA materializes the constant with lui+addi; both Xqcibm configurations are
; expected to emit one qc.insbi with width 16 and shift 0.
define i32 @test_insbi_mask(i32 %a) nounwind {
30+
; RV32I-LABEL: test_insbi_mask:
31+
; RV32I: # %bb.0:
32+
; RV32I-NEXT: lui a1, 16
33+
; RV32I-NEXT: addi a1, a1, -1
34+
; RV32I-NEXT: or a0, a0, a1
35+
; RV32I-NEXT: ret
36+
;
37+
; RV32IXQCIBM-LABEL: test_insbi_mask:
38+
; RV32IXQCIBM: # %bb.0:
39+
; RV32IXQCIBM-NEXT: qc.insbi a0, -1, 16, 0
40+
; RV32IXQCIBM-NEXT: ret
41+
;
42+
; RV32IXQCIBMZBS-LABEL: test_insbi_mask:
43+
; RV32IXQCIBMZBS: # %bb.0:
44+
; RV32IXQCIBMZBS-NEXT: qc.insbi a0, -1, 16, 0
45+
; RV32IXQCIBMZBS-NEXT: ret
46+
%or = or i32 %a, 65535
47+
ret i32 %or
48+
}
49+
50+
; OR with 61440 (0xF000): 4 contiguous set bits starting at bit 12. The base
; ISA uses lui+or; both Xqcibm configurations are expected to emit one
; qc.insbi with width 4 and shift 12.
define i32 @test_insbi_shifted_mask(i32 %a) nounwind {
51+
; RV32I-LABEL: test_insbi_shifted_mask:
52+
; RV32I: # %bb.0:
53+
; RV32I-NEXT: lui a1, 15
54+
; RV32I-NEXT: or a0, a0, a1
55+
; RV32I-NEXT: ret
56+
;
57+
; RV32IXQCIBM-LABEL: test_insbi_shifted_mask:
58+
; RV32IXQCIBM: # %bb.0:
59+
; RV32IXQCIBM-NEXT: qc.insbi a0, -1, 4, 12
60+
; RV32IXQCIBM-NEXT: ret
61+
;
62+
; RV32IXQCIBMZBS-LABEL: test_insbi_shifted_mask:
63+
; RV32IXQCIBMZBS: # %bb.0:
64+
; RV32IXQCIBMZBS-NEXT: qc.insbi a0, -1, 4, 12
65+
; RV32IXQCIBMZBS-NEXT: ret
66+
%or = or i32 %a, 61440
67+
ret i32 %or
68+
}
69+
70+
; OR with 4096 (only bit 12 set). With Xqcibm alone the expected output is a
; qc.insbi of width 1 at shift 12; when Zbs is also enabled the expected output
; is bseti instead.
define i32 @test_single_bit_set(i32 %a) nounwind {
71+
; RV32I-LABEL: test_single_bit_set:
72+
; RV32I: # %bb.0:
73+
; RV32I-NEXT: lui a1, 1
74+
; RV32I-NEXT: or a0, a0, a1
75+
; RV32I-NEXT: ret
76+
;
77+
; RV32IXQCIBM-LABEL: test_single_bit_set:
78+
; RV32IXQCIBM: # %bb.0:
79+
; RV32IXQCIBM-NEXT: qc.insbi a0, -1, 1, 12
80+
; RV32IXQCIBM-NEXT: ret
81+
;
82+
; RV32IXQCIBMZBS-LABEL: test_single_bit_set:
83+
; RV32IXQCIBMZBS: # %bb.0:
84+
; RV32IXQCIBMZBS-NEXT: bseti a0, a0, 12
85+
; RV32IXQCIBMZBS-NEXT: ret
86+
%or = or i32 %a, 4096
87+
ret i32 %or
88+
}

0 commit comments

Comments
 (0)