Skip to content

Commit 22c590b

Browse files
authored
[RISCV][ISel] Optimize setcc with mask test idioms (#147015)
As we are converting more comparisons/differences of pointers into those of offsets in InstCombine, the mask test idiom `icmp eq/ne (and X, Mask), 0` may become more common in real-world programs. This patch eliminates unnecessary srli instructions for this pattern. We have a similar optimization for `RISCVISD::SELECT_CC/BR_CC`: https://github.com/llvm/llvm-project/blob/a89e232058a29260eb9bfe77b862715ce875f962/llvm/lib/Target/RISCV/RISCVISelLowering.cpp#L2416-L2446. However, I cannot reuse the function `translateSetCCForBranch` due to some regressions caused by other DAGCombiner folds (see the branch comparison main...dtcxzyw:llvm-project:rv-mask-test), so this patch defers the transformation to ISel.
1 parent ae3d313 commit 22c590b

File tree

6 files changed

+152
-45
lines changed

6 files changed

+152
-45
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1615,6 +1615,10 @@ def riscv_seteq : ComplexPattern<XLenVT, 1, "selectSETEQ", [setcc]>;
16151615
def : Pat<(riscv_seteq (XLenVT GPR:$rs1)), (SLTIU GPR:$rs1, 1)>;
16161616
def : Pat<(riscv_setne (XLenVT GPR:$rs1)), (SLTU (XLenVT X0), GPR:$rs1)>;
16171617
def : Pat<(XLenVT (setne (XLenVT GPR:$rs1), -1)), (SLTIU GPR:$rs1, -1)>;
1618+
def : Pat<(XLenVT (seteq (XLenVT (and GPR:$rs, immop_oneuse<TrailingOnesMask>:$mask)), 0)),
1619+
(SLTIU (XLenVT (SLLI GPR:$rs, (XLenSubTrailingOnes imm:$mask))), 1)>;
1620+
def : Pat<(XLenVT (setne (XLenVT (and GPR:$rs, immop_oneuse<TrailingOnesMask>:$mask)), 0)),
1621+
(SLTU (XLenVT X0), (XLenVT (SLLI GPR:$rs, (XLenSubTrailingOnes imm:$mask))))>;
16181622

16191623
def IntCCtoRISCVCC : SDNodeXForm<riscv_selectcc, [{
16201624
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();

llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -972,19 +972,19 @@ define i1 @fpclass(float %x) {
972972
; RV32I-NEXT: slli a2, a0, 1
973973
; RV32I-NEXT: lui a3, 2048
974974
; RV32I-NEXT: lui a4, 1046528
975-
; RV32I-NEXT: srli a2, a2, 1
975+
; RV32I-NEXT: srli a5, a2, 1
976976
; RV32I-NEXT: addi a3, a3, -1
977-
; RV32I-NEXT: addi a5, a2, -1
977+
; RV32I-NEXT: xor a0, a0, a5
978+
; RV32I-NEXT: xor a6, a5, a1
979+
; RV32I-NEXT: sltu a1, a1, a5
980+
; RV32I-NEXT: add a4, a5, a4
981+
; RV32I-NEXT: addi a5, a5, -1
978982
; RV32I-NEXT: sltu a3, a5, a3
979983
; RV32I-NEXT: lui a5, 520192
980-
; RV32I-NEXT: xor a0, a0, a2
981-
; RV32I-NEXT: add a4, a2, a4
982984
; RV32I-NEXT: sltu a4, a4, a5
983-
; RV32I-NEXT: xor a5, a2, a1
984-
; RV32I-NEXT: sltu a1, a1, a2
985985
; RV32I-NEXT: seqz a2, a2
986986
; RV32I-NEXT: snez a0, a0
987-
; RV32I-NEXT: seqz a5, a5
987+
; RV32I-NEXT: seqz a5, a6
988988
; RV32I-NEXT: and a3, a3, a0
989989
; RV32I-NEXT: or a2, a2, a5
990990
; RV32I-NEXT: and a0, a4, a0
@@ -1000,19 +1000,19 @@ define i1 @fpclass(float %x) {
10001000
; RV64I-NEXT: sext.w a0, a0
10011001
; RV64I-NEXT: li a3, 1
10021002
; RV64I-NEXT: lui a4, 2048
1003-
; RV64I-NEXT: lui a5, 520192
1004-
; RV64I-NEXT: srli a2, a2, 33
1005-
; RV64I-NEXT: addi a6, a4, -1
1006-
; RV64I-NEXT: xor a0, a0, a2
1007-
; RV64I-NEXT: subw a3, a2, a3
1008-
; RV64I-NEXT: sltu a3, a3, a6
1009-
; RV64I-NEXT: xor a6, a2, a1
1010-
; RV64I-NEXT: sltu a1, a1, a2
1011-
; RV64I-NEXT: subw a4, a2, a4
1003+
; RV64I-NEXT: srli a5, a2, 33
1004+
; RV64I-NEXT: xor a0, a0, a5
1005+
; RV64I-NEXT: subw a3, a5, a3
1006+
; RV64I-NEXT: xor a6, a5, a1
1007+
; RV64I-NEXT: sltu a1, a1, a5
1008+
; RV64I-NEXT: subw a5, a5, a4
1009+
; RV64I-NEXT: addi a4, a4, -1
1010+
; RV64I-NEXT: sltu a3, a3, a4
1011+
; RV64I-NEXT: lui a4, 520192
10121012
; RV64I-NEXT: seqz a2, a2
10131013
; RV64I-NEXT: snez a0, a0
10141014
; RV64I-NEXT: seqz a6, a6
1015-
; RV64I-NEXT: sltu a4, a4, a5
1015+
; RV64I-NEXT: sltu a4, a5, a4
10161016
; RV64I-NEXT: and a3, a3, a0
10171017
; RV64I-NEXT: or a2, a2, a6
10181018
; RV64I-NEXT: or a1, a2, a1

llvm/test/CodeGen/RISCV/float-intrinsics.ll

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1625,18 +1625,18 @@ define i1 @fpclass(float %x) {
16251625
; RV32I: # %bb.0:
16261626
; RV32I-NEXT: slli a1, a0, 1
16271627
; RV32I-NEXT: lui a2, 2048
1628-
; RV32I-NEXT: slti a0, a0, 0
16291628
; RV32I-NEXT: lui a3, 522240
16301629
; RV32I-NEXT: lui a4, 1046528
1631-
; RV32I-NEXT: srli a1, a1, 1
1630+
; RV32I-NEXT: srli a5, a1, 1
16321631
; RV32I-NEXT: addi a2, a2, -1
1633-
; RV32I-NEXT: addi a5, a1, -1
1632+
; RV32I-NEXT: xor a6, a5, a3
1633+
; RV32I-NEXT: slt a3, a3, a5
1634+
; RV32I-NEXT: add a4, a5, a4
1635+
; RV32I-NEXT: addi a5, a5, -1
16341636
; RV32I-NEXT: sltu a2, a5, a2
1635-
; RV32I-NEXT: xor a5, a1, a3
1636-
; RV32I-NEXT: slt a3, a3, a1
1637-
; RV32I-NEXT: add a4, a1, a4
1637+
; RV32I-NEXT: slti a0, a0, 0
16381638
; RV32I-NEXT: seqz a1, a1
1639-
; RV32I-NEXT: seqz a5, a5
1639+
; RV32I-NEXT: seqz a5, a6
16401640
; RV32I-NEXT: srli a4, a4, 24
16411641
; RV32I-NEXT: and a2, a2, a0
16421642
; RV32I-NEXT: or a1, a1, a5
@@ -1649,29 +1649,29 @@ define i1 @fpclass(float %x) {
16491649
;
16501650
; RV64I-LABEL: fpclass:
16511651
; RV64I: # %bb.0:
1652-
; RV64I-NEXT: sext.w a1, a0
1653-
; RV64I-NEXT: slli a0, a0, 33
1652+
; RV64I-NEXT: slli a1, a0, 33
16541653
; RV64I-NEXT: lui a2, 2048
16551654
; RV64I-NEXT: lui a3, 522240
16561655
; RV64I-NEXT: lui a4, 1046528
1657-
; RV64I-NEXT: srli a0, a0, 33
1656+
; RV64I-NEXT: srli a5, a1, 33
16581657
; RV64I-NEXT: addi a2, a2, -1
1659-
; RV64I-NEXT: slti a1, a1, 0
1660-
; RV64I-NEXT: addi a5, a0, -1
1658+
; RV64I-NEXT: xor a6, a5, a3
1659+
; RV64I-NEXT: slt a3, a3, a5
1660+
; RV64I-NEXT: add a4, a5, a4
1661+
; RV64I-NEXT: addi a5, a5, -1
16611662
; RV64I-NEXT: sltu a2, a5, a2
1662-
; RV64I-NEXT: xor a5, a0, a3
1663-
; RV64I-NEXT: slt a3, a3, a0
1664-
; RV64I-NEXT: add a4, a0, a4
1665-
; RV64I-NEXT: seqz a0, a0
1666-
; RV64I-NEXT: seqz a5, a5
1663+
; RV64I-NEXT: sext.w a0, a0
1664+
; RV64I-NEXT: slti a0, a0, 0
1665+
; RV64I-NEXT: seqz a1, a1
1666+
; RV64I-NEXT: seqz a5, a6
16671667
; RV64I-NEXT: srliw a4, a4, 24
1668-
; RV64I-NEXT: and a2, a2, a1
1669-
; RV64I-NEXT: or a0, a0, a5
1668+
; RV64I-NEXT: and a2, a2, a0
1669+
; RV64I-NEXT: or a1, a1, a5
16701670
; RV64I-NEXT: sltiu a4, a4, 127
1671-
; RV64I-NEXT: or a0, a0, a2
1672-
; RV64I-NEXT: or a0, a0, a3
1673-
; RV64I-NEXT: and a1, a4, a1
1674-
; RV64I-NEXT: or a0, a0, a1
1671+
; RV64I-NEXT: or a1, a1, a2
1672+
; RV64I-NEXT: or a1, a1, a3
1673+
; RV64I-NEXT: and a0, a4, a0
1674+
; RV64I-NEXT: or a0, a1, a0
16751675
; RV64I-NEXT: ret
16761676
%cmp = call i1 @llvm.is.fpclass.f32(float %x, i32 639)
16771677
ret i1 %cmp

llvm/test/CodeGen/RISCV/i32-icmp.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,3 +1136,57 @@ define i32 @icmp_sle_constant_neg_2050(i32 %a) nounwind {
11361136
%2 = zext i1 %1 to i32
11371137
ret i32 %2
11381138
}
1139+
1140+
define i32 @mask_test_eq(i32 %x) nounwind {
1141+
; RV32I-LABEL: mask_test_eq:
1142+
; RV32I: # %bb.0:
1143+
; RV32I-NEXT: slli a0, a0, 12
1144+
; RV32I-NEXT: seqz a0, a0
1145+
; RV32I-NEXT: ret
1146+
;
1147+
; RV32XQCILIA-LABEL: mask_test_eq:
1148+
; RV32XQCILIA: # %bb.0:
1149+
; RV32XQCILIA-NEXT: slli a0, a0, 12
1150+
; RV32XQCILIA-NEXT: seqz a0, a0
1151+
; RV32XQCILIA-NEXT: ret
1152+
%y = and i32 %x, 1048575
1153+
%cmp = icmp eq i32 %y, 0
1154+
%ext = zext i1 %cmp to i32
1155+
ret i32 %ext
1156+
}
1157+
1158+
define i32 @mask_test_ne(i32 %x) nounwind {
1159+
; RV32I-LABEL: mask_test_ne:
1160+
; RV32I: # %bb.0:
1161+
; RV32I-NEXT: slli a0, a0, 12
1162+
; RV32I-NEXT: snez a0, a0
1163+
; RV32I-NEXT: ret
1164+
;
1165+
; RV32XQCILIA-LABEL: mask_test_ne:
1166+
; RV32XQCILIA: # %bb.0:
1167+
; RV32XQCILIA-NEXT: slli a0, a0, 12
1168+
; RV32XQCILIA-NEXT: snez a0, a0
1169+
; RV32XQCILIA-NEXT: ret
1170+
%y = and i32 %x, 1048575
1171+
%cmp = icmp ne i32 %y, 0
1172+
%ext = zext i1 %cmp to i32
1173+
ret i32 %ext
1174+
}
1175+
1176+
define i32 @mask_test_eq_simm12(i32 %x) nounwind {
1177+
; RV32I-LABEL: mask_test_eq_simm12:
1178+
; RV32I: # %bb.0:
1179+
; RV32I-NEXT: andi a0, a0, 3
1180+
; RV32I-NEXT: seqz a0, a0
1181+
; RV32I-NEXT: ret
1182+
;
1183+
; RV32XQCILIA-LABEL: mask_test_eq_simm12:
1184+
; RV32XQCILIA: # %bb.0:
1185+
; RV32XQCILIA-NEXT: andi a0, a0, 3
1186+
; RV32XQCILIA-NEXT: seqz a0, a0
1187+
; RV32XQCILIA-NEXT: ret
1188+
%y = and i32 %x, 3
1189+
%cmp = icmp eq i32 %y, 0
1190+
%ext = zext i1 %cmp to i32
1191+
ret i32 %ext
1192+
}

llvm/test/CodeGen/RISCV/i64-icmp.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,4 +767,56 @@ define i64 @icmp_ne_zext_inreg_umin(i64 %a) nounwind {
767767
%4 = zext i1 %3 to i64
768768
ret i64 %4
769769
}
770+
771+
define i64 @mask_test_eq(i64 %x) nounwind {
772+
; RV64I-LABEL: mask_test_eq:
773+
; RV64I: # %bb.0:
774+
; RV64I-NEXT: slli a0, a0, 2
775+
; RV64I-NEXT: seqz a0, a0
776+
; RV64I-NEXT: ret
777+
%y = and i64 %x, 4611686018427387903
778+
%cmp = icmp eq i64 %y, 0
779+
%ext = zext i1 %cmp to i64
780+
ret i64 %ext
781+
}
782+
783+
define i64 @mask_test_ne(i64 %x) nounwind {
784+
; RV64I-LABEL: mask_test_ne:
785+
; RV64I: # %bb.0:
786+
; RV64I-NEXT: slli a0, a0, 2
787+
; RV64I-NEXT: snez a0, a0
788+
; RV64I-NEXT: ret
789+
%y = and i64 %x, 4611686018427387903
790+
%cmp = icmp ne i64 %y, 0
791+
%ext = zext i1 %cmp to i64
792+
ret i64 %ext
793+
}
794+
795+
define i64 @mask_test_eq_simm12(i64 %x) nounwind {
796+
; RV64I-LABEL: mask_test_eq_simm12:
797+
; RV64I: # %bb.0:
798+
; RV64I-NEXT: andi a0, a0, 3
799+
; RV64I-NEXT: seqz a0, a0
800+
; RV64I-NEXT: ret
801+
%y = and i64 %x, 3
802+
%cmp = icmp eq i64 %y, 0
803+
%ext = zext i1 %cmp to i64
804+
ret i64 %ext
805+
}
806+
807+
define i64 @mask_test_eq_multiuse(i64 %x, ptr %p) nounwind {
808+
; RV64I-LABEL: mask_test_eq_multiuse:
809+
; RV64I: # %bb.0:
810+
; RV64I-NEXT: slli a0, a0, 2
811+
; RV64I-NEXT: srli a2, a0, 2
812+
; RV64I-NEXT: seqz a0, a0
813+
; RV64I-NEXT: sd a2, 0(a1)
814+
; RV64I-NEXT: ret
815+
%y = and i64 %x, 4611686018427387903
816+
store i64 %y, ptr %p, align 8
817+
%cmp = icmp eq i64 %y, 0
818+
%ext = zext i1 %cmp to i64
819+
ret i64 %ext
820+
}
821+
770822
declare i64 @llvm.umin.i64(i64, i64)

llvm/test/CodeGen/RISCV/overflow-intrinsics.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,6 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
594594
; RV32: # %bb.0:
595595
; RV32-NEXT: addi a2, a0, 1
596596
; RV32-NEXT: slli a0, a2, 16
597-
; RV32-NEXT: srli a0, a0, 16
598597
; RV32-NEXT: seqz a0, a0
599598
; RV32-NEXT: sh a2, 0(a1)
600599
; RV32-NEXT: ret
@@ -603,7 +602,6 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
603602
; RV64: # %bb.0:
604603
; RV64-NEXT: addi a2, a0, 1
605604
; RV64-NEXT: slli a0, a2, 48
606-
; RV64-NEXT: srli a0, a0, 48
607605
; RV64-NEXT: seqz a0, a0
608606
; RV64-NEXT: sh a2, 0(a1)
609607
; RV64-NEXT: ret
@@ -759,10 +757,9 @@ define i1 @uaddo_i42_increment_illegal_type(i42 %x, ptr %p) {
759757
; RV64-LABEL: uaddo_i42_increment_illegal_type:
760758
; RV64: # %bb.0:
761759
; RV64-NEXT: addi a2, a0, 1
762-
; RV64-NEXT: slli a0, a2, 22
763-
; RV64-NEXT: srli a3, a0, 22
760+
; RV64-NEXT: slli a3, a2, 22
764761
; RV64-NEXT: seqz a0, a3
765-
; RV64-NEXT: srli a3, a3, 32
762+
; RV64-NEXT: srli a3, a3, 54
766763
; RV64-NEXT: sw a2, 0(a1)
767764
; RV64-NEXT: sh a3, 4(a1)
768765
; RV64-NEXT: ret

0 commit comments

Comments
 (0)