Skip to content

Commit 296e8ca

Browse files
committed
[RISCV] Isel (sra (sext_inreg X, i16), C) -> (srai (slli X, (XLen-16)), (XLen-16) + C).
Similar for (sra (sext_inreg X, i8), C). With Zbb, sext_inreg of i8 and i16 are legal for sext.b and sext.h. This transform makes the Zbb codegen the same as without Zbb. The shifts are more compressible. This also exposes an opportunity for CSE with another slli in the i16 sdiv by constant codegen.
1 parent 8acc3b4 commit 296e8ca

File tree

4 files changed

+67
-36
lines changed

4 files changed

+67
-36
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,37 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
572572

573573
break;
574574
}
575+
case ISD::SRA: {
576+
// Optimize (sra (sext_inreg X, i16), C) ->
577+
// (srai (slli X, (XLen-16)), (XLen-16) + C)
578+
// And (sra (sext_inreg X, i8), C) ->
579+
// (srai (slli X, (XLen-8)), (XLen-8) + C)
580+
// This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
581+
// This transform matches the code we get without Zbb. The shifts are more
582+
// compressible, and this can help expose CSE opportunities in the sdiv by
583+
// constant optimization.
584+
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
585+
if (!N1C)
586+
break;
587+
SDValue N0 = Node->getOperand(0);
588+
if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
589+
break;
590+
uint64_t ShAmt = N1C->getZExtValue();
591+
unsigned ExtSize =
592+
cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
593+
// ExtSize of 32 should use sraiw via tablegen pattern.
594+
if (ExtSize >= 32 || ShAmt >= ExtSize)
595+
break;
596+
unsigned LShAmt = Subtarget->getXLen() - ExtSize;
597+
SDNode *SLLI =
598+
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
599+
CurDAG->getTargetConstant(LShAmt, DL, VT));
600+
SDNode *SRAI = CurDAG->getMachineNode(
601+
RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
602+
CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
603+
ReplaceNode(Node, SRAI);
604+
return;
605+
}
575606
case ISD::AND: {
576607
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
577608
if (!N1C)

llvm/test/CodeGen/RISCV/div-by-constant.ll

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -645,8 +645,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
645645
; RV32IMZB-NEXT: add a0, a1, a0
646646
; RV32IMZB-NEXT: andi a1, a0, 128
647647
; RV32IMZB-NEXT: srli a1, a1, 7
648-
; RV32IMZB-NEXT: sext.b a0, a0
649-
; RV32IMZB-NEXT: srai a0, a0, 2
648+
; RV32IMZB-NEXT: slli a0, a0, 24
649+
; RV32IMZB-NEXT: srai a0, a0, 26
650650
; RV32IMZB-NEXT: add a0, a0, a1
651651
; RV32IMZB-NEXT: ret
652652
;
@@ -674,8 +674,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
674674
; RV64IMZB-NEXT: addw a0, a1, a0
675675
; RV64IMZB-NEXT: andi a1, a0, 128
676676
; RV64IMZB-NEXT: srli a1, a1, 7
677-
; RV64IMZB-NEXT: sext.b a0, a0
678-
; RV64IMZB-NEXT: srai a0, a0, 2
677+
; RV64IMZB-NEXT: slli a0, a0, 56
678+
; RV64IMZB-NEXT: srai a0, a0, 58
679679
; RV64IMZB-NEXT: add a0, a0, a1
680680
; RV64IMZB-NEXT: ret
681681
%1 = sdiv i8 %a, 7
@@ -709,8 +709,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
709709
; RV32IMZB-NEXT: sub a0, a1, a0
710710
; RV32IMZB-NEXT: andi a1, a0, 128
711711
; RV32IMZB-NEXT: srli a1, a1, 7
712-
; RV32IMZB-NEXT: sext.b a0, a0
713-
; RV32IMZB-NEXT: srai a0, a0, 2
712+
; RV32IMZB-NEXT: slli a0, a0, 24
713+
; RV32IMZB-NEXT: srai a0, a0, 26
714714
; RV32IMZB-NEXT: add a0, a0, a1
715715
; RV32IMZB-NEXT: ret
716716
;
@@ -738,8 +738,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
738738
; RV64IMZB-NEXT: subw a0, a1, a0
739739
; RV64IMZB-NEXT: andi a1, a0, 128
740740
; RV64IMZB-NEXT: srli a1, a1, 7
741-
; RV64IMZB-NEXT: sext.b a0, a0
742-
; RV64IMZB-NEXT: srai a0, a0, 2
741+
; RV64IMZB-NEXT: slli a0, a0, 56
742+
; RV64IMZB-NEXT: srai a0, a0, 58
743743
; RV64IMZB-NEXT: add a0, a0, a1
744744
; RV64IMZB-NEXT: ret
745745
%1 = sdiv i8 %a, -7
@@ -846,8 +846,6 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind {
846846
ret i16 %1
847847
}
848848

849-
; FIXME: The Zbb test code has 1 more instruction after the mul because we don't
850-
; share a slli.
851849
define i16 @sdiv16_constant_add_srai(i16 %a) nounwind {
852850
; RV32IM-LABEL: sdiv16_constant_add_srai:
853851
; RV32IM: # %bb.0:
@@ -872,10 +870,9 @@ define i16 @sdiv16_constant_add_srai(i16 %a) nounwind {
872870
; RV32IMZB-NEXT: mul a1, a1, a2
873871
; RV32IMZB-NEXT: srli a1, a1, 16
874872
; RV32IMZB-NEXT: add a0, a1, a0
875-
; RV32IMZB-NEXT: slli a1, a0, 16
876-
; RV32IMZB-NEXT: srli a1, a1, 31
877-
; RV32IMZB-NEXT: sext.h a0, a0
878-
; RV32IMZB-NEXT: srai a0, a0, 3
873+
; RV32IMZB-NEXT: slli a0, a0, 16
874+
; RV32IMZB-NEXT: srli a1, a0, 31
875+
; RV32IMZB-NEXT: srai a0, a0, 19
879876
; RV32IMZB-NEXT: add a0, a0, a1
880877
; RV32IMZB-NEXT: ret
881878
;
@@ -902,18 +899,15 @@ define i16 @sdiv16_constant_add_srai(i16 %a) nounwind {
902899
; RV64IMZB-NEXT: mul a1, a1, a2
903900
; RV64IMZB-NEXT: srli a1, a1, 16
904901
; RV64IMZB-NEXT: addw a0, a1, a0
905-
; RV64IMZB-NEXT: slli a1, a0, 48
906-
; RV64IMZB-NEXT: srli a1, a1, 63
907-
; RV64IMZB-NEXT: sext.h a0, a0
908-
; RV64IMZB-NEXT: srai a0, a0, 3
902+
; RV64IMZB-NEXT: slli a0, a0, 48
903+
; RV64IMZB-NEXT: srli a1, a0, 63
904+
; RV64IMZB-NEXT: srai a0, a0, 51
909905
; RV64IMZB-NEXT: add a0, a0, a1
910906
; RV64IMZB-NEXT: ret
911907
%1 = sdiv i16 %a, 15
912908
ret i16 %1
913909
}
914910

915-
; FIXME: The Zbb test code has 1 more instruction after the mul because we don't
916-
; share a slli.
917911
define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
918912
; RV32IM-LABEL: sdiv16_constant_sub_srai:
919913
; RV32IM: # %bb.0:
@@ -938,10 +932,9 @@ define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
938932
; RV32IMZB-NEXT: mul a1, a1, a2
939933
; RV32IMZB-NEXT: srli a1, a1, 16
940934
; RV32IMZB-NEXT: sub a0, a1, a0
941-
; RV32IMZB-NEXT: slli a1, a0, 16
942-
; RV32IMZB-NEXT: srli a1, a1, 31
943-
; RV32IMZB-NEXT: sext.h a0, a0
944-
; RV32IMZB-NEXT: srai a0, a0, 3
935+
; RV32IMZB-NEXT: slli a0, a0, 16
936+
; RV32IMZB-NEXT: srli a1, a0, 31
937+
; RV32IMZB-NEXT: srai a0, a0, 19
945938
; RV32IMZB-NEXT: add a0, a0, a1
946939
; RV32IMZB-NEXT: ret
947940
;
@@ -968,10 +961,9 @@ define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
968961
; RV64IMZB-NEXT: mul a1, a1, a2
969962
; RV64IMZB-NEXT: srli a1, a1, 16
970963
; RV64IMZB-NEXT: subw a0, a1, a0
971-
; RV64IMZB-NEXT: slli a1, a0, 48
972-
; RV64IMZB-NEXT: srli a1, a1, 63
973-
; RV64IMZB-NEXT: sext.h a0, a0
974-
; RV64IMZB-NEXT: srai a0, a0, 3
964+
; RV64IMZB-NEXT: slli a0, a0, 48
965+
; RV64IMZB-NEXT: srli a1, a0, 63
966+
; RV64IMZB-NEXT: srai a0, a0, 51
975967
; RV64IMZB-NEXT: add a0, a0, a1
976968
; RV64IMZB-NEXT: ret
977969
%1 = sdiv i16 %a, -15

llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,8 @@ define i8 @srli_i8(i8 %a) nounwind {
492492
ret i8 %1
493493
}
494494

495+
; We could use sext.b+srai, but slli+srai offers more opportunities for
496+
; compressed instructions.
495497
define i8 @srai_i8(i8 %a) nounwind {
496498
; RV32I-LABEL: srai_i8:
497499
; RV32I: # %bb.0:
@@ -501,8 +503,8 @@ define i8 @srai_i8(i8 %a) nounwind {
501503
;
502504
; RV32ZBB-LABEL: srai_i8:
503505
; RV32ZBB: # %bb.0:
504-
; RV32ZBB-NEXT: sext.b a0, a0
505-
; RV32ZBB-NEXT: srai a0, a0, 5
506+
; RV32ZBB-NEXT: slli a0, a0, 24
507+
; RV32ZBB-NEXT: srai a0, a0, 29
506508
; RV32ZBB-NEXT: ret
507509
;
508510
; RV32ZBP-LABEL: srai_i8:
@@ -538,6 +540,8 @@ define i16 @srli_i16(i16 %a) nounwind {
538540
ret i16 %1
539541
}
540542

543+
; We could use sext.h+srai, but slli+srai offers more opportunities for
544+
; compressed instructions.
541545
define i16 @srai_i16(i16 %a) nounwind {
542546
; RV32I-LABEL: srai_i16:
543547
; RV32I: # %bb.0:
@@ -547,8 +551,8 @@ define i16 @srai_i16(i16 %a) nounwind {
547551
;
548552
; RV32ZBB-LABEL: srai_i16:
549553
; RV32ZBB: # %bb.0:
550-
; RV32ZBB-NEXT: sext.h a0, a0
551-
; RV32ZBB-NEXT: srai a0, a0, 9
554+
; RV32ZBB-NEXT: slli a0, a0, 16
555+
; RV32ZBB-NEXT: srai a0, a0, 25
552556
; RV32ZBB-NEXT: ret
553557
;
554558
; RV32ZBP-LABEL: srai_i16:

llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,8 @@ define i8 @srli_i8(i8 %a) nounwind {
563563
ret i8 %1
564564
}
565565

566+
; We could use sext.b+srai, but slli+srai offers more opportunities for
567+
; compressed instructions.
566568
define i8 @srai_i8(i8 %a) nounwind {
567569
; RV64I-LABEL: srai_i8:
568570
; RV64I: # %bb.0:
@@ -572,8 +574,8 @@ define i8 @srai_i8(i8 %a) nounwind {
572574
;
573575
; RV64ZBB-LABEL: srai_i8:
574576
; RV64ZBB: # %bb.0:
575-
; RV64ZBB-NEXT: sext.b a0, a0
576-
; RV64ZBB-NEXT: srai a0, a0, 5
577+
; RV64ZBB-NEXT: slli a0, a0, 56
578+
; RV64ZBB-NEXT: srai a0, a0, 61
577579
; RV64ZBB-NEXT: ret
578580
;
579581
; RV64ZBP-LABEL: srai_i8:
@@ -609,6 +611,8 @@ define i16 @srli_i16(i16 %a) nounwind {
609611
ret i16 %1
610612
}
611613

614+
; We could use sext.h+srai, but slli+srai offers more opportunities for
615+
; compressed instructions.
612616
define i16 @srai_i16(i16 %a) nounwind {
613617
; RV64I-LABEL: srai_i16:
614618
; RV64I: # %bb.0:
@@ -618,8 +622,8 @@ define i16 @srai_i16(i16 %a) nounwind {
618622
;
619623
; RV64ZBB-LABEL: srai_i16:
620624
; RV64ZBB: # %bb.0:
621-
; RV64ZBB-NEXT: sext.h a0, a0
622-
; RV64ZBB-NEXT: srai a0, a0, 9
625+
; RV64ZBB-NEXT: slli a0, a0, 48
626+
; RV64ZBB-NEXT: srai a0, a0, 57
623627
; RV64ZBB-NEXT: ret
624628
;
625629
; RV64ZBP-LABEL: srai_i16:

0 commit comments

Comments
 (0)