Skip to content

Commit 6882a30

Browse files
authored
[RISCV] Add BREV8 and ORC_B to hasAllNBitUsers in RISCVOptWInstrs. (#148076)
These were removed in #147830 because that change ignored the fact that these instructions operate on bytes. This patch adds them back with tests, including a test for the byte-boundary issue. I separated out the commits to show the bad optimization that occurs if we don't round Bits down to the nearest byte.
1 parent 535d691 commit 6882a30

File tree

2 files changed

+191
-2
lines changed

2 files changed

+191
-2
lines changed

llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -323,6 +323,12 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
323323
Worklist.push_back(std::make_pair(UserMI, Bits));
324324
break;
325325

326+
case RISCV::BREV8:
327+
case RISCV::ORC_B:
328+
// BREV8 and ORC_B work on bytes. Round Bits down to the nearest byte.
329+
Worklist.push_back(std::make_pair(UserMI, alignDown(Bits, 8)));
330+
break;
331+
326332
case RISCV::PseudoCCMOVGPR:
327333
case RISCV::PseudoCCMOVGPRNoX0:
328334
// Either operand 4 or operand 5 is returned by this instruction. If

llvm/test/CodeGen/RISCV/sextw-removal.ll

Lines changed: 185 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zknh,+v -target-abi=lp64f \
33
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64I
4-
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \
4+
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \
55
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64ZBB
6-
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \
6+
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \
77
; RUN: -riscv-disable-sextw-removal | FileCheck %s --check-prefix=NOREMOVAL
88

99
define void @test1(i32 signext %arg, i32 signext %arg1) nounwind {
@@ -1499,3 +1499,186 @@ bb7: ; preds = %bb2
14991499
}
15001500

15011501
declare i32 @llvm.riscv.vmv.x.s.nxv1i32( <vscale x 1 x i32>)
1502+
1503+
; Test that we can look through brev8 in hasAllNBitUsers.
1504+
define signext i32 @test21(i64 %arg1, i64 %arg2, i64 %arg3) {
1505+
; RV64I-LABEL: test21:
1506+
; RV64I: # %bb.0: # %entry
1507+
; RV64I-NEXT: addi a2, a2, -1
1508+
; RV64I-NEXT: lui a3, 61681
1509+
; RV64I-NEXT: lui a4, 209715
1510+
; RV64I-NEXT: addi a3, a3, -241
1511+
; RV64I-NEXT: addi a4, a4, 819
1512+
; RV64I-NEXT: slli a5, a3, 32
1513+
; RV64I-NEXT: add a3, a3, a5
1514+
; RV64I-NEXT: slli a5, a4, 32
1515+
; RV64I-NEXT: add a4, a4, a5
1516+
; RV64I-NEXT: li a5, 256
1517+
; RV64I-NEXT: .LBB25_1: # %bb2
1518+
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
1519+
; RV64I-NEXT: srli a6, a0, 4
1520+
; RV64I-NEXT: and a0, a0, a3
1521+
; RV64I-NEXT: and a6, a6, a3
1522+
; RV64I-NEXT: slli a0, a0, 4
1523+
; RV64I-NEXT: or a0, a6, a0
1524+
; RV64I-NEXT: srli a6, a0, 2
1525+
; RV64I-NEXT: and a0, a0, a4
1526+
; RV64I-NEXT: and a6, a6, a4
1527+
; RV64I-NEXT: slli a0, a0, 2
1528+
; RV64I-NEXT: or a0, a6, a0
1529+
; RV64I-NEXT: andi a6, a0, 65
1530+
; RV64I-NEXT: srli a0, a0, 1
1531+
; RV64I-NEXT: slli a6, a6, 1
1532+
; RV64I-NEXT: andi a0, a0, 1104
1533+
; RV64I-NEXT: or a0, a0, a6
1534+
; RV64I-NEXT: addi a2, a2, 1
1535+
; RV64I-NEXT: add a0, a0, a1
1536+
; RV64I-NEXT: bltu a2, a5, .LBB25_1
1537+
; RV64I-NEXT: # %bb.2: # %bb7
1538+
; RV64I-NEXT: sext.w a0, a0
1539+
; RV64I-NEXT: ret
1540+
;
1541+
; RV64ZBB-LABEL: test21:
1542+
; RV64ZBB: # %bb.0: # %entry
1543+
; RV64ZBB-NEXT: addi a2, a2, -1
1544+
; RV64ZBB-NEXT: li a3, 256
1545+
; RV64ZBB-NEXT: .LBB25_1: # %bb2
1546+
; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1
1547+
; RV64ZBB-NEXT: brev8 a0, a0
1548+
; RV64ZBB-NEXT: andi a0, a0, 1234
1549+
; RV64ZBB-NEXT: addi a2, a2, 1
1550+
; RV64ZBB-NEXT: addw a0, a0, a1
1551+
; RV64ZBB-NEXT: bltu a2, a3, .LBB25_1
1552+
; RV64ZBB-NEXT: # %bb.2: # %bb7
1553+
; RV64ZBB-NEXT: ret
1554+
;
1555+
; NOREMOVAL-LABEL: test21:
1556+
; NOREMOVAL: # %bb.0: # %entry
1557+
; NOREMOVAL-NEXT: addi a2, a2, -1
1558+
; NOREMOVAL-NEXT: li a3, 256
1559+
; NOREMOVAL-NEXT: .LBB25_1: # %bb2
1560+
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
1561+
; NOREMOVAL-NEXT: brev8 a0, a0
1562+
; NOREMOVAL-NEXT: andi a0, a0, 1234
1563+
; NOREMOVAL-NEXT: addi a2, a2, 1
1564+
; NOREMOVAL-NEXT: add a0, a0, a1
1565+
; NOREMOVAL-NEXT: bltu a2, a3, .LBB25_1
1566+
; NOREMOVAL-NEXT: # %bb.2: # %bb7
1567+
; NOREMOVAL-NEXT: sext.w a0, a0
1568+
; NOREMOVAL-NEXT: ret
1569+
entry:
1570+
br label %bb2
1571+
1572+
bb2: ; preds = %bb2, %entry
1573+
%i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
1574+
%i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ]
1575+
%i3 = add i64 %i2, 1
1576+
%bswap = call i64 @llvm.bswap.i64(i64 %i1)
1577+
%bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap)
1578+
%i4 = and i64 %bitreverse, 1234
1579+
%i5 = add i64 %i4, %arg2
1580+
%i6 = icmp ugt i64 %i2, 255
1581+
br i1 %i6, label %bb7, label %bb2
1582+
1583+
bb7: ; preds = %bb2
1584+
%i7 = trunc i64 %i5 to i32
1585+
ret i32 %i7
1586+
}
1587+
1588+
; Negative test for looking through brev8. Make sure we consider that it works
1589+
; on bytes.
1590+
define signext i32 @test22(i64 %arg1, i64 %arg2, i64 %arg3) {
1591+
; RV64I-LABEL: test22:
1592+
; RV64I: # %bb.0: # %entry
1593+
; RV64I-NEXT: addi a2, a2, -1
1594+
; RV64I-NEXT: lui a3, %hi(.LCPI26_0)
1595+
; RV64I-NEXT: lui a4, %hi(.LCPI26_1)
1596+
; RV64I-NEXT: lui a5, %hi(.LCPI26_2)
1597+
; RV64I-NEXT: lui a6, %hi(.LCPI26_3)
1598+
; RV64I-NEXT: li a7, 69
1599+
; RV64I-NEXT: ld a3, %lo(.LCPI26_0)(a3)
1600+
; RV64I-NEXT: ld a4, %lo(.LCPI26_1)(a4)
1601+
; RV64I-NEXT: ld a5, %lo(.LCPI26_2)(a5)
1602+
; RV64I-NEXT: ld a6, %lo(.LCPI26_3)(a6)
1603+
; RV64I-NEXT: slli a7, a7, 32
1604+
; RV64I-NEXT: li t0, 65
1605+
; RV64I-NEXT: slli t0, t0, 28
1606+
; RV64I-NEXT: li t1, 256
1607+
; RV64I-NEXT: .LBB26_1: # %bb2
1608+
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
1609+
; RV64I-NEXT: slli t2, a0, 11
1610+
; RV64I-NEXT: slli a0, a0, 3
1611+
; RV64I-NEXT: and t2, t2, a3
1612+
; RV64I-NEXT: and a0, a0, a4
1613+
; RV64I-NEXT: or a0, a0, t2
1614+
; RV64I-NEXT: srli t2, a0, 2
1615+
; RV64I-NEXT: and a0, a0, a6
1616+
; RV64I-NEXT: and t2, t2, a5
1617+
; RV64I-NEXT: slli a0, a0, 2
1618+
; RV64I-NEXT: or a0, t2, a0
1619+
; RV64I-NEXT: srli t2, a0, 1
1620+
; RV64I-NEXT: and a0, a0, t0
1621+
; RV64I-NEXT: and t2, t2, a7
1622+
; RV64I-NEXT: slli a0, a0, 1
1623+
; RV64I-NEXT: or a0, t2, a0
1624+
; RV64I-NEXT: srli a0, a0, 28
1625+
; RV64I-NEXT: addi a2, a2, 1
1626+
; RV64I-NEXT: add a0, a0, a1
1627+
; RV64I-NEXT: bltu a2, t1, .LBB26_1
1628+
; RV64I-NEXT: # %bb.2: # %bb7
1629+
; RV64I-NEXT: sext.w a0, a0
1630+
; RV64I-NEXT: ret
1631+
;
1632+
; RV64ZBB-LABEL: test22:
1633+
; RV64ZBB: # %bb.0: # %entry
1634+
; RV64ZBB-NEXT: addi a2, a2, -1
1635+
; RV64ZBB-NEXT: li a3, 256
1636+
; RV64ZBB-NEXT: .LBB26_1: # %bb2
1637+
; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1
1638+
; RV64ZBB-NEXT: slli a0, a0, 7
1639+
; RV64ZBB-NEXT: brev8 a0, a0
1640+
; RV64ZBB-NEXT: srli a0, a0, 28
1641+
; RV64ZBB-NEXT: andi a0, a0, 1234
1642+
; RV64ZBB-NEXT: addi a2, a2, 1
1643+
; RV64ZBB-NEXT: add a0, a0, a1
1644+
; RV64ZBB-NEXT: bltu a2, a3, .LBB26_1
1645+
; RV64ZBB-NEXT: # %bb.2: # %bb7
1646+
; RV64ZBB-NEXT: sext.w a0, a0
1647+
; RV64ZBB-NEXT: ret
1648+
;
1649+
; NOREMOVAL-LABEL: test22:
1650+
; NOREMOVAL: # %bb.0: # %entry
1651+
; NOREMOVAL-NEXT: addi a2, a2, -1
1652+
; NOREMOVAL-NEXT: li a3, 256
1653+
; NOREMOVAL-NEXT: .LBB26_1: # %bb2
1654+
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
1655+
; NOREMOVAL-NEXT: slli a0, a0, 7
1656+
; NOREMOVAL-NEXT: brev8 a0, a0
1657+
; NOREMOVAL-NEXT: srli a0, a0, 28
1658+
; NOREMOVAL-NEXT: andi a0, a0, 1234
1659+
; NOREMOVAL-NEXT: addi a2, a2, 1
1660+
; NOREMOVAL-NEXT: add a0, a0, a1
1661+
; NOREMOVAL-NEXT: bltu a2, a3, .LBB26_1
1662+
; NOREMOVAL-NEXT: # %bb.2: # %bb7
1663+
; NOREMOVAL-NEXT: sext.w a0, a0
1664+
; NOREMOVAL-NEXT: ret
1665+
entry:
1666+
br label %bb2
1667+
1668+
bb2: ; preds = %bb2, %entry
1669+
%i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
1670+
%i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ]
1671+
%i3 = add i64 %i2, 1
1672+
%shl = shl i64 %i1, 7
1673+
%bswap = call i64 @llvm.bswap.i64(i64 %shl)
1674+
%bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap)
1675+
%lshr = lshr i64 %bitreverse, 28
1676+
%i4 = and i64 %lshr, 1234
1677+
%i5 = add i64 %i4, %arg2
1678+
%i6 = icmp ugt i64 %i2, 255
1679+
br i1 %i6, label %bb7, label %bb2
1680+
1681+
bb7: ; preds = %bb2
1682+
%i7 = trunc i64 %i5 to i32
1683+
ret i32 %i7
1684+
}

0 commit comments

Comments
 (0)