Skip to content

Commit b481e85

Browse files
davemgreen authored and memfrob committed
[ARM] Fix for matching reductions that are both sext and zext.
Fix a silly mistake in the check, which did not make sure that _both_ operands of the multiply used the correct extend opcode.
1 parent 9dc92cc commit b481e85

File tree

3 files changed: 73 additions (+73) and 5 deletions (-5).

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16063,7 +16063,7 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
1606316063
return false;
1606416064
SDValue ExtA = Mul->getOperand(0);
1606516065
SDValue ExtB = Mul->getOperand(1);
16066-
if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
16066+
if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
1606716067
return false;
1606816068
A = ExtA->getOperand(0);
1606916069
B = ExtB->getOperand(0);
@@ -16097,7 +16097,7 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
1609716097
return false;
1609816098
SDValue ExtA = Mul->getOperand(0);
1609916099
SDValue ExtB = Mul->getOperand(1);
16100-
if (ExtA->getOpcode() != ExtendCode && ExtB->getOpcode() != ExtendCode)
16100+
if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
1610116101
return false;
1610216102
A = ExtA->getOperand(0);
1610316103
B = ExtB->getOperand(0);

llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -695,8 +695,22 @@ entry:
695695
define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_szext(<16 x i8> %x, <16 x i8> %y) {
696696
; CHECK-LABEL: add_v16i8_v16i16_szext:
697697
; CHECK: @ %bb.0: @ %entry
698-
; CHECK-NEXT: vmlav.s8 r0, q0, q1
698+
; CHECK-NEXT: .pad #32
699+
; CHECK-NEXT: sub sp, #32
700+
; CHECK-NEXT: add r0, sp, #16
701+
; CHECK-NEXT: mov r1, sp
702+
; CHECK-NEXT: vstrw.32 q1, [r0]
703+
; CHECK-NEXT: vstrw.32 q0, [r1]
704+
; CHECK-NEXT: vldrb.u16 q0, [r0, #8]
705+
; CHECK-NEXT: vldrb.s16 q1, [r1, #8]
706+
; CHECK-NEXT: vldrb.s16 q2, [r1]
707+
; CHECK-NEXT: vmul.i16 q0, q1, q0
708+
; CHECK-NEXT: vldrb.u16 q1, [r0]
709+
; CHECK-NEXT: vmul.i16 q1, q2, q1
710+
; CHECK-NEXT: vadd.i16 q0, q1, q0
711+
; CHECK-NEXT: vaddv.u16 r0, q0
699712
; CHECK-NEXT: sxth r0, r0
713+
; CHECK-NEXT: add sp, #32
700714
; CHECK-NEXT: bx lr
701715
entry:
702716
%xx = sext <16 x i8> %x to <16 x i16>

llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll

Lines changed: 56 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1112,9 +1112,63 @@ entry:
11121112
define arm_aapcs_vfpcc signext i16 @add_v16i8_v16i16_szext(<16 x i8> %x, <16 x i8> %y, <16 x i8> %b) {
11131113
; CHECK-LABEL: add_v16i8_v16i16_szext:
11141114
; CHECK: @ %bb.0: @ %entry
1115-
; CHECK-NEXT: vpt.i8 eq, q2, zr
1116-
; CHECK-NEXT: vmlavt.s8 r0, q0, q1
1115+
; CHECK-NEXT: .pad #32
1116+
; CHECK-NEXT: sub sp, #32
1117+
; CHECK-NEXT: add r0, sp, #16
1118+
; CHECK-NEXT: mov r1, sp
1119+
; CHECK-NEXT: vstrw.32 q1, [r0]
1120+
; CHECK-NEXT: vstrw.32 q0, [r1]
1121+
; CHECK-NEXT: vcmp.i8 eq, q2, zr
1122+
; CHECK-NEXT: vmov.i8 q0, #0x0
1123+
; CHECK-NEXT: vmov.i8 q1, #0xff
1124+
; CHECK-NEXT: vldrb.u16 q2, [r0]
1125+
; CHECK-NEXT: vpsel q0, q1, q0
1126+
; CHECK-NEXT: vldrb.s16 q3, [r1]
1127+
; CHECK-NEXT: vmov.u8 r2, q0[0]
1128+
; CHECK-NEXT: vmov.16 q1[0], r2
1129+
; CHECK-NEXT: vmov.u8 r2, q0[1]
1130+
; CHECK-NEXT: vmov.16 q1[1], r2
1131+
; CHECK-NEXT: vmov.u8 r2, q0[2]
1132+
; CHECK-NEXT: vmov.16 q1[2], r2
1133+
; CHECK-NEXT: vmov.u8 r2, q0[3]
1134+
; CHECK-NEXT: vmov.16 q1[3], r2
1135+
; CHECK-NEXT: vmov.u8 r2, q0[4]
1136+
; CHECK-NEXT: vmov.16 q1[4], r2
1137+
; CHECK-NEXT: vmov.u8 r2, q0[5]
1138+
; CHECK-NEXT: vmov.16 q1[5], r2
1139+
; CHECK-NEXT: vmov.u8 r2, q0[6]
1140+
; CHECK-NEXT: vmov.16 q1[6], r2
1141+
; CHECK-NEXT: vmov.u8 r2, q0[7]
1142+
; CHECK-NEXT: vmov.16 q1[7], r2
1143+
; CHECK-NEXT: vmov.u8 r2, q0[8]
1144+
; CHECK-NEXT: vcmp.i16 ne, q1, zr
1145+
; CHECK-NEXT: vmov.i32 q1, #0x0
1146+
; CHECK-NEXT: vpst
1147+
; CHECK-NEXT: vmult.i16 q1, q3, q2
1148+
; CHECK-NEXT: vmov.16 q2[0], r2
1149+
; CHECK-NEXT: vmov.u8 r2, q0[9]
1150+
; CHECK-NEXT: vmov.16 q2[1], r2
1151+
; CHECK-NEXT: vmov.u8 r2, q0[10]
1152+
; CHECK-NEXT: vmov.16 q2[2], r2
1153+
; CHECK-NEXT: vmov.u8 r2, q0[11]
1154+
; CHECK-NEXT: vmov.16 q2[3], r2
1155+
; CHECK-NEXT: vmov.u8 r2, q0[12]
1156+
; CHECK-NEXT: vmov.16 q2[4], r2
1157+
; CHECK-NEXT: vmov.u8 r2, q0[13]
1158+
; CHECK-NEXT: vmov.16 q2[5], r2
1159+
; CHECK-NEXT: vmov.u8 r2, q0[14]
1160+
; CHECK-NEXT: vmov.16 q2[6], r2
1161+
; CHECK-NEXT: vmov.u8 r2, q0[15]
1162+
; CHECK-NEXT: vmov.16 q2[7], r2
1163+
; CHECK-NEXT: vldrb.u16 q0, [r0, #8]
1164+
; CHECK-NEXT: vcmp.i16 ne, q2, zr
1165+
; CHECK-NEXT: vldrb.s16 q2, [r1, #8]
1166+
; CHECK-NEXT: vmul.i16 q0, q2, q0
1167+
; CHECK-NEXT: vpst
1168+
; CHECK-NEXT: vaddt.i16 q1, q1, q0
1169+
; CHECK-NEXT: vaddv.u16 r0, q1
11171170
; CHECK-NEXT: sxth r0, r0
1171+
; CHECK-NEXT: add sp, #32
11181172
; CHECK-NEXT: bx lr
11191173
entry:
11201174
%c = icmp eq <16 x i8> %b, zeroinitializer

0 commit comments

Comments
 (0)