Skip to content

Commit ada0356

Browse files
committed
[X86] Extend all_of(icmp_eq()) / any_of(icmp_ne()) -> scalar integer fold to AVX512 targets
Extends 1bb95a3 to combine on AVX512 targets where the vXi1 type is legal. Continues work on addressing Issue llvm#53419.
1 parent 299f3ac commit ada0356

File tree

2 files changed

+36
-86
lines changed

2 files changed

+36
-86
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44646,6 +44646,23 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
4464644646
// Special case for (pre-legalization) vXi1 reductions.
4464744647
if (NumElts > 64 || !isPowerOf2_32(NumElts))
4464844648
return SDValue();
44649+
if (Match.getOpcode() == ISD::SETCC) {
44650+
ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get();
44651+
if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) ||
44652+
(BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) {
44653+
// If representable as a scalar integer:
44654+
// For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y.
44655+
// For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y.
44656+
EVT VecVT = Match.getOperand(0).getValueType();
44657+
EVT IntVT = EVT::getIntegerVT(Ctx, VecVT.getSizeInBits());
44658+
if (TLI.isTypeLegal(IntVT)) {
44659+
SDValue LHS = DAG.getFreeze(Match.getOperand(0));
44660+
SDValue RHS = DAG.getFreeze(Match.getOperand(1));
44661+
return DAG.getSetCC(DL, ExtractVT, DAG.getBitcast(IntVT, LHS),
44662+
DAG.getBitcast(IntVT, RHS), CC);
44663+
}
44664+
}
44665+
}
4464944666
if (TLI.isTypeLegal(MatchVT)) {
4465044667
// If this is a legal AVX512 predicate type then we can just bitcast.
4465144668
EVT MovmskVT = EVT::getIntegerVT(Ctx, NumElts);
@@ -44657,20 +44674,7 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
4465744674
ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get();
4465844675
if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) ||
4465944676
(BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) {
44660-
EVT VecVT = Match.getOperand(0).getValueType();
44661-
44662-
// If representable as a scalar integer:
44663-
// For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y.
44664-
// For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y.
44665-
EVT IntVT = EVT::getIntegerVT(Ctx, VecVT.getSizeInBits());
44666-
if (TLI.isTypeLegal(IntVT)) {
44667-
SDValue LHS = DAG.getFreeze(Match.getOperand(0));
44668-
SDValue RHS = DAG.getFreeze(Match.getOperand(1));
44669-
return DAG.getSetCC(DL, ExtractVT, DAG.getBitcast(IntVT, LHS),
44670-
DAG.getBitcast(IntVT, RHS), CC);
44671-
}
44672-
44673-
EVT VecSVT = VecVT.getScalarType();
44677+
EVT VecSVT = Match.getOperand(0).getValueType().getScalarType();
4467444678
if (VecSVT != MVT::i8 && (VecSVT.getSizeInBits() % 8) == 0) {
4467544679
NumElts *= VecSVT.getSizeInBits() / 8;
4467644680
EVT CmpVT = EVT::getVectorVT(Ctx, MVT::i8, NumElts);

llvm/test/CodeGen/X86/pr53419.ll

Lines changed: 18 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -13,32 +13,12 @@ declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
1313
; FIXME: All four versions are semantically equivalent and should produce same asm as scalar version.
1414

1515
define i1 @intrinsic_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
16-
; SSE-LABEL: intrinsic_v2i8:
17-
; SSE: # %bb.0: # %bb
18-
; SSE-NEXT: movzwl (%rdi), %eax
19-
; SSE-NEXT: cmpw %ax, (%rsi)
20-
; SSE-NEXT: sete %al
21-
; SSE-NEXT: retq
22-
;
23-
; AVX-LABEL: intrinsic_v2i8:
24-
; AVX: # %bb.0: # %bb
25-
; AVX-NEXT: movzwl (%rdi), %eax
26-
; AVX-NEXT: cmpw %ax, (%rsi)
27-
; AVX-NEXT: sete %al
28-
; AVX-NEXT: retq
29-
;
30-
; AVX512-LABEL: intrinsic_v2i8:
31-
; AVX512: # %bb.0: # %bb
32-
; AVX512-NEXT: movzwl (%rsi), %eax
33-
; AVX512-NEXT: vmovd %eax, %xmm0
34-
; AVX512-NEXT: movzwl (%rdi), %eax
35-
; AVX512-NEXT: vmovd %eax, %xmm1
36-
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
37-
; AVX512-NEXT: knotw %k0, %k0
38-
; AVX512-NEXT: kmovd %k0, %eax
39-
; AVX512-NEXT: testb $3, %al
40-
; AVX512-NEXT: sete %al
41-
; AVX512-NEXT: retq
16+
; X64-LABEL: intrinsic_v2i8:
17+
; X64: # %bb.0: # %bb
18+
; X64-NEXT: movzwl (%rdi), %eax
19+
; X64-NEXT: cmpw %ax, (%rsi)
20+
; X64-NEXT: sete %al
21+
; X64-NEXT: retq
4222
;
4323
; X86-LABEL: intrinsic_v2i8:
4424
; X86: # %bb.0: # %bb
@@ -57,30 +37,12 @@ bb:
5737
}
5838

5939
define i1 @intrinsic_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
60-
; SSE-LABEL: intrinsic_v4i8:
61-
; SSE: # %bb.0: # %bb
62-
; SSE-NEXT: movl (%rdi), %eax
63-
; SSE-NEXT: cmpl %eax, (%rsi)
64-
; SSE-NEXT: sete %al
65-
; SSE-NEXT: retq
66-
;
67-
; AVX-LABEL: intrinsic_v4i8:
68-
; AVX: # %bb.0: # %bb
69-
; AVX-NEXT: movl (%rdi), %eax
70-
; AVX-NEXT: cmpl %eax, (%rsi)
71-
; AVX-NEXT: sete %al
72-
; AVX-NEXT: retq
73-
;
74-
; AVX512-LABEL: intrinsic_v4i8:
75-
; AVX512: # %bb.0: # %bb
76-
; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
77-
; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
78-
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
79-
; AVX512-NEXT: knotw %k0, %k0
80-
; AVX512-NEXT: kmovd %k0, %eax
81-
; AVX512-NEXT: testb $15, %al
82-
; AVX512-NEXT: sete %al
83-
; AVX512-NEXT: retq
40+
; X64-LABEL: intrinsic_v4i8:
41+
; X64: # %bb.0: # %bb
42+
; X64-NEXT: movl (%rdi), %eax
43+
; X64-NEXT: cmpl %eax, (%rsi)
44+
; X64-NEXT: sete %al
45+
; X64-NEXT: retq
8446
;
8547
; X86-LABEL: intrinsic_v4i8:
8648
; X86: # %bb.0: # %bb
@@ -99,28 +61,12 @@ bb:
9961
}
10062

10163
define i1 @intrinsic_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
102-
; SSE-LABEL: intrinsic_v8i8:
103-
; SSE: # %bb.0: # %bb
104-
; SSE-NEXT: movq (%rdi), %rax
105-
; SSE-NEXT: cmpq %rax, (%rsi)
106-
; SSE-NEXT: sete %al
107-
; SSE-NEXT: retq
108-
;
109-
; AVX-LABEL: intrinsic_v8i8:
110-
; AVX: # %bb.0: # %bb
111-
; AVX-NEXT: movq (%rdi), %rax
112-
; AVX-NEXT: cmpq %rax, (%rsi)
113-
; AVX-NEXT: sete %al
114-
; AVX-NEXT: retq
115-
;
116-
; AVX512-LABEL: intrinsic_v8i8:
117-
; AVX512: # %bb.0: # %bb
118-
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
119-
; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
120-
; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
121-
; AVX512-NEXT: kortestb %k0, %k0
122-
; AVX512-NEXT: setb %al
123-
; AVX512-NEXT: retq
64+
; X64-LABEL: intrinsic_v8i8:
65+
; X64: # %bb.0: # %bb
66+
; X64-NEXT: movq (%rdi), %rax
67+
; X64-NEXT: cmpq %rax, (%rsi)
68+
; X64-NEXT: sete %al
69+
; X64-NEXT: retq
12470
;
12571
; X86-LABEL: intrinsic_v8i8:
12672
; X86: # %bb.0: # %bb

0 commit comments

Comments
 (0)