Skip to content

Commit ed8dffe

Browse files
committed
[X86] getFauxShuffle - don't assume that an UNDEF src element for AND/ANDNP results in an UNDEF shuffle mask index
The corresponding element of the other src might be zero, which guarantees a zero (not undef) result element. Fixes #55157
1 parent ae81435 commit ed8dffe

File tree

2 files changed

+14
-8
lines changed

2 files changed

+14
-8
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8041,11 +8041,11 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
80418041
uint64_t ZeroMask = IsAndN ? 255 : 0;
80428042
if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
80438043
return false;
8044+
// We can't assume an undef src element gives an undef dst - the other src
8045+
// might be zero.
8046+
if (!UndefElts.isZero())
8047+
return false;
80448048
for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
8045-
if (UndefElts[i]) {
8046-
Mask.push_back(SM_SentinelUndef);
8047-
continue;
8048-
}
80498049
const APInt &ByteBits = EltBits[i];
80508050
if (ByteBits != 0 && ByteBits != 255)
80518051
return false;

llvm/test/CodeGen/X86/vector-shuffle-combining.ll

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3333,11 +3333,17 @@ define void @PR45604(<32 x i16>* %dst, <8 x i16>* %src) {
33333333
ret void
33343334
}
33353335

3336-
; FIXME: getFauxShuffle AND/ANDN decoding wrongly assumes an undef src always gives an undef dst.
3336+
; getFauxShuffle AND/ANDN decoding wrongly assumed an undef src always gives an undef dst.
33373337
define <2 x i64> @PR55157(<16 x i8>* %0) {
3338-
; CHECK-LABEL: PR55157:
3339-
; CHECK: # %bb.0:
3340-
; CHECK-NEXT: retq
3338+
; SSE-LABEL: PR55157:
3339+
; SSE: # %bb.0:
3340+
; SSE-NEXT: xorps %xmm0, %xmm0
3341+
; SSE-NEXT: retq
3342+
;
3343+
; AVX-LABEL: PR55157:
3344+
; AVX: # %bb.0:
3345+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
3346+
; AVX-NEXT: retq
33413347
%2 = load <16 x i8>, <16 x i8>* %0, align 16
33423348
%3 = icmp eq <16 x i8> %2, zeroinitializer
33433349
%4 = tail call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> zeroinitializer, <16 x i8> zeroinitializer)

0 commit comments

Comments
 (0)