Skip to content

Commit 35a3925

Browse files
authored
[X86] widenSubVector - widen from smaller build vector if the upper elements are already the same padding elements (#122445)
Further simplifies some shuffle masks, enabling additional combines.
1 parent 24bb180 commit 35a3925

File tree

3 files changed

+16
-6
lines changed

3 files changed

+16
-6
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4144,9 +4144,20 @@ static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
41444144
static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
41454145
const X86Subtarget &Subtarget, SelectionDAG &DAG,
41464146
const SDLoc &dl) {
4147-
assert(Vec.getValueSizeInBits().getFixedValue() <= VT.getFixedSizeInBits() &&
4148-
Vec.getValueType().getScalarType() == VT.getScalarType() &&
4147+
EVT VecVT = Vec.getValueType();
4148+
assert(VecVT.getFixedSizeInBits() <= VT.getFixedSizeInBits() &&
4149+
VecVT.getScalarType() == VT.getScalarType() &&
41494150
"Unsupported vector widening type");
4151+
// If the upper 128-bits of a build vector are already undef/zero, then try to
4152+
// widen from the lower 128-bits.
4153+
if (Vec.getOpcode() == ISD::BUILD_VECTOR && VecVT.is256BitVector()) {
4154+
unsigned NumSrcElts = VecVT.getVectorNumElements();
4155+
ArrayRef<SDUse> Hi = Vec->ops().drop_front(NumSrcElts / 2);
4156+
if (all_of(Hi, [&](SDValue V) {
4157+
return V.isUndef() || (ZeroNewElements && X86::isZeroNode(V));
4158+
}))
4159+
Vec = extract128BitVector(Vec, 0, DAG, dl);
4160+
}
41504161
SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
41514162
: DAG.getUNDEF(VT);
41524163
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,

llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -442,7 +442,7 @@ define <4 x double> @PR34175(ptr %p) {
442442
;
443443
; AVX512BW-LABEL: PR34175:
444444
; AVX512BW: # %bb.0:
445-
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,8,32,40,0,8,32,40,0,8,32,40,0,8,32,40]
445+
; AVX512BW-NEXT: vmovq {{.*#+}} xmm0 = [0,8,32,40,0,0,0,0]
446446
; AVX512BW-NEXT: vmovdqu (%rdi), %ymm1
447447
; AVX512BW-NEXT: vmovdqu 32(%rdi), %ymm2
448448
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
@@ -461,7 +461,7 @@ define <4 x double> @PR34175(ptr %p) {
461461
;
462462
; AVX512VBMI-LABEL: PR34175:
463463
; AVX512VBMI: # %bb.0:
464-
; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,8,32,40,0,8,32,40,0,8,32,40,0,8,32,40]
464+
; AVX512VBMI-NEXT: vmovq {{.*#+}} xmm0 = [0,8,32,40,0,0,0,0]
465465
; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1
466466
; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %ymm2
467467
; AVX512VBMI-NEXT: vpermt2w %zmm2, %zmm0, %zmm1

llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -598,8 +598,7 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, ptr n
598598
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,3,10,1]
599599
; X64-AVX512-NEXT: vpermi2pd %zmm0, %zmm4, %zmm3
600600
; X64-AVX512-NEXT: vmovapd %ymm3, (%rsi)
601-
; X64-AVX512-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [3,11,3,11]
602-
; X64-AVX512-NEXT: # ymm3 = mem[0,1,0,1]
601+
; X64-AVX512-NEXT: vmovapd {{.*#+}} xmm3 = [3,11]
603602
; X64-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3
604603
; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,8,9,3]
605604
; X64-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0

0 commit comments

Comments
 (0)