Skip to content

Commit 1a60c74

Browse files
authored
[CostModel][X86] SK_InsertSubvector inserted into the lowest subvector should be treated as SK_Select blend (#145892)
X86 uses implicit widening and BLEND/MOV shuffles in these cases - otherwise we still treat it as a SK_PermuteTwoSrc
1 parent 06a4394 commit 1a60c74

File tree

4 files changed

+20
-38
lines changed

4 files changed

+20
-38
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,8 +1676,10 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
16761676
SubLT.second == MVT::f32 && (Index == 0 || ST->hasSSE41()))
16771677
return 1;
16781678

1679-
// If the insertion isn't aligned, treat it like a 2-op shuffle.
1680-
Kind = TTI::SK_PermuteTwoSrc;
1679+
// If the insertion is the lowest subvector then it will be blended
1680+
// otherwise treat it like a 2-op shuffle.
1681+
Kind =
1682+
(Index == 0 && LT.first == 1) ? TTI::SK_Select : TTI::SK_PermuteTwoSrc;
16811683
}
16821684

16831685
// Handle some common (illegal) sub-vector types as they are often very cheap

llvm/test/Transforms/PhaseOrdering/X86/hadd.ll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -290,13 +290,10 @@ define <16 x i16> @add_v16i16_0123u56789uBCDEF(<16 x i16> %a, <16 x i16> %b) {
290290
; SSE2-NEXT: ret <16 x i16> [[RESULT]]
291291
;
292292
; SSE4-LABEL: @add_v16i16_0123u56789uBCDEF(
293-
; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
294-
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
295-
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
293+
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
296294
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
297-
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 26, i32 poison, i32 30, i32 poison, i32 poison, i32 poison, i32 poison>
298295
; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 27, i32 poison, i32 31, i32 poison, i32 poison, i32 poison, i32 poison>
299-
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 24, i32 26, i32 28, i32 30>
296+
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 8, i32 10, i32 poison, i32 14, i32 24, i32 26, i32 28, i32 30>
300297
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 25, i32 27, i32 29, i32 31>
301298
; SSE4-NEXT: [[TMP9:%.*]] = add <16 x i16> [[TMP7]], [[TMP8]]
302299
; SSE4-NEXT: ret <16 x i16> [[TMP9]]
@@ -404,11 +401,9 @@ define <16 x i16> @add_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
404401
; SSE2-NEXT: ret <16 x i16> [[RESULT]]
405402
;
406403
; SSE4-LABEL: @add_v16i16_FEuCBA98765432u0(
407-
; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
408-
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
409-
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
404+
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
410405
; SSE4-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
411-
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 27, i32 28, i32 poison, i32 poison, i32 poison, i32 poison>
406+
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 poison, i32 11, i32 12, i32 poison, i32 poison, i32 poison, i32 poison>
412407
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 26, i32 29, i32 poison, i32 poison, i32 poison, i32 poison>
413408
; SSE4-NEXT: [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
414409
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>

llvm/test/Transforms/PhaseOrdering/X86/hsub.ll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -290,13 +290,10 @@ define <16 x i16> @sub_v16i16_0123u56789uBCDEF(<16 x i16> %a, <16 x i16> %b) {
290290
; SSE2-NEXT: ret <16 x i16> [[RESULT]]
291291
;
292292
; SSE4-LABEL: @sub_v16i16_0123u56789uBCDEF(
293-
; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
294-
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
295-
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
293+
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
296294
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
297-
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 26, i32 poison, i32 30, i32 poison, i32 poison, i32 poison, i32 poison>
298295
; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 27, i32 poison, i32 31, i32 poison, i32 poison, i32 poison, i32 poison>
299-
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 24, i32 26, i32 28, i32 30>
296+
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 18, i32 20, i32 22, i32 8, i32 10, i32 poison, i32 14, i32 24, i32 26, i32 28, i32 30>
300297
; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 25, i32 27, i32 29, i32 31>
301298
; SSE4-NEXT: [[TMP9:%.*]] = sub <16 x i16> [[TMP7]], [[TMP8]]
302299
; SSE4-NEXT: ret <16 x i16> [[TMP9]]
@@ -398,11 +395,9 @@ define <16 x i16> @sub_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
398395
; SSE2-NEXT: ret <16 x i16> [[RESULT]]
399396
;
400397
; SSE4-LABEL: @sub_v16i16_FEuCBA98765432u0(
401-
; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
402-
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
403-
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
398+
; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
404399
; SSE4-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
405-
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 26, i32 28, i32 poison, i32 poison, i32 poison, i32 poison>
400+
; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 poison, i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison>
406401
; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 27, i32 29, i32 poison, i32 poison, i32 poison, i32 poison>
407402
; SSE4-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
408403
; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>

llvm/test/Transforms/SLPVectorizer/X86/extracts-non-extendable.ll

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,26 +7,16 @@ define void @test(i64 %v) {
77
; CHECK-NEXT: [[BB:.*:]]
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> <i64 0, i64 poison>, i64 [[V]], i32 1
99
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i64> zeroinitializer, [[TMP0]]
10-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
11-
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 0, [[TMP2]]
12-
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 0, 0
13-
; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP3]], [[TMP4]]
14-
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 0, 0
15-
; CHECK-NEXT: [[TMP7:%.*]] = and i1 [[TMP5]], [[TMP6]]
16-
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 0, 0
17-
; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP7]], [[TMP8]]
18-
; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP9]], false
19-
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 0, [[TMP2]]
20-
; CHECK-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]]
21-
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 0, 0
22-
; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP12]], [[TMP13]]
23-
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 0, 0
24-
; CHECK-NEXT: [[TMP16:%.*]] = and i1 [[TMP14]], [[TMP15]]
25-
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 0, 0
26-
; CHECK-NEXT: [[TMP18:%.*]] = and i1 [[TMP16]], [[TMP17]]
10+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison>
11+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> <i64 poison, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0>, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 13, i32 14, i32 15>
12+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i64> zeroinitializer, [[TMP3]]
2713
; CHECK-NEXT: [[TMP19:%.*]] = icmp ult i64 0, 0
14+
; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
15+
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]])
2816
; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i1 [[TMP18]], i1 false
29-
; CHECK-NEXT: br i1 [[TMP20]], label %[[BB_I107_PREHEADER:.*]], label %[[BB_I27_I_PREHEADER:.*]]
17+
; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP20]]
18+
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP8]], i1 false, i1 false
19+
; CHECK-NEXT: br i1 [[OP_RDX1]], label %[[BB_I107_PREHEADER:.*]], label %[[BB_I27_I_PREHEADER:.*]]
3020
; CHECK: [[BB_I107_PREHEADER]]:
3121
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
3222
; CHECK-NEXT: [[DOTSROA_1278_10_EXTRACT_SHIFT83_I1622_1:%.*]] = xor i64 0, [[TMP21]]

0 commit comments

Comments
 (0)