Skip to content

Commit d632705

Browse files
committed
[AArch64] Use PerfectShuffle costs in AArch64TTIImpl::getShuffleCost
Given a shuffle of 4 elements, each 16 or 32 bits wide, we can take the cost directly from the PerfectShuffle tables to get a slightly more accurate cost for the resulting shuffle. Differential Revision: https://reviews.llvm.org/D123409
1 parent c45e4c1 commit d632705

File tree

6 files changed

+69
-47
lines changed

6 files changed

+69
-47
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11593,23 +11593,8 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1159311593

1159411594
if (VT.getVectorNumElements() == 4 &&
1159511595
(VT.is128BitVector() || VT.is64BitVector())) {
11596-
unsigned PFIndexes[4];
11597-
for (unsigned i = 0; i != 4; ++i) {
11598-
if (M[i] < 0)
11599-
PFIndexes[i] = 8;
11600-
else
11601-
PFIndexes[i] = M[i];
11602-
}
11603-
11604-
// Compute the index in the perfect shuffle table.
11605-
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
11606-
PFIndexes[2] * 9 + PFIndexes[3];
11607-
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
11608-
unsigned Cost = (PFEntry >> 30);
11609-
11610-
// The cost tables encode cost 0 or cost 1 shuffles using the value 0 in
11611-
// the top 2 bits.
11612-
if (Cost == 0)
11596+
unsigned Cost = getPerfectShuffleCost(M);
11597+
if (Cost <= 1)
1161311598
return true;
1161411599
}
1161511600

llvm/lib/Target/AArch64/AArch64PerfectShuffle.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
1515
#define LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
1616

17+
#include "llvm/ADT/ArrayRef.h"
18+
1719
// 31 entries have cost 0
1820
// 730 entries have cost 1
1921
// 3658 entries have cost 2
@@ -6584,4 +6586,36 @@ static const unsigned PerfectShuffleTable[6561 + 1] = {
65846586
835584U, // <u,u,u,u>: Cost 0 copy LHS
65856587
0};
65866588

6589+
/// Look up the cost of a 4-element, two-input shuffle in PerfectShuffleTable.
///
/// \param M The shuffle mask. It must have exactly four entries; a negative
///          entry marks an undef lane, and every entry must be below 8.
/// \returns 0 when the mask is a plain copy of either input, otherwise the
///          cost stored in the table entry for this mask.
static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
  assert(M.size() == 4 && "Expected a 4 entry perfect shuffle");

  // A mask whose defined lanes each pick the same-numbered lane of one input
  // is a nop copy of that input. LHS lanes are numbered from 0 and RHS lanes
  // from 4; undef lanes match anything.
  auto IsCopyOfInput = [M](int FirstLane) {
    for (unsigned Lane = 0, End = M.size(); Lane != End; ++Lane)
      if (M[Lane] >= 0 && M[Lane] != static_cast<int>(Lane) + FirstLane)
        return false;
    return true;
  };
  if (IsCopyOfInput(0) || IsCopyOfInput(4))
    return 0;

  // The table is indexed by the four mask elements read as base-9 digits,
  // most significant first, with undef elements encoded as the digit 8.
  unsigned TableIdx = 0;
  for (unsigned Lane = 0; Lane != 4; ++Lane) {
    assert(M[Lane] < 8 && "Expected a maximum entry of 8 for shuffle mask");
    const unsigned Digit = M[Lane] < 0 ? 8u : static_cast<unsigned>(M[Lane]);
    TableIdx = TableIdx * 9 + Digit;
  }

  // The upper two bits of the entry hold Cost-1, so add the 1 back.
  const unsigned Entry = PerfectShuffleTable[TableIdx];
  return (Entry >> 30) + 1;
}
6620+
65876621
#endif

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "AArch64TargetTransformInfo.h"
1010
#include "AArch64ExpandImm.h"
11+
#include "AArch64PerfectShuffle.h"
1112
#include "MCTargetDesc/AArch64AddressingModes.h"
1213
#include "llvm/Analysis/IVDescriptors.h"
1314
#include "llvm/Analysis/LoopInfo.h"
@@ -2597,19 +2598,27 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
25972598
ArrayRef<const Value *> Args) {
25982599
Kind = improveShuffleKindFromMask(Kind, Mask);
25992600
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
2601+
2602+
// Check for broadcast loads.
2603+
if (Kind == TTI::SK_Broadcast) {
2604+
bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
2605+
if (IsLoad && LT.second.isVector() &&
2606+
isLegalBroadcastLoad(Tp->getElementType(),
2607+
LT.second.getVectorElementCount()))
2608+
return 0; // broadcast is handled by ld1r
2609+
}
2610+
2611+
// If we have 4 elements for the shuffle and a Mask, get the cost straight
2612+
// from the perfect shuffle tables.
2613+
if (Mask.size() == 4 && Tp->getElementCount() == ElementCount::getFixed(4) &&
2614+
(Tp->getScalarSizeInBits() == 16 || Tp->getScalarSizeInBits() == 32) &&
2615+
all_of(Mask, [](int E) { return E < 8; }))
2616+
return getPerfectShuffleCost(Mask);
2617+
26002618
if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
26012619
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
26022620
Kind == TTI::SK_Reverse) {
26032621

2604-
// Check for broadcast loads.
2605-
if (Kind == TTI::SK_Broadcast) {
2606-
bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
2607-
if (IsLoad && LT.second.isVector() &&
2608-
isLegalBroadcastLoad(Tp->getElementType(),
2609-
LT.second.getVectorElementCount()))
2610-
return 0; // broadcast is handled by ld1r
2611-
}
2612-
26132622
static const CostTblEntry ShuffleTbl[] = {
26142623
// Broadcast shuffle kinds can be performed with 'dup'.
26152624
{ TTI::SK_Broadcast, MVT::v8i8, 1 },

llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ define void @shuffle() {
88
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
99
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
1010
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
1212
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
1313
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
14-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
1515
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
16-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
16+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
1717
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1818
;
1919
%v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
@@ -39,16 +39,16 @@ define void @concat() {
3939
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4040
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4141
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
42-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
42+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4343
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4444
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
45-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
45+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4646
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4747
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
48-
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
48+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4949
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5050
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
51-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
51+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5252
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5353
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5454
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -92,8 +92,8 @@ define void @insert_subvec() {
9292
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_4_2 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
9393
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_4_3 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
9494
; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v16i8_4_05 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
95-
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4i16_2_0 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
96-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2_1 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
95+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_2_0 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
96+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_2_1 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
9797
; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8i16_2_0 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
9898
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2_1 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
9999
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2_2 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
@@ -105,7 +105,7 @@ define void @insert_subvec() {
105105
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_4_3 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
106106
; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v16i16_4_05 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
107107
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_2_0 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
108-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_2_1 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
108+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_2_1 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
109109
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_2_0 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
110110
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_2_1 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
111111
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_2_2 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>

llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ define <16 x i8> @sel.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
2020
}
2121

2222
; COST-LABEL: sel.v4i16
23-
; COST: Found an estimated cost of 18 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
23+
; COST: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
2424
; CODE-LABEL: sel.v4i16
2525
; CODE: rev32 v0.4h, v0.4h
2626
; CODE: trn2 v0.4h, v0.4h, v1.4h

llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -308,16 +308,10 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
308308

309309
define <4 x float> @reverse_hadd_v4f32(<4 x float> %a, <4 x float> %b) {
310310
; CHECK-LABEL: @reverse_hadd_v4f32(
311-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <2 x i32> <i32 2, i32 0>
312-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 3, i32 1>
313-
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
314-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
315-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <2 x i32> <i32 2, i32 0>
316-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <2 x i32> <i32 3, i32 1>
317-
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x float> [[TMP5]], [[TMP6]]
318-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
319-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
320-
; CHECK-NEXT: ret <4 x float> [[TMP9]]
311+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 6, i32 4>
312+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[A]], <4 x i32> <i32 3, i32 1, i32 7, i32 5>
313+
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
314+
; CHECK-NEXT: ret <4 x float> [[TMP3]]
321315
;
322316
%vecext = extractelement <4 x float> %a, i32 0
323317
%vecext1 = extractelement <4 x float> %a, i32 1

0 commit comments

Comments
 (0)