Skip to content

Commit fe04290

Browse files
authored
[AArch64] Change the default vscale-for-tuning to 1. (llvm#117174)
Most AArch64 cpus outside of Neoverse V1 (256) and A64FX (512) have an SVE vector length of 128, and in environments like Android (where no mcpu option is common) we would expect all cpus to match. This patch changes the default vector length to 128 with -mcpu=generic, to match the most common case.
1 parent 31b7d43 commit fe04290

15 files changed

+1387
-935
lines changed

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
8888
unsigned StreamingHazardSize;
8989
unsigned MinSVEVectorSizeInBits;
9090
unsigned MaxSVEVectorSizeInBits;
91-
unsigned VScaleForTuning = 2;
91+
unsigned VScaleForTuning = 1;
9292
TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;
9393

9494
bool EnableSubregLiveness;

llvm/test/Analysis/CostModel/AArch64/sve-gather.ll

Lines changed: 27 additions & 55 deletions
Large diffs are not rendered by default.

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 568 additions & 270 deletions
Large diffs are not rendered by default.

llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll

Lines changed: 26 additions & 50 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 40 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -8,42 +8,40 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
88
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
99
; CHECK: vector.ph:
1010
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
11-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
11+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
1212
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
1313
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
1414
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
1515
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1616
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1717
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
18-
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
19-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
20-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
21-
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
22-
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 1)
23-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP9]]
18+
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
19+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
20+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
21+
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 8 x i64> [[TMP8]], zeroinitializer
22+
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP9]], splat (i64 1)
23+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
2424
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
25-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
26-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
27-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[VAL]], i64 0
28-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
25+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
26+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
27+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
28+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2929
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3030
; CHECK: vector.body:
3131
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
32-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
33-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
32+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
33+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3434
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0
3535
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
36-
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 3)
37-
; CHECK-NEXT: [[TMP15:%.*]] = lshr <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP14]]
38-
; CHECK-NEXT: [[TMP16:%.*]] = trunc <vscale x 4 x i64> [[TMP15]] to <vscale x 4 x i8>
36+
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
37+
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
38+
; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
3939
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
40-
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP16]], ptr [[TMP17]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
40+
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
4141
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
42-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 8)
43-
; CHECK-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
44-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
45-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0
46-
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
42+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
43+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
44+
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4745
; CHECK: middle.block:
4846
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
4947
; CHECK: scalar.ph:
@@ -97,42 +95,40 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
9795
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
9896
; CHECK: vector.ph:
9997
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
100-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
98+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
10199
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
102100
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
103101
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
104102
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
105103
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
106104
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
107-
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
108-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
109-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
110-
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
111-
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 1)
112-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP9]]
105+
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
106+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
107+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
108+
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 8 x i64> [[TMP8]], zeroinitializer
109+
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP9]], splat (i64 1)
110+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
113111
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
114-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
115-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
116-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[VAL]], i64 0
117-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
112+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
113+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
114+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
115+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
118116
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
119117
; CHECK: vector.body:
120118
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
121-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
122-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
119+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
120+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
123121
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0
124122
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
125-
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 3)
126-
; CHECK-NEXT: [[TMP15:%.*]] = lshr <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP14]]
127-
; CHECK-NEXT: [[TMP16:%.*]] = trunc <vscale x 4 x i64> [[TMP15]] to <vscale x 4 x i8>
123+
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
124+
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
125+
; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
128126
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
129-
; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP16]], ptr [[TMP17]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
127+
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
130128
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
131-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
132-
; CHECK-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
133-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
134-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i1> [[TMP18]], i32 0
135-
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
129+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
130+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
131+
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
136132
; CHECK: middle.block:
137133
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
138134
; CHECK: scalar.ph:

0 commit comments

Comments
 (0)