Skip to content

Commit 74e3dfe

Browse files
authored
[LV] Disable forcing interleaving for uncountable early exit loops (#147993)
Interleaving does not currently work properly when vectorising loops with uncountable early exits. Interleaving is already disabled for normal vectorisation and for the pragma/hint; this patch also disables it when interleaving is forced via `-force-vector-interleave`.
1 parent 6bed7b7 commit 74e3dfe

File tree

4 files changed

+77
-409
lines changed

4 files changed

+77
-409
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10061,8 +10061,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1006110061
// Get user vectorization factor and interleave count.
1006210062
ElementCount UserVF = Hints.getWidth();
1006310063
unsigned UserIC = Hints.getInterleave();
10064-
if (LVL.hasUncountableEarlyExit() && UserIC != 1 &&
10065-
!VectorizerParams::isInterleaveForced()) {
10064+
if (LVL.hasUncountableEarlyExit() && UserIC != 1) {
1006610065
UserIC = 1;
1006710066
reportVectorizationInfo("Interleaving not supported for loops "
1006810067
"with uncountable early exits",

llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll

Lines changed: 8 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -14,60 +14,29 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
1414
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
1515
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
1616
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
17-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 64
18-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 510, [[TMP1]]
19-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
17+
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
18+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2019
; CHECK: vector.ph:
2120
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
22-
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 64
21+
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
2322
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 510, [[TMP3]]
2423
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 510, [[N_MOD_VF]]
2524
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
26-
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 64
25+
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
2726
; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add i64 3, [[N_VEC]]
2827
; CHECK-NEXT: br label [[LOOP:%.*]]
2928
; CHECK: vector.body:
3029
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
3130
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
3231
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
3332
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
34-
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
35-
; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 16
36-
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP19]]
37-
; CHECK-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
38-
; CHECK-NEXT: [[TMP37:%.*]] = mul nuw i64 [[TMP36]], 32
39-
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP37]]
40-
; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64()
41-
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP39]], 48
42-
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP40]]
43-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP8]], align 1
44-
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 16 x i8>, ptr [[TMP29]], align 1
45-
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 16 x i8>, ptr [[TMP38]], align 1
46-
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP41]], align 1
33+
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP8]], align 1
4734
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
4835
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
49-
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
50-
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 16
51-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP21]]
52-
; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
53-
; CHECK-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 32
54-
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP24]]
55-
; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
56-
; CHECK-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP26]], 48
57-
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP27]]
58-
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1
59-
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 16 x i8>, ptr [[TMP22]], align 1
60-
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 16 x i8>, ptr [[TMP25]], align 1
61-
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 16 x i8>, ptr [[TMP28]], align 1
62-
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
63-
; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD5]], [[WIDE_LOAD6]]
64-
; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
36+
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1
6537
; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD4]], [[WIDE_LOAD8]]
6638
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]]
67-
; CHECK-NEXT: [[TMP33:%.*]] = or <vscale x 16 x i1> [[TMP11]], [[TMP30]]
68-
; CHECK-NEXT: [[TMP34:%.*]] = or <vscale x 16 x i1> [[TMP33]], [[TMP31]]
69-
; CHECK-NEXT: [[TMP35:%.*]] = or <vscale x 16 x i1> [[TMP34]], [[TMP32]]
70-
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP35]])
39+
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP32]])
7140
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
7241
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
7342
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -77,26 +46,7 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
7746
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 510, [[N_VEC]]
7847
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]]
7948
; CHECK: vector.early.exit:
80-
; CHECK-NEXT: [[TMP63:%.*]] = call i64 @llvm.vscale.i64()
81-
; CHECK-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP63]], 16
82-
; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
83-
; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP42]], 3
84-
; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[TMP62]], [[TMP44]]
85-
; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP31]], i1 true)
86-
; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP42]], 2
87-
; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[TMP58]], [[TMP46]]
88-
; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i64 [[TMP46]], [[TMP42]]
89-
; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP47]], i64 [[TMP50]], i64 [[TMP45]]
90-
; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP30]], i1 true)
91-
; CHECK-NEXT: [[TMP64:%.*]] = mul i64 [[TMP42]], 1
92-
; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP64]], [[TMP52]]
93-
; CHECK-NEXT: [[TMP53:%.*]] = icmp ne i64 [[TMP52]], [[TMP42]]
94-
; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP53]], i64 [[TMP56]], i64 [[TMP51]]
95-
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP11]], i1 true)
96-
; CHECK-NEXT: [[TMP65:%.*]] = mul i64 [[TMP42]], 0
97-
; CHECK-NEXT: [[TMP60:%.*]] = add i64 [[TMP65]], [[TMP15]]
98-
; CHECK-NEXT: [[TMP59:%.*]] = icmp ne i64 [[TMP15]], [[TMP42]]
99-
; CHECK-NEXT: [[TMP61:%.*]] = select i1 [[TMP59]], i64 [[TMP60]], i64 [[TMP57]]
49+
; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
10050
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX1]], [[TMP61]]
10151
; CHECK-NEXT: [[TMP17:%.*]] = add i64 3, [[TMP16]]
10252
; CHECK-NEXT: br label [[LOOP_END]]

0 commit comments

Comments
 (0)