1
- ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize --disable-output -stats -S 2>&1 | FileCheck %s
1
+ ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -enable-early-exit-vectorization --disable-output -stats -S 2>&1 | FileCheck %s
2
2
; REQUIRES: asserts
3
3
4
- ;
5
- ; We have 2 loops, one of them is vectorizable and the second one is not.
6
- ;
4
+ ; We have 3 loops, two of them are vectorizable (with one being early-exit
5
+ ; vectorized) and the third one is not.
7
6
8
- ; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization
9
- ; CHECK: 1 loop-vectorize - Number of loops vectorized
7
+ ; CHECK: 3 loop-vectorize - Number of loops analyzed for vectorization
8
+ ; CHECK: 1 loop-vectorize - Number of early exit loops vectorized
9
+ ; CHECK: 2 loop-vectorize - Number of loops vectorized
10
10
11
11
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
12
12
@@ -31,6 +31,36 @@ for.end: ; preds = %entry, %for.body
31
31
ret void
32
32
}
33
33
34
+ define i32 @early_exit_vectorized (i64 %end ) { ; returns 1 if the loop finds an index where %p1 and %p2 hold equal i32 values, otherwise 0
35
+ entry:
36
+ %p1 = alloca [1024 x i32 ] ; first stack buffer read by the loop
37
+ %p2 = alloca [1024 x i32 ] ; second stack buffer read by the loop
38
+ call void @init_mem (ptr %p1 , i64 1024 ) ; external helper (only declared in this file); presumably fills the buffer - confirm against its definition
39
+ call void @init_mem (ptr %p2 , i64 1024 )
40
+ %end.clamped = and i64 %end , 1023 ; mask keeps the loop bound in [0, 1023], below the 1024-element buffer size
41
+ br label %for.body
42
+
43
+ for.body:
44
+ %ind = phi i64 [ %ind.next , %for.inc ], [ 0 , %entry ] ; induction variable: starts at 0, advanced by 1 in %for.inc
45
+ %arrayidx1 = getelementptr inbounds i32 , ptr %p1 , i64 %ind ; address of %p1[%ind]
46
+ %0 = load i32 , ptr %arrayidx1 , align 4
47
+ %arrayidx2 = getelementptr inbounds i32 , ptr %p2 , i64 %ind ; address of %p2[%ind]
48
+ %1 = load i32 , ptr %arrayidx2 , align 4
49
+ %cmp.early = icmp eq i32 %0 , %1 ; early-exit condition: the two elements match
50
+ br i1 %cmp.early , label %found , label %for.inc ; first loop exit: leave for %found as soon as a match is seen
51
+
52
+ for.inc:
53
+ %ind.next = add i64 %ind , 1
54
+ %cmp = icmp ult i64 %ind.next , %end.clamped ; unsigned bound check done after the body, so index 0 is always read even when %end.clamped is 0
55
+ br i1 %cmp , label %for.body , label %exit ; second loop exit (latch): bound reached without a match
56
+
57
+ found:
58
+ ret i32 1 ; a matching pair was found
59
+
60
+ exit:
61
+ ret i32 0 ; loop ran to its bound without finding a match
62
+ }
63
+
34
64
define void @not_vectorized (ptr nocapture %a , i64 %size ) {
35
65
entry:
36
66
%cmp1 = icmp sle i64 %size , 0
@@ -56,3 +86,5 @@ for.body: ; preds = %entry, %for.body
56
86
for.end: ; preds = %entry, %for.body
57
87
ret void
58
88
}
89
+
90
+ declare void @init_mem (ptr , i64 ); external helper with no body in this file; @early_exit_vectorized calls it once per stack buffer, passing the buffer pointer and the constant 1024
0 commit comments