@@ -9,11 +9,56 @@ target triple = "x86_64-unknown-linux-gnu"
9
9
10
10
define void @foo (i64* %p , i64* %p.last ) unnamed_addr #0 {
11
11
; CHECK-LABEL: @foo(
12
- ; CHECK: vector.body:
13
- ; CHECK: [[WIDE_MASKED_GATHER:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP11:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
14
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP12:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
15
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP13:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
16
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP14:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
12
+ ; CHECK-NEXT: entry:
13
+ ; CHECK-NEXT: [[P4:%.*]] = ptrtoint i64* [[P:%.*]] to i64
14
+ ; CHECK-NEXT: [[P_LAST1:%.*]] = ptrtoint i64* [[P_LAST:%.*]] to i64
15
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[P_LAST1]], -32
16
+ ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[P4]]
17
+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 5
18
+ ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
19
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP3]], 16
20
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
21
+ ; CHECK: vector.ph:
22
+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16
23
+ ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
24
+ ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 16, i64 [[N_MOD_VF]]
25
+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[TMP5]]
26
+ ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 4
27
+ ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i64, i64* [[P]], i64 [[TMP6]]
28
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
29
+ ; CHECK: vector.body:
30
+ ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i64* [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
31
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
32
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
33
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 16, i64 20, i64 24, i64 28>
34
+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 32, i64 36, i64 40, i64 44>
35
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 48, i64 52, i64 56, i64 60>
36
+ ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i64*> [[TMP7]] to <4 x %0**>
37
+ ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i64*> [[TMP8]] to <4 x %0**>
38
+ ; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i64*> [[TMP9]] to <4 x %0**>
39
+ ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i64*> [[TMP10]] to <4 x %0**>
40
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP11]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
41
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP12]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
42
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP13]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
43
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP14]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
44
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
45
+ ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
46
+ ; CHECK-NEXT: [[PTR_IND]] = getelementptr i64, i64* [[POINTER_PHI]], i64 64
47
+ ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
48
+ ; CHECK: middle.block:
49
+ ; CHECK-NEXT: br label [[SCALAR_PH]]
50
+ ; CHECK: scalar.ph:
51
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ]
52
+ ; CHECK-NEXT: br label [[LOOP:%.*]]
53
+ ; CHECK: loop:
54
+ ; CHECK-NEXT: [[P2:%.*]] = phi i64* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[P_INC:%.*]], [[LOOP]] ]
55
+ ; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i64, i64* [[P2]], i64 4
56
+ ; CHECK-NEXT: [[P3:%.*]] = bitcast i64* [[P2]] to %0**
57
+ ; CHECK-NEXT: [[V:%.*]] = load %0*, %0** [[P3]], align 8
58
+ ; CHECK-NEXT: [[B:%.*]] = icmp eq i64* [[P_INC]], [[P_LAST]]
59
+ ; CHECK-NEXT: br i1 [[B]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
60
+ ; CHECK: exit:
61
+ ; CHECK-NEXT: ret void
17
62
;
18
63
entry:
19
64
br label %loop
@@ -32,11 +77,56 @@ exit:
32
77
33
78
define void @bar (i64* %p , i64* %p.last ) unnamed_addr #0 {
34
79
; CHECK-LABEL: @bar(
35
- ; CHECK: vector.body:
36
- ; CHECK: [[WIDE_MASKED_GATHER:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP11:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
37
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP12:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
38
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP13:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
39
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP14:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
80
+ ; CHECK-NEXT: entry:
81
+ ; CHECK-NEXT: [[P4:%.*]] = ptrtoint i64* [[P:%.*]] to i64
82
+ ; CHECK-NEXT: [[P_LAST1:%.*]] = ptrtoint i64* [[P_LAST:%.*]] to i64
83
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[P_LAST1]], -32
84
+ ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[P4]]
85
+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 5
86
+ ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
87
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP3]], 16
88
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
89
+ ; CHECK: vector.ph:
90
+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16
91
+ ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
92
+ ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 16, i64 [[N_MOD_VF]]
93
+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[TMP5]]
94
+ ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 4
95
+ ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i64, i64* [[P]], i64 [[TMP6]]
96
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
97
+ ; CHECK: vector.body:
98
+ ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i64* [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
99
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
100
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
101
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 16, i64 20, i64 24, i64 28>
102
+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 32, i64 36, i64 40, i64 44>
103
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 48, i64 52, i64 56, i64 60>
104
+ ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i64*> [[TMP7]] to <4 x %1**>
105
+ ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i64*> [[TMP8]] to <4 x %1**>
106
+ ; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i64*> [[TMP9]] to <4 x %1**>
107
+ ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i64*> [[TMP10]] to <4 x %1**>
108
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP11]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
109
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP12]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
110
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP13]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
111
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP14]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
112
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
113
+ ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
114
+ ; CHECK-NEXT: [[PTR_IND]] = getelementptr i64, i64* [[POINTER_PHI]], i64 64
115
+ ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
116
+ ; CHECK: middle.block:
117
+ ; CHECK-NEXT: br label [[SCALAR_PH]]
118
+ ; CHECK: scalar.ph:
119
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ]
120
+ ; CHECK-NEXT: br label [[LOOP:%.*]]
121
+ ; CHECK: loop:
122
+ ; CHECK-NEXT: [[P2:%.*]] = phi i64* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[P_INC:%.*]], [[LOOP]] ]
123
+ ; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i64, i64* [[P2]], i64 4
124
+ ; CHECK-NEXT: [[P3:%.*]] = bitcast i64* [[P2]] to %1**
125
+ ; CHECK-NEXT: [[V:%.*]] = load %1*, %1** [[P3]], align 8
126
+ ; CHECK-NEXT: [[B:%.*]] = icmp eq i64* [[P_INC]], [[P_LAST]]
127
+ ; CHECK-NEXT: br i1 [[B]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
128
+ ; CHECK: exit:
129
+ ; CHECK-NEXT: ret void
40
130
;
41
131
entry:
42
132
br label %loop
0 commit comments