@@ -9,56 +9,11 @@ target triple = "x86_64-unknown-linux-gnu"
9
9
10
10
define void @foo (i64* %p , i64* %p.last ) unnamed_addr #0 {
11
11
; CHECK-LABEL: @foo(
12
- ; CHECK-NEXT: entry:
13
- ; CHECK-NEXT: [[P4:%.*]] = ptrtoint i64* [[P:%.*]] to i64
14
- ; CHECK-NEXT: [[P_LAST1:%.*]] = ptrtoint i64* [[P_LAST:%.*]] to i64
15
- ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[P_LAST1]], -32
16
- ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[P4]]
17
- ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 5
18
- ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
19
- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP3]], 16
20
- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
21
- ; CHECK: vector.ph:
22
- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16
23
- ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
24
- ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 16, i64 [[N_MOD_VF]]
25
- ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[TMP5]]
26
- ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 4
27
- ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i64, i64* [[P]], i64 [[TMP6]]
28
- ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
29
- ; CHECK: vector.body:
30
- ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i64* [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
31
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
32
- ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
33
- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 16, i64 20, i64 24, i64 28>
34
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 32, i64 36, i64 40, i64 44>
35
- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 48, i64 52, i64 56, i64 60>
36
- ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i64*> [[TMP7]] to <4 x %0**>
37
- ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i64*> [[TMP8]] to <4 x %0**>
38
- ; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i64*> [[TMP9]] to <4 x %0**>
39
- ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i64*> [[TMP10]] to <4 x %0**>
40
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP11]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
41
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP12]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
42
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP13]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
43
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP14]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
44
- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
45
- ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
46
- ; CHECK-NEXT: [[PTR_IND]] = getelementptr i64, i64* [[POINTER_PHI]], i64 64
47
- ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
48
- ; CHECK: middle.block:
49
- ; CHECK-NEXT: br label [[SCALAR_PH]]
50
- ; CHECK: scalar.ph:
51
- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ]
52
- ; CHECK-NEXT: br label [[LOOP:%.*]]
53
- ; CHECK: loop:
54
- ; CHECK-NEXT: [[P2:%.*]] = phi i64* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[P_INC:%.*]], [[LOOP]] ]
55
- ; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i64, i64* [[P2]], i64 4
56
- ; CHECK-NEXT: [[P3:%.*]] = bitcast i64* [[P2]] to %0**
57
- ; CHECK-NEXT: [[V:%.*]] = load %0*, %0** [[P3]], align 8
58
- ; CHECK-NEXT: [[B:%.*]] = icmp eq i64* [[P_INC]], [[P_LAST]]
59
- ; CHECK-NEXT: br i1 [[B]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
60
- ; CHECK: exit:
61
- ; CHECK-NEXT: ret void
12
+ ; CHECK: vector.body:
13
+ ; CHECK: [[WIDE_MASKED_GATHER:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP11:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
14
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP12:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
15
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP13:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
16
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x %0*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.0(<4 x %0**> [[TMP14:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %0*> undef)
62
17
;
63
18
entry:
64
19
br label %loop
@@ -77,56 +32,11 @@ exit:
77
32
78
33
define void @bar (i64* %p , i64* %p.last ) unnamed_addr #0 {
79
34
; CHECK-LABEL: @bar(
80
- ; CHECK-NEXT: entry:
81
- ; CHECK-NEXT: [[P4:%.*]] = ptrtoint i64* [[P:%.*]] to i64
82
- ; CHECK-NEXT: [[P_LAST1:%.*]] = ptrtoint i64* [[P_LAST:%.*]] to i64
83
- ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[P_LAST1]], -32
84
- ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[P4]]
85
- ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 5
86
- ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
87
- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP3]], 16
88
- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
89
- ; CHECK: vector.ph:
90
- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16
91
- ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
92
- ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 16, i64 [[N_MOD_VF]]
93
- ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[TMP5]]
94
- ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 4
95
- ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i64, i64* [[P]], i64 [[TMP6]]
96
- ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
97
- ; CHECK: vector.body:
98
- ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i64* [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
99
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
100
- ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
101
- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 16, i64 20, i64 24, i64 28>
102
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 32, i64 36, i64 40, i64 44>
103
- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <4 x i64> <i64 48, i64 52, i64 56, i64 60>
104
- ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i64*> [[TMP7]] to <4 x %1**>
105
- ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i64*> [[TMP8]] to <4 x %1**>
106
- ; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i64*> [[TMP9]] to <4 x %1**>
107
- ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i64*> [[TMP10]] to <4 x %1**>
108
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP11]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
109
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP12]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
110
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP13]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
111
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP14]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
112
- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
113
- ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
114
- ; CHECK-NEXT: [[PTR_IND]] = getelementptr i64, i64* [[POINTER_PHI]], i64 64
115
- ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
116
- ; CHECK: middle.block:
117
- ; CHECK-NEXT: br label [[SCALAR_PH]]
118
- ; CHECK: scalar.ph:
119
- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ]
120
- ; CHECK-NEXT: br label [[LOOP:%.*]]
121
- ; CHECK: loop:
122
- ; CHECK-NEXT: [[P2:%.*]] = phi i64* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[P_INC:%.*]], [[LOOP]] ]
123
- ; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i64, i64* [[P2]], i64 4
124
- ; CHECK-NEXT: [[P3:%.*]] = bitcast i64* [[P2]] to %1**
125
- ; CHECK-NEXT: [[V:%.*]] = load %1*, %1** [[P3]], align 8
126
- ; CHECK-NEXT: [[B:%.*]] = icmp eq i64* [[P_INC]], [[P_LAST]]
127
- ; CHECK-NEXT: br i1 [[B]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
128
- ; CHECK: exit:
129
- ; CHECK-NEXT: ret void
35
+ ; CHECK: vector.body:
36
+ ; CHECK: [[WIDE_MASKED_GATHER:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP11:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
37
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP12:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
38
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP13:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
39
+ ; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x %1*> @llvm.masked.gather.v4p0s_s.v4p0p0s_s.1(<4 x %1**> [[TMP14:%.*]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x %1*> undef)
130
40
;
131
41
entry:
132
42
br label %loop
0 commit comments