+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm64_32-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s
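
; This test covers NEON vector moves, loads, and stores for the arm64_32
; target (an ILP32 ABI, so pointers are 32 bits wide), with particular
; attention to when post-indexed addressing may be used.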

define <2 x double> @test_insert_elt(<2 x double> %vec, double %val) {
; CHECK-LABEL: test_insert_elt:
- ; CHECK: mov.d v0[0], v1[0]
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ; kill: def $d1 killed $d1 def $q1
+ ; CHECK-NEXT: mov.d v0[0], v1[0]
+ ; CHECK-NEXT: ret
  %res = insertelement <2 x double> %vec, double %val, i32 0
  ret <2 x double> %res
}
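
; The "; kill:" lines in the autogenerated assertions are liveness
; annotations, not instructions: they come from KILL pseudo-instructions
; recording, for example, that a write to d1 implicitly defines the
; containing q1 register. They emit no machine code.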

define void @test_split_16B(<4 x float> %val, ptr %addr) {
; CHECK-LABEL: test_split_16B:
- ; CHECK: str q0, [x0]
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: str q0, [x0]
+ ; CHECK-NEXT: ret
  store <4 x float> %val, ptr %addr, align 8
  ret void
}

define void @test_split_16B_splat(<4 x i32>, ptr %addr) {
; CHECK-LABEL: test_split_16B_splat:
- ; CHECK: str {{q[0-9]+}}
-
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: movi.4s v0, #42
+ ; CHECK-NEXT: str q0, [x0]
+ ; CHECK-NEXT: ret
  %vec.tmp0 = insertelement <4 x i32> undef, i32 42, i32 0
  %vec.tmp1 = insertelement <4 x i32> %vec.tmp0, i32 42, i32 1
  %vec.tmp2 = insertelement <4 x i32> %vec.tmp1, i32 42, i32 2
  %vec = insertelement <4 x i32> %vec.tmp2, i32 42, i32 3
-
  store <4 x i32> %vec, ptr %addr, align 8
  ret void
}
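
; The four insertelements above build a constant splat of 42, so rather than
; four scalar inserts the backend materializes the whole vector with a single
; "movi.4s v0, #42" and stores it with one str, as the new assertions show.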
@@ -33,166 +40,176 @@ define void @test_split_16B_splat(<4 x i32>, ptr %addr) {
declare {%vec, %vec} @llvm.aarch64.neon.ld2r.v2f64.p0(ptr)
define {%vec, %vec} @test_neon_load(ptr %addr) {
; CHECK-LABEL: test_neon_load:
- ; CHECK: ld2r.2d { v0, v1 }, [x0]
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0]
+ ; CHECK-NEXT: ret
  %res = call {%vec, %vec} @llvm.aarch64.neon.ld2r.v2f64.p0(ptr %addr)
  ret {%vec, %vec} %res
}

declare {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0(%vec, %vec, i64, ptr)
define {%vec, %vec} @test_neon_load_lane(ptr %addr, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_load_lane:
- ; CHECK: ld2.d { v0, v1 }[0], [x0]
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+ ; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0]
+ ; CHECK-NEXT: ret
  %res = call {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0(%vec %in1, %vec %in2, i64 0, ptr %addr)
  ret {%vec, %vec} %res
}

declare void @llvm.aarch64.neon.st2.v2f64.p0(%vec, %vec, ptr)
define void @test_neon_store(ptr %addr, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store:
- ; CHECK: st2.2d { v0, v1 }, [x0]
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+ ; CHECK-NEXT: st2.2d { v0, v1 }, [x0]
+ ; CHECK-NEXT: ret
  call void @llvm.aarch64.neon.st2.v2f64.p0(%vec %in1, %vec %in2, ptr %addr)
  ret void
}

declare void @llvm.aarch64.neon.st2lane.v2f64.p0(%vec, %vec, i64, ptr)
define void @test_neon_store_lane(ptr %addr, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store_lane:
- ; CHECK: st2.d { v0, v1 }[1], [x0]
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+ ; CHECK-NEXT: st2.d { v0, v1 }[1], [x0]
+ ; CHECK-NEXT: ret
  call void @llvm.aarch64.neon.st2lane.v2f64.p0(%vec %in1, %vec %in2, i64 1, ptr %addr)
  ret void
}

declare {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0(ptr)
define {{%vec, %vec}, ptr} @test_neon_load_post(ptr %addr, i32 %offset) {
; CHECK-LABEL: test_neon_load_post:
- ; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
- ; CHECK: ld2.2d { v0, v1 }, [x0], [[OFFSET]]
-
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
+ ; CHECK-NEXT: sxtw x8, w1
+ ; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], x8
+ ; CHECK-NEXT: ret
  %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0(ptr %addr)
-
  %addr.new = getelementptr inbounds i8, ptr %addr, i32 %offset
-
  %res.tmp = insertvalue {{%vec, %vec}, ptr} undef, {%vec, %vec} %vecs, 0
  %res = insertvalue {{%vec, %vec}, ptr} %res.tmp, ptr %addr.new, 1
  ret {{%vec, %vec}, ptr} %res
}
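
; The post-indexed register-offset forms of ld2/st2 take a 64-bit X register,
; so the i32 %offset arriving in w1 is first sign-extended ("sxtw x8, w1")
; before being used as the post-increment amount.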

define {{%vec, %vec}, ptr} @test_neon_load_post_lane(ptr %addr, i32 %offset, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_load_post_lane:
- ; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
- ; CHECK: ld2.d { v0, v1 }[1], [x0], [[OFFSET]]
-
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+ ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
+ ; CHECK-NEXT: sxtw x8, w1
+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+ ; CHECK-NEXT: ld2.d { v0, v1 }[1], [x0], x8
+ ; CHECK-NEXT: ret
  %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0(%vec %in1, %vec %in2, i64 1, ptr %addr)
-
  %addr.new = getelementptr inbounds i8, ptr %addr, i32 %offset
-
  %res.tmp = insertvalue {{%vec, %vec}, ptr} undef, {%vec, %vec} %vecs, 0
  %res = insertvalue {{%vec, %vec}, ptr} %res.tmp, ptr %addr.new, 1
  ret {{%vec, %vec}, ptr} %res
}
93
116
94
117
define ptr @test_neon_store_post (ptr %addr , i32 %offset , %vec %in1 , %vec %in2 ) {
95
118
; CHECK-LABEL: test_neon_store_post:
96
- ; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
97
- ; CHECK: st2.2d { v0, v1 }, [x0], [[OFFSET]]
98
-
119
+ ; CHECK: ; %bb.0:
120
+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
121
+ ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
122
+ ; CHECK-NEXT: sxtw x8, w1
123
+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
124
+ ; CHECK-NEXT: st2.2d { v0, v1 }, [x0], x8
125
+ ; CHECK-NEXT: ret
99
126
call void @llvm.aarch64.neon.st2.v2f64.p0 (%vec %in1 , %vec %in2 , ptr %addr )
100
-
101
127
%addr.new = getelementptr inbounds i8 , ptr %addr , i32 %offset
102
-
103
128
ret ptr %addr.new
104
129
}

define ptr @test_neon_store_post_lane(ptr %addr, i32 %offset, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store_post_lane:
- ; CHECK: sxtw [[OFFSET:x[0-9]+]], w1
- ; CHECK: st2.d { v0, v1 }[0], [x0], [[OFFSET]]
-
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+ ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
+ ; CHECK-NEXT: sxtw x8, w1
+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+ ; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], x8
+ ; CHECK-NEXT: ret
  call void @llvm.aarch64.neon.st2lane.v2f64.p0(%vec %in1, %vec %in2, i64 0, ptr %addr)
-
  %addr.new = getelementptr inbounds i8, ptr %addr, i32 %offset
-
  ret ptr %addr.new
}

; ld1 is slightly different because it goes via ISelLowering of normal IR ops
; rather than an intrinsic.
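; That is, the lane load below is written as a scalar load feeding an
; insertelement, and instruction selection matches that pattern to
; "ld1.d { v0 }[0]", folding in a post-increment where the addressing
; rules allow it.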
define {%vec, ptr} @test_neon_ld1_post_lane(ptr %addr, i32 %offset, %vec %in) {
; CHECK-LABEL: test_neon_ld1_post_lane:
- ; CHECK: sbfiz [[OFFSET:x[0-9]+]], x1, #3, #32
- ; CHECK: ld1.d { v0 }[0], [x0], [[OFFSET]]
-
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
+ ; CHECK-NEXT: sbfiz x8, x1, #3, #32
+ ; CHECK-NEXT: ld1.d { v0 }[0], [x0], x8
+ ; CHECK-NEXT: ret
  %loaded = load double, ptr %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0
-
  %addr.new = getelementptr inbounds double, ptr %addr, i32 %offset
-
  %res.tmp = insertvalue {%vec, ptr} undef, %vec %newvec, 0
  %res = insertvalue {%vec, ptr} %res.tmp, ptr %addr.new, 1
-
  ret {%vec, ptr} %res
}

define {{%vec, %vec}, ptr} @test_neon_load_post_exact(ptr %addr) {
; CHECK-LABEL: test_neon_load_post_exact:
- ; CHECK: ld2.2d { v0, v1 }, [x0], #32
-
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], #32
+ ; CHECK-NEXT: ret
  %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0(ptr %addr)
-
  %addr.new = getelementptr inbounds i8, ptr %addr, i32 32
-
  %res.tmp = insertvalue {{%vec, %vec}, ptr} undef, {%vec, %vec} %vecs, 0
  %res = insertvalue {{%vec, %vec}, ptr} %res.tmp, ptr %addr.new, 1
  ret {{%vec, %vec}, ptr} %res
}

define {%vec, ptr} @test_neon_ld1_post_lane_exact(ptr %addr, %vec %in) {
; CHECK-LABEL: test_neon_ld1_post_lane_exact:
- ; CHECK: ld1.d { v0 }[0], [x0], #8
-
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ld1.d { v0 }[0], [x0], #8
+ ; CHECK-NEXT: ret
  %loaded = load double, ptr %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0
-
  %addr.new = getelementptr inbounds double, ptr %addr, i32 1
-
  %res.tmp = insertvalue {%vec, ptr} undef, %vec %newvec, 0
  %res = insertvalue {%vec, ptr} %res.tmp, ptr %addr.new, 1
-
  ret {%vec, ptr} %res
}

; As in the general load/store case, this GEP has defined semantics when the
; address wraps. We cannot use post-indexed addressing.
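; On arm64_32 pointers are 32 bits wide, so a GEP without "inbounds" must
; wrap modulo 2^32. A 64-bit post-indexed increment would compute the wrong
; address in the wraparound case, which is why the update below is a 32-bit
; "add w0, w0, #8" separate from the load.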
define {%vec, ptr} @test_neon_ld1_notpost_lane_exact(ptr %addr, %vec %in) {
; CHECK-LABEL: test_neon_ld1_notpost_lane_exact:
- ; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], #8
- ; CHECK: add w0, w0, #8
- ; CHECK: ret
-
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ld1.d { v0 }[0], [x0]
+ ; CHECK-NEXT: add w0, w0, #8
+ ; CHECK-NEXT: ret
  %loaded = load double, ptr %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0
-
  %addr.new = getelementptr double, ptr %addr, i32 1
-
  %res.tmp = insertvalue {%vec, ptr} undef, %vec %newvec, 0
  %res = insertvalue {%vec, ptr} %res.tmp, ptr %addr.new, 1
-
  ret {%vec, ptr} %res
}

define {%vec, ptr} @test_neon_ld1_notpost_lane(ptr %addr, i32 %offset, %vec %in) {
; CHECK-LABEL: test_neon_ld1_notpost_lane:
- ; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], {{x[0-9]+|sp}}
- ; CHECK: add w0, w0, w1, lsl #3
- ; CHECK: ret
-
+ ; CHECK: ; %bb.0:
+ ; CHECK-NEXT: ld1.d { v0 }[0], [x0]
+ ; CHECK-NEXT: add w0, w0, w1, lsl #3
+ ; CHECK-NEXT: ret
  %loaded = load double, ptr %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0
-
  %addr.new = getelementptr double, ptr %addr, i32 %offset
-
  %res.tmp = insertvalue {%vec, ptr} undef, %vec %newvec, 0
  %res = insertvalue {%vec, ptr} %res.tmp, ptr %addr.new, 1
-
  ret {%vec, ptr} %res
}