@@ -185,6 +185,19 @@ define <vscale x 2 x float> @copysign_nxv2f32_nxv2f64(<vscale x 2 x float> %a, <
   ret <vscale x 2 x float> %r
 }
 
+define <vscale x 2 x float> @copysign_nxv2f32_nxv2bf16(<vscale x 2 x float> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: copysign_nxv2f32_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z1.s, z1.s, #16
+; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
+; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %tmp0 = fpext <vscale x 2 x bfloat> %b to <vscale x 2 x float>
+  %r = call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %tmp0)
+  ret <vscale x 2 x float> %r
+}
+
 ;
 ; llvm.copysign.nxv4f32
 ;
@@ -230,6 +243,19 @@ define <vscale x 4 x float> @copysign_nxv4f32_nxv4f64(<vscale x 4 x float> %a, <
   ret <vscale x 4 x float> %r
 }
 
+define <vscale x 4 x float> @copysign_nxv4f32_nxv4bf16(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: copysign_nxv4f32_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z1.s, z1.s, #16
+; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
+; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %tmp0 = fpext <vscale x 4 x bfloat> %b to <vscale x 4 x float>
+  %r = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %tmp0)
+  ret <vscale x 4 x float> %r
+}
+
 ;
 ; llvm.copysign.nxv2f64
 ;
@@ -273,9 +299,137 @@ define <vscale x 2 x double> @copysign_nxv2f64_nxv2f64(<vscale x 2 x double> %a,
   ret <vscale x 2 x double> %r
 }
 
+define <vscale x 2 x double> @copysign_nxv2f64_nxv2bf16(<vscale x 2 x double> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: copysign_nxv2f64_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z1.s, z1.s, #16
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK-NEXT:    fcvt z1.d, p0/m, z1.s
+; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b.ext = fpext <vscale x 2 x bfloat> %b to <vscale x 2 x double>
+  %r = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b.ext)
+  ret <vscale x 2 x double> %r
+}
+
+;
+; llvm.copysign.nxv2bf16
+;
+
+define <vscale x 2 x bfloat> @copysign_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: copysign_nxv2bf16_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
+  ret <vscale x 2 x bfloat> %r
+}
+
+define <vscale x 2 x bfloat> @copysign_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: copysign_nxv2bf16_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    bfcvt z1.h, p0/m, z1.s
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %tmp0 = fptrunc <vscale x 2 x float> %b to <vscale x 2 x bfloat>
+  %r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %tmp0)
+  ret <vscale x 2 x bfloat> %r
+}
+
+; TODO: Cannot downconvert from double to bfloat
+;define <vscale x 2 x bfloat> @copysign_nxv2bf16_nxv2f64(<vscale x 2 x bfloat> %a, <vscale x 2 x double> %b) {
+;  %tmp0 = fptrunc <vscale x 2 x double> %b to <vscale x 2 x bfloat>
+;  %r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %tmp0)
+;  ret <vscale x 2 x bfloat> %r
+;}
+
+;
+; llvm.copysign.nxv4bf16
+;
+
+define <vscale x 4 x bfloat> @copysign_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: copysign_nxv4bf16_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
+  ret <vscale x 4 x bfloat> %r
+}
+
+define <vscale x 4 x bfloat> @copysign_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: copysign_nxv4bf16_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    bfcvt z1.h, p0/m, z1.s
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b.trunc = fptrunc <vscale x 4 x float> %b to <vscale x 4 x bfloat>
+  %r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b.trunc)
+  ret <vscale x 4 x bfloat> %r
+}
+
+; TODO: Cannot downconvert from double to bfloat
+;define <vscale x 4 x bfloat> @copysign_nxv4bf16_nxv4f64(<vscale x 4 x bfloat> %a, <vscale x 4 x double> %b) {
+;  %b.trunc = fptrunc <vscale x 4 x double> %b to <vscale x 4 x bfloat>
+;  %r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b.trunc)
+;  ret <vscale x 4 x bfloat> %r
+;}
+
+;
+; llvm.copysign.nxv8bf16
+;
+
+define <vscale x 8 x bfloat> @copysign_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: copysign_nxv8bf16_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
+  ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @copysign_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %a, <vscale x 8 x float> %b) {
+; CHECK-LABEL: copysign_nxv8bf16_nxv8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    bfcvt z2.h, p0/m, z2.s
+; CHECK-NEXT:    bfcvt z1.h, p0/m, z1.s
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z2.h
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %b.trunc = fptrunc <vscale x 8 x float> %b to <vscale x 8 x bfloat>
+  %r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b.trunc)
+  ret <vscale x 8 x bfloat> %r
+}
+
+; TODO: Cannot downconvert from double to bfloat
+;define <vscale x 8 x bfloat> @copysign_nxv8bf16_nxv8f64(<vscale x 8 x bfloat> %a, <vscale x 8 x double> %b) {
+;  %b.trunc = fptrunc <vscale x 8 x double> %b to <vscale x 8 x bfloat>
+;  %r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b.trunc)
+;  ret <vscale x 8 x bfloat> %r
+;}
+
 declare <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
 declare <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
 declare <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
 declare <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
 declare <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
 declare <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
+declare <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
+declare <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
+declare <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)