; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s

define <vscale x 1 x half> @test_vp_reverse_nxv1f16_masked(<vscale x 1 x half> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: test_vp_reverse_nxv1f16_masked:
@@ -435,3 +436,177 @@ define <vscale x 32 x half> @test_vp_reverse_nxv32f16(<vscale x 32 x half> %src,
  %dst = call <vscale x 32 x half> @llvm.experimental.vp.reverse.nxv32f16(<vscale x 32 x half> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 32 x half> %dst
}
+
+define <vscale x 1 x bfloat> @test_vp_reverse_nxv1bf16_masked(<vscale x 1 x bfloat> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 1 x bfloat> @llvm.experimental.vp.reverse.nxv1bf16(<vscale x 1 x bfloat> %src, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x bfloat> %dst
+}
+
+define <vscale x 1 x bfloat> @test_vp_reverse_nxv1bf16(<vscale x 1 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 1 x bfloat> @llvm.experimental.vp.reverse.nxv1bf16(<vscale x 1 x bfloat> %src, <vscale x 1 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 1 x bfloat> %dst
+}
+
+define <vscale x 2 x bfloat> @test_vp_reverse_nxv2bf16_masked(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> %mask, i32 %evl)
+ ret <vscale x 2 x bfloat> %dst
+}
+
+define <vscale x 2 x bfloat> @test_vp_reverse_nxv2bf16(<vscale x 2 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 2 x bfloat> @llvm.experimental.vp.reverse.nxv2bf16(<vscale x 2 x bfloat> %src, <vscale x 2 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 2 x bfloat> %dst
+}
+
+define <vscale x 4 x bfloat> @test_vp_reverse_nxv4bf16_masked(<vscale x 4 x bfloat> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> %src, <vscale x 4 x i1> %mask, i32 %evl)
+ ret <vscale x 4 x bfloat> %dst
+}
+
+define <vscale x 4 x bfloat> @test_vp_reverse_nxv4bf16(<vscale x 4 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 4 x bfloat> @llvm.experimental.vp.reverse.nxv4bf16(<vscale x 4 x bfloat> %src, <vscale x 4 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 4 x bfloat> %dst
+}
+
+define <vscale x 8 x bfloat> @test_vp_reverse_nxv8bf16_masked(<vscale x 8 x bfloat> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %dst = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x bfloat> %dst
+}
+
+define <vscale x 8 x bfloat> @test_vp_reverse_nxv8bf16(<vscale x 8 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 8 x bfloat> @llvm.experimental.vp.reverse.nxv8bf16(<vscale x 8 x bfloat> %src, <vscale x 8 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 8 x bfloat> %dst
+}
+
+define <vscale x 16 x bfloat> @test_vp_reverse_nxv16bf16_masked(<vscale x 16 x bfloat> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %dst = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> %src, <vscale x 16 x i1> %mask, i32 %evl)
+ ret <vscale x 16 x bfloat> %dst
+}
+
+define <vscale x 16 x bfloat> @test_vp_reverse_nxv16bf16(<vscale x 16 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 16 x bfloat> @llvm.experimental.vp.reverse.nxv16bf16(<vscale x 16 x bfloat> %src, <vscale x 16 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 16 x bfloat> %dst
+}
+
+define <vscale x 32 x bfloat> @test_vp_reverse_nxv32bf16_masked(<vscale x 32 x bfloat> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32bf16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %dst = call <vscale x 32 x bfloat> @llvm.experimental.vp.reverse.nxv32bf16(<vscale x 32 x bfloat> %src, <vscale x 32 x i1> %mask, i32 %evl)
+ ret <vscale x 32 x bfloat> %dst
+}
+
+define <vscale x 32 x bfloat> @test_vp_reverse_nxv32bf16(<vscale x 32 x bfloat> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a1
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+
+ %dst = call <vscale x 32 x bfloat> @llvm.experimental.vp.reverse.nxv32bf16(<vscale x 32 x bfloat> %src, <vscale x 32 x i1> splat (i1 1), i32 %evl)
+ ret <vscale x 32 x bfloat> %dst
+}