Skip to content

Commit 6f748fd

Browse files
committed
[RISCV] Add coverage for optimizations in deinterleave load lowering
1 parent ddb018f commit 6f748fd

File tree

2 files changed

+171
-0
lines changed

2 files changed

+171
-0
lines changed

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,32 @@ define {<vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_load_nxv16i
4949
ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %res1
5050
}
5151

52+
; Factor-2 deinterleave of a loaded <vscale x 32 x i8> where only field 0
; (the first result of llvm.vector.deinterleave2) is extracted and returned.
; The assertions below expect a whole-register load (vl4r.v) followed by a
; single narrowing shift with shift amount 0 (vnsrl.wi ..., 0).
define <vscale x 16 x i8> @vector_deinterleave_load_nxv16i8_nxv32i8_oneactive(ptr %p) {
53+
; CHECK-LABEL: vector_deinterleave_load_nxv16i8_nxv32i8_oneactive:
54+
; CHECK: # %bb.0:
55+
; CHECK-NEXT: vl4r.v v12, (a0)
56+
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
57+
; CHECK-NEXT: vnsrl.wi v8, v12, 0
58+
; CHECK-NEXT: ret
59+
%vec = load <vscale x 32 x i8>, ptr %p
60+
%deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
61+
%t0 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %deinterleaved.results, 0
62+
ret <vscale x 16 x i8> %t0
63+
}
64+
65+
; Same shape as the _oneactive test, but only field 1 (the second result of
; llvm.vector.deinterleave2) is extracted and returned.
; The assertions below expect a whole-register load (vl4r.v) followed by a
; single narrowing shift with shift amount 8 (vnsrl.wi ..., 8).
define <vscale x 16 x i8> @vector_deinterleave_load_nxv16i8_nxv32i8_oneactive2(ptr %p) {
66+
; CHECK-LABEL: vector_deinterleave_load_nxv16i8_nxv32i8_oneactive2:
67+
; CHECK: # %bb.0:
68+
; CHECK-NEXT: vl4r.v v12, (a0)
69+
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
70+
; CHECK-NEXT: vnsrl.wi v8, v12, 8
71+
; CHECK-NEXT: ret
72+
%vec = load <vscale x 32 x i8>, ptr %p
73+
%deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
74+
%t1 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %deinterleaved.results, 1
75+
ret <vscale x 16 x i8> %t1
76+
}
77+
5278
; Shouldn't be lowered to vlseg because it's unaligned
5379
define {<vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_load_nxv8i16_nxv16i16_align1(ptr %p) {
5480
; CHECK-LABEL: vector_deinterleave_load_nxv8i16_nxv16i16_align1:
@@ -380,6 +406,90 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
380406
ret { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res3
381407
}
382408

409+
; Factor-4 deinterleave of a loaded <vscale x 32 x i8> where only field 0 is
; extracted and returned.
; The assertions below record the current lowering: the vector is loaded with
; vl4r.v, stored to a stack slot (vs4r.v at sp + 16), and read back with a
; segment load (vlseg4e8.v v8).
; NOTE(review): presumably baseline coverage for a later optimization, per the
; commit title -- confirm.
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
410+
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive:
411+
; CHECK: # %bb.0:
412+
; CHECK-NEXT: addi sp, sp, -16
413+
; CHECK-NEXT: .cfi_def_cfa_offset 16
414+
; CHECK-NEXT: csrr a1, vlenb
415+
; CHECK-NEXT: slli a1, a1, 2
416+
; CHECK-NEXT: sub sp, sp, a1
417+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
418+
; CHECK-NEXT: vl4r.v v8, (a0)
419+
; CHECK-NEXT: addi a0, sp, 16
420+
; CHECK-NEXT: vs4r.v v8, (a0)
421+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
422+
; CHECK-NEXT: vlseg4e8.v v8, (a0)
423+
; CHECK-NEXT: csrr a0, vlenb
424+
; CHECK-NEXT: slli a0, a0, 2
425+
; CHECK-NEXT: add sp, sp, a0
426+
; CHECK-NEXT: .cfi_def_cfa sp, 16
427+
; CHECK-NEXT: addi sp, sp, 16
428+
; CHECK-NEXT: .cfi_def_cfa_offset 0
429+
; CHECK-NEXT: ret
430+
%vec = load <vscale x 32 x i8>, ptr %p
431+
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
432+
%t0 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 0
433+
ret <vscale x 8 x i8> %t0
434+
}
435+
436+
; Factor-4 deinterleave of a loaded <vscale x 32 x i8> where only the last
; field (index 3) is extracted and returned. Note that the extracted value is
; still named %t0 even though it holds field 3.
; The assertions below record the same stack round trip as the field-0 test
; (vl4r.v, vs4r.v to sp + 16, vlseg4e8.v), except that the segment load's
; destination starts at v5.
; NOTE(review): presumably v5 is chosen so that field 3 lands in v8 -- confirm.
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive2(ptr %p) {
437+
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive2:
438+
; CHECK: # %bb.0:
439+
; CHECK-NEXT: addi sp, sp, -16
440+
; CHECK-NEXT: .cfi_def_cfa_offset 16
441+
; CHECK-NEXT: csrr a1, vlenb
442+
; CHECK-NEXT: slli a1, a1, 2
443+
; CHECK-NEXT: sub sp, sp, a1
444+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
445+
; CHECK-NEXT: vl4r.v v8, (a0)
446+
; CHECK-NEXT: addi a0, sp, 16
447+
; CHECK-NEXT: vs4r.v v8, (a0)
448+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
449+
; CHECK-NEXT: vlseg4e8.v v5, (a0)
450+
; CHECK-NEXT: csrr a0, vlenb
451+
; CHECK-NEXT: slli a0, a0, 2
452+
; CHECK-NEXT: add sp, sp, a0
453+
; CHECK-NEXT: .cfi_def_cfa sp, 16
454+
; CHECK-NEXT: addi sp, sp, 16
455+
; CHECK-NEXT: .cfi_def_cfa_offset 0
456+
; CHECK-NEXT: ret
457+
%vec = load <vscale x 32 x i8>, ptr %p
458+
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
459+
%t0 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 3
460+
ret <vscale x 8 x i8> %t0
461+
}
462+
463+
; Factor-4 deinterleave of a loaded <vscale x 32 x i8> where two of the four
; fields (indices 0 and 1) are used. They are inserted into slots 0 and 1 of
; the returned struct; slots 2 and 3 are left as poison.
; The assertions below record the same stack round trip as the one-active
; factor-4 test (vl4r.v, vs4r.v to sp + 16, vlseg4e8.v v8).
define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deinterleave_load_factor4_twoactive(ptr %p) {
464+
; CHECK-LABEL: vector_deinterleave_load_factor4_twoactive:
465+
; CHECK: # %bb.0:
466+
; CHECK-NEXT: addi sp, sp, -16
467+
; CHECK-NEXT: .cfi_def_cfa_offset 16
468+
; CHECK-NEXT: csrr a1, vlenb
469+
; CHECK-NEXT: slli a1, a1, 2
470+
; CHECK-NEXT: sub sp, sp, a1
471+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
472+
; CHECK-NEXT: vl4r.v v8, (a0)
473+
; CHECK-NEXT: addi a0, sp, 16
474+
; CHECK-NEXT: vs4r.v v8, (a0)
475+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
476+
; CHECK-NEXT: vlseg4e8.v v8, (a0)
477+
; CHECK-NEXT: csrr a0, vlenb
478+
; CHECK-NEXT: slli a0, a0, 2
479+
; CHECK-NEXT: add sp, sp, a0
480+
; CHECK-NEXT: .cfi_def_cfa sp, 16
481+
; CHECK-NEXT: addi sp, sp, 16
482+
; CHECK-NEXT: .cfi_def_cfa_offset 0
483+
; CHECK-NEXT: ret
484+
%vec = load <vscale x 32 x i8>, ptr %p
485+
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
486+
%t0 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 0
487+
%t1 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 1
488+
%res0 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } poison, <vscale x 8 x i8> %t0, 0
489+
%res1 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res0, <vscale x 8 x i8> %t1, 1
490+
ret { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res1
491+
}
492+
383493
define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deinterleave_load_factor5(ptr %p) {
384494
; CHECK-LABEL: vector_deinterleave_load_factor5:
385495
; CHECK: # %bb.0:

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3683,3 +3683,64 @@ define {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vs
36833683
%res = call {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave8.nxv16f64(<vscale x 16 x double> %arg)
36843684
ret {<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>} %res
36853685
}
3686+
3687+
; Factor-2 deinterleave of a <vscale x 32 x i8> passed as an argument (no
; load involved) where only field 0 is extracted and returned.
; Two check prefixes are asserted:
;   V   - expects vnsrl.wi with shift amount 0 into v12, then a copy to v8.
;   ZIP - expects ri.vunzip2a.vv into v12, then a copy to v8.
define <vscale x 16 x i8> @vector_deinterleave_nxv16i8_nxv32i8_oneactive(<vscale x 32 x i8> %vec) {
3688+
; V-LABEL: vector_deinterleave_nxv16i8_nxv32i8_oneactive:
3689+
; V: # %bb.0:
3690+
; V-NEXT: vsetvli a0, zero, e8, m2, ta, ma
3691+
; V-NEXT: vnsrl.wi v12, v8, 0
3692+
; V-NEXT: vmv.v.v v8, v12
3693+
; V-NEXT: ret
3694+
;
3695+
; ZIP-LABEL: vector_deinterleave_nxv16i8_nxv32i8_oneactive:
3696+
; ZIP: # %bb.0:
3697+
; ZIP-NEXT: vsetvli a0, zero, e8, m2, ta, ma
3698+
; ZIP-NEXT: ri.vunzip2a.vv v12, v8, v10
3699+
; ZIP-NEXT: vmv.v.v v8, v12
3700+
; ZIP-NEXT: ret
3701+
%retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
3702+
%ext = extractvalue {<vscale x 16 x i8>, <vscale x 16 x i8>} %retval, 0
3703+
ret <vscale x 16 x i8> %ext
3704+
}
3705+
3706+
; Factor-8 deinterleave of a <vscale x 8 x float> argument where only field 0
; is extracted and returned. The function is nounwind, and the assertions
; contain no .cfi directives.
; The assertions below record the current lowering: the argument is stored to
; a stack slot (vs4r.v at sp + 16) and read back with a segment load
; (vlseg8e32.v v8).
define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive(<vscale x 8 x float> %arg) nounwind {
3707+
; CHECK-LABEL: vector_deinterleave_nxv1f32_nxv8f32_oneactive:
3708+
; CHECK: # %bb.0:
3709+
; CHECK-NEXT: addi sp, sp, -16
3710+
; CHECK-NEXT: csrr a0, vlenb
3711+
; CHECK-NEXT: slli a0, a0, 2
3712+
; CHECK-NEXT: sub sp, sp, a0
3713+
; CHECK-NEXT: addi a0, sp, 16
3714+
; CHECK-NEXT: vs4r.v v8, (a0)
3715+
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
3716+
; CHECK-NEXT: vlseg8e32.v v8, (a0)
3717+
; CHECK-NEXT: csrr a0, vlenb
3718+
; CHECK-NEXT: slli a0, a0, 2
3719+
; CHECK-NEXT: add sp, sp, a0
3720+
; CHECK-NEXT: addi sp, sp, 16
3721+
; CHECK-NEXT: ret
3722+
%res = call {<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>} @llvm.vector.deinterleave8.nxv8f32(<vscale x 8 x float> %arg)
3723+
%ext = extractvalue {<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>} %res, 0
3724+
ret <vscale x 1 x float> %ext
3725+
}
3726+
3727+
; Factor-8 deinterleave of a <vscale x 8 x float> argument where only field 5
; is extracted and returned.
; The assertions below record the same stack round trip as the field-0 variant
; (vs4r.v to sp + 16, then vlseg8e32.v), except that the segment load's
; destination starts at v3.
; NOTE(review): presumably v3 is chosen so that field 5 lands in v8 -- confirm.
define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive2(<vscale x 8 x float> %arg) nounwind {
3728+
; CHECK-LABEL: vector_deinterleave_nxv1f32_nxv8f32_oneactive2:
3729+
; CHECK: # %bb.0:
3730+
; CHECK-NEXT: addi sp, sp, -16
3731+
; CHECK-NEXT: csrr a0, vlenb
3732+
; CHECK-NEXT: slli a0, a0, 2
3733+
; CHECK-NEXT: sub sp, sp, a0
3734+
; CHECK-NEXT: addi a0, sp, 16
3735+
; CHECK-NEXT: vs4r.v v8, (a0)
3736+
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
3737+
; CHECK-NEXT: vlseg8e32.v v3, (a0)
3738+
; CHECK-NEXT: csrr a0, vlenb
3739+
; CHECK-NEXT: slli a0, a0, 2
3740+
; CHECK-NEXT: add sp, sp, a0
3741+
; CHECK-NEXT: addi sp, sp, 16
3742+
; CHECK-NEXT: ret
3743+
%res = call {<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>} @llvm.vector.deinterleave8.nxv8f32(<vscale x 8 x float> %arg)
3744+
%ext = extractvalue {<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>} %res, 5
3745+
ret <vscale x 1 x float> %ext
3746+
}

0 commit comments

Comments
 (0)