Skip to content

Commit 4f1ba5a

Browse files
committed
[RISCV] Add additional gather/scatter addressing coverage
Inspired by s2101 in tsvc
1 parent 797d519 commit 4f1ba5a

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed

llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2535,3 +2535,57 @@ define <4 x i32> @reassociate_with_constant_splat(ptr %base, i32 %index, <4 x i3
25352535
ret <4 x i32> %res
25362536
}
25372537

2538+
; Codegen coverage for a gather whose addresses come from a two-level vector
; GEP that uses the same index vector for both array dimensions, i.e. element
; [idx][idx] of a [256 x [256 x float]] array for each lane of %vecidx.
; With 4-byte floats the row stride is 256 * 4 = 1024 bytes, which matches the
; shift-by-10 (row) and shift-by-2 (column) pairs in the expected output below.
; In the RV32 output the <4 x i64> index is first narrowed to e32 (vnsrl.wi)
; and the gather is a vluxei32; in the RV64 output the address math stays at
; e64 and the gather is a vluxei64 with an e32 result.
; NOTE(review): the check lines look auto-generated (update_llc_test_checks.py
; style) - confirm against the file header and regenerate rather than hand-edit.
define <4 x i32> @diagonal(ptr %base, <4 x i64> %vecidx) {
2539+
; RV32-LABEL: diagonal:
2540+
; RV32: # %bb.0:
2541+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2542+
; RV32-NEXT: vnsrl.wi v10, v8, 0
2543+
; RV32-NEXT: vsll.vi v8, v10, 10
2544+
; RV32-NEXT: vadd.vx v8, v8, a0
2545+
; RV32-NEXT: vsll.vi v9, v10, 2
2546+
; RV32-NEXT: vadd.vv v8, v8, v9
2547+
; RV32-NEXT: vluxei32.v v8, (zero), v8
2548+
; RV32-NEXT: ret
2549+
;
2550+
; RV64-LABEL: diagonal:
2551+
; RV64: # %bb.0:
2552+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2553+
; RV64-NEXT: vsll.vi v10, v8, 10
2554+
; RV64-NEXT: vadd.vx v10, v10, a0
2555+
; RV64-NEXT: vsll.vi v8, v8, 2
2556+
; RV64-NEXT: vadd.vv v10, v10, v8
2557+
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2558+
; RV64-NEXT: vluxei64.v v8, (zero), v10
2559+
; RV64-NEXT: ret
2560+
; Both variable indices are %vecidx, so each lane addresses base[idx][idx].
%gep = getelementptr inbounds nuw [256 x [256 x float]], ptr %base, i64 0, <4 x i64> %vecidx, <4 x i64> %vecidx
2561+
; Effectively unmasked gather: all-true mask, alignment operand 4, undef passthru.
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
2562+
ret <4 x i32> %res
2563+
}
2564+
2565+
; Codegen coverage for a diagonal gather with a <4 x i32> index vector: the
; GEP indexes a [256 x [256 x float]] array with %vecidx in both dimensions,
; so each lane addresses element [idx][idx].
; In the RV32 output the indices are already e32, so the shift-by-10 /
; shift-by-2 / add sequence operates on them directly and feeds a vluxei32.
; In the RV64 output the indices are sign-extended to e64 (vsext.vf2) before
; the same shift/add sequence feeds a vluxei64 with an e32 result.
; NOTE(review): the check lines look auto-generated (update_llc_test_checks.py
; style) - confirm against the file header and regenerate rather than hand-edit.
define <4 x i32> @diagonal_i32(ptr %base, <4 x i32> %vecidx) {
2566+
; RV32-LABEL: diagonal_i32:
2567+
; RV32: # %bb.0:
2568+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2569+
; RV32-NEXT: vsll.vi v9, v8, 10
2570+
; RV32-NEXT: vadd.vx v9, v9, a0
2571+
; RV32-NEXT: vsll.vi v8, v8, 2
2572+
; RV32-NEXT: vadd.vv v8, v9, v8
2573+
; RV32-NEXT: vluxei32.v v8, (zero), v8
2574+
; RV32-NEXT: ret
2575+
;
2576+
; RV64-LABEL: diagonal_i32:
2577+
; RV64: # %bb.0:
2578+
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2579+
; RV64-NEXT: vsext.vf2 v10, v8
2580+
; RV64-NEXT: vsll.vi v8, v10, 10
2581+
; RV64-NEXT: vadd.vx v8, v8, a0
2582+
; RV64-NEXT: vsll.vi v10, v10, 2
2583+
; RV64-NEXT: vadd.vv v10, v8, v10
2584+
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2585+
; RV64-NEXT: vluxei64.v v8, (zero), v10
2586+
; RV64-NEXT: ret
2587+
; Both variable indices are the i32 vector %vecidx (row and column are equal).
%gep = getelementptr inbounds nuw [256 x [256 x float]], ptr %base, i64 0, <4 x i32> %vecidx, <4 x i32> %vecidx
2588+
; Effectively unmasked gather: all-true mask, alignment operand 4, undef passthru.
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
2589+
ret <4 x i32> %res
2590+
}
2591+

0 commit comments

Comments
 (0)