
Commit 826a9d5

Adds tail undisturbed for RVV Level 2 operations
During the last iteration of some RVV operations, accumulators can get overwritten when VL < VLMAX and the tail policy is agnostic. This commit changes the intrinsics' tail policy to undisturbed.
1 parent 8df0289 commit 826a9d5
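
In the RVV intrinsics, the unsuffixed forms such as __riscv_vfmacc_vv_f32m8 use a tail-agnostic destination policy: lanes past vl may either keep their old values or be overwritten (typically with all ones). That is harmless when the whole destination is recomputed each iteration, but these kernels accumulate into a VLMAX-wide register across strip-mined iterations and reduce it over vlmax lanes at the end, so the tail lanes must survive the final short iteration. Below is a minimal sketch of the pattern, assuming a toolchain that ships riscv_vector.h; dot_rvv is a hypothetical stand-in, not OpenBLAS code.

#include <riscv_vector.h>
#include <stddef.h>

/* Strip-mined dot product that accumulates into a VLMAX-wide register
 * and reduces it once at the end.  When n is not a multiple of VLMAX,
 * the last iteration runs with vl < vlmax; with the default
 * tail-agnostic vfmacc, the tail lanes of vr may be clobbered, and the
 * vlmax-wide reduction would then sum garbage.  The _tu
 * (tail-undisturbed) variant leaves those lanes intact. */
float dot_rvv(const float *x, const float *y, size_t n)
{
    size_t vlmax = __riscv_vsetvlmax_e32m8();
    vfloat32m8_t vr = __riscv_vfmv_v_f_f32m8(0.0f, vlmax);  /* zero all lanes */

    for (size_t i = 0; i < n; ) {
        size_t vl = __riscv_vsetvl_e32m8(n - i);            /* vl <= vlmax */
        vfloat32m8_t va = __riscv_vle32_v_f32m8(&x[i], vl);
        vfloat32m8_t vb = __riscv_vle32_v_f32m8(&y[i], vl);
        /* tail-undisturbed: lanes [vl, vlmax) of vr keep their old sums */
        vr = __riscv_vfmacc_vv_f32m8_tu(vr, va, vb, vl);
        i += vl;
    }

    /* reduce all vlmax lanes, so every lane must hold a valid partial sum */
    vfloat32m1_t vzero = __riscv_vfmv_v_f_f32m1(0.0f, 1);
    vfloat32m1_t vsum  = __riscv_vfredusum_vs_f32m8_f32m1(vr, vzero, vlmax);
    return __riscv_vfmv_f_s_f32m1_f32(vsum);
}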

File tree

4 files changed (+36, -36):

kernel/riscv64/gemv_t_rvv.c
kernel/riscv64/symv_L_rvv.c
kernel/riscv64/symv_U_rvv.c
kernel/riscv64/zgemv_t_rvv.c

kernel/riscv64/gemv_t_rvv.c

Lines changed: 4 additions & 4 deletions
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -79,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
 
             va = VLEV_FLOAT(a_ptr, vl);
             vx = VLEV_FLOAT(x_ptr, vl);
-            vr = VFMACCVV_FLOAT(vr, va, vx, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl);
         }
 
         v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
@@ -103,7 +103,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
 
             va = VLEV_FLOAT(a_ptr, vl);
             vx = VLSEV_FLOAT(x_ptr, stride_x, vl);
-            vr = VFMACCVV_FLOAT(vr, va, vx, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl);
         }
 
         v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);

kernel/riscv64/symv_L_rvv.c

Lines changed: 6 additions & 6 deletions
@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VSEV_FLOAT __riscv_vse32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VSSEV_FLOAT __riscv_vsse32_v_f32m8
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8
 #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
@@ -56,7 +56,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VSEV_FLOAT __riscv_vse64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VSSEV_FLOAT __riscv_vsse64_v_f64m8
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8
 #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
@@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
             VSEV_FLOAT(&y[i], vy, vl);
 
             vx = VLEV_FLOAT(&x[i], vl);
-            vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
         }
         v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
@@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
             VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
 
             vx = VLEV_FLOAT(&x[i], vl);
-            vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
             iy += inc_yv;
         }
@@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
             VSEV_FLOAT(&y[i], vy, vl);
 
             vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
-            vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
             ix += inc_xv;
         }
@@ -201,7 +201,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
             VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
 
             vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
-            vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
             ix += inc_xv;
             iy += inc_yv;

kernel/riscv64/symv_U_rvv.c

Lines changed: 6 additions & 6 deletions
@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VSEV_FLOAT __riscv_vse32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VSSEV_FLOAT __riscv_vsse32_v_f32m8
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8
 #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
@@ -57,7 +57,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VSEV_FLOAT __riscv_vse64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VSSEV_FLOAT __riscv_vsse64_v_f64m8
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8
 #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
@@ -101,7 +101,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
             VSEV_FLOAT(&y[i], vy, vl);
 
             vx = VLEV_FLOAT(&x[i], vl);
-            vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
         }
         v_res = VFREDSUM_FLOAT(vr, v_z0, vl_max);
 
@@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
             VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
 
             vx = VLEV_FLOAT(&x[i], vl);
-            vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
             iy += inc_yv;
         }
@@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
             VSEV_FLOAT(&y[i], vy, vl);
 
             vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
-            vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
             ix += inc_xv;
         }
@@ -200,7 +200,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
             VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
 
             vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
-            vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
             ix += inc_xv;
             iy += inc_yv;
         }

kernel/riscv64/zgemv_t_rvv.c

Lines changed: 20 additions & 20 deletions
@@ -35,8 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
@@ -49,8 +49,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
@@ -90,15 +90,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
             VLSEG_FLOAT(&vx0, &vx1, &x[ix], vl);
 
 #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
-            vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-            vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va1, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+            vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl);
 #else
-            vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-            vr = VFMACCVV_FLOAT(vr, va1, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-            vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+            vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl);
 #endif
             j += vl * 2;
             ix += vl * inc_x * 2;
@@ -134,15 +134,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
             VLSSEG_FLOAT(&vx0, &vx1, &x[ix], stride_x, vl);
 
 #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
-            vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-            vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va1, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+            vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl);
 #else
-            vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-            vr = VFMACCVV_FLOAT(vr, va1, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-            vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+            vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl);
 #endif
             j += vl * 2;
             ix += vl * inc_x * 2;
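
The complex kernels need the same treatment four times per element: the real and imaginary accumulators vr and vi are each updated by two multiply-accumulates, and both carry VLMAX-wide partial sums into the final reduction. A condensed sketch of the no-conjugation branch, assuming f32 with LMUL=4 and mirroring the pointer-output VLSEG form used in the kernel; cdot_accum is a hypothetical helper, not OpenBLAS code.

#include <riscv_vector.h>
#include <stddef.h>

/* Accumulate a complex dot product over n (re, im) pairs.  vr and vi
 * hold VLMAX-wide partial sums across iterations, so all four
 * multiply-accumulates use the _tu (tail-undisturbed) variants. */
static void cdot_accum(const float *a, const float *x, size_t n,
                       vfloat32m4_t *vr, vfloat32m4_t *vi)
{
    for (size_t i = 0; i < n; ) {
        size_t vl = __riscv_vsetvl_e32m4(n - i);
        vfloat32m4_t va0, va1, vx0, vx1;
        /* segment loads split interleaved (re, im) pairs into two registers */
        __riscv_vlseg2e32_v_f32m4(&va0, &va1, &a[2 * i], vl);
        __riscv_vlseg2e32_v_f32m4(&vx0, &vx1, &x[2 * i], vl);

        *vr = __riscv_vfmacc_vv_f32m4_tu(*vr, va0, vx0, vl);   /* re += ar*xr */
        *vr = __riscv_vfnmsac_vv_f32m4_tu(*vr, va1, vx1, vl);  /* re -= ai*xi */
        *vi = __riscv_vfmacc_vv_f32m4_tu(*vi, va0, vx1, vl);   /* im += ar*xi */
        *vi = __riscv_vfmacc_vv_f32m4_tu(*vi, va1, vx0, vl);   /* im += ai*xr */
        i += vl;
    }
}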
