Skip to content

Commit 1332f8a

Browse files
authored
Merge pull request #4159 from OMaghiarIMG/risc-v-tail-policy
Set tail policy to undisturbed for RVV intrinsics accumulators
2 parents 2d316c2 + 826a9d5 commit 1332f8a

24 files changed

+204 / -204 lines changed (204 additions, 204 deletions)

kernel/riscv64/amax_rvv.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3939
#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1
4040
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
4141
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
42-
#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
42+
#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
4343
#define VFABSV_FLOAT __riscv_vfabs_v_f32m8
4444
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
4545
#else
@@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5353
#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1
5454
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
5555
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
56-
#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
56+
#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
5757
#define VFABSV_FLOAT __riscv_vfabs_v_f64m8
5858
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
5959
#endif
@@ -78,7 +78,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
7878

7979
vx = VLEV_FLOAT(x, vl);
8080
vx = VFABSV_FLOAT(vx, vl);
81-
vmax = VFMAXVV_FLOAT(vmax, vx, vl);
81+
vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
8282
}
8383

8484
} else {
@@ -90,7 +90,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
9090

9191
vx = VLSEV_FLOAT(x, stride_x, vl);
9292
vx = VFABSV_FLOAT(vx, vl);
93-
vmax = VFMAXVV_FLOAT(vmax, vx, vl);
93+
vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
9494
}
9595

9696
}

kernel/riscv64/amin_rvv.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3939
#define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
4040
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
4141
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
42-
#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
42+
#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
4343
#define VFABSV_FLOAT __riscv_vfabs_v_f32m8
4444
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
4545
#else
@@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5353
#define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
5454
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
5555
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
56-
#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
56+
#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
5757
#define VFABSV_FLOAT __riscv_vfabs_v_f64m8
5858
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
5959
#endif
@@ -78,7 +78,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
7878

7979
vx = VLEV_FLOAT(x, vl);
8080
vx = VFABSV_FLOAT(vx, vl);
81-
vmin = VFMINVV_FLOAT(vmin, vx, vl);
81+
vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
8282
}
8383

8484
} else {
@@ -90,7 +90,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
9090

9191
vx = VLSEV_FLOAT(x, stride_x, vl);
9292
vx = VFABSV_FLOAT(vx, vl);
93-
vmin = VFMINVV_FLOAT(vmin, vx, vl);
93+
vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
9494
}
9595

9696
}

kernel/riscv64/asum_rvv.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3636
#define VLEV_FLOAT __riscv_vle32_v_f32m8
3737
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
3838
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
39-
#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
39+
#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu
4040
#define VFABSV_FLOAT __riscv_vfabs_v_f32m8
4141
#define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
4242
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
@@ -50,7 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5050
#define VLEV_FLOAT __riscv_vle64_v_f64m8
5151
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
5252
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
53-
#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
53+
#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu
5454
#define VFABSV_FLOAT __riscv_vfabs_v_f64m8
5555
#define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
5656
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
@@ -76,7 +76,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
7676

7777
vx = VLEV_FLOAT(x, vl);
7878
vx = VFABSV_FLOAT(vx, vl);
79-
vsum = VFADDVV_FLOAT(vsum, vx, vl);
79+
vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
8080
}
8181

8282
} else {
@@ -88,7 +88,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
8888

8989
vx = VLSEV_FLOAT(x, stride_x, vl);
9090
vx = VFABSV_FLOAT(vx, vl);
91-
vsum = VFADDVV_FLOAT(vsum, vx, vl);
91+
vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
9292
}
9393

9494
}

kernel/riscv64/dot_rvv.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
4949
vfloat32m4_t vx = __riscv_vle32_v_f32m4(x, vl);
5050
vfloat32m4_t vy = __riscv_vle32_v_f32m4(y, vl);
5151

52-
vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
52+
vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
5353
#else
5454
vfloat64m8_t vx = __riscv_vle64_v_f64m8(x, vl);
5555
vfloat64m8_t vy = __riscv_vle64_v_f64m8(y, vl);
5656

57-
vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
57+
vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
5858
#endif
5959
}
6060

@@ -69,12 +69,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
6969
vfloat32m4_t vx = __riscv_vle32_v_f32m4(x, vl);
7070
vfloat32m4_t vy = __riscv_vlse32_v_f32m4(y, stride_y, vl);
7171

72-
vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
72+
vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
7373
#else
7474
vfloat64m8_t vx = __riscv_vle64_v_f64m8(x, vl);
7575
vfloat64m8_t vy = __riscv_vlse64_v_f64m8(y, stride_y, vl);
7676

77-
vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
77+
vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
7878
#endif
7979
}
8080
} else if (1 == inc_y) {
@@ -88,12 +88,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
8888
vfloat32m4_t vx = __riscv_vlse32_v_f32m4(x, stride_x, vl);
8989
vfloat32m4_t vy = __riscv_vle32_v_f32m4(y, vl);
9090

91-
vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
91+
vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
9292
#else
9393
vfloat64m8_t vx = __riscv_vlse64_v_f64m8(x, stride_x, vl);
9494
vfloat64m8_t vy = __riscv_vle64_v_f64m8(y, vl);
9595

96-
vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
96+
vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
9797
#endif
9898
}
9999
} else {
@@ -108,12 +108,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
108108
vfloat32m4_t vx = __riscv_vlse32_v_f32m4(x, stride_x, vl);
109109
vfloat32m4_t vy = __riscv_vlse32_v_f32m4(y, stride_y, vl);
110110

111-
vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
111+
vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
112112
#else
113113
vfloat64m8_t vx = __riscv_vlse64_v_f64m8(x, stride_x, vl);
114114
vfloat64m8_t vy = __riscv_vlse64_v_f64m8(y, stride_y, vl);
115115

116-
vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
116+
vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
117117
#endif
118118
}
119119
}

kernel/riscv64/gemv_t_rvv.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3636
#define VLEV_FLOAT __riscv_vle32_v_f32m8
3737
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
3838
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
39-
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
39+
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
4040
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
4141
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
4242
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4949
#define VLEV_FLOAT __riscv_vle64_v_f64m8
5050
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
5151
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
52-
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
52+
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
5353
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
5454
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
5555
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -79,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
7979

8080
va = VLEV_FLOAT(a_ptr, vl);
8181
vx = VLEV_FLOAT(x_ptr, vl);
82-
vr = VFMACCVV_FLOAT(vr, va, vx, vl);
82+
vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl);
8383
}
8484

8585
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
@@ -103,7 +103,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
103103

104104
va = VLEV_FLOAT(a_ptr, vl);
105105
vx = VLSEV_FLOAT(x_ptr, stride_x, vl);
106-
vr = VFMACCVV_FLOAT(vr, va, vx, vl);
106+
vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl);
107107
}
108108

109109
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);

kernel/riscv64/iamax_rvv.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4242
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
4343
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
4444
#define VFABSV_FLOAT __riscv_vfabs_v_f64m8
45-
#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
45+
#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
4646
#define VFIRSTM __riscv_vfirst_m_b8
4747
#define UINT_V_T vuint64m8_t
48-
#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
48+
#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
4949
#define VIDV_UINT __riscv_vid_v_u64m8
50-
#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
50+
#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
5151
#define VADDVX_UINT __riscv_vadd_vx_u64m8
5252
#define VMVVX_UINT __riscv_vmv_v_x_u64m8
5353
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -68,12 +68,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6868
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
6969
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
7070
#define VFABSV_FLOAT __riscv_vfabs_v_f32m8
71-
#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
71+
#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
7272
#define VFIRSTM __riscv_vfirst_m_b4
7373
#define UINT_V_T vuint32m8_t
74-
#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
74+
#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
7575
#define VIDV_UINT __riscv_vid_v_u32m8
76-
#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
76+
#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
7777
#define VADDVX_UINT __riscv_vadd_vx_u32m8
7878
#define VMVVX_UINT __riscv_vmv_v_x_u32m8
7979
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -106,11 +106,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
106106

107107
//index where element greater than v_max
108108
mask = VMFLTVV_FLOAT(v_max, vx, vl);
109-
v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
110-
v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
109+
v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
110+
v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
111111

112112
//update v_max
113-
v_max = VFMAXVV_FLOAT(v_max, vx, vl);
113+
v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
114114
}
115115

116116
} else {
@@ -125,11 +125,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
125125

126126
//index where element greater than v_max
127127
mask = VMFLTVV_FLOAT(v_max, vx, vl);
128-
v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
129-
v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
128+
v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
129+
v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
130130

131131
//update v_max
132-
v_max = VFMAXVV_FLOAT(v_max, vx, vl);
132+
v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
133133
}
134134

135135
}

kernel/riscv64/iamin_rvv.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4343
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
4444
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
4545
#define VFABSV_FLOAT __riscv_vfabs_v_f64m8
46-
#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
46+
#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
4747
#define VFIRSTM __riscv_vfirst_m_b8
4848
#define UINT_V_T vuint64m8_t
49-
#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
49+
#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
5050
#define VIDV_UINT __riscv_vid_v_u64m8
51-
#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
51+
#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
5252
#define VADDVX_UINT __riscv_vadd_vx_u64m8
5353
#define VMVVX_UINT __riscv_vmv_v_x_u64m8
5454
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -69,12 +69,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6969
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
7070
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
7171
#define VFABSV_FLOAT __riscv_vfabs_v_f32m8
72-
#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
72+
#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
7373
#define VFIRSTM __riscv_vfirst_m_b4
7474
#define UINT_V_T vuint32m8_t
75-
#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
75+
#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
7676
#define VIDV_UINT __riscv_vid_v_u32m8
77-
#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
77+
#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
7878
#define VADDVX_UINT __riscv_vadd_vx_u32m8
7979
#define VMVVX_UINT __riscv_vmv_v_x_u32m8
8080
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -107,11 +107,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
107107

108108
// index where element less than v_min
109109
mask = VMFLTVV_FLOAT(vx, v_min, vl);
110-
v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
111-
v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
110+
v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
111+
v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
112112

113113
//update v_min and start_index j
114-
v_min = VFMINVV_FLOAT(v_min, vx, vl);
114+
v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
115115
}
116116

117117
} else {
@@ -126,11 +126,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
126126

127127
// index where element less than v_min
128128
mask = VMFLTVV_FLOAT(vx, v_min, vl);
129-
v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
130-
v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
129+
v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
130+
v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
131131

132132
//update v_min and start_index j
133-
v_min = VFMINVV_FLOAT(v_min, vx, vl);
133+
v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
134134
}
135135

136136
}

kernel/riscv64/imax_rvv.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4242
#define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m8_b8
4343
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
4444
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
45-
#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
45+
#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
4646
#define VFIRSTM __riscv_vfirst_m_b8
4747
#define UINT_V_T vuint64m8_t
48-
#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
48+
#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
4949
#define VIDV_UINT __riscv_vid_v_u64m8
50-
#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
50+
#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
5151
#define VADDVX_UINT __riscv_vadd_vx_u64m8
5252
#define VMVVX_UINT __riscv_vmv_v_x_u64m8
5353
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -67,12 +67,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6767
#define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m8_b4
6868
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
6969
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
70-
#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
70+
#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
7171
#define VFIRSTM __riscv_vfirst_m_b4
7272
#define UINT_V_T vuint32m8_t
73-
#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
73+
#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
7474
#define VIDV_UINT __riscv_vid_v_u32m8
75-
#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
75+
#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
7676
#define VADDVX_UINT __riscv_vadd_vx_u32m8
7777
#define VMVVX_UINT __riscv_vmv_v_x_u32m8
7878
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -104,11 +104,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
104104

105105
//index where element greater than v_max
106106
mask = VMFLTVV_FLOAT(v_max, vx, vl);
107-
v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
108-
v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
107+
v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
108+
v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
109109

110110
//update v_max and start_index j
111-
v_max = VFMAXVV_FLOAT(v_max, vx, vl);
111+
v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
112112
}
113113

114114
} else {
@@ -122,11 +122,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
122122

123123
//index where element greater than v_max
124124
mask = VMFLTVV_FLOAT(v_max, vx, vl);
125-
v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
126-
v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
125+
v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
126+
v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
127127

128128
//update v_max and start_index j
129-
v_max = VFMAXVV_FLOAT(v_max, vx, vl);
129+
v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
130130
}
131131

132132
}

0 commit comments

Comments
 (0)