@@ -51,11 +51,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51
51
52
52
/*
 * Kernel-local aliases for vector types and intrinsics. Every name is built
 * by handing identifier fragments plus ELEN and LMUL to JOIN / RISCV_RVV,
 * which are defined outside this excerpt (presumably token-pasting helpers;
 * confirm against their definitions).
 */
/* VSETVL(n): the result is kept in gvl / vl and passed as the length argument of the macros below. */
#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
53
53
#define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
54
/* Same pattern as FLOAT_V_T but with the fixed "m1" fragment in place of LMUL; this is the type of the v_res reduction accumulator. */
+ #define FLOAT_V_M1_T JOIN(vfloat, ELEN, m1, _t, _)
54
55
#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
55
56
/* Called as (pointer, stride, vl); callers compute the stride as inc * sizeof(FLOAT). */
#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
56
57
/* Called as (pointer, vector, vl). */
#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
57
58
/* Called as (pointer, stride, vector, vl). */
#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)
58
59
/* Called as (accumulator vector, scalar, vector, vl); the result is assigned back to the accumulator. */
#define VFMACCVF_FLOAT JOIN(RISCV_RVV(vfmacc), _vf_f, ELEN, LMUL, _)
60
/* Called as (scalar, vl) to initialise a vector from a scalar such as y[0]. */
+ #define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
61
/* "m1" counterpart of VFMVVF_FLOAT; used to seed v_res with 0.0f. */
+ #define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
62
+
63
+ /*
+  * VFREDSUMVS_FLOAT(va, vb, gvl): sum-reduction alias, always invoked with
+  * three arguments (vector, m1 accumulator, vl).
+  *
+  * When RISCV_0p10_INTRINSICS is defined, the underlying intrinsic is called
+  * with an extra leading argument. The macro supplies the identifier v_res
+  * for it, so a variable named v_res must be in scope at every expansion site.
+  *
+  * The '(' must follow the macro name with no whitespace. Otherwise the
+  * preprocessor defines an object-like macro whose replacement text begins
+  * with the parameter list, and every use expands to invalid code.
+  */
+ #ifdef RISCV_0p10_INTRINSICS
64
+ #define VFREDSUMVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
65
+ #else
66
+ #define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
67
+ #endif
59
68
60
69
int CNAME (BLASLONG n , BLASLONG dummy0 , BLASLONG dummy1 , FLOAT da , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y , FLOAT * dummy , BLASLONG dummy2 )
61
70
{
@@ -123,7 +132,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
123
132
VSEV_FLOAT (& y [j ], vy0 , gvl );
124
133
j += gvl ;
125
134
}
126
- } else if ( inc_x == 1 ) {
135
+ } else if ( 1 == inc_x && 0 != inc_y ) {
127
136
stride_y = inc_y * sizeof (FLOAT );
128
137
gvl = VSETVL (n );
129
138
if (gvl <= n /2 ){
@@ -151,6 +160,19 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
151
160
VSSEV_FLOAT (& y [j * inc_y ], stride_y , vy0 , gvl );
152
161
j += gvl ;
153
162
}
163
+ } else if ( 0 == inc_y ) {
164
+ BLASLONG stride_x = inc_x * sizeof (FLOAT );
165
+ size_t in_vl = VSETVL (n );
166
+ vy0 = VFMVVF_FLOAT ( y [0 ], in_vl );
167
+
168
+ for (size_t vl ; n > 0 ; n -= vl , x += vl * inc_x ) {
169
+ vl = VSETVL (n );
170
+ vx0 = VLSEV_FLOAT (x , stride_x , vl );
171
+ vy0 = VFMACCVF_FLOAT (vy0 , da , vx0 , vl );
172
+ }
173
+ FLOAT_V_M1_T v_res = VFMVVF_FLOAT_M1 ( 0.0f , 1 );
174
+ v_res = VFREDSUMVS_FLOAT ( vy0 , v_res , in_vl );
175
+ y [0 ] = EXTRACT_FLOAT (v_res );
154
176
}else {
155
177
stride_x = inc_x * sizeof (FLOAT );
156
178
stride_y = inc_y * sizeof (FLOAT );
0 commit comments