Skip to content

Commit e1afb23

Browse files
kseniyazaytseva authored and AndreySokolovSC committed
Fix BLAS and LAPACK tests for C910V and RISCV64_ZVL256B targets
* Fixed bugs in dgemm, [a]min/max, asum kernels
* Added zero checks for BLAS kernels
* Added dsdot implementation for RVV 0.7.1
* Fixed bugs in _vector files for C910V and RISCV64_ZVL256B targets
* Added additional definitions for RISCV64_ZVL256B target
1 parent 88e9941 commit e1afb23

19 files changed

+205
-38
lines changed

Makefile.prebuild

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ ifeq ($(TARGET), x280)
5959
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
6060
endif
6161

62+
ifeq ($(TARGET), RISCV64_ZVL256B)
63+
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
64+
endif
65+
6266
ifeq ($(TARGET), RISCV64_GENERIC)
6367
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
6468
endif

Makefile.riscv64

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ ifeq ($(CORE), x280)
66
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
77
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
88
endif
9+
ifeq ($(CORE), RISCV64_ZVL256B)
10+
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl256b -mabi=lp64d
11+
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
12+
endif
913
ifeq ($(CORE), RISCV64_GENERIC)
1014
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
1115
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ Z14
121121
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
122122
C910V
123123
x280
124+
RISCV64_ZVL256B
124125

125126
11.LOONGARCH64:
126127
LOONGSONGENERIC

getarch.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1692,6 +1692,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16921692
#else
16931693
#endif
16941694

1695+
#ifdef FORCE_RISCV64_ZVL256B
1696+
#define FORCE
1697+
#define ARCHITECTURE "RISCV64"
1698+
#define SUBARCHITECTURE "RISCV64_ZVL256B"
1699+
#define SUBDIRNAME "riscv64"
1700+
#define ARCHCONFIG "-DRISCV64_ZVL256B " \
1701+
"-DL1_DATA_SIZE=64536 -DL1_DATA_LINESIZE=32 " \
1702+
"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
1703+
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
1704+
#define LIBNAME "riscv64_zvl256b"
1705+
#define CORENAME "RISCV64_ZVL256B"
1706+
#else
1707+
#endif
1708+
16951709

16961710
#if defined(FORCE_E2K) || defined(__e2k__)
16971711
#define FORCE

kernel/riscv64/KERNEL.C910V

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ SDOTKERNEL = dot_vector.c
5959
DDOTKERNEL = dot_vector.c
6060
CDOTKERNEL = zdot_vector.c
6161
ZDOTKERNEL = zdot_vector.c
62+
DSDOTKERNEL = dsdot_vector.c
6263

6364
SNRM2KERNEL = nrm2_vector.c
6465
DNRM2KERNEL = nrm2_vector.c

kernel/riscv64/amin_vector.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3131
# define LMUL m2
3232
# if defined(DOUBLE)
3333
# define ELEN 64
34+
# define ABS fabs
3435
# else
3536
# define ELEN 32
37+
# define ABS fabsf
3638
# endif
3739
#else
3840
# define LMUL m8
3941
# if defined(DOUBLE)
4042
# define ELEN 64
43+
# define ABS fabs
4144
# else
4245
# define ELEN 32
46+
# define ABS fabsf
4347
# endif
4448
#endif
4549

@@ -69,7 +73,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
6973
FLOAT minf=0.0;
7074
if (n <= 0 || inc_x <= 0) return(minf);
7175

72-
minf = *x;
76+
minf = ABS(*x);
7377
x += inc_x;
7478
--n;
7579
if (n == 0) return(minf);

kernel/riscv64/asum_vector.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6767
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
6868
{
6969
BLASLONG i=0, j=0;
70-
BLASLONG ix=0;
7170
FLOAT asumf=0.0;
7271
if (n <= 0 || inc_x <= 0) return(asumf);
7372
unsigned int gvl = 0;
@@ -103,17 +102,15 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
103102
unsigned int stride_x = inc_x * sizeof(FLOAT);
104103
if(gvl <= n/2){
105104
v_sum = VFMVVF_FLOAT(0, gvl);
106-
BLASLONG inc_xv = inc_x * gvl;
107105
for(i=0,j=0; i<n/(gvl*2); i++){
108-
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
106+
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
109107
v0 = VFABS_FLOAT(v0, gvl);
110108
v_sum = VFADDVV_FLOAT(v_sum, v0, gvl);
111109

112-
v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
110+
v1 = VLSEV_FLOAT(&x[(j+gvl)*inc_x], stride_x, gvl);
113111
v1 = VFABS_FLOAT(v1, gvl);
114112
v_sum = VFADDVV_FLOAT(v_sum, v1, gvl);
115113
j += gvl * 2;
116-
inc_xv += inc_xv * 2;
117114
}
118115
v_res = VFREDSUMVS_FLOAT(v_sum, v_res, gvl);
119116
}

kernel/riscv64/axpby_vector.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6060

6161
int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
6262
{
63-
if (n < 0) return(0);
63+
if (n <= 0) return(0);
6464

6565
BLASLONG i=0, j=0;
6666
unsigned int gvl = 0;

kernel/riscv64/dgemm_kernel_8x4_c910v.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
196196

197197
asm volatile(
198198
"vsetvli zero, zero, e64,m1 \n\t"
199-
"fmv.w.x ft11, zero \n\t"
199+
"fmv.d.x ft11, zero \n\t"
200200
"mv t0, %[BK] \n\t"
201201

202202
"vfmv.v.f v16, ft11 \n\t"

kernel/riscv64/dsdot_vector.c

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/***************************************************************************
2+
Copyright (c) 2023, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
30+
/*
 * DSDOT kernel: dot product of two single-precision vectors, accumulated
 * and returned in double precision (BLAS dsdot).  Single-precision lanes
 * are loaded as f32m2 and widened into an f64m4 accumulator via
 * vfwmacc_vv_f64m4, so no precision is lost in the products.
 *
 * Parameters:
 *   n      - number of elements; n < 1 returns 0.0 immediately
 *   x, y   - input vectors (FLOAT is float in this build)
 *   inc_x,
 *   inc_y  - element strides for x and y
 *
 * Four branches cover the unit/non-unit stride combinations; each branch
 * runs a full-vector main loop, reduces the accumulator once, then handles
 * the remaining (n % gvl) tail elements with a shortened vector length.
 */
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
        BLASLONG i=0, j=0;
        double dot = 0.0 ;

        if ( n < 1 ) return(dot);
        vfloat64m4_t vr;                 /* widened (f64) running accumulator */
        vfloat32m2_t vx, vy;             /* f32 input lanes */
        unsigned int gvl = 0;            /* granted vector length (element count) */
        vfloat64m1_t v_res, v_z0;        /* reduction result / zero seed */
        gvl = vsetvlmax_e64m1();
        v_res = vfmv_v_f_f64m1(0, gvl);
        v_z0 = vfmv_v_f_f64m1(0, gvl);

        if(inc_x == 1 && inc_y == 1){
                /* Both vectors contiguous: unit-stride loads. */
                gvl = vsetvl_e64m4(n);
                vr = vfmv_v_f_f64m4(0, gvl);
                for(i=0,j=0; i<n/gvl; i++){
                        vx = vle32_v_f32m2(&x[j], gvl);
                        vy = vle32_v_f32m2(&y[j], gvl);
                        /* vr += widen(vx) * widen(vy), elementwise in f64 */
                        vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
                        j += gvl;
                }
                if(j > 0){
                        /* horizontal sum of vr, seeded with v_z0 (zero) */
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot += (double)vfmv_f_s_f64m1_f64(v_res);
                }
                //tail
                if(j < n){
                        gvl = vsetvl_e64m4(n-j);
                        vx = vle32_v_f32m2(&x[j], gvl);
                        vy = vle32_v_f32m2(&y[j], gvl);
                        /* fresh zero base so the tail is summed independently */
                        vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
                        //vr = vfdot_vv_f32m2(vx, vy, gvl);
                        vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot += (double)vfmv_f_s_f64m1_f64(v_res);
                }
        }else if(inc_y == 1){
                /* x strided, y contiguous. */
                gvl = vsetvl_e64m4(n);
                vr = vfmv_v_f_f64m4(0, gvl);
                int stride_x = inc_x * sizeof(FLOAT);   /* byte stride for vlse */
                for(i=0,j=0; i<n/gvl; i++){
                        vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
                        vy = vle32_v_f32m2(&y[j], gvl);
                        vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
                        j += gvl;
                }
                if(j > 0){
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot += (double)vfmv_f_s_f64m1_f64(v_res);

                }
                //tail
                if(j < n){
                        gvl = vsetvl_e64m4(n-j);
                        vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
                        vy = vle32_v_f32m2(&y[j], gvl);
                        vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
                        //vr = vfdot_vv_f32m2(vx, vy, gvl);
                        vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot += (double)vfmv_f_s_f64m1_f64(v_res);

                }
        }else if(inc_x == 1){
                /* x contiguous, y strided. */
                gvl = vsetvl_e64m4(n);
                vr = vfmv_v_f_f64m4(0, gvl);
                int stride_y = inc_y * sizeof(FLOAT);   /* byte stride for vlse */
                for(i=0,j=0; i<n/gvl; i++){
                        vx = vle32_v_f32m2(&x[j], gvl);
                        vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
                        vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
                        j += gvl;
                }
                if(j > 0){
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot += (double)vfmv_f_s_f64m1_f64(v_res);

                }
                //tail
                if(j < n){
                        gvl = vsetvl_e64m4(n-j);
                        vx = vle32_v_f32m2(&x[j], gvl);
                        vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
                        vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
                        //vr = vfdot_vv_f32m2(vx, vy, gvl);
                        vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot += (double)vfmv_f_s_f64m1_f64(v_res);

                }
        }else{
                /* Both vectors strided. */
                gvl = vsetvl_e64m4(n);
                vr = vfmv_v_f_f64m4(0, gvl);
                int stride_x = inc_x * sizeof(FLOAT);
                int stride_y = inc_y * sizeof(FLOAT);
                for(i=0,j=0; i<n/gvl; i++){
                        vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
                        vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
                        vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
                        j += gvl;
                }
                if(j > 0){
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot += (double)vfmv_f_s_f64m1_f64(v_res);

                }
                //tail
                if(j < n){
                        gvl = vsetvl_e64m4(n-j);
                        vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
                        vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
                        vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
                        //vr = vfdot_vv_f32m2(vx, vy, gvl);
                        vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot += (double)vfmv_f_s_f64m1_f64(v_res);

                }
        }
        return(dot);
}

0 commit comments

Comments
 (0)