Skip to content

Commit 27816fa

Browse files
authored
Merge pull request #4472 from sergei-lewis/dev/slewis/merge-from-riscv
Merge risc-v branch to develop
2 parents ec74dcd + 3ffd686 commit 27816fa

File tree

177 files changed

+41327
-2576
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

177 files changed

+41327
-2576
lines changed

Makefile.prebuild

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,22 @@ ifeq ($(TARGET), CK860FV)
5959
TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
6060
endif
6161

62+
ifeq ($(TARGET), x280)
63+
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
64+
endif
65+
66+
ifeq ($(TARGET), RISCV64_ZVL256B)
67+
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
68+
endif
69+
70+
ifeq ($(TARGET), RISCV64_ZVL128B)
71+
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
72+
endif
73+
74+
ifeq ($(TARGET), RISCV64_GENERIC)
75+
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
76+
endif
77+
6278
all: getarch_2nd
6379
./getarch_2nd 0 >> $(TARGET_MAKE)
6480
./getarch_2nd 1 >> $(TARGET_CONF)

Makefile.riscv64

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,19 @@ ifeq ($(CORE), C910V)
22
CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920
33
FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static
44
endif
5+
ifeq ($(CORE), x280)
6+
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
7+
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
8+
endif
9+
ifeq ($(CORE), RISCV64_ZVL256B)
10+
CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d
11+
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
12+
endif
13+
ifeq ($(CORE), RISCV64_ZVL128B)
14+
CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
15+
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
16+
endif
17+
ifeq ($(CORE), RISCV64_GENERIC)
18+
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
19+
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static
20+
endif

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,11 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
198198
```
199199
(also known to work on C906 as long as you use only single-precision functions - its instruction set support appears to be incomplete in double precision)
200200

201+
- **x280**: Level-3 BLAS and Level-1,2 are optimized by RISC-V Vector extension 1.0.
202+
```sh
203+
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran
204+
```
205+
201206
### Support for multiple targets in a single library
202207

203208
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.

TargetList.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,11 @@ Z13
118118
Z14
119119

120120
10.RISC-V 64:
121-
RISCV64_GENERIC
121+
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
122+
RISCV64_ZVL128B
122123
C910V
124+
x280
125+
RISCV64_ZVL256B
123126

124127
11.LOONGARCH64:
125128
LOONGSONGENERIC

benchmark/Makefile

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ ESSL=/opt/ibm/lib
3737
#LIBESSL = -lesslsmp $(ESSL)/libxlomp_ser.so.1 $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
3838
LIBESSL = -lesslsmp $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
3939

40+
# x280 temporary workaround for gfortran
41+
ifeq ($(TARGET), x280)
42+
CCOMMON_OPT:=$(filter-out -mllvm --riscv-v-vector-bits-min=512,$(CCOMMON_OPT))
43+
endif
44+
45+
4046
ifneq ($(NO_LAPACK), 1)
4147
GOTO_LAPACK_TARGETS=slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
4248
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
@@ -265,9 +271,9 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
265271
ismax.goto idmax.goto \
266272
isamin.goto idamin.goto icamin.goto izamin.goto \
267273
ismin.goto idmin.goto \
268-
samax.goto damax.goto scamax.goto dzamax.goto \
274+
samax.goto damax.goto camax.goto zamax.goto \
269275
smax.goto dmax.goto \
270-
samin.goto damin.goto scamin.goto dzamin.goto \
276+
samin.goto damin.goto camin.goto zamin.goto \
271277
smin.goto dmin.goto \
272278
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto \
273279
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS) $(GOTO_HALF_TARGETS)
@@ -2832,12 +2838,12 @@ samax.goto : samax.$(SUFFIX) ../$(LIBNAME)
28322838
damax.goto : damax.$(SUFFIX) ../$(LIBNAME)
28332839
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28342840

2835-
############################################## SCAMAX ##############################################
2836-
scamax.goto : scamax.$(SUFFIX) ../$(LIBNAME)
2841+
############################################## CAMAX ##############################################
2842+
camax.goto : camax.$(SUFFIX) ../$(LIBNAME)
28372843
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28382844

2839-
############################################## DZAMAX ##############################################
2840-
dzamax.goto : dzamax.$(SUFFIX) ../$(LIBNAME)
2845+
############################################## ZAMAX ##############################################
2846+
zamax.goto : zamax.$(SUFFIX) ../$(LIBNAME)
28412847
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28422848

28432849
############################################## SMAX ##############################################
@@ -2856,12 +2862,12 @@ samin.goto : samin.$(SUFFIX) ../$(LIBNAME)
28562862
damin.goto : damin.$(SUFFIX) ../$(LIBNAME)
28572863
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28582864

2859-
############################################## SCAMIN ##############################################
2860-
scamin.goto : scamin.$(SUFFIX) ../$(LIBNAME)
2865+
############################################## CAMIN ##############################################
2866+
camin.goto : camin.$(SUFFIX) ../$(LIBNAME)
28612867
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28622868

2863-
############################################## DZAMIN ##############################################
2864-
dzamin.goto : dzamin.$(SUFFIX) ../$(LIBNAME)
2869+
############################################## ZAMIN ##############################################
2870+
zamin.goto : zamin.$(SUFFIX) ../$(LIBNAME)
28652871
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28662872

28672873
############################################## SMIN ##############################################
@@ -3383,10 +3389,10 @@ samax.$(SUFFIX) : amax.c
33833389
damax.$(SUFFIX) : amax.c
33843390
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
33853391

3386-
scamax.$(SUFFIX) : amax.c
3392+
camax.$(SUFFIX) : amax.c
33873393
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
33883394

3389-
dzamax.$(SUFFIX) : amax.c
3395+
zamax.$(SUFFIX) : amax.c
33903396
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
33913397

33923398

@@ -3403,10 +3409,10 @@ samin.$(SUFFIX) : amin.c
34033409
damin.$(SUFFIX) : amin.c
34043410
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
34053411

3406-
scamin.$(SUFFIX) : amin.c
3412+
camin.$(SUFFIX) : amin.c
34073413
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
34083414

3409-
dzamin.$(SUFFIX) : amin.c
3415+
zamin.$(SUFFIX) : amin.c
34103416
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
34113417

34123418

@@ -3436,4 +3442,4 @@ smallscaling: smallscaling.c ../$(LIBNAME)
34363442
clean ::
34373443
@rm -f *.goto *.mkl *.acml *.atlas *.veclib *.essl smallscaling
34383444

3439-
include $(TOPDIR)/Makefile.tail
3445+
include $(TOPDIR)/Makefile.tail

cblas.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,15 @@ void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA
303303
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
304304
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
305305

306+
void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
307+
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
308+
void cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
309+
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
310+
void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
311+
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
312+
void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
313+
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
314+
306315
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
307316
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
308317
void cblas_dsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,

common_interface.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,15 @@ void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double *
498498
void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *,
499499
xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *);
500500

501+
void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *,
502+
float *, blasint *, float *, blasint *, float *, float *, blasint *);
503+
void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *,
504+
double *, blasint *, double *, blasint *, double *, double *, blasint *);
505+
void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *,
506+
float *, blasint *, float *, blasint *, float *, float *, blasint *);
507+
void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *,
508+
double *, blasint *, double *, blasint *, double *, double *, blasint *);
509+
501510
int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *,
502511
float *, float *, blasint *, float *, blasint *,
503512
float *, float *, blasint *);

common_riscv64.h

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,26 @@ static inline int blas_quickdivide(blasint x, blasint y){
9191
#define BUFFER_SIZE ( 32 << 20)
9292
#define SEEK_ADDRESS
9393

94-
#if defined(C910V)
95-
#include <riscv_vector.h>
94+
#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC))) || defined(RISCV64_ZVL128B) || defined(x280)
95+
# include <riscv_vector.h>
96+
#endif
97+
98+
#if defined( __riscv_xtheadc ) && defined( __riscv_v ) && ( __riscv_v <= 7000 )
99+
// t-head toolchain uses obsolete rvv intrinsics, can't build for C910V without this
100+
#define RISCV_0p10_INTRINSICS
101+
#define RISCV_RVV(x) x
102+
#else
103+
#define RISCV_RVV(x) __riscv_ ## x
104+
#endif
105+
106+
#if defined(C910V) || defined(RISCV64_ZVL256B)
107+
# if !defined(DOUBLE)
108+
# define EXTRACT_FLOAT(v) RISCV_RVV(vfmv_f_s_f32m1_f32)(v)
109+
# else
110+
# define EXTRACT_FLOAT(v) RISCV_RVV(vfmv_f_s_f64m1_f64)(v)
111+
# endif
112+
#else
113+
# define EXTRACT_FLOAT(v) (v[0])
96114
#endif
97115

98116
#endif

cpuid_riscv64.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7070
/* or implied, of The University of Texas at Austin. */
7171
/*********************************************************************/
7272

73-
#define CPU_GENERIC 0
74-
#define CPU_C910V 1
73+
#define CPU_GENERIC 0
74+
#define CPU_C910V 1
75+
#define CPU_x280 2
76+
#define CPU_RISCV64_ZVL256B 3
77+
#define CPU_RISCV64_ZVL128B 4
7578

7679
static char *cpuname[] = {
7780
"RISCV64_GENERIC",
78-
"C910V"
81+
"C910V",
82+
"x280",
83+
"CPU_RISCV64_ZVL256B",
84+
"CPU_RISCV64_ZVL128B"
7985
};
8086

8187
int detect(void){

ctest/c_cblat1.f

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ SUBROUTINE CHECK1(SFAC)
9696
INTEGER ICAMAXTEST
9797
EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST
9898
* .. External Subroutines ..
99-
EXTERNAL CSCAL, CSSCALTEST, CTEST, ITEST1, STEST1
99+
EXTERNAL CSCALTEST, CSSCALTEST, CTEST, ITEST1, STEST1
100100
* .. Intrinsic Functions ..
101101
INTRINSIC MAX
102102
* .. Common blocks ..
@@ -214,8 +214,8 @@ SUBROUTINE CHECK1(SFAC)
214214
CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1),
215215
+ STRUE4(NP1),SFAC)
216216
ELSE IF (ICASE.EQ.8) THEN
217-
* .. CSCAL ..
218-
CALL CSCAL(N,CA,CX,INCX)
217+
* .. CSCALTEST ..
218+
CALL CSCALTEST(N,CA,CX,INCX)
219219
CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX),
220220
+ SFAC)
221221
ELSE IF (ICASE.EQ.9) THEN
@@ -236,14 +236,14 @@ SUBROUTINE CHECK1(SFAC)
236236
*
237237
INCX = 1
238238
IF (ICASE.EQ.8) THEN
239-
* CSCAL
239+
* CSCALTEST
240240
* Add a test for alpha equal to zero.
241241
CA = (0.0E0,0.0E0)
242242
DO 80 I = 1, 5
243243
MWPCT(I) = (0.0E0,0.0E0)
244244
MWPCS(I) = (1.0E0,1.0E0)
245245
80 CONTINUE
246-
CALL CSCAL(5,CA,CX,INCX)
246+
CALL CSCALTEST(5,CA,CX,INCX)
247247
CALL CTEST(5,CX,MWPCT,MWPCS,SFAC)
248248
ELSE IF (ICASE.EQ.9) THEN
249249
* CSSCALTEST

0 commit comments

Comments
 (0)