Skip to content

Commit 10c22f4

Browse files
authored
Merge pull request #4355 from imaginationtech/img-riscv64-zvl128b
[RISC-V] Add RISC-V Vector 128-bit target
2 parents (88e9941 + ccbc3f8); commit 10c22f4

16 files changed, with 6864 additions and 3 deletions.

Makefile.prebuild

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,10 @@ ifeq ($(TARGET), x280)
5959
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
6060
endif
6161

62+
ifeq ($(TARGET), RISCV64_ZVL128B)
63+
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
64+
endif
65+
6266
ifeq ($(TARGET), RISCV64_GENERIC)
6367
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
6468
endif

Makefile.riscv64

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,10 @@ ifeq ($(CORE), x280)
66
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
77
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
88
endif
9+
ifeq ($(CORE), RISCV64_ZVL128B)
10+
CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
11+
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
12+
endif
913
ifeq ($(CORE), RISCV64_GENERIC)
1014
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
1115
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,7 @@ Z14
119119

120120
10.RISC-V 64:
121121
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
122+
RISCV64_ZVL128B
122123
C910V
123124
x280
124125

common_riscv64.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
9191
#define BUFFER_SIZE ( 32 << 20)
9292
#define SEEK_ADDRESS
9393

94-
#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC)))
94+
#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC))) || defined(RISCV64_ZVL128B)
9595
# include <riscv_vector.h>
9696
#endif
9797

cpuid_riscv64.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,12 +74,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7474
#define CPU_C910V 1
7575
#define CPU_x280 2
7676
#define CPU_RISCV64_ZVL256B 3
77+
#define CPU_RISCV64_ZVL128B 4
7778

7879
static char *cpuname[] = {
7980
"RISCV64_GENERIC",
8081
"C910V",
8182
"x280",
82-
"CPU_RISCV64_ZVL256B"
83+
"CPU_RISCV64_ZVL256B",
84+
"CPU_RISCV64_ZVL128B"
8385
};
8486

8587
int detect(void){

getarch.c

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1691,7 +1691,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16911691
#define CORENAME "x280"
16921692
#else
16931693
#endif
1694-
1694+
#ifdef FORCE_RISCV64_ZVL128B
1695+
#define FORCE
1696+
#define ARCHITECTURE "RISCV64"
1697+
#define SUBARCHITECTURE "RISCV64_ZVL128B"
1698+
#define SUBDIRNAME "riscv64"
1699+
#define ARCHCONFIG "-DRISCV64_ZVL128B " \
1700+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
1701+
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
1702+
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
1703+
#define LIBNAME "riscv64_zvl128b"
1704+
#define CORENAME "RISCV64_ZVL128B"
1705+
#endif
16951706

16961707
#if defined(FORCE_E2K) || defined(__e2k__)
16971708
#define FORCE

kernel/riscv64/KERNEL.RISCV64_ZVL128B

Lines changed: 243 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,243 @@
1+
SAMAXKERNEL = amax_rvv.c
2+
DAMAXKERNEL = amax_rvv.c
3+
CAMAXKERNEL = zamax_rvv.c
4+
ZAMAXKERNEL = zamax_rvv.c
5+
6+
SAMINKERNEL = amin_rvv.c
7+
DAMINKERNEL = amin_rvv.c
8+
CAMINKERNEL = zamin_rvv.c
9+
ZAMINKERNEL = zamin_rvv.c
10+
11+
SMAXKERNEL = max_rvv.c
12+
DMAXKERNEL = max_rvv.c
13+
14+
SMINKERNEL = min_rvv.c
15+
DMINKERNEL = min_rvv.c
16+
17+
ISAMAXKERNEL = iamax_rvv.c
18+
IDAMAXKERNEL = iamax_rvv.c
19+
ICAMAXKERNEL = izamax_rvv.c
20+
IZAMAXKERNEL = izamax_rvv.c
21+
22+
ISAMINKERNEL = iamin_rvv.c
23+
IDAMINKERNEL = iamin_rvv.c
24+
ICAMINKERNEL = izamin_rvv.c
25+
IZAMINKERNEL = izamin_rvv.c
26+
27+
ISMAXKERNEL = imax_rvv.c
28+
IDMAXKERNEL = imax_rvv.c
29+
30+
ISMINKERNEL = imin_rvv.c
31+
IDMINKERNEL = imin_rvv.c
32+
33+
SASUMKERNEL = asum_rvv.c
34+
DASUMKERNEL = asum_rvv.c
35+
CASUMKERNEL = zasum_rvv.c
36+
ZASUMKERNEL = zasum_rvv.c
37+
38+
SSUMKERNEL = sum_rvv.c
39+
DSUMKERNEL = sum_rvv.c
40+
CSUMKERNEL = zsum_rvv.c
41+
ZSUMKERNEL = zsum_rvv.c
42+
43+
SAXPYKERNEL = axpy_rvv.c
44+
DAXPYKERNEL = axpy_rvv.c
45+
CAXPYKERNEL = zaxpy_rvv.c
46+
ZAXPYKERNEL = zaxpy_rvv.c
47+
48+
SAXPBYKERNEL = axpby_rvv.c
49+
DAXPBYKERNEL = axpby_rvv.c
50+
CAXPBYKERNEL = zaxpby_rvv.c
51+
ZAXPBYKERNEL = zaxpby_rvv.c
52+
53+
SCOPYKERNEL = copy_rvv.c
54+
DCOPYKERNEL = copy_rvv.c
55+
CCOPYKERNEL = zcopy_rvv.c
56+
ZCOPYKERNEL = zcopy_rvv.c
57+
58+
SDOTKERNEL = dot_rvv.c
59+
DDOTKERNEL = dot_rvv.c
60+
CDOTKERNEL = zdot_rvv.c
61+
ZDOTKERNEL = zdot_rvv.c
62+
DSDOTKERNEL = dot_rvv.c
63+
64+
SNRM2KERNEL = nrm2_rvv.c
65+
DNRM2KERNEL = nrm2_rvv.c
66+
CNRM2KERNEL = znrm2_rvv.c
67+
ZNRM2KERNEL = znrm2_rvv.c
68+
69+
SROTKERNEL = rot_rvv.c
70+
DROTKERNEL = rot_rvv.c
71+
CROTKERNEL = zrot_rvv.c
72+
ZROTKERNEL = zrot_rvv.c
73+
74+
SSCALKERNEL = scal_rvv.c
75+
DSCALKERNEL = scal_rvv.c
76+
CSCALKERNEL = zscal_rvv.c
77+
ZSCALKERNEL = zscal_rvv.c
78+
79+
SSWAPKERNEL = swap_rvv.c
80+
DSWAPKERNEL = swap_rvv.c
81+
CSWAPKERNEL = zswap_rvv.c
82+
ZSWAPKERNEL = zswap_rvv.c
83+
84+
SGEMVNKERNEL = gemv_n_rvv.c
85+
DGEMVNKERNEL = gemv_n_rvv.c
86+
CGEMVNKERNEL = zgemv_n_rvv.c
87+
ZGEMVNKERNEL = zgemv_n_rvv.c
88+
89+
SGEMVTKERNEL = gemv_t_rvv.c
90+
DGEMVTKERNEL = gemv_t_rvv.c
91+
CGEMVTKERNEL = zgemv_t_rvv.c
92+
ZGEMVTKERNEL = zgemv_t_rvv.c
93+
94+
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_zvl128b.c
95+
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
96+
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
97+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
98+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
99+
100+
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
101+
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
102+
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
103+
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
104+
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
105+
endif
106+
107+
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N)_zvl128b.c
108+
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
109+
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
110+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
111+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
112+
113+
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
114+
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
115+
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
116+
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
117+
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
118+
endif
119+
120+
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N)_zvl128b.c
121+
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
122+
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
123+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
124+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
125+
126+
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
127+
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
128+
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
129+
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
130+
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
131+
endif
132+
133+
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N)_zvl128b.c
134+
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
135+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
136+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
137+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
138+
139+
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
140+
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
141+
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
142+
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
143+
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
144+
endif
145+
146+
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_zvl128b.c
147+
STRMMUNCOPY_M = ../generic/trmm_uncopy_$(SGEMM_UNROLL_M).c
148+
STRMMLNCOPY_M = ../generic/trmm_lncopy_$(SGEMM_UNROLL_M).c
149+
STRMMUTCOPY_M = ../generic/trmm_utcopy_$(SGEMM_UNROLL_M).c
150+
STRMMLTCOPY_M = ../generic/trmm_ltcopy_$(SGEMM_UNROLL_M).c
151+
152+
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N)_zvl128b.c
153+
DTRMMUNCOPY_M = ../generic/trmm_uncopy_$(DGEMM_UNROLL_M).c
154+
DTRMMLNCOPY_M = ../generic/trmm_lncopy_$(DGEMM_UNROLL_M).c
155+
DTRMMUTCOPY_M = ../generic/trmm_utcopy_$(DGEMM_UNROLL_M).c
156+
DTRMMLTCOPY_M = ../generic/trmm_ltcopy_$(DGEMM_UNROLL_M).c
157+
158+
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N)_zvl128b.c
159+
CTRMMUNCOPY_M = ../generic/ztrmm_uncopy_$(CGEMM_UNROLL_M).c
160+
CTRMMLNCOPY_M = ../generic/ztrmm_lncopy_$(CGEMM_UNROLL_M).c
161+
CTRMMUTCOPY_M = ../generic/ztrmm_utcopy_$(CGEMM_UNROLL_M).c
162+
CTRMMLTCOPY_M = ../generic/ztrmm_ltcopy_$(CGEMM_UNROLL_M).c
163+
164+
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N)_zvl128b.c
165+
ZTRMMUNCOPY_M = ../generic/ztrmm_uncopy_$(ZGEMM_UNROLL_M).c
166+
ZTRMMLNCOPY_M = ../generic/ztrmm_lncopy_$(ZGEMM_UNROLL_M).c
167+
ZTRMMUTCOPY_M = ../generic/ztrmm_utcopy_$(ZGEMM_UNROLL_M).c
168+
ZTRMMLTCOPY_M = ../generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_M).c
169+
170+
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
171+
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
172+
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
173+
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
174+
175+
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
176+
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
177+
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
178+
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
179+
180+
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
181+
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
182+
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
183+
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
184+
185+
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
186+
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
187+
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
188+
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
189+
190+
SSYMV_U_KERNEL = symv_U_rvv.c
191+
SSYMV_L_KERNEL = symv_L_rvv.c
192+
DSYMV_U_KERNEL = symv_U_rvv.c
193+
DSYMV_L_KERNEL = symv_L_rvv.c
194+
CSYMV_U_KERNEL = zsymv_U_rvv.c
195+
CSYMV_L_KERNEL = zsymv_L_rvv.c
196+
ZSYMV_U_KERNEL = zsymv_U_rvv.c
197+
ZSYMV_L_KERNEL = zsymv_L_rvv.c
198+
199+
CHEMV_L_KERNEL = zhemv_LM_rvv.c
200+
CHEMV_M_KERNEL = zhemv_LM_rvv.c
201+
CHEMV_U_KERNEL = zhemv_UV_rvv.c
202+
CHEMV_V_KERNEL = zhemv_UV_rvv.c
203+
ZHEMV_L_KERNEL = zhemv_LM_rvv.c
204+
ZHEMV_M_KERNEL = zhemv_LM_rvv.c
205+
ZHEMV_U_KERNEL = zhemv_UV_rvv.c
206+
ZHEMV_V_KERNEL = zhemv_UV_rvv.c
207+
208+
SSYMMUCOPY_M = ../generic/symm_ucopy_$(SGEMM_UNROLL_M).c
209+
SSYMMLCOPY_M = ../generic/symm_lcopy_$(SGEMM_UNROLL_M).c
210+
211+
DSYMMUCOPY_M = ../generic/symm_ucopy_$(DGEMM_UNROLL_M).c
212+
DSYMMLCOPY_M = ../generic/symm_lcopy_$(DGEMM_UNROLL_M).c
213+
214+
CSYMMUCOPY_M = ../generic/zsymm_ucopy_$(CGEMM_UNROLL_M).c
215+
CSYMMLCOPY_M = ../generic/zsymm_lcopy_$(CGEMM_UNROLL_M).c
216+
217+
ZSYMMUCOPY_M = ../generic/zsymm_ucopy_$(ZGEMM_UNROLL_M).c
218+
ZSYMMLCOPY_M = ../generic/zsymm_lcopy_$(ZGEMM_UNROLL_M).c
219+
220+
CHEMMLTCOPY_M = ../generic/zhemm_ltcopy_$(CGEMM_UNROLL_M).c
221+
CHEMMUTCOPY_M = ../generic/zhemm_utcopy_$(CGEMM_UNROLL_M).c
222+
223+
ZHEMMLTCOPY_M = ../generic/zhemm_ltcopy_$(ZGEMM_UNROLL_M).c
224+
ZHEMMUTCOPY_M = ../generic/zhemm_utcopy_$(ZGEMM_UNROLL_M).c
225+
226+
LSAME_KERNEL = ../generic/lsame.c
227+
228+
SCABS_KERNEL = ../generic/cabs.c
229+
DCABS_KERNEL = ../generic/cabs.c
230+
QCABS_KERNEL = ../generic/cabs.c
231+
232+
ifndef SGEMM_BETA
233+
SGEMM_BETA = gemm_beta_rvv.c
234+
endif
235+
ifndef DGEMM_BETA
236+
DGEMM_BETA = gemm_beta_rvv.c
237+
endif
238+
ifndef CGEMM_BETA
239+
CGEMM_BETA = zgemm_beta_rvv.c
240+
endif
241+
ifndef ZGEMM_BETA
242+
ZGEMM_BETA = zgemm_beta_rvv.c
243+
endif

0 commit comments

Comments (0)