Skip to content

Commit 9c49a81

Browse files
Resolve conflicts
2 parents e1afb23 + 10c22f4 commit 9c49a81

16 files changed

+6864 additions, -3 deletions (lines changed)

Makefile.prebuild

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,10 @@ ifeq ($(TARGET), RISCV64_ZVL256B)
6363
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
6464
endif
6565

66+
ifeq ($(TARGET), RISCV64_ZVL128B)
67+
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
68+
endif
69+
6670
ifeq ($(TARGET), RISCV64_GENERIC)
6771
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
6872
endif

Makefile.riscv64

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,10 @@ ifeq ($(CORE), RISCV64_ZVL256B)
1010
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl256b -mabi=lp64d
1111
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
1212
endif
13+
ifeq ($(CORE), RISCV64_ZVL128B)
14+
CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
15+
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
16+
endif
1317
ifeq ($(CORE), RISCV64_GENERIC)
1418
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
1519
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,7 @@ Z14
119119

120120
10.RISC-V 64:
121121
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
122+
RISCV64_ZVL128B
122123
C910V
123124
x280
124125
RISCV64_ZVL256B

common_riscv64.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
9191
#define BUFFER_SIZE ( 32 << 20)
9292
#define SEEK_ADDRESS
9393

94-
#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC)))
94+
#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC))) || defined(RISCV64_ZVL128B)
9595
# include <riscv_vector.h>
9696
#endif
9797

cpuid_riscv64.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,12 +74,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7474
#define CPU_C910V 1
7575
#define CPU_x280 2
7676
#define CPU_RISCV64_ZVL256B 3
77+
#define CPU_RISCV64_ZVL128B 4
7778

7879
static char *cpuname[] = {
7980
"RISCV64_GENERIC",
8081
"C910V",
8182
"x280",
82-
"CPU_RISCV64_ZVL256B"
83+
"CPU_RISCV64_ZVL256B",
84+
"CPU_RISCV64_ZVL128B"
8385
};
8486

8587
int detect(void){

getarch.c

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1703,9 +1703,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17031703
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
17041704
#define LIBNAME "riscv64_zvl256b"
17051705
#define CORENAME "RISCV64_ZVL256B"
1706-
#else
17071706
#endif
17081707

1708+
#ifdef FORCE_RISCV64_ZVL128B
1709+
#define FORCE
1710+
#define ARCHITECTURE "RISCV64"
1711+
#define SUBARCHITECTURE "RISCV64_ZVL128B"
1712+
#define SUBDIRNAME "riscv64"
1713+
#define ARCHCONFIG "-DRISCV64_ZVL128B " \
1714+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
1715+
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
1716+
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
1717+
#define LIBNAME "riscv64_zvl128b"
1718+
#define CORENAME "RISCV64_ZVL128B"
1719+
#endif
17091720

17101721
#if defined(FORCE_E2K) || defined(__e2k__)
17111722
#define FORCE

kernel/riscv64/KERNEL.RISCV64_ZVL128B

Lines changed: 243 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,243 @@
1+
SAMAXKERNEL = amax_rvv.c
2+
DAMAXKERNEL = amax_rvv.c
3+
CAMAXKERNEL = zamax_rvv.c
4+
ZAMAXKERNEL = zamax_rvv.c
5+
6+
SAMINKERNEL = amin_rvv.c
7+
DAMINKERNEL = amin_rvv.c
8+
CAMINKERNEL = zamin_rvv.c
9+
ZAMINKERNEL = zamin_rvv.c
10+
11+
SMAXKERNEL = max_rvv.c
12+
DMAXKERNEL = max_rvv.c
13+
14+
SMINKERNEL = min_rvv.c
15+
DMINKERNEL = min_rvv.c
16+
17+
ISAMAXKERNEL = iamax_rvv.c
18+
IDAMAXKERNEL = iamax_rvv.c
19+
ICAMAXKERNEL = izamax_rvv.c
20+
IZAMAXKERNEL = izamax_rvv.c
21+
22+
ISAMINKERNEL = iamin_rvv.c
23+
IDAMINKERNEL = iamin_rvv.c
24+
ICAMINKERNEL = izamin_rvv.c
25+
IZAMINKERNEL = izamin_rvv.c
26+
27+
ISMAXKERNEL = imax_rvv.c
28+
IDMAXKERNEL = imax_rvv.c
29+
30+
ISMINKERNEL = imin_rvv.c
31+
IDMINKERNEL = imin_rvv.c
32+
33+
SASUMKERNEL = asum_rvv.c
34+
DASUMKERNEL = asum_rvv.c
35+
CASUMKERNEL = zasum_rvv.c
36+
ZASUMKERNEL = zasum_rvv.c
37+
38+
SSUMKERNEL = sum_rvv.c
39+
DSUMKERNEL = sum_rvv.c
40+
CSUMKERNEL = zsum_rvv.c
41+
ZSUMKERNEL = zsum_rvv.c
42+
43+
SAXPYKERNEL = axpy_rvv.c
44+
DAXPYKERNEL = axpy_rvv.c
45+
CAXPYKERNEL = zaxpy_rvv.c
46+
ZAXPYKERNEL = zaxpy_rvv.c
47+
48+
SAXPBYKERNEL = axpby_rvv.c
49+
DAXPBYKERNEL = axpby_rvv.c
50+
CAXPBYKERNEL = zaxpby_rvv.c
51+
ZAXPBYKERNEL = zaxpby_rvv.c
52+
53+
SCOPYKERNEL = copy_rvv.c
54+
DCOPYKERNEL = copy_rvv.c
55+
CCOPYKERNEL = zcopy_rvv.c
56+
ZCOPYKERNEL = zcopy_rvv.c
57+
58+
SDOTKERNEL = dot_rvv.c
59+
DDOTKERNEL = dot_rvv.c
60+
CDOTKERNEL = zdot_rvv.c
61+
ZDOTKERNEL = zdot_rvv.c
62+
DSDOTKERNEL = dot_rvv.c
63+
64+
SNRM2KERNEL = nrm2_rvv.c
65+
DNRM2KERNEL = nrm2_rvv.c
66+
CNRM2KERNEL = znrm2_rvv.c
67+
ZNRM2KERNEL = znrm2_rvv.c
68+
69+
SROTKERNEL = rot_rvv.c
70+
DROTKERNEL = rot_rvv.c
71+
CROTKERNEL = zrot_rvv.c
72+
ZROTKERNEL = zrot_rvv.c
73+
74+
SSCALKERNEL = scal_rvv.c
75+
DSCALKERNEL = scal_rvv.c
76+
CSCALKERNEL = zscal_rvv.c
77+
ZSCALKERNEL = zscal_rvv.c
78+
79+
SSWAPKERNEL = swap_rvv.c
80+
DSWAPKERNEL = swap_rvv.c
81+
CSWAPKERNEL = zswap_rvv.c
82+
ZSWAPKERNEL = zswap_rvv.c
83+
84+
SGEMVNKERNEL = gemv_n_rvv.c
85+
DGEMVNKERNEL = gemv_n_rvv.c
86+
CGEMVNKERNEL = zgemv_n_rvv.c
87+
ZGEMVNKERNEL = zgemv_n_rvv.c
88+
89+
SGEMVTKERNEL = gemv_t_rvv.c
90+
DGEMVTKERNEL = gemv_t_rvv.c
91+
CGEMVTKERNEL = zgemv_t_rvv.c
92+
ZGEMVTKERNEL = zgemv_t_rvv.c
93+
94+
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_zvl128b.c
95+
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
96+
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
97+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
98+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
99+
100+
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
101+
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
102+
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
103+
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
104+
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
105+
endif
106+
107+
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N)_zvl128b.c
108+
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
109+
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
110+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
111+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
112+
113+
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
114+
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
115+
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
116+
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
117+
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
118+
endif
119+
120+
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N)_zvl128b.c
121+
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
122+
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
123+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
124+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
125+
126+
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
127+
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
128+
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
129+
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
130+
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
131+
endif
132+
133+
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N)_zvl128b.c
134+
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
135+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
136+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
137+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
138+
139+
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
140+
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
141+
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
142+
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
143+
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
144+
endif
145+
146+
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_zvl128b.c
147+
STRMMUNCOPY_M = ../generic/trmm_uncopy_$(SGEMM_UNROLL_M).c
148+
STRMMLNCOPY_M = ../generic/trmm_lncopy_$(SGEMM_UNROLL_M).c
149+
STRMMUTCOPY_M = ../generic/trmm_utcopy_$(SGEMM_UNROLL_M).c
150+
STRMMLTCOPY_M = ../generic/trmm_ltcopy_$(SGEMM_UNROLL_M).c
151+
152+
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N)_zvl128b.c
153+
DTRMMUNCOPY_M = ../generic/trmm_uncopy_$(DGEMM_UNROLL_M).c
154+
DTRMMLNCOPY_M = ../generic/trmm_lncopy_$(DGEMM_UNROLL_M).c
155+
DTRMMUTCOPY_M = ../generic/trmm_utcopy_$(DGEMM_UNROLL_M).c
156+
DTRMMLTCOPY_M = ../generic/trmm_ltcopy_$(DGEMM_UNROLL_M).c
157+
158+
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N)_zvl128b.c
159+
CTRMMUNCOPY_M = ../generic/ztrmm_uncopy_$(CGEMM_UNROLL_M).c
160+
CTRMMLNCOPY_M = ../generic/ztrmm_lncopy_$(CGEMM_UNROLL_M).c
161+
CTRMMUTCOPY_M = ../generic/ztrmm_utcopy_$(CGEMM_UNROLL_M).c
162+
CTRMMLTCOPY_M = ../generic/ztrmm_ltcopy_$(CGEMM_UNROLL_M).c
163+
164+
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N)_zvl128b.c
165+
ZTRMMUNCOPY_M = ../generic/ztrmm_uncopy_$(ZGEMM_UNROLL_M).c
166+
ZTRMMLNCOPY_M = ../generic/ztrmm_lncopy_$(ZGEMM_UNROLL_M).c
167+
ZTRMMUTCOPY_M = ../generic/ztrmm_utcopy_$(ZGEMM_UNROLL_M).c
168+
ZTRMMLTCOPY_M = ../generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_M).c
169+
170+
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
171+
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
172+
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
173+
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
174+
175+
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
176+
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
177+
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
178+
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
179+
180+
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
181+
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
182+
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
183+
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
184+
185+
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
186+
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
187+
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
188+
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
189+
190+
SSYMV_U_KERNEL = symv_U_rvv.c
191+
SSYMV_L_KERNEL = symv_L_rvv.c
192+
DSYMV_U_KERNEL = symv_U_rvv.c
193+
DSYMV_L_KERNEL = symv_L_rvv.c
194+
CSYMV_U_KERNEL = zsymv_U_rvv.c
195+
CSYMV_L_KERNEL = zsymv_L_rvv.c
196+
ZSYMV_U_KERNEL = zsymv_U_rvv.c
197+
ZSYMV_L_KERNEL = zsymv_L_rvv.c
198+
199+
CHEMV_L_KERNEL = zhemv_LM_rvv.c
200+
CHEMV_M_KERNEL = zhemv_LM_rvv.c
201+
CHEMV_U_KERNEL = zhemv_UV_rvv.c
202+
CHEMV_V_KERNEL = zhemv_UV_rvv.c
203+
ZHEMV_L_KERNEL = zhemv_LM_rvv.c
204+
ZHEMV_M_KERNEL = zhemv_LM_rvv.c
205+
ZHEMV_U_KERNEL = zhemv_UV_rvv.c
206+
ZHEMV_V_KERNEL = zhemv_UV_rvv.c
207+
208+
SSYMMUCOPY_M = ../generic/symm_ucopy_$(SGEMM_UNROLL_M).c
209+
SSYMMLCOPY_M = ../generic/symm_lcopy_$(SGEMM_UNROLL_M).c
210+
211+
DSYMMUCOPY_M = ../generic/symm_ucopy_$(DGEMM_UNROLL_M).c
212+
DSYMMLCOPY_M = ../generic/symm_lcopy_$(DGEMM_UNROLL_M).c
213+
214+
CSYMMUCOPY_M = ../generic/zsymm_ucopy_$(CGEMM_UNROLL_M).c
215+
CSYMMLCOPY_M = ../generic/zsymm_lcopy_$(CGEMM_UNROLL_M).c
216+
217+
ZSYMMUCOPY_M = ../generic/zsymm_ucopy_$(ZGEMM_UNROLL_M).c
218+
ZSYMMLCOPY_M = ../generic/zsymm_lcopy_$(ZGEMM_UNROLL_M).c
219+
220+
CHEMMLTCOPY_M = ../generic/zhemm_ltcopy_$(CGEMM_UNROLL_M).c
221+
CHEMMUTCOPY_M = ../generic/zhemm_utcopy_$(CGEMM_UNROLL_M).c
222+
223+
ZHEMMLTCOPY_M = ../generic/zhemm_ltcopy_$(ZGEMM_UNROLL_M).c
224+
ZHEMMUTCOPY_M = ../generic/zhemm_utcopy_$(ZGEMM_UNROLL_M).c
225+
226+
LSAME_KERNEL = ../generic/lsame.c
227+
228+
SCABS_KERNEL = ../generic/cabs.c
229+
DCABS_KERNEL = ../generic/cabs.c
230+
QCABS_KERNEL = ../generic/cabs.c
231+
232+
ifndef SGEMM_BETA
233+
SGEMM_BETA = gemm_beta_rvv.c
234+
endif
235+
ifndef DGEMM_BETA
236+
DGEMM_BETA = gemm_beta_rvv.c
237+
endif
238+
ifndef CGEMM_BETA
239+
CGEMM_BETA = zgemm_beta_rvv.c
240+
endif
241+
ifndef ZGEMM_BETA
242+
ZGEMM_BETA = zgemm_beta_rvv.c
243+
endif

0 commit comments

Comments
 (0)