Skip to content

Commit ec14e16

Browse files
committed
fix: resolve build failure on non-RISCV hosts
- adjust the interface to disable the "small matrix" pathway for HFLOAT16
- separate HFLOAT16 from BFLOAT16
- remove the conditions comparing SHGEMM_UNROLL_M and SHGEMM_UNROLL_N
Related to PR #5290. Co-authored-by: Martin
1 parent fb89820 commit ec14e16

File tree

10 files changed

+68
-40
lines changed

10 files changed

+68
-40
lines changed

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ COMMON_PROF = -pg
309309
# BUILD_BFLOAT16 = 1
310310

311311
# If you want to enable the experimental HFLOAT16 support
312-
# BUILD_HFLOAT16 = 1
312+
BUILD_HFLOAT16 = 1
313313

314314
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
315315
# will be allocated on the heap rather than the stack. (This array alone requires

Makefile.system

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1898,6 +1898,8 @@ export NO_LASX
18981898

18991899
export SBGEMM_UNROLL_M
19001900
export SBGEMM_UNROLL_N
1901+
export SHGEMM_UNROLL_M
1902+
export SHGEMM_UNROLL_N
19011903
export SGEMM_UNROLL_M
19021904
export SGEMM_UNROLL_N
19031905
export DGEMM_UNROLL_M

exports/Makefile

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,10 @@ dll : ../$(LIBDLLNAME)
133133
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
134134

135135
$(LIBPREFIX).def : $(GENSYM)
136-
./$(GENSYM) win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
136+
./$(GENSYM) win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_HFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
137137

138138
libgoto_hpl.def : $(GENSYM)
139-
./$(GENSYM) win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
139+
./$(GENSYM) win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_HFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
140140

141141
ifeq ($(OSNAME), Darwin)
142142
ifeq ($(FIXED_LIBNAME),1)
@@ -301,23 +301,23 @@ static : ../$(LIBNAME)
301301
rm -f goto.$(SUFFIX)
302302

303303
osx.def : $(GENSYM) ../Makefile.system ../getarch.c
304-
./$(GENSYM) osx $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
304+
./$(GENSYM) osx $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_HFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
305305

306306
aix.def : $(GENSYM) ../Makefile.system ../getarch.c
307-
./$(GENSYM) aix $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
307+
./$(GENSYM) aix $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_HFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
308308

309309
objcopy.def : $(GENSYM) ../Makefile.system ../getarch.c
310-
./$(GENSYM) objcopy $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
310+
./$(GENSYM) objcopy $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_HFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
311311

312312
objconv.def : $(GENSYM) ../Makefile.system ../getarch.c
313-
./$(GENSYM) objconv $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
313+
./$(GENSYM) objconv $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_HFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
314314

315315
test : linktest.c
316316
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
317317
rm -f linktest
318318

319319
linktest.c : $(GENSYM) ../Makefile.system ../getarch.c
320-
./$(GENSYM) linktest $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c
320+
./$(GENSYM) linktest $(ARCH) "$(BU)" $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_HFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c
321321

322322
clean ::
323323
@rm -f *.def *.dylib __.SYMDEF* *.renamed

exports/gensymbol

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3816,13 +3816,20 @@ shift
38163816
p16=$9
38173817
shift
38183818
p17=$9
3819+
shift
3820+
p18=$9
38193821

38203822
if [ $p13 -eq 1 ]; then
3821-
blasobjs="$blasobjs $bfblasobjs $hfblasobjs"
3822-
cblasobjs="$cblasobjs $bfcblasobjs $hfcblasobjs"
3823+
blasobjs="$blasobjs $bfblasobjs"
3824+
cblasobjs="$cblasobjs $bfcblasobjs"
38233825
fi
38243826

38253827
if [ $p14 -eq 1 ]; then
3828+
blasobjs="$blasobjs $hfblasobjs"
3829+
cblasobjs="$cblasobjs $hfcblasobjs"
3830+
fi
3831+
3832+
if [ $p15 -eq 1 ]; then
38263833
blasobjs="$blasobjs $blasobjss"
38273834
cblasobjs="$cblasobjs $cblasobjss"
38283835
lapackobjs="$lapackobjs $lapackobjss"
@@ -3835,11 +3842,11 @@ if [ $p14 -eq 1 ]; then
38353842
lapackeobjs="$lapackeobjs $lapackeobjss"
38363843
fi
38373844

3838-
if [ $p15 -eq 1 ]; then
3845+
if [ $p16 -eq 1 ]; then
38393846
blasobjs="$blasobjs $blasobjsd"
38403847
cblasobjs="$cblasobjs $cblasobjsd"
38413848
lapackobjs="$lapackobjs $lapackobjsd"
3842-
if [ $p14 -eq 0 ]; then
3849+
if [ $p15 -eq 0 ]; then
38433850
lapackobjs2="$lapackobjs2 $lapackobjs2ds"
38443851
fi
38453852
lapackobjs2="$lapackobjs2 $lapackobjs2d $lapackobjs2dz"
@@ -3849,14 +3856,14 @@ if [ $p15 -eq 1 ]; then
38493856
lapackeobjs="$lapackeobjs $lapackeobjsd"
38503857
fi
38513858

3852-
if [ $p16 -eq 1 ]; then
3859+
if [ $p17 -eq 1 ]; then
38533860
blasobjs="$blasobjs $blasobjsc"
38543861
cblasobjs="$cblasobjs $cblasobjsc"
38553862
gemm3mobjs="$gemm3mobjs $gemm3mobjsc"
38563863
cblasgemm3mobjs="$cblasgemm3mobjs $cblasgemm3mobjsc"
38573864
lapackobjs="$lapackobjs $lapackobjsc"
38583865
lapackobjs2="$lapackobjs2 $lapackobjs2c $lapackobjs2zc"
3859-
if [ $p14 -eq 0 ]; then
3866+
if [ $p15 -eq 0 ]; then
38603867
lapackobjs2="$lapackobjs2 $lapackobjs2sc"
38613868
fi
38623869
lapack_deprecated_objs="$lapack_deprecated_objs $lapack_deprecated_objsc"
@@ -3865,17 +3872,17 @@ if [ $p16 -eq 1 ]; then
38653872
lapackeobjs="$lapackeobjs $lapackeobjsc"
38663873
fi
38673874

3868-
if [ $p17 -eq 1 ]; then
3875+
if [ $p18 -eq 1 ]; then
38693876
blasobjs="$blasobjs $blasobjsz"
38703877
cblasobjs="$cblasobjs $cblasobjsz"
38713878
gemm3mobjs="$gemm3mobjs $gemm3mobjsz"
38723879
cblasgemm3mobjs="$cblasgemm3mobjs $cblasgemm3mobjsz"
38733880
lapackobjs="$lapackobjs $lapackobjsz"
38743881
lapackobjs2="$lapackobjs2 $lapackobjs2z"
3875-
if [ $p16 -eq 0 ]; then
3882+
if [ $p17 -eq 0 ]; then
38763883
lapackobjs2="$lapackobjs2 $lapackobjs2zc"
38773884
fi
3878-
if [ $p15 -eq 0 ]; then
3885+
if [ $p16 -eq 0 ]; then
38793886
lapackobjs2="$lapackobjs2 $lapackobjs2dz"
38803887
fi
38813888
lapack_deprecated_objs="$lapack_deprecated_objs $lapack_deprecated_objsz"

exports/gensymbol.pl

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3774,10 +3774,14 @@
37743774
my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib");
37753775

37763776
if ($ARGV[12] == 1) {
3777-
@blasobjs = (@blasobjs, @bfblasobjs, @hfblasobjs);
3778-
@cblasobjs = (@cblasobjs, @bfcblasobjs, @hfcblasobjs);
3777+
@blasobjs = (@blasobjs, @bfblasobjs);
3778+
@cblasobjs = (@cblasobjs, @bfcblasobjs);
37793779
}
37803780
if ($ARGV[13] == 1) {
3781+
@blasobjs = (@blasobjs, @hfblasobjs);
3782+
@cblasobjs = (@cblasobjs, @hfcblasobjs);
3783+
}
3784+
if ($ARGV[14] == 1) {
37813785
@blasobjs = (@blasobjs, @blasobjss);
37823786
@cblasobjs = (@cblasobjs, @cblasobjss);
37833787
@lapackobjs = (@lapackobjs, @lapackobjss);
@@ -3789,11 +3793,11 @@
37893793
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
37903794
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
37913795
}
3792-
if ($ARGV[14] == 1) {
3796+
if ($ARGV[15] == 1) {
37933797
@blasobjs = (@blasobjs, @blasobjsd);
37943798
@cblasobjs = (@cblasobjs, @cblasobjsd);
37953799
@lapackobjs = (@lapackobjs, @lapackobjsd);
3796-
if ($ARGV[13] == 0) {
3800+
if ($ARGV[14] == 0) {
37973801
@lapackobjs2 = (@lapackobjs2, @lapackobjs2ds);
37983802
}
37993803
@lapackobjs2 = (@lapackobjs2, @lapackobjs2d, @lapackobjs2dz);
@@ -3802,32 +3806,32 @@
38023806
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
38033807
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
38043808
}
3805-
if ($ARGV[15] == 1) {
3809+
if ($ARGV[16] == 1) {
38063810
@blasobjs = (@blasobjs, @blasobjsc);
38073811
@cblasobjs = (@cblasobjs, @cblasobjsc);
38083812
@gemm3mobjs = (@gemm3mobjs, @gemm3mobjsc);
38093813
@cblasgemm3mobjs = (@cblasgemm3mobjs, @cblasgemm3mobjsc);
38103814
@lapackobjs = (@lapackobjs, @lapackobjsc);
38113815
@lapackobjs2 = (@lapackobjs2, @lapackobjs2c, @lapackobjs2zc);
3812-
if ($ARGV[13] == 0) {
3816+
if ($ARGV[14] == 0) {
38133817
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc);
38143818
}
38153819
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsc);
38163820
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsc);
38173821
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
38183822
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
38193823
}
3820-
if ($ARGV[16] == 1) {
3824+
if ($ARGV[17] == 1) {
38213825
@blasobjs = (@blasobjs, @blasobjsz);
38223826
@cblasobjs = (@cblasobjs, @cblasobjsz);
38233827
@gemm3mobjs = (@gemm3mobjs, @gemm3mobjsz);
38243828
@cblasgemm3mobjs = (@cblasgemm3mobjs, @cblasgemm3mobjsz);
38253829
@lapackobjs = (@lapackobjs, @lapackobjsz);
38263830
@lapackobjs2 = (@lapackobjs2, @lapackobjs2z);
3827-
if ($ARGV[15] == 0) {
3831+
if ($ARGV[16] == 0) {
38283832
@lapackobjs2 = (@lapackobjs2, @lapackobjs2zc);
38293833
}
3830-
if ($ARGV[14] == 0) {
3834+
if ($ARGV[15] == 0) {
38313835
@lapackobjs2 = (@lapackobjs2, @lapackobjs2dz);
38323836
}
38333837
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsz);

interface/gemm.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
#elif defined(BFLOAT16)
5757
#define ERROR_NAME "SBGEMM "
5858
#define GEMV BLASFUNC(sbgemv)
59+
#elif defined(HFLOAT16)
60+
#define ERROR_NAME "SHGEMM "
5961
#else
6062
#define ERROR_NAME "SGEMM "
6163
#define GEMV BLASFUNC(sgemv)
@@ -111,7 +113,7 @@ static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, IFLOAT *, IFLOAT *, B
111113
#endif
112114
};
113115

114-
#if defined(SMALL_MATRIX_OPT) && !defined(GEMM3M) && !defined(XDOUBLE)
116+
#if defined(SMALL_MATRIX_OPT) && !defined(GEMM3M) && !defined(XDOUBLE) &&!defined(HFLOAT16)
115117
#define USE_SMALL_MATRIX_OPT 1
116118
#else
117119
#define USE_SMALL_MATRIX_OPT 0
@@ -219,11 +221,11 @@ static inline int get_gemm_optimal_nthreads_neoversev2(double MNK, int ncpu) {
219221

220222
static inline int get_gemm_optimal_nthreads(double MNK) {
221223
int ncpu = num_cpu_avail(3);
222-
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
224+
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && !defined(HFLOAT16)
223225
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
224-
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
226+
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && !defined(HFLOAT16)
225227
return get_gemm_optimal_nthreads_neoversev2(MNK, ncpu);
226-
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
228+
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && !defined(HFLOAT16)
227229
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
228230
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
229231
}
@@ -417,7 +419,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
417419

418420
PRINT_DEBUG_CNAME;
419421

420-
#if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
422+
#if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && !defined(HFLOAT16)
421423
#if defined(ARCH_x86) && (defined(USE_SGEMM_KERNEL_DIRECT)||defined(DYNAMIC_ARCH))
422424
#if defined(DYNAMIC_ARCH)
423425
if (support_avx512() )
@@ -577,7 +579,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
577579
args.m, args.n, args.k, args.lda, args.ldb, args.ldc);
578580
#endif
579581

580-
#if defined(GEMM_GEMV_FORWARD) && !defined(GEMM3M) && !defined(COMPLEX) && (!defined(BFLOAT16) || defined(GEMM_GEMV_FORWARD_BF16))
582+
#if defined(GEMM_GEMV_FORWARD) && !defined(GEMM3M) && !defined(COMPLEX) && !defined(HFLOAT16) && (!defined(BFLOAT16) || defined(GEMM_GEMV_FORWARD_BF16))
581583
#if defined(ARCH_ARM64)
582584
// The gemv kernels in arm64/{gemv_n.S,gemv_n_sve.c,gemv_t.S,gemv_t_sve.c}
583585
// perform poorly in certain circumstances. We use the following boolean

kernel/Makefile.L3

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -133,14 +133,14 @@ ifeq ($(BUILD_HFLOAT16), 1)
133133
ifndef SHGEMMKERNEL
134134
SHGEMM_BETA = ../generic/gemm_beta.c
135135
SHGEMMKERNEL = ../generic/gemmkernel_2x2.c
136-
SHGEMMINCOPY = ../generic/gemm_ncopy_2.c
137-
SHGEMMITCOPY = ../generic/gemm_tcopy_2.c
138136
SHGEMMONCOPY = ../generic/gemm_ncopy_2.c
139137
SHGEMMOTCOPY = ../generic/gemm_tcopy_2.c
140-
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
141-
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
142138
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
143139
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
140+
SHGEMMINCOPY = ../generic/gemm_ncopy_2.c
141+
SHGEMMITCOPY = ../generic/gemm_tcopy_2.c
142+
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
143+
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
144144
endif
145145

146146
SHKERNELOBJS += \
@@ -726,15 +726,15 @@ $(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY)
726726
$(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY)
727727
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
728728

729-
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
729+
#ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
730730

731731
$(KDIR)$(SHGEMMINCOPYOBJ) : $(KERNELDIR)/$(SHGEMMINCOPY)
732732
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
733733

734734
$(KDIR)$(SHGEMMITCOPYOBJ) : $(KERNELDIR)/$(SHGEMMITCOPY)
735735
$(CC) $(CFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
736736

737-
endif
737+
#endif
738738
endif
739739

740740
$(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY)
@@ -2957,14 +2957,14 @@ $(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY)
29572957
$(SHGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMOTCOPY)
29582958
$(CC) $(PFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
29592959

2960-
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
2960+
#ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
29612961
$(SHGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMINCOPY)
29622962
$(CC) $(PFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
29632963

29642964
$(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY)
29652965
$(CC) $(PFLAGS) -c -DHFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
29662966

2967-
endif
2967+
#endif
29682968
endif
29692969

29702970
$(SGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMONCOPY)

kernel/riscv64/KERNEL.RISCV64_ZVL128B

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,11 +246,13 @@ ifndef ZGEMM_BETA
246246
ZGEMM_BETA = zgemm_beta_rvv.c
247247
endif
248248

249+
# Half-precision (HFLOAT16) GEMM kernel selection for the ZVL128B target.
# The SHGEMM* settings are consumed by the SHGEMM section of kernel/Makefile.L3,
# which is gated on BUILD_HFLOAT16, so this guard tests the same switch.
# Guarding on BUILD_BFLOAT16 would leave SHGEMMKERNEL unset for an
# HFLOAT16-only build, and the generic 2x2 defaults would be used instead.
ifeq ($(BUILD_HFLOAT16), 1)
SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl128b.c
SHGEMMONCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_N).c
SHGEMMOTCOPY = ../generic/gemm_tcopy_$(SHGEMM_UNROLL_N).c
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
# Keep an SHGEMM_BETA chosen earlier; otherwise fall back to gemm_beta_rvv.c.
ifndef SHGEMM_BETA
SHGEMM_BETA = gemm_beta_rvv.c
endif
endif

kernel/riscv64/KERNEL.RISCV64_ZVL256B

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ DOMATCOPY_CN = omatcopy_cn_vector.c
210210
SOMATCOPY_CN = omatcopy_cn_vector.c
211211

212212

213+
ifeq ($(BUILD_BFLOAT16), 1)
213214
SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl256b.c
214215
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
215216
SHGEMMINCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_M).c
@@ -224,6 +225,7 @@ SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
224225
ifndef SHGEMM_BETA
225226
SHGEMM_BETA = gemm_beta_rvv.c
226227
endif
228+
endif
227229

228230
SAXPBYKERNEL = axpby_vector_v2.c
229231
DAXPBYKERNEL = axpby_vector_v2.c

kernel/x86_64/KERNEL.HASWELL

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,12 @@ DASUMKERNEL = dasum.c
106106

107107
SROTKERNEL = srot.c
108108
DROTKERNEL = drot.c
109+
110+
111+
# Half-precision (HFLOAT16) GEMM settings: generic 2x2 kernel and copy routines.
# Guarded by BUILD_HFLOAT16, the switch that gates the SHGEMM section of
# kernel/Makefile.L3 (these are not BFLOAT16/SBGEMM settings).
# Defining SHGEMMKERNEL here bypasses the `ifndef SHGEMMKERNEL` defaults in
# kernel/Makefile.L3, so every variable from that default block is repeated
# below. Otherwise SHGEMM_BETA and the INCOPY/ITCOPY names would be left
# undefined while their build rules are still emitted.
ifeq ($(BUILD_HFLOAT16), 1)
SHGEMM_BETA = ../generic/gemm_beta.c
SHGEMMKERNEL = ../generic/gemmkernel_2x2.c
SHGEMMONCOPY = ../generic/gemm_ncopy_2.c
SHGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
SHGEMMINCOPY = ../generic/gemm_ncopy_2.c
SHGEMMITCOPY = ../generic/gemm_tcopy_2.c
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif

0 commit comments

Comments
 (0)