Skip to content

Commit 718fb73

Browse files
authored
Merge pull request #4976 from martin-frbg/m3m_exprec
[WIP] Add a better workaround for GEMM3M on GENERIC and re-enable EXPRECISION for x86/x86_64 targets
2 parents 73527aa + c125866 commit 718fb73

File tree

3 files changed: 23 additions and 11 deletions

Makefile.system

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ endif
446446

447447
ifeq ($(OSNAME), Linux)
448448
EXTRALIB += -lm
449-
NO_EXPRECISION = 1
449+
#NO_EXPRECISION = 1
450450
endif
451451

452452
ifeq ($(OSNAME), Android)
@@ -572,7 +572,7 @@ NO_BINARY_MODE = 1
572572
endif
573573

574574
ifeq ($(CORE), generic)
575-
NO_EXPRECISION = 1
575+
#NO_EXPRECISION = 1
576576
endif
577577

578578
ifndef NO_EXPRECISION
@@ -595,7 +595,7 @@ endif
595595
ifeq ($(ARCH), x86_64)
596596

597597
ifeq ($(CORE), generic)
598-
NO_EXPRECISION = 1
598+
#NO_EXPRECISION = 1
599599
endif
600600

601601
ifndef NO_EXPRECISION
@@ -828,8 +828,8 @@ BINARY_DEFINED = 1
828828

829829
ifeq ($(F_COMPILER), GFORTRAN)
830830
ifeq ($(C_COMPILER), GCC)
831-
# EXPRECISION = 1
832-
# CCOMMON_OPT += -DEXPRECISION
831+
EXPRECISION = 1
832+
CCOMMON_OPT += -DEXPRECISION
833833
endif
834834
endif
835835
endif
@@ -1392,17 +1392,15 @@ endif
13921392
endif
13931393

13941394
ifeq ($(F_COMPILER), CRAY)
1395-
CCOMMON_OPT += -DF_INTERFACE_CRAYFC
1395+
CCOMMON_OPT += -DF_INTERFACE_INTEL
13961396
FCOMMON_OPT += -hnopattern
13971397
ifdef INTERFACE64
13981398
ifneq ($(INTERFACE64), 0)
13991399
FCOMMON_OPT += -s integer64
14001400
endif
14011401
endif
1402-
ifeq ($(USE_OPENMP), 1)
1403-
FCOMMON_OPT += -fopenmp
1404-
else
1405-
FCOMMON_OPT += -fno-openmp
1402+
ifneq ($(USE_OPENMP), 1)
1403+
FCOMMON_OPT += -O noomp
14061404
endif
14071405
endif
14081406

interface/gemm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
#endif
8787

8888
static int (*gemm[])(blas_arg_t *, BLASLONG *, BLASLONG *, IFLOAT *, IFLOAT *, BLASLONG) = {
89-
#ifndef GEMM3M
89+
#if !defined(GEMM3M) || defined(GENERIC)
9090
GEMM_NN, GEMM_TN, GEMM_RN, GEMM_CN,
9191
GEMM_NT, GEMM_TT, GEMM_RT, GEMM_CT,
9292
GEMM_NR, GEMM_TR, GEMM_RR, GEMM_CR,

param.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4033,6 +4033,8 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
40334033
#define CGEMM_DEFAULT_UNROLL_N 2
40344034
#define ZGEMM_DEFAULT_UNROLL_N 2
40354035
#define XGEMM_DEFAULT_UNROLL_N 1
4036+
#define CGEMM3M_DEFAULT_UNROLL_N 2
4037+
#define ZGEMM3M_DEFAULT_UNROLL_N 2
40364038

40374039
#ifdef ARCH_X86
40384040
#define SGEMM_DEFAULT_UNROLL_M 2
@@ -4048,6 +4050,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
40484050
#define CGEMM_DEFAULT_UNROLL_M 2
40494051
#define ZGEMM_DEFAULT_UNROLL_M 2
40504052
#define XGEMM_DEFAULT_UNROLL_M 1
4053+
#define CGEMM3M_DEFAULT_UNROLL_M 2
4054+
#define ZGEMM3M_DEFAULT_UNROLL_M 2
4055+
#define CGEMM3M_DEFAULT_P 448
4056+
#define ZGEMM3M_DEFAULT_P 224
4057+
#define XGEMM3M_DEFAULT_P 112
4058+
#define CGEMM3M_DEFAULT_Q 224
4059+
#define ZGEMM3M_DEFAULT_Q 224
4060+
#define XGEMM3M_DEFAULT_Q 224
4061+
#define CGEMM3M_DEFAULT_R 12288
4062+
#define ZGEMM3M_DEFAULT_R 12288
4063+
#define XGEMM3M_DEFAULT_R 12288
4064+
40514065
#endif
40524066

40534067
#ifdef ARCH_MIPS

0 commit comments

Comments
 (0)