Skip to content

Commit e596781

Browse files
authored
Merge pull request #110 from xianyi/develop
rebase
2 parents 9efc3f0 + ff74319 commit e596781

File tree

12 files changed

+640
-287
lines changed

12 files changed

+640
-287
lines changed

Makefile.system

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ endif
9393
ifdef TARGET
9494
GETARCH_FLAGS := -DFORCE_$(TARGET)
9595
GETARCH_FLAGS += -DUSER_TARGET
96+
ifeq ($(TARGET), GENERIC)
97+
ifeq ($(DYNAMIC_ARCH), 1)
98+
override NO_EXPRECISION=1
99+
# Propagate the NO_EXPRECISION override to sub-makes; the exported name must
# match the variable being set exactly (make variable names are case-sensitive).
export NO_EXPRECISION
100+
endif
101+
endif
96102
endif
97103

98104
# Force fallbacks for 32bit

cmake/os.cmake

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,14 @@ if (X86)
8484
set(NO_EXPRECISION 1)
8585
endif ()
8686

87+
if (DYNAMIC_ARCH)
88+
if (TARGET)
89+
# Quote the expansion so the value of TARGET is compared as a string and is
# not re-evaluated as a variable name by if().
if ("${TARGET}" STREQUAL "GENERIC")
90+
set(NO_EXPRECISION 1)
91+
endif ()
92+
endif ()
93+
endif ()
94+
8795
if (UTEST_CHECK)
8896
set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
8997
set(SANITY_CHECK 1)

cmake/prebuild.cmake

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,36 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
139139
set(CGEMM3M_UNROLL_N 4)
140140
set(ZGEMM3M_UNROLL_M 4)
141141
set(ZGEMM3M_UNROLL_N 4)
142+
elseif ("${TCORE}" STREQUAL "BARCELONA")
143+
file(APPEND ${TARGET_CONF_TEMP}
144+
"#define HAVE_SSE3\n")
145+
elseif ("${TCORE}" STREQUAL "STEAMROLLER")
146+
file(APPEND ${TARGET_CONF_TEMP}
147+
"#define HAVE_SSE3\n")
148+
elseif ("${TCORE}" STREQUAL "EXCAVATOR")
149+
file(APPEND ${TARGET_CONF_TEMP}
150+
"#define HAVE_SSE3\n")
151+
elseif ("${TCORE}" STREQUAL "NEHALEM")
152+
file(APPEND ${TARGET_CONF_TEMP}
153+
"#define HAVE_SSE3\n")
154+
elseif ("${TCORE}" STREQUAL "PRESCOTT")
155+
file(APPEND ${TARGET_CONF_TEMP}
156+
"#define HAVE_SSE3\n")
157+
elseif ("${TCORE}" STREQUAL "SANDYBRIDGE")
158+
file(APPEND ${TARGET_CONF_TEMP}
159+
"#define HAVE_AVX\n")
160+
elseif ("${TCORE}" STREQUAL "HASWELL")
161+
file(APPEND ${TARGET_CONF_TEMP}
162+
"#define HAVE_AVX2\n")
163+
elseif ("${TCORE}" STREQUAL "ZEN")
164+
file(APPEND ${TARGET_CONF_TEMP}
165+
"#define HAVE_AVX2\n")
166+
elseif ("${TCORE}" STREQUAL "SKYLAKEX")
167+
file(APPEND ${TARGET_CONF_TEMP}
168+
"#define HAVE_AVX512\n")
169+
elseif ("${TCORE}" STREQUAL "COOPERLAKE")
170+
file(APPEND ${TARGET_CONF_TEMP}
171+
"#define HAVE_AVX512\n")
142172
elseif ("${TCORE}" STREQUAL "ARMV7")
143173
file(APPEND ${TARGET_CONF_TEMP}
144174
"#define L1_DATA_SIZE\t65536\n"

cmake/system.cmake

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,36 @@ if (DEFINED TARGET)
6464
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
6565
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
6666
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
67-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
67+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
6868
endif()
6969
# CMake reports Clang as "Clang" (or "AppleClang" on macOS) and STREQUAL is
# case-sensitive, so comparing against "CLANG" could never match.
elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "AppleClang")
70-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
70+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse -msse3 -mavx2")
7171
endif()
7272
endif()
73+
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
74+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
75+
endif()
76+
if (${TARGET} STREQUAL "ZEN" AND NOT NO_AVX2)
77+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
78+
endif()
79+
if (${TARGET} STREQUAL "SANDYBRIDGE" AND NOT NO_AVX)
80+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx")
81+
endif()
82+
if (${TARGET} STREQUAL "BARCELONA" OR ${TARGET} STREQUAL "STEAMROLLER" OR ${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "EXCAVATOR")
83+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
84+
endif()
85+
if (${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "BOBCAT" OR ${TARGET} STREQUAL "OPTERON_SSE3")
86+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
87+
endif()
88+
if (${TARGET} STREQUAL "PRESCOTT" OR ${TARGET} STREQUAL "NANO")
89+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
90+
endif()
91+
if (${TARGET} STREQUAL "NEHALEM" OR ${TARGET} STREQUAL "ATOM")
92+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
93+
endif()
94+
if (${TARGET} STREQUAL "CORE2" OR ${TARGET} STREQUAL "PENRYN" OR ${TARGET} STREQUAL "DUNNINGTON")
95+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
96+
endif()
7397
if (DEFINED HAVE_SSE)
7498
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
7599
endif()

common_arm64.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,14 +142,8 @@ static inline int blas_quickdivide(blasint x, blasint y){
142142
#define HUGE_PAGESIZE ( 4 << 20)
143143

144144
#ifndef BUFFERSIZE
145-
#if defined(CORTEXA57)
146-
#define BUFFER_SIZE (20 << 20)
147-
#elif defined(TSV110) || defined(EMAG8180)
148145
#define BUFFER_SIZE (32 << 20)
149146
#else
150-
#define BUFFER_SIZE (16 << 20)
151-
#endif
152-
#else
153147
#define BUFFER_SIZE (32 << BUFFERSIZE)
154148
#endif
155149

f_check

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ if ($compiler eq "") {
3333
"ppuf77", "ppuf95", "ppuf90", "ppuxlf",
3434
"pathf90", "pathf95",
3535
"pgf95", "pgf90", "pgf77",
36-
"flang",
36+
"flang", "egfortran",
3737
"ifort");
3838

3939
OUTER:

kernel/arm/zdot.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
7373
i++ ;
7474

7575
}
76-
#if !defined(__POWER__)
76+
#if !defined(__PPC__)
7777
CREAL(result) = dot[0];
7878
CIMAG(result) = dot[1];
7979
#else

kernel/power/KERNEL.POWER10

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
3434
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
3535

3636
DGEMMKERNEL = dgemm_kernel_power10.c
37-
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
38-
DGEMMITCOPY = dgemm_tcopy_16_power8.S
39-
DGEMMONCOPY = dgemm_ncopy_4_power8.S
40-
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
41-
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
42-
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
37+
DGEMMINCOPY =
38+
DGEMMITCOPY =
39+
DGEMMONCOPY = dgemm_ncopy_8_power10.c
40+
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
41+
DGEMMINCOPYOBJ =
42+
DGEMMITCOPYOBJ =
4343
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
4444
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
4545

@@ -69,7 +69,7 @@ STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
6969
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
7070

7171
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
72-
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
72+
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
7373
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
7474
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
7575

0 commit comments

Comments
 (0)