Skip to content

Commit d2faa1b

Browse files
authored
Merge pull request #2978 from martin-frbg/fixdynfeatures
Fix handling of cpu capability flags in DYNAMIC_ARCH builds
2 parents ff74319 + 1c4cfdc commit d2faa1b

File tree

6 files changed: 106 additions and 116 deletions.

Makefile.system

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
INCLUDED = 1
77

88
ifndef TOPDIR
9-
TOPDIR = .
9+
TOPDIR = .
1010
endif
1111

1212
# If ARCH is not set, we use the host system's architecture for getarch compile options.
@@ -252,6 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)"
252252
ifndef TARGET_CORE
253253
include $(TOPDIR)/Makefile.conf
254254
else
255+
HAVE_NEON=
256+
HAVE_VFP=
257+
HAVE_VFPV3=
258+
HAVE_VFPV4=
259+
HAVE_MMX=
260+
HAVE_SSE=
261+
HAVE_SSE2=
262+
HAVE_SSE3=
263+
HAVE_SSSE3=
264+
HAVE_SSE4_1=
265+
HAVE_SSE4_2=
266+
HAVE_SSE4A=
267+
HAVE_SSE5=
268+
HAVE_AVX=
269+
HAVE_AVX2=
270+
HAVE_FMA3=
255271
include $(TOPDIR)/Makefile_kernel.conf
256272
endif
257273

@@ -1522,6 +1538,8 @@ export HAVE_SSE4_2
15221538
export HAVE_SSE4A
15231539
export HAVE_SSE5
15241540
export HAVE_AVX
1541+
export HAVE_AVX2
1542+
export HAVE_FMA3
15251543
export HAVE_VFP
15261544
export HAVE_VFPV3
15271545
export HAVE_VFPV4

Makefile.x86_64

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ endif
99
endif
1010

1111
ifdef HAVE_SSE3
12-
ifndef DYNAMIC_ARCH
1312
CCOMMON_OPT += -msse3
1413
FCOMMON_OPT += -msse3
14+
endif
1515
ifdef HAVE_SSSE3
1616
CCOMMON_OPT += -mssse3
1717
FCOMMON_OPT += -mssse3
@@ -20,7 +20,17 @@ ifdef HAVE_SSE4_1
2020
CCOMMON_OPT += -msse4.1
2121
FCOMMON_OPT += -msse4.1
2222
endif
23+
ifdef HAVE_AVX
24+
CCOMMON_OPT += -mavx
25+
FCOMMON_OPT += -mavx
2326
endif
27+
ifdef HAVE_AVX2
28+
CCOMMON_OPT += -mavx2
29+
FCOMMON_OPT += -mavx2
30+
endif
31+
ifdef HAVE_FMA3
32+
CCOMMON_OPT += -mfma
33+
FCOMMON_OPT += -mfma
2434
endif
2535

2636
ifeq ($(CORE), SKYLAKEX)
@@ -66,8 +76,7 @@ endif
6676
endif
6777
endif
6878

69-
ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
70-
ifndef DYNAMIC_ARCH
79+
ifdef HAVE_AVX2
7180
ifndef NO_AVX2
7281
ifeq ($(C_COMPILER), GCC)
7382
# AVX2 support was added in 4.7.0
@@ -96,7 +105,6 @@ endif
96105
endif
97106
endif
98107
endif
99-
endif
100108

101109

102110

cmake/cc.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,15 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
9696
endif ()
9797
endif ()
9898

99-
if (${CORE} STREQUAL "SKYLAKEX")
99+
if (${CORE} STREQUAL SKYLAKEX)
100100
if (NOT DYNAMIC_ARCH)
101101
if (NOT NO_AVX512)
102102
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
103103
endif ()
104104
endif ()
105105
endif ()
106106

107-
if (${CORE} STREQUAL "COOPERLAKE")
107+
if (${CORE} STREQUAL COOPERLAKE)
108108
if (NOT DYNAMIC_ARCH)
109109
if (NOT NO_AVX512)
110110
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)

cmake/prebuild.cmake

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -139,36 +139,6 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
139139
set(CGEMM3M_UNROLL_N 4)
140140
set(ZGEMM3M_UNROLL_M 4)
141141
set(ZGEMM3M_UNROLL_N 4)
142-
elseif ("${TCORE}" STREQUAL "BARCELONA")
143-
file(APPEND ${TARGET_CONF_TEMP}
144-
"#define HAVE_SSE3\n")
145-
elseif ("${TCORE}" STREQUAL "STEAMROLLER")
146-
file(APPEND ${TARGET_CONF_TEMP}
147-
"#define HAVE_SSE3\n")
148-
elseif ("${TCORE}" STREQUAL "EXCAVATOR")
149-
file(APPEND ${TARGET_CONF_TEMP}
150-
"#define HAVE_SSE3\n")
151-
elseif ("${TCORE}" STREQUAL "NEHALEM")
152-
file(APPEND ${TARGET_CONF_TEMP}
153-
"#define HAVE_SSE3\n")
154-
elseif ("${TCORE}" STREQUAL "PRESCOTT")
155-
file(APPEND ${TARGET_CONF_TEMP}
156-
"#define HAVE_SSE3\n")
157-
elseif ("${TCORE}" STREQUAL "SANDYBRIDGE")
158-
file(APPEND ${TARGET_CONF_TEMP}
159-
"#define HAVE_AVX\n")
160-
elseif ("${TCORE}" STREQUAL "HASWELL")
161-
file(APPEND ${TARGET_CONF_TEMP}
162-
"#define HAVE_AVX2\n")
163-
elseif ("${TCORE}" STREQUAL "ZEN")
164-
file(APPEND ${TARGET_CONF_TEMP}
165-
"#define HAVE_AVX2\n")
166-
elseif ("${TCORE}" STREQUAL "SKYLAKEX")
167-
file(APPEND ${TARGET_CONF_TEMP}
168-
"#define HAVE_AVX512\n")
169-
elseif ("${TCORE}" STREQUAL "COOPERLAKE")
170-
file(APPEND ${TARGET_CONF_TEMP}
171-
"#define HAVE_AVX512\n")
172142
elseif ("${TCORE}" STREQUAL "ARMV7")
173143
file(APPEND ${TARGET_CONF_TEMP}
174144
"#define L1_DATA_SIZE\t65536\n"
@@ -586,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING)
586556
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
587557
endif ()
588558
endif ()
559+
unset (HAVE_AVX2)
560+
unset (HAVE_AVX)
561+
unset (HAVE_FMA3)
562+
unset (HAVE_MMX)
563+
unset (HAVE_SSE)
564+
unset (HAVE_SSE2)
565+
unset (HAVE_SSE3)
566+
unset (HAVE_SSSE3)
567+
unset (HAVE_SSE4A)
568+
unset (HAVE_SSE4_1)
569+
unset (HAVE_SSE4_2)
570+
unset (HAVE_NEON)
571+
unset (HAVE_VFP)
572+
unset (HAVE_VFPV3)
573+
unset (HAVE_VFPV4)
589574
message(STATUS "Running getarch")
590575

591576
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way

cmake/system.cmake

Lines changed: 58 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -44,74 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
4444
endif ()
4545
endif ()
4646

47-
if (DEFINED TARGET)
48-
if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
49-
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
50-
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
51-
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
52-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
53-
else()
54-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
55-
endif()
56-
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
57-
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
58-
# endif()
59-
endif()
60-
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
61-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
62-
endif()
63-
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
64-
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
65-
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
66-
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
67-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
68-
endif()
69-
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
70-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse -msse3 -mavx2")
71-
endif()
72-
endif()
73-
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
74-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
75-
endif()
76-
if (${TARGET} STREQUAL "ZEN" AND NOT NO_AVX2)
77-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx2")
78-
endif()
79-
if (${TARGET} STREQUAL "SANDYBRIDGE" AND NOT NO_AVX)
80-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3 -mavx")
81-
endif()
82-
if (${TARGET} STREQUAL "BARCELONA" OR ${TARGET} STREQUAL "STEAMROLLER" OR ${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "EXCAVATOR")
83-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
84-
endif()
85-
if (${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "BOBCAT" OR ${TARGET} STREQUAL "OPTERON_SSE3")
86-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
87-
endif()
88-
if (${TARGET} STREQUAL "PRESCOTT" OR ${TARGET} STREQUAL "NANO")
89-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
90-
endif()
91-
if (${TARGET} STREQUAL "NEHALEM" OR ${TARGET} STREQUAL "ATOM")
92-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
93-
endif()
94-
if (${TARGET} STREQUAL "CORE2" OR ${TARGET} STREQUAL "PENRYN" OR ${TARGET} STREQUAL "DUNNINGTON")
95-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
96-
endif()
97-
if (DEFINED HAVE_SSE)
98-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
99-
endif()
100-
if (DEFINED HAVE_SSE2)
101-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
102-
endif()
103-
if (DEFINED HAVE_SSE3)
104-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
105-
endif()
106-
if (DEFINED HAVE_SSSE3)
107-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
108-
endif()
109-
if (DEFINED HAVE_SSE4_1)
110-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
111-
endif()
112-
endif()
11347

11448
if (DEFINED TARGET)
49+
message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --")
11550
message(STATUS "Targeting the ${TARGET} architecture.")
11651
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
11752
endif ()
@@ -211,6 +146,63 @@ else()
211146
endif ()
212147

213148
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
149+
if (DEFINED TARGET)
150+
if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512)
151+
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
152+
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
153+
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
154+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
155+
else()
156+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
157+
endif()
158+
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
159+
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
160+
# endif()
161+
endif()
162+
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
163+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
164+
endif()
165+
if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2)
166+
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
167+
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
168+
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
169+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
170+
endif()
171+
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
172+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
173+
endif()
174+
endif()
175+
if (DEFINED HAVE_AVX)
176+
if (NOT NO_AVX)
177+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx")
178+
endif()
179+
endif()
180+
if (DEFINED HAVE_AVX2)
181+
if (NOT NO_AVX2)
182+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
183+
endif()
184+
endif()
185+
if (DEFINED HAVE_FMA3)
186+
if (NOT NO_AVX2)
187+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma")
188+
endif()
189+
endif()
190+
if (DEFINED HAVE_SSE)
191+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
192+
endif()
193+
if (DEFINED HAVE_SSE2)
194+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
195+
endif()
196+
if (DEFINED HAVE_SSE3)
197+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
198+
endif()
199+
if (DEFINED HAVE_SSSE3)
200+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
201+
endif()
202+
if (DEFINED HAVE_SSE4_1)
203+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
204+
endif()
205+
endif()
214206
if (DEFINED BINARY)
215207
message(STATUS "Compiling a ${BINARY}-bit binary.")
216208
endif ()

kernel/Makefile

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,6 @@ endif
55
TOPDIR = ..
66
include $(TOPDIR)/Makefile.system
77

8-
ifdef HAVE_SSE3
9-
CFLAGS += -msse3
10-
endif
11-
ifdef HAVE_SSSE3
12-
CFLAGS += -mssse3
13-
endif
14-
158
ifeq ($(ARCH), power)
169
ifeq ($(C_COMPILER), CLANG)
1710
override CFLAGS += -fno-integrated-as
@@ -38,12 +31,6 @@ ifdef NO_AVX2
3831
endif
3932

4033
ifdef TARGET_CORE
41-
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
42-
override CFLAGS += -msse -msse2 -msse3 -mssse3 -msse4.1
43-
endif
44-
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE BANIAS NORTHWOOD ATHLON OPTERON))
45-
override CFLAGS += -msse -msse2
46-
endif
4734
ifeq ($(TARGET_CORE), COOPERLAKE)
4835
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
4936
ifeq ($(GCCVERSIONGTEQ10), 1)

0 commit comments

Comments
 (0)