Skip to content

Commit 628b335

Browse files
Merge branch 'develop' of https://github.com/quickwritereader/OpenBLAS into develop
2 parents 0f105dd + 7c51cc8 commit 628b335

File tree

197 files changed

+17727
-7267
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

197 files changed

+17727
-7267
lines changed

.travis.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ matrix:
149149

150150
- &test-macos
151151
os: osx
152-
osx_image: xcode8.3
152+
osx_image: xcode10.1
153153
before_script:
154154
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
155155
- brew update
@@ -160,6 +160,7 @@ matrix:
160160
- BTYPE="BINARY=64 INTERFACE64=1"
161161

162162
- <<: *test-macos
163+
osx_image: xcode8.3
163164
env:
164165
- BTYPE="BINARY=32"
165166

CMakeLists.txt

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,19 @@ endif()
4242

4343
#######
4444

45+
if(MSVC AND MSVC_STATIC_CRT)
46+
set(CompilerFlags
47+
CMAKE_CXX_FLAGS
48+
CMAKE_CXX_FLAGS_DEBUG
49+
CMAKE_CXX_FLAGS_RELEASE
50+
CMAKE_C_FLAGS
51+
CMAKE_C_FLAGS_DEBUG
52+
CMAKE_C_FLAGS_RELEASE
53+
)
54+
foreach(CompilerFlag ${CompilerFlags})
55+
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
56+
endforeach()
57+
endif()
4558

4659
message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")
4760

@@ -62,10 +75,10 @@ endif ()
6275

6376
set(SUBDIRS ${BLASDIRS})
6477
if (NOT NO_LAPACK)
65-
list(APPEND SUBDIRS lapack)
6678
if(BUILD_RELAPACK)
6779
list(APPEND SUBDIRS relapack/src)
6880
endif()
81+
list(APPEND SUBDIRS lapack)
6982
endif ()
7083

7184
# set which float types we want to build for
@@ -134,7 +147,7 @@ endif ()
134147

135148
# Only generate .def for dll on MSVC and always produce pdb files for debug and release
136149
if(MSVC)
137-
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
150+
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
138151
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
139152
endif()
140153
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
@@ -149,15 +162,9 @@ if (${DYNAMIC_ARCH})
149162
endforeach()
150163
endif ()
151164

152-
# Only build shared libs for MSVC
153-
if (MSVC)
154-
set(BUILD_SHARED_LIBS ON)
155-
endif()
156-
157-
158165
# add objects to the openblas lib
159166
add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
160-
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include>)
167+
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
161168

162169
# Android needs to explicitly link against libm
163170
if(ANDROID)
@@ -166,7 +173,7 @@ endif()
166173

167174
# Handle MSVC exports
168175
if(MSVC AND BUILD_SHARED_LIBS)
169-
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
176+
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
170177
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
171178
else()
172179
# Creates verbose .def file (51KB vs 18KB)
@@ -217,6 +224,14 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
217224
SOVERSION ${OpenBLAS_MAJOR_VERSION}
218225
)
219226

227+
if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
228+
if (NOT MSVC)
229+
target_link_libraries(${OpenBLAS_LIBNAME} "-Wl,-allow-multiple-definition")
230+
else()
231+
target_link_libraries(${OpenBLAS_LIBNAME} "/FORCE:MULTIPLE")
232+
endif()
233+
endif()
234+
220235
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
221236
if (NOT DEFINED ARCH)
222237
set(ARCH_IN "x86_64")
@@ -314,7 +329,7 @@ install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
314329
if(NOT NOFORTRAN)
315330
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
316331

317-
set(F77BLAS_H ${CMAKE_BINARY_DIR}/f77blas.h)
332+
set(F77BLAS_H ${CMAKE_BINARY_DIR}/generated/f77blas.h)
318333
file(WRITE ${F77BLAS_H} "#ifndef OPENBLAS_F77BLAS_H\n")
319334
file(APPEND ${F77BLAS_H} "#define OPENBLAS_F77BLAS_H\n")
320335
file(APPEND ${F77BLAS_H} "#include \"openblas_config.h\"\n")
@@ -327,10 +342,11 @@ endif()
327342
if(NOT NO_CBLAS)
328343
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
329344

345+
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
330346
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
331347
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
332-
file(WRITE ${CMAKE_BINARY_DIR}/cblas.tmp "${CBLAS_H_CONTENTS_NEW}")
333-
install (FILES ${CMAKE_BINARY_DIR}/cblas.tmp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} RENAME cblas.h)
348+
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
349+
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
334350
endif()
335351

336352
if(NOT NO_LAPACKE)

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ endif
9696
@echo
9797

9898
shared :
99-
ifndef NO_SHARED
99+
ifneq ($(NO_SHARED), 1)
100100
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
101101
@$(MAKE) -C exports so
102102
@ln -fs $(LIBSONAME) $(LIBPREFIX).so

Makefile.arm64

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,8 @@ ifeq ($(CORE), THUNDERX2T99)
3838
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
3939
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
4040
endif
41+
42+
ifeq ($(CORE), TSV110)
43+
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
44+
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
45+
endif

Makefile.install

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,14 @@ ifndef NO_LAPACKE
5858
endif
5959

6060
#for install static library
61-
ifndef NO_STATIC
61+
ifneq ($(NO_STATIC),1)
6262
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
6363
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
6464
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
6565
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
6666
endif
6767
#for install shared library
68-
ifndef NO_SHARED
68+
ifneq ($(NO_SHARED),1)
6969
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
7070
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
7171
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@@ -106,14 +106,14 @@ ifndef NO_LAPACKE
106106
endif
107107

108108
#for install static library
109-
ifndef NO_STATIC
109+
ifneq ($(NO_STATIC),1)
110110
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
111111
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
112112
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
113113
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
114114
endif
115115
#for install shared library
116-
ifndef NO_SHARED
116+
ifneq ($(NO_SHARED),1)
117117
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
118118
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
119119
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
@@ -138,7 +138,7 @@ endif
138138
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
139139
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
140140

141-
ifndef NO_SHARED
141+
ifneq ($(NO_SHARED),1)
142142
#ifeq logical or
143143
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
144144
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"

Makefile.rule

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ VERSION = 0.3.6.dev
4848
# HOSTCC = gcc
4949

5050
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
51+
# Please note that AVX is not available on 32-bit.
52+
# Setting BINARY=32 disables AVX/AVX2/AVX-512.
5153
# BINARY=64
5254

5355
# About threaded BLAS. It will be automatically detected if you don't
@@ -57,7 +59,7 @@ VERSION = 0.3.6.dev
5759
# USE_THREAD = 0
5860

5961
# If you're going to use this library with OpenMP, please comment it in.
60-
# This flag is always set for POWER8. Don't modify the flag
62+
# This flag is always set for POWER8. Don't set USE_OPENMP = 0 if you're targeting POWER8.
6163
# USE_OPENMP = 1
6264

6365
# The OpenMP scheduler to use - by default this is "static" and you
@@ -68,36 +70,45 @@ VERSION = 0.3.6.dev
6870
# allow you to select the scheduler from the environment variable OMP_SCHEDULE
6971
# CCOMMON_OPT += -DOMP_SCHED=dynamic
7072

71-
# You can define maximum number of threads. Basically it should be
72-
# less than actual number of cores. If you don't specify one, it's
73-
# automatically detected by the the script.
73+
# You can define the maximum number of threads. Basically it should be less
74+
# than or equal to the number of CPU threads. If you don't specify one, it's
75+
# automatically detected by the build system.
76+
# If SMT (a.k.a. HT) is enabled on the system, it may or may not be beneficial to
77+
# restrict NUM_THREADS to the number of physical cores. By default, the automatic
78+
# detection includes logical CPUs, thus allowing the use of SMT.
79+
# Users may opt at runtime to use fewer than NUM_THREADS threads.
80+
#
81+
# Note for package maintainers: you can build OpenBLAS with a large NUM_THREADS
82+
# value (e.g. 32-256) if you expect your users to use that many threads. Due to the way
83+
# some internal structures are allocated, using a large NUM_THREADS value has a RAM
84+
# footprint penalty, even if users reduce the actual number of threads at runtime.
7485
# NUM_THREADS = 24
7586

7687
# If you have enabled USE_OPENMP and your application would call
77-
# OpenBLAS's calculation API from multi threads, please comment it in.
78-
# This flag defines how many instances of OpenBLAS's calculation API can
79-
# actually run in parallel. If more threads call OpenBLAS's calculation API,
88+
# OpenBLAS's calculation API from multiple threads, please comment this in.
89+
# This flag defines how many instances of OpenBLAS's calculation API can actually
90+
# run in parallel. If more than NUM_PARALLEL threads call OpenBLAS's calculation API,
8091
# they need to wait for the preceding API calls to finish or risk data corruption.
8192
# NUM_PARALLEL = 2
8293

83-
# if you don't need to install the static library, please comment it in.
94+
# If you don't need to install the static library, please comment this in.
8495
# NO_STATIC = 1
8596

86-
# if you don't need generate the shared library, please comment it in.
97+
# If you don't need to generate the shared library, please comment this in.
8798
# NO_SHARED = 1
8899

89-
# If you don't need CBLAS interface, please comment it in.
100+
# If you don't need the CBLAS interface, please comment this in.
90101
# NO_CBLAS = 1
91102

92-
# If you only want CBLAS interface without installing Fortran compiler,
93-
# please comment it in.
103+
# If you only want the CBLAS interface without installing a Fortran compiler,
104+
# please comment this in.
94105
# ONLY_CBLAS = 1
95106

96-
# If you don't need LAPACK, please comment it in.
97-
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
107+
# If you don't need LAPACK, please comment this in.
108+
# If you set NO_LAPACK=1, the build system automatically sets NO_LAPACKE=1.
98109
# NO_LAPACK = 1
99110

100-
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
111+
# If you don't need LAPACKE (C Interface to LAPACK), please comment this in.
101112
# NO_LAPACKE = 1
102113

103114
# Build LAPACK Deprecated functions since LAPACK 3.6.0
@@ -106,7 +117,7 @@ BUILD_LAPACK_DEPRECATED = 1
106117
# Build RecursiveLAPACK on top of LAPACK
107118
# BUILD_RELAPACK = 1
108119

109-
# If you want to use legacy threaded Level 3 implementation.
120+
# If you want to use the legacy threaded Level 3 implementation.
110121
# USE_SIMPLE_THREADED_LEVEL3 = 1
111122

112123
# If you want to use the new, still somewhat experimental code that uses
@@ -116,19 +127,27 @@ BUILD_LAPACK_DEPRECATED = 1
116127
# USE_TLS = 1
117128

118129
# If you want to drive the whole 64-bit region by BLAS. Not all Fortran
119-
# compiler supports this. It's safe to keep comment it out if you
120-
# are not sure(equivalent to "-i8" option).
130+
# compilers support this. It's safe to keep this commented out if you
131+
# are not sure. (This is equivalent to the "-i8" ifort option).
121132
# INTERFACE64 = 1
122133

123134
# Unfortunately, most kernels won't give us a high-quality buffer.
124135
# BLAS tries to find the best region before entering main function,
125136
# but it will consume time. If you don't like it, you can disable it.
126137
NO_WARMUP = 1
127138

128-
# If you want to disable CPU/Memory affinity on Linux.
139+
# Comment this in if you want to disable OpenBLAS's CPU/Memory affinity handling.
140+
# This feature is only implemented on Linux, and is always disabled on other platforms.
141+
# Enabling affinity handling may improve performance, especially on NUMA systems, but
142+
# it may conflict with certain applications that also try to manage affinity.
143+
# This conflict can result in threads of the application calling OpenBLAS ending up locked
144+
# to the same core(s) as OpenBLAS, possibly binding all threads to a single core.
145+
# For this reason, affinity handling is disabled by default. It can be safely enabled if nothing
146+
# else modifies affinity settings.
147+
# Note: enabling affinity has been known to cause problems with NumPy and R.
129148
NO_AFFINITY = 1
130149

131-
# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
150+
# If you are compiling for Linux and you have more than 16 NUMA nodes or more than 256 CPUs, please comment this in.
132151
# BIGNUMA = 1
133152

134153
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
@@ -180,7 +199,7 @@ NO_AFFINITY = 1
180199
# been reported to be optimal for certain workloads (50 is the recommended value for Julia).
181200
# GEMM_MULTITHREAD_THRESHOLD = 4
182201

183-
# If you need santy check by comparing reference BLAS. It'll be very
202+
# If you need a sanity check by comparing results to reference BLAS. It'll be very
184203
# slow (Not implemented yet).
185204
# SANITY_CHECK = 1
186205

Makefile.system

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ endif
9595
ifeq ($(TARGET), ZEN)
9696
GETARCH_FLAGS := -DFORCE_BARCELONA
9797
endif
98+
ifeq ($(TARGET), ARMV8)
99+
GETARCH_FLAGS := -DFORCE_ARMV7
100+
endif
98101
endif
99102

100103

@@ -152,7 +155,8 @@ GETARCH_FLAGS += -DNO_AVX
152155
endif
153156

154157
ifeq ($(BINARY), 32)
155-
GETARCH_FLAGS += -DNO_AVX
158+
GETARCH_FLAGS += -DNO_AVX -DNO_AVX2 -DNO_AVX512
159+
NO_AVX512 = 1
156160
endif
157161

158162
ifeq ($(NO_AVX2), 1)

Makefile.zarch

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,7 @@ CCOMMON_OPT += -march=z13 -mzvector
44
FCOMMON_OPT += -march=z13 -mzvector
55
endif
66

7+
ifeq ($(CORE), Z14)
8+
CCOMMON_OPT += -march=z14 -mzvector
9+
FCOMMON_OPT += -march=z14 -mzvector
10+
endif

TargetList.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,9 @@ CORTEXA73
9191
FALKOR
9292
THUNDERX
9393
THUNDERX2T99
94+
TSV110
9495

9596
9.System Z:
9697
ZARCH_GENERIC
9798
Z13
99+
Z14

appveyor.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ before_build:
5353
- ps: if (-Not (Test-Path .\build)) { mkdir build }
5454
- cd build
5555
- if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" ..
56-
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl ..
56+
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
5757
- if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
58-
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON ..
58+
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..
5959

6060
build_script:
6161
- cmake --build .

0 commit comments

Comments
 (0)