Skip to content

Commit 422a8fa

Browse files
authored
Merge pull request #1747 from xianyi/develop
Merge develop into 0.3.x for 0.3.3
2 parents e8a68ef + 5bac15a commit 422a8fa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+2552
-181
lines changed

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 2)
9+
set(OpenBLAS_PATCH_VERSION 3.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions
@@ -150,6 +150,7 @@ endif()
150150

151151
# add objects to the openblas lib
152152
add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
153+
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include>)
153154

154155
# Android needs to explicitly link against libm
155156
if(ANDROID)
@@ -169,6 +170,7 @@ endif()
169170
# Set output for libopenblas
170171
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
171172
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
173+
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES EXPORT_NAME "OpenBLAS")
172174

173175
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
174176
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )

Changelog.txt

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,115 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.2
4+
30-Jul-2018
5+
6+
common:
7+
* fixes for regressions caused by the rewrite of the thread
8+
initialization code in 0.3.1
9+
10+
POWER:
11+
* fixed cpu autodetection for the BSDs
12+
13+
MIPS64:
14+
* fixed utest errors in AXPY, DSDOT, ROT and SWAP
15+
16+
x86_64:
17+
* added autodetection of AMD Ryzen 2
18+
* fixed build with older versions of MSVC
19+
20+
====================================================================
21+
Version 0.3.1
22+
01-Jul-2018
23+
24+
common:
25+
* rewritten thread initialization code with significantly reduced overhead
26+
* added CBLAS interfaces to the IxAMIN BLAS extension functions
27+
* fixed the lapack-test target
28+
* CMAKE builds now create an OpenBLASConfig.cmake file
29+
* ZAXPY now uses a single thread for small input sizes
30+
* the LAPACK code was updated from Reference-LAPACK/lapack#253
31+
(fixing LAPACKE interfaces to Aasen's functions)
32+
33+
POWER:
34+
* corrected CROT and ZROT behaviour with zero INC_X
35+
36+
ARMV7:
37+
* corrected xDOT behaviour with zero INC_X or INC_Y
38+
39+
x86_64:
40+
* retired some older targets of DYNAMIC_ARCH builds to a new option DYNAMIC_OLDER,
41+
this affects PENRYN,DUNNINGTON,OPTERON,OPTERON_SSE3,BOBCAT,ATOM and NANO
42+
(which will still be supported via the slower PRESCOTT kernels when this option is not set)
43+
* added an option DYNAMIC_LIST that (used in conjunction with DYNAMIC_ARCH) allows one to
44+
specify the list of x86_64 targets to include. Any target not on the list will be supported
45+
by the Sandybridge or Nehalem kernels if available, or by Prescott.
46+
* improved SWITCH_RATIO on Haswell for increased GEMM throughput
47+
* added initial support for Intel Skylake X, including an AVX512 SGEMM kernel
48+
* added autodetection of Intel Cannon Lake series as Skylake X
49+
* added a default L2 cache size for hypervisors that return zero here (Chromebook)
50+
* fixed a name clash with recent Windows10 headers that broke the build with (at least)
51+
recent mingw from MSYS2
52+
* fixed a link error in mixed clang/gfortran builds with OpenMP
53+
* updated the OSX deployment target to 10.8
54+
* switched on parallel make for builds on MS Windows by default
55+
56+
x86:
57+
* fixed SSWAP and DSWAP behaviour with zero INC_X and INC_Y
58+
59+
====================================================================
60+
Version 0.3.0
61+
23-May-2018
62+
63+
common:
64+
* fixed some more thread race and locking bugs
65+
* added preliminary support for calling an OpenMP build of the library from multiple threads
66+
* removed performance impact of thread locks added in 0.2.20 on OpenMP code
67+
* general code cleanup
68+
* optimized DSDOT implementation
69+
* improved thread distribution for GEMM
70+
* corrected IMATCOPY/OMATCOPY implementation
71+
* fixed out-of-bounds accesses in the multithreaded xBMV/xPMV and SYMV implementations
72+
* cmake build improvements
73+
* pkgconfig file now contains build options
74+
* openblas_get_config() now reports USE_OPENMP and NUM_THREADS settings used for the build
75+
* corrections and improvements for systems with more than 64 cpus
76+
* LAPACK code updated to 3.8.0 including later fixes
77+
* added ReLAPACK, a recursive implementation of several LAPACK functions
78+
* Rewrote ROTMG to handle cases that the netlib code failed to address
79+
* Disabled (broken) multithreading code for xTRMV
80+
* corrected prototypes of complex CBLAS functions to make our cblas.h match the generally accepted standard
81+
* shared memory access failures on startup are now handled more gracefully
82+
* restored utests from earlier releases (and made them pass on all affected systems)
83+
84+
SPARC:
85+
* several fixes for cpu autodetection
86+
87+
POWER:
88+
* corrected vector register overwriting in several Power8 kernels
89+
* optimized additional BLAS functions
90+
91+
ARM:
92+
* added support for CortexA53 and A72
93+
* added autodetection for ThunderX2T99
94+
* made most optimized kernels the default for generic ARMv8 targets
95+
96+
x86_64:
97+
* parallelized DDOT kernel for Haswell
98+
* changed alignment directives in assembly kernels to boost performance on OSX
99+
* fixed register handling in the GEMV microkernels (bug exposed by gcc7)
100+
* added support for building on OpenBSD and Dragonfly
101+
* updated compiler options to work with Intel release 2018
102+
* support fully optimized build with clang/flang on Microsoft Windows
103+
* fixed building on AIX
104+
105+
IBM Z:
106+
* added optimized BLAS 1/2 functions
107+
108+
MIPS:
109+
* fixed cpu autodetection helper code
110+
* added mips32 1004K cpu (Mediatek MT7621 and similar SoC)
111+
* added mips64 I6500 cpu
112+
2113
====================================================================
3114
Version 0.2.20
4115
24-Jul-2017

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ endif
9797

9898
shared :
9999
ifndef NO_SHARED
100-
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
100+
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
101101
@$(MAKE) -C exports so
102102
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
103103
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
@@ -267,6 +267,8 @@ ifeq ($(F_COMPILER), GFORTRAN)
267267
ifdef SMP
268268
ifeq ($(OSNAME), WINNT)
269269
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
270+
else ifeq ($(OSNAME), Haiku)
271+
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
270272
else
271273
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
272274
endif

Makefile.install

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ endif
6666
#for install shared library
6767
ifndef NO_SHARED
6868
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
69-
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
69+
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
7070
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
7171
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
7272
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \

Makefile.rule

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.2
6+
VERSION = 0.3.3.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -107,7 +107,13 @@ BUILD_LAPACK_DEPRECATED = 1
107107
# BUILD_RELAPACK = 1
108108

109109
# If you want to use legacy threaded Level 3 implementation.
110-
# USE_SIMPLE_THREADED_LEVEL3 = 1
110+
USE_SIMPLE_THREADED_LEVEL3 = 1
111+
112+
# If you want to use the new, still somewhat experimental code that uses
113+
# thread-local storage instead of a central memory buffer in memory.c
114+
# Note that if your system uses GLIBC, it needs to have at least glibc 2.21
115+
# for this to work.
116+
USE_TLS = 1
111117

112118
# If you want to drive whole 64bit region by BLAS. Not all Fortran
113119
# compilers support this. It's safe to keep it commented out if you

Makefile.system

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,10 @@ ifdef USE_SIMPLE_THREADED_LEVEL3
10181018
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
10191019
endif
10201020

1021+
ifdef USE_TLS
1022+
CCOMMON_OPT += -DUSE_TLS
1023+
endif
1024+
10211025
ifndef SYMBOLPREFIX
10221026
SYMBOLPREFIX =
10231027
endif

Makefile.x86_64

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ ifeq ($(CORE), SKYLAKEX)
1212
ifndef NO_AVX512
1313
CCOMMON_OPT += -march=skylake-avx512
1414
FCOMMON_OPT += -march=skylake-avx512
15+
ifeq ($(OSNAME), CYGWIN_NT)
16+
CCOMMON_OPT += -fno-asynchronous-unwind-tables
17+
endif
1518
endif
1619
endif
1720

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ Please read `GotoBLAS_01Readme.txt`.
110110
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
111111
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
112112
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
113+
- **Intel Skylake**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64.
113114
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
114115
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
115116
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
@@ -200,6 +201,7 @@ Please see Changelog.txt to view the differences between OpenBLAS and GotoBLAS2
200201
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
201202
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
202203
Clang 3.0 will generate the wrong AVX binary code.
204+
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake AVX512 kernels.
203205
* The number of CPUs/cores should be less than or equal to 256. On Linux `x86_64` (`amd64`),
204206
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build
205207
the library with `BIGNUMA=1`.

benchmark/gemv.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ int main(int argc, char *argv[]){
122122

123123
FLOAT *a, *x, *y;
124124
FLOAT alpha[] = {1.0, 1.0};
125-
FLOAT beta [] = {1.0, 1.0};
125+
FLOAT beta [] = {1.0, 0.0};
126126
char trans='N';
127127
blasint m, i, j;
128128
blasint inc_x=1,inc_y=1;

c_check

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ $os = WINNT if ($data =~ /OS_WINNT/);
6464
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
6565
$os = Interix if ($data =~ /OS_INTERIX/);
6666
$os = Android if ($data =~ /OS_ANDROID/);
67+
$os = Haiku if ($data =~ /OS_HAIKU/);
6768

6869
$architecture = x86 if ($data =~ /ARCH_X86/);
6970
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
@@ -223,14 +224,15 @@ $data =~ /globl\s([_\.]*)(.*)/;
223224
$need_fu = $1;
224225

225226
$cross = 0;
226-
$cross = 1 if ($os ne $hostos);
227227

228228
if ($architecture ne $hostarch) {
229229
$cross = 1;
230230
$cross = 0 if (($hostarch eq "x86_64") && ($architecture eq "x86"));
231231
$cross = 0 if (($hostarch eq "mips64") && ($architecture eq "mips"));
232232
}
233233

234+
$cross = 1 if ($os ne $hostos);
235+
234236
$openmp = "" if $ENV{USE_OPENMP} != 1;
235237

236238
$linker_L = "";

0 commit comments

Comments
 (0)