Skip to content

Commit 65502c6

Browse files
authored
Merge pull request #3216 from xianyi/develop
Update from develop for 0.3.15 release
2 parents 2f6d35c + f71627f commit 65502c6

File tree

236 files changed

+12391
-2407
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

236 files changed

+12391
-2407
lines changed

.travis.yml

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -224,12 +224,21 @@ matrix:
224224
before_script:
225225
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
226226
- brew update
227-
- brew install gcc@10
228227
script:
229228
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
230229
env:
231-
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
232-
230+
- BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10"
231+
232+
- <<: *test-macos
233+
osx_image: xcode12
234+
before_script:
235+
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
236+
- brew update
237+
script:
238+
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
239+
env:
240+
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
241+
233242
# - <<: *test-macos
234243
# osx_image: xcode10
235244
# env:

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 14)
9+
set(OpenBLAS_PATCH_VERSION 14.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions

Changelog.txt

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,54 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.15
4+
2-May-2021
5+
6+
common:
7+
- imported improvements and bugfixes from Reference-LAPACK 3.9.1
8+
- imported LAPACKE interface fixes from Reference-LAPACK PRs 534 + 537
9+
- fixed a problem in the cpu detection of 0.3.14 that prevented cross-compilation
10+
- fixed a sequence problem in the generation of softlinks to the library in GMAKE
11+
12+
RISC V:
13+
- fixed compilation on RISCV (missing entry in getarch)
14+
- fixed a potential division by zero in CROTG and ZROTG
15+
16+
POWER:
17+
- fixed LAPACK testsuite failures seen with the NVIDIA HPC compiler
18+
- improved CGEMM, DGEMM and ZGEMM performance on POWER10
19+
- added an optimized ZGEMV kernel for POWER10
20+
- fixed a potential division by zero in CROTG and ZROTG
21+
22+
x86_64:
23+
- added support for Intel Control-flow Enforcement Technology (CET)
24+
- reverted the DOMATCOPY_RT code to the generic C version
25+
- fixed a bug in the AVX512 SGEMM kernel introduced in 0.3.14
26+
- fixed misapplication of -msse flag to non-SSE cpus in DYNAMIC_ARCH
27+
- added support for compilation of the benchmarks on older OSX versions
28+
- fixed propagation of the NO_AVX512 option in CMAKE builds
29+
- fixed compilation of the AVX512 SGEMM kernel with clang-cl on Windows
30+
- fixed compilation of the CTESTs with INTERFACE64=1 (random faults on OSX)
31+
- corrected the Haswell DROT kernel to require AVX2/FMA3 rather than AVX512
32+
33+
ARM:
34+
- fixed a potential division by zero in CROTG and ZROTG
35+
- fixed a potential overflow in IMATCOPY/ZIMATCOPY and the CTESTs
36+
37+
ARM64:
38+
- fixed spurious reads outside the array in the SGEMM tcopy macro
39+
- fixed a potential division by zero in CROTG and ZROTG
40+
- fixed a segmentation fault in DYNAMIC_ARCH builds (reappeared in 0.3.14)
41+
42+
MIPS:
43+
- fixed a potential division by zero in CROTG and ZROTG
44+
- fixed a potential overflow in IMATCOPY/ZIMATCOPY and the CTESTs
45+
46+
MIPS64:
47+
- fixed a potential division by zero in CROTG and ZROTG
48+
49+
SPARC:
50+
- fixed a potential division by zero in CROTG and ZROTG
51+
252
====================================================================
353
Version 0.3.14
454
17-Mar-2021

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ ifeq ($(NO_SHARED), 1)
167167
$(error OpenBLAS: neither static nor shared are enabled.)
168168
endif
169169
endif
170-
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
171170
@for d in $(SUBDIRS) ; \
172171
do if test -d $$d; then \
173172
$(MAKE) -C $$d $(@F) || exit 1 ; \
@@ -196,6 +195,7 @@ endif
196195
ifdef USE_THREAD
197196
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
198197
endif
198+
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
199199
@touch lib.grd
200200

201201
prof : prof_blas prof_lapack

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.14
6+
VERSION = 0.3.14.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

Makefile.x86

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,21 @@
11
# COMPILER_PREFIX = mingw32-
22

3+
ifndef DYNAMIC_ARCH
4+
ADD_CPUFLAGS = 1
5+
else
6+
ifdef TARGET_CORE
7+
ADD_CPUFLAGS = 1
8+
endif
9+
endif
10+
11+
ifdef ADD_CPUFLAGS
312
ifdef HAVE_SSE
413
CCOMMON_OPT += -msse
14+
ifneq ($(F_COMPILER), NAG)
515
FCOMMON_OPT += -msse
616
endif
7-
17+
endif
18+
endif
819

920
ifeq ($(OSNAME), Interix)
1021
ARFLAGS = -m x86

Makefile.x86_64

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ endif
88
endif
99
endif
1010

11+
12+
ifndef DYNAMIC_ARCH
13+
ADD_CPUFLAGS = 1
14+
else
15+
ifdef TARGET_CORE
16+
ADD_CPUFLAGS = 1
17+
endif
18+
endif
19+
20+
ifdef ADD_CPUFLAGS
1121
ifdef HAVE_SSE3
1222
CCOMMON_OPT += -msse3
1323
ifneq ($(F_COMPILER), NAG)
@@ -44,7 +54,6 @@ endif
4454
endif
4555

4656
ifeq ($(CORE), SKYLAKEX)
47-
ifndef DYNAMIC_ARCH
4857
ifndef NO_AVX512
4958
CCOMMON_OPT += -march=skylake-avx512
5059
ifneq ($(F_COMPILER), NAG)
@@ -62,10 +71,8 @@ endif
6271
endif
6372
endif
6473
endif
65-
endif
6674

6775
ifeq ($(CORE), COOPERLAKE)
68-
ifndef DYNAMIC_ARCH
6976
ifndef NO_AVX512
7077
ifeq ($(C_COMPILER), GCC)
7178
# cooperlake support was added in 10.1
@@ -88,7 +95,6 @@ endif
8895
endif
8996
endif
9097
endif
91-
endif
9298

9399
ifdef HAVE_AVX2
94100
ifndef NO_AVX2
@@ -120,6 +126,7 @@ endif
120126
endif
121127
endif
122128

129+
endif
123130

124131

125132
ifeq ($(OSNAME), Interix)

azure-pipelines.yml

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,15 @@ trigger:
44
branches:
55
include:
66
- develop
7-
7+
resources:
8+
containers:
9+
- container: oneapi-hpckit
10+
image: intel/oneapi-hpckit:latest
11+
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
12+
- container: oneapi-basekit
13+
image: intel/oneapi-basekit:latest
14+
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
15+
816
jobs:
917
# manylinux1 is useful to test because the
1018
# standard Docker container uses an old version
@@ -68,4 +76,64 @@ jobs:
6876
dir
6977
openblas_utest.exe
7078
71-
79+
- job: OSX_OpenMP
80+
pool:
81+
vmImage: 'macOS-10.15'
82+
steps:
83+
- script: |
84+
brew update
85+
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-10 FC=gfortran-10
86+
87+
# Single-threaded macOS build with GCC/gfortran. The build system's switch is
# spelled USE_THREAD (singular); a misspelt name is silently ignored by make.
- job: OSX_GCC_Nothreads
88+
pool:
89+
vmImage: 'macOS-10.15'
90+
steps:
91+
- script: |
92+
brew update
93+
make USE_THREAD=0 CC=gcc-10 FC=gfortran-10
94+
95+
- job: OSX_OpenMP_Clang
96+
pool:
97+
vmImage: 'macOS-10.15'
98+
variables:
99+
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
100+
LIBRARY_PATH: /usr/local/opt/llvm/lib
101+
steps:
102+
- script: |
103+
brew update
104+
brew install llvm libomp
105+
make TARGET=CORE2 USE_OPENMP=1 INTERFACE64=1 DYNAMIC_ARCH=1 CC=/usr/local/opt/llvm/bin/clang FC=gfortran-10
106+
107+
- job: OSX_Ifort_Clang
108+
pool:
109+
vmImage: 'macOS-10.15'
110+
variables:
111+
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
112+
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg
113+
LIBRARY_PATH: /usr/local/opt/llvm/lib
114+
MACOS_FORTRAN_COMPONENTS: intel.oneapi.mac.ifort-compiler
115+
steps:
116+
- script: |
117+
brew update
118+
brew install llvm libomp
119+
sudo mkdir -p /opt/intel
120+
sudo chown $USER /opt/intel
121+
displayName: prepare for cache restore
122+
- task: Cache@2
123+
inputs:
124+
path: /opt/intel/oneapi
125+
key: '"install" | "$(MACOS_HPCKIT_URL)" | "$(MACOS_FORTRAN_COMPONENTS)"'
126+
cacheHitVar: CACHE_RESTORED
127+
# Download and install the Intel Fortran compiler; skipped when the cache step
# restored /opt/intel/oneapi (see the condition at the end of this step).
- script: |
128+
curl --output webimage.dmg --url $(MACOS_HPCKIT_URL) --retry 5 --retry-delay 5
129+
hdiutil attach webimage.dmg
130+
sudo /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)"/bootstrapper.app/Contents/MacOS/bootstrapper -s --action install --components="$(MACOS_FORTRAN_COMPONENTS)" --eula=accept --continue-with-optional-error=yes --log-dir=.
131+
installer_exit_code=$?
132+
# Detach the volume attached above; its name is derived from the same pipeline variable used for the install path.
hdiutil detach /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)" -quiet
133+
exit $installer_exit_code
134+
displayName: install
135+
condition: ne(variables.CACHE_RESTORED, 'true')
136+
- script: |
137+
source /opt/intel/oneapi/setvars.sh
138+
make CC=/usr/local/opt/llvm/bin/clang FC=ifort
139+

benchmark/bench.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
#include <time.h>
44
#ifdef __CYGWIN32__
55
#include <sys/time.h>
6+
#elif defined(__APPLE__)
7+
#include <mach/mach_time.h>
68
#endif
79
#include "common.h"
810

cmake/lapack.cmake

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ set(SLASRC
6666
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
6767
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
6868
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
69-
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
69+
slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
7070
slarrv.f slartv.f
7171
slarz.f slarzb.f slarzt.f slasy2.f
7272
slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
@@ -112,14 +112,14 @@ set(SLASRC
112112
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
113113
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f
114114
sgelqt.f sgelqt3.f sgemlqt.f
115-
sgetsls.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f
115+
sgetsls.f sgetsqrhrt.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f
116116
sgelq.f slaswlq.f slamswlq.f sgemlq.f
117117
stplqt.f stplqt2.f stpmlqt.f
118118
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
119119
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
120120
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
121121
sgesvdq.f slaorhr_col_getrfnp.f
122-
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f )
122+
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f )
123123

124124
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
125125
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@@ -171,7 +171,7 @@ set(CLASRC
171171
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
172172
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
173173
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
174-
clarf.f clarfb.f clarfg.f clarfgp.f clarft.f
174+
clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f
175175
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
176176
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
177177
clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f
@@ -209,14 +209,14 @@ set(CLASRC
209209
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
210210
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f
211211
cgelqt.f cgelqt3.f cgemlqt.f
212-
cgetsls.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f
212+
cgetsls.f cgetsqrhrt.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f
213213
cgelq.f claswlq.f clamswlq.f cgemlq.f
214214
ctplqt.f ctplqt2.f ctpmlqt.f
215215
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
216216
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
217217
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
218218
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
219-
cungtsqr.f cunhr_col.f )
219+
cungtsqr.f cungtsqr_row.f cunhr_col.f )
220220

221221
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
222222
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@@ -253,7 +253,7 @@ set(DLASRC
253253
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
254254
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
255255
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
256-
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
256+
dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
257257
dlargv.f dlarrv.f dlartv.f
258258
dlarz.f dlarzb.f dlarzt.f dlasy2.f
259259
dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
@@ -300,14 +300,14 @@ set(DLASRC
300300
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
301301
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f
302302
dgelqt.f dgelqt3.f dgemlqt.f
303-
dgetsls.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f
303+
dgetsls.f dgetsqrhrt.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f
304304
dgelq.f dlaswlq.f dlamswlq.f dgemlq.f
305305
dtplqt.f dtplqt2.f dtpmlqt.f
306306
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
307307
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
308308
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
309309
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
310-
dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f )
310+
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f )
311311

312312
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
313313
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@@ -360,7 +360,7 @@ set(ZLASRC
360360
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
361361
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
362362
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
363-
zlarcm.f zlarf.f zlarfb.f
363+
zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f
364364
zlarfg.f zlarfgp.f zlarft.f
365365
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
366366
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
@@ -402,13 +402,13 @@ set(ZLASRC
402402
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f
403403
ztplqt.f ztplqt2.f ztpmlqt.f
404404
zgelqt.f zgelqt3.f zgemlqt.f
405-
zgetsls.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f
405+
zgetsls.f zgetsqrhrt.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f
406406
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
407407
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
408408
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
409409
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
410410
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
411-
zungtsqr.f zunhr_col.f)
411+
zungtsqr.f zungtsqr_row.f zunhr_col.f)
412412

413413
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
414414
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f

0 commit comments

Comments
 (0)