Skip to content

Commit 15ff556

Browse files
authored
Merge pull request #3536 from xianyi/develop
Update from develop for release 0.3.20
2 parents dec53e0 + 1564b63 commit 15ff556

File tree

106 files changed

+15212
-249
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+15212
-249
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,12 +251,14 @@ if (APPLE AND DYNAMIC_ARCH AND BUILD_SHARED_LIBS)
251251
set (CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1)
252252
set (CMAKE_Fortran_CREATE_SHARED_LIBRARY
253253
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ar -ru libopenblas.a && exit 0' "
254+
"sh -c 'ar -ru libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
254255
"sh -c 'echo \"\" | ${CMAKE_Fortran_COMPILER} -o dummy.o -c -x f95-cpp-input - '"
255256
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load dummy.o -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'"
256257
"sh -c 'ls -l ${CMAKE_BINARY_DIR}/lib'")
257258
else ()
258259
set (CMAKE_C_CREATE_SHARED_LIBRARY
259260
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ar -ru libopenblas.a && exit 0' "
261+
"sh -c 'ar -ru libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
260262
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'")
261263
endif ()
262264
endif()

CONTRIBUTORS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,9 @@ In chronological order:
201201
* Bine Brank <https://github.com/binebrank>
202202
* [2021-10-27] Add vector-length-agnostic DGEMM kernels for Arm SVE
203203
* [2021-11-20] Vector-length-agnostic Arm SVE copy routines for DGEMM, DTRMM, DSYMM
204+
* [2021-11-12] SVE kernels for SGEMM, STRMM and corresponding SVE copy functions
205+
* [2022-01-06] SVE kernels for CGEMM, ZGEMM, CTRMM, ZTRMM and corresponding SVE copy functions
206+
* [2022-01-18] SVE kernels and copy functions for TRSM
207+
208+
* Ilya Kurdyukov <https://github.com/ilyakurdyukov>
209+
* [2021-02-21] Add basic support for the Elbrus E2000 architecture

Changelog.txt

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,39 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.20
4+
20-Feb-2022
5+
6+
general:
7+
- some code cleanup, with added casts etc.
8+
- fixed obtaining the cpu count with OpenMP and OMP_PROC_BIND unset
9+
- fixed pivot index calculation by ?LASWP for negative increments other than one
10+
- fixed input argument check in LAPACK ?GEQRT2
11+
- improved the check for a Fortran compiler in CMAKE builds
12+
- disabled building OpenBLAS' optimized versions of LAPACK complex SPMV,SPR,SYMV,SYR with NO_LAPACK=1
13+
- fixed building of LAPACK on certain distributed filesystems with parallel gmake
14+
- fixed building the shared library on MacOS with classic flang
15+
16+
x86_64:
17+
- fixed cross-compilation with CMAKE for CORE2 target
18+
- fixed miscompilation of AVX512 code in DYNAMIC_ARCH builds
19+
- added support for the "incidental" AVX512 hardware in Alder Lake when enabled in BIOS
20+
21+
E2K:
22+
- add new architecture (Russian Elbrus E2000 family)
23+
24+
SPARC:
25+
- fix IMIN/IMAX
26+
27+
ARMV8:
28+
- added SVE-enabled CGEMM and ZGEMM kernels for ARMV8SVE and A64FX
29+
- added support for Neoverse N2 and V1 cpus
30+
31+
MIPS,MIPS64:
32+
- fixed autodetection of MSA capability
33+
34+
LOONGARCH64:
35+
- added an optimized DGEMM kernel
36+
237
====================================================================
338
Version 0.3.19
439
19-Dec-2021

Makefile.arm64

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,66 @@ endif
7878
endif
7979
endif
8080

81+
# Use a72 tunings because Neoverse-V1 is only available
82+
# in GCC>=9.4
83+
ifeq ($(CORE), NEOVERSEV1)
84+
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
85+
ifeq ($(GCCVERSIONGTEQ9), 1)
86+
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ10)))
87+
CCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
88+
ifneq ($(F_COMPILER), NAG)
89+
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
90+
endif
91+
else
92+
CCOMMON_OPT += -march=armv8.4-a -mtune=native
93+
ifneq ($(F_COMPILER), NAG)
94+
FCOMMON_OPT += -march=armv8.4-a -mtune=native
95+
endif
96+
endif
97+
else
98+
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
99+
ifneq ($(F_COMPILER), NAG)
100+
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
101+
endif
102+
endif
103+
else
104+
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
105+
ifneq ($(F_COMPILER), NAG)
106+
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
107+
endif
108+
endif
109+
endif
110+
111+
# Use a72 tunings because Neoverse-N2 is only available
112+
# in GCC>=9.4
113+
ifeq ($(CORE), NEOVERSEN2)
114+
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
115+
ifeq ($(GCCVERSIONGTEQ9), 1)
116+
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ10)))
117+
CCOMMON_OPT += -march=armv8.5-a -mtune=neoverse-n2
118+
ifneq ($(F_COMPILER), NAG)
119+
FCOMMON_OPT += -march=armv8.5-a -mtune=neoverse-n2
120+
endif
121+
else
122+
CCOMMON_OPT += -march=armv8.5-a -mtune=native
123+
ifneq ($(F_COMPILER), NAG)
124+
FCOMMON_OPT += -march=armv8.5-a -mtune=native
125+
endif
126+
endif
127+
else
128+
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
129+
ifneq ($(F_COMPILER), NAG)
130+
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
131+
endif
132+
endif
133+
else
134+
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
135+
ifneq ($(F_COMPILER), NAG)
136+
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
137+
endif
138+
endif
139+
endif
140+
81141
# Use a53 tunings because a55 is only available in GCC>=8.1
82142
ifeq ($(CORE), CORTEXA55)
83143
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))

Makefile.e2k

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
COPT = -Wall -O2 # -DGEMMTEST

Makefile.prebuild

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
export BINARY
44
export USE_OPENMP
55

6+
ifdef DYNAMIC_ARCH
7+
override HOST_CFLAGS += -DDYNAMIC_ARCH
8+
endif
9+
610
ifdef TARGET_CORE
711
TARGET_MAKE = Makefile_kernel.conf
812
TARGET_CONF = config_kernel.h

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.19
6+
VERSION = 0.3.19.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

Makefile.system

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ HAVE_GAS := $(shell $(AS) -v < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo
277277
GETARCH_FLAGS += -DHAVE_GAS=$(HAVE_GAS)
278278

279279
# Generating Makefile.conf and config.h
280-
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
280+
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) DYNAMIC_ARCH=$(DYNAMIC_ARCH) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
281281

282282
ifndef TARGET_CORE
283283
include $(TOPDIR)/Makefile.conf
@@ -374,6 +374,7 @@ else
374374
endif
375375
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 1)
376376
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
377+
GCCMINORVERSIONGTEQ4 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 4)
377378
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
378379
endif
379380

@@ -654,6 +655,8 @@ DYNAMIC_CORE += CORTEXA57
654655
DYNAMIC_CORE += CORTEXA72
655656
DYNAMIC_CORE += CORTEXA73
656657
DYNAMIC_CORE += NEOVERSEN1
658+
DYNAMIC_CORE += NEOVERSEV1
659+
DYNAMIC_CORE += NEOVERSEN2
657660
DYNAMIC_CORE += CORTEXA55
658661
DYNAMIC_CORE += FALKOR
659662
DYNAMIC_CORE += THUNDERX

TargetList.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ CORTEXA57
9393
CORTEXA72
9494
CORTEXA73
9595
NEOVERSEN1
96+
NEOVERSEV1
97+
NEOVERSEN2
9698
CORTEXA55
9799
EMAG8180
98100
FALKOR
@@ -113,3 +115,7 @@ C910V
113115

114116
11. LOONGARCH64:
115117
LOONGSON3R5
118+
119+
12. Elbrus E2000:
120+
E2K
121+

azure-pipelines.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ jobs:
224224
225225
- job: OSX_IOS_ARMV8
226226
pool:
227-
vmImage: 'macOS-10.15'
227+
vmImage: 'macOS-11'
228228
variables:
229229
CC: /Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
230230
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch arm64 -miphoneos-version-min=10.0

0 commit comments

Comments
 (0)