Skip to content

Commit b29fd48

Browse files
authored
Merge branch 'develop' into win_tidy
2 parents 0a7ae32 + b1ae777 commit b29fd48

File tree

621 files changed

+96927
-21876
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

621 files changed

+96927
-21876
lines changed

.github/workflows/c910v.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ jobs:
1414
if: "github.repository == 'OpenMathLib/OpenBLAS'"
1515
runs-on: ubuntu-latest
1616
env:
17-
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1663142514282
18-
toolchain_file_name: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.6.1-20220906.tar.gz
17+
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1698113812618
18+
toolchain_file_name: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0-20231018.tar.gz
1919
strategy:
2020
fail-fast: false
2121
matrix:
@@ -76,7 +76,7 @@ jobs:
7676
run: |
7777
wget ${xuetie_toolchain}/${toolchain_file_name}
7878
tar -xvf ${toolchain_file_name} -C /opt
79-
export PATH="/opt/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.6.1/bin:$PATH"
79+
export PATH="/opt/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0/bin:$PATH"
8080
8181
make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
8282

.github/workflows/loongarch64.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@ jobs:
1616
include:
1717
- target: LOONGSONGENERIC
1818
triple: loongarch64-unknown-linux-gnu
19-
opts: NO_SHARED=1 TARGET=LOONGSONGENERIC
19+
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
2020
- target: LOONGSON3R5
2121
triple: loongarch64-unknown-linux-gnu
22-
opts: NO_SHARED=1 TARGET=LOONGSON3R5
22+
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
2323
- target: LOONGSON2K1000
2424
triple: loongarch64-unknown-linux-gnu
25-
opts: NO_SHARED=1 TARGET=LOONGSON2K1000
25+
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
2626
- target: DYNAMIC_ARCH
2727
triple: loongarch64-unknown-linux-gnu
2828
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
@@ -40,8 +40,9 @@ jobs:
4040
4141
- name: Download and install loongarch64-toolchain
4242
run: |
43-
wget https://github.com/loongson/build-tools/releases/download/2022.09.06/loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz
44-
tar -xf loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -C /opt
43+
wget https://github.com/sunhaiyong1978/CLFS-for-LoongArch/releases/download/8.1/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz
44+
#wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz
45+
tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt
4546
4647
- name: Set env
4748
run: |

CMakeLists.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ project(OpenBLAS C ASM)
88

99
set(OpenBLAS_MAJOR_VERSION 0)
1010
set(OpenBLAS_MINOR_VERSION 3)
11-
set(OpenBLAS_PATCH_VERSION 25.dev)
11+
set(OpenBLAS_PATCH_VERSION 26.dev)
1212

1313
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1414

@@ -256,15 +256,15 @@ if (APPLE AND BUILD_SHARED_LIBS AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
256256
set (CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1)
257257
set (CMAKE_Fortran_CREATE_SHARED_LIBRARY
258258
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
259-
"sh -c '${CMAKE_AR} -ru libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
259+
"sh -c '${CMAKE_AR} -rs libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
260260
"sh -c 'echo \"\" | ${CMAKE_Fortran_COMPILER} -o dummy.o -c -x f95-cpp-input - '"
261-
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load dummy.o -undefined dynamic_lookup -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'"
261+
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load dummy.o -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'"
262262
"sh -c 'ls -l ${CMAKE_BINARY_DIR}/lib'")
263263
else ()
264264
set (CMAKE_C_CREATE_SHARED_LIBRARY
265265
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
266-
"sh -c '${CMAKE_AR} -ru libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
267-
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load -undefined dynamic_lookup -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'")
266+
"sh -c '${CMAKE_AR} -rs libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
267+
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'")
268268
endif ()
269269
endif()
270270

CONTRIBUTORS.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,4 +220,6 @@ In chronological order:
220220
* Mark Seminatore <https://github.com/mseminatore>
221221
* [2023-11-09] Improve Windows threading performance scaling
222222
* [2024-02-09] Introduce MT_TRACE facility and improve code consistency
223-
223+
224+
* Dirreke <https://github.com/Dirreke>
225+
* [2024-01-16] Add basic support for the CSKY architecture

Changelog.txt

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,49 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.26
4+
2-Jan-2024
5+
6+
general:
7+
- improved the version of openblas.pc that is created by the CMAKE build
8+
- fixed a CMAKE-specific build problem on older versions of MacOS
9+
- worked around linking problems on old versions of MacOS
10+
- corrected installation location of the lapacke_mangling header in CMAKE builds
11+
- added type declarations for complex variables to the MSVC-specific parts of the LAPACK header
12+
- significantly sped up ?GESV for small problem sizes by introducing a lower bound for multithreading
13+
- imported additions and corrections from the Reference-LAPACK project:
14+
- added new LAPACK functions for truncated QR with pivoting (Reference-LAPACK PRs 891&941)
15+
- handle miscalculation of minimum work array size in corner cases (Reference-LAPACK PR 942)
16+
- fixed use of uninitialized variables in ?GEDMD and improved inline documentation (PR 959)
17+
- fixed use of uninitialized variables (and consequential failures) in ?BBCSD (PR 967)
18+
- added tests for the recently introduced Dynamic Mode Decomposition functions (PR 736)
19+
- fixed several memory leaks in the LAPACK testsuite (PR 953)
20+
- fixed counting of testsuite results by the Python script (PR 954)
21+
22+
x86-64:
23+
- fixed computation of CASUM on SkylakeX and newer targets in the special
24+
case that AVX512 is not supported by the compiler or operating environment
25+
- fixed potential undefined behaviour in the CASUM/ZASUM kernels for AVX512 targets
26+
- worked around a problem in the pre-AVX kernels for GEMV
27+
- sped up the thread management code on MS Windows
28+
29+
arm64:
30+
- fixed building of the LAPACK testsuite with Xcode 15 on Apple M1 and newer
31+
- sped up the thread management code on MS Windows
32+
- sped up SGEMM and DGEMM on Neoverse V1 and N1
33+
- sped up ?DOT on SVE-capable targets
34+
- reduced the number of targets in DYNAMIC_ARCH builds by eliminating functionally equivalent ones
35+
- included support for Apple M1 and newer targets in DYNAMIC_ARCH builds
36+
37+
power:
38+
- improved the SGEMM kernel for POWER10
39+
- fixed compilation with (very) old versions of gcc
40+
- fixed detection of old 32bit PPC targets in CMAKE-based builds
41+
- added autodetection of the POWERPC 7400 subtype
42+
- fixed CMAKE-based compilation for PPCG4 and PPC970 targets
43+
44+
loongarch64:
45+
- added and improved optimized kernels for almost all BLAS functions
46+
247
====================================================================
348
Version 0.3.25
449
12-Nov-2023

Makefile.arm64

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -104,19 +104,25 @@ ifneq ($(F_COMPILER), NAG)
104104
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
105105
endif
106106
else
107-
CCOMMON_OPT += -march=armv8.4-a+sve -mtune=native
107+
CCOMMON_OPT += -march=armv8.4-a+sve
108+
ifneq ($(CROSS), 1)
109+
CCOMMON_OPT += -mtune=native
110+
endif
108111
ifneq ($(F_COMPILER), NAG)
109-
FCOMMON_OPT += -march=armv8.4-a -mtune=native
112+
FCOMMON_OPT += -march=armv8.4-a
113+
ifneq ($(CROSS), 1)
114+
FCOMMON_OPT += -mtune=native
115+
endif
110116
endif
111117
endif
112118
else
113-
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
119+
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
114120
ifneq ($(F_COMPILER), NAG)
115121
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
116122
endif
117123
endif
118124
else
119-
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
125+
CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72
120126
ifneq ($(F_COMPILER), NAG)
121127
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
122128
endif
@@ -132,25 +138,31 @@ ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
132138
ifneq ($(OSNAME), Darwin)
133139
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
134140
else
135-
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
141+
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
136142
endif
137143
ifneq ($(F_COMPILER), NAG)
138144
FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
139145
endif
140146
else
141-
CCOMMON_OPT += -march=armv8.5-a+sve -mtune=native
147+
CCOMMON_OPT += -march=armv8.5-a+sve
148+
ifneq ($(CROSS), 1)
149+
CCOMMON_OPT += -mtune=native
150+
endif
142151
ifneq ($(F_COMPILER), NAG)
143-
FCOMMON_OPT += -march=armv8.5-a -mtune=native
152+
FCOMMON_OPT += -march=armv8.5-a
153+
ifneq ($(CROSS), 1)
154+
FCOMMON_OPT += -mtune=native
155+
endif
144156
endif
145157
endif
146158
else
147-
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
159+
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
148160
ifneq ($(F_COMPILER), NAG)
149161
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
150162
endif
151163
endif
152164
else
153-
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
165+
CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72
154166
ifneq ($(F_COMPILER), NAG)
155167
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
156168
endif

Makefile.csky

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
ifeq ($(CORE), CK860FV)
2+
CCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
3+
FCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float -static
4+
endif

Makefile.prebuild

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,26 @@ ifeq ($(TARGET), C910V)
5555
TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d
5656
endif
5757

58+
ifeq ($(TARGET), CK860FV)
59+
TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
60+
endif
61+
62+
ifeq ($(TARGET), x280)
63+
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
64+
endif
65+
66+
ifeq ($(TARGET), RISCV64_ZVL256B)
67+
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
68+
endif
69+
70+
ifeq ($(TARGET), RISCV64_ZVL128B)
71+
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
72+
endif
73+
74+
ifeq ($(TARGET), RISCV64_GENERIC)
75+
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
76+
endif
77+
5878
all: getarch_2nd
5979
./getarch_2nd 0 >> $(TARGET_MAKE)
6080
./getarch_2nd 1 >> $(TARGET_CONF)

Makefile.riscv64

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,19 @@ ifeq ($(CORE), C910V)
22
CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920
33
FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static
44
endif
5+
ifeq ($(CORE), x280)
6+
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
7+
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
8+
endif
9+
ifeq ($(CORE), RISCV64_ZVL256B)
10+
CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d
11+
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
12+
endif
13+
ifeq ($(CORE), RISCV64_ZVL128B)
14+
CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
15+
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
16+
endif
17+
ifeq ($(CORE), RISCV64_GENERIC)
18+
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
19+
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static
20+
endif

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.25.dev
6+
VERSION = 0.3.26.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

0 commit comments

Comments
 (0)