Skip to content

Commit c125866

Browse files
authored
Merge branch 'OpenMathLib:develop' into m3m_exprec
2 parents d04686a + 36b0fb3 commit c125866

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+1125
-264
lines changed

.github/workflows/dynamic_arch.yml

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ jobs:
158158
strategy:
159159
fail-fast: false
160160
matrix:
161-
msystem: [UCRT64, MINGW32, CLANG64, CLANG32]
161+
msystem: [UCRT64, MINGW32, CLANG64]
162162
idx: [int32, int64]
163163
build-type: [Release]
164164
include:
@@ -174,14 +174,6 @@ jobs:
174174
idx: int32
175175
target-prefix: mingw-w64-clang-x86_64
176176
fc-pkg: fc
177-
# Compiling with Flang 16 seems to cause test errors on machines
178-
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
179-
no-avx512-flags: -DNO_AVX512=1
180-
- msystem: CLANG32
181-
idx: int32
182-
target-prefix: mingw-w64-clang-i686
183-
fc-pkg: cc
184-
c-lapack-flags: -DC_LAPACK=ON
185177
- msystem: UCRT64
186178
idx: int64
187179
idx64-flags: -DBINARY=64 -DINTERFACE64=1
@@ -192,9 +184,6 @@ jobs:
192184
idx64-flags: -DBINARY=64 -DINTERFACE64=1
193185
target-prefix: mingw-w64-clang-x86_64
194186
fc-pkg: fc
195-
# Compiling with Flang 16 seems to cause test errors on machines
196-
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
197-
no-avx512-flags: -DNO_AVX512=1
198187
- msystem: UCRT64
199188
idx: int32
200189
target-prefix: mingw-w64-ucrt-x86_64
@@ -203,8 +192,6 @@ jobs:
203192
exclude:
204193
- msystem: MINGW32
205194
idx: int64
206-
- msystem: CLANG32
207-
idx: int64
208195

209196
defaults:
210197
run:
@@ -280,8 +267,6 @@ jobs:
280267
-DNUM_THREADS=64 \
281268
-DTARGET=CORE2 \
282269
${{ matrix.idx64-flags }} \
283-
${{ matrix.c-lapack-flags }} \
284-
${{ matrix.no-avx512-flags }} \
285270
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
286271
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
287272
..

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
cmake_minimum_required(VERSION 3.16.0)
66

7+
set (CMAKE_ASM_SOURCE_FILE_EXTENSIONS "S")
78
project(OpenBLAS C ASM)
89

910
set(OpenBLAS_MAJOR_VERSION 0)

CONTRIBUTORS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,6 @@ In chronological order:
229229

230230
* Christopher Daley <https://github.com/cdaley>
231231
* [2024-01-24] Optimize GEMV forwarding on ARM64 systems
232+
233+
* Aniket P. Garade <https://github.com/garadeaniket> Sushil Pratap Singh <https://github.com/SushilPratap04> Juliya James <https://github.com/Juliya32>
234+
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE

Makefile.arm64

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,4 +351,31 @@ endif
351351

352352
endif
353353

354+
else
355+
# NVIDIA HPC options necessary to enable SVE in the compiler
356+
ifeq ($(CORE), THUNDERX2T99)
357+
CCOMMON_OPT += -tp=thunderx2t99
358+
FCOMMON_OPT += -tp=thunderx2t99
359+
endif
360+
ifeq ($(CORE), NEOVERSEN1)
361+
CCOMMON_OPT += -tp=neoverse-n1
362+
FCOMMON_OPT += -tp=neoverse-n1
363+
endif
364+
ifeq ($(CORE), NEOVERSEV1)
365+
CCOMMON_OPT += -tp=neoverse-v1
366+
FCOMMON_OPT += -tp=neoverse-v1
367+
endif
368+
ifeq ($(CORE), NEOVERSEV2)
369+
CCOMMON_OPT += -tp=neoverse-v2
370+
FCOMMON_OPT += -tp=neoverse-v2
371+
endif
372+
ifeq ($(CORE), ARMV8SVE)
373+
CCOMMON_OPT += -tp=neoverse-v2
374+
FCOMMON_OPT += -tp=neoverse-v2
375+
endif
376+
ifeq ($(CORE), ARMV9SVE)
377+
CCOMMON_OPT += -tp=neoverse-v2
378+
FCOMMON_OPT += -tp=neoverse-v2
379+
endif
380+
354381
endif

Makefile.install

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -191,22 +191,29 @@ endif
191191
#Generating OpenBLASConfig.cmake
192192
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
193193
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
194-
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
194+
# Append with >>: the SET(OpenBLAS_VERSION ...) echo just above creates the file with >, so truncating again here would drop the version line.
@echo "file(REAL_PATH \"../../..\" _OpenBLAS_ROOT_DIR BASE_DIRECTORY \$${CMAKE_CURRENT_LIST_DIR} )" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
195+
@echo "SET(OpenBLAS_INCLUDE_DIRS \$${_OpenBLAS_ROOT_DIR}/include)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
195196

196197
ifneq ($(NO_SHARED),1)
197198
#ifeq logical or
198199
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
199-
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
200+
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
200201
endif
201202
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
202-
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
203+
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/bin/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
203204
endif
204205
ifeq ($(OSNAME), Darwin)
205-
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
206+
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
207+
endif
208+
# Append the OpenBLAS::OpenBLAS imported-target definition to the generated config file.
# Each echo must be redirected; without it the text is only printed during install.
@echo "add_library(OpenBLAS::OpenBLAS SHARED IMPORTED)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
209+
@echo "target_include_directories(OpenBLAS::OpenBLAS INTERFACE \$${OpenBLAS_INCLUDE_DIRS})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
210+
# NOTE(review): IMPORTED_LOCATION is only emitted for Windows/Cygwin here - confirm whether other platforms need it too.
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
211+
@echo "set_property(TARGET OpenBLAS::OpenBLAS PROPERTY IMPORTED_LOCATION \$${OpenBLAS_LIBRARIES})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
212+
@echo "set_property(TARGET OpenBLAS::OpenBLAS PROPERTY IMPORTED_IMPLIB \$${_OpenBLAS_ROOT_DIR}/lib/libopenblas.lib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
206213
endif
207214
else
208215
#only static
209-
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
216+
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
210217
endif
211218
#Generating OpenBLASConfigVersion.cmake
212219
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)

Makefile.system

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,6 +1613,13 @@ NO_AFFINITY = 1
16131613
endif
16141614
endif
16151615

1616+
ifeq ($(ARCH), POWER)
1617+
ifeq ($(DEBUG), 1)
1618+
CCOMMON_OPT := $(filter-out -O%, $(CCOMMON_OPT)) -O0
1619+
FCOMMON_OPT := $(filter-out -O%, $(FCOMMON_OPT)) -O0
1620+
endif
1621+
endif
1622+
16161623
ifdef NO_AFFINITY
16171624
ifeq ($(NO_AFFINITY), 0)
16181625
override undefine NO_AFFINITY

README.md

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,36 +15,44 @@ OSUOSL IBMZ-CI [![Build Status](http://ibmz-ci.osuosl.org/buildStatus/icon?job=O
1515

1616
OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version.
1717

18-
Please read the documentation in the OpenBLAS folder: <https://github.com/OpenMathLib/OpenBLAS/docs>.
18+
For more information about OpenBLAS, please see:
19+
20+
- The documentation at [openmathlib.org/OpenBLAS/docs/](http://www.openmathlib.org/OpenBLAS/docs),
21+
- The home page at [openmathlib.org/OpenBLAS/](http://www.openmathlib.org/OpenBLAS).
1922

2023
For a general introduction to the BLAS routines, please refer to the extensive documentation of their reference implementation hosted at netlib:
2124
<https://www.netlib.org/blas>. On that site you will likewise find documentation for the reference implementation of the higher-level library LAPACK - the **L**inear **A**lgebra **Pack**age that comes included with OpenBLAS. If you are looking for a general primer or refresher on Linear Algebra, the set of six
22-
20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare <https://ocw.mit.edu/resources/res-18-010-a-2020-vision-of-linear-algebra-spring-2020/> or Youtube <https://www.youtube.com/playlist?list=PLUl4u3cNGP61iQEFiWLE21EJCxwmWvvek> may be helpful.
25+
20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare [here](https://ocw.mit.edu/resources/res-18-010-a-2020-vision-of-linear-algebra-spring-2020/) or YouTube [here](https://www.youtube.com/playlist?list=PLUl4u3cNGP61iQEFiWLE21EJCxwmWvvek) may be helpful.
2326

2427
## Binary Packages
2528

2629
We provide official binary packages for the following platform:
2730

2831
* Windows x86/x86_64
2932

30-
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the Releases section of the github project page, [https://github.com/OpenMathLib/OpenBLAS/releases](https://github.com/OpenMathLib/OpenBLAS/releases).
33+
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the [Releases section of the GitHub project page](https://github.com/OpenMathLib/OpenBLAS/releases).
34+
35+
OpenBLAS is also packaged for many package managers - see [the installation section of the docs](http://www.openmathlib.org/OpenBLAS/docs/install/) for details.
3136

3237
## Installation from Source
3338

34-
Download from project homepage, https://github.com/OpenMathLib/OpenBLAS/, or check out the code
35-
using Git from https://github.com/OpenMathLib/OpenBLAS.git. (If you want the most up to date version, be
36-
sure to use the develop branch - master is several years out of date due to a change of maintainership.)
37-
Buildtime parameters can be chosen in Makefile.rule, see there for a short description of each option.
38-
Most can also be given directly on the make or cmake command line.
39+
Obtain the source code from https://github.com/OpenMathLib/OpenBLAS/. Note that the default branch
40+
is `develop` (a `master` branch is still present, but far out of date).
41+
42+
Build-time parameters can be chosen in `Makefile.rule`, see there for a short description of each option.
43+
Most options can also be given directly on the command line as parameters to your `make` or `cmake` invocation.
3944

4045
### Dependencies
4146

4247
Building OpenBLAS requires the following to be installed:
4348

4449
* GNU Make or CMake
45-
* A C compiler, e.g. GCC or Clang
50+
* A C compiler, e.g. GCC or Clang
4651
* A Fortran compiler (optional, for LAPACK)
4752

53+
In general, using a recent version of the compiler is strongly recommended.
54+
If a Fortran compiler is not available, it is possible to compile an older version of the included LAPACK
55+
that has been machine-translated to C.
4856

4957
### Normal compile
5058

@@ -60,6 +68,9 @@ For building with `cmake`, the usual conventions apply, i.e. create a build dire
6068
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any
6169
build options you plan to set.
6270

71+
For more details, see the [Building from source](http://www.openmathlib.org/OpenBLAS/docs/install/#building-from-source)
72+
section in the docs.
73+
6374
### Cross compile
6475

6576
Set `CC` and `FC` to point to the cross toolchains, and if you use `make`, also set `HOSTCC` to your host C compiler.
@@ -76,10 +87,12 @@ Examples:
7687
make CC="i686-w64-mingw32-gcc -Bstatic" FC="i686-w64-mingw32-gfortran -static-libgfortran" TARGET=HASWELL BINARY=32 CROSS=1 NUM_THREADS=20 CONSISTENT_FPCSR=1 HOSTCC=gcc
7788
```
7889

79-
You can find instructions for other cases both in the "Supported Systems" section below and in the docs folder. The .yml scripts included with the sources (which contain the
90+
You can find instructions for other cases both in the "Supported Systems" section below and in
91+
the [Building from source docs](http://www.openmathlib.org/OpenBLAS/docs/install).
92+
The `.yml` scripts included with the sources (which contain the
8093
build scripts for the "continuous integration" (CI) build tests automatically run on every proposed change to the sources) may also provide additional hints.
8194

82-
When compiling for a more modern CPU TARGET of the same architecture, e.g. TARGET=SKYLAKEX on a HASWELL host, option "CROSS=1" can be used to suppress the automatic invocation of the tests at the end of the build.
95+
When compiling for a more modern CPU target of the same architecture, e.g. `TARGET=SKYLAKEX` on a `HASWELL` host, option `CROSS=1` can be used to suppress the automatic invocation of the tests at the end of the build.
8396

8497
### Debug version
8598

@@ -325,11 +338,14 @@ Please see Changelog.txt.
325338
326339
## Troubleshooting
327340
328-
* Please read the [FAQ](https://github.com/OpenMathLib/OpenBLAS/docs/faq,md) in the docs folder first.
341+
* Please read the [FAQ](http://www.openmathlib.org/OpenBLAS/docs/faq) section of the docs first.
329342
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
330343
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
331344
Clang 3.0 will generate the wrong AVX binary code.
332-
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake AVX512 kernels.
345+
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake/CooperLake AVX512 kernels.
346+
* Please use LLVM version 18 and above (version 19 and above on Windows) if you plan to use
347+
its new flang compiler for Fortran.
348+
* Please use GCC version 11 and above to compile OpenBLAS on the POWER architecture.
333349
* The number of CPUs/cores should be less than or equal to 256. On Linux `x86_64` (`amd64`),
334350
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build
335351
the library with `BIGNUMA=1`.
@@ -350,4 +366,4 @@ Please see Changelog.txt.
350366
351367
## Donation
352368
353-
Please read [this wiki page](https://github.com/xianyi/OpenBLAS/wiki/Donation).
369+
Please see [the donations section](http://www.openmathlib.org/OpenBLAS/docs/about/#donations) in the docs.

benchmark/pybench/README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,17 @@ have all what it takes to build OpenBLAS from source, plus `python` and
4343
$ python -mpip install numpy meson ninja pytest pytest-benchmark
4444
```
4545

46-
The benchmark syntax is consistent with that of `pytest-benchmark` framework. The incantation to run the suite locally is `$ pytest benchmark/pybench/benchmarks/test_blas.py`.
46+
The Meson build system looks for the installed OpenBLAS using pkg-config, so the `openblas.pc` file created during the OpenBLAS build needs
47+
to be somewhere on the search path of pkg-config or in a folder pointed to by the environment variable `PKG_CONFIG_PATH`.
48+
49+
If you want to build the benchmark suite using flang (or flang-new) instead of gfortran for the Fortran parts, you currently need
50+
to edit the meson.build file and change the line `'fortran_std=legacy'` to `'fortran_std=none'` to work around an incompatibility
51+
between Meson and flang.
52+
53+
If you are building and running the benchmark under MS Windows, it may be necessary to copy the generated openblas_wrap module from
54+
your build folder to the `benchmarks` folder.
55+
56+
The benchmark syntax is consistent with that of the `pytest-benchmark` framework. The incantation to run the suite locally is `$ pytest benchmark/pybench/benchmarks/bench_blas.py`.
4757

4858
An ASV compatible benchmark suite is planned but currently not implemented.
4959

c_check

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ hostarch=`uname -m | sed -e 's/i.86/x86/'`
66
if [ "$hostos" = "AIX" ] || [ "$hostos" = "SunOS" ]; then
77
hostarch=`uname -p`
88
fi
9+
if [ "$hostarch" = "evbarm" ]; then
10+
hostarch=`uname -p`
11+
fi
912
case "$hostarch" in
1013
amd64) hostarch=x86_64 ;;
1114
arm*) [ "$hostarch" = "arm64" ] || hostarch='arm' ;;

cmake/f_check.cmake

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,15 @@ if (NOT ONLY_CBLAS)
4545

4646
# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
4747
# TODO: set FEXTRALIB flags a la f_check?
48-
48+
# Every configuration except the IntelLLVM Fortran compiler on Windows gets the trailing-underscore
# symbol convention; operands are quoted so if() always compares literal strings.
if (NOT ("${CMAKE_SYSTEM_NAME}" MATCHES "Windows" AND "x${CMAKE_Fortran_COMPILER_ID}" MATCHES "IntelLLVM"))
4949
set(BU "_")
5050
file(APPEND ${TARGET_CONF_TEMP}
5151
"#define BUNDERSCORE _\n"
5252
"#define NEEDBUNDERSCORE 1\n"
5353
"#define NEED2UNDERSCORES 0\n")
54-
54+
else ()
55+
set (FCOMMON_OPT "${FCOMMON_OPT} /fp:precise /recursive /names:lowercase /assume:nounderscore")
56+
endif()
5557
else ()
5658

5759
#When we only build CBLAS, we set NOFORTRAN=2

0 commit comments

Comments
 (0)