Skip to content

Commit 93747fb

Browse files
author
Chip-Kerchner
committed
Merge remote-tracking branch 'origin/develop' into power10Copies
2 parents 4e738e5 + d9f1478 commit 93747fb

File tree

177 files changed

+53536
-1372
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

177 files changed

+53536
-1372
lines changed

.cirrus.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ task:
2929
- mkdir build
3030
- cd build
3131
- cmake -DTARGET=VORTEX -DCMAKE_C_COMPILER=clang -DBUILD_SHARED_LIBS=ON ..
32-
- make
32+
- make -j 4
3333

3434
task:
3535
name: AppleM1/GCC/MAKE/OPENMP

CMakeLists.txt

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -249,21 +249,22 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "AIX|Android|Linux|FreeBSD|OpenBSD|NetBSD|Drago
249249
endif()
250250
endif()
251251

252-
if (APPLE AND DYNAMIC_ARCH AND BUILD_SHARED_LIBS)
252+
# It seems that this hack is no longer required since macOS 11 Big Sur
253+
if (APPLE AND BUILD_SHARED_LIBS AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
253254
set (CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
254255
if (NOT NOFORTRAN)
255256
set (CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1)
256257
set (CMAKE_Fortran_CREATE_SHARED_LIBRARY
257-
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ar -ru libopenblas.a && exit 0' "
258-
"sh -c 'ar -ru libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
258+
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
259+
"sh -c '${CMAKE_AR} -ru libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
259260
"sh -c 'echo \"\" | ${CMAKE_Fortran_COMPILER} -o dummy.o -c -x f95-cpp-input - '"
260-
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load dummy.o -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'"
261+
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load dummy.o -undefined dynamic_lookup -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'"
261262
"sh -c 'ls -l ${CMAKE_BINARY_DIR}/lib'")
262263
else ()
263264
set (CMAKE_C_CREATE_SHARED_LIBRARY
264-
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ar -ru libopenblas.a && exit 0' "
265-
"sh -c 'ar -ru libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
266-
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'")
265+
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
266+
"sh -c '${CMAKE_AR} -ru libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
267+
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load -undefined dynamic_lookup -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'")
267268
endif ()
268269
endif()
269270

@@ -541,7 +542,7 @@ if(NOT NO_LAPACKE)
541542
ADD_CUSTOM_TARGET(genlapacke
542543
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
543544
)
544-
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
545+
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
545546
endif()
546547

547548
# Install pkg-config files

CONTRIBUTORS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,6 @@ In chronological order:
216216

217217
* Pablo Romero <https://github.com/pablorcum>
218218
* [2022-08] Fix building from sources for QNX
219+
220+
* Mark Seminatore <https://github.com/mseminatore>
221+
* [2023-11-09] Improve Windows threading performance scaling

GotoBLAS_06WeirdPerformance.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
operation is finished.
1212

1313

14-
2. Simlar problem may happen under virtual machine. If supervisor
14+
2. Similar problem may happen under virtual machine. If supervisor
1515
allocates different cores for each scheduling, BLAS performance
1616
will be bad. This is because BLAS also utilizes all cache,
1717
unexpected re-schedule for different core may result in heavy

Makefile.power

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,19 @@ endif
1111

1212
ifeq ($(CORE), POWER10)
1313
ifneq ($(C_COMPILER), PGI)
14+
ifeq ($(C_COMPILER), GCC)
15+
ifeq ($(GCCVERSIONGTEQ10), 1)
1416
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
17+
else ifneq ($(GCCVERSIONGT4), 1)
18+
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
19+
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
20+
else
21+
$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
22+
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
23+
endif
24+
else
25+
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
26+
endif
1527
ifeq ($(F_COMPILER), IBM)
1628
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
1729
else

Makefile.system

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables |awk '/vers
407407
endif
408408
ifeq (x$(XCVER), x 15)
409409
CCOMMON_OPT += -Wl,-ld_classic
410+
FCOMMON_OPT += -Wl,-ld_classic
410411
endif
411412
endif
412413

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
202202

203203
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
204204

205-
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows to specify an individual list of targets to include instead of the default.
205+
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX, Cooper Lake, Sapphire Rapids. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows to specify an individual list of targets to include instead of the default.
206206

207207
`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
208208
Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano.

benchmark/trsv.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ int main(int argc, char *argv[]){
127127
long long muls = n*(n+1)/2.0;
128128
long long adds = (n - 1.0)*n/2.0;
129129

130-
fprintf(stderr, "%10d %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
130+
fprintf(stderr, "%10d : %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
131131
if(a != NULL){
132132
free(a);
133133
}

c_check

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,7 @@ if [ "$architecture" = "loongarch64" ]; then
199199
tmpd="$(mktemp -d)"
200200
tmplsx="$tmpd/lsx.c"
201201
codelsx='"vadd.b $vr0, $vr0, $vr0"'
202-
lsx_flags='-march=loongarch64 -mlsx'
203-
printf "#include <lsxintrin.h>\n\n" >> "$tmplsx"
202+
lsx_flags='-march=loongarch64'
204203
printf "void main(void){ __asm__ volatile(%s);}\n" "$codelsx" >> "$tmplsx"
205204
args="$lsx_flags -o $tmplsx.o $tmplsx"
206205
{
@@ -211,8 +210,7 @@ if [ "$architecture" = "loongarch64" ]; then
211210

212211
tmplasx="$tmpd/lasx.c"
213212
codelasx='"xvadd.b $xr0, $xr0, $xr0"'
214-
lasx_flags='-march=loongarch64 -mlasx'
215-
printf "#include <lasxintrin.h>\n\n" >> "$tmplasx"
213+
lasx_flags='-march=loongarch64'
216214
printf "void main(void){ __asm__ volatile(%s);}\n" "$codelasx" >> "$tmplasx"
217215
args="$lasx_flags -o $tmplasx.o $tmplasx"
218216
{

c_check.pl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,7 @@
241241
} else {
242242
$tmplsx = new File::Temp( SUFFIX => '.c' , UNLINK => 1 );
243243
$codelsx = '"vadd.b $vr0, $vr0, $vr0"';
244-
$lsx_flags = "-march=loongarch64 -mlsx";
245-
print $tmplsx "#include <lsxintrin.h>\n\n";
244+
$lsx_flags = "-march=loongarch64";
246245
print $tmplsx "void main(void){ __asm__ volatile($codelsx); }\n";
247246

248247
$args = "$lsx_flags -o $tmplsx.o $tmplsx";
@@ -257,8 +256,7 @@
257256

258257
$tmplasx = new File::Temp( SUFFIX => '.c' , UNLINK => 1 );
259258
$codelasx = '"xvadd.b $xr0, $xr0, $xr0"';
260-
$lasx_flags = "-march=loongarch64 -mlasx";
261-
print $tmplasx "#include <lasxintrin.h>\n\n";
259+
$lasx_flags = "-march=loongarch64";
262260
print $tmplasx "void main(void){ __asm__ volatile($codelasx); }\n";
263261

264262
$args = "$lasx_flags -o $tmplasx.o $tmplasx";

0 commit comments

Comments
 (0)