Skip to content

Commit 42909ce

Browse files
authored
Merge branch 'xianyi:develop' into issue4130
2 parents a2a1845 + 12d8f21 commit 42909ce

File tree

211 files changed

+9139
-3330
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

211 files changed

+9139
-3330
lines changed

.cirrus.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,15 @@ task:
3030
- cd build
3131
- cmake -DTARGET=VORTEX -DCMAKE_C_COMPILER=clang -DBUILD_SHARED_LIBS=ON ..
3232
- make
33+
34+
task:
35+
name: AppleM1/GCC/MAKE/OPENMP
36+
compile_script:
37+
- brew install gcc@11
38+
- export PATH=/opt/homebrew/bin:$PATH
39+
- export LDFLAGS="-L/opt/homebrew/lib"
40+
- export CPPFLAGS="-I/opt/homebrew/include"
41+
- make CC=gcc-11 FC=gfortran-11 USE_OPENMP=1
3342

3443
macos_instance:
3544
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest

.github/workflows/dynamic_arch.yml

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -151,40 +151,53 @@ jobs:
151151
strategy:
152152
fail-fast: false
153153
matrix:
154-
msystem: [MINGW64, MINGW32, CLANG64]
154+
msystem: [MINGW64, MINGW32, CLANG64, CLANG32]
155155
idx: [int32, int64]
156156
build-type: [Release]
157157
include:
158158
- msystem: MINGW64
159159
idx: int32
160160
target-prefix: mingw-w64-x86_64
161-
fc-pkg: mingw-w64-x86_64-gcc-fortran
161+
fc-pkg: fc
162162
- msystem: MINGW32
163163
idx: int32
164164
target-prefix: mingw-w64-i686
165-
fc-pkg: mingw-w64-i686-gcc-fortran
165+
fc-pkg: fc
166166
- msystem: CLANG64
167167
idx: int32
168168
target-prefix: mingw-w64-clang-x86_64
169+
fc-pkg: fc
170+
# Compiling with Flang 16 seems to cause test errors on machines
171+
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
172+
no-avx512-flags: -DNO_AVX512=1
173+
- msystem: CLANG32
174+
idx: int32
175+
target-prefix: mingw-w64-clang-i686
176+
fc-pkg: cc
169177
c-lapack-flags: -DC_LAPACK=ON
170178
- msystem: MINGW64
171179
idx: int64
172180
idx64-flags: -DBINARY=64 -DINTERFACE64=1
173181
target-prefix: mingw-w64-x86_64
174-
fc-pkg: mingw-w64-x86_64-gcc-fortran
182+
fc-pkg: fc
175183
- msystem: CLANG64
176184
idx: int64
177185
idx64-flags: -DBINARY=64 -DINTERFACE64=1
178186
target-prefix: mingw-w64-clang-x86_64
179-
c-lapack-flags: -DC_LAPACK=ON
187+
fc-pkg: fc
188+
# Compiling with Flang 16 seems to cause test errors on machines
189+
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
190+
no-avx512-flags: -DNO_AVX512=1
180191
- msystem: MINGW64
181192
idx: int32
182193
target-prefix: mingw-w64-x86_64
183-
fc-pkg: mingw-w64-x86_64-gcc-fortran
194+
fc-pkg: fc
184195
build-type: None
185196
exclude:
186197
- msystem: MINGW32
187198
idx: int64
199+
- msystem: CLANG32
200+
idx: int64
188201

189202
defaults:
190203
run:
@@ -209,7 +222,7 @@ jobs:
209222
install: >-
210223
base-devel
211224
${{ matrix.target-prefix }}-cc
212-
${{ matrix.fc-pkg }}
225+
${{ matrix.target-prefix }}-${{ matrix.fc-pkg }}
213226
${{ matrix.target-prefix }}-cmake
214227
${{ matrix.target-prefix }}-ninja
215228
${{ matrix.target-prefix }}-ccache
@@ -261,6 +274,7 @@ jobs:
261274
-DTARGET=CORE2 \
262275
${{ matrix.idx64-flags }} \
263276
${{ matrix.c-lapack-flags }} \
277+
${{ matrix.no-avx512-flags }} \
264278
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
265279
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
266280
..
@@ -280,9 +294,22 @@ jobs:
280294
key: ${{ steps.ccache-prepare.outputs.key }}
281295

282296
- name: Run tests
297+
id: run-ctest
283298
timeout-minutes: 60
284299
run: cd build && ctest
285300

301+
- name: Re-run tests
302+
if: always() && (steps.run-ctest.outcome == 'failure')
303+
timeout-minutes: 60
304+
run: |
305+
cd build
306+
echo "::group::Re-run ctest"
307+
ctest --rerun-failed --output-on-failure || true
308+
echo "::endgroup::"
309+
echo "::group::Log from these tests"
310+
[ ! -f Testing/Temporary/LastTest.log ] || cat Testing/Temporary/LastTest.log
311+
echo "::endgroup::"
312+
286313
287314
cross_build:
288315
runs-on: ubuntu-22.04

.github/workflows/loongarch64.yml

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
name: loongarch64 qemu test
2+
3+
on: [push, pull_request]
4+
5+
jobs:
6+
TEST:
7+
runs-on: ubuntu-latest
8+
strategy:
9+
fail-fast: false
10+
matrix:
11+
include:
12+
- target: LOONGSONGENERIC
13+
triple: loongarch64-unknown-linux-gnu
14+
opts: NO_SHARED=1 TARGET=LOONGSONGENERIC
15+
- target: LOONGSON3R5
16+
triple: loongarch64-unknown-linux-gnu
17+
opts: NO_SHARED=1 TARGET=LOONGSON3R5
18+
- target: LOONGSON2K1000
19+
triple: loongarch64-unknown-linux-gnu
20+
opts: NO_SHARED=1 TARGET=LOONGSON2K1000
21+
22+
steps:
23+
- name: Checkout repository
24+
uses: actions/checkout@v3
25+
26+
- name: Install APT deps
27+
run: |
28+
sudo add-apt-repository ppa:savoury1/virtualisation
29+
sudo apt-get update
30+
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
31+
qemu-user-static
32+
33+
- name: Download and install loongarch64-toolchain
34+
run: |
35+
wget https://github.com/loongson/build-tools/releases/download/2022.09.06/loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz
36+
tar -xf loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -C /opt
37+
38+
- name: Set env
39+
run: |
40+
echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
41+
echo "PATH=$GITHUB_WORKSPACE:/opt/cross-tools/bin:$PATH" >> $GITHUB_ENV
42+
43+
- name: Compilation cache
44+
uses: actions/cache@v3
45+
with:
46+
path: ~/.ccache
47+
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
48+
restore-keys: |
49+
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
50+
ccache-${{ runner.os }}-${{ matrix.target }}
51+
52+
- name: Configure ccache
53+
run: |
54+
test -d ~/.ccache || mkdir -p ~/.ccache
55+
echo "max_size = 300M" > ~/.ccache/ccache.conf
56+
echo "compression = true" >> ~/.ccache/ccache.conf
57+
ccache -s
58+
59+
- name: Disable utest dsdot:dsdot_n_1
60+
run: |
61+
echo -n > utest/test_dsdot.c
62+
echo "Due to the qemu versions 7.2 causing utest cases to fail,"
63+
echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."
64+
65+
- name: Build OpenBLAS
66+
run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
67+
68+
- name: Test
69+
run: |
70+
qemu-loongarch64-static ./utest/openblas_utest
71+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
72+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
73+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
74+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
75+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
76+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
77+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
78+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
79+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
80+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
81+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
82+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
83+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
84+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
85+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
86+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
87+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
88+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
89+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
90+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
91+
rm -f ./test/?BLAT2.SUMM
92+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
93+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
94+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
95+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
96+
rm -f ./test/?BLAT2.SUMM
97+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
98+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
99+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
100+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
101+
rm -f ./test/?BLAT3.SUMM
102+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
103+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
104+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
105+
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
106+
rm -f ./test/?BLAT3.SUMM
107+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
108+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
109+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
110+
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ test/SBLAT3.SUMM
7272
test/ZBLAT2.SUMM
7373
test/ZBLAT3.SUMM
7474
test/SHBLAT3.SUMM
75+
test/SBBLAT3.SUMM
7576
test/cblat1
7677
test/cblat2
7778
test/cblat3
@@ -82,6 +83,7 @@ test/sblat1
8283
test/sblat2
8384
test/sblat3
8485
test/test_shgemm
86+
test/test_sbgemm
8587
test/zblat1
8688
test/zblat2
8789
test/zblat3

Jenkinsfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ pipeline {
77
stages {
88
stage('Build') {
99
steps {
10-
sh 'make'
10+
sh 'make clean && make'
1111
}
1212
}
1313
}

Jenkinsfile.pwr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ pipeline {
99
steps {
1010
sh 'sudo apt update'
1111
sh 'sudo apt install gfortran -y'
12-
sh 'make'
12+
sh 'make clean && make'
1313
}
1414
}
1515
}

Makefile.system

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,11 @@ GCCMINORVERSIONGTEQ4 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d
384384
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
385385
endif
386386

387+
ifeq ($(C_COMPILER), CLANG)
388+
CLANGVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
389+
CLANGVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12)
390+
endif
391+
387392
#
388393
# OS dependent settings
389394
#
@@ -668,6 +673,7 @@ DYNAMIC_CORE += NEOVERSEN1
668673
ifneq ($(NO_SVE), 1)
669674
DYNAMIC_CORE += NEOVERSEV1
670675
DYNAMIC_CORE += NEOVERSEN2
676+
DYNAMIC_CORE += ARMV8SVE
671677
endif
672678
DYNAMIC_CORE += CORTEXA55
673679
DYNAMIC_CORE += FALKOR
@@ -1086,8 +1092,9 @@ endif
10861092
endif
10871093
endif
10881094

1089-
ifeq ($(F_COMPILER), GFORTRAN)
1095+
ifeq ($(F_COMPILER), $(filter $(F_COMPILER),GFORTRAN FLANGNEW))
10901096
CCOMMON_OPT += -DF_INTERFACE_GFORT
1097+
ifeq ($(F_COMPILER), GFORTRAN)
10911098
FCOMMON_OPT += -Wall
10921099
# make single-threaded LAPACK calls thread-safe #1847
10931100
FCOMMON_OPT += -frecursive
@@ -1101,6 +1108,7 @@ EXTRALIB += -lgfortran
11011108
endif
11021109
endif
11031110
endif
1111+
endif
11041112
ifdef NO_BINARY_MODE
11051113
ifeq ($(ARCH), $(filter $(ARCH),mips64))
11061114
ifdef BINARY64
@@ -1767,6 +1775,8 @@ export TARGET_CORE
17671775
export NO_AVX512
17681776
export NO_AVX2
17691777
export BUILD_BFLOAT16
1778+
export NO_LSX
1779+
export NO_LASX
17701780

17711781
export SBGEMM_UNROLL_M
17721782
export SBGEMM_UNROLL_N

Makefile.x86_64

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,31 @@ endif
7575
ifeq ($(CORE), COOPERLAKE)
7676
ifndef NO_AVX512
7777
ifeq ($(C_COMPILER), GCC)
78-
# cooperlake support was added in 10.1
79-
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
80-
CCOMMON_OPT += -march=cooperlake
81-
ifneq ($(F_COMPILER), NAG)
82-
FCOMMON_OPT += -march=cooperlake
83-
endif
84-
else # gcc not support, fallback to avx512
85-
CCOMMON_OPT += -march=skylake-avx512
86-
ifneq ($(F_COMPILER), NAG)
87-
FCOMMON_OPT += -march=skylake-avx512
88-
endif
89-
endif
78+
# cooperlake support was added in 10.1
79+
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
80+
CCOMMON_OPT += -march=cooperlake
81+
ifneq ($(F_COMPILER), NAG)
82+
FCOMMON_OPT += -march=cooperlake
83+
endif
84+
else # gcc not support, fallback to avx512
85+
CCOMMON_OPT += -march=skylake-avx512
86+
ifneq ($(F_COMPILER), NAG)
87+
FCOMMON_OPT += -march=skylake-avx512
88+
endif
89+
endif
90+
else ifeq ($(C_COMPILER), CLANG)
91+
# cooperlake support was added in clang 9
92+
ifeq ($(CLANGVERSIONGTEQ9), 1)
93+
CCOMMON_OPT += -march=cooperlake
94+
ifneq ($(F_COMPILER), NAG)
95+
FCOMMON_OPT += -march=cooperlake
96+
endif
97+
else # not supported in clang, fallback to avx512
98+
CCOMMON_OPT += -march=skylake-avx512
99+
ifneq ($(F_COMPILER), NAG)
100+
FCOMMON_OPT += -march=skylake-avx512
101+
endif
102+
endif
90103
endif
91104
ifeq ($(OSNAME), CYGWIN_NT)
92105
CCOMMON_OPT += -fno-asynchronous-unwind-tables
@@ -104,18 +117,31 @@ endif
104117
ifeq ($(CORE), SAPPHIRERAPIDS)
105118
ifndef NO_AVX512
106119
ifeq ($(C_COMPILER), GCC)
107-
# sapphire rapids support was added in 11
108-
ifeq ($(GCCVERSIONGTEQ11), 1)
109-
CCOMMON_OPT += -march=sapphirerapids
110-
ifneq ($(F_COMPILER), NAG)
111-
FCOMMON_OPT += -march=sapphirerapids
112-
endif
113-
else # gcc not support, fallback to avx512
114-
CCOMMON_OPT += -march=skylake-avx512
115-
ifneq ($(F_COMPILER), NAG)
116-
FCOMMON_OPT += -march=skylake-avx512
117-
endif
118-
endif
120+
# sapphire rapids support was added in 11
121+
ifeq ($(GCCVERSIONGTEQ11), 1)
122+
CCOMMON_OPT += -march=sapphirerapids
123+
ifneq ($(F_COMPILER), NAG)
124+
FCOMMON_OPT += -march=sapphirerapids
125+
endif
126+
else # gcc not support, fallback to avx512
127+
CCOMMON_OPT += -march=skylake-avx512
128+
ifneq ($(F_COMPILER), NAG)
129+
FCOMMON_OPT += -march=skylake-avx512
130+
endif
131+
endif
132+
else ifeq ($(C_COMPILER), CLANG)
133+
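# sapphire rapids support was added in clang 12 (NOTE(review): the flags below still pass -march=cooperlake; confirm whether -march=sapphirerapids was intended)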
134+
ifeq ($(CLANGVERSIONGTEQ12), 1)
135+
CCOMMON_OPT += -march=cooperlake
136+
ifneq ($(F_COMPILER), NAG)
137+
FCOMMON_OPT += -march=cooperlake
138+
endif
139+
else # not supported in clang, fallback to avx512
140+
CCOMMON_OPT += -march=skylake-avx512
141+
ifneq ($(F_COMPILER), NAG)
142+
FCOMMON_OPT += -march=skylake-avx512
143+
endif
144+
endif
119145
endif
120146
ifeq ($(OSNAME), CYGWIN_NT)
121147
CCOMMON_OPT += -fno-asynchronous-unwind-tables

0 commit comments

Comments
 (0)