Skip to content

Commit ef4a7e3

Browse files
authored
Merge pull request #4127 from XiWeiGu/LoongArch64-CI
LoongArch64 CI
2 parents b63e458 + 96bf226 commit ef4a7e3

File tree

7 files changed

+208
-14
lines changed

7 files changed

+208
-14
lines changed

.github/workflows/loongarch64.yml

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
# CI workflow: cross-compile OpenBLAS for LoongArch64 and run its test
# programs under user-mode QEMU (qemu-loongarch64-static).
name: loongarch64 qemu test

on: [push, pull_request]

jobs:
  TEST:
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining targets finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          - target: LOONGSONGENERIC
            triple: loongarch64-unknown-linux-gnu
            opts: NO_SHARED=1 TARGET=LOONGSONGENERIC
          - target: LOONGSON3R5
            triple: loongarch64-unknown-linux-gnu
            opts: NO_SHARED=1 TARGET=LOONGSON3R5
          - target: LOONGSON2K1000
            triple: loongarch64-unknown-linux-gnu
            opts: NO_SHARED=1 TARGET=LOONGSON2K1000

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # -y keeps both commands non-interactive regardless of how the runner
      # image has configured apt.
      - name: Install APT deps
        run: |
          sudo add-apt-repository -y ppa:savoury1/virtualisation
          sudo apt-get update
          sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache \
          qemu-user-static

      - name: Download and install loongarch64-toolchain
        run: |
          wget https://github.com/loongson/build-tools/releases/download/2022.09.06/loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz
          tar -xf loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -C /opt

      # Expose the cross toolchain unpacked under /opt to all later steps.
      - name: Set env
        run: |
          echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64:$LD_LIBRARY_PATH" >> "$GITHUB_ENV"
          echo "PATH=$GITHUB_WORKSPACE:/opt/cross-tools/bin:$PATH" >> "$GITHUB_ENV"

      # The key is unique per commit; restore-keys fall back to the newest
      # cache for the same ref, then for the same target.
      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.target }}

      - name: Configure ccache
        run: |
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      # Truncates the dsdot unit-test source to an empty file before the build.
      - name: Disable utest dsdot:dsdot_n_1
        run: |
          echo -n > utest/test_dsdot.c
          echo "Due to the qemu versions 7.2 causing utest cases to fail,"
          echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."

      - name: Build OpenBLAS
        run: |
          make CC='ccache ${{ matrix.triple }}-gcc -static' \
               FC='ccache ${{ matrix.triple }}-gfortran -static' \
               ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

      # Every test binary is executed through qemu-loongarch64-static.
      - name: Test
        run: |
          qemu-loongarch64-static ./utest/openblas_utest
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
          rm -f ./test/?BLAT2.SUMM
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
          rm -f ./test/?BLAT2.SUMM
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
          rm -f ./test/?BLAT3.SUMM
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
          rm -f ./test/?BLAT3.SUMM
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat

Makefile.system

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1770,6 +1770,8 @@ export TARGET_CORE
17701770
export NO_AVX512
17711771
export NO_AVX2
17721772
export BUILD_BFLOAT16
1773+
export NO_LSX
1774+
export NO_LASX
17731775

17741776
export SBGEMM_UNROLL_M
17751777
export SBGEMM_UNROLL_N

c_check

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,37 @@ if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then
185185
rm -rf "$tmpd"
186186
fi
187187

# LoongArch64 only: try to compile a one-instruction LSX program and a
# one-instruction LASX program with the selected compiler. A failed compile
# sets no_lsx / no_lasx to 1; both stay 0 on every other architecture.
no_lsx=0
no_lasx=0
if [ "$architecture" = "loongarch64" ]; then
    tmpd="$(mktemp -d)"

    # LSX probe: needs <lsxintrin.h> and an assembler that accepts vadd.b.
    tmplsx="$tmpd/lsx.c"
    codelsx='"vadd.b $vr0, $vr0, $vr0"'
    lsx_flags='-march=loongarch64 -mlsx'
    {
        printf "#include <lsxintrin.h>\n\n"
        printf "void main(void){ __asm__ volatile(%s);}\n" "$codelsx"
    } >> "$tmplsx"
    args="$lsx_flags -o $tmplsx.o $tmplsx"
    if ! $compiler_name $flags $args >/dev/null 2>&1; then
        no_lsx=1
    fi

    # LASX probe: needs <lasxintrin.h> and an assembler that accepts xvadd.b.
    tmplasx="$tmpd/lasx.c"
    codelasx='"xvadd.b $xr0, $xr0, $xr0"'
    lasx_flags='-march=loongarch64 -mlasx'
    {
        printf "#include <lasxintrin.h>\n\n"
        printf "void main(void){ __asm__ volatile(%s);}\n" "$codelasx"
    } >> "$tmplasx"
    args="$lasx_flags -o $tmplasx.o $tmplasx"
    if ! $compiler_name $flags $args >/dev/null 2>&1; then
        no_lasx=1
    fi

    # Sources and any produced objects all live under $tmpd.
    rm -rf "$tmpd"
fi

188219
case "$data" in
189220
*ARCH_X86_64*) architecture=x86_64 ;;
190221
*ARCH_X86*) architecture=x86 ;;
@@ -399,6 +430,8 @@ done
399430
[ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n"
400431
[ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
401432
[ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n"
433+
[ "$no_lsx" -eq 1 ] && printf "NO_LSX=1\n"
434+
[ "$no_lasx" -eq 1 ] && printf "NO_LASX=1\n"
402435
} >> "$makefile"
403436

404437
os=`echo "$os" | tr '[[:lower:]]' '[[:upper:]]'/ `
@@ -414,6 +447,8 @@ compiler=`echo "$compiler" | tr '[[:lower:]]' '[[:upper:]]' `
414447
[ -n "$need_fu" ] && printf "#define FUNDERSCORE\t%s\n" "$need_fu"
415448
[ "$no_msa" -eq 1 ] && printf "#define NO_MSA\t1\n"
416449
[ "$c11_atomics" -eq 1 ] && printf "#define HAVE_C11\t1\n"
450+
[ "$no_lsx" -eq 1 ] && printf "#define NO_LSX\t1\n"
451+
[ "$no_lasx" -eq 1 ] && printf "#define NO_LASX\t1\n"
417452
} >> "$config"
418453

419454

c_check.pl

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,47 @@
232232
}
233233
}
234234

# LoongArch64 only: try to compile a one-instruction LSX program and a
# one-instruction LASX program with the selected compiler. A failed compile
# sets $no_lsx / $no_lasx to 1; both stay 0 on other architectures, and also
# when File::Temp cannot be loaded (the probe is skipped with a warning).
$no_lsx = 0;
$no_lasx = 0;
if ($architecture eq "loongarch64") {
    eval "use File::Temp qw(tempfile)";
    if ($@) {
        warn "could not load PERL module File::Temp, so could not check LSX and LASX capatibility";
    } else {
        # LSX probe: needs <lsxintrin.h> and an assembler that accepts vadd.b.
        $tmplsx = File::Temp->new( SUFFIX => '.c', UNLINK => 1 );
        $codelsx = '"vadd.b $vr0, $vr0, $vr0"';
        $lsx_flags = "-march=loongarch64 -mlsx";
        print $tmplsx "#include <lsxintrin.h>\n\n";
        print $tmplsx "void main(void){ __asm__ volatile($codelsx); }\n";

        $args = "$lsx_flags -o $tmplsx.o $tmplsx";
        # One string (not a list) so the shell handles the redirections.
        # Per perldoc -f system, Perl flushes open output handles before
        # forking, so the source text is on disk when the compiler runs.
        my $lsx_cmd = "$compiler_name $flags $args >/dev/null 2>/dev/null";
        $no_lsx = 1 if system($lsx_cmd) != 0;
        unlink("$tmplsx.o");

        # LASX probe: needs <lasxintrin.h> and an assembler that accepts xvadd.b.
        $tmplasx = File::Temp->new( SUFFIX => '.c', UNLINK => 1 );
        $codelasx = '"xvadd.b $xr0, $xr0, $xr0"';
        $lasx_flags = "-march=loongarch64 -mlasx";
        print $tmplasx "#include <lasxintrin.h>\n\n";
        print $tmplasx "void main(void){ __asm__ volatile($codelasx); }\n";

        $args = "$lasx_flags -o $tmplasx.o $tmplasx";
        my $lasx_cmd = "$compiler_name $flags $args >/dev/null 2>/dev/null";
        $no_lasx = 1 if system($lasx_cmd) != 0;
        unlink("$tmplasx.o");
    }
}

235276
$architecture = x86 if ($data =~ /ARCH_X86/);
236277
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
237278
$architecture = e2k if ($data =~ /ARCH_E2K/);
@@ -424,6 +465,8 @@
424465
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
425466
print MAKEFILE "NO_AVX2=1\n" if $no_avx2 eq 1;
426467
print MAKEFILE "OLDGCC=1\n" if $oldgcc eq 1;
468+
print MAKEFILE "NO_LSX=1\n" if $no_lsx eq 1;
469+
print MAKEFILE "NO_LASX=1\n" if $no_lasx eq 1;
427470

428471
$os =~ tr/[a-z]/[A-Z]/;
429472
$architecture =~ tr/[a-z]/[A-Z]/;
@@ -437,6 +480,8 @@
437480
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
438481
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
439482
print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1;
483+
print CONFFILE "#define NO_LSX\t1\n" if $no_lsx eq 1;
484+
print CONFFILE "#define NO_LASX\t1\n" if $no_lasx eq 1;
440485

441486

442487
if ($os eq "LINUX") {

cpuid_loongarch64.c

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3232
**********************************************************************************/
3333

3434
#include <stdint.h>
35+
#include <sys/auxv.h>
3536

3637
/* If LASX extension instructions supported,
3738
* using core LOONGSON3R5
@@ -46,9 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4647
#define CPU_LOONGSON3R5 1
4748
#define CPU_LOONGSON2K1000 2
4849

49-
#define LOONGARCH_CFG2 0x02
50-
#define LOONGARCH_LASX 1<<7
51-
#define LOONGARCH_LSX 1<<6
50+
#define LA_HWCAP_LSX (1<<4)
51+
#define LA_HWCAP_LASX (1<<5)
5252

5353
static char *cpuname[] = {
5454
"LOONGSONGENERIC",
@@ -64,17 +64,11 @@ static char *cpuname_lower[] = {
6464

6565
int detect(void) {
6666
#ifdef __linux
67-
uint32_t reg = 0;
67+
int flag = (int)getauxval(AT_HWCAP);
6868

69-
__asm__ volatile (
70-
"cpucfg %0, %1 \n\t"
71-
: "+&r"(reg)
72-
: "r"(LOONGARCH_CFG2)
73-
);
74-
75-
if (reg & LOONGARCH_LASX)
69+
if (flag & LA_HWCAP_LASX)
7670
return CPU_LOONGSON3R5;
77-
else if (reg & LOONGARCH_LSX)
71+
else if (flag & LA_HWCAP_LSX)
7872
return CPU_LOONGSON2K1000;
7973
else
8074
return CPU_GENERIC;

kernel/loongarch64/KERNEL.LOONGSON3R5

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
ifndef NO_LASX
12
DGEMMKERNEL = dgemm_kernel_16x4.S
23
DGEMMINCOPY = dgemm_ncopy_16.S
34
DGEMMITCOPY = dgemm_tcopy_16.S
@@ -7,6 +8,7 @@ DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
78
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
89
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
910
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
11+
endif
1012

1113
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
1214
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c

param.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2845,15 +2845,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28452845
#define GEMM_DEFAULT_OFFSET_B 0
28462846
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
28472847

2848-
#define SGEMM_DEFAULT_UNROLL_N 8
2848+
#if defined(NO_LASX)
2849+
#define DGEMM_DEFAULT_UNROLL_N 8
2850+
#define DGEMM_DEFAULT_UNROLL_M 2
2851+
#else
28492852
#define DGEMM_DEFAULT_UNROLL_N 4
2853+
#define DGEMM_DEFAULT_UNROLL_M 16
2854+
#endif
2855+
2856+
#define SGEMM_DEFAULT_UNROLL_N 8
28502857
#define QGEMM_DEFAULT_UNROLL_N 2
28512858
#define CGEMM_DEFAULT_UNROLL_N 4
28522859
#define ZGEMM_DEFAULT_UNROLL_N 4
28532860
#define XGEMM_DEFAULT_UNROLL_N 1
28542861

28552862
#define SGEMM_DEFAULT_UNROLL_M 2
2856-
#define DGEMM_DEFAULT_UNROLL_M 16
28572863
#define QGEMM_DEFAULT_UNROLL_M 2
28582864
#define CGEMM_DEFAULT_UNROLL_M 1
28592865
#define ZGEMM_DEFAULT_UNROLL_M 1

0 commit comments

Comments
 (0)