Skip to content

Commit fbb8949

Browse files
authored
Merge pull request #22 from xianyi/develop
rebase
2 parents 4548475 + e711659 commit fbb8949

File tree

830 files changed

+38286
-12001
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

830 files changed

+38286
-12001
lines changed

CONTRIBUTORS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,4 +178,4 @@ In chronological order:
178178
* [2019-11-06] optimize AVX512 SGEMM
179179
* [2019-11-12] AVX512 CGEMM & ZGEMM kernels
180180
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
181-
* [2019-12-27] AVX2 CGEMM3M kernel
181+
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels

Makefile

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -247,21 +247,21 @@ prof_lapack : lapack_prebuild
247247

248248
lapack_prebuild :
249249
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
250-
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
251-
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
250+
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
251+
-@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
252252
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
253-
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
253+
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
254254
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
255-
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
255+
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
256256
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
257257
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
258-
-@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
259-
-@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
258+
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
259+
-@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
260260
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
261-
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
262-
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
261+
-@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
262+
-@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
263263
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
264-
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
264+
-@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
265265
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
266266
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
267267
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
@@ -319,7 +319,7 @@ lapack-test :
319319
ifneq ($(CROSS), 1)
320320
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
321321
./testsecond; ./testdsecnd; ./testieee; ./testversion )
322-
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
322+
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
323323
endif
324324

325325
lapack-runtest:

Makefile.system

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ else ifeq ($(ARCH), i386)
2525
override ARCH=x86
2626
else ifeq ($(ARCH), aarch64)
2727
override ARCH=arm64
28+
else ifeq ($(ARCH), zarch)
29+
override ARCH=zarch
2830
endif
2931

3032
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
@@ -558,6 +560,11 @@ DYNAMIC_CORE += THUNDERX2T99
558560
DYNAMIC_CORE += TSV110
559561
endif
560562

563+
ifeq ($(ARCH), zarch)
564+
DYNAMIC_CORE = Z13
565+
DYNAMIC_CORE += Z14
566+
endif
567+
561568
ifeq ($(ARCH), power)
562569
DYNAMIC_CORE = POWER6
563570
DYNAMIC_CORE += POWER8

cmake/lapack.cmake

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@ set(SLASRC
115115
stplqt.f stplqt2.f stpmlqt.f
116116
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
117117
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
118-
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f)
118+
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
119+
scombssq.f sgesvdq.f slaorhr_col_getrfnp.f
120+
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f )
119121

120122
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
121123
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@@ -210,7 +212,9 @@ set(CLASRC
210212
ctplqt.f ctplqt2.f ctpmlqt.f
211213
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
212214
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
213-
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f)
215+
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
216+
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
217+
cungtsqr.f cunhr_col.f )
214218

215219
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
216220
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@@ -299,7 +303,9 @@ set(DLASRC
299303
dtplqt.f dtplqt2.f dtpmlqt.f
300304
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
301305
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
302-
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f)
306+
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
307+
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
308+
dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f )
303309

304310
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
305311
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@@ -398,7 +404,9 @@ set(ZLASRC
398404
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
399405
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
400406
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
401-
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f)
407+
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
408+
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
409+
zungtsqr.f zunhr_col.f)
402410

403411
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
404412
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f

cmake/lapacke.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,8 @@ set(DSRC
715715
lapacke_dgesv_work.c
716716
lapacke_dgesvd.c
717717
lapacke_dgesvd_work.c
718+
lapacke_dgesvdq.c
719+
lapacke_dgesvdq_work.c
718720
lapacke_dgesvdx.c
719721
lapacke_dgesvdx_work.c
720722
lapacke_dgesvj.c
@@ -1287,6 +1289,8 @@ set(SSRC
12871289
lapacke_sgesv_work.c
12881290
lapacke_sgesvd.c
12891291
lapacke_sgesvd_work.c
1292+
lapacke_sgesvdq.c
1293+
lapacke_sgesvdq_work.c
12901294
lapacke_sgesvdx.c
12911295
lapacke_sgesvdx_work.c
12921296
lapacke_sgesvj.c
@@ -1853,6 +1857,8 @@ set(ZSRC
18531857
lapacke_zgesv_work.c
18541858
lapacke_zgesvd.c
18551859
lapacke_zgesvd_work.c
1860+
lapacke_zgesvdq.c
1861+
lapacke_zgesvdq_work.c
18561862
lapacke_zgesvdx.c
18571863
lapacke_zgesvdx_work.c
18581864
lapacke_zgesvj.c

ctest/din3

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES.
55
T LOGICAL FLAG, T TO TEST ERROR EXITS.
66
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
77
16.0 THRESHOLD VALUE OF TEST RATIO
8-
6 NUMBER OF VALUES OF N
8+
7 NUMBER OF VALUES OF N
99
1 2 3 5 7 9 35 VALUES OF N
1010
3 NUMBER OF VALUES OF ALPHA
1111
0.0 1.0 0.7 VALUES OF ALPHA

ctest/sin3

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES.
55
T LOGICAL FLAG, T TO TEST ERROR EXITS.
66
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
77
16.0 THRESHOLD VALUE OF TEST RATIO
8-
6 NUMBER OF VALUES OF N
8+
7 NUMBER OF VALUES OF N
99
0 1 2 3 5 9 35 VALUES OF N
1010
3 NUMBER OF VALUES OF ALPHA
1111
0.0 1.0 0.7 VALUES OF ALPHA

driver/others/Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,13 @@ else
2121
ifeq ($(ARCH),power)
2222
COMMONOBJS += dynamic_power.$(SUFFIX)
2323
else
24+
ifeq ($(ARCH),zarch)
25+
COMMONOBJS += dynamic_zarch.$(SUFFIX)
26+
else
2427
COMMONOBJS += dynamic.$(SUFFIX)
2528
endif
2629
endif
30+
endif
2731
else
2832
COMMONOBJS += parameter.$(SUFFIX)
2933
endif
@@ -85,9 +89,13 @@ else
8589
ifeq ($(ARCH),power)
8690
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX)
8791
else
92+
ifeq ($(ARCH),zarch)
93+
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX)
94+
else
8895
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
8996
endif
9097
endif
98+
endif
9199
else
92100
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
93101
endif

driver/others/dynamic_zarch.c

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
2+
#include "common.h"
3+
4+
/* Per-core kernel tables that runtime dispatch can select between. */
extern gotoblas_t gotoblas_Z13;
extern gotoblas_t gotoblas_Z14;
/*
 * NOTE(review): get_coretype() in this file maps z15 machine types to the
 * Z14 table, so Z15 is only reachable through OPENBLAS_CORETYPE. Confirm
 * that a gotoblas_Z15 table is really built and linked for DYNAMIC_ARCH.
 */
extern gotoblas_t gotoblas_Z15;
/*
 * Disabled compiler-version guard kept for reference:
 *   #if (!defined C_GCC) || (GCC_VERSION >= 60000)
 *   extern gotoblas_t gotoblas_Z14;
 *   #endif
 */

/* Number of entries in corename[] below; keep the two in sync. */
#define NUM_CORETYPES 5

extern void openblas_warning(int verbose, const char* msg);

/*
 * Printable core names. The position of a name in this table is the index
 * used by gotoblas_corename() and force_coretype().
 */
static char* corename[NUM_CORETYPES] = {
	"unknown",        /* 0: no matching kernel table */
	"Z13",            /* 1 */
	"Z14",            /* 2 */
	"Z15",            /* 3 */
	"ZARCH_GENERIC",  /* 4: listed by name only, no table is selected for it here */
};
22+
23+
char* gotoblas_corename(void) {
24+
if (gotoblas == &gotoblas_Z13) return corename[1];
25+
if (gotoblas == &gotoblas_Z14) return corename[2];
26+
if (gotoblas == &gotoblas_Z15) return corename[3];
27+
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
28+
// if (gotoblas == &gotoblas_POWER9) return corename[3];
29+
//#endif
30+
return corename[0]; // try generic?
31+
}
32+
33+
// __builtin_cpu_is is not supported by zarch
34+
static gotolabs_t* get_coretype(void) {
35+
FILE* infile;
36+
char buffer[512], * p;
37+
38+
p = (char*)NULL;
39+
infile = fopen("/proc/sysinfo", "r");
40+
while (fgets(buffer, sizeof(buffer), infile)) {
41+
if (!strncmp("Type", buffer, 4)) {
42+
p = strchr(buffer, ':') + 2;
43+
#if 0
44+
fprintf(stderr, "%s\n", p);
45+
#endif
46+
break;
47+
}
48+
}
49+
50+
fclose(infile);
51+
52+
if (strstr(p, "2964")) return &gotoblas_Z13;
53+
if (strstr(p, "2965")) return &gotoblas_Z13;
54+
if (strstr(p, "3906")) return &gotoblas_Z14;
55+
if (strstr(p, "3907")) return &gotoblas_Z14;
56+
if (strstr(p, "8561")) return &gotoblas_Z14; // fallback z15 to z14
57+
if (strstr(p, "8562")) return &gotoblas_Z14; // fallback z15 to z14
58+
59+
return NULL; // should be ZARCH_GENERIC
60+
}
61+
62+
static gotoblas_t* force_coretype(char* coretype) {
63+
64+
int i;
65+
int found = -1;
66+
char message[128];
67+
68+
for (i = 0; i < NUM_CORETYPES; i++)
69+
{
70+
if (!strncasecmp(coretype, corename[i], 20))
71+
{
72+
found = i;
73+
break;
74+
}
75+
}
76+
77+
switch (found)
78+
{
79+
case 1: return (&gotoblas_Z13);
80+
case 2: return (&gotoblas_Z14);
81+
case 3: return (&gotoblas_Z15);
82+
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
83+
// case 3: return (&gotoblas_POWER9);
84+
//#endif
85+
default: return NULL;
86+
}
87+
snprintf(message, 128, "Core not found: %s\n", coretype);
88+
openblas_warning(1, message);
89+
}
90+
91+
void gotoblas_dynamic_init(void) {
92+
93+
char coremsg[128];
94+
char coren[22];
95+
char* p;
96+
97+
98+
if (gotoblas) return;
99+
100+
p = getenv("OPENBLAS_CORETYPE");
101+
if (p)
102+
{
103+
gotoblas = force_coretype(p);
104+
}
105+
else
106+
{
107+
gotoblas = get_coretype();
108+
}
109+
110+
if (gotoblas == NULL)
111+
{
112+
snprintf(coremsg, 128, "Falling back to Z14 core\n");
113+
openblas_warning(1, coremsg);
114+
gotoblas = &gotoblas_Z14;
115+
}
116+
117+
if (gotoblas && gotoblas->init) {
118+
strncpy(coren, gotoblas_corename(), 20);
119+
sprintf(coremsg, "Core: %s\n", coren);
120+
openblas_warning(2, coremsg);
121+
gotoblas->init();
122+
}
123+
else {
124+
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
125+
exit(1);
126+
}
127+
}
128+
129+
/*
 * Forget the selected kernel table, so that the next call to
 * gotoblas_dynamic_init() performs the selection again.
 */
void gotoblas_dynamic_quit(void)
{
	gotoblas = NULL;
}

exports/gensymbol

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,19 @@
694694

695695
# functions added for lapack-3.8.0
696696

697-
ilaenv2stage
697+
ilaenv2stage,
698+
699+
# functions added for lapack-3.9.0
700+
cgesvdq,
701+
cungtsqr,
702+
dcombssq,
703+
dgesvdq,
704+
dorgtsqr,
705+
scombssq,
706+
sgesvdq,
707+
sorgtsqr,
708+
zgesvdq,
709+
zungtsqr
698710
);
699711

700712
@lapack_extendedprecision_objs = (
@@ -3347,6 +3359,15 @@
33473359
LAPACKE_zsytrf_aa_2stage_work,
33483360
LAPACKE_zsytrs_aa_2stage,
33493361
LAPACKE_zsytrs_aa_2stage_work,
3362+
3363+
# new functions from 3.9.0
3364+
LAPACKE_dgesvdq,
3365+
LAPACKE_dgesvdq_work,
3366+
LAPACKE_sgesvdq,
3367+
LAPACKE_sgesvdq_work,
3368+
LAPACKE_zgesvdq,
3369+
LAPACKE_zgesvdq_work
3370+
33503371
);
33513372

33523373
#These function may need 2 underscores.
@@ -3419,7 +3440,13 @@
34193440
dsytrf_aa_2stage, dsytrs_aa_2stage,
34203441
zhesv_aa_2stage, zhetrf_aa_2stage,
34213442
zhetrs_aa_2stage, zsysv_aa_2stage,
3422-
zsytrf_aa_2stage, zsytrs_aa_2stage
3443+
zsytrf_aa_2stage, zsytrs_aa_2stage,
3444+
# 3.9.0
3445+
claunhr_col_getrfnp, claunhr_col_getrfnp2, cunhr_col,
3446+
dlaorhr_col_getrfnp, dlaorhr_col_getrfnp2, dorhr_col,
3447+
slaorhr_col_getrfnp, slaorhr_col_getrfnp2, sorhr_col,
3448+
zlaunhr_col_getrfnp, zlaunhr_col_getrfnp2, zunhr_col
3449+
34233450
);
34243451

34253452

0 commit comments

Comments
 (0)