Skip to content

Commit af3606d

Browse files
authored
Merge pull request #3987 from xianyi/develop
Merge from develop branch for 0.3.23
2 parents 13add99 + cd2e80c commit af3606d

File tree

10 files changed

+81
-52
lines changed

10 files changed

+81
-52
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ project(OpenBLAS C ASM)
88

99
set(OpenBLAS_MAJOR_VERSION 0)
1010
set(OpenBLAS_MINOR_VERSION 3)
11-
set(OpenBLAS_PATCH_VERSION 22)
11+
set(OpenBLAS_PATCH_VERSION 22.dev)
1212

1313
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1414

Changelog.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,21 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.23
4+
01-Apr-2023
5+
6+
general:
7+
- fixed a serious regression in GETRF/GETF2 and ZGETRF/ZGETF2 where
8+
subnormal but nonzero data elements triggered the singularity flag
9+
- fixed a long-standing bug in CSPR/ZSPR in single-threaded operation
10+
for cases where elements of the X vector are real numbers (or
11+
purely imaginary, i.e. complex with only the real part zero)
12+
- fixed gmake builds with the option NO_LAPACK
13+
- fixed a few instances in the gmake Makefiles where expressly
14+
setting NO_LAPACK=0 or NO_LAPACKE=0 would have the opposite effect
15+
16+
x86_64:
17+
- added further CPUID values for Intel Raptor Lake
18+
219
====================================================================
320
Version 0.3.22
421
26-Mar-2023

Makefile.install

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ endif
7777
endif
7878

7979
ifneq ($(OSNAME), AIX)
80-
ifndef NO_LAPACKE
80+
ifneq ($(NO_LAPACKE), 1)
8181
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
8282
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
8383
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@@ -127,7 +127,7 @@ endif
127127

128128
else
129129
#install on AIX has different options syntax
130-
ifndef NO_LAPACKE
130+
ifneq ($(NO_LAPACKE), 1)
131131
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
132132
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
133133
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"

Makefile.rule

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#
44

55
# This library's version
6-
VERSION = 0.3.22
6+
VERSION = 0.3.22.dev
77

88
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
99
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

cpuid_x86.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1547,6 +1547,8 @@ int get_cpuname(void){
15471547
case 11: //family 6 exmodel 11
15481548
switch (model) {
15491549
case 7: // Raptor Lake
1550+
case 10:
1551+
case 15:
15501552
if(support_avx2())
15511553
return CPUTYPE_HASWELL;
15521554
if(support_avx())
@@ -2348,6 +2350,8 @@ int get_coretype(void){
23482350
case 11:
23492351
switch (model) {
23502352
case 7: // Raptor Lake
2353+
case 10:
2354+
case 15:
23512355
#ifndef NO_AVX2
23522356
if(support_avx2())
23532357
return CORE_HASWELL;

driver/level2/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ CBLASOBJS += \
9292
ctrsv_RUU.$(SUFFIX) ctrsv_RUN.$(SUFFIX) ctrsv_RLU.$(SUFFIX) ctrsv_RLN.$(SUFFIX) \
9393
ctrsv_CUU.$(SUFFIX) ctrsv_CUN.$(SUFFIX) ctrsv_CLU.$(SUFFIX) ctrsv_CLN.$(SUFFIX)
9494

95-
ifndef NO_LAPACK
95+
ifneq ($(NO_LAPACK), 1)
9696
CBLASOBJS += \
9797
cspmv_U.$(SUFFIX) cspmv_L.$(SUFFIX) \
9898
cspr_U.$(SUFFIX) cspr_L.$(SUFFIX) \

driver/level2/zspr_k.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,15 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
5353

5454
for (i = 0; i < m; i++){
5555
#ifndef LOWER
56-
if ((X[i * 2 + 0] != ZERO) && (X[i * 2 + 1] != ZERO)) {
56+
if ((X[i * 2 + 0] != ZERO) || (X[i * 2 + 1] != ZERO)) {
5757
AXPYU_K(i + 1, 0, 0,
5858
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
5959
alpha_i * X[i * 2 + 0] + alpha_r * X[i * 2 + 1],
6060
X, 1, a, 1, NULL, 0);
6161
}
6262
a += (i + 1) * 2;
6363
#else
64-
if ((X[i * 2 + 0] != ZERO) && (X[i * 2 + 1] != ZERO)) {
64+
if ((X[i * 2 + 0] != ZERO) || (X[i * 2 + 1] != ZERO)) {
6565
AXPYU_K(m - i, 0, 0,
6666
alpha_r * X[i * 2 + 0] - alpha_i * X[i * 2 + 1],
6767
alpha_i * X[i * 2 + 0] + alpha_r * X[i * 2 + 1],

interface/Makefile

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,8 @@ CBLAS2OBJS = \
9292
cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \
9393
ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) \
9494
csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \
95-
csbmv.$(SUFFIX) cspmv.$(SUFFIX) \
96-
cspr.$(SUFFIX) cspr2.$(SUFFIX) \
97-
csymv.$(SUFFIX) csyr.$(SUFFIX) \
95+
csbmv.$(SUFFIX) \
96+
cspr2.$(SUFFIX) \
9897
ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \
9998
ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \
10099
chemv.$(SUFFIX) chbmv.$(SUFFIX) \
@@ -122,9 +121,8 @@ ZBLAS2OBJS = \
122121
zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \
123122
ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) \
124123
zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \
125-
zsbmv.$(SUFFIX) zspmv.$(SUFFIX) \
126-
zspr.$(SUFFIX) zspr2.$(SUFFIX) \
127-
zsymv.$(SUFFIX) zsyr.$(SUFFIX) \
124+
zsbmv.$(SUFFIX) \
125+
zspr2.$(SUFFIX) \
128126
ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \
129127
ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \
130128
zhemv.$(SUFFIX) zhbmv.$(SUFFIX) \
@@ -447,7 +445,8 @@ QLAPACKOBJS = \
447445
CLAPACKOBJS = \
448446
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
449447
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
450-
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX)
448+
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) \
449+
cspr.$(SUFFIX) cspmv.$(SUFFIX) csymv.$(SUFFIX) csyr.$(SUFFIX)
451450

452451
#ZLAPACKOBJS = \
453452
# zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
@@ -458,8 +457,8 @@ CLAPACKOBJS = \
458457
ZLAPACKOBJS = \
459458
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
460459
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
461-
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX)
462-
460+
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX) \
461+
zspr.$(SUFFIX) zspmv.$(SUFFIX) zsymv.$(SUFFIX) zsyr.$(SUFFIX)
463462

464463
XLAPACKOBJS = \
465464
xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \
@@ -1021,7 +1020,7 @@ dsymv.$(SUFFIX) dsymv.$(PSUFFIX) : symv.c
10211020
qsymv.$(SUFFIX) qsymv.$(PSUFFIX) : symv.c
10221021
$(CC) -c $(CFLAGS) $< -o $(@F)
10231022

1024-
ifndef NO_LAPACK
1023+
ifneq ($(NO_LAPACK), 1)
10251024
csymv.$(SUFFIX) csymv.$(PSUFFIX) : zsymv.c
10261025
$(CC) -c $(CFLAGS) $< -o $(@F)
10271026

@@ -1041,7 +1040,7 @@ dsyr.$(SUFFIX) dsyr.$(PSUFFIX) : syr.c
10411040
qsyr.$(SUFFIX) qsyr.$(PSUFFIX) : syr.c
10421041
$(CC) -c $(CFLAGS) $< -o $(@F)
10431042

1044-
ifndef NO_LAPACK
1043+
ifneq ($(NO_LAPACK), 1)
10451044
csyr.$(SUFFIX) csyr.$(PSUFFIX) : zsyr.c
10461045
$(CC) -c $(CFLAGS) $< -o $(@F)
10471046

@@ -1115,7 +1114,7 @@ dspmv.$(SUFFIX) dspmv.$(PSUFFIX) : spmv.c
11151114
qspmv.$(SUFFIX) qspmv.$(PSUFFIX) : spmv.c
11161115
$(CC) -c $(CFLAGS) $< -o $(@F)
11171116

1118-
ifndef NO_LAPACK
1117+
ifneq ($(NO_LAPACK), 1)
11191118
cspmv.$(SUFFIX) cspmv.$(PSUFFIX) : zspmv.c
11201119
$(CC) -c $(CFLAGS) $< -o $(@F)
11211120

@@ -1135,7 +1134,7 @@ dspr.$(SUFFIX) dspr.$(PSUFFIX) : spr.c
11351134
qspr.$(SUFFIX) qspr.$(PSUFFIX) : spr.c
11361135
$(CC) -c $(CFLAGS) $< -o $(@F)
11371136

1138-
ifndef NO_LAPACK
1137+
ifneq ($(NO_LAPACK), 1)
11391138
cspr.$(SUFFIX) cspr.$(PSUFFIX) : zspr.c
11401139
$(CC) -c $(CFLAGS) $< -o $(@F)
11411140

lapack/getf2/getf2_k.c

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,21 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
100100
jp--;
101101
temp1 = *(b + jp);
102102

103-
//if (temp1 != ZERO) {
103+
if (temp1 != ZERO) {
104+
#if defined(DOUBLE)
104105
if (fabs(temp1) >= DBL_MIN ) {
105-
temp1 = dp1 / temp1;
106-
107-
if (jp != j) {
108-
SWAP_K(j + 1, 0, 0, ZERO, a + j, lda, a + jp, lda, NULL, 0);
109-
}
110-
if (j + 1 < m) {
111-
SCAL_K(m - j - 1, 0, 0, temp1, b + j + 1, 1, NULL, 0, NULL, 0);
112-
}
106+
#else
107+
if (fabs(temp1) >= FLT_MIN ) {
108+
#endif
109+
temp1 = dp1 / temp1;
110+
111+
if (jp != j) {
112+
SWAP_K(j + 1, 0, 0, ZERO, a + j, lda, a + jp, lda, NULL, 0);
113+
}
114+
if (j + 1 < m) {
115+
SCAL_K(m - j - 1, 0, 0, temp1, b + j + 1, 1, NULL, 0, NULL, 0);
116+
}
117+
}
113118
} else {
114119
if (!info) info = j + 1;
115120
}

lapack/getf2/zgetf2_k.c

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -106,30 +106,34 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
106106
temp1 = *(b + jp * 2 + 0);
107107
temp2 = *(b + jp * 2 + 1);
108108

109-
// if ((temp1 != ZERO) || (temp2 != ZERO)) {
110-
if ((fabs(temp1) >= DBL_MIN) && (fabs(temp2) >= DBL_MIN)) {
111-
112-
if (jp != j) {
113-
SWAP_K(j + 1, 0, 0, ZERO, ZERO, a + j * 2, lda,
109+
if ((temp1 != ZERO) || (temp2 != ZERO)) {
110+
#if defined(DOUBLE)
111+
if ((fabs(temp1) >= DBL_MIN) || (fabs(temp2) >= DBL_MIN)) {
112+
#else
113+
if ((fabs(temp1) >= FLT_MIN) || (fabs(temp2) >= FLT_MIN)) {
114+
#endif
115+
if (jp != j) {
116+
SWAP_K(j + 1, 0, 0, ZERO, ZERO, a + j * 2, lda,
114117
a + jp * 2, lda, NULL, 0);
115-
}
116-
117-
if (fabs(temp1) >= fabs(temp2)){
118-
ratio = temp2 / temp1;
119-
den = dp1 /(temp1 * ( 1 + ratio * ratio));
120-
temp3 = den;
121-
temp4 = -ratio * den;
122-
} else {
123-
ratio = temp1 / temp2;
124-
den = dp1 /(temp2 * ( 1 + ratio * ratio));
125-
temp3 = ratio * den;
126-
temp4 = -den;
127-
}
128-
129-
if (j + 1 < m) {
130-
SCAL_K(m - j - 1, 0, 0, temp3, temp4,
131-
b + (j + 1) * 2, 1, NULL, 0, NULL, 0);
132-
}
118+
}
119+
120+
if (fabs(temp1) >= fabs(temp2)){
121+
ratio = temp2 / temp1;
122+
den = dp1 /(temp1 * ( 1 + ratio * ratio));
123+
temp3 = den;
124+
temp4 = -ratio * den;
125+
} else {
126+
ratio = temp1 / temp2;
127+
den = dp1 /(temp2 * ( 1 + ratio * ratio));
128+
temp3 = ratio * den;
129+
temp4 = -den;
130+
}
131+
132+
if (j + 1 < m) {
133+
SCAL_K(m - j - 1, 0, 0, temp3, temp4,
134+
b + (j + 1) * 2, 1, NULL, 0, NULL, 0);
135+
}
136+
}
133137
} else {
134138
if (!info) info = j + 1;
135139
}

0 commit comments

Comments
 (0)