Skip to content

Commit efcf712

Browse files
authored
Merge pull request #4003 from martin-frbg/issue3995
Fix instabilities in CGEMM/CTRMM/DNRM2 on Apple M1/M2 under OSX
2 parents 51dd133 + 44164e3 commit efcf712

File tree

5 files changed

+12
-11
lines changed

5 files changed

+12
-11
lines changed

cpuid_arm64.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -267,9 +267,9 @@ int detect(void)
267267
}
268268
#else
269269
#ifdef __APPLE__
270-
sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
271-
if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1
272-
if (value == 3660830781) return CPU_VORTEX; //A15/M2
270+
sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
271+
if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
272+
if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
273273
#endif
274274
return CPU_ARMV8;
275275
#endif

kernel/arm64/cgemm_kernel_8x4.S

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4949
#define pCRow3 x15
5050
#define pA x16
5151
#define alphaR w17
52-
#define alphaI w18
52+
#define alphaI w19
5353

5454
#define alpha0_R s10
5555
#define alphaV0_R v10.s[0]

kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4949
#define pCRow3 x15
5050
#define pA x16
5151
#define alphaR w17
52-
#define alphaI w18
52+
#define alphaI w19
5353

5454
#define alpha0_R s10
5555
#define alphaV0_R v10.s[0]

kernel/arm64/ctrmm_kernel_8x4.S

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -49,10 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4949
#define pCRow3 x15
5050
#define pA x16
5151
#define alphaR w17
52-
#define alphaI w18
53-
#define temp x19
54-
#define tempOffset x20
55-
#define tempK x21
52+
#define alphaI w19
53+
#define temp x20
54+
#define tempOffset x21
55+
#define tempK x22
5656

5757
#define alpha0_R s10
5858
#define alphaV0_R v10.s[0]

kernel/arm64/dznrm2_thunderx2t99.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2727

2828

2929
#include "common.h"
30-
30+
#include <float.h>
3131
#include <arm_neon.h>
3232

3333
#if defined(SMP)
@@ -404,7 +404,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
404404
#else
405405
nrm2_compute(n, x, inc_x, &ssq, &scale);
406406
#endif
407-
if (fabs(scale) <1.e-300) return 0.;
407+
volatile FLOAT sca = fabs(scale);
408+
if (sca < DBL_MIN) return 0.;
408409
ssq = sqrt(ssq) * scale;
409410

410411
return ssq;

0 commit comments

Comments
 (0)