Skip to content

Commit f8ad534

Browse files
committed
Fix casum fallback kernel.
This kernel is only used on Skylake+ when the kernel with AVX512 intrinsics can't be used. It used the variable x1 incorrectly in the tail loop: the main loop advanced x, so x1 was still at its initial value instead of where x pointed to. This caused 55 "other error"s in the LAPACK tests (#4282). This change makes casum.c as similar as possible to zasum.c, because zasum.c does this correctly.
1 parent cb29507 commit f8ad534

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

kernel/x86_64/casum.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@
99
#endif
1010

1111
#ifndef HAVE_CASUM_KERNEL
12-
static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
12+
static FLOAT casum_kernel(BLASLONG n, FLOAT *x)
1313
{
1414

1515
BLASLONG i=0;
1616
BLASLONG n_8 = n & -8;
17-
FLOAT *x = x1;
17+
FLOAT *x1 = x;
1818
FLOAT temp0, temp1, temp2, temp3;
1919
FLOAT temp4, temp5, temp6, temp7;
2020
FLOAT sum0 = 0.0;
@@ -24,14 +24,14 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
2424
FLOAT sum4 = 0.0;
2525

2626
while (i < n_8) {
27-
temp0 = ABS_K(x[0]);
28-
temp1 = ABS_K(x[1]);
29-
temp2 = ABS_K(x[2]);
30-
temp3 = ABS_K(x[3]);
31-
temp4 = ABS_K(x[4]);
32-
temp5 = ABS_K(x[5]);
33-
temp6 = ABS_K(x[6]);
34-
temp7 = ABS_K(x[7]);
27+
temp0 = ABS_K(x1[0]);
28+
temp1 = ABS_K(x1[1]);
29+
temp2 = ABS_K(x1[2]);
30+
temp3 = ABS_K(x1[3]);
31+
temp4 = ABS_K(x1[4]);
32+
temp5 = ABS_K(x1[5]);
33+
temp6 = ABS_K(x1[6]);
34+
temp7 = ABS_K(x1[7]);
3535

3636
sum0 += temp0;
3737
sum1 += temp1;
@@ -43,12 +43,12 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
4343
sum2 += temp6;
4444
sum3 += temp7;
4545

46-
x+=8;
46+
x1+=8;
4747
i+=4;
4848
}
4949

5050
while (i < n) {
51-
sum4 += (ABS_K(x1[0]) + ABS_K(x1[1]));
51+
sum4 += ABS_K(x1[0]) + ABS_K(x1[1]);
5252
x1 += 2;
5353
i++;
5454
}

0 commit comments

Comments
 (0)