Skip to content

Commit 969601a

Browse files
committed
X86_64: Fixed bug in zscal
Fixed handling of NaN and Inf values in x when inc_x is greater than 1.
1 parent bb043a0 commit 969601a

File tree

1 file changed

+50
-41
lines changed

1 file changed

+50
-41
lines changed

kernel/x86_64/zscal.c

Lines changed: 50 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -69,16 +69,16 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha , FLOAT *x )
6969

7070
for( i=0; i<n; i+=4 )
7171
{
72-
t0 = da_r *x[0] - da_i *x[1];
73-
t1 = da_r *x[2] - da_i *x[3];
74-
t2 = da_r *x[4] - da_i *x[5];
75-
t3 = da_r *x[6] - da_i *x[7];
72+
t0 = da_r *x[0] - da_i *x[1];
73+
t1 = da_r *x[2] - da_i *x[3];
74+
t2 = da_r *x[4] - da_i *x[5];
75+
t3 = da_r *x[6] - da_i *x[7];
7676

7777
x[1] = da_r * x[1] + da_i * x[0];
7878
x[3] = da_r * x[3] + da_i * x[2];
7979
x[5] = da_r * x[5] + da_i * x[4];
8080
x[7] = da_r * x[7] + da_i * x[6];
81-
81+
8282
x[0] = t0;
8383
x[2] = t1;
8484
x[4] = t2;
@@ -99,16 +99,16 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha , FLOAT *x )
9999

100100
for( i=0; i<n; i+=4 )
101101
{
102-
t0 = - da_i *x[1];
103-
t1 = - da_i *x[3];
104-
t2 = - da_i *x[5];
105-
t3 = - da_i *x[7];
102+
t0 = - da_i *x[1];
103+
t1 = - da_i *x[3];
104+
t2 = - da_i *x[5];
105+
t3 = - da_i *x[7];
106106

107107
x[1] = da_i * x[0];
108108
x[3] = da_i * x[2];
109109
x[5] = da_i * x[4];
110110
x[7] = da_i * x[6];
111-
111+
112112
x[0] = t0;
113113
x[2] = t1;
114114
x[4] = t2;
@@ -129,16 +129,16 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha , FLOAT *x )
129129

130130
for( i=0; i<n; i+=4 )
131131
{
132-
t0 = da_r *x[0];
133-
t1 = da_r *x[2];
134-
t2 = da_r *x[4];
135-
t3 = da_r *x[6];
132+
t0 = da_r *x[0];
133+
t1 = da_r *x[2];
134+
t2 = da_r *x[4];
135+
t3 = da_r *x[6];
136136

137137
x[1] = da_r * x[1];
138138
x[3] = da_r * x[3];
139139
x[5] = da_r * x[5];
140140
x[7] = da_r * x[7];
141-
141+
142142
x[0] = t0;
143143
x[2] = t1;
144144
x[4] = t2;
@@ -157,14 +157,14 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha , FLOAT *x )
157157
BLASLONG i;
158158
for( i=0; i<n; i+=4 )
159159
{
160-
x[0] = 0.0;
161-
x[1] = 0.0;
162-
x[2] = 0.0;
163-
x[3] = 0.0;
164-
x[4] = 0.0;
165-
x[5] = 0.0;
166-
x[6] = 0.0;
167-
x[7] = 0.0;
160+
x[0] = 0.0;
161+
x[1] = 0.0;
162+
x[2] = 0.0;
163+
x[3] = 0.0;
164+
x[4] = 0.0;
165+
x[5] = 0.0;
166+
x[6] = 0.0;
167+
x[7] = 0.0;
168168
x+=8;
169169
}
170170

@@ -186,10 +186,10 @@ static void zscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
186186

187187
for ( i=0; i<n; i+=4 )
188188
{
189-
t0 = da_r * x[0] - da_i *x[1];
190-
t1 = da_r * x[inc_x] - da_i *x[inc_x + 1];
191-
t2 = da_r * x[inc_x2] - da_i *x[inc_x2 + 1];
192-
t3 = da_r * x[inc_x3] - da_i *x[inc_x3 + 1];
189+
t0 = da_r * x[0] - da_i *x[1];
190+
t1 = da_r * x[inc_x] - da_i *x[inc_x + 1];
191+
t2 = da_r * x[inc_x2] - da_i *x[inc_x2 + 1];
192+
t3 = da_r * x[inc_x3] - da_i *x[inc_x3 + 1];
193193

194194
x[1] = da_i * x[0] + da_r * x[1];
195195
x[inc_x +1] = da_i * x[inc_x] + da_r * x[inc_x +1];
@@ -228,7 +228,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
228228
{
229229
while(j < n1)
230230
{
231-
231+
232232
x[i]=0.0;
233233
x[i+1]=0.0;
234234
x[i+inc_x]=0.0;
@@ -240,7 +240,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
240240

241241
while(j < n)
242242
{
243-
243+
244244
x[i]=0.0;
245245
x[i+1]=0.0;
246246
i += inc_x ;
@@ -253,11 +253,17 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
253253
{
254254
while(j < n1)
255255
{
256-
257-
temp0 = -da_i * x[i+1];
256+
257+
if (isnan(x[i]) || isinf(x[i]))
258+
temp0 = NAN;
259+
else
260+
temp0 = -da_i * x[i+1];
258261
x[i+1] = da_i * x[i];
259262
x[i] = temp0;
260-
temp1 = -da_i * x[i+1+inc_x];
263+
if (isnan(x[i+inc_x]) || isinf(x[i+inc_x]))
264+
temp1 = NAN;
265+
else
266+
temp1 = -da_i * x[i+1+inc_x];
261267
x[i+1+inc_x] = da_i * x[i+inc_x];
262268
x[i+inc_x] = temp1;
263269
i += 2*inc_x ;
@@ -267,8 +273,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
267273

268274
while(j < n)
269275
{
270-
271-
temp0 = -da_i * x[i+1];
276+
277+
if (isnan(x[i]) || isinf(x[i]))
278+
temp0 = NAN;
279+
else
280+
temp0 = -da_i * x[i+1];
272281
x[i+1] = da_i * x[i];
273282
x[i] = temp0;
274283
i += inc_x ;
@@ -291,7 +300,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
291300

292301
while(j < n1)
293302
{
294-
303+
295304
temp0 = da_r * x[i];
296305
x[i+1] = da_r * x[i+1];
297306
x[i] = temp0;
@@ -305,7 +314,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
305314

306315
while(j < n)
307316
{
308-
317+
309318
temp0 = da_r * x[i];
310319
x[i+1] = da_r * x[i+1];
311320
x[i] = temp0;
@@ -368,7 +377,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
368377
}
369378
i = n1 << 1;
370379
j = n1;
371-
380+
372381
if ( da_r == 0.0 || da_r != da_r )
373382
{
374383
if ( da_i == 0.0 )
@@ -385,7 +394,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
385394
}
386395

387396
}
388-
else if (da_r < -FLT_MAX || da_r > FLT_MAX) {
397+
else if (da_r < -FLT_MAX || da_r > FLT_MAX) {
389398
while(j < n)
390399
{
391400
x[i]= NAN;
@@ -404,7 +413,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
404413
if (x[i] < -FLT_MAX || x[i] > FLT_MAX)
405414
temp0 = NAN;
406415
x[i+1] = da_i * x[i];
407-
if ( x[i] == x[i]) //preserve NaN
416+
if ( x[i] == x[i]) //preserve NaN
408417
x[i] = temp0;
409418
i += 2 ;
410419
j++;
@@ -420,7 +429,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
420429
{
421430
while(j < n)
422431
{
423-
432+
424433
temp0 = da_r * x[i];
425434
x[i+1] = da_r * x[i+1];
426435
x[i] = temp0;
@@ -442,7 +451,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
442451

443452
}
444453

445-
}
454+
}
446455

447456
}
448457

0 commit comments

Comments
 (0)