Skip to content

Commit fb7c53c

Browse files
authored
Merge pull request #4807 from martin-frbg/scalfixes
[WIP] Make NAN handling in the SCAL kernels depend on the dummy2 parameter
2 parents 15c53dd + b613754 commit fb7c53c

File tree

14 files changed

+151
-29
lines changed

14 files changed

+151
-29
lines changed

kernel/arm/scal.c

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,22 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
4343
if ( (n <= 0) || (inc_x <= 0))
4444
return(0);
4545

46+
if (dummy2 == 0) {
47+
while(j < n)
48+
{
4649

47-
while(j < n)
48-
{
50+
if ( da == 0.0 )
51+
x[i]=0.0;
52+
else
53+
x[i] = da * x[i] ;
54+
55+
i += inc_x ;
56+
j++;
57+
}
58+
} else {
59+
60+
while(j < n)
61+
{
4962

5063
if ( da == 0.0 )
5164
if (!isnan(x[i]) && !isinf(x[i])) {
@@ -59,6 +72,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
5972
i += inc_x ;
6073
j++;
6174

75+
}
6276
}
6377
return 0;
6478

kernel/arm64/scal.S

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3333
#define X_COPY x5 /* X vector address */
3434
#define INC_X x4 /* X stride */
3535
#define I x1 /* loop variable */
36-
36+
#define FLAG x9
3737
/*******************************************************************************
3838
* Macro definitions
3939
*******************************************************************************/
@@ -168,9 +168,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
168168
cmp N, xzr
169169
ble .Lscal_kernel_L999
170170

171-
//fcmp DA, #0.0
172-
//beq .Lscal_kernel_zero
171+
ldr FLAG, [sp]
172+
cmp FLAG, #1
173+
beq .Lscal_kernel_nansafe
174+
175+
fcmp DA, #0.0
176+
beq .Lscal_kernel_zero
173177

178+
.Lscal_kernel_nansafe:
174179
cmp INC_X, #1
175180
bne .Lscal_kernel_S_BEGIN
176181

kernel/power/dscal.c

Lines changed: 31 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,15 @@ static void dscal_kernel_8_zero (BLASLONG n, FLOAT *x)
7373

7474
for( i=0; i<n; i+=8 )
7575
{
76+
x[0] = alpha;
77+
x[1] = alpha;
78+
x[2] = alpha;
79+
x[3] = alpha;
80+
x[4] = alpha;
81+
x[5] = alpha;
82+
x[6] = alpha;
83+
x[7] = alpha;
84+
#if 0
7685
if(isfinite(x[0]))
7786
x[0] = alpha;
7887
else
@@ -106,7 +115,8 @@ static void dscal_kernel_8_zero (BLASLONG n, FLOAT *x)
106115
else
107116
x[7] = NAN;
108117
x+=8;
109-
}
118+
#endif
119+
}
110120

111121
}
112122

@@ -130,6 +140,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
130140
if ( n >= 16 )
131141
{
132142
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;
143+
if (dummy2 == 0)
144+
for (j = 0; j < align; j++) {
145+
x [j] = 0.0;
146+
}
147+
else
133148
for (j = 0; j < align; j++) {
134149
if (isfinite(x[j]))
135150
x[j] = 0.0;
@@ -151,7 +166,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
151166
j=n1;
152167
}
153168
#endif
154-
169+
if (dummy2 == 0)
170+
while(j < n)
171+
{
172+
x[j]=0.0;
173+
j++;
174+
}
175+
else
155176
while(j < n)
156177
{
157178
if (!isfinite(x[j]))
@@ -202,7 +223,14 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
202223

203224
if ( da == 0.0 )
204225
{
205-
226+
if (dummy2 == 0)
227+
while(j < n)
228+
{
229+
x[i]=0.0;
230+
i += inc_x;
231+
j++;
232+
}
233+
else
206234
while(j < n)
207235
{
208236
if (!isfinite(x[i]))

kernel/power/scal.S

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,19 +47,23 @@
4747
#ifndef __64BIT__
4848
#define X r6
4949
#define INCX r7
50+
#define FLAG r11
5051
#else
5152
#define X r7
5253
#define INCX r8
54+
#define FLAG r12
5355
#endif
5456
#endif
5557

5658
#if defined(_AIX) || defined(__APPLE__)
5759
#if !defined(__64BIT__) && defined(DOUBLE)
5860
#define X r8
5961
#define INCX r9
62+
#define FLAG r13
6063
#else
6164
#define X r7
6265
#define INCX r8
66+
#define FLAG r12
6367
#endif
6468
#endif
6569

@@ -84,9 +88,12 @@
8488
cmpwi cr0, N, 0
8589
blelr- cr0
8690

87-
// fcmpu cr0, FZERO, ALPHA
88-
// bne- cr0, LL(A1I1)
89-
b LL(A1I1)
91+
fcmpu cr0, FZERO, ALPHA
92+
bne- cr0, LL(A1I1)
93+
94+
ld FLAG, 48+64+8(SP)
95+
cmpwi cr0, FLAG, 1
96+
beq- cr0, LL(A1I1)
9097

9198
cmpwi cr0, INCX, SIZE
9299
bne- cr0, LL(A0IN)

kernel/power/sscal.c

Lines changed: 40 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,24 @@ static void sscal_kernel_16_zero( BLASLONG n, FLOAT *x )
7474

7575
for( i=0; i<n; i+=8 )
7676
{
77-
if (isfinite(x[0]))
77+
x[0] = alpha;
78+
x[1] = alpha;
79+
x[2] = alpha;
80+
x[3] = alpha;
81+
x[4] = alpha;
82+
x[5] = alpha;
83+
x[6] = alpha;
84+
x[7] = alpha;
85+
x[8] = alpha;
86+
x[9] = alpha;
87+
x[10] = alpha;
88+
x[11] = alpha;
89+
x[12] = alpha;
90+
x[13] = alpha;
91+
x[14] = alpha;
92+
x[15] = alpha;
93+
#if 0
94+
if (isfinite(x[0]))
7895
x[0] = alpha;
7996
else
8097
x[0] = NAN;
@@ -107,7 +124,8 @@ static void sscal_kernel_16_zero( BLASLONG n, FLOAT *x )
107124
else
108125
x[7] = NAN;
109126
x+=8;
110-
}
127+
#endif
128+
}
111129

112130
}
113131

@@ -132,6 +150,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
132150
if ( n >= 32 )
133151
{
134152
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;
153+
if (dummy2 == 0)
154+
for (j = 0; j < align; j++){
155+
x[j] = 0.0;
156+
}
157+
else
135158
for (j = 0; j < align; j++) {
136159
if (isfinite(x[j]))
137160
x[j] = 0.0;
@@ -153,9 +176,15 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
153176
j=n1;
154177
}
155178
#endif
156-
179+
if (dummy2 == 0)
157180
while(j < n)
158181
{
182+
x[j] = 0.0;
183+
j++;
184+
}
185+
else
186+
while(j < n)
187+
{
159188
if (isfinite(x[j]))
160189
x[j]=0.0;
161190
else
@@ -204,7 +233,14 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
204233

205234
if ( da == 0.0 )
206235
{
207-
236+
if (dummy2 == 0)
237+
while(j < n)
238+
{
239+
x[i]=0.0;
240+
i += inc_x;
241+
j++;
242+
}
243+
else
208244
while(j < n)
209245
{
210246
if (isfinite(x[i]))

kernel/riscv64/scal.c

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,9 +43,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
4343
if ( (n <= 0) || (inc_x <= 0))
4444
return(0);
4545

46-
47-
while(j < n)
48-
{
46+
if (dummy2 == 0) {
47+
while(j < n)
48+
{
4949

5050
if ( da == 0.0 )
5151
if (isfinite(x[i]))
@@ -57,7 +57,19 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
5757

5858
i += inc_x ;
5959
j++;
60+
}
61+
} else {
62+
while(j < n)
63+
{
6064

65+
if ( da == 0.0 )
66+
x[i]=0.0;
67+
else
68+
x[i] = da * x[i] ;
69+
70+
i += inc_x ;
71+
j++;
72+
}
6173
}
6274
return 0;
6375

kernel/riscv64/scal_rvv.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
5656
FLOAT_V_T v0;
5757

5858
if(inc_x == 1) {
59-
if(da == 0.0) {
59+
if(dummy2 == 0 && da == 0.0) {
6060
int gvl = VSETVL_MAX;
6161
v0 = VFMVVF_FLOAT(0.0, gvl);
6262
for (size_t vl; n > 0; n -= vl, x += vl) {
@@ -75,7 +75,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
7575
} else {
7676
BLASLONG stride_x = inc_x * sizeof(FLOAT);
7777

78-
if(da == 0.0) {
78+
if(dummy2 == 0 && da == 0.0) {
7979
int gvl = VSETVL_MAX;
8080
v0 = VFMVVF_FLOAT(0.0, gvl);
8181
for (size_t vl; n > 0; n -= vl, x += vl*inc_x) {

kernel/riscv64/scal_vector.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
7171
FLOAT_V_T v0, v1;
7272
unsigned int gvl = 0;
7373
if(inc_x == 1){
74-
if (0){ //if(da == 0.0){
74+
if(dummy2 == 0 && da == 0.0){
7575
memset(&x[0], 0, n * sizeof(FLOAT));
7676
}else{
7777
gvl = VSETVL(n);
@@ -96,7 +96,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
9696
}
9797
}
9898
}else{
99-
if (0) { //if(da == 0.0){
99+
if(dummy2 == 0 && da == 0.0){
100100
BLASLONG stride_x = inc_x * sizeof(FLOAT);
101101
BLASLONG ix = 0;
102102
gvl = VSETVL(n);

kernel/x86/scal.S

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,19 +57,24 @@
5757
#ifdef XDOUBLE
5858
movl 44(%esp),%edi
5959
movl 48(%esp),%esi
60+
movl 64(%esp),%ecx
6061
#elif defined(DOUBLE)
6162
movl 36(%esp),%edi
6263
movl 40(%esp),%esi
64+
movl 56(%esp),%ecx
6365
#else
6466
movl 32(%esp),%edi
6567
movl 36(%esp),%esi
68+
movl 54(%esp),%ecx
6669
#endif
6770

6871
ftst
6972
fnstsw %ax
7073
andb $68, %ah
71-
// je .L300 # Alpha != ZERO
72-
jmp .L300
74+
je .L300 # Alpha != ZERO
75+
76+
cmpl $1,%ecx # dummy2 flag
77+
je .L300
7378

7479
/* Alpha == ZERO */
7580
cmpl $1,%esi

kernel/x86_64/scal_atom.S

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,8 +60,10 @@
6060
#ifdef WINDOWS_ABI
6161
movq 40(%rsp), X
6262
movq 48(%rsp), INCX
63-
63+
movq 64(%rsp), %r9
6464
movaps %xmm3, %xmm0
65+
#else
66+
movq 24(%rsp), %r9
6567
#endif
6668

6769
SAVEREGISTERS
@@ -73,6 +75,10 @@
7375
lea (, INCX, SIZE), INCX
7476
comisd %xmm0, %xmm1
7577
jne .L100
78+
jp .L100
79+
80+
cmpq $1, %r9
81+
je .L100
7682

7783
/* Alpha == ZERO */
7884
cmpq $SIZE, INCX

0 commit comments

Comments
 (0)