Skip to content

Commit f3cebb3

Browse files
committed
x86: Fixed numpy CI failure when the target is ZEN.
1 parent e1eef56 commit f3cebb3

File tree

3 files changed

+283
-300
lines changed

3 files changed

+283
-300
lines changed

interface/scal.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){
8585
if (nthreads == 1) {
8686
#endif
8787

88-
SCAL_K(n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0);
88+
SCAL_K(n, 0, 0, alpha, x, incx, NULL, 0, NULL, 1);
8989

9090
#ifdef SMP
9191
} else {
@@ -102,7 +102,7 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){
102102
#else
103103
&alpha,
104104
#endif
105-
x, incx, NULL, 0, NULL, 0, (int (*)(void))SCAL_K, nthreads);
105+
x, incx, NULL, 0, NULL, 1, (int (*)(void))SCAL_K, nthreads);
106106

107107
}
108108
#endif

kernel/x86_64/dscal.c

Lines changed: 148 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -43,41 +43,41 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4343
static void dscal_kernel_8( BLASLONG n, FLOAT *da , FLOAT *x )
4444
{
4545

46-
BLASLONG i;
47-
FLOAT alpha = *da;
48-
49-
for( i=0; i<n; i+=8 )
50-
{
51-
x[0] *= alpha;
52-
x[1] *= alpha;
53-
x[2] *= alpha;
54-
x[3] *= alpha;
55-
x[4] *= alpha;
56-
x[5] *= alpha;
57-
x[6] *= alpha;
58-
x[7] *= alpha;
59-
x+=8;
60-
}
46+
BLASLONG i;
47+
FLOAT alpha = *da;
48+
49+
for( i=0; i<n; i+=8 )
50+
{
51+
x[0] *= alpha;
52+
x[1] *= alpha;
53+
x[2] *= alpha;
54+
x[3] *= alpha;
55+
x[4] *= alpha;
56+
x[5] *= alpha;
57+
x[6] *= alpha;
58+
x[7] *= alpha;
59+
x+=8;
60+
}
6161

6262
}
6363

6464

6565
static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha , FLOAT *x )
6666
{
6767

68-
BLASLONG i;
69-
for( i=0; i<n; i+=8 )
70-
{
71-
x[0] = 0.0;
72-
x[1] = 0.0;
73-
x[2] = 0.0;
74-
x[3] = 0.0;
75-
x[4] = 0.0;
76-
x[5] = 0.0;
77-
x[6] = 0.0;
78-
x[7] = 0.0;
79-
x+=8;
80-
}
68+
BLASLONG i;
69+
for( i=0; i<n; i+=8 )
70+
{
71+
x[0] = 0.0;
72+
x[1] = 0.0;
73+
x[2] = 0.0;
74+
x[3] = 0.0;
75+
x[4] = 0.0;
76+
x[5] = 0.0;
77+
x[6] = 0.0;
78+
x[7] = 0.0;
79+
x+=8;
80+
}
8181

8282
}
8383

@@ -89,51 +89,51 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
8989
static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_x)
9090
{
9191

92-
FLOAT *x1=NULL;
93-
BLASLONG inc_x3;
92+
FLOAT *x1=NULL;
93+
BLASLONG inc_x3;
9494

95-
inc_x <<= 3;
96-
inc_x3 = (inc_x << 1) + inc_x;
95+
inc_x <<= 3;
96+
inc_x3 = (inc_x << 1) + inc_x;
9797

9898
__asm__ __volatile__
9999
(
100-
"movddup (%3), %%xmm0 \n\t" // alpha
100+
"movddup (%3), %%xmm0 \n\t" // alpha
101101

102-
"leaq (%1,%4,4), %2 \n\t"
102+
"leaq (%1,%4,4), %2 \n\t"
103103

104-
".p2align 4 \n\t"
104+
".p2align 4 \n\t"
105105

106-
"1: \n\t"
107-
"movsd (%1) , %%xmm4 \n\t"
108-
"movhpd (%1,%4,1), %%xmm4 \n\t"
109-
"movsd (%1,%4,2), %%xmm5 \n\t"
110-
"movhpd (%1,%5,1), %%xmm5 \n\t"
106+
"1: \n\t"
107+
"movsd (%1) , %%xmm4 \n\t"
108+
"movhpd (%1,%4,1), %%xmm4 \n\t"
109+
"movsd (%1,%4,2), %%xmm5 \n\t"
110+
"movhpd (%1,%5,1), %%xmm5 \n\t"
111111

112-
"movsd (%2) , %%xmm6 \n\t"
113-
"movhpd (%2,%4,1), %%xmm6 \n\t"
114-
"movsd (%2,%4,2), %%xmm7 \n\t"
115-
"movhpd (%2,%5,1), %%xmm7 \n\t"
112+
"movsd (%2) , %%xmm6 \n\t"
113+
"movhpd (%2,%4,1), %%xmm6 \n\t"
114+
"movsd (%2,%4,2), %%xmm7 \n\t"
115+
"movhpd (%2,%5,1), %%xmm7 \n\t"
116116

117-
"mulpd %%xmm0, %%xmm4 \n\t"
118-
"mulpd %%xmm0, %%xmm5 \n\t"
119-
"mulpd %%xmm0, %%xmm6 \n\t"
120-
"mulpd %%xmm0, %%xmm7 \n\t"
117+
"mulpd %%xmm0, %%xmm4 \n\t"
118+
"mulpd %%xmm0, %%xmm5 \n\t"
119+
"mulpd %%xmm0, %%xmm6 \n\t"
120+
"mulpd %%xmm0, %%xmm7 \n\t"
121121

122-
"movsd %%xmm4 , (%1) \n\t"
123-
"movhpd %%xmm4 , (%1,%4,1) \n\t"
124-
"movsd %%xmm5 , (%1,%4,2) \n\t"
125-
"movhpd %%xmm5 , (%1,%5,1) \n\t"
122+
"movsd %%xmm4 , (%1) \n\t"
123+
"movhpd %%xmm4 , (%1,%4,1) \n\t"
124+
"movsd %%xmm5 , (%1,%4,2) \n\t"
125+
"movhpd %%xmm5 , (%1,%5,1) \n\t"
126126

127-
"movsd %%xmm6 , (%2) \n\t"
128-
"movhpd %%xmm6 , (%2,%4,1) \n\t"
129-
"movsd %%xmm7 , (%2,%4,2) \n\t"
130-
"movhpd %%xmm7 , (%2,%5,1) \n\t"
127+
"movsd %%xmm6 , (%2) \n\t"
128+
"movhpd %%xmm6 , (%2,%4,1) \n\t"
129+
"movsd %%xmm7 , (%2,%4,2) \n\t"
130+
"movhpd %%xmm7 , (%2,%5,1) \n\t"
131131

132-
"leaq (%1,%4,8), %1 \n\t"
133-
"leaq (%2,%4,8), %2 \n\t"
132+
"leaq (%1,%4,8), %1 \n\t"
133+
"leaq (%2,%4,8), %2 \n\t"
134134

135-
"subq $8, %0 \n\t"
136-
"jnz 1b \n\t"
135+
"subq $8, %0 \n\t"
136+
"jnz 1b \n\t"
137137

138138
:
139139
"+r" (n), // 0
@@ -150,106 +150,96 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
150150
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
151151
"memory"
152152
);
153-
154-
155153
}
156154

157155
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
158156
{
159-
BLASLONG i=0,j=0;
160-
161-
if ( inc_x != 1 )
162-
{
163-
164-
if ( da == 0.0 )
165-
{
166-
167-
BLASLONG n1 = n & -2;
168-
169-
while(j < n1)
170-
{
171-
172-
if (isinf(x[i])||isnan(x[i]))
173-
x[i]=NAN;
174-
else x[i]=0.0;
175-
if (isinf(x[i+inc_x])||isnan(x[i+inc_x]))
176-
x[i+inc_x]=NAN;
177-
else x[i+inc_x]=0.0;
178-
i += 2*inc_x ;
179-
j+=2;
180-
181-
}
182-
183-
while(j < n)
184-
{
185-
186-
if (isinf(x[i])||isnan(x[i]))
187-
x[i]=NAN;
188-
else x[i]=0.0;
189-
i += inc_x ;
190-
j++;
191-
192-
}
193-
}
194-
else
195-
{
196-
197-
BLASLONG n1 = n & -8;
198-
if ( n1 > 0 )
199-
{
200-
dscal_kernel_inc_8(n1, &da, x, inc_x);
201-
i = n1 * inc_x;
202-
j = n1;
203-
}
204-
205-
while(j < n)
206-
{
207-
208-
x[i] *= da;
209-
i += inc_x ;
210-
j++;
211-
212-
}
213-
214-
}
215-
216-
return(0);
217-
}
218-
219-
BLASLONG n1 = n & -8;
220-
if ( n1 > 0 )
221-
{
222-
// if ( da == 0.0 )
223-
// dscal_kernel_8_zero(n1 , &da , x);
224-
// else
225-
dscal_kernel_8(n1 , &da , x);
226-
}
227-
228-
if ( da == 0.0 )
229-
{
230-
for ( i=n1 ; i<n; i++ )
231-
{
232-
if(isinf(x[i])||isnan(x[i]))
233-
x[i]=NAN;
234-
else x[i] = 0.0;
235-
}
236-
}
237-
else if (isinf(da)){
238-
for ( i=n1 ; i<n; i++)
239-
if (x[i]==0.) x[i]=NAN;
240-
else x[i] *=da;
241-
}
242-
else
243-
{
244-
245-
for ( i=n1 ; i<n; i++ )
246-
{
247-
if(isinf(x[i]))
248-
x[i]=NAN;
249-
else x[i] *= da;
250-
}
251-
}
252-
return(0);
157+
BLASLONG i = 0, j = 0;
158+
159+
// Resolved issue 4728 when the caller is dscal
160+
if (dummy2 == 1 && da == 0.0)
161+
{
162+
if ( inc_x != 1 )
163+
{
164+
BLASLONG n1 = n & -8;
165+
if ( n1 > 0 )
166+
{
167+
dscal_kernel_inc_8(n1, &da, x, inc_x);
168+
i = n1 * inc_x;
169+
j = n1;
170+
}
171+
while(j < n)
172+
{
173+
x[i] *= da;
174+
i += inc_x ;
175+
j++;
176+
}
177+
}
178+
else
179+
{
180+
BLASLONG n1 = n & -8;
181+
if ( n1 > 0)
182+
dscal_kernel_8(n1 , &da , x);
183+
for ( i = n1 ; i < n; i++ )
184+
x[i] *= da;
185+
}
186+
}
187+
else
188+
{
189+
if ( inc_x != 1 )
190+
{
191+
if( da == 0.0)
192+
{
193+
BLASLONG n1 = n & -2;
194+
while(j < n1)
195+
{
196+
x[i] = 0.0;
197+
x[i+inc_x] = 0.0;
198+
i += 2 * inc_x ;
199+
j += 2;
200+
}
201+
while(j < n)
202+
{
203+
x[i] = 0.0;
204+
i += inc_x ;
205+
j++;
206+
}
207+
}
208+
else
209+
{
210+
BLASLONG n1 = n & -8;
211+
if ( n1 > 0 )
212+
{
213+
dscal_kernel_inc_8(n1, &da, x, inc_x);
214+
i = n1 * inc_x;
215+
j = n1;
216+
}
217+
while(j < n)
218+
{
219+
x[i] *= da;
220+
i += inc_x ;
221+
j++;
222+
}
223+
}
224+
}
225+
else
226+
{
227+
if ( da == 0.0 )
228+
{
229+
BLASLONG n1 = n & -8;
230+
if ( n1 > 0)
231+
dscal_kernel_8_zero(n1, &da, x);
232+
for ( i = n1 ; i < n; i++ )
233+
x[i] = 0.0;
234+
}
235+
else
236+
{
237+
BLASLONG n1 = n & -8;
238+
if ( n1 > 0)
239+
dscal_kernel_8(n1 , &da , x);
240+
for ( i = n1 ; i < n; i++ )
241+
x[i] *= da;
242+
}
243+
}
244+
}
253245
}
254-
255-

0 commit comments

Comments
 (0)