@@ -86,18 +86,26 @@ static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)
86
86
87
87
#endif
88
88
89
- OPENBLAS_COMPLEX_FLOAT CNAME (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
89
+
90
+ #if defined(SMP )
91
+ extern int blas_level1_thread_with_return_value (int mode , BLASLONG m , BLASLONG n ,
92
+ BLASLONG k , void * alpha , void * a , BLASLONG lda , void * b , BLASLONG ldb ,
93
+ void * c , BLASLONG ldc , int (* function )(), int nthreads );
94
+ #endif
95
+
96
+
97
+
98
+ static void zdot_compute (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y ,OPENBLAS_COMPLEX_FLOAT * result )
90
99
{
91
100
BLASLONG i ;
92
101
BLASLONG ix ,iy ;
93
102
FLOAT dot [4 ] = { 0.0 , 0.0 , 0.0 , 0.0 } ;
94
-
103
+
95
104
if ( n <= 0 )
96
105
{
97
- // CREAL(result) = 0.0 ;
98
- // CIMAG(result) = 0.0 ;
99
- OPENBLAS_COMPLEX_FLOAT result = OPENBLAS_MAKE_COMPLEX_FLOAT (0.0 ,0.0 );
100
- return (result );
106
+ OPENBLAS_COMPLEX_FLOAT res = OPENBLAS_MAKE_COMPLEX_FLOAT (0.0 ,0.0 );
107
+ * result = res ;
108
+ return ;
101
109
102
110
}
103
111
@@ -150,18 +158,68 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
150
158
}
151
159
152
160
#if !defined(CONJ )
153
- OPENBLAS_COMPLEX_FLOAT result = OPENBLAS_MAKE_COMPLEX_FLOAT (dot [0 ]- dot [1 ],dot [2 ]+ dot [3 ]);
154
- // CREAL(result) = dot[0] - dot[1];
155
- // CIMAG(result) = dot[2] + dot[3];
161
+ OPENBLAS_COMPLEX_FLOAT res = OPENBLAS_MAKE_COMPLEX_FLOAT (dot [0 ]- dot [1 ],dot [2 ]+ dot [3 ]);
156
162
#else
157
- OPENBLAS_COMPLEX_FLOAT result = OPENBLAS_MAKE_COMPLEX_FLOAT (dot [0 ]+ dot [1 ],dot [2 ]- dot [3 ]);
158
- // CREAL(result) = dot[0] + dot[1];
159
- // CIMAG(result) = dot[2] - dot[3];
163
+ OPENBLAS_COMPLEX_FLOAT res = OPENBLAS_MAKE_COMPLEX_FLOAT (dot [0 ]+ dot [1 ],dot [2 ]- dot [3 ]);
164
+ #endif
165
+ * result = res ;
166
+ return ;
167
+ }
160
168
169
+ #if defined(SMP )
170
+ static int zdot_thread_function (BLASLONG n , BLASLONG dummy0 ,
171
+ BLASLONG dummy1 , FLOAT dummy2 , FLOAT * x , BLASLONG inc_x , FLOAT * y ,
172
+ BLASLONG inc_y , FLOAT * result , BLASLONG dummy3 )
173
+ {
174
+ zdot_compute (n , x , inc_x , y , inc_y , (void * )result );
175
+ return 0 ;
176
+ }
161
177
#endif
162
178
163
- return (result );
179
+ OPENBLAS_COMPLEX_FLOAT CNAME (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
180
+ {
181
+ #if defined(SMP )
182
+ int nthreads ;
183
+ FLOAT dummy_alpha ;
184
+ #endif
185
+ OPENBLAS_COMPLEX_FLOAT zdot ;
186
+ CREAL (zdot ) = 0.0 ;
187
+ CIMAG (zdot ) = 0.0 ;
164
188
165
- }
189
+ #if defined(SMP )
190
+ if (inc_x == 0 || inc_y == 0 || n <= 10000 )
191
+ nthreads = 1 ;
192
+ else
193
+ nthreads = num_cpu_avail (1 );
194
+
195
+ if (nthreads == 1 ) {
196
+ zdot_compute (n , x , inc_x , y , inc_y , & zdot );
197
+ } else {
198
+ int mode , i ;
199
+ char result [MAX_CPU_NUMBER * sizeof (double ) * 2 ];
200
+ OPENBLAS_COMPLEX_FLOAT * ptr ;
201
+
202
+ #if !defined(DOUBLE )
203
+ mode = BLAS_SINGLE | BLAS_COMPLEX ;
204
+ #else
205
+ mode = BLAS_DOUBLE | BLAS_COMPLEX ;
206
+ #endif
207
+
208
+ blas_level1_thread_with_return_value (mode , n , 0 , 0 , & dummy_alpha ,
209
+ x , inc_x , y , inc_y , result , 0 ,
210
+ ( void * )zdot_thread_function , nthreads );
166
211
212
+ ptr = (OPENBLAS_COMPLEX_FLOAT * )result ;
213
+ for (i = 0 ; i < nthreads ; i ++ ) {
214
+ CREAL (zdot ) = CREAL (zdot ) + CREAL (* ptr );
215
+ CIMAG (zdot ) = CIMAG (zdot ) + CIMAG (* ptr );
216
+ ptr = (void * )(((char * )ptr ) + sizeof (double ) * 2 );
217
+ }
218
+ }
219
+ #else
220
+ zdot_compute (n , x , inc_x , y , inc_y , & zdot );
221
+ #endif
222
+
223
+ return zdot ;
224
+ }
167
225
0 commit comments