50
50
51
51
/* This is a lazy thread implementation for Win32 */
52
52
53
- #if defined (__GNUC__ ) && (__GNUC__ < 6 )
54
- #define WIN_CAS (dest , exch , comp ) __sync_val_compare_and_swap(dest, comp, exch)
55
- #else
56
- #if defined(_WIN64 )
57
- #define WIN_CAS (dest , exch , comp ) InterlockedCompareExchange64(dest, exch, comp)
58
- #else
59
- #define WIN_CAS (dest , exch , comp ) InterlockedCompareExchange(dest, exch, comp)
60
- #endif
61
- #endif
62
-
63
53
/* Thread server common information */
64
54
typedef struct {
65
- HANDLE taskSemaphore ;
55
+ CRITICAL_SECTION lock ;
56
+ HANDLE filled ;
57
+ HANDLE killed ;
66
58
67
59
blas_queue_t * queue ; /* Parameter Pointer */
68
60
int shutdown ; /* server shutdown flag */
@@ -79,6 +71,8 @@ static blas_pool_t pool;
79
71
static HANDLE blas_threads [MAX_CPU_NUMBER ];
80
72
static DWORD blas_threads_id [MAX_CPU_NUMBER ];
81
73
74
+
75
+
82
76
static void legacy_exec (void * func , int mode , blas_arg_t * args , void * sb ){
83
77
84
78
if (!(mode & BLAS_COMPLEX )){
@@ -204,6 +198,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
204
198
205
199
/* This is the main routine of the threads. Each thread waits until a job is */
206
200
/* queued. */
201
+
207
202
static DWORD WINAPI blas_thread_server (void * arg ){
208
203
209
204
/* Thread identifier */
@@ -212,7 +207,9 @@ static DWORD WINAPI blas_thread_server(void *arg){
212
207
#endif
213
208
214
209
void * buffer , * sa , * sb ;
215
- volatile blas_queue_t * queue ;
210
+ blas_queue_t * queue ;
211
+ DWORD action ;
212
+ HANDLE handles [] = {pool .filled , pool .killed };
216
213
217
214
/* Each server needs each buffer */
218
215
buffer = blas_memory_alloc (2 );
@@ -229,32 +226,28 @@ static DWORD WINAPI blas_thread_server(void *arg){
229
226
fprintf (STDERR , "Server[%2ld] Waiting for Queue.\n" , cpu );
230
227
#endif
231
228
232
- // all worker threads wait on the semaphore
233
- WaitForSingleObject (pool .taskSemaphore , INFINITE );
229
+ do {
230
+ action = WaitForMultipleObjects (2 , handles , FALSE, INFINITE );
231
+ } while ((action != WAIT_OBJECT_0 ) && (action != WAIT_OBJECT_0 + 1 ));
232
+
233
+ if (action == WAIT_OBJECT_0 + 1 ) break ;
234
234
235
- // kill the thread if we are shutting down the server
236
- if (pool .shutdown )
237
- break ;
238
-
239
235
#ifdef SMP_DEBUG
240
236
fprintf (STDERR , "Server[%2ld] Got it.\n" , cpu );
241
237
#endif
242
238
243
- // grab a queued task and update the list
244
- volatile blas_queue_t * queue_next ;
245
- INT_PTR prev_value ;
246
- do {
247
- queue = (volatile blas_queue_t * )pool .queue ;
248
- if (!queue )
249
- break ;
239
+ EnterCriticalSection (& pool .lock );
240
+
241
+ queue = pool .queue ;
242
+ if (queue ) pool .queue = queue -> next ;
250
243
251
- queue_next = (volatile blas_queue_t * )queue -> next ;
252
- prev_value = WIN_CAS ((INT_PTR * )& pool .queue , (INT_PTR )queue_next , (INT_PTR )queue );
253
- } while (prev_value != queue );
244
+ LeaveCriticalSection (& pool .lock );
254
245
255
246
if (queue ) {
256
247
int (* routine )(blas_arg_t * , void * , void * , void * , void * , BLASLONG ) = queue -> routine ;
257
248
249
+ if (pool .queue ) SetEvent (pool .filled );
250
+
258
251
sa = queue -> sa ;
259
252
sb = queue -> sb ;
260
253
@@ -339,8 +332,13 @@ static DWORD WINAPI blas_thread_server(void *arg){
339
332
fprintf (STDERR , "Server[%2ld] Finished!\n" , cpu );
340
333
#endif
341
334
342
- // mark our sub-task as complete
343
- InterlockedDecrement (& queue -> status );
335
+ EnterCriticalSection (& queue -> lock );
336
+
337
+ queue -> status = BLAS_STATUS_FINISHED ;
338
+
339
+ LeaveCriticalSection (& queue -> lock );
340
+
341
+ SetEvent (queue -> finish );
344
342
}
345
343
346
344
/* Shutdown procedure */
@@ -355,7 +353,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
355
353
}
356
354
357
355
/* Initializing routine */
358
- int blas_thread_init (void ){
356
+ int blas_thread_init (void ){
359
357
BLASLONG i ;
360
358
361
359
if (blas_server_avail || (blas_cpu_number <= 1 )) return 0 ;
@@ -369,7 +367,9 @@ static DWORD WINAPI blas_thread_server(void *arg){
369
367
370
368
if (!blas_server_avail ){
371
369
372
- pool .taskSemaphore = CreateSemaphore (NULL , 0 , blas_cpu_number - 1 , NULL );
370
+ InitializeCriticalSection (& pool .lock );
371
+ pool .filled = CreateEvent (NULL , FALSE, FALSE, NULL );
372
+ pool .killed = CreateEvent (NULL , TRUE, FALSE, NULL );
373
373
374
374
pool .shutdown = 0 ;
375
375
pool .queue = NULL ;
@@ -391,10 +391,11 @@ static DWORD WINAPI blas_thread_server(void *arg){
391
391
/*
392
392
The user can call one of two routines.
393
393
394
- exec_blas_async ... immediately returns after jobs are queued.
394
+ exec_blas_async ... immediately returns after jobs are queued.
395
395
396
- exec_blas ... returns after jobs are finished.
396
+ exec_blas ... returns after jobs are finished.
397
397
*/
398
+
398
399
int exec_blas_async (BLASLONG pos , blas_queue_t * queue ){
399
400
400
401
#if defined(SMP_SERVER )
@@ -408,7 +409,8 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
408
409
current = queue ;
409
410
410
411
while (current ) {
411
- current -> status = 1 ;
412
+ InitializeCriticalSection (& current -> lock );
413
+ current -> finish = CreateEvent (NULL , FALSE, FALSE, NULL );
412
414
current -> position = pos ;
413
415
414
416
#ifdef CONSISTENT_FPCSR
@@ -420,10 +422,19 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
420
422
pos ++ ;
421
423
}
422
424
423
- pool .queue = queue ;
425
+ EnterCriticalSection (& pool .lock );
426
+
427
+ if (pool .queue ) {
428
+ current = pool .queue ;
429
+ while (current -> next ) current = current -> next ;
430
+ current -> next = queue ;
431
+ } else {
432
+ pool .queue = queue ;
433
+ }
434
+
435
+ LeaveCriticalSection (& pool .lock );
424
436
425
- // start up worker threads
426
- ReleaseSemaphore (pool .taskSemaphore , pos - 1 , NULL );
437
+ SetEvent (pool .filled );
427
438
428
439
return 0 ;
429
440
}
@@ -439,9 +450,10 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
439
450
fprintf (STDERR , "Waiting Queue ..\n" );
440
451
#endif
441
452
442
- // spin-wait on each sub-task to finish
443
- while (* ((volatile int * )& queue -> status ))
444
- YIELDING ;
453
+ WaitForSingleObject (queue -> finish , INFINITE );
454
+
455
+ CloseHandle (queue -> finish );
456
+ DeleteCriticalSection (& queue -> lock );
445
457
446
458
queue = queue -> next ;
447
459
num -- ;
@@ -489,21 +501,18 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
489
501
490
502
/* Shutdown procedure, but the user doesn't have to call this routine. The */
491
503
/* kernel automatically kills threads. */
504
+
492
505
int BLASFUNC (blas_thread_shutdown )(void ){
493
506
494
507
int i ;
495
508
496
- #ifdef SMP_DEBUG
497
- fprintf (STDERR , "blas_thread_shutdown..\n" );
498
- #endif
499
-
500
509
if (!blas_server_avail ) return 0 ;
501
510
502
511
LOCK_COMMAND (& server_lock );
503
512
504
513
if (blas_server_avail ){
505
514
506
- pool . shutdown = 1 ;
515
+ SetEvent ( pool . killed ) ;
507
516
508
517
for (i = 0 ; i < blas_num_threads - 1 ; i ++ ){
509
518
// Could also just use WaitForMultipleObjects
@@ -519,7 +528,8 @@ int BLASFUNC(blas_thread_shutdown)(void){
519
528
CloseHandle (blas_threads [i ]);
520
529
}
521
530
522
- CloseHandle (pool .taskSemaphore );
531
+ CloseHandle (pool .filled );
532
+ CloseHandle (pool .killed );
523
533
524
534
blas_server_avail = 0 ;
525
535
}
@@ -549,14 +559,16 @@ void goto_set_num_threads(int num_threads)
549
559
//increased_threads = 1;
550
560
if (!blas_server_avail ){
551
561
552
- pool .taskSemaphore = CreateSemaphore (NULL , 0 , blas_cpu_number - 1 , NULL );
562
+ InitializeCriticalSection (& pool .lock );
563
+ pool .filled = CreateEvent (NULL , FALSE, FALSE, NULL );
564
+ pool .killed = CreateEvent (NULL , TRUE, FALSE, NULL );
553
565
554
566
pool .shutdown = 0 ;
555
567
pool .queue = NULL ;
556
568
blas_server_avail = 1 ;
557
569
}
558
570
559
- for (i = blas_num_threads ; i < num_threads - 1 ; i ++ ){
571
+ for (i = blas_num_threads - 1 ; i < num_threads - 1 ; i ++ ){
560
572
561
573
blas_threads [i ] = CreateThread (NULL , 0 ,
562
574
blas_thread_server , (void * )i ,
0 commit comments