51
51
/* This is a lazy thread implementation for Win32 */
52
52
53
53
/* Thread server common information */
54
- typedef struct {
55
- CRITICAL_SECTION lock ;
56
- HANDLE filled ;
57
- HANDLE killed ;
58
-
59
- blas_queue_t * queue ; /* Parameter Pointer */
60
- int shutdown ; /* server shutdown flag */
61
-
62
- } blas_pool_t ;
54
+ //typedef struct{
55
+ // CRITICAL_SECTION lock;
56
+ // HANDLE filled;
57
+ // HANDLE killed;
58
+ //
59
+ // blas_queue_t *queue; /* Parameter Pointer */
60
+ // int shutdown; /* server shutdown flag */
61
+ //
62
+ //} blas_pool_t;
63
+
64
+ static blas_queue_t * work_queue = NULL ; /* head of the linked list of pending work items; worker threads pop from here */
65
+ static HANDLE kickoff_event = NULL ; /* manual-reset event: set when work is queued, reset by exec_blas_async_wait when work_queue is empty */
66
+ static CRITICAL_SECTION queue_lock ; /* guards every read and write of work_queue */
63
67
64
68
/* We need this global for checking if initialization is finished. */
65
69
int blas_server_avail = 0 ;
66
70
67
71
/* Local Variables */
68
72
static BLASULONG server_lock = 0 ;
69
73
70
- static blas_pool_t pool ;
74
+ // static blas_pool_t pool;
71
75
static HANDLE blas_threads [MAX_CPU_NUMBER ];
72
76
static DWORD blas_threads_id [MAX_CPU_NUMBER ];
73
77
@@ -209,7 +213,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
209
213
void * buffer , * sa , * sb ;
210
214
blas_queue_t * queue ;
211
215
DWORD action ;
212
- HANDLE handles [] = {pool .filled , pool .killed };
216
+ // HANDLE handles[] = {pool.filled, pool.killed};
213
217
214
218
/* Each server needs each buffer */
215
219
buffer = blas_memory_alloc (2 );
@@ -225,29 +229,38 @@ static DWORD WINAPI blas_thread_server(void *arg){
225
229
#ifdef SMP_DEBUG
226
230
fprintf (STDERR , "Server[%2ld] Waiting for Queue.\n" , cpu );
227
231
#endif
228
-
229
- do {
230
- action = WaitForMultipleObjects (2 , handles , FALSE, INFINITE );
231
- } while ((action != WAIT_OBJECT_0 ) && (action != WAIT_OBJECT_0 + 1 ));
232
-
233
- if (action == WAIT_OBJECT_0 + 1 ) break ;
232
+ // event raised when work is added to the queue
233
+ WaitForSingleObject (kickoff_event , INFINITE );
234
234
235
235
#ifdef SMP_DEBUG
236
236
fprintf (STDERR , "Server[%2ld] Got it.\n" , cpu );
237
237
#endif
238
238
239
- EnterCriticalSection (& pool .lock );
239
+ #if 1
240
+ EnterCriticalSection (& queue_lock );
240
241
241
- queue = pool .queue ;
242
- if (queue ) pool .queue = queue -> next ;
242
+ queue = work_queue ;
243
+ if (queue )
244
+ work_queue = work_queue -> next ;
243
245
244
- LeaveCriticalSection (& pool .lock );
246
+ LeaveCriticalSection (& queue_lock );
247
+ #else
248
+ volatile work_queue_t * queue_next ;
249
+
250
+ INT_PTR prev_value ;
251
+ do {
252
+ queue = (volatile work_queue_t * )work_queue ;
253
+ if (!queue )
254
+ break ;
255
+
256
+ queue_next = (volatile work_queue_t * )queue -> next ;
257
+ prev_value = WIN_CAS ((INT_PTR * )& work_queue , (INT_PTR )queue_next , (INT_PTR )queue );
258
+ } while (prev_value != work_item );
259
+ #endif
245
260
246
261
if (queue ) {
247
262
int (* routine )(blas_arg_t * , void * , void * , void * , void * , BLASLONG ) = queue -> routine ;
248
263
249
- if (pool .queue ) SetEvent (pool .filled );
250
-
251
264
sa = queue -> sa ;
252
265
sb = queue -> sb ;
253
266
@@ -331,14 +344,6 @@ static DWORD WINAPI blas_thread_server(void *arg){
331
344
#ifdef SMP_DEBUG
332
345
fprintf (STDERR , "Server[%2ld] Finished!\n" , cpu );
333
346
#endif
334
-
335
- EnterCriticalSection (& queue -> lock );
336
-
337
- queue -> status = BLAS_STATUS_FINISHED ;
338
-
339
- LeaveCriticalSection (& queue -> lock );
340
-
341
- SetEvent (queue -> finish );
342
347
}
343
348
344
349
/* Shutdown procedure */
@@ -366,13 +371,10 @@ int blas_thread_init(void){
366
371
#endif
367
372
368
373
if (!blas_server_avail ){
374
+ // create the kickoff Event
375
+ kickoff_event = CreateEvent (NULL , TRUE, FALSE, NULL );
369
376
370
- InitializeCriticalSection (& pool .lock );
371
- pool .filled = CreateEvent (NULL , FALSE, FALSE, NULL );
372
- pool .killed = CreateEvent (NULL , TRUE, FALSE, NULL );
373
-
374
- pool .shutdown = 0 ;
375
- pool .queue = NULL ;
377
+ InitializeCriticalSection (& queue_lock );
376
378
377
379
for (i = 0 ; i < blas_cpu_number - 1 ; i ++ ){
378
380
blas_threads [i ] = CreateThread (NULL , 0 ,
@@ -409,32 +411,39 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
409
411
current = queue ;
410
412
411
413
while (current ) {
412
- InitializeCriticalSection (& current -> lock );
413
- current -> finish = CreateEvent (NULL , FALSE, FALSE, NULL );
414
414
current -> position = pos ;
415
415
416
416
#ifdef CONSISTENT_FPCSR
417
417
__asm__ __volatile__ ("fnstcw %0" : "=m" (current -> x87_mode ));
418
418
__asm__ __volatile__ ("stmxcsr %0" : "=m" (current -> sse_mode ));
419
419
#endif
420
420
421
+ current -> finished = 0 ;
421
422
current = current -> next ;
422
423
pos ++ ;
423
424
}
424
425
425
- EnterCriticalSection (& pool . lock );
426
+ EnterCriticalSection (& queue_lock );
426
427
427
- if (pool .queue ) {
428
- current = pool .queue ;
429
- while (current -> next ) current = current -> next ;
430
- current -> next = queue ;
431
- } else {
432
- pool .queue = queue ;
428
+ if (!work_queue )
429
+ {
430
+ work_queue = queue ;
433
431
}
432
+ else
433
+ {
434
+ blas_queue_t * next_item = work_queue ;
434
435
435
- LeaveCriticalSection (& pool .lock );
436
+ // find the end of the work queue
437
+ while (next_item )
438
+ next_item = next_item -> next ;
436
439
437
- SetEvent (pool .filled );
440
+ // add new work to the end
441
+ next_item = queue ;
442
+ }
443
+
444
+ LeaveCriticalSection (& queue_lock );
445
+
446
+ SetEvent (kickoff_event );
438
447
439
448
return 0 ;
440
449
}
@@ -449,21 +458,26 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
449
458
#ifdef SMP_DEBUG
450
459
fprintf (STDERR , "Waiting Queue ..\n" );
451
460
#endif
461
+ while (!queue -> finished )
462
+ YIELDING ;
452
463
453
- WaitForSingleObject (queue -> finish , INFINITE );
454
-
455
- CloseHandle (queue -> finish );
456
- DeleteCriticalSection (& queue -> lock );
457
-
458
- queue = queue -> next ;
459
- num -- ;
464
+ queue = queue -> next ;
465
+ num -- ;
460
466
}
461
467
462
468
#ifdef SMP_DEBUG
463
469
fprintf (STDERR , "Completely Done.\n\n" );
464
470
#endif
471
+ // Only reset the event (putting the worker threads back to sleep) when the queue is
472
+ // empty; work added to the queue after this batch must still be picked up.
473
+ EnterCriticalSection (& queue_lock );
465
474
466
- return 0 ;
475
+ if (work_queue == NULL )
476
+ ResetEvent (kickoff_event );
477
+
478
+ LeaveCriticalSection (& queue_lock );
479
+
480
+ return 0 ;
467
481
}
468
482
469
483
/* Execute Threads */
@@ -512,8 +526,6 @@ int BLASFUNC(blas_thread_shutdown)(void){
512
526
513
527
if (blas_server_avail ){
514
528
515
- SetEvent (pool .killed );
516
-
517
529
for (i = 0 ; i < blas_num_threads - 1 ; i ++ ){
518
530
// Could also just use WaitForMultipleObjects
519
531
DWORD wait_thread_value = WaitForSingleObject (blas_threads [i ], 50 );
@@ -528,9 +540,6 @@ int BLASFUNC(blas_thread_shutdown)(void){
528
540
CloseHandle (blas_threads [i ]);
529
541
}
530
542
531
- CloseHandle (pool .filled );
532
- CloseHandle (pool .killed );
533
-
534
543
blas_server_avail = 0 ;
535
544
}
536
545
@@ -558,13 +567,11 @@ void goto_set_num_threads(int num_threads)
558
567
559
568
//increased_threads = 1;
560
569
if (!blas_server_avail ){
570
+ // create the kickoff Event
571
+ kickoff_event = CreateEvent (NULL , TRUE, FALSE, NULL );
561
572
562
- InitializeCriticalSection (& pool .lock );
563
- pool .filled = CreateEvent (NULL , FALSE, FALSE, NULL );
564
- pool .killed = CreateEvent (NULL , TRUE, FALSE, NULL );
573
+ InitializeCriticalSection (& queue_lock );
565
574
566
- pool .shutdown = 0 ;
567
- pool .queue = NULL ;
568
575
blas_server_avail = 1 ;
569
576
}
570
577
0 commit comments