51
51
/* Thread server implementation for Win32 (lazy implementation) */
52
52
53
53
/* Thread server common information */
54
- //typedef struct{
55
- // CRITICAL_SECTION lock;
56
- // HANDLE filled;
57
- // HANDLE killed;
58
- //
59
- // blas_queue_t *queue; /* Parameter Pointer */
60
- // int shutdown; /* server shutdown flag */
61
- //
62
- //} blas_pool_t;
63
54
64
55
static blas_queue_t * work_queue = NULL ;
65
56
static HANDLE kickoff_event = NULL ;
@@ -71,11 +62,19 @@ int blas_server_avail = 0;
71
62
/* Local Variables */
72
63
static BLASULONG server_lock = 0 ;
73
64
74
- //static blas_pool_t pool;
75
65
static HANDLE blas_threads [MAX_CPU_NUMBER ];
76
66
static DWORD blas_threads_id [MAX_CPU_NUMBER ];
67
+ static volatile int thread_target ; // target num of live threads, volatile for cross-thread reads
77
68
78
-
69
/*
 * WIN_CAS(dest, exch, comp): atomic compare-and-swap on the word at `dest`.
 * If *dest equals `comp`, `exch` is stored there; the value *dest held
 * before the operation is the result either way, so a caller detects
 * success by comparing the result with `comp`.
 *
 * The argument order follows the Win32 Interlocked* convention
 * (destination, exchange, comparand).  The GCC __sync builtin takes
 * (ptr, oldval, newval), hence the last two arguments are swapped in
 * that branch.
 *
 * The Interlocked* branches cast their operands to the exact types the
 * API declares (LONG / LONG64), so a caller that works in INT_PTR does
 * not hand over an incompatible pointer type on 32-bit builds.
 * `dest` must therefore point at a pointer-sized object.
 */
#if defined(__GNUC__) && (__GNUC__ < 6)
#define WIN_CAS(dest, exch, comp) \
  __sync_val_compare_and_swap((dest), (comp), (exch))
#else
#if defined(_WIN64)
#define WIN_CAS(dest, exch, comp) \
  InterlockedCompareExchange64((LONG64 volatile *)(dest), (LONG64)(exch), (LONG64)(comp))
#else
#define WIN_CAS(dest, exch, comp) \
  InterlockedCompareExchange((LONG volatile *)(dest), (LONG)(exch), (LONG)(comp))
#endif
#endif
79
78
80
79
static void legacy_exec (void * func , int mode , blas_arg_t * args , void * sb ){
81
80
@@ -206,14 +205,10 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
206
205
static DWORD WINAPI blas_thread_server (void * arg ){
207
206
208
207
/* Thread identifier */
209
- #ifdef SMP_DEBUG
210
208
BLASLONG cpu = (BLASLONG )arg ;
211
- #endif
212
209
213
210
void * buffer , * sa , * sb ;
214
211
blas_queue_t * queue ;
215
- DWORD action ;
216
- //HANDLE handles[] = {pool.filled, pool.killed};
217
212
218
213
/* Each server needs each buffer */
219
214
buffer = blas_memory_alloc (2 );
@@ -232,6 +227,12 @@ static DWORD WINAPI blas_thread_server(void *arg){
232
227
// event raised when work is added to the queue
233
228
WaitForSingleObject (kickoff_event , INFINITE );
234
229
230
+ if (cpu > thread_target - 2 )
231
+ {
232
+ //printf("thread [%d] exiting.\n", cpu);
233
+ break ; // excess thread, so worker thread exits
234
+ }
235
+
235
236
#ifdef SMP_DEBUG
236
237
fprintf (STDERR , "Server[%2ld] Got it.\n" , cpu );
237
238
#endif
@@ -245,17 +246,17 @@ static DWORD WINAPI blas_thread_server(void *arg){
245
246
246
247
LeaveCriticalSection (& queue_lock );
247
248
#else
248
- volatile work_queue_t * queue_next ;
249
+ volatile blas_queue_t * queue_next ;
249
250
250
251
INT_PTR prev_value ;
251
252
do {
252
- queue = (volatile work_queue_t * )work_queue ;
253
+ queue = (volatile blas_queue_t * )work_queue ;
253
254
if (!queue )
254
255
break ;
255
256
256
- queue_next = (volatile work_queue_t * )queue -> next ;
257
+ queue_next = (volatile blas_queue_t * )queue -> next ;
257
258
prev_value = WIN_CAS ((INT_PTR * )& work_queue , (INT_PTR )queue_next , (INT_PTR )queue );
258
- } while (prev_value != work_item );
259
+ } while (prev_value != queue );
259
260
#endif
260
261
261
262
if (queue ) {
@@ -377,9 +378,13 @@ int blas_thread_init(void){
377
378
// create the kickoff Event
378
379
kickoff_event = CreateEvent (NULL , TRUE, FALSE, NULL );
379
380
381
+ thread_target = blas_cpu_number ;
382
+
380
383
InitializeCriticalSection (& queue_lock );
381
384
382
385
for (i = 0 ; i < blas_cpu_number - 1 ; i ++ ){
386
+ //printf("thread_init: creating thread [%d]\n", i);
387
+
383
388
blas_threads [i ] = CreateThread (NULL , 0 ,
384
389
blas_thread_server , (void * )i ,
385
390
0 , & blas_threads_id [i ]);
@@ -564,10 +569,36 @@ void goto_set_num_threads(int num_threads)
564
569
565
570
if (num_threads > MAX_CPU_NUMBER ) num_threads = MAX_CPU_NUMBER ;
566
571
572
+ if (blas_server_avail && num_threads < blas_num_threads ) {
573
+ LOCK_COMMAND (& server_lock );
574
+
575
+ thread_target = num_threads ;
576
+
577
+ SetEvent (kickoff_event );
578
+
579
+ for (i = num_threads - 1 ; i < blas_num_threads - 1 ; i ++ ) {
580
+ //printf("set_num_threads: waiting on thread [%d] to quit.\n", i);
581
+
582
+ WaitForSingleObject (blas_threads [i ], INFINITE );
583
+
584
+ //printf("set_num_threads: thread [%d] has quit.\n", i);
585
+
586
+ CloseHandle (blas_threads [i ]);
587
+ }
588
+
589
+ blas_num_threads = num_threads ;
590
+
591
+ ResetEvent (kickoff_event );
592
+
593
+ UNLOCK_COMMAND (& server_lock );
594
+ }
595
+
567
596
if (num_threads > blas_num_threads ) {
568
597
569
598
LOCK_COMMAND (& server_lock );
570
599
600
+ thread_target = num_threads ;
601
+
571
602
//increased_threads = 1;
572
603
if (!blas_server_avail ){
573
604
// create the kickoff Event
@@ -579,6 +610,7 @@ void goto_set_num_threads(int num_threads)
579
610
}
580
611
581
612
for (i = (blas_num_threads > 0 ) ? blas_num_threads - 1 : 0 ; i < num_threads - 1 ; i ++ ){
613
+ //printf("set_num_threads: creating thread [%d]\n", i);
582
614
583
615
blas_threads [i ] = CreateThread (NULL , 0 ,
584
616
blas_thread_server , (void * )i ,
0 commit comments