Skip to content

Commit edac80d

Browse files
committed
some cleanup, dynamically scale threads, add missing WIN_CAS defn
1 parent 4ebf814 commit edac80d

File tree

1 file changed

+51
-19
lines changed

1 file changed

+51
-19
lines changed

driver/others/blas_server_win32.c

Lines changed: 51 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,6 @@
5151
/* This is a thread implementation for Win32 lazy implementation */
5252

5353
/* Thread server common information */
54-
//typedef struct{
55-
// CRITICAL_SECTION lock;
56-
// HANDLE filled;
57-
// HANDLE killed;
58-
//
59-
// blas_queue_t *queue; /* Parameter Pointer */
60-
// int shutdown; /* server shutdown flag */
61-
//
62-
//} blas_pool_t;
6354

6455
static blas_queue_t *work_queue = NULL;
6556
static HANDLE kickoff_event = NULL;
@@ -71,11 +62,19 @@ int blas_server_avail = 0;
7162
/* Local Variables */
7263
static BLASULONG server_lock = 0;
7364

74-
//static blas_pool_t pool;
7565
static HANDLE blas_threads [MAX_CPU_NUMBER];
7666
static DWORD blas_threads_id[MAX_CPU_NUMBER];
67+
static volatile int thread_target; // target num of live threads, volatile for cross-thread reads
7768

78-
69+
#if defined (__GNUC__) && (__GNUC__ < 6)
70+
#define WIN_CAS(dest, exch, comp) __sync_val_compare_and_swap(dest, comp, exch)
71+
#else
72+
#if defined(_WIN64)
73+
#define WIN_CAS(dest, exch, comp) InterlockedCompareExchange64(dest, exch, comp)
74+
#else
75+
#define WIN_CAS(dest, exch, comp) InterlockedCompareExchange(dest, exch, comp)
76+
#endif
77+
#endif
7978

8079
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
8180

@@ -206,14 +205,10 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
206205
static DWORD WINAPI blas_thread_server(void *arg){
207206

208207
/* Thread identifier */
209-
#ifdef SMP_DEBUG
210208
BLASLONG cpu = (BLASLONG)arg;
211-
#endif
212209

213210
void *buffer, *sa, *sb;
214211
blas_queue_t *queue;
215-
DWORD action;
216-
//HANDLE handles[] = {pool.filled, pool.killed};
217212

218213
/* Each server needs each buffer */
219214
buffer = blas_memory_alloc(2);
@@ -232,6 +227,12 @@ static DWORD WINAPI blas_thread_server(void *arg){
232227
// event raised when work is added to the queue
233228
WaitForSingleObject(kickoff_event, INFINITE);
234229

230+
if (cpu > thread_target - 2)
231+
{
232+
//printf("thread [%d] exiting.\n", cpu);
233+
break; // excess thread, so worker thread exits
234+
}
235+
235236
#ifdef SMP_DEBUG
236237
fprintf(STDERR, "Server[%2ld] Got it.\n", cpu);
237238
#endif
@@ -245,17 +246,17 @@ static DWORD WINAPI blas_thread_server(void *arg){
245246

246247
LeaveCriticalSection(&queue_lock);
247248
#else
248-
volatile work_queue_t* queue_next;
249+
volatile blas_queue_t* queue_next;
249250

250251
INT_PTR prev_value;
251252
do {
252-
queue = (volatile work_queue_t*)work_queue;
253+
queue = (volatile blas_queue_t*)work_queue;
253254
if (!queue)
254255
break;
255256

256-
queue_next = (volatile work_queue_t*)queue->next;
257+
queue_next = (volatile blas_queue_t*)queue->next;
257258
prev_value = WIN_CAS((INT_PTR*)&work_queue, (INT_PTR)queue_next, (INT_PTR)queue);
258-
} while (prev_value != work_item);
259+
} while (prev_value != queue);
259260
#endif
260261

261262
if (queue) {
@@ -377,9 +378,13 @@ int blas_thread_init(void){
377378
// create the kickoff Event
378379
kickoff_event = CreateEvent(NULL, TRUE, FALSE, NULL);
379380

381+
thread_target = blas_cpu_number;
382+
380383
InitializeCriticalSection(&queue_lock);
381384

382385
for(i = 0; i < blas_cpu_number - 1; i++){
386+
//printf("thread_init: creating thread [%d]\n", i);
387+
383388
blas_threads[i] = CreateThread(NULL, 0,
384389
blas_thread_server, (void *)i,
385390
0, &blas_threads_id[i]);
@@ -564,10 +569,36 @@ void goto_set_num_threads(int num_threads)
564569

565570
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
566571

572+
if (blas_server_avail && num_threads < blas_num_threads) {
573+
LOCK_COMMAND(&server_lock);
574+
575+
thread_target = num_threads;
576+
577+
SetEvent(kickoff_event);
578+
579+
for (i = num_threads - 1; i < blas_num_threads - 1; i++) {
580+
//printf("set_num_threads: waiting on thread [%d] to quit.\n", i);
581+
582+
WaitForSingleObject(blas_threads[i], INFINITE);
583+
584+
//printf("set_num_threads: thread [%d] has quit.\n", i);
585+
586+
CloseHandle(blas_threads[i]);
587+
}
588+
589+
blas_num_threads = num_threads;
590+
591+
ResetEvent(kickoff_event);
592+
593+
UNLOCK_COMMAND(&server_lock);
594+
}
595+
567596
if (num_threads > blas_num_threads) {
568597

569598
LOCK_COMMAND(&server_lock);
570599

600+
thread_target = num_threads;
601+
571602
//increased_threads = 1;
572603
if (!blas_server_avail){
573604
// create the kickoff Event
@@ -579,6 +610,7 @@ void goto_set_num_threads(int num_threads)
579610
}
580611

581612
for(i = (blas_num_threads > 0) ? blas_num_threads - 1 : 0; i < num_threads - 1; i++){
613+
//printf("set_num_threads: creating thread [%d]\n", i);
582614

583615
blas_threads[i] = CreateThread(NULL, 0,
584616
blas_thread_server, (void *)i,

0 commit comments

Comments
 (0)