Skip to content

Commit 7e9a4ba

Browse files
authored
Merge pull request #4741 from shivammonaka/Pthread_Scalability_Improvement
Enhancing Core Utilization in BLAS Calls: A Scalable Architecture
2 parents 0773695 + 9e22d70 commit 7e9a4ba

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

driver/level3/level3_thread.c

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -570,6 +570,8 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
570570
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
571571
#else
572572
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
573+
static pthread_cond_t level3_wakeup = PTHREAD_COND_INITIALIZER;
574+
volatile static BLASLONG CPU_AVAILABLE = MAX_CPU_NUMBER;
573575
#endif
574576

575577
blas_arg_t newarg;
@@ -639,6 +641,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
639641
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
640642
#else
641643
pthread_mutex_lock(&level3_lock);
644+
while(CPU_AVAILABLE < nthreads) {
645+
pthread_cond_wait(&level3_wakeup, &level3_lock);
646+
}
647+
CPU_AVAILABLE -= nthreads;
648+
WMB;
649+
pthread_mutex_unlock(&level3_lock);
642650
#endif
643651

644652
#ifdef USE_ALLOC_HEAP
@@ -783,6 +791,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
783791
#elif defined(OS_WINDOWS)
784792
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
785793
#else
794+
pthread_mutex_lock(&level3_lock);
795+
CPU_AVAILABLE += nthreads;
796+
WMB;
797+
pthread_cond_signal(&level3_wakeup);
786798
pthread_mutex_unlock(&level3_lock);
787799
#endif
788800

0 commit comments

Comments
 (0)