@@ -408,7 +408,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
408
408
409
409
/* Make sure if no one is using another buffer */
410
410
for (i = 0 ; i < args -> nthreads ; i ++ )
411
- while (job [mypos ].working [i ][CACHE_LINE_SIZE * bufferside ]) {YIELDING ;};
411
+ while (job [mypos ].working [i ][CACHE_LINE_SIZE * bufferside ]) {YIELDING ;MB ; };
412
412
413
413
STOP_RPCC (waiting1 );
414
414
@@ -441,7 +441,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
441
441
442
442
for (i = 0 ; i < args -> nthreads ; i ++ )
443
443
job [mypos ].working [i ][CACHE_LINE_SIZE * bufferside ] = (BLASLONG )buffer [bufferside ];
444
- }
444
+ WMB ;
445
+ }
445
446
446
447
current = mypos ;
447
448
@@ -458,7 +459,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
458
459
START_RPCC ();
459
460
460
461
/* thread has to wait */
461
- while (job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] == 0 ) {YIELDING ;};
462
+ while (job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] == 0 ) {YIELDING ;MB ; };
462
463
463
464
STOP_RPCC (waiting2 );
464
465
@@ -477,6 +478,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
477
478
478
479
if (m_to - m_from == min_i ) {
479
480
job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] = 0 ;
481
+ WMB ;
480
482
}
481
483
}
482
484
} while (current != mypos );
@@ -517,6 +519,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
517
519
if (is + min_i >= m_to ) {
518
520
/* Thread doesn't need this buffer any more */
519
521
job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] = 0 ;
522
+ WMB ;
520
523
}
521
524
}
522
525
@@ -541,7 +544,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
541
544
542
545
/* Make sure if no one is using another buffer */
543
546
for (i = 0 ; i < args -> nthreads ; i ++ )
544
- while (job [mypos ].working [i ][CACHE_LINE_SIZE * bufferside ]) {YIELDING ;};
547
+ while (job [mypos ].working [i ][CACHE_LINE_SIZE * bufferside ]) {YIELDING ;MB ; };
545
548
546
549
STOP_RPCC (waiting1 );
547
550
@@ -595,7 +598,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
595
598
START_RPCC ();
596
599
597
600
/* thread has to wait */
598
- while (job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] == 0 ) {YIELDING ;};
601
+ while (job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] == 0 ) {YIELDING ;MB ; };
599
602
600
603
STOP_RPCC (waiting2 );
601
604
@@ -613,6 +616,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
613
616
614
617
if (m_to - m_from == min_i ) {
615
618
job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] = 0 ;
619
+ WMB ;
616
620
}
617
621
}
618
622
} while (current != mypos );
@@ -677,7 +681,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
677
681
678
682
/* Make sure if no one is using another buffer */
679
683
for (i = 0 ; i < args -> nthreads ; i ++ )
680
- while (job [mypos ].working [i ][CACHE_LINE_SIZE * bufferside ]) {YIELDING ;};
684
+ while (job [mypos ].working [i ][CACHE_LINE_SIZE * bufferside ]) {YIELDING ;MB ; };
681
685
682
686
STOP_RPCC (waiting1 );
683
687
@@ -731,7 +735,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
731
735
START_RPCC ();
732
736
733
737
/* thread has to wait */
734
- while (job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] == 0 ) {YIELDING ;};
738
+ while (job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] == 0 ) {YIELDING ;MB ; };
735
739
736
740
STOP_RPCC (waiting2 );
737
741
@@ -748,8 +752,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
748
752
}
749
753
750
754
if (m_to - m_from == min_i ) {
751
- job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] = 0 ;
752
- }
755
+ job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] &= 0 ;
756
+ WMB ;
757
+ }
753
758
}
754
759
} while (current != mypos );
755
760
@@ -787,7 +792,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
787
792
#endif
788
793
if (is + min_i >= m_to ) {
789
794
/* Thread doesn't need this buffer any more */
790
- job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] = 0 ;
795
+ job [current ].working [mypos ][CACHE_LINE_SIZE * bufferside ] &= 0 ;
796
+ WMB ;
791
797
}
792
798
}
793
799
@@ -804,7 +810,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
804
810
805
811
for (i = 0 ; i < args -> nthreads ; i ++ ) {
806
812
for (xxx = 0 ; xxx < DIVIDE_RATE ; xxx ++ ) {
807
- while (job [mypos ].working [i ][CACHE_LINE_SIZE * xxx ] ) {YIELDING ;};
813
+ while (job [mypos ].working [i ][CACHE_LINE_SIZE * xxx ] ) {YIELDING ;MB ; };
808
814
}
809
815
}
810
816
@@ -840,6 +846,15 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
840
846
static int gemm_driver (blas_arg_t * args , BLASLONG * range_m , BLASLONG
841
847
* range_n , FLOAT * sa , FLOAT * sb , BLASLONG mypos ){
842
848
849
+ #ifndef USE_OPENMP
850
+ #ifndef OS_WINDOWS
851
+ static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER ;
852
+ #else
853
+ CRITICAL_SECTION level3_lock ;
854
+ InitializeCriticalSection ((PCRITICAL_SECTION )& level3_lock );
855
+ #endif
856
+ #endif
857
+
843
858
blas_arg_t newarg ;
844
859
845
860
blas_queue_t queue [MAX_CPU_NUMBER ];
@@ -869,6 +884,14 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
869
884
mode = BLAS_SINGLE | BLAS_REAL | BLAS_NODE ;
870
885
#endif
871
886
887
+ #ifndef USE_OPENMP
888
+ #ifndef OS_WINDOWS
889
+ pthread_mutex_lock (& level3_lock );
890
+ #else
891
+ EnterCriticalSection ((PCRITICAL_SECTION )& level3_lock );
892
+ #endif
893
+ #endif
894
+
872
895
newarg .m = args -> m ;
873
896
newarg .n = args -> n ;
874
897
newarg .k = args -> k ;
@@ -973,6 +996,14 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
973
996
free (job );
974
997
#endif
975
998
999
+ #ifndef USE_OPENMP
1000
+ #ifndef OS_WINDOWS
1001
+ pthread_mutex_unlock (& level3_lock );
1002
+ #else
1003
+ LeaveCriticalSection ((PCRITICAL_SECTION )& level3_lock );
1004
+ #endif
1005
+ #endif
1006
+
976
1007
return 0 ;
977
1008
}
978
1009
0 commit comments