@@ -48,6 +48,53 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48
48
extern int blas_level1_thread_with_return_value (int mode , BLASLONG m , BLASLONG n ,
49
49
BLASLONG k , void * alpha , void * a , BLASLONG lda , void * b , BLASLONG ldb ,
50
50
void * c , BLASLONG ldc , int (* function )(), int nthreads );
51
+
52
+ #ifdef DYNAMIC_ARCH
53
+ extern char * gotoblas_corename (void );
54
+ #endif
55
+
56
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
/*
 * Pick the number of threads for a dot product of length N.
 *
 * The choice is a step function of N: each step allows a larger thread
 * count, always capped at ncpu, and the largest sizes use all ncpu threads.
 * DOUBLE builds and non-DOUBLE builds use separate threshold tables.
 *
 * NOTE(review): the thresholds are presumably tuned empirically for
 * Neoverse V1 -- confirm against the benchmark data they came from.
 *
 * N    : vector length
 * ncpu : number of threads available to the caller
 * Returns the thread count to use (1 for the smallest sizes).
 */
static inline int get_dot_optimal_nthreads_neoversev1(BLASLONG N, int ncpu) {
#ifdef DOUBLE
  /* Everything up to 64500 elements stays single-threaded. */
  if (N <= 64500L)   { return 1; }
  if (N <= 100000L)  { return MIN(ncpu, 2); }
  if (N <= 150000L)  { return MIN(ncpu, 4); }
  if (N <= 260000L)  { return MIN(ncpu, 8); }
  if (N <= 360000L)  { return MIN(ncpu, 16); }
  if (N <= 520000L)  { return MIN(ncpu, 24); }
  if (N <= 1010000L) { return MIN(ncpu, 56); }
  return ncpu;
#else
  /* Everything up to 110000 elements stays single-threaded. */
  if (N <= 110000L)  { return 1; }
  if (N <= 200000L)  { return MIN(ncpu, 2); }
  if (N <= 280000L)  { return MIN(ncpu, 4); }
  if (N <= 520000L)  { return MIN(ncpu, 8); }
  if (N <= 830000L)  { return MIN(ncpu, 16); }
  if (N <= 1010000L) { return MIN(ncpu, 24); }
  return ncpu;
#endif
}
#endif
80
+
81
+ static inline int get_dot_optimal_nthreads (BLASLONG n ) {
82
+ int ncpu = num_cpu_avail (1 );
83
+
84
+ #if defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(BFLOAT16 )
85
+ return get_dot_optimal_nthreads_neoversev1 (n , ncpu );
86
+ #elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && !defined(BFLOAT16 )
87
+ if (strcmp (gotoblas_corename (), "neoversev1" ) == 0 ) {
88
+ return get_dot_optimal_nthreads_neoversev1 (n , ncpu );
89
+ }
90
+ #endif
91
+
92
+ // Default case
93
+ if (n <= 10000L )
94
+ return 1 ;
95
+ else
96
+ return num_cpu_avail (1 );
97
+ }
51
98
#endif
52
99
53
100
static RETURN_TYPE dot_compute (BLASLONG n , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y )
@@ -85,10 +132,10 @@ RETURN_TYPE CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y
85
132
RETURN_TYPE dot = 0.0 ;
86
133
87
134
#if defined(SMP )
88
- if (inc_x == 0 || inc_y == 0 || n <= 10000 )
135
+ if (inc_x == 0 || inc_y == 0 )
89
136
nthreads = 1 ;
90
137
else
91
- nthreads = num_cpu_avail ( 1 );
138
+ nthreads = get_dot_optimal_nthreads ( n );
92
139
93
140
if (nthreads == 1 ) {
94
141
dot = dot_compute (n , x , inc_x , y , inc_y );
@@ -105,7 +152,7 @@ RETURN_TYPE CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y
105
152
106
153
blas_level1_thread_with_return_value (mode , n , 0 , 0 , & dummy_alpha ,
107
154
x , inc_x , y , inc_y , result , 0 ,
108
- ( void * )dot_thread_function , nthreads );
155
+ (void * )dot_thread_function , nthreads );
109
156
110
157
ptr = (RETURN_TYPE * )result ;
111
158
for (i = 0 ; i < nthreads ; i ++ ) {
0 commit comments