@@ -70,11 +70,22 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT
70
70
71
71
#if defined(DYNAMIC_ARCH ) || defined(NEOVERSEV1 )
72
72
static inline int get_gemv_optimal_nthreads_neoversev1 (BLASLONG MN , int ncpu ) {
73
- return
74
- MN < 25600L ? 1
75
- : MN < 63001L ? MIN (ncpu , 4 )
76
- : MN < 459684L ? MIN (ncpu , 16 )
77
- : ncpu ;
73
+ #ifdef DOUBLE
74
+ return (MN < 8100L ) ? 1
75
+ : (MN < 12100L ) ? MIN (ncpu , 2 )
76
+ : (MN < 36100L ) ? MIN (ncpu , 4 )
77
+ : (MN < 84100L ) ? MIN (ncpu , 8 )
78
+ : (MN < 348100L ) ? MIN (ncpu , 16 )
79
+ : (MN < 435600L ) ? MIN (ncpu , 24 )
80
+ : (MN < 810000L ) ? MIN (ncpu , 32 )
81
+ : (MN < 1050625L ) ? MIN (ncpu , 40 )
82
+ : ncpu ;
83
+ #else
84
+ return (MN < 25600L ) ? 1
85
+ : (MN < 63001L ) ? MIN (ncpu , 4 )
86
+ : (MN < 459684L ) ? MIN (ncpu , 16 )
87
+ : ncpu ;
88
+ #endif
78
89
}
79
90
#endif
80
91
@@ -89,50 +100,24 @@ static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
89
100
}
90
101
#endif
91
102
92
- //thread throttling for dgemv
93
- #if defined(DYNAMIC_ARCH ) || defined(NEOVERSEV1 )
94
- static inline int get_dgemv_optimal_nthreads_neoversev1 (BLASLONG MN , int ncpu ) {
95
-
96
- return
97
- MN < 8100L ? 1
98
- : MN < 12100L ? MIN (ncpu , 2 )
99
- : MN < 36100L ? MIN (ncpu , 4 )
100
- : MN < 84100L ? MIN (ncpu , 8 )
101
- : MN < 348100L ? MIN (ncpu , 16 )
102
- : MN < 435600L ? MIN (ncpu , 24 )
103
- : MN < 810000L ? MIN (ncpu , 32 )
104
- : MN < 1050625 ? MIN (ncpu , 40 )
105
- : ncpu ;
106
-
107
- }
108
- #endif
109
-
110
103
static inline int get_gemv_optimal_nthreads (BLASLONG MN ) {
111
104
int ncpu = num_cpu_avail (3 );
112
105
#if defined(_WIN64 ) && defined(_M_ARM64 )
113
106
if (MN > 100000000L )
114
107
return num_cpu_avail (4 );
115
108
return 1 ;
116
109
#endif
117
- #if defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined( BFLOAT16 )
110
+ #if defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(BFLOAT16 )
118
111
return get_gemv_optimal_nthreads_neoversev1 (MN , ncpu );
119
- #elif defined(NEOVERSEV1 ) && !defined(COMPLEX ) && defined(DOUBLE ) && !defined(BFLOAT16 )
120
- return get_dgemv_optimal_nthreads_neoversev1 (MN , ncpu );
121
112
#elif defined(NEOVERSEV2 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
122
113
return get_gemv_optimal_nthreads_neoversev2 (MN , ncpu );
123
- #elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined( BFLOAT16 )
114
+ #elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && !defined(BFLOAT16 )
124
115
if (strcmp (gotoblas_corename (), "neoversev1" ) == 0 ) {
125
116
return get_gemv_optimal_nthreads_neoversev1 (MN , ncpu );
126
117
}
127
118
if (strcmp (gotoblas_corename (), "neoversev2" ) == 0 ) {
128
119
return get_gemv_optimal_nthreads_neoversev2 (MN , ncpu );
129
120
}
130
- #elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && defined(DOUBLE ) && !defined(BFLOAT16 )
131
- if (strcmp (gotoblas_corename (), "neoversev1" ) == 0 ) {
132
- return get_dgemv_optimal_nthreads_neoversev1 (MN , ncpu );
133
- }
134
-
135
-
136
121
#endif
137
122
138
123
if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )
0 commit comments