@@ -198,14 +198,37 @@ static inline int get_gemm_optimal_nthreads_neoversev1(double MNK, int ncpu) {
198
198
}
199
199
#endif
200
200
201
+ #if defined(DYNAMIC_ARCH ) || defined(NEOVERSEV2 )
202
/*
 * Thread-count heuristic for GEMM on Neoverse V2 cores.
 *
 * MNK  - problem-size measure supplied by the caller (the name suggests the
 *        product M*N*K; confirm against the call site).
 * ncpu - upper bound on the thread count this function may return.
 *
 * Returns 1 when MNK is below 125000.  Otherwise MNK is matched against a
 * list of increasing, exclusive upper limits and the result is the cap of the
 * first limit that exceeds MNK, clamped to ncpu via MIN.  When MNK is not
 * below the last limit (1073741824), ncpu is returned unchanged.
 */
static inline int get_gemm_optimal_nthreads_neoversev2(double MNK, int ncpu) {
  /* One row per size bracket: exclusive upper limit on MNK and the thread cap
   * applied inside that bracket.  Rows must stay sorted by ascending limit. */
  static const struct {
    double below;
    int    cap;
  } brackets[] = {
    {    1092727.0,  6 },
    {    2628072.0,  8 },
    {    8000000.0, 12 },
    {   20346417.0, 16 },
    {   57066625.0, 24 },
    {   91125000.0, 28 },
    {  238328000.0, 40 },
    {  454756609.0, 48 },
    {  857375000.0, 56 },
    { 1073741824.0, 64 },
  };
  const int nbrackets = (int)(sizeof(brackets) / sizeof(brackets[0]));
  int idx;

  /* The smallest bracket always yields a single thread; it is a literal 1,
   * deliberately not clamped against ncpu. */
  if (MNK < 125000.0) {
    return 1;
  }

  for (idx = 0; idx < nbrackets; idx++) {
    if (MNK < brackets[idx].below) {
      /* Copy to a plain local so MIN receives simple identifier operands. */
      const int cap = brackets[idx].cap;
      return MIN(ncpu, cap);
    }
  }

  /* Beyond the largest bracket: hand back the full thread budget. */
  return ncpu;
}
217
+ #endif
218
+
201
219
static inline int get_gemm_optimal_nthreads (double MNK ) {
202
220
int ncpu = num_cpu_avail (3 );
203
221
#if defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
204
222
return get_gemm_optimal_nthreads_neoversev1 (MNK , ncpu );
223
+ #elif defined(NEOVERSEV2 ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
224
+ return get_gemm_optimal_nthreads_neoversev2 (MNK , ncpu );
205
225
#elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && !defined(DOUBLE ) && !defined(BFLOAT16 )
206
226
if (strcmp (gotoblas_corename (), "neoversev1" ) == 0 ) {
207
227
return get_gemm_optimal_nthreads_neoversev1 (MNK , ncpu );
208
228
}
229
+ if (strcmp (gotoblas_corename (), "neoversev2" ) == 0 ) {
230
+ return get_gemm_optimal_nthreads_neoversev2 (MNK , ncpu );
231
+ }
209
232
#endif
210
233
if ( MNK <= (SMP_THRESHOLD_MIN * (double ) GEMM_MULTITHREAD_THRESHOLD ) ) {
211
234
return 1 ;
0 commit comments