Skip to content

Commit 4d5b13f

Browse files
Add thread throttling profile for SGEMV on NEOVERSEV1
1 parent 76db346 commit 4d5b13f

File tree

2 files changed

+35
-6
lines changed

2 files changed

+35
-6
lines changed

CONTRIBUTORS.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,4 +231,7 @@ In chronological order:
231231
* [2024-01-24] Optimize GEMV forwarding on ARM64 systems
232232

233233
* Aniket P. Garade <https://github.com/garadeaniket> Sushil Pratap Singh <https://github.com/SushilPratap04> Juliya James <https://github.com/Juliya32>
234-
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE
234+
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE
235+
236+
* Marek Michalowski <https://github.com/michalowski-arm>
237+
* [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1`

interface/gemv.c

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,36 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT
6363
};
6464
#endif
6565

66+
#ifdef DYNAMIC_ARCH
67+
/* Declared for the DYNAMIC_ARCH thread-count heuristic in this file, which
 * compares the returned string against "neoversev1". */
extern char* gotoblas_corename(void);
68+
#endif
69+
70+
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
71+
/*
 * Thread-throttling profile for GEMV on NEOVERSEV1.
 *
 * MN   - problem size (callers in this file pass m * n)
 * ncpu - number of threads available to the call
 *
 * Returns the thread count to use, stepping up with the problem size:
 *   MN <  25600            -> 1
 *   MN <  63001            -> at most 4
 *   MN < 459684            -> at most 16
 *   otherwise              -> ncpu
 *
 * NOTE(review): the cut-offs equal 160^2, 251^2 and 678^2, so they were
 * presumably derived from square-matrix benchmarks - confirm before retuning.
 */
static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
  if (MN < 25600L) {
    return 1;
  }
  if (MN < 63001L) {
    return MIN(ncpu, 4);
  }
  if (MN < 459684L) {
    return MIN(ncpu, 16);
  }
  return ncpu;
}
78+
#endif
79+
80+
/*
 * Choose the number of threads for a GEMV call from the problem size MN
 * (callers in this file pass m * n).
 *
 * Builds without COMPLEX, DOUBLE and BFLOAT16 that target NEOVERSEV1 use the
 * dedicated NEOVERSEV1 profile: unconditionally when NEOVERSEV1 is defined at
 * compile time, or, under DYNAMIC_ARCH, when gotoblas_corename() reports
 * "neoversev1" at run time.  Every other configuration keeps the generic
 * rule: one thread when MN < 115200 * GEMM_MULTITHREAD_THRESHOLD, otherwise
 * num_cpu_avail(2).
 *
 * num_cpu_avail() is queried only on the path that actually consumes its
 * result, so small problems on the generic path return without calling it,
 * and the static NEOVERSEV1 build contains no unreachable fallback code.
 */
static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
  /* Target fixed at compile time: the profile is the only policy. */
  return get_gemv_optimal_nthreads_neoversev1(MN, num_cpu_avail(3));
#else
#if defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
  /* Target selected at run time: apply the profile only on a matching core. */
  if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
    return get_gemv_optimal_nthreads_neoversev1(MN, num_cpu_avail(3));
  }
#endif

  /* Generic rule: stay single-threaded below the multithreading threshold. */
  if (MN < 115200L * GEMM_MULTITHREAD_THRESHOLD) {
    return 1;
  }
  return num_cpu_avail(2);
#endif
}
95+
6696
#ifndef CBLAS
6797

6898
void NAME(char *TRANS, blasint *M, blasint *N,
@@ -225,11 +255,7 @@ void CNAME(enum CBLAS_ORDER order,
225255
STACK_ALLOC(buffer_size, FLOAT, buffer);
226256

227257
#ifdef SMP
228-
229-
if ( 1L * m * n < 115200L * GEMM_MULTITHREAD_THRESHOLD )
230-
nthreads = 1;
231-
else
232-
nthreads = num_cpu_avail(2);
258+
nthreads = get_gemv_optimal_nthreads(1L * m * n);
233259

234260
if (nthreads == 1) {
235261
#endif

0 commit comments

Comments
 (0)