Skip to content

Commit c03a81b

Browse files
authored
Merge pull request #5141 from michalowski-arm/fork-throttle
Add throttling profile for SGEMM and SGEMV on `NEOVERSEV2`
2 parents 643966d + 650a062 commit c03a81b

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

CONTRIBUTORS.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,10 @@ In chronological order:
237237
* [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1
238238
* [2025-01-21] Optimize gemv_t_sve_v1x3 kernel
239239

240-
* Marek Michalowski <https://github.com/michalowski-arm>
240+
* Marek Michalowski <marek.michalowski@arm.com>
241241
* [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1`
242+
* [2025-02-18] Add thread throttling profile for SGEMM on `NEOVERSEV2`
243+
* [2025-02-19] Add thread throttling profile for SGEMV on `NEOVERSEV2`
242244

243245
* Ye Tao <ye.tao@arm.com>
244246
* [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1

interface/gemm.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,37 @@ static inline int get_gemm_optimal_nthreads_neoversev1(double MNK, int ncpu) {
198198
}
199199
#endif
200200

201+
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV2)
202+
/*
 * Thread-throttling profile for GEMM on NEOVERSEV2.
 *
 * Maps a problem-size figure to the number of threads to use, never
 * exceeding the number of available CPUs.
 *
 *   MNK   problem-size metric (presumably M*N*K; confirm against caller)
 *   ncpu  number of CPUs available to the caller
 *
 * Returns 1 for the smallest problems, a tier-specific cap clamped to ncpu
 * for mid-sized problems, and ncpu once MNK reaches the last threshold.
 */
static inline int get_gemm_optimal_nthreads_neoversev2(double MNK, int ncpu) {
  /* Each row: exclusive upper bound on MNK and the thread cap for that tier.
   * Rows must stay sorted by ascending bound; the first match wins. */
  static const struct {
    double mnk_limit;
    int    thread_cap;
  } tiers[] = {
    {    1092727.0,  6 },
    {    2628072.0,  8 },
    {    8000000.0, 12 },
    {   20346417.0, 16 },
    {   57066625.0, 24 },
    {   91125000.0, 28 },
    {  238328000.0, 40 },
    {  454756609.0, 48 },
    {  857375000.0, 56 },
    { 1073741824.0, 64 },
  };
  const unsigned int ntiers = (unsigned int)(sizeof(tiers) / sizeof(tiers[0]));
  unsigned int t;

  /* Smallest tier is unconditionally single-threaded (not clamped by ncpu). */
  if (MNK < 125000.0) {
    return 1;
  }

  for (t = 0; t < ntiers; t++) {
    if (MNK < tiers[t].mnk_limit) {
      int cap = tiers[t].thread_cap;
      return MIN(ncpu, cap);
    }
  }

  /* Beyond the last threshold: use every available CPU. */
  return ncpu;
}
217+
#endif
218+
201219
static inline int get_gemm_optimal_nthreads(double MNK) {
202220
int ncpu = num_cpu_avail(3);
203221
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
204222
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
223+
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
224+
return get_gemm_optimal_nthreads_neoversev2(MNK, ncpu);
205225
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
206226
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
207227
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
208228
}
229+
if (strcmp(gotoblas_corename(), "neoversev2") == 0) {
230+
return get_gemm_optimal_nthreads_neoversev2(MNK, ncpu);
231+
}
209232
#endif
210233
if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) {
211234
return 1;

interface/gemv.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,30 @@ static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
7777
}
7878
#endif
7979

80+
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV2)
81+
/*
 * Thread-throttling profile for GEMV on NEOVERSEV2.
 *
 * Maps a problem-size figure to the number of threads to use, never
 * exceeding the number of available CPUs.
 *
 *   MN    problem-size metric (presumably M*N; confirm against caller)
 *   ncpu  number of CPUs available to the caller
 *
 * Returns 1 below the first threshold, a tier-specific cap clamped to ncpu
 * for the intermediate tiers, and ncpu for everything larger.
 */
static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
  /* Thresholds are checked in ascending order; the first match wins. */
  if (MN < 24964L) {
    return 1;              /* too small to benefit from threading */
  }
  if (MN < 65536L) {
    return MIN(ncpu, 8);
  }
  if (MN < 262144L) {
    return MIN(ncpu, 32);
  }
  if (MN < 1638400L) {
    return MIN(ncpu, 64);
  }

  /* Largest problems: use every available CPU. */
  return ncpu;
}
89+
#endif
90+
8091
static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
8192
int ncpu = num_cpu_avail(3);
8293
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
8394
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
95+
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
96+
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
8497
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
8598
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
8699
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
87100
}
101+
if (strcmp(gotoblas_corename(), "neoversev2") == 0) {
102+
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
103+
}
88104
#endif
89105

90106
if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )

0 commit comments

Comments
 (0)