Skip to content

Commit 8e289ec

Browse files
shubham.chaudhari authored and committed
Simplified thread throttling function in gemv
1 parent 189dbbc commit 8e289ec

File tree

1 file changed

+18
-33
lines changed

1 file changed

+18
-33
lines changed

interface/gemv.c

Lines changed: 18 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -70,11 +70,22 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT
7070

7171
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
7272
static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
73-
return
74-
MN < 25600L ? 1
75-
: MN < 63001L ? MIN(ncpu, 4)
76-
: MN < 459684L ? MIN(ncpu, 16)
77-
: ncpu;
73+
#ifdef DOUBLE
74+
return (MN < 8100L) ? 1
75+
: (MN < 12100L) ? MIN(ncpu, 2)
76+
: (MN < 36100L) ? MIN(ncpu, 4)
77+
: (MN < 84100L) ? MIN(ncpu, 8)
78+
: (MN < 348100L) ? MIN(ncpu, 16)
79+
: (MN < 435600L) ? MIN(ncpu, 24)
80+
: (MN < 810000L) ? MIN(ncpu, 32)
81+
: (MN < 1050625L) ? MIN(ncpu, 40)
82+
: ncpu;
83+
#else
84+
return (MN < 25600L) ? 1
85+
: (MN < 63001L) ? MIN(ncpu, 4)
86+
: (MN < 459684L) ? MIN(ncpu, 16)
87+
: ncpu;
88+
#endif
7889
}
7990
#endif
8091

@@ -89,50 +100,24 @@ static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
89100
}
90101
#endif
91102

92-
//thread throttling for dgemv
93-
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
94-
static inline int get_dgemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
95-
96-
return
97-
MN < 8100L ? 1
98-
: MN < 12100L ? MIN(ncpu, 2)
99-
: MN < 36100L ? MIN(ncpu, 4)
100-
: MN < 84100L ? MIN(ncpu, 8)
101-
: MN < 348100L ? MIN(ncpu, 16)
102-
: MN < 435600L ? MIN(ncpu, 24)
103-
: MN < 810000L ? MIN(ncpu, 32)
104-
: MN < 1050625 ? MIN(ncpu, 40)
105-
: ncpu;
106-
107-
}
108-
#endif
109-
110103
static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
111104
int ncpu = num_cpu_avail(3);
112105
#if defined(_WIN64) && defined(_M_ARM64)
113106
if (MN > 100000000L)
114107
return num_cpu_avail(4);
115108
return 1;
116109
#endif
117-
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
110+
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(BFLOAT16)
118111
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
119-
#elif defined(NEOVERSEV1) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16)
120-
return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu);
121112
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
122113
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
123-
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
114+
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(BFLOAT16)
124115
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
125116
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
126117
}
127118
if (strcmp(gotoblas_corename(), "neoversev2") == 0) {
128119
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
129120
}
130-
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16)
131-
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
132-
return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu);
133-
}
134-
135-
136121
#endif
137122

138123
if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )

0 commit comments

Comments (0)