Skip to content

Commit 6b83079

Browse files
authored
Count cpu cores on ARMV8 and use that to pick the GEMM_PQ parameters (#2267)
There is currently no simple way to query cache sizes on ARMV8, so this uses the number of cores as a rough indication of whether the target is a server-class device with a big cache or just a single-board toy or smartphone.
1 parent 673e5a0 commit 6b83079

File tree

2 files changed

+51
-10
lines changed

2 files changed

+51
-10
lines changed

cpuid_arm64.c

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,33 @@ void get_subdirname(void)
206206
printf("arm64");
207207
}
208208

209+
/*
 * Count the lines of /proc/cpuinfo that begin with "processor" and print the
 * total to stdout as "#define NUM_CORES <n>".
 *
 * Nothing is printed when the platform is not detected as Linux or when
 * /proc/cpuinfo cannot be opened. In that case NUM_CORES stays undefined, and
 * a preprocessor test such as "#if NUM_CORES > 8" evaluates it as 0.
 */
void get_cpucount(void)
{
/* Plain "linux" is not predefined in strict ISO modes (-std=c99/-std=c11),
   so also accept the reserved-namespace spellings. */
#if defined(linux) || defined(__linux) || defined(__linux__)
    FILE *infile;
    char buffer[2048];
    int n = 0;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL) {
        /* Passing NULL to fgets()/fclose() is undefined behaviour; without
           the file there is no count to report. */
        return;
    }

    while (fgets(buffer, sizeof(buffer), infile)) {
        /* One "processor" line is counted per entry in the file. */
        if (!strncmp("processor", buffer, 9)) {
            n++;
        }
    }

    fclose(infile);

    printf("#define NUM_CORES %d\n", n);
#endif
}
233+
234+
235+
209236
void get_cpuconfig(void)
210237
{
211238

@@ -309,6 +336,7 @@ void get_cpuconfig(void)
309336
printf("#define DTB_SIZE 4096 \n");
310337
break;
311338
}
339+
get_cpucount();
312340
}
313341

314342

@@ -351,5 +379,3 @@ void get_features(void)
351379
#endif
352380
return;
353381
}
354-
355-

param.h

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2636,15 +2636,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26362636
#define ZGEMM_DEFAULT_UNROLL_M 4
26372637
#define ZGEMM_DEFAULT_UNROLL_N 4
26382638

2639-
#define SGEMM_DEFAULT_P 512
2640-
#define DGEMM_DEFAULT_P 256
2641-
#define CGEMM_DEFAULT_P 256
2642-
#define ZGEMM_DEFAULT_P 128
2639+
/*FIXME: this should be using the cache size, but there is currently no easy way to
2640+
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
2641+
is a big desktop or server with abundant cache rather than a phone or embedded device */
2642+
#if NUM_CORES > 8
2643+
#define SGEMM_DEFAULT_P 512
2644+
#define DGEMM_DEFAULT_P 256
2645+
#define CGEMM_DEFAULT_P 256
2646+
#define ZGEMM_DEFAULT_P 128
2647+
2648+
#define SGEMM_DEFAULT_Q 1024
2649+
#define DGEMM_DEFAULT_Q 512
2650+
#define CGEMM_DEFAULT_Q 512
2651+
#define ZGEMM_DEFAULT_Q 512
2652+
#else
2653+
#define SGEMM_DEFAULT_P 128
2654+
#define DGEMM_DEFAULT_P 160
2655+
#define CGEMM_DEFAULT_P 128
2656+
#define ZGEMM_DEFAULT_P 128
26432657

2644-
#define SGEMM_DEFAULT_Q 1024
2645-
#define DGEMM_DEFAULT_Q 512
2646-
#define CGEMM_DEFAULT_Q 512
2647-
#define ZGEMM_DEFAULT_Q 512
2658+
#define SGEMM_DEFAULT_Q 352
2659+
#define DGEMM_DEFAULT_Q 128
2660+
#define CGEMM_DEFAULT_Q 224
2661+
#define ZGEMM_DEFAULT_Q 112
2662+
#endif
26482663

26492664
#define SGEMM_DEFAULT_R 4096
26502665
#define DGEMM_DEFAULT_R 4096

0 commit comments

Comments
 (0)