25
25
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
26
*****************************************************************************/
27
27
28
+ #include <stdlib.h>
28
29
#include <string.h>
29
30
#ifdef __APPLE__
30
31
#include <sys/sysctl.h>
@@ -33,6 +34,20 @@ size_t length=sizeof(value);
33
34
int64_t value64 ;
34
35
size_t length64 = sizeof (value64 );
35
36
#endif
37
/*
 * Linux/Android only: pull in the hwcap headers and provide fallback
 * values for HWCAP_CPUID and HWCAP_SVE when the installed headers do not
 * define them.
 */
#if (defined OS_LINUX || defined OS_ANDROID)
#include <asm/hwcap.h>
#include <sys/auxv.h>
#ifndef HWCAP_CPUID
#define HWCAP_CPUID (1 << 11)
#endif
#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22)
#endif

/*
 * get_cpu_ftr(id, var): emit an "mrs" instruction whose source operand is
 * the stringified `id` and whose output register operand is `var`.
 *
 * There must be no whitespace between the macro name and '(' in the
 * definition; otherwise the preprocessor treats it as an object-like macro
 * and the parameter list becomes part of the replacement text.
 */
#define get_cpu_ftr(id, var) ({ \
    __asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \
  })
#endif
36
51
37
52
#define CPU_UNKNOWN 0
38
53
#define CPU_ARMV8 1
@@ -42,11 +57,11 @@ size_t length64=sizeof(value64);
42
57
#define CPU_CORTEXA57 3
43
58
#define CPU_CORTEXA72 4
44
59
#define CPU_CORTEXA73 5
45
- #define CPU_CORTEXA76 23
60
+ #define CPU_CORTEXA76 23
46
61
#define CPU_NEOVERSEN1 11
47
62
#define CPU_NEOVERSEV1 16
48
63
#define CPU_NEOVERSEN2 17
49
- #define CPU_NEOVERSEV2 24
64
+ #define CPU_NEOVERSEV2 24
50
65
#define CPU_CORTEXX1 18
51
66
#define CPU_CORTEXX2 19
52
67
#define CPU_CORTEXA510 20
@@ -93,7 +108,7 @@ static char *cpuname[] = {
93
108
"CORTEXA710" ,
94
109
"FT2000" ,
95
110
"CORTEXA76" ,
96
- "NEOVERSEV2"
111
+ "NEOVERSEV2"
97
112
};
98
113
99
114
static char * cpuname_lower [] = {
@@ -121,9 +136,13 @@ static char *cpuname_lower[] = {
121
136
"cortexa710" ,
122
137
"ft2000" ,
123
138
"cortexa76" ,
124
- "neoversev2"
139
+ "neoversev2"
125
140
};
126
141
142
/*
 * Number of cores counted in each performance class (low / mid / high).
 * detect() resets these to 0 and increments them per core; the core-count
 * printer emits them as NUM_CORES_LP / NUM_CORES_MP / NUM_CORES_HP when
 * they are greater than zero.
 */
+ static int cpulowperf = 0 ;
143
+ static int cpumidperf = 0 ;
144
+ static int cpuhiperf = 0 ;
145
+
127
146
int get_feature (char * search )
128
147
{
129
148
@@ -158,33 +177,108 @@ int get_feature(char *search)
158
177
#endif
159
178
return (0 );
160
179
}
161
-
180
/*
 * qsort() comparator that orders int elements in descending order.
 *
 * model1, model2: pointers to the two int elements being compared.
 * Returns a positive value when *model2 > *model1, a negative value when
 * *model2 < *model1, and 0 when they are equal.
 *
 * The result is derived from comparisons instead of a subtraction so that
 * it cannot overflow when the operands have opposite signs and large
 * magnitudes.
 */
static int cpusort(const void *model1, const void *model2)
{
  const int lhs = *(const int *)model1;
  const int rhs = *(const int *)model2;

  return (rhs > lhs) - (rhs < lhs);
}
162
184
163
185
int detect (void )
164
186
{
165
187
166
188
#if defined( __linux ) || defined( __NetBSD__ )
167
-
189
+ int n ,i ,ii ;
190
+ int midr_el1 ;
191
+ int implementer ;
192
+ int cpucap [1024 ];
193
+ int cpucores [1024 ];
168
194
FILE * infile ;
169
- char buffer [512 ], * p , * cpu_part = NULL , * cpu_implementer = NULL ;
195
+ char cpupart [6 ],cpuimpl [6 ];
196
+ char * cpu_impl = NULL ,* cpu_pt = NULL ;
197
+ char buffer [2048 ], * p , * cpu_part = NULL , * cpu_implementer = NULL ;
170
198
p = (char * ) NULL ;
171
-
172
- infile = fopen ("/proc/cpuinfo" , "r" );
173
- while (fgets (buffer , sizeof (buffer ), infile )) {
174
- if ((cpu_part != NULL ) && (cpu_implementer != NULL )) {
175
- break ;
199
+ cpulowperf = cpumidperf = cpuhiperf = 0 ;
200
+ for (i = 0 ;i < 1024 ;i ++ )cpucores [i ]= 0 ;
201
+ n = 0 ;
202
+ infile = fopen ("/sys/devices/system/cpu/possible" , "r" );
203
+ if (!infile ) {
204
+ infile = fopen ("/proc/cpuinfo" , "r" );
205
+ while (fgets (buffer , sizeof (buffer ), infile )) {
206
+ if (!strncmp ("processor" , buffer , 9 ))
207
+ n ++ ;
176
208
}
177
-
178
- if ((cpu_part == NULL ) && !strncmp ("CPU part" , buffer , 8 )) {
179
- cpu_part = strchr (buffer , ':' ) + 2 ;
180
- cpu_part = strdup (cpu_part );
181
- } else if ((cpu_implementer == NULL ) && !strncmp ("CPU implementer" , buffer , 15 )) {
182
- cpu_implementer = strchr (buffer , ':' ) + 2 ;
183
- cpu_implementer = strdup (cpu_implementer );
209
+ } else {
210
+ fgets (buffer , sizeof (buffer ), infile );
211
+ sscanf (buffer ,"0-%d" ,& n );
212
+ n ++ ;
213
+ }
214
+ fclose (infile );
215
+
216
+ cpu_implementer = NULL ;
217
+ for (i = 0 ;i < n ;i ++ ){
218
+ sprintf (buffer ,"/sys/devices/system/cpu/cpu%d/regs/identification/midr_el1" ,i );
219
+ infile = fopen (buffer ,"r" );
220
+ if (!infile ) {
221
+ infile = fopen ("/proc/cpuinfo" , "r" );
222
+ for (ii = 0 ;ii < n ;ii ++ ){
223
+ cpu_part = NULL ;cpu_implementer = NULL ;
224
+ while (fgets (buffer , sizeof (buffer ), infile )) {
225
+ if ((cpu_part != NULL ) && (cpu_implementer != NULL )) {
226
+ break ;
227
+ }
228
+
229
+ if ((cpu_part == NULL ) && !strncmp ("CPU part" , buffer , 8 )) {
230
+ cpu_pt = strchr (buffer , ':' ) + 2 ;
231
+ cpu_part = strdup (cpu_pt );
232
+ cpucores [i ]= strtol (cpu_part ,NULL ,0 );
233
+
234
+ } else if ((cpu_implementer == NULL ) && !strncmp ("CPU implementer" , buffer , 15 )) {
235
+ cpu_impl = strchr (buffer , ':' ) + 2 ;
236
+ cpu_implementer = strdup (cpu_impl );
237
+ }
238
+
239
+ }
240
+ if (strstr (cpu_implementer , "0x41" )) {
241
+ if (cpucores [ii ] >= 0xd4b ) cpuhiperf ++ ;
242
+ else
243
+ if (cpucores [ii ] >= 0xd07 ) cpumidperf ++ ;
244
+ else cpulowperf ++ ;
245
+ }
246
+ else cpulowperf ++ ;
247
+ }
248
+ fclose (infile );
249
+ break ;
250
+ } else {
251
+ (void )fgets (buffer , sizeof (buffer ), infile );
252
+ midr_el1 = strtoul (buffer ,NULL ,16 );
253
+ fclose (infile );
254
+ implementer = (midr_el1 >> 24 ) & 0xFF ;
255
+ cpucores [i ] = (midr_el1 >> 4 ) & 0xFFF ;
256
+ sprintf (buffer ,"/sys/devices/system/cpu/cpu%d/cpu_capacity" ,i );
257
+ infile = fopen (buffer ,"r" );
258
+ if (!infile ) {
259
+ if (implementer == 65 ) {
260
+ if (cpucores [i ] >= 0xd4b ) cpuhiperf ++ ;
261
+ else
262
+ if (cpucores [i ] >= 0xd07 ) cpumidperf ++ ;
263
+ else cpulowperf ++ ;
264
+ }
265
+ else cpulowperf ++ ;
266
+ } else {
267
+ (void )fgets (buffer , sizeof (buffer ), infile );
268
+ sscanf (buffer ,"%d" ,& cpucap [i ]);
269
+ if (cpucap [i ] >= 1000 ) cpuhiperf ++ ;
270
+ else
271
+ if (cpucap [i ] >= 500 ) cpumidperf ++ ;
272
+ else cpulowperf ++ ;
273
+ fclose (infile );
274
+ }
184
275
}
276
+ sprintf (cpuimpl ,"0x%2x" ,implementer );
277
+ cpu_implementer = strdup (cpuimpl );
185
278
}
186
-
187
- fclose (infile );
279
+ qsort (cpucores ,1024 ,sizeof (int ),cpusort );
280
+ sprintf (cpupart ,"0x%3x" ,cpucores [0 ]);
281
+ cpu_part = strdup (cpupart );
188
282
if (cpu_part != NULL && cpu_implementer != NULL ) {
189
283
// Arm
190
284
if (strstr (cpu_implementer , "0x41" )) {
@@ -219,7 +313,7 @@ int detect(void)
219
313
else if (strstr (cpu_part , "0xd4f" )) //NVIDIA Grace et al.
220
314
return CPU_NEOVERSEV2 ;
221
315
else if (strstr (cpu_part , "0xd0b" ))
222
- return CPU_CORTEXA76 ;
316
+ return CPU_CORTEXA76 ;
223
317
}
224
318
// Qualcomm
225
319
else if (strstr (cpu_implementer , "0x51" ) && strstr (cpu_part , "0xc00" ))
@@ -277,11 +371,20 @@ int detect(void)
277
371
}
278
372
#else
279
373
#ifdef __APPLE__
374
+ sysctlbyname ("hw.ncpu" ,& value64 ,& length64 ,NULL ,0 );
375
+ cpulowperf = value64 ;
376
+ sysctlbyname ("hw.nperflevels" ,& value64 ,& length64 ,NULL ,0 );
377
+ if (value64 > 1 ) {
378
+ sysctlbyname ("hw.perflevel0.cpusperl" ,& value64 ,& length64 ,NULL ,0 );
379
+ cpuhiperf = value64 ;
380
+ sysctlbyname ("hw.perflevel1.cpusperl" ,& value64 ,& length64 ,NULL ,0 );
381
+ cpulowperf = value64 ;
382
+ }
280
383
sysctlbyname ("hw.cpufamily" ,& value64 ,& length64 ,NULL ,0 );
281
384
if (value64 == 131287967 || value64 == 458787763 ) return CPU_VORTEX ; //A12/M1
282
385
if (value64 == 3660830781 ) return CPU_VORTEX ; //A15/M2
283
- if (value64 == 2271604202 ) return CPU_VORTEX ; //A16/M3
284
- if (value64 == 1867590060 ) return CPU_VORTEX ; //M4
386
+ if (value64 == 2271604202 ) return CPU_VORTEX ; //A16/M3
387
+ if (value64 == 1867590060 ) return CPU_VORTEX ; //M4
285
388
#endif
286
389
return CPU_ARMV8 ;
287
390
#endif
@@ -331,10 +434,22 @@ int n=0;
331
434
fclose (infile );
332
435
333
436
printf ("#define NUM_CORES %d\n" ,n );
437
+ if (cpulowperf > 0 )
438
+ printf ("#define NUM_CORES_LP %d\n" ,cpulowperf );
439
+ if (cpumidperf > 0 )
440
+ printf ("#define NUM_CORES_MP %d\n" ,cpumidperf );
441
+ if (cpuhiperf > 0 )
442
+ printf ("#define NUM_CORES_HP %d\n" ,cpuhiperf );
334
443
#endif
335
444
#ifdef __APPLE__
336
445
sysctlbyname ("hw.physicalcpu_max" ,& value ,& length ,NULL ,0 );
337
446
printf ("#define NUM_CORES %d\n" ,value );
447
+ if (cpulowperf > 0 )
448
+ printf ("#define NUM_CORES_LP %d\n" ,cpulowperf );
449
+ if (cpumidperf > 0 )
450
+ printf ("#define NUM_CORES_MP %d\n" ,cpumidperf );
451
+ if (cpuhiperf > 0 )
452
+ printf ("#define NUM_CORES_HP %d\n" ,cpuhiperf );
338
453
#endif
339
454
}
340
455
@@ -347,7 +462,6 @@ void get_cpuconfig(void)
347
462
printf ("#define ARMV8\n" );
348
463
printf ("#define HAVE_NEON\n" ); // This shouldn't be necessary
349
464
printf ("#define HAVE_VFPV4\n" ); // This shouldn't be necessary
350
-
351
465
int d = detect ();
352
466
switch (d )
353
467
{
@@ -402,8 +516,8 @@ void get_cpuconfig(void)
402
516
break ;
403
517
404
518
case CPU_NEOVERSEV1 :
405
- printf ("#define HAVE_SVE 1\n" );
406
- case CPU_CORTEXA76 :
519
+ printf ("#define HAVE_SVE 1\n" );
520
+ case CPU_CORTEXA76 :
407
521
printf ("#define %s\n" , cpuname [d ]);
408
522
printf ("#define L1_CODE_SIZE 65536\n" );
409
523
printf ("#define L1_CODE_LINESIZE 64\n" );
@@ -431,32 +545,32 @@ void get_cpuconfig(void)
431
545
printf ("#define L2_ASSOCIATIVE 8\n" );
432
546
printf ("#define DTB_DEFAULT_ENTRIES 48\n" );
433
547
printf ("#define DTB_SIZE 4096\n" );
434
- printf ("#define HAVE_SVE 1\n" );
548
+ printf ("#define HAVE_SVE 1\n" );
435
549
break ;
436
- case CPU_NEOVERSEV2 :
550
+ case CPU_NEOVERSEV2 :
437
551
printf ("#define ARMV9\n" );
438
- printf ("#define HAVE_SVE 1\n" );
439
- printf ("#define %s\n" , cpuname [d ]);
440
- printf ("#define L1_CODE_SIZE 65536\n" );
441
- printf ("#define L1_CODE_LINESIZE 64\n" );
442
- printf ("#define L1_CODE_ASSOCIATIVE 4\n" );
443
- printf ("#define L1_DATA_SIZE 65536\n" );
444
- printf ("#define L1_DATA_LINESIZE 64\n" );
445
- printf ("#define L1_DATA_ASSOCIATIVE 4\n" );
446
- printf ("#define L2_SIZE 1048576\n" );
447
- printf ("#define L2_LINESIZE 64\n" );
448
- printf ("#define L2_ASSOCIATIVE 8\n" );
449
- // L1 Data TLB = 48 entries
450
- // L2 Data TLB = 2048 entries
451
- printf ("#define DTB_DEFAULT_ENTRIES 48\n" );
452
- printf ("#define DTB_SIZE 4096\n" ); // Set to 4096 for symmetry with other configs.
453
- break ;
552
+ printf ("#define HAVE_SVE 1\n" );
553
+ printf ("#define %s\n" , cpuname [d ]);
554
+ printf ("#define L1_CODE_SIZE 65536\n" );
555
+ printf ("#define L1_CODE_LINESIZE 64\n" );
556
+ printf ("#define L1_CODE_ASSOCIATIVE 4\n" );
557
+ printf ("#define L1_DATA_SIZE 65536\n" );
558
+ printf ("#define L1_DATA_LINESIZE 64\n" );
559
+ printf ("#define L1_DATA_ASSOCIATIVE 4\n" );
560
+ printf ("#define L2_SIZE 1048576\n" );
561
+ printf ("#define L2_LINESIZE 64\n" );
562
+ printf ("#define L2_ASSOCIATIVE 8\n" );
563
+ // L1 Data TLB = 48 entries
564
+ // L2 Data TLB = 2048 entries
565
+ printf ("#define DTB_DEFAULT_ENTRIES 48\n" );
566
+ printf ("#define DTB_SIZE 4096\n" ); // Set to 4096 for symmetry with other configs.
567
+ break ;
454
568
case CPU_CORTEXA510 :
455
569
case CPU_CORTEXA710 :
456
570
case CPU_CORTEXX1 :
457
571
case CPU_CORTEXX2 :
458
572
printf ("#define ARMV9\n" );
459
- printf ("#define HAVE_SVE 1\n" );
573
+ printf ("#define HAVE_SVE 1\n" );
460
574
printf ("#define %s\n" , cpuname [d ]);
461
575
printf ("#define L1_CODE_SIZE 65536\n" );
462
576
printf ("#define L1_CODE_LINESIZE 64\n" );
@@ -559,8 +673,6 @@ void get_cpuconfig(void)
559
673
case CPU_VORTEX :
560
674
printf ("#define VORTEX \n" );
561
675
#ifdef __APPLE__
562
- sysctlbyname ("hw.cpufamily" ,& value64 ,& length64 ,NULL ,0 );
563
- if (value64 == 1867590060 ) printf ("#define HAVE_SME 1\n" );; //M4
564
676
sysctlbyname ("hw.l1icachesize" ,& value64 ,& length64 ,NULL ,0 );
565
677
printf ("#define L1_CODE_SIZE %lld \n" ,value64 );
566
678
sysctlbyname ("hw.cachelinesize" ,& value64 ,& length64 ,NULL ,0 );
@@ -575,7 +687,7 @@ void get_cpuconfig(void)
575
687
break ;
576
688
case CPU_A64FX :
577
689
printf ("#define A64FX\n" );
578
- printf ("#define HAVE_SVE 1\n" );
690
+ printf ("#define HAVE_SVE 1\n" );
579
691
printf ("#define L1_CODE_SIZE 65535\n" );
580
692
printf ("#define L1_DATA_SIZE 65535\n" );
581
693
printf ("#define L1_DATA_LINESIZE 256\n" );
0 commit comments