Skip to content

Commit a1cd99e

Browse files
maciejwieczorretman authored and shuahkh committed
selftests/resctrl: Adjust effective L3 cache size with SNC enabled
Sub-NUMA Cluster divides CPUs sharing an L3 cache into separate NUMA nodes. Systems may support splitting into either two, three, four or six nodes. When SNC mode is enabled, the effective amount of L3 cache available for allocation is divided by the number of nodes per L3. It's possible to detect which SNC mode is active by comparing the number of CPUs that share a cache with CPU0 with the number of CPUs on node0. Detect SNC mode once and let other tests inherit that information. Update CFLAGS after including lib.mk in the Makefile so that the fallthrough macro can be used. To check if SNC detection is reliable, one can check the /sys/devices/system/cpu/offline file. If it's empty, it means all cores are operational and the ratio should be calculated correctly. If it has any contents, it means the detected SNC mode can't be trusted and should be disabled. Check if detection was unreliable due to offline CPUs. If it was, skip running the tests since the results couldn't be trusted. Co-developed-by: Tony Luck <tony.luck@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com> Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com> Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
1 parent 89ae643 commit a1cd99e

File tree

4 files changed

+119
-1
lines changed

4 files changed

+119
-1
lines changed

tools/testing/selftests/resctrl/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@ TEST_GEN_PROGS := resctrl_tests
88
LOCAL_HDRS += $(wildcard *.h)
99

1010
include ../lib.mk
11+
CFLAGS += -I$(top_srcdir)/tools/include
1112

1213
$(OUTPUT)/resctrl_tests: $(wildcard *.c)

tools/testing/selftests/resctrl/resctrl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <signal.h>
1212
#include <dirent.h>
1313
#include <stdbool.h>
14+
#include <ctype.h>
1415
#include <sys/stat.h>
1516
#include <sys/ioctl.h>
1617
#include <sys/mount.h>
@@ -21,6 +22,7 @@
2122
#include <sys/eventfd.h>
2223
#include <asm/unistd.h>
2324
#include <linux/perf_event.h>
25+
#include <linux/compiler.h>
2426
#include "../kselftest.h"
2527

2628
#define MB (1024 * 1024)
@@ -156,8 +158,11 @@ struct perf_event_read {
156158
*/
157159
extern volatile int *value_sink;
158160

161+
extern int snc_unreliable;
162+
159163
extern char llc_occup_path[1024];
160164

165+
int snc_nodes_per_l3_cache(void);
161166
int get_vendor(void);
162167
bool check_resctrlfs_support(void);
163168
int filter_dmesg(void);

tools/testing/selftests/resctrl/resctrl_tests.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ static bool test_vendor_specific_check(const struct resctrl_test *test)
118118

119119
static void run_single_test(const struct resctrl_test *test, const struct user_params *uparams)
120120
{
121-
int ret;
121+
int ret, snc_mode;
122122

123123
if (test->disabled)
124124
return;
@@ -128,8 +128,15 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p
128128
return;
129129
}
130130

131+
snc_mode = snc_nodes_per_l3_cache();
132+
131133
ksft_print_msg("Starting %s test ...\n", test->name);
132134

135+
if (snc_mode == 1 && snc_unreliable && get_vendor() == ARCH_INTEL) {
136+
ksft_test_result_skip("SNC detection unreliable due to offline CPUs. Test results may not be accurate if SNC enabled.\n");
137+
return;
138+
}
139+
133140
if (test_prepare(test)) {
134141
ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
135142
return;

tools/testing/selftests/resctrl/resctrlfs.c

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313

1414
#include "resctrl.h"
1515

16+
/*
 * Set to 1 by snc_nodes_per_l3_cache() when Sub-NUMA Cluster detection cannot
 * be trusted: the offline-CPU check did not report an empty list, or one of
 * the CPU counts read from sysfs was zero.
 */
int snc_unreliable;
17+
1618
static int find_resctrl_mount(char *buffer)
1719
{
1820
FILE *mounts;
@@ -156,6 +158,98 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id)
156158
return 0;
157159
}
158160

161+
/*
 * count_sys_bitmap_bits - Count the set bits (CPUs) in a /sys hex bitmap file
 * @name:	Path of the bitmap file to read
 *
 * Every hexadecimal digit found in the file contributes the number of bits
 * set in its 4-bit value. Lower and upper case digits are both accepted, all
 * other characters are skipped.
 *
 * Return: Number of set bits, 0 if the file cannot be opened.
 */
static unsigned int count_sys_bitmap_bits(const char *name)
{
	/* Number of set bits for each possible value of a single hex digit. */
	static const unsigned char nibble_bits[16] = {
		0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
	};
	unsigned int count = 0;
	FILE *fp;
	int c;

	fp = fopen(name, "r");
	if (!fp)
		return 0;

	while ((c = fgetc(fp)) != EOF) {
		if (!isxdigit(c))
			continue;

		if (isdigit(c))
			count += nibble_bits[c - '0'];
		else
			/* tolower() so that 'A'-'F' count like 'a'-'f'. */
			count += nibble_bits[tolower(c) - 'a' + 10];
	}
	fclose(fp);

	return count;
}
194+
195+
/*
 * cpus_offline_empty - Check whether the list of offline CPUs is empty
 *
 * Reads /sys/devices/system/cpu/offline and reports whether it holds any
 * non-whitespace content.
 *
 * Return: true if the file could be read and contains no token, false if it
 * lists offline CPUs or if it could not be opened or read.
 */
static bool cpus_offline_empty(void)
{
	char offline_cpus_str[64];
	bool empty = false;
	FILE *fp;

	fp = fopen("/sys/devices/system/cpu/offline", "r");
	if (!fp) {
		ksft_perror("Could not open /sys/devices/system/cpu/offline");
		return false;
	}

	/*
	 * fscanf() returns EOF both when the file has no token and when the
	 * read fails. Tell the two apart with ferror() rather than errno,
	 * which may still hold a stale value from an earlier, unrelated call.
	 */
	if (fscanf(fp, "%63s", offline_cpus_str) == EOF) {
		if (ferror(fp))
			ksft_perror("Could not read /sys/devices/system/cpu/offline");
		else
			empty = true;
	}

	fclose(fp);

	return empty;
}
218+
219+
/*
220+
* Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0.
221+
* If any CPUs are offline declare the detection as unreliable.
222+
*/
223+
int snc_nodes_per_l3_cache(void)
224+
{
225+
int node_cpus, cache_cpus;
226+
static int snc_mode;
227+
228+
if (!snc_mode) {
229+
snc_mode = 1;
230+
if (!cpus_offline_empty()) {
231+
ksft_print_msg("Runtime SNC detection unreliable due to offline CPUs.\n");
232+
ksft_print_msg("Setting SNC mode to disabled.\n");
233+
snc_unreliable = 1;
234+
return snc_mode;
235+
}
236+
node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap");
237+
cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map");
238+
239+
if (!node_cpus || !cache_cpus) {
240+
ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n");
241+
snc_unreliable = 1;
242+
return snc_mode;
243+
}
244+
snc_mode = cache_cpus / node_cpus;
245+
246+
if (snc_mode > 1)
247+
ksft_print_msg("SNC-%d mode discovered.\n", snc_mode);
248+
}
249+
250+
return snc_mode;
251+
}
252+
159253
/*
160254
* get_cache_size - Get cache size for a specified CPU
161255
* @cpu_no: CPU number
@@ -211,6 +305,17 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size
211305
break;
212306
}
213307

308+
/*
309+
* The amount of cache represented by each bit in the masks
310+
* in the schemata file is reduced by a factor equal to SNC
311+
* nodes per L3 cache.
312+
* E.g. on a SNC-2 system with a 100MB L3 cache a test that
313+
* allocates memory from its local SNC node (default behavior
314+
* without using libnuma) will only see 50 MB llc_occupancy
315+
* with a fully populated L3 mask in the schemata file.
316+
*/
317+
if (cache_num == 3)
318+
*cache_size /= snc_nodes_per_l3_cache();
214319
return 0;
215320
}
216321

0 commit comments

Comments
 (0)