Skip to content

Commit 393fb31

Browse files
liu-song-6akpm00
authored and committed
watchdog: allow nmi watchdog to use raw perf event
The NMI watchdog permanently consumes one hardware counter per CPU on the system. For systems that use many hardware counters, this causes more aggressive time multiplexing of perf events. OTOH, some CPUs (mostly Intel) support the "ref-cycles" event, which is rarely used. Add kernel cmdline arg nmi_watchdog=rNNN to configure the watchdog to use a raw event. For example, on Intel CPUs, we can use "r300" to configure the watchdog to use the ref-cycles event. If the raw event does not work, fall back to using "cycles". [akpm@linux-foundation.org: fix kerneldoc] Link: https://lkml.kernel.org/r/20240430060236.1878002-2-song@kernel.org Signed-off-by: Song Liu <song@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 602ba77 commit 393fb31

File tree

4 files changed

+53
-2
lines changed

4 files changed

+53
-2
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3773,10 +3773,12 @@
37733773
Format: [state][,regs][,debounce][,die]
37743774

37753775
nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels
3776-
Format: [panic,][nopanic,][num]
3776+
Format: [panic,][nopanic,][rNNN,][num]
37773777
Valid num: 0 or 1
37783778
0 - turn hardlockup detector in nmi_watchdog off
37793779
1 - turn hardlockup detector in nmi_watchdog on
3780+
rNNN - configure the watchdog with raw perf event 0xNNN
3781+
37803782
When panic is specified, panic when an NMI watchdog
37813783
timeout occurs (or 'nopanic' to not panic on an NMI
37823784
watchdog, if CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is set)
@@ -7464,4 +7466,3 @@
74647466
memory, and other data can't be written using
74657467
xmon commands.
74667468
off xmon is disabled.
7467-

include/linux/nmi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,12 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
105105
extern void hardlockup_detector_perf_stop(void);
106106
extern void hardlockup_detector_perf_restart(void);
107107
extern void hardlockup_detector_perf_cleanup(void);
108+
extern void hardlockup_config_perf_event(const char *str);
108109
#else
109110
static inline void hardlockup_detector_perf_stop(void) { }
110111
static inline void hardlockup_detector_perf_restart(void) { }
111112
static inline void hardlockup_detector_perf_cleanup(void) { }
113+
static inline void hardlockup_config_perf_event(const char *str) { }
112114
#endif
113115

114116
void watchdog_hardlockup_stop(void);

kernel/watchdog.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ static int __init hardlockup_panic_setup(char *str)
8080
watchdog_hardlockup_user_enabled = 0;
8181
else if (!strncmp(str, "1", 1))
8282
watchdog_hardlockup_user_enabled = 1;
83+
else if (!strncmp(str, "r", 1))
84+
hardlockup_config_perf_event(str + 1);
8385
while (*(str++)) {
8486
if (*str == ',') {
8587
str++;

kernel/watchdog_perf.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ static struct perf_event_attr wd_hw_attr = {
9090
.disabled = 1,
9191
};
9292

93+
/*
 * Fallback event attributes: the generic hardware CPU-cycles event,
 * pinned and created disabled. hardlockup_detector_event_create() switches
 * wd_attr to this when its first perf_event_create_kernel_counter()
 * attempt returns an error.
 */
static struct perf_event_attr fallback_wd_hw_attr = {
94+
.type = PERF_TYPE_HARDWARE,
95+
.config = PERF_COUNT_HW_CPU_CYCLES,
96+
.size = sizeof(struct perf_event_attr),
97+
.pinned = 1,
98+
.disabled = 1,
99+
};
100+
93101
/* Callback function for perf event subsystem */
94102
static void watchdog_overflow_callback(struct perf_event *event,
95103
struct perf_sample_data *data,
@@ -122,6 +130,13 @@ static int hardlockup_detector_event_create(void)
122130
/* Try to register using hardware perf events */
123131
evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
124132
watchdog_overflow_callback, NULL);
133+
if (IS_ERR(evt)) {
134+
wd_attr = &fallback_wd_hw_attr;
135+
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
136+
evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
137+
watchdog_overflow_callback, NULL);
138+
}
139+
125140
if (IS_ERR(evt)) {
126141
pr_debug("Perf event create on CPU %d failed with %ld\n", cpu,
127142
PTR_ERR(evt));
@@ -259,3 +274,34 @@ int __init watchdog_hardlockup_probe(void)
259274
}
260275
return ret;
261276
}
277+
278+
/**
279+
* hardlockup_config_perf_event - Overwrite config of wd_hw_attr.
280+
*
281+
* @str: number which identifies the raw perf event to use
282+
*/
283+
void __init hardlockup_config_perf_event(const char *str)
284+
{
285+
u64 config;
286+
char buf[24];
287+
char *comma = strchr(str, ',');
288+
289+
if (!comma) {
290+
if (kstrtoull(str, 16, &config))
291+
return;
292+
} else {
293+
unsigned int len = comma - str;
294+
295+
if (len >= sizeof(buf))
296+
return;
297+
298+
if (strscpy(buf, str, sizeof(buf)) < 0)
299+
return;
300+
buf[len] = 0;
301+
if (kstrtoull(buf, 16, &config))
302+
return;
303+
}
304+
305+
wd_hw_attr.type = PERF_TYPE_RAW;
306+
wd_hw_attr.config = config;
307+
}

0 commit comments

Comments
 (0)