Skip to content

Commit 4e58aae

Browse files
paulmckrcu authored and neeraju committed
rcu: Restrict access to RCU CPU stall notifiers
The RCU CPU stall notifiers can be useful for dumping state when tracking down delicate forward-progress bugs, such as those where NUMA effects cause cache lines to be delivered to a given CPU regularly, but always in a state that prevents that CPU from making forward progress. These bugs can be detected by the RCU CPU stall-warning mechanism, but in some cases, the stall-warnings printk()s disrupt the forward-progress bug before any useful state can be obtained. Unfortunately, the notifier mechanism added by commit 5b404fd ("rcu: Add RCU CPU stall notifier") can make matters worse if used at all carelessly. For example, if the stall warning was caused by a lock not being released, then any attempt to acquire that lock in the notifier will hang. This will prevent not only the notifier from producing any useful output, but it will also prevent the stall-warning message from ever appearing. This commit therefore hides this new RCU CPU stall notifier mechanism under a new RCU_CPU_STALL_NOTIFIER Kconfig option that depends on both DEBUG_KERNEL and RCU_EXPERT. In addition, the rcupdate.rcu_cpu_stall_notifiers=1 kernel boot parameter must also be specified. The RCU_CPU_STALL_NOTIFIER Kconfig option's help text contains a warning and explains the dangers of careless use, recommending lockless notifier code. In addition, a WARN() is triggered each time that an attempt is made to register a stall-warning notifier in kernels built with CONFIG_RCU_CPU_STALL_NOTIFIER=y. This combination of measures will keep use of this mechanism confined to debug kernels and away from routine deployments. [ paulmck: Apply Dan Carpenter feedback. ] Fixes: 5b404fd ("rcu: Add RCU CPU stall notifier") Reported-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org> Signed-off-by: Neeraj Upadhyay (AMD) <neeraj.iitr10@gmail.com>
1 parent 98b1cc8 commit 4e58aae

File tree

7 files changed

+62
-12
lines changed

7 files changed

+62
-12
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5302,6 +5302,12 @@
53025302
Dump ftrace buffer after reporting RCU CPU
53035303
stall warning.
53045304

5305+
rcupdate.rcu_cpu_stall_notifiers= [KNL]
5306+
Provide RCU CPU stall notifiers, but see the
5307+
warnings in the RCU_CPU_STALL_NOTIFIER Kconfig
5308+
option's help text. TL;DR: You almost certainly
5309+
do not want rcupdate.rcu_cpu_stall_notifiers.
5310+
53055311
rcupdate.rcu_cpu_stall_suppress= [KNL]
53065312
Suppress RCU CPU stall warning messages.
53075313

include/linux/rcu_notifier.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,20 @@
1313
#define RCU_STALL_NOTIFY_NORM 1
1414
#define RCU_STALL_NOTIFY_EXP 2
1515

16-
#ifdef CONFIG_RCU_STALL_COMMON
16+
#if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
1717

1818
#include <linux/notifier.h>
1919
#include <linux/types.h>
2020

2121
int rcu_stall_chain_notifier_register(struct notifier_block *n);
2222
int rcu_stall_chain_notifier_unregister(struct notifier_block *n);
2323

24-
#else // #ifdef CONFIG_RCU_STALL_COMMON
24+
#else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
2525

2626
// No RCU CPU stall warnings in Tiny RCU.
2727
static inline int rcu_stall_chain_notifier_register(struct notifier_block *n) { return -EEXIST; }
2828
static inline int rcu_stall_chain_notifier_unregister(struct notifier_block *n) { return -ENOENT; }
2929

30-
#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON
30+
#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
3131

3232
#endif /* __LINUX_RCU_NOTIFIER_H */

kernel/rcu/Kconfig.debug

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,31 @@ config RCU_CPU_STALL_CPUTIME
105105
The boot option rcupdate.rcu_cpu_stall_cputime has the same function
106106
as this one, but will override this if it exists.
107107

108+
config RCU_CPU_STALL_NOTIFIER
109+
bool "Provide RCU CPU-stall notifiers"
110+
depends on RCU_STALL_COMMON
111+
depends on DEBUG_KERNEL
112+
depends on RCU_EXPERT
113+
default n
114+
help
115+
WARNING: You almost certainly do not want this!!!
116+
117+
Enable RCU CPU-stall notifiers, which are invoked just before
118+
printing the RCU CPU stall warning. As such, bugs in notifier
119+
callbacks can prevent stall warnings from being printed.
120+
And the whole reason that a stall warning is being printed is
121+
that something is hung up somewhere. Therefore, the notifier
122+
callbacks must be written extremely carefully, preferably
123+
containing only lockless code. After all, it is quite possible
124+
that the whole reason that the RCU CPU stall is happening in
125+
the first place is that someone forgot to release whatever lock
126+
that you are thinking of acquiring. In which case, having your
127+
notifier callback acquire that lock will hang, preventing the
128+
RCU CPU stall warning from appearing.
129+
130+
Say Y here if you want RCU CPU stall notifiers (you don't want them)
131+
Say N if you are unsure.
132+
108133
config RCU_TRACE
109134
bool "Enable tracing for RCU"
110135
depends on DEBUG_KERNEL

kernel/rcu/rcu.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,8 @@ static inline bool rcu_stall_is_suppressed_at_boot(void)
262262
return rcu_cpu_stall_suppress_at_boot && !rcu_inkernel_boot_has_ended();
263263
}
264264

265+
extern int rcu_cpu_stall_notifiers;
266+
265267
#ifdef CONFIG_RCU_STALL_COMMON
266268

267269
extern int rcu_cpu_stall_ftrace_dump;
@@ -659,10 +661,10 @@ static inline bool rcu_cpu_beenfullyonline(int cpu) { return true; }
659661
bool rcu_cpu_beenfullyonline(int cpu);
660662
#endif
661663

662-
#ifdef CONFIG_RCU_STALL_COMMON
664+
#if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
663665
int rcu_stall_notifier_call_chain(unsigned long val, void *v);
664-
#else // #ifdef CONFIG_RCU_STALL_COMMON
666+
#else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
665667
static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; }
666-
#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON
668+
#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
667669

668670
#endif /* __LINUX_RCU_H */

kernel/rcu/rcutorture.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2450,10 +2450,12 @@ static int rcu_torture_stall(void *args)
24502450
unsigned long stop_at;
24512451

24522452
VERBOSE_TOROUT_STRING("rcu_torture_stall task started");
2453-
ret = rcu_stall_chain_notifier_register(&rcu_torture_stall_block);
2454-
if (ret)
2455-
pr_info("%s: rcu_stall_chain_notifier_register() returned %d, %sexpected.\n",
2456-
__func__, ret, !IS_ENABLED(CONFIG_RCU_STALL_COMMON) ? "un" : "");
2453+
if (rcu_cpu_stall_notifiers) {
2454+
ret = rcu_stall_chain_notifier_register(&rcu_torture_stall_block);
2455+
if (ret)
2456+
pr_info("%s: rcu_stall_chain_notifier_register() returned %d, %sexpected.\n",
2457+
__func__, ret, !IS_ENABLED(CONFIG_RCU_STALL_COMMON) ? "un" : "");
2458+
}
24572459
if (stall_cpu_holdoff > 0) {
24582460
VERBOSE_TOROUT_STRING("rcu_torture_stall begin holdoff");
24592461
schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
@@ -2497,7 +2499,7 @@ static int rcu_torture_stall(void *args)
24972499
cur_ops->readunlock(idx);
24982500
}
24992501
pr_alert("%s end.\n", __func__);
2500-
if (!ret) {
2502+
if (rcu_cpu_stall_notifiers && !ret) {
25012503
ret = rcu_stall_chain_notifier_unregister(&rcu_torture_stall_block);
25022504
if (ret)
25032505
pr_info("%s: rcu_stall_chain_notifier_unregister() returned %d.\n", __func__, ret);

kernel/rcu/tree_stall.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,7 @@ static int __init rcu_sysrq_init(void)
10611061
}
10621062
early_initcall(rcu_sysrq_init);
10631063

1064+
#ifdef CONFIG_RCU_CPU_STALL_NOTIFIER
10641065

10651066
//////////////////////////////////////////////////////////////////////////////
10661067
//
@@ -1081,7 +1082,13 @@ static ATOMIC_NOTIFIER_HEAD(rcu_cpu_stall_notifier_list);
10811082
*/
10821083
int rcu_stall_chain_notifier_register(struct notifier_block *n)
10831084
{
1084-
return atomic_notifier_chain_register(&rcu_cpu_stall_notifier_list, n);
1085+
int rcsn = rcu_cpu_stall_notifiers;
1086+
1087+
WARN(1, "Adding %pS() to RCU stall notifier list (%s).\n", n->notifier_call,
1088+
rcsn ? "possibly suppressing RCU CPU stall warnings" : "failed, so all is well");
1089+
if (rcsn)
1090+
return atomic_notifier_chain_register(&rcu_cpu_stall_notifier_list, n);
1091+
return -EEXIST;
10851092
}
10861093
EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_register);
10871094

@@ -1115,3 +1122,5 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v)
11151122
{
11161123
return atomic_notifier_call_chain(&rcu_cpu_stall_notifier_list, val, v);
11171124
}
1125+
1126+
#endif // #ifdef CONFIG_RCU_CPU_STALL_NOTIFIER

kernel/rcu/update.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,9 +538,15 @@ long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
538538
EXPORT_SYMBOL_GPL(torture_sched_setaffinity);
539539
#endif
540540

541+
int rcu_cpu_stall_notifiers __read_mostly; // !0 = provide stall notifiers (rarely useful)
542+
EXPORT_SYMBOL_GPL(rcu_cpu_stall_notifiers);
543+
541544
#ifdef CONFIG_RCU_STALL_COMMON
542545
int rcu_cpu_stall_ftrace_dump __read_mostly;
543546
module_param(rcu_cpu_stall_ftrace_dump, int, 0644);
547+
#ifdef CONFIG_RCU_CPU_STALL_NOTIFIER
548+
module_param(rcu_cpu_stall_notifiers, int, 0444);
549+
#endif // #ifdef CONFIG_RCU_CPU_STALL_NOTIFIER
544550
int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings.
545551
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
546552
module_param(rcu_cpu_stall_suppress, int, 0644);

0 commit comments

Comments
 (0)