Skip to content

Commit b96e7a5

Browse files
joelagnel authored and Frederic Weisbecker committed
rcu/tree: Defer setting of jiffies during stall reset
There are instances where rcu_cpu_stall_reset() is called when jiffies did not get a chance to update for a long time. Before jiffies is updated, the CPU stall detector can go off, triggering false positives where a just-started grace period appears to be ages old. In the past, we disabled stall detection in rcu_cpu_stall_reset(); however, this got changed [1]. This is resulting in false positives in the KGDB use case [2]. Fix this by deferring the setting of jiffies_stall from jiffies until the third run of the FQS loop. This is more robust because, even if rcu_cpu_stall_reset() is called just before jiffies is read, we would end up pushing out the jiffies read by 3 more FQS loops. Meanwhile, the CPU stall detection will be delayed and we will not get any false positives. [1] https://lore.kernel.org/all/20210521155624.174524-2-senozhatsky@chromium.org/ [2] https://lore.kernel.org/all/20230814020045.51950-2-chenhuacai@loongson.cn/ Tested with the rcutorture.cpu_stall option as well to verify stall behavior with/without the patch. Tested-by: Huacai Chen <chenhuacai@loongson.cn> Reported-by: Binbin Zhou <zhoubinbin@loongson.cn> Closes: https://lore.kernel.org/all/20230814020045.51950-2-chenhuacai@loongson.cn/ Suggested-by: Paul McKenney <paulmck@kernel.org> Cc: Sergey Senozhatsky <senozhatsky@chromium.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: stable@vger.kernel.org Fixes: a80be42 ("rcu: Do not disable GP stall detection in rcu_cpu_stall_reset()") Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org> Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
1 parent 7c1b3e0 commit b96e7a5

File tree

3 files changed

+34
-2
lines changed

3 files changed

+34
-2
lines changed

kernel/rcu/tree.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1556,10 +1556,22 @@ static bool rcu_gp_fqs_check_wake(int *gfp)
15561556
*/
15571557
static void rcu_gp_fqs(bool first_time)
15581558
{
1559+
int nr_fqs = READ_ONCE(rcu_state.nr_fqs_jiffies_stall);
15591560
struct rcu_node *rnp = rcu_get_root();
15601561

15611562
WRITE_ONCE(rcu_state.gp_activity, jiffies);
15621563
WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1);
1564+
1565+
WARN_ON_ONCE(nr_fqs > 3);
1566+
/* Only countdown nr_fqs for stall purposes if jiffies moves. */
1567+
if (nr_fqs) {
1568+
if (nr_fqs == 1) {
1569+
WRITE_ONCE(rcu_state.jiffies_stall,
1570+
jiffies + rcu_jiffies_till_stall_check());
1571+
}
1572+
WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, --nr_fqs);
1573+
}
1574+
15631575
if (first_time) {
15641576
/* Collect dyntick-idle snapshots. */
15651577
force_qs_rnp(dyntick_save_progress_counter);

kernel/rcu/tree.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,10 @@ struct rcu_state {
386386
/* in jiffies. */
387387
unsigned long jiffies_stall; /* Time at which to check */
388388
/* for CPU stalls. */
389+
int nr_fqs_jiffies_stall; /* Number of fqs loops after
390+
* which read jiffies and set
391+
* jiffies_stall. Stall
392+
* warnings disabled if !0. */
389393
unsigned long jiffies_resched; /* Time at which to resched */
390394
/* a reluctant CPU. */
391395
unsigned long n_force_qs_gpstart; /* Snapshot of n_force_qs at */

kernel/rcu/tree_stall.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,12 +150,17 @@ static void panic_on_rcu_stall(void)
150150
/**
151151
* rcu_cpu_stall_reset - restart stall-warning timeout for current grace period
152152
*
153+
* To perform the reset request from the caller, disable stall detection until
154+
* 3 fqs loops have passed. This is required to ensure a fresh jiffies is
155+
* loaded. It should be safe to do from the fqs loop as enough timer
156+
* interrupts and context switches should have passed.
157+
*
153158
* The caller must disable hard irqs.
154159
*/
155160
void rcu_cpu_stall_reset(void)
156161
{
157-
WRITE_ONCE(rcu_state.jiffies_stall,
158-
jiffies + rcu_jiffies_till_stall_check());
162+
WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 3);
163+
WRITE_ONCE(rcu_state.jiffies_stall, ULONG_MAX);
159164
}
160165

161166
//////////////////////////////////////////////////////////////////////////////
@@ -171,6 +176,7 @@ static void record_gp_stall_check_time(void)
171176
WRITE_ONCE(rcu_state.gp_start, j);
172177
j1 = rcu_jiffies_till_stall_check();
173178
smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq.
179+
WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 0);
174180
WRITE_ONCE(rcu_state.jiffies_stall, j + j1);
175181
rcu_state.jiffies_resched = j + j1 / 2;
176182
rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
@@ -726,6 +732,16 @@ static void check_cpu_stall(struct rcu_data *rdp)
726732
!rcu_gp_in_progress())
727733
return;
728734
rcu_stall_kick_kthreads();
735+
736+
/*
737+
* Check if it was requested (via rcu_cpu_stall_reset()) that the FQS
738+
* loop has to set jiffies to ensure a non-stale jiffies value. This
739+
* is required to have good jiffies value after coming out of long
740+
* breaks of jiffies updates. Not doing so can cause false positives.
741+
*/
742+
if (READ_ONCE(rcu_state.nr_fqs_jiffies_stall) > 0)
743+
return;
744+
729745
j = jiffies;
730746

731747
/*

0 commit comments

Comments
 (0)