Commit b359457
ring-buffer: Fix waking up ring buffer readers
A task can wait on a ring buffer for when it fills up to a specific watermark. The writer will check the minimum watermark that waiters are waiting for, and if the ring buffer is past that, it will wake up all the waiters.

The waiters are in a wait loop, and will first check if a signal is pending and then check if the ring buffer is at the desired level where it should break out of the loop.

If a file that uses a ring buffer closes, and there are threads waiting on the ring buffer, it needs to wake up those threads. To do this, a "wait_index" was used. Before entering the wait loop, the waiter will read the wait_index. On wakeup, it will check if the wait_index is different from when it entered the loop, and will exit the loop if it is. The waker only needs to update the wait_index before waking up the waiters.

This had a couple of bugs: one trivial, and one broken by design.

The trivial bug was that the waiter checked the wait_index after the schedule() call. It had to be checked between the prepare_to_wait() and the schedule(), which it was not.

The main bug is that the first check to set the default wait_index will always be outside the prepare_to_wait() and the schedule(). That's because ring_buffer_wait() doesn't have enough context to know if it should break out of the loop.

The loop itself is not needed, because all the callers of ring_buffer_wait() also have their own loops, as the callers have a better sense of what the context is to decide whether to break out of the loop or not.

Just have ring_buffer_wait() block once, and if it gets woken up, exit the function and let the callers decide what to do next.

Link: https://lore.kernel.org/all/CAHk-=whs5MdtNjzFkTyaUy=vHi=qwWgPi0JgTe6OYUYMNSRZfg@mail.gmail.com/
Link: https://lore.kernel.org/linux-trace-kernel/20240308202431.792933613@goodmis.org
Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linke li <lilinke99@qq.com>
Cc: Rabin Vincent <rabin@rab.in>
Fixes: e30f53a ("tracing: Do not busy wait in buffer splice")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1 parent 095fe48 commit b359457
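
For context, the ordering that the "trivial bug" violates is the classic wait-queue idiom: the wakeup condition must be tested after prepare_to_wait() has set the task state and before schedule(). A minimal sketch of that idiom (illustrative only; my_waitq and condition_met() are stand-ins, not names from this commit):

#include <linux/sched.h>
#include <linux/wait.h>

static bool condition_met(void);	/* stand-in for the real wakeup condition */

static int wait_for_condition(wait_queue_head_t *my_waitq)
{
	DEFINE_WAIT(wait);
	int ret = 0;

	prepare_to_wait(my_waitq, &wait, TASK_INTERRUPTIBLE);

	/*
	 * Test the condition *between* prepare_to_wait() and schedule().
	 * A wakeup that races with this check simply resets the task to
	 * TASK_RUNNING, so schedule() returns immediately instead of
	 * sleeping on a wakeup that already happened.
	 */
	if (!condition_met() && !signal_pending(current))
		schedule();

	if (signal_pending(current))
		ret = -EINTR;

	finish_wait(my_waitq, &wait);
	return ret;
}

Checking the condition only after schedule() returns, as the old loop did with wait_index, leaves a window where a wakeup issued before the task actually sleeps is never noticed.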

1 file changed: kernel/trace/ring_buffer.c (68 additions, 71 deletions)
@@ -384,7 +384,6 @@ struct rb_irq_work {
 	struct irq_work			work;
 	wait_queue_head_t		waiters;
 	wait_queue_head_t		full_waiters;
-	long				wait_index;
 	bool				waiters_pending;
 	bool				full_waiters_pending;
 	bool				wakeup_full;
@@ -798,14 +797,40 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
 		rbwork = &cpu_buffer->irq_work;
 	}
 
-	rbwork->wait_index++;
-	/* make sure the waiters see the new index */
-	smp_wmb();
-
 	/* This can be called in any context */
 	irq_work_queue(&rbwork->work);
 }
 
+static bool rb_watermark_hit(struct trace_buffer *buffer, int cpu, int full)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	bool ret = false;
+
+	/* Reads of all CPUs always waits for any data */
+	if (cpu == RING_BUFFER_ALL_CPUS)
+		return !ring_buffer_empty(buffer);
+
+	cpu_buffer = buffer->buffers[cpu];
+
+	if (!ring_buffer_empty_cpu(buffer, cpu)) {
+		unsigned long flags;
+		bool pagebusy;
+
+		if (!full)
+			return true;
+
+		raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+		pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+		ret = !pagebusy && full_hit(buffer, cpu, full);
+
+		if (!cpu_buffer->shortest_full ||
+		    cpu_buffer->shortest_full > full)
+			cpu_buffer->shortest_full = full;
+		raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	}
+	return ret;
+}
+
 /**
  * ring_buffer_wait - wait for input to the ring buffer
  * @buffer: buffer to wait on
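
The new rb_watermark_hit() helper centralizes the "is the buffer where the waiter wants it" test so the same check can run both before sleeping and after waking. Since ring_buffer_wait() now blocks at most once, each caller supplies its own loop, as the commit message notes. A minimal sketch of that caller-side pattern, assuming a hypothetical buffer_ready() condition (the real callers check their own conditions, e.g. !ring_buffer_empty_cpu()):

static bool buffer_ready(struct trace_buffer *buffer, int cpu, int full);

static int wait_for_trace_data(struct trace_buffer *buffer, int cpu, int full)
{
	/* The loop lives in the caller, which knows when waiting should stop. */
	while (!buffer_ready(buffer, cpu, full)) {
		int ret = ring_buffer_wait(buffer, cpu, full);

		if (ret == -EINTR)	/* signal pending: report to user space */
			return ret;
		/* Spurious or close-time wakeup: re-check and decide here. */
	}
	return 0;
}

This is why the wait_index machinery could be dropped entirely: a close-time wakeup simply makes ring_buffer_wait() return, and the caller's loop, not the ring buffer, decides whether to wait again.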
@@ -821,7 +846,6 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	DEFINE_WAIT(wait);
 	struct rb_irq_work *work;
-	long wait_index;
 	int ret = 0;
 
 	/*
@@ -840,81 +864,54 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
 		work = &cpu_buffer->irq_work;
 	}
 
-	wait_index = READ_ONCE(work->wait_index);
-
-	while (true) {
-		if (full)
-			prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
-		else
-			prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
-
-		/*
-		 * The events can happen in critical sections where
-		 * checking a work queue can cause deadlocks.
-		 * After adding a task to the queue, this flag is set
-		 * only to notify events to try to wake up the queue
-		 * using irq_work.
-		 *
-		 * We don't clear it even if the buffer is no longer
-		 * empty. The flag only causes the next event to run
-		 * irq_work to do the work queue wake up. The worse
-		 * that can happen if we race with !trace_empty() is that
-		 * an event will cause an irq_work to try to wake up
-		 * an empty queue.
-		 *
-		 * There's no reason to protect this flag either, as
-		 * the work queue and irq_work logic will do the necessary
-		 * synchronization for the wake ups. The only thing
-		 * that is necessary is that the wake up happens after
-		 * a task has been queued. It's OK for spurious wake ups.
-		 */
-		if (full)
-			work->full_waiters_pending = true;
-		else
-			work->waiters_pending = true;
-
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			break;
-		}
-
-		if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
-			break;
-
-		if (cpu != RING_BUFFER_ALL_CPUS &&
-		    !ring_buffer_empty_cpu(buffer, cpu)) {
-			unsigned long flags;
-			bool pagebusy;
-			bool done;
-
-			if (!full)
-				break;
-
-			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
-			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
-			done = !pagebusy && full_hit(buffer, cpu, full);
+	if (full)
+		prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
+	else
+		prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
 
-			if (!cpu_buffer->shortest_full ||
-			    cpu_buffer->shortest_full > full)
-				cpu_buffer->shortest_full = full;
-			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-			if (done)
-				break;
-		}
+	/*
+	 * The events can happen in critical sections where
+	 * checking a work queue can cause deadlocks.
+	 * After adding a task to the queue, this flag is set
+	 * only to notify events to try to wake up the queue
+	 * using irq_work.
+	 *
+	 * We don't clear it even if the buffer is no longer
+	 * empty. The flag only causes the next event to run
+	 * irq_work to do the work queue wake up. The worse
+	 * that can happen if we race with !trace_empty() is that
+	 * an event will cause an irq_work to try to wake up
+	 * an empty queue.
+	 *
+	 * There's no reason to protect this flag either, as
+	 * the work queue and irq_work logic will do the necessary
+	 * synchronization for the wake ups. The only thing
+	 * that is necessary is that the wake up happens after
+	 * a task has been queued. It's OK for spurious wake ups.
+	 */
+	if (full)
+		work->full_waiters_pending = true;
+	else
+		work->waiters_pending = true;
 
-		schedule();
+	if (rb_watermark_hit(buffer, cpu, full))
+		goto out;
 
-		/* Make sure to see the new wait index */
-		smp_rmb();
-		if (wait_index != work->wait_index)
-			break;
+	if (signal_pending(current)) {
+		ret = -EINTR;
+		goto out;
 	}
 
+	schedule();
+ out:
 	if (full)
 		finish_wait(&work->full_waiters, &wait);
 	else
 		finish_wait(&work->waiters, &wait);
 
+	if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
+		ret = -EINTR;
+
 	return ret;
 }

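On the writer side, the commit message's "check the minimum watermark that waiters are waiting for" corresponds to the shortest_full value that rb_watermark_hit() now updates under reader_lock. A rough sketch of the shape of that producer-side check (paraphrased from the general structure of the kernel's wakeup path, assuming the existing full_hit() helper; not an exact copy of rb_wakeups()):

static void writer_side_wakeup(struct trace_buffer *buffer,
			       struct ring_buffer_per_cpu *cpu_buffer)
{
	struct rb_irq_work *rbwork = &cpu_buffer->irq_work;

	if (rbwork->full_waiters_pending &&
	    full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full)) {
		/*
		 * The buffer has passed the smallest watermark any waiter
		 * asked for.  Writers can run in contexts where a direct
		 * wakeup could deadlock, so defer it to irq_work.
		 */
		rbwork->wakeup_full = true;
		rbwork->full_waiters_pending = false;
		irq_work_queue(&rbwork->work);
	}
}

Note also the final re-check after finish_wait() in the new ring_buffer_wait(): if the task was woken without the watermark being hit while a signal is pending, -EINTR is returned, so the caller's loop does not immediately go back to sleep with a signal outstanding.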