Skip to content

Commit 42fb0a1

Browse files
committed
tracing/ring-buffer: Have polling block on watermark
Currently the way polling works on the ring buffer is broken. It will return immediately if there's any data in the ring buffer whereas a read will block until the watermark (defined by the tracefs buffer_percent file) is hit. That is, a select() or poll() will return as if there's data available, but then the following read will block. This is broken for the way select()s and poll()s are supposed to work.

Have the polling on the ring buffer also block the same way reads and splice does on the ring buffer.

Link: https://lkml.kernel.org/r/20221020231427.41be3f26@gandalf.local.home

Cc: Linux Trace Kernel <linux-trace-kernel@vger.kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Primiano Tucci <primiano@google.com>
Cc: stable@vger.kernel.org
Fixes: 1e0d671 ("ring-buffer: Do not wake up a splice waiter when page is not full")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1 parent 094226a commit 42fb0a1

File tree

3 files changed

+38
-21
lines changed

3 files changed

+38
-21
lines changed

include/linux/ring_buffer.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
100100

101101
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
102102
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
103-
struct file *filp, poll_table *poll_table);
103+
struct file *filp, poll_table *poll_table, int full);
104104
void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
105105

106106
#define RING_BUFFER_ALL_CPUS -1

kernel/trace/ring_buffer.c

Lines changed: 36 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -907,6 +907,21 @@ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
907907
return cnt - read;
908908
}
909909

910+
/*
 * full_hit - check if the watermark for @cpu's buffer has been reached
 * @buffer: the trace buffer containing the per-CPU ring buffers
 * @cpu:    which per-CPU buffer to test
 * @full:   the watermark as a percentage of pages (tracefs buffer_percent)
 *
 * Returns true when the number of dirty pages in the CPU buffer exceeds
 * @full percent of its pages, i.e. a waiter blocked on the watermark
 * should be woken (or poll should report readable).
 */
static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full)
{
	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
	size_t nr_pages;
	size_t dirty;

	nr_pages = cpu_buffer->nr_pages;
	/* No watermark requested (full == 0), or a zero-page buffer:
	 * treat the watermark as trivially satisfied.
	 */
	if (!nr_pages || !full)
		return true;

	dirty = ring_buffer_nr_dirty_pages(buffer, cpu);

	/*
	 * Compare dirty/nr_pages against full/100 using cross-multiplication
	 * to stay in integer arithmetic: dirty * 100 > full * nr_pages.
	 */
	return (dirty * 100) > (full * nr_pages);
}
924+
910925
/*
911926
* rb_wake_up_waiters - wake up tasks waiting for ring buffer input
912927
*
@@ -1046,22 +1061,20 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
10461061
!ring_buffer_empty_cpu(buffer, cpu)) {
10471062
unsigned long flags;
10481063
bool pagebusy;
1049-
size_t nr_pages;
1050-
size_t dirty;
1064+
bool done;
10511065

10521066
if (!full)
10531067
break;
10541068

10551069
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
10561070
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
1057-
nr_pages = cpu_buffer->nr_pages;
1058-
dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
1071+
done = !pagebusy && full_hit(buffer, cpu, full);
1072+
10591073
if (!cpu_buffer->shortest_full ||
10601074
cpu_buffer->shortest_full > full)
10611075
cpu_buffer->shortest_full = full;
10621076
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1063-
if (!pagebusy &&
1064-
(!nr_pages || (dirty * 100) > full * nr_pages))
1077+
if (done)
10651078
break;
10661079
}
10671080

@@ -1087,6 +1100,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
10871100
* @cpu: the cpu buffer to wait on
10881101
* @filp: the file descriptor
10891102
* @poll_table: The poll descriptor
1103+
* @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
10901104
*
10911105
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
10921106
* as data is added to any of the @buffer's cpu buffers. Otherwise
@@ -1096,23 +1110,30 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
10961110
* zero otherwise.
10971111
*/
10981112
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
1099-
struct file *filp, poll_table *poll_table)
1113+
struct file *filp, poll_table *poll_table, int full)
11001114
{
11011115
struct ring_buffer_per_cpu *cpu_buffer;
11021116
struct rb_irq_work *work;
11031117

1104-
if (cpu == RING_BUFFER_ALL_CPUS)
1118+
if (cpu == RING_BUFFER_ALL_CPUS) {
11051119
work = &buffer->irq_work;
1106-
else {
1120+
full = 0;
1121+
} else {
11071122
if (!cpumask_test_cpu(cpu, buffer->cpumask))
11081123
return -EINVAL;
11091124

11101125
cpu_buffer = buffer->buffers[cpu];
11111126
work = &cpu_buffer->irq_work;
11121127
}
11131128

1114-
poll_wait(filp, &work->waiters, poll_table);
1115-
work->waiters_pending = true;
1129+
if (full) {
1130+
poll_wait(filp, &work->full_waiters, poll_table);
1131+
work->full_waiters_pending = true;
1132+
} else {
1133+
poll_wait(filp, &work->waiters, poll_table);
1134+
work->waiters_pending = true;
1135+
}
1136+
11161137
/*
11171138
* There's a tight race between setting the waiters_pending and
11181139
* checking if the ring buffer is empty. Once the waiters_pending bit
@@ -1128,6 +1149,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
11281149
*/
11291150
smp_mb();
11301151

1152+
if (full)
1153+
return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
1154+
11311155
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
11321156
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
11331157
return EPOLLIN | EPOLLRDNORM;
@@ -3155,10 +3179,6 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
31553179
static __always_inline void
31563180
rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
31573181
{
3158-
size_t nr_pages;
3159-
size_t dirty;
3160-
size_t full;
3161-
31623182
if (buffer->irq_work.waiters_pending) {
31633183
buffer->irq_work.waiters_pending = false;
31643184
/* irq_work_queue() supplies it's own memory barriers */
@@ -3182,10 +3202,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
31823202

31833203
cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
31843204

3185-
full = cpu_buffer->shortest_full;
3186-
nr_pages = cpu_buffer->nr_pages;
3187-
dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
3188-
if (full && nr_pages && (dirty * 100) <= full * nr_pages)
3205+
if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full))
31893206
return;
31903207

31913208
cpu_buffer->irq_work.wakeup_full = true;

kernel/trace/trace.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6681,7 +6681,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl
66816681
return EPOLLIN | EPOLLRDNORM;
66826682
else
66836683
return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6684-
filp, poll_table);
6684+
filp, poll_table, iter->tr->buffer_percent);
66856685
}
66866686

66876687
static __poll_t

0 commit comments

Comments (0)