
Commit b237e1f

petrpavlu authored and rostedt committed
ring-buffer: Limit time with disabled interrupts in rb_check_pages()
The function rb_check_pages() validates the integrity of a specified per-CPU tracing ring buffer. It does so by traversing the underlying linked list and checking its next and prev links.

To guarantee that the list isn't modified during the check, a caller typically needs to take cpu_buffer->reader_lock. This prevents the check from running concurrently, for example, with a potential reader which can make the list temporarily inconsistent when swapping its old reader page into the buffer.

A problem with this approach is that the time when interrupts are disabled is non-deterministic, dependent on the ring buffer size. This particularly affects PREEMPT_RT because the reader_lock is a raw spinlock which doesn't become sleepable on PREEMPT_RT kernels.

Modify the check so it still attempts to traverse the entire list, but gives up the reader_lock between checking individual pages. Introduce for this purpose a new variable ring_buffer_per_cpu.cnt which is bumped any time the list is modified. The value is used by rb_check_pages() to detect such a change and restart the check.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://lore.kernel.org/20241015112810.27203-1-petr.pavlu@suse.com
Signed-off-by: Petr Pavlu <petr.pavlu@suse.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
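The core idea is a generation-counter-guarded walk: the lock is dropped between pages, and the walk restarts if the counter shows the list changed while the lock was not held. Below is a minimal userspace C sketch of that pattern, not the kernel code itself; the names (struct ring, links_ok, check_list) are illustrative assumptions, and a pthread mutex stands in for the raw reader_lock.

/*
 * Illustrative sketch only: walk a circular doubly linked list while
 * dropping the lock between nodes, restarting if a generation counter
 * shows the list was modified in the meantime.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct node {
	struct node *next, *prev;
};

struct ring {
	pthread_mutex_t lock;	/* stands in for cpu_buffer->reader_lock */
	unsigned long cnt;	/* bumped whenever the list is modified */
	struct node *pages;	/* entry point into the circular list */
};

static bool links_ok(const struct node *n)
{
	/* Both neighbours must point back at this node. */
	return n->next->prev == n && n->prev->next == n;
}

static bool check_list(struct ring *r)
{
	struct node *head, *tmp;
	unsigned long snapshot;
	int tries = 0;

again:
	if (++tries > 3)
		return true;	/* give up quietly after a few restarts */

	pthread_mutex_lock(&r->lock);
	head = r->pages;
	if (!links_ok(head)) {
		pthread_mutex_unlock(&r->lock);
		return false;
	}
	snapshot = r->cnt;	/* generation the walk was started against */
	tmp = head;
	pthread_mutex_unlock(&r->lock);

	while (true) {
		pthread_mutex_lock(&r->lock);
		if (snapshot != r->cnt) {
			/* The list changed while the lock was dropped. */
			pthread_mutex_unlock(&r->lock);
			goto again;
		}
		tmp = tmp->next;
		if (tmp == head) {
			/* Circled back to the head: every link was checked. */
			pthread_mutex_unlock(&r->lock);
			return true;
		}
		if (!links_ok(tmp)) {
			pthread_mutex_unlock(&r->lock);
			return false;
		}
		pthread_mutex_unlock(&r->lock);
	}
}

int main(void)
{
	/* Build a three-node circular list and validate it. */
	struct node a, b, c;
	struct ring r = { .lock = PTHREAD_MUTEX_INITIALIZER, .cnt = 0, .pages = &a };

	a.next = &b; b.next = &c; c.next = &a;
	a.prev = &c; b.prev = &a; c.prev = &b;

	printf("list %s\n", check_list(&r) ? "ok" : "corrupted");
	return 0;
}

The bounded retry (three attempts) mirrors the commit's reasoning: the walk is only a best-effort self-validation, so it is acceptable to give up if the list keeps changing underneath it.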
1 parent 09661f7 commit b237e1f

File tree

1 file changed: +72 -26 lines


kernel/trace/ring_buffer.c

Lines changed: 72 additions & 26 deletions
@@ -482,6 +482,8 @@ struct ring_buffer_per_cpu {
 	unsigned long nr_pages;
 	unsigned int current_context;
 	struct list_head *pages;
+	/* pages generation counter, incremented when the list changes */
+	unsigned long cnt;
 	struct buffer_page *head_page;	/* read from head */
 	struct buffer_page *tail_page;	/* write to tail */
 	struct buffer_page *commit_page;	/* committed pages */
@@ -1475,40 +1477,87 @@ static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
 	RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK);
 }
 
+static bool rb_check_links(struct ring_buffer_per_cpu *cpu_buffer,
+			   struct list_head *list)
+{
+	if (RB_WARN_ON(cpu_buffer,
+		       rb_list_head(rb_list_head(list->next)->prev) != list))
+		return false;
+
+	if (RB_WARN_ON(cpu_buffer,
+		       rb_list_head(rb_list_head(list->prev)->next) != list))
+		return false;
+
+	return true;
+}
+
 /**
  * rb_check_pages - integrity check of buffer pages
  * @cpu_buffer: CPU buffer with pages to test
  *
  * As a safety measure we check to make sure the data pages have not
  * been corrupted.
- *
- * Callers of this function need to guarantee that the list of pages doesn't get
- * modified during the check. In particular, if it's possible that the function
- * is invoked with concurrent readers which can swap in a new reader page then
- * the caller should take cpu_buffer->reader_lock.
  */
 static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	struct list_head *head = rb_list_head(cpu_buffer->pages);
-	struct list_head *tmp;
+	struct list_head *head, *tmp;
+	unsigned long buffer_cnt;
+	unsigned long flags;
+	int nr_loops = 0;
 
-	if (RB_WARN_ON(cpu_buffer,
-		       rb_list_head(rb_list_head(head->next)->prev) != head))
+	/*
+	 * Walk the linked list underpinning the ring buffer and validate all
+	 * its next and prev links.
+	 *
+	 * The check acquires the reader_lock to avoid concurrent processing
+	 * with code that could be modifying the list. However, the lock cannot
+	 * be held for the entire duration of the walk, as this would make the
+	 * time when interrupts are disabled non-deterministic, dependent on the
+	 * ring buffer size. Therefore, the code releases and re-acquires the
+	 * lock after checking each page. The ring_buffer_per_cpu.cnt variable
+	 * is then used to detect if the list was modified while the lock was
+	 * not held, in which case the check needs to be restarted.
+	 *
+	 * The code attempts to perform the check at most three times before
+	 * giving up. This is acceptable because this is only a self-validation
+	 * to detect problems early on. In practice, the list modification
+	 * operations are fairly spaced, and so this check typically succeeds at
+	 * most on the second try.
+	 */
+again:
+	if (++nr_loops > 3)
 		return;
 
-	if (RB_WARN_ON(cpu_buffer,
-		       rb_list_head(rb_list_head(head->prev)->next) != head))
-		return;
+	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	head = rb_list_head(cpu_buffer->pages);
+	if (!rb_check_links(cpu_buffer, head))
+		goto out_locked;
+	buffer_cnt = cpu_buffer->cnt;
+	tmp = head;
+	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) {
-		if (RB_WARN_ON(cpu_buffer,
-			       rb_list_head(rb_list_head(tmp->next)->prev) != tmp))
-			return;
+	while (true) {
+		raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
-		if (RB_WARN_ON(cpu_buffer,
-			       rb_list_head(rb_list_head(tmp->prev)->next) != tmp))
-			return;
+		if (buffer_cnt != cpu_buffer->cnt) {
+			/* The list was updated, try again. */
+			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+			goto again;
+		}
+
+		tmp = rb_list_head(tmp->next);
+		if (tmp == head)
+			/* The iteration circled back, all is done. */
+			goto out_locked;
+
+		if (!rb_check_links(cpu_buffer, tmp))
+			goto out_locked;
+
+		raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 	}
+
+out_locked:
+	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 }
 
 /*
@@ -2535,6 +2584,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
 
 	/* make sure pages points to a valid page in the ring buffer */
 	cpu_buffer->pages = next_page;
+	cpu_buffer->cnt++;
 
 	/* update head page */
 	if (head_bit)
@@ -2641,6 +2691,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
 		 * pointer to point to end of list
 		 */
 		head_page->prev = last_page;
+		cpu_buffer->cnt++;
 		success = true;
 		break;
 	}
@@ -2876,12 +2927,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 	 */
 	synchronize_rcu();
 	for_each_buffer_cpu(buffer, cpu) {
-		unsigned long flags;
-
 		cpu_buffer = buffer->buffers[cpu];
-		raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 		rb_check_pages(cpu_buffer);
-		raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 	}
 	atomic_dec(&buffer->record_disabled);
 }
@@ -5299,6 +5346,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
 	rb_inc_page(&cpu_buffer->head_page);
 
+	cpu_buffer->cnt++;
 	local_inc(&cpu_buffer->pages_read);
 
 	/* Finally update the reader page to the new head */
@@ -5838,12 +5886,9 @@ void
 ring_buffer_read_finish(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
-	unsigned long flags;
 
 	/* Use this opportunity to check the integrity of the ring buffer. */
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	rb_check_pages(cpu_buffer);
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	atomic_dec(&cpu_buffer->resize_disabled);
 	kfree(iter->event);
@@ -6760,6 +6805,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
 		/* Install the new pages, remove the head from the list */
 		cpu_buffer->pages = cpu_buffer->new_pages.next;
 		list_del_init(&cpu_buffer->new_pages);
+		cpu_buffer->cnt++;
 
 		cpu_buffer->head_page
 			= list_entry(cpu_buffer->pages, struct buffer_page, list);
