@@ -482,6 +482,8 @@ struct ring_buffer_per_cpu {
 	unsigned long			nr_pages;
 	unsigned int			current_context;
 	struct list_head		*pages;
+	/* pages generation counter, incremented when the list changes */
+	unsigned long			cnt;
 	struct buffer_page		*head_page;	/* read from head */
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
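The new cnt field acts as a generation counter: every operation that restructures the page list increments it while holding reader_lock, so a checker that drops the lock between steps can detect afterwards that the list changed behind its back. Below is a minimal standalone sketch of that pattern, not kernel code: a pthread mutex stands in for the raw spinlock, and all names (gen_list, gen_list_add, gen_list_check) are illustrative.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct gen_list {
	pthread_mutex_t lock;	/* stands in for reader_lock */
	unsigned long cnt;	/* generation counter */
	int nr_items;		/* stands in for the page list itself */
};

/* Writer: every structural change bumps the counter under the lock,
 * mirroring the cpu_buffer->cnt++ sites in this patch. */
static void gen_list_add(struct gen_list *gl)
{
	pthread_mutex_lock(&gl->lock);
	gl->nr_items++;
	gl->cnt++;
	pthread_mutex_unlock(&gl->lock);
}

/* Checker: validate one item per lock hold, so the lock is never held
 * for a time proportional to the list size. If the counter moved while
 * the lock was dropped, a writer interfered: restart, at most 3 tries. */
static bool gen_list_check(struct gen_list *gl)
{
	unsigned long snapshot;
	int i, nr_loops = 0;

again:
	if (++nr_loops > 3)
		return false;		/* give up, as rb_check_pages() does */

	pthread_mutex_lock(&gl->lock);
	snapshot = gl->cnt;
	i = 0;
	pthread_mutex_unlock(&gl->lock);

	while (true) {
		pthread_mutex_lock(&gl->lock);
		if (gl->cnt != snapshot) {
			/* The list was updated, try again. */
			pthread_mutex_unlock(&gl->lock);
			goto again;
		}
		if (i >= gl->nr_items) {
			/* Walked the whole list, all is done. */
			pthread_mutex_unlock(&gl->lock);
			return true;
		}
		/* ... validate item i's links here ... */
		i++;
		pthread_mutex_unlock(&gl->lock);
	}
}

int main(void)
{
	struct gen_list gl = { .lock = PTHREAD_MUTEX_INITIALIZER };

	gen_list_add(&gl);
	gen_list_add(&gl);
	printf("check passed: %d\n", gen_list_check(&gl));
	return 0;
}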
@@ -1475,40 +1477,87 @@ static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
 	RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK);
 }
 
+static bool rb_check_links(struct ring_buffer_per_cpu *cpu_buffer,
+			   struct list_head *list)
+{
+	if (RB_WARN_ON(cpu_buffer,
+		       rb_list_head(rb_list_head(list->next)->prev) != list))
+		return false;
+
+	if (RB_WARN_ON(cpu_buffer,
+		       rb_list_head(rb_list_head(list->prev)->next) != list))
+		return false;
+
+	return true;
+}
+
 /**
  * rb_check_pages - integrity check of buffer pages
  * @cpu_buffer: CPU buffer with pages to test
  *
  * As a safety measure we check to make sure the data pages have not
  * been corrupted.
- *
- * Callers of this function need to guarantee that the list of pages doesn't get
- * modified during the check. In particular, if it's possible that the function
- * is invoked with concurrent readers which can swap in a new reader page then
- * the caller should take cpu_buffer->reader_lock.
  */
 static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	struct list_head *head = rb_list_head(cpu_buffer->pages);
-	struct list_head *tmp;
+	struct list_head *head, *tmp;
+	unsigned long buffer_cnt;
+	unsigned long flags;
+	int nr_loops = 0;
 
-	if (RB_WARN_ON(cpu_buffer,
-			rb_list_head(rb_list_head(head->next)->prev) != head))
+	/*
+	 * Walk the linked list underpinning the ring buffer and validate all
+	 * its next and prev links.
+	 *
+	 * The check acquires the reader_lock to avoid concurrent processing
+	 * with code that could be modifying the list. However, the lock cannot
+	 * be held for the entire duration of the walk, as this would make the
+	 * time when interrupts are disabled non-deterministic, dependent on the
+	 * ring buffer size. Therefore, the code releases and re-acquires the
+	 * lock after checking each page. The ring_buffer_per_cpu.cnt variable
+	 * is then used to detect if the list was modified while the lock was
+	 * not held, in which case the check needs to be restarted.
+	 *
+	 * The code attempts to perform the check at most three times before
+	 * giving up. This is acceptable because this is only a self-validation
+	 * to detect problems early on. In practice, the list modification
+	 * operations are fairly spaced, and so this check typically succeeds at
+	 * most on the second try.
+	 */
+again:
+	if (++nr_loops > 3)
 		return;
 
-	if (RB_WARN_ON(cpu_buffer,
-			rb_list_head(rb_list_head(head->prev)->next) != head))
-		return;
+	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	head = rb_list_head(cpu_buffer->pages);
+	if (!rb_check_links(cpu_buffer, head))
+		goto out_locked;
+	buffer_cnt = cpu_buffer->cnt;
+	tmp = head;
+	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-	for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) {
-		if (RB_WARN_ON(cpu_buffer,
-				rb_list_head(rb_list_head(tmp->next)->prev) != tmp))
-			return;
+	while (true) {
+		raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
-		if (RB_WARN_ON(cpu_buffer,
-				rb_list_head(rb_list_head(tmp->prev)->next) != tmp))
-			return;
+		if (buffer_cnt != cpu_buffer->cnt) {
+			/* The list was updated, try again. */
+			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+			goto again;
+		}
+
+		tmp = rb_list_head(tmp->next);
+		if (tmp == head)
+			/* The iteration circled back, all is done. */
+			goto out_locked;
+
+		if (!rb_check_links(cpu_buffer, tmp))
+			goto out_locked;
+
+		raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 	}
+
+out_locked:
+	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 }
 
 /*
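For contrast, the per-node validation that rb_check_links() factors out reduces to checking that both neighbours of a node point back at it. A standalone toy version follows (plain pointers, without the rb_list_head() flag masking the kernel needs; struct node and check_links are illustrative names, not kernel API).

#include <stdbool.h>
#include <stdio.h>

struct node {
	struct node *next;
	struct node *prev;
};

/* A node is consistent iff its neighbours point back at it. */
static bool check_links(const struct node *n)
{
	if (n->next->prev != n)
		return false;	/* forward link not mirrored */
	if (n->prev->next != n)
		return false;	/* backward link not mirrored */
	return true;
}

int main(void)
{
	struct node a, b, c;

	/* Build a 3-node circular list: a -> b -> c -> a. */
	a.next = &b; b.next = &c; c.next = &a;
	a.prev = &c; b.prev = &a; c.prev = &b;

	printf("intact: %d\n", check_links(&b));	/* prints 1 */

	b.next = &a;	/* corrupt one link: b now skips c */
	printf("corrupted: %d\n",
	       check_links(&b) && check_links(&c));	/* prints 0 */

	return 0;
}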
@@ -2384,9 +2433,9 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
  * __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory
  * @size: the size in bytes per cpu that is needed.
  * @flags: attributes to set for the ring buffer.
+ * @order: sub-buffer order
  * @start: start of allocated range
  * @range_size: size of allocated range
- * @order: sub-buffer order
  * @key: ring buffer reader_lock_key.
  *
  * Currently the only flag that is available is the RB_FL_OVERWRITE
@@ -2532,6 +2581,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
 
 	/* make sure pages points to a valid page in the ring buffer */
 	cpu_buffer->pages = next_page;
+	cpu_buffer->cnt++;
 
 	/* update head page */
 	if (head_bit)
@@ -2638,6 +2688,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
 			 * pointer to point to end of list
 			 */
 			head_page->prev = last_page;
+			cpu_buffer->cnt++;
 			success = true;
 			break;
 		}
@@ -2873,12 +2924,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 		 */
 		synchronize_rcu();
 		for_each_buffer_cpu(buffer, cpu) {
-			unsigned long flags;
-
 			cpu_buffer = buffer->buffers[cpu];
-			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 			rb_check_pages(cpu_buffer);
-			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 		}
 		atomic_dec(&buffer->record_disabled);
 	}
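With rb_check_pages() now taking reader_lock internally for each page, its callers no longer need their own lock/unlock pair around the call; the ring_buffer_read_finish() hunk below drops the same boilerplate.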
@@ -4010,7 +4057,7 @@ static const char *show_irq_str(int bits)
 	return type[bits];
 }
 
-/* Assume this is an trace event */
+/* Assume this is a trace event */
 static const char *show_flags(struct ring_buffer_event *event)
 {
 	struct trace_entry *entry;
@@ -5296,6 +5343,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
 	rb_inc_page(&cpu_buffer->head_page);
 
+	cpu_buffer->cnt++;
 	local_inc(&cpu_buffer->pages_read);
 
 	/* Finally update the reader page to the new head */
@@ -5835,12 +5883,9 @@ void
 ring_buffer_read_finish(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
-	unsigned long flags;
 
 	/* Use this opportunity to check the integrity of the ring buffer. */
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	rb_check_pages(cpu_buffer);
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	atomic_dec(&cpu_buffer->resize_disabled);
 	kfree(iter->event);
@@ -6757,6 +6802,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
 		/* Install the new pages, remove the head from the list */
 		cpu_buffer->pages = cpu_buffer->new_pages.next;
 		list_del_init(&cpu_buffer->new_pages);
+		cpu_buffer->cnt++;
 
 		cpu_buffer->head_page
 			= list_entry(cpu_buffer->pages, struct buffer_page, list);