Skip to content

Commit 538c429

Browse files
committed
Merge tag 'trace-v6.16-3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull more tracing fixes from Steven Rostedt: - Fix regression of waiting a long time on updating trace event filters When the faultable trace points were added, it needed task trace RCU synchronization. This was added to the tracepoint_synchronize_unregister() function. The filter logic always called this function whenever it updated the trace event filters before freeing the old filters. This increased the time of "trace-cmd record" from taking 13 seconds to running over 2 minutes to complete. Move the freeing of the filters to call_rcu*() logic, which brings the time back down to 13 seconds. - Fix ring_buffer_subbuf_order_set() error path lock protection The error path of ring_buffer_subbuf_order_set() released the mutex too early and allowed subsequent accesses to setting the subbuffer size to corrupt the data and cause a bug. By moving the mutex locking to the end of the error path, it prevents the reentrant access to the critical data and also allows the function to convert the taking of the mutex over to the guard() logic. - Remove unused power management clock events The clock events were added in 2010 for power management. In 2011 arm used them. In 2013 the code they were used in was removed. These events have been wasting memory since then. - Fix sparse warnings There were a few places that sparse warned about in trace_events_filter.c where file->filter was referenced directly, but it is annotated with an __rcu tag. Use the helper functions and fix them up to use rcu_dereference() properly. * tag 'trace-v6.16-3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: tracing: Add rcu annotation around file->filter accesses tracing: PM: Remove unused clock events ring-buffer: Fix buffer locking in ring_buffer_subbuf_order_set() tracing: Fix regression of filter waiting a long time on RCU synchronization
2 parents 8630c59 + 549e914 commit 538c429

File tree

3 files changed

+143
-100
lines changed

3 files changed

+143
-100
lines changed

include/trace/events/power.h

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -337,53 +337,6 @@ DEFINE_EVENT(wakeup_source, wakeup_source_deactivate,
337337
TP_ARGS(name, state)
338338
);
339339

340-
/*
341-
* The clock events are used for clock enable/disable and for
342-
* clock rate change
343-
*/
344-
DECLARE_EVENT_CLASS(clock,
345-
346-
TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
347-
348-
TP_ARGS(name, state, cpu_id),
349-
350-
TP_STRUCT__entry(
351-
__string( name, name )
352-
__field( u64, state )
353-
__field( u64, cpu_id )
354-
),
355-
356-
TP_fast_assign(
357-
__assign_str(name);
358-
__entry->state = state;
359-
__entry->cpu_id = cpu_id;
360-
),
361-
362-
TP_printk("%s state=%lu cpu_id=%lu", __get_str(name),
363-
(unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
364-
);
365-
366-
DEFINE_EVENT(clock, clock_enable,
367-
368-
TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
369-
370-
TP_ARGS(name, state, cpu_id)
371-
);
372-
373-
DEFINE_EVENT(clock, clock_disable,
374-
375-
TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
376-
377-
TP_ARGS(name, state, cpu_id)
378-
);
379-
380-
DEFINE_EVENT(clock, clock_set_rate,
381-
382-
TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id),
383-
384-
TP_ARGS(name, state, cpu_id)
385-
);
386-
387340
/*
388341
* The power domain events are used for power domains transitions
389342
*/

kernel/trace/ring_buffer.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6795,7 +6795,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
67956795
old_size = buffer->subbuf_size;
67966796

67976797
/* prevent another thread from changing buffer sizes */
6798-
mutex_lock(&buffer->mutex);
6798+
guard(mutex)(&buffer->mutex);
67996799
atomic_inc(&buffer->record_disabled);
68006800

68016801
/* Make sure all commits have finished */
@@ -6900,7 +6900,6 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
69006900
}
69016901

69026902
atomic_dec(&buffer->record_disabled);
6903-
mutex_unlock(&buffer->mutex);
69046903

69056904
return 0;
69066905

@@ -6909,7 +6908,6 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
69096908
buffer->subbuf_size = old_size;
69106909

69116910
atomic_dec(&buffer->record_disabled);
6912-
mutex_unlock(&buffer->mutex);
69136911

69146912
for_each_buffer_cpu(buffer, cpu) {
69156913
cpu_buffer = buffer->buffers[cpu];

kernel/trace/trace_events_filter.c

Lines changed: 142 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,7 +1250,9 @@ static void append_filter_err(struct trace_array *tr,
12501250

12511251
static inline struct event_filter *event_filter(struct trace_event_file *file)
12521252
{
1253-
return file->filter;
1253+
return rcu_dereference_protected(file->filter,
1254+
lockdep_is_held(&event_mutex));
1255+
12541256
}
12551257

12561258
/* caller must hold event_mutex */
@@ -1320,7 +1322,7 @@ void free_event_filter(struct event_filter *filter)
13201322
static inline void __remove_filter(struct trace_event_file *file)
13211323
{
13221324
filter_disable(file);
1323-
remove_filter_string(file->filter);
1325+
remove_filter_string(event_filter(file));
13241326
}
13251327

13261328
static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir,
@@ -1335,22 +1337,139 @@ static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir,
13351337
}
13361338
}
13371339

1340+
struct filter_list {
1341+
struct list_head list;
1342+
struct event_filter *filter;
1343+
};
1344+
1345+
struct filter_head {
1346+
struct list_head list;
1347+
struct rcu_head rcu;
1348+
};
1349+
1350+
1351+
static void free_filter_list(struct rcu_head *rhp)
1352+
{
1353+
struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu);
1354+
struct filter_list *filter_item, *tmp;
1355+
1356+
list_for_each_entry_safe(filter_item, tmp, &filter_list->list, list) {
1357+
__free_filter(filter_item->filter);
1358+
list_del(&filter_item->list);
1359+
kfree(filter_item);
1360+
}
1361+
kfree(filter_list);
1362+
}
1363+
1364+
static void free_filter_list_tasks(struct rcu_head *rhp)
1365+
{
1366+
call_rcu(rhp, free_filter_list);
1367+
}
1368+
1369+
/*
1370+
* The tracepoint_synchronize_unregister() is a double rcu call.
1371+
* It calls synchronize_rcu_tasks_trace() followed by synchronize_rcu().
1372+
* Instead of waiting for it, simply call these via the call_rcu*()
1373+
* variants.
1374+
*/
1375+
static void delay_free_filter(struct filter_head *head)
1376+
{
1377+
call_rcu_tasks_trace(&head->rcu, free_filter_list_tasks);
1378+
}
1379+
1380+
static void try_delay_free_filter(struct event_filter *filter)
1381+
{
1382+
struct filter_head *head;
1383+
struct filter_list *item;
1384+
1385+
head = kmalloc(sizeof(*head), GFP_KERNEL);
1386+
if (!head)
1387+
goto free_now;
1388+
1389+
INIT_LIST_HEAD(&head->list);
1390+
1391+
item = kmalloc(sizeof(*item), GFP_KERNEL);
1392+
if (!item) {
1393+
kfree(head);
1394+
goto free_now;
1395+
}
1396+
1397+
item->filter = filter;
1398+
list_add_tail(&item->list, &head->list);
1399+
delay_free_filter(head);
1400+
return;
1401+
1402+
free_now:
1403+
/* Make sure the filter is not being used */
1404+
tracepoint_synchronize_unregister();
1405+
__free_filter(filter);
1406+
}
1407+
13381408
static inline void __free_subsystem_filter(struct trace_event_file *file)
13391409
{
1340-
__free_filter(file->filter);
1410+
__free_filter(event_filter(file));
13411411
file->filter = NULL;
13421412
}
13431413

1414+
static inline void event_set_filter(struct trace_event_file *file,
1415+
struct event_filter *filter)
1416+
{
1417+
rcu_assign_pointer(file->filter, filter);
1418+
}
1419+
1420+
static inline void event_clear_filter(struct trace_event_file *file)
1421+
{
1422+
RCU_INIT_POINTER(file->filter, NULL);
1423+
}
1424+
13441425
static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
1345-
struct trace_array *tr)
1426+
struct trace_array *tr,
1427+
struct event_filter *filter)
13461428
{
13471429
struct trace_event_file *file;
1430+
struct filter_head *head;
1431+
struct filter_list *item;
1432+
1433+
head = kmalloc(sizeof(*head), GFP_KERNEL);
1434+
if (!head)
1435+
goto free_now;
1436+
1437+
INIT_LIST_HEAD(&head->list);
1438+
1439+
item = kmalloc(sizeof(*item), GFP_KERNEL);
1440+
if (!item) {
1441+
kfree(head);
1442+
goto free_now;
1443+
}
1444+
1445+
item->filter = filter;
1446+
list_add_tail(&item->list, &head->list);
13481447

13491448
list_for_each_entry(file, &tr->events, list) {
13501449
if (file->system != dir)
13511450
continue;
1451+
item = kmalloc(sizeof(*item), GFP_KERNEL);
1452+
if (!item)
1453+
goto free_now;
1454+
item->filter = event_filter(file);
1455+
list_add_tail(&item->list, &head->list);
1456+
event_clear_filter(file);
1457+
}
1458+
1459+
delay_free_filter(head);
1460+
return;
1461+
free_now:
1462+
tracepoint_synchronize_unregister();
1463+
1464+
if (head)
1465+
free_filter_list(&head->rcu);
1466+
1467+
list_for_each_entry(file, &tr->events, list) {
1468+
if (file->system != dir || !file->filter)
1469+
continue;
13521470
__free_subsystem_filter(file);
13531471
}
1472+
__free_filter(filter);
13541473
}
13551474

13561475
int filter_assign_type(const char *type)
@@ -2120,22 +2239,6 @@ static inline void event_set_filtered_flag(struct trace_event_file *file)
21202239
trace_buffered_event_enable();
21212240
}
21222241

2123-
static inline void event_set_filter(struct trace_event_file *file,
2124-
struct event_filter *filter)
2125-
{
2126-
rcu_assign_pointer(file->filter, filter);
2127-
}
2128-
2129-
static inline void event_clear_filter(struct trace_event_file *file)
2130-
{
2131-
RCU_INIT_POINTER(file->filter, NULL);
2132-
}
2133-
2134-
struct filter_list {
2135-
struct list_head list;
2136-
struct event_filter *filter;
2137-
};
2138-
21392242
static int process_system_preds(struct trace_subsystem_dir *dir,
21402243
struct trace_array *tr,
21412244
struct filter_parse_error *pe,
@@ -2144,11 +2247,16 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
21442247
struct trace_event_file *file;
21452248
struct filter_list *filter_item;
21462249
struct event_filter *filter = NULL;
2147-
struct filter_list *tmp;
2148-
LIST_HEAD(filter_list);
2250+
struct filter_head *filter_list;
21492251
bool fail = true;
21502252
int err;
21512253

2254+
filter_list = kmalloc(sizeof(*filter_list), GFP_KERNEL);
2255+
if (!filter_list)
2256+
return -ENOMEM;
2257+
2258+
INIT_LIST_HEAD(&filter_list->list);
2259+
21522260
list_for_each_entry(file, &tr->events, list) {
21532261

21542262
if (file->system != dir)
@@ -2175,7 +2283,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
21752283
if (!filter_item)
21762284
goto fail_mem;
21772285

2178-
list_add_tail(&filter_item->list, &filter_list);
2286+
list_add_tail(&filter_item->list, &filter_list->list);
21792287
/*
21802288
* Regardless of if this returned an error, we still
21812289
* replace the filter for the call.
@@ -2195,31 +2303,22 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
21952303
* Do a synchronize_rcu() and to ensure all calls are
21962304
* done with them before we free them.
21972305
*/
2198-
tracepoint_synchronize_unregister();
2199-
list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
2200-
__free_filter(filter_item->filter);
2201-
list_del(&filter_item->list);
2202-
kfree(filter_item);
2203-
}
2306+
delay_free_filter(filter_list);
22042307
return 0;
22052308
fail:
22062309
/* No call succeeded */
2207-
list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
2208-
list_del(&filter_item->list);
2209-
kfree(filter_item);
2210-
}
2310+
free_filter_list(&filter_list->rcu);
22112311
parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
22122312
return -EINVAL;
22132313
fail_mem:
22142314
__free_filter(filter);
2315+
22152316
/* If any call succeeded, we still need to sync */
22162317
if (!fail)
2217-
tracepoint_synchronize_unregister();
2218-
list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
2219-
__free_filter(filter_item->filter);
2220-
list_del(&filter_item->list);
2221-
kfree(filter_item);
2222-
}
2318+
delay_free_filter(filter_list);
2319+
else
2320+
free_filter_list(&filter_list->rcu);
2321+
22232322
return -ENOMEM;
22242323
}
22252324

@@ -2361,9 +2460,7 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
23612460

23622461
event_clear_filter(file);
23632462

2364-
/* Make sure the filter is not being used */
2365-
tracepoint_synchronize_unregister();
2366-
__free_filter(filter);
2463+
try_delay_free_filter(filter);
23672464

23682465
return 0;
23692466
}
@@ -2387,11 +2484,8 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
23872484

23882485
event_set_filter(file, filter);
23892486

2390-
if (tmp) {
2391-
/* Make sure the call is done with the filter */
2392-
tracepoint_synchronize_unregister();
2393-
__free_filter(tmp);
2394-
}
2487+
if (tmp)
2488+
try_delay_free_filter(tmp);
23952489
}
23962490

23972491
return err;
@@ -2417,9 +2511,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
24172511
filter = system->filter;
24182512
system->filter = NULL;
24192513
/* Ensure all filters are no longer used */
2420-
tracepoint_synchronize_unregister();
2421-
filter_free_subsystem_filters(dir, tr);
2422-
__free_filter(filter);
2514+
filter_free_subsystem_filters(dir, tr, filter);
24232515
return 0;
24242516
}
24252517

0 commit comments

Comments
 (0)