Skip to content

Commit 44dc5c4

Browse files
committed
tracing: Fix wasted memory in saved_cmdlines logic
While looking at improving the saved_cmdlines cache I found a huge amount of wasted memory that should be used for the cmdlines. The tracing data saves pids during the trace. At sched switch, if a trace occurred, it will save the comm of the task that did the trace. This is saved in a "cache" that maps pids to comms and exposed to user space via the /sys/kernel/tracing/saved_cmdlines file. Currently it only caches by default 128 comms. The structure that uses this creates an array to store the pids using PID_MAX_DEFAULT (which is usually set to 32768). This causes the structure to be of the size of 131104 bytes on 64 bit machines. In hex: 131104 = 0x20020, and since the kernel allocates generic memory in powers of two, the kernel would allocate 0x40000 or 262144 bytes to store this structure. That leaves 131040 bytes of wasted space. Worse, the structure points to an allocated array to store the comm names, which is 16 bytes times the amount of names to save (currently 128), which is 2048 bytes. Instead of allocating a separate array, make the structure end with a variable length string and use the extra space for that. This is similar to a recommendation that Linus had made about eventfs_inode names: https://lore.kernel.org/all/20240130190355.11486-5-torvalds@linux-foundation.org/ Instead of allocating a separate string array to hold the saved comms, have the structure end with: char saved_cmdlines[]; and round up to the next power of two over sizeof(struct saved_cmdline_buffers) + num_cmdlines * TASK_COMM_LEN It will use this extra space for the saved_cmdline portion. Now, instead of saving only 128 comms by default, by using this wasted space at the end of the structure it can save over 8000 comms and even saves space by removing the need for allocating the other array. Link: https://lore.kernel.org/linux-trace-kernel/20240209063622.1f7b6d5f@rorschach.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: Vincent Donnefort <vdonnefort@google.com> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Mete Durlu <meted@linux.ibm.com> Fixes: 939c7a4 ("tracing: Introduce saved_cmdlines_size file") Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1 parent a8b9cf6 commit 44dc5c4

File tree

1 file changed

+37
-38
lines changed

1 file changed

+37
-38
lines changed

kernel/trace/trace.c

Lines changed: 37 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2320,7 +2320,7 @@ struct saved_cmdlines_buffer {
23202320
unsigned *map_cmdline_to_pid;
23212321
unsigned cmdline_num;
23222322
int cmdline_idx;
2323-
char *saved_cmdlines;
2323+
char saved_cmdlines[];
23242324
};
23252325
static struct saved_cmdlines_buffer *savedcmd;
23262326

@@ -2334,47 +2334,58 @@ static inline void set_cmdline(int idx, const char *cmdline)
23342334
strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
23352335
}
23362336

2337-
static int allocate_cmdlines_buffer(unsigned int val,
2338-
struct saved_cmdlines_buffer *s)
2337+
static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2338+
{
2339+
int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2340+
2341+
kfree(s->map_cmdline_to_pid);
2342+
free_pages((unsigned long)s, order);
2343+
}
2344+
2345+
static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
23392346
{
2347+
struct saved_cmdlines_buffer *s;
2348+
struct page *page;
2349+
int orig_size, size;
2350+
int order;
2351+
2352+
/* Figure out how much is needed to hold the given number of cmdlines */
2353+
orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2354+
order = get_order(orig_size);
2355+
size = 1 << (order + PAGE_SHIFT);
2356+
page = alloc_pages(GFP_KERNEL, order);
2357+
if (!page)
2358+
return NULL;
2359+
2360+
s = page_address(page);
2361+
memset(s, 0, sizeof(*s));
2362+
2363+
/* Round up to actual allocation */
2364+
val = (size - sizeof(*s)) / TASK_COMM_LEN;
2365+
s->cmdline_num = val;
2366+
23402367
s->map_cmdline_to_pid = kmalloc_array(val,
23412368
sizeof(*s->map_cmdline_to_pid),
23422369
GFP_KERNEL);
2343-
if (!s->map_cmdline_to_pid)
2344-
return -ENOMEM;
2345-
2346-
s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2347-
if (!s->saved_cmdlines) {
2348-
kfree(s->map_cmdline_to_pid);
2349-
return -ENOMEM;
2370+
if (!s->map_cmdline_to_pid) {
2371+
free_saved_cmdlines_buffer(s);
2372+
return NULL;
23502373
}
23512374

23522375
s->cmdline_idx = 0;
2353-
s->cmdline_num = val;
23542376
memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
23552377
sizeof(s->map_pid_to_cmdline));
23562378
memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
23572379
val * sizeof(*s->map_cmdline_to_pid));
23582380

2359-
return 0;
2381+
return s;
23602382
}
23612383

23622384
static int trace_create_savedcmd(void)
23632385
{
2364-
int ret;
2365-
2366-
savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2367-
if (!savedcmd)
2368-
return -ENOMEM;
2369-
2370-
ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2371-
if (ret < 0) {
2372-
kfree(savedcmd);
2373-
savedcmd = NULL;
2374-
return -ENOMEM;
2375-
}
2386+
savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
23762387

2377-
return 0;
2388+
return savedcmd ? 0 : -ENOMEM;
23782389
}
23792390

23802391
int is_tracing_stopped(void)
@@ -6056,26 +6067,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
60566067
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
60576068
}
60586069

6059-
static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6060-
{
6061-
kfree(s->saved_cmdlines);
6062-
kfree(s->map_cmdline_to_pid);
6063-
kfree(s);
6064-
}
6065-
60666070
static int tracing_resize_saved_cmdlines(unsigned int val)
60676071
{
60686072
struct saved_cmdlines_buffer *s, *savedcmd_temp;
60696073

6070-
s = kmalloc(sizeof(*s), GFP_KERNEL);
6074+
s = allocate_cmdlines_buffer(val);
60716075
if (!s)
60726076
return -ENOMEM;
60736077

6074-
if (allocate_cmdlines_buffer(val, s) < 0) {
6075-
kfree(s);
6076-
return -ENOMEM;
6077-
}
6078-
60796078
preempt_disable();
60806079
arch_spin_lock(&trace_cmdline_lock);
60816080
savedcmd_temp = savedcmd;

0 commit comments

Comments
 (0)