Commit 2e17ce1

Merge tag 'slab-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab updates from Vlastimil Babka:

 - Conversion of slub_debug stack traces to stackdepot, allowing more
   useful debugfs-based inspection for e.g. memory leak debugging.
   Allocation and free debugfs info now includes full traces and is
   sorted by the unique trace frequency.

   The stackdepot conversion was already attempted last year but
   reverted by ae14c63. The memory overhead (while not actually enabled
   on boot) has been meanwhile solved by making the large stackdepot
   allocation dynamic. The xfstest issues haven't been reproduced on
   current kernel locally nor in -next, so the slab cache layout
   changes that originally made that bug manifest were probably not the
   root cause.

 - Refactoring of dma-kmalloc caches creation.

 - Trivial cleanups such as removal of unused parameters, fixes and
   clarifications of comments.

 - Hyeonggon Yoo joins as a reviewer.

* tag 'slab-for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  MAINTAINERS: add myself as reviewer for slab
  mm/slub: remove unused kmem_cache_order_objects max
  mm: slab: fix comment for __assume_kmalloc_alignment
  mm: slab: fix comment for ARCH_KMALLOC_MINALIGN
  mm/slub: remove unneeded return value of slab_pad_check
  mm/slab_common: move dma-kmalloc caches creation into new_kmalloc_cache()
  mm/slub: remove meaningless node check in ___slab_alloc()
  mm/slub: remove duplicate flag in allocate_slab()
  mm/slub: remove unused parameter in setup_object*()
  mm/slab.c: fix comments
  slab, documentation: add description of debugfs files for SLUB caches
  mm/slub: sort debugfs output by frequency of stack traces
  mm/slub: distinguish and print stack traces in debugfs files
  mm/slub: use stackdepot to save stack trace in objects
  mm/slub: move struct track init out of set_track()
  lib/stackdepot: allow requesting early initialization dynamically
  mm/slub, kunit: Make slub_kunit unaffected by user specified flags
  mm/slab: remove some unused functions
2 parents: caa2898 + e001897

File tree: 14 files changed, +283 −143 lines changed

Documentation/vm/slub.rst

Lines changed: 64 additions & 0 deletions
@@ -384,5 +384,69 @@ c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the
    40,60`` range will plot only samples collected between 40th and
    60th seconds).
 
+
+DebugFS files for SLUB
+======================
+
+For more information about current state of SLUB caches with the user tracking
+debug option enabled, debugfs files are available, typically under
+/sys/kernel/debug/slab/<cache>/ (created only for caches with enabled user
+tracking). There are 2 types of these files with the following debug
+information:
+
+1. alloc_traces::
+
+    Prints information about unique allocation traces of the currently
+    allocated objects. The output is sorted by frequency of each trace.
+
+    Information in the output:
+    Number of objects, allocating function, minimal/average/maximal jiffies since alloc,
+    pid range of the allocating processes, cpu mask of allocating cpus, and stack trace.
+
+    Example:::
+
+    1085 populate_error_injection_list+0x97/0x110 age=166678/166680/166682 pid=1 cpus=1::
+        __slab_alloc+0x6d/0x90
+        kmem_cache_alloc_trace+0x2eb/0x300
+        populate_error_injection_list+0x97/0x110
+        init_error_injection+0x1b/0x71
+        do_one_initcall+0x5f/0x2d0
+        kernel_init_freeable+0x26f/0x2d7
+        kernel_init+0xe/0x118
+        ret_from_fork+0x22/0x30
+
+
+2. free_traces::
+
+    Prints information about unique freeing traces of the currently allocated
+    objects. The freeing traces thus come from the previous life-cycle of the
+    objects and are reported as not available for objects allocated for the first
+    time. The output is sorted by frequency of each trace.
+
+    Information in the output:
+    Number of objects, freeing function, minimal/average/maximal jiffies since free,
+    pid range of the freeing processes, cpu mask of freeing cpus, and stack trace.
+
+    Example:::
+
+    1980 <not-available> age=4294912290 pid=0 cpus=0
+    51 acpi_ut_update_ref_count+0x6a6/0x782 age=236886/237027/237772 pid=1 cpus=1
+        kfree+0x2db/0x420
+        acpi_ut_update_ref_count+0x6a6/0x782
+        acpi_ut_update_object_reference+0x1ad/0x234
+        acpi_ut_remove_reference+0x7d/0x84
+        acpi_rs_get_prt_method_data+0x97/0xd6
+        acpi_get_irq_routing_table+0x82/0xc4
+        acpi_pci_irq_find_prt_entry+0x8e/0x2e0
+        acpi_pci_irq_lookup+0x3a/0x1e0
+        acpi_pci_irq_enable+0x77/0x240
+        pcibios_enable_device+0x39/0x40
+        do_pci_enable_device.part.0+0x5d/0xe0
+        pci_enable_device_flags+0xfc/0x120
+        pci_enable_device+0x13/0x20
+        virtio_pci_probe+0x9e/0x170
+        local_pci_probe+0x48/0x80
+        pci_device_probe+0x105/0x1c0
+
 Christoph Lameter, May 30, 2007
 Sergey Senozhatsky, October 23, 2015
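
As a usage illustration (not part of the commit, and the cache name here is only an example): on a kernel booted with user tracking enabled for a cache, e.g. slub_debug=U,kmalloc-64, the new files can be read like any other debugfs file. A minimal userspace sketch in C, assuming debugfs is mounted at /sys/kernel/debug:

    /* Hypothetical reader for the alloc_traces file described above. */
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/kernel/debug/slab/kmalloc-64/alloc_traces", "r");
            char line[512];

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            /* Each record: object count, allocating function, age, pid/cpu
             * info, then the indented stack trace, sorted by frequency. */
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }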

MAINTAINERS

Lines changed: 1 addition & 0 deletions
@@ -18163,6 +18163,7 @@ M: Joonsoo Kim <iamjoonsoo.kim@lge.com>
 M: Andrew Morton <akpm@linux-foundation.org>
 M: Vlastimil Babka <vbabka@suse.cz>
 R: Roman Gushchin <roman.gushchin@linux.dev>
+R: Hyeonggon Yoo <42.hyeyoo@gmail.com>
 L: linux-mm@kvack.org
 S: Maintained
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git

include/linux/slab.h

Lines changed: 11 additions & 4 deletions
@@ -112,6 +112,13 @@
 #define SLAB_KASAN 0
 #endif
 
+/*
+ * Ignore user specified debugging flags.
+ * Intended for caches created for self-tests so they have only flags
+ * specified in the code and other flags are ignored.
+ */
+#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U)
+
 /* The following flags affect the page allocator grouping pages by mobility */
 /* Objects are reclaimable */
 #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U)
@@ -190,7 +197,7 @@ void kmem_dump_obj(void *object);
 /*
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than the alignment of a 64-bit integer.
- * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
+ * Setting ARCH_DMA_MINALIGN in arch headers allows that.
  */
 #if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
 #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
@@ -210,9 +217,9 @@ void kmem_dump_obj(void *object);
 #endif
 
 /*
- * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
- * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
- * aligned pointers.
+ * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN.
+ * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN
+ * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment.
  */
 #define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
 #define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
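
To make the reworded alignment guarantee concrete, a hedged in-kernel sketch (hypothetical test code, not part of this commit): any kmalloc() result can be relied on to be ARCH_KMALLOC_MINALIGN-aligned, which is exactly what __assume_kmalloc_alignment lets the compiler assume.

    #include <linux/align.h>
    #include <linux/bug.h>
    #include <linux/slab.h>

    /* Illustrative check only; check_kmalloc_alignment() is a made-up name. */
    static int __init check_kmalloc_alignment(void)
    {
            void *buf = kmalloc(64, GFP_KERNEL);

            if (!buf)
                    return -ENOMEM;
            WARN_ON(!IS_ALIGNED((unsigned long)buf, ARCH_KMALLOC_MINALIGN));
            kfree(buf);
            return 0;
    }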

include/linux/slub_def.h

Lines changed: 0 additions & 1 deletion
@@ -105,7 +105,6 @@ struct kmem_cache {
 	struct kmem_cache_order_objects oo;
 
 	/* Allocation and freeing of slabs */
-	struct kmem_cache_order_objects max;
 	struct kmem_cache_order_objects min;
 	gfp_t allocflags;	/* gfp flags to use on each alloc */
 	int refcount;		/* Refcount for slab cache destroy */

include/linux/stackdepot.h

Lines changed: 22 additions & 4 deletions
@@ -20,18 +20,36 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
 					gfp_t gfp_flags, bool can_alloc);
 
 /*
- * Every user of stack depot has to call this during its own init when it's
- * decided that it will be calling stack_depot_save() later.
+ * Every user of stack depot has to call stack_depot_init() during its own init
+ * when it's decided that it will be calling stack_depot_save() later. This is
+ * recommended for e.g. modules initialized later in the boot process, when
+ * slab_is_available() is true.
  *
  * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot
  * enabled as part of mm_init(), for subsystems where it's known at compile time
  * that stack depot will be used.
+ *
+ * Another alternative is to call stack_depot_want_early_init(), when the
+ * decision to use stack depot is taken e.g. when evaluating kernel boot
+ * parameters, which precedes the enablement point in mm_init().
+ *
+ * stack_depot_init() and stack_depot_want_early_init() can be called regardless
+ * of CONFIG_STACKDEPOT and are no-op when disabled. The actual save/fetch/print
+ * functions should only be called from code that makes sure CONFIG_STACKDEPOT
+ * is enabled.
  */
+#ifdef CONFIG_STACKDEPOT
 int stack_depot_init(void);
 
-#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT
-static inline int stack_depot_early_init(void) { return stack_depot_init(); }
+void __init stack_depot_want_early_init(void);
+
+/* This is supposed to be called only from mm_init() */
+int __init stack_depot_early_init(void);
 #else
+static inline int stack_depot_init(void) { return 0; }
+
+static inline void stack_depot_want_early_init(void) { }
+
 static inline int stack_depot_early_init(void) { return 0; }
 #endif
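
The comment above names three init paths. A hedged sketch of the first one (a user initialized after slab_is_available()), where only the stackdepot/stacktrace calls are real kernel API and the surrounding function names are illustrative:

    #include <linux/kernel.h>
    #include <linux/stackdepot.h>
    #include <linux/stacktrace.h>

    static depot_stack_handle_t record_current_stack(gfp_t gfp)
    {
            unsigned long entries[16];
            unsigned int nr;

            nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
            return stack_depot_save(entries, nr, gfp);
    }

    static int __init example_user_init(void)
    {
            /* Safe even with CONFIG_STACKDEPOT=n: the stub returns 0. */
            return stack_depot_init();
    }

The third path is the one mm/page_owner.c switches to below: requesting the early hash table allocation from an early_param handler via stack_depot_want_early_init().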

init/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -1875,6 +1875,7 @@ config SLUB_DEBUG
 	default y
 	bool "Enable SLUB debugging support" if EXPERT
 	depends on SLUB && SYSFS
+	select STACKDEPOT if STACKTRACE_SUPPORT
 	help
 	  SLUB has extensive debug support features. Disabling these can
 	  result in significant savings in code size. This also disables

lib/Kconfig.debug

Lines changed: 1 addition & 0 deletions
@@ -710,6 +710,7 @@ config DEBUG_SLAB
 config SLUB_DEBUG_ON
 	bool "SLUB debugging on by default"
 	depends on SLUB && SLUB_DEBUG
+	select STACKDEPOT_ALWAYS_INIT if STACKTRACE_SUPPORT
 	default n
 	help
 	  Boot with debugging on by default. SLUB boots by default with

lib/slub_kunit.c

Lines changed: 5 additions & 5 deletions
@@ -12,7 +12,7 @@ static int slab_errors;
 static void test_clobber_zone(struct kunit *test)
 {
 	struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
-				SLAB_RED_ZONE, NULL);
+				SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
 	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
 
 	kasan_disable_current();
@@ -30,7 +30,7 @@ static void test_clobber_zone(struct kunit *test)
 static void test_next_pointer(struct kunit *test)
 {
 	struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
-				SLAB_POISON, NULL);
+				SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
 	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
 	unsigned long tmp;
 	unsigned long *ptr_addr;
@@ -75,7 +75,7 @@ static void test_next_pointer(struct kunit *test)
 static void test_first_word(struct kunit *test)
 {
 	struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
-				SLAB_POISON, NULL);
+				SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
 	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
 
 	kmem_cache_free(s, p);
@@ -90,7 +90,7 @@ static void test_first_word(struct kunit *test)
 static void test_clobber_50th_byte(struct kunit *test)
 {
 	struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
-				SLAB_POISON, NULL);
+				SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
 	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
 
 	kmem_cache_free(s, p);
@@ -106,7 +106,7 @@ static void test_clobber_50th_byte(struct kunit *test)
 static void test_clobber_redzone_free(struct kunit *test)
 {
 	struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
-				SLAB_RED_ZONE, NULL);
+				SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
 	u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
 
 	kasan_disable_current();

lib/stackdepot.c

Lines changed: 45 additions & 22 deletions
@@ -66,6 +66,9 @@ struct stack_record {
 	unsigned long entries[];	/* Variable-sized array of entries. */
 };
 
+static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
+static bool __stack_depot_early_init_passed __initdata;
+
 static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
 
 static int depot_index;
@@ -162,38 +165,58 @@ static int __init is_stack_depot_disabled(char *str)
 }
 early_param("stack_depot_disable", is_stack_depot_disabled);
 
-/*
- * __ref because of memblock_alloc(), which will not be actually called after
- * the __init code is gone, because at that point slab_is_available() is true
- */
-__ref int stack_depot_init(void)
+void __init stack_depot_want_early_init(void)
+{
+	/* Too late to request early init now */
+	WARN_ON(__stack_depot_early_init_passed);
+
+	__stack_depot_want_early_init = true;
+}
+
+int __init stack_depot_early_init(void)
+{
+	size_t size;
+
+	/* This is supposed to be called only once, from mm_init() */
+	if (WARN_ON(__stack_depot_early_init_passed))
+		return 0;
+
+	__stack_depot_early_init_passed = true;
+
+	if (!__stack_depot_want_early_init || stack_depot_disable)
+		return 0;
+
+	size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
+	pr_info("Stack Depot early init allocating hash table with memblock_alloc, %zu bytes\n",
+		size);
+	stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
+
+	if (!stack_table) {
+		pr_err("Stack Depot hash table allocation failed, disabling\n");
+		stack_depot_disable = true;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int stack_depot_init(void)
 {
 	static DEFINE_MUTEX(stack_depot_init_mutex);
+	int ret = 0;
 
 	mutex_lock(&stack_depot_init_mutex);
 	if (!stack_depot_disable && !stack_table) {
-		size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
-		int i;
-
-		if (slab_is_available()) {
-			pr_info("Stack Depot allocating hash table with kvmalloc\n");
-			stack_table = kvmalloc(size, GFP_KERNEL);
-		} else {
-			pr_info("Stack Depot allocating hash table with memblock_alloc\n");
-			stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
-		}
-		if (stack_table) {
-			for (i = 0; i < STACK_HASH_SIZE; i++)
-				stack_table[i] = NULL;
-		} else {
+		pr_info("Stack Depot allocating hash table with kvcalloc\n");
+		stack_table = kvcalloc(STACK_HASH_SIZE, sizeof(struct stack_record *), GFP_KERNEL);
+		if (!stack_table) {
 			pr_err("Stack Depot hash table allocation failed, disabling\n");
 			stack_depot_disable = true;
-			mutex_unlock(&stack_depot_init_mutex);
-			return -ENOMEM;
+			ret = -ENOMEM;
 		}
 	}
 	mutex_unlock(&stack_depot_init_mutex);
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(stack_depot_init);

mm/page_owner.c

Lines changed: 6 additions & 3 deletions
@@ -45,7 +45,12 @@ static void init_early_allocated_pages(void);
 
 static int __init early_page_owner_param(char *buf)
 {
-	return kstrtobool(buf, &page_owner_enabled);
+	int ret = kstrtobool(buf, &page_owner_enabled);
+
+	if (page_owner_enabled)
+		stack_depot_want_early_init();
+
+	return ret;
 }
 early_param("page_owner", early_page_owner_param);
 
@@ -83,8 +88,6 @@ static __init void init_page_owner(void)
 	if (!page_owner_enabled)
 		return;
 
-	stack_depot_init();
-
 	register_dummy_stack();
 	register_failure_stack();
 	register_early_stack();
