@@ -22,6 +22,7 @@
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/poison.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -43,17 +44,7 @@
 #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN)
 #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
                                STACK_DEPOT_EXTRA_BITS)
-#if IS_ENABLED(CONFIG_KMSAN) && CONFIG_STACKDEPOT_MAX_FRAMES >= 32
-/*
- * KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack
- * traces. As KMSAN does not support evicting stack traces from the stack
- * depot, the stack depot capacity might be reached quickly with large stack
- * records. Adjust the maximum number of stack depot pools for this case.
- */
-#define DEPOT_POOLS_CAP (8192 * (CONFIG_STACKDEPOT_MAX_FRAMES / 16))
-#else
 #define DEPOT_POOLS_CAP 8192
-#endif
 #define DEPOT_MAX_POOLS \
         (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \
          (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP)
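(Editor's aside, not part of the commit: a worked example of the handle arithmetic above. The concrete values are assumptions taken from this era of lib/stackdepot.c and include/linux/stackdepot.h, namely DEPOT_POOL_ORDER = 2, PAGE_SHIFT = 12, DEPOT_STACK_ALIGN = 4, DEPOT_HANDLE_BITS = 32 and STACK_DEPOT_EXTRA_BITS = 5; verify them against the headers.)

/* Illustrative sketch only; the real macros live in the files named above. */
#define EX_OFFSET_BITS          (2 + 12 - 4)                    /* DEPOT_OFFSET_BITS = 10 */
#define EX_POOL_INDEX_BITS      (32 - EX_OFFSET_BITS - 5)       /* DEPOT_POOL_INDEX_BITS = 17 */

/* 1 << 17 = 131072 pools would fit in the handle, so the cap is the binding limit: */
_Static_assert((1LL << EX_POOL_INDEX_BITS) >= 8192,
               "DEPOT_MAX_POOLS resolves to DEPOT_POOLS_CAP, i.e. 8192 pools");

(The removed KMSAN special case only raised DEPOT_POOLS_CAP; with the variable-sized records introduced later in this diff, the larger cap is presumably no longer needed.)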
@@ -93,9 +84,6 @@ struct stack_record {
         };
 };
 
-#define DEPOT_STACK_RECORD_SIZE \
-        ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN)
-
 static bool stack_depot_disabled;
 static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
 static bool __stack_depot_early_init_passed __initdata;
@@ -121,32 +109,31 @@ static void *stack_pools[DEPOT_MAX_POOLS];
 static void *new_pool;
 /* Number of pools in stack_pools. */
 static int pools_num;
+/* Offset to the unused space in the currently used pool. */
+static size_t pool_offset = DEPOT_POOL_SIZE;
 /* Freelist of stack records within stack_pools. */
 static LIST_HEAD(free_stacks);
-/*
- * Stack depot tries to keep an extra pool allocated even before it runs out
- * of space in the currently used pool. This flag marks whether this extra pool
- * needs to be allocated. It has the value 0 when either an extra pool is not
- * yet allocated or if the limit on the number of pools is reached.
- */
-static bool new_pool_required = true;
 /* The lock must be held when performing pool or freelist modifications. */
 static DEFINE_RAW_SPINLOCK(pool_lock);
 
 /* Statistics counters for debugfs. */
 enum depot_counter_id {
-        DEPOT_COUNTER_ALLOCS,
-        DEPOT_COUNTER_FREES,
-        DEPOT_COUNTER_INUSE,
+        DEPOT_COUNTER_REFD_ALLOCS,
+        DEPOT_COUNTER_REFD_FREES,
+        DEPOT_COUNTER_REFD_INUSE,
         DEPOT_COUNTER_FREELIST_SIZE,
+        DEPOT_COUNTER_PERSIST_COUNT,
+        DEPOT_COUNTER_PERSIST_BYTES,
         DEPOT_COUNTER_COUNT,
 };
 static long counters[DEPOT_COUNTER_COUNT];
 static const char *const counter_names[] = {
-        [DEPOT_COUNTER_ALLOCS]  = "allocations",
-        [DEPOT_COUNTER_FREES]   = "frees",
-        [DEPOT_COUNTER_INUSE]   = "in_use",
+        [DEPOT_COUNTER_REFD_ALLOCS]     = "refcounted_allocations",
+        [DEPOT_COUNTER_REFD_FREES]      = "refcounted_frees",
+        [DEPOT_COUNTER_REFD_INUSE]      = "refcounted_in_use",
         [DEPOT_COUNTER_FREELIST_SIZE]   = "freelist_size",
+        [DEPOT_COUNTER_PERSIST_COUNT]   = "persistent_count",
+        [DEPOT_COUNTER_PERSIST_BYTES]   = "persistent_bytes",
 };
 static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);
 
@@ -294,48 +281,52 @@ int stack_depot_init(void)
 EXPORT_SYMBOL_GPL(stack_depot_init);
 
 /*
- * Initializes new stack depot @pool, release all its entries to the freelist,
- * and update the list of pools.
+ * Initializes new stack pool, and updates the list of pools.
  */
-static void depot_init_pool(void *pool)
+static bool depot_init_pool(void **prealloc)
 {
-        int offset;
-
         lockdep_assert_held(&pool_lock);
 
-        /* Initialize handles and link stack records into the freelist. */
-        for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE;
-             offset += DEPOT_STACK_RECORD_SIZE) {
-                struct stack_record *stack = pool + offset;
-
-                stack->handle.pool_index = pools_num;
-                stack->handle.offset = offset >> DEPOT_STACK_ALIGN;
-                stack->handle.extra = 0;
-
-                /*
-                 * Stack traces of size 0 are never saved, and we can simply use
-                 * the size field as an indicator if this is a new unused stack
-                 * record in the freelist.
-                 */
-                stack->size = 0;
+        if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
+                /* Bail out if we reached the pool limit. */
+                WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
+                WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
+                WARN_ONCE(1, "Stack depot reached limit capacity");
+                return false;
+        }
 
-                INIT_LIST_HEAD(&stack->hash_list);
-                /*
-                 * Add to the freelist front to prioritize never-used entries:
-                 * required in case there are entries in the freelist, but their
-                 * RCU cookie still belongs to the current RCU grace period
-                 * (there can still be concurrent readers).
-                 */
-                list_add(&stack->free_list, &free_stacks);
-                counters[DEPOT_COUNTER_FREELIST_SIZE]++;
+        if (!new_pool && *prealloc) {
+                /* We have preallocated memory, use it. */
+                WRITE_ONCE(new_pool, *prealloc);
+                *prealloc = NULL;
         }
 
+        if (!new_pool)
+                return false; /* new_pool and *prealloc are NULL */
+
         /* Save reference to the pool to be used by depot_fetch_stack(). */
-        stack_pools[pools_num] = pool;
+        stack_pools[pools_num] = new_pool;
+
+        /*
+         * Stack depot tries to keep an extra pool allocated even before it runs
+         * out of space in the currently used pool.
+         *
+         * To indicate that a new preallocation is needed new_pool is reset to
+         * NULL; do not reset to NULL if we have reached the maximum number of
+         * pools.
+         */
+        if (pools_num < DEPOT_MAX_POOLS)
+                WRITE_ONCE(new_pool, NULL);
+        else
+                WRITE_ONCE(new_pool, STACK_DEPOT_POISON);
 
         /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
         WRITE_ONCE(pools_num, pools_num + 1);
         ASSERT_EXCLUSIVE_WRITER(pools_num);
+
+        pool_offset = 0;
+
+        return true;
 }
 
 /* Keeps the preallocated memory to be used for a new stack depot pool. */
@@ -347,63 +338,51 @@ static void depot_keep_new_pool(void **prealloc)
          * If a new pool is already saved or the maximum number of
          * pools is reached, do not use the preallocated memory.
          */
-        if (!new_pool_required)
+        if (new_pool)
                 return;
 
-        /*
-         * Use the preallocated memory for the new pool
-         * as long as we do not exceed the maximum number of pools.
-         */
-        if (pools_num < DEPOT_MAX_POOLS) {
-                new_pool = *prealloc;
-                *prealloc = NULL;
-        }
-
-        /*
-         * At this point, either a new pool is kept or the maximum
-         * number of pools is reached. In either case, take note that
-         * keeping another pool is not required.
-         */
-        WRITE_ONCE(new_pool_required, false);
+        WRITE_ONCE(new_pool, *prealloc);
+        *prealloc = NULL;
 }
 
 /*
- * Try to initialize a new stack depot pool from either a previous or the
- * current pre-allocation, and release all its entries to the freelist.
+ * Try to initialize a new stack record from the current pool, a cached pool, or
+ * the current pre-allocation.
  */
-static bool depot_try_init_pool(void **prealloc)
+static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
 {
+        struct stack_record *stack;
+        void *current_pool;
+        u32 pool_index;
+
         lockdep_assert_held(&pool_lock);
 
-        /* Check if we have a new pool saved and use it. */
-        if (new_pool) {
-                depot_init_pool(new_pool);
-                new_pool = NULL;
+        if (pool_offset + size > DEPOT_POOL_SIZE) {
+                if (!depot_init_pool(prealloc))
+                        return NULL;
+        }
 
-                /* Take note that we might need a new new_pool. */
-                if (pools_num < DEPOT_MAX_POOLS)
-                        WRITE_ONCE(new_pool_required, true);
+        if (WARN_ON_ONCE(pools_num < 1))
+                return NULL;
+        pool_index = pools_num - 1;
+        current_pool = stack_pools[pool_index];
+        if (WARN_ON_ONCE(!current_pool))
+                return NULL;
 
-                return true;
-        }
+        stack = current_pool + pool_offset;
 
-        /* Bail out if we reached the pool limit. */
-        if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
-                WARN_ONCE(1, "Stack depot reached limit capacity");
-                return false;
-        }
+        /* Pre-initialize handle once. */
+        stack->handle.pool_index = pool_index;
+        stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
+        stack->handle.extra = 0;
+        INIT_LIST_HEAD(&stack->hash_list);
 
-        /* Check if we have preallocated memory and use it. */
-        if (*prealloc) {
-                depot_init_pool(*prealloc);
-                *prealloc = NULL;
-                return true;
-        }
+        pool_offset += size;
 
-        return false;
+        return stack;
 }
 
-/* Try to find next free usable entry. */
+/* Try to find next free usable entry from the freelist. */
 static struct stack_record *depot_pop_free(void)
 {
         struct stack_record *stack;
@@ -420,7 +399,7 @@ static struct stack_record *depot_pop_free(void)
          * check the first entry.
          */
         stack = list_first_entry(&free_stacks, struct stack_record, free_list);
-        if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state))
+        if (!poll_state_synchronize_rcu(stack->rcu_state))
                 return NULL;
 
         list_del(&stack->free_list);
@@ -429,48 +408,73 @@ static struct stack_record *depot_pop_free(void)
         return stack;
 }
 
+static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
+{
+        const size_t used = flex_array_size(s, entries, nr_entries);
+        const size_t unused = sizeof(s->entries) - used;
+
+        WARN_ON_ONCE(sizeof(s->entries) < used);
+
+        return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN);
+}
+
 /* Allocates a new stack in a stack depot pool. */
 static struct stack_record *
-depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
+depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
 {
-        struct stack_record *stack;
+        struct stack_record *stack = NULL;
+        size_t record_size;
 
         lockdep_assert_held(&pool_lock);
 
         /* This should already be checked by public API entry points. */
-        if (WARN_ON_ONCE(!size))
+        if (WARN_ON_ONCE(!nr_entries))
                 return NULL;
 
-        /* Check if we have a stack record to save the stack trace. */
-        stack = depot_pop_free();
-        if (!stack) {
-                /* No usable entries on the freelist - try to refill the freelist. */
-                if (!depot_try_init_pool(prealloc))
-                        return NULL;
+        /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
+        if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
+                nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;
+
+        if (flags & STACK_DEPOT_FLAG_GET) {
+                /*
+                 * Evictable entries have to allocate the max. size so they may
+                 * safely be re-used by differently sized allocations.
+                 */
+                record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES);
                 stack = depot_pop_free();
-                if (WARN_ON(!stack))
-                        return NULL;
+        } else {
+                record_size = depot_stack_record_size(stack, nr_entries);
         }
 
-        /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
-        if (size > CONFIG_STACKDEPOT_MAX_FRAMES)
-                size = CONFIG_STACKDEPOT_MAX_FRAMES;
+        if (!stack) {
+                stack = depot_pop_free_pool(prealloc, record_size);
+                if (!stack)
+                        return NULL;
+        }
 
         /* Save the stack trace. */
         stack->hash = hash;
-        stack->size = size;
-        /* stack->handle is already filled in by depot_init_pool(). */
-        refcount_set(&stack->count, 1);
-        memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
+        stack->size = nr_entries;
+        /* stack->handle is already filled in by depot_pop_free_pool(). */
+        memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries));
+
+        if (flags & STACK_DEPOT_FLAG_GET) {
+                refcount_set(&stack->count, 1);
+                counters[DEPOT_COUNTER_REFD_ALLOCS]++;
+                counters[DEPOT_COUNTER_REFD_INUSE]++;
+        } else {
+                /* Warn on attempts to switch to refcounting this entry. */
+                refcount_set(&stack->count, REFCOUNT_SATURATED);
+                counters[DEPOT_COUNTER_PERSIST_COUNT]++;
+                counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size;
+        }
 
         /*
          * Let KMSAN know the stored stack record is initialized. This shall
          * prevent false positive reports if instrumented code accesses it.
          */
-        kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE);
+        kmsan_unpoison_memory(stack, record_size);
 
-        counters[DEPOT_COUNTER_ALLOCS]++;
-        counters[DEPOT_COUNTER_INUSE]++;
         return stack;
 }
 
@@ -538,8 +542,8 @@ static void depot_free_stack(struct stack_record *stack)
         list_add_tail(&stack->free_list, &free_stacks);
 
         counters[DEPOT_COUNTER_FREELIST_SIZE]++;
-        counters[DEPOT_COUNTER_FREES]++;
-        counters[DEPOT_COUNTER_INUSE]--;
+        counters[DEPOT_COUNTER_REFD_FREES]++;
+        counters[DEPOT_COUNTER_REFD_INUSE]--;
 
         printk_deferred_exit();
         raw_spin_unlock_irqrestore(&pool_lock, flags);
@@ -660,7 +664,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
          * Allocate memory for a new pool if required now:
          * we won't be able to do that under the lock.
          */
-        if (unlikely(can_alloc && READ_ONCE(new_pool_required))) {
+        if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
                 /*
                  * Zero out zone modifiers, as we don't have specific zone
                  * requirements. Keep the flags related to allocation in atomic
@@ -681,7 +685,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
         found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
         if (!found) {
                 struct stack_record *new =
-                        depot_alloc_stack(entries, nr_entries, hash, &prealloc);
+                        depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);
 
                 if (new) {
                         /*
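(Editor's aside, not part of the commit: a caller-side sketch of how the two record classes distinguished in depot_alloc_stack() are reached. The API names used here, stack_depot_save_flags(), STACK_DEPOT_FLAG_CAN_ALLOC, STACK_DEPOT_FLAG_GET and stack_depot_put(), are my reading of include/linux/stackdepot.h at this point in the series; treat the snippet as an assumption-laden illustration, not as code from the patch.)

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

static depot_stack_handle_t save_evictable(void)
{
        unsigned long entries[16];
        unsigned int nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);

        /*
         * STACK_DEPOT_FLAG_GET takes a reference: the record is refcounted,
         * sized for CONFIG_STACKDEPOT_MAX_FRAMES so it can be reused, and can
         * later be returned to the freelist with stack_depot_put(handle).
         */
        return stack_depot_save_flags(entries, nr, GFP_NOWAIT,
                                      STACK_DEPOT_FLAG_CAN_ALLOC | STACK_DEPOT_FLAG_GET);
}

static depot_stack_handle_t save_persistent(void)
{
        unsigned long entries[16];
        unsigned int nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);

        /*
         * Without STACK_DEPOT_FLAG_GET the record is persistent: it is carved
         * variable-sized out of the current pool, its refcount is pinned at
         * REFCOUNT_SATURATED, and it is never freed.
         */
        return stack_depot_save_flags(entries, nr, GFP_NOWAIT,
                                      STACK_DEPOT_FLAG_CAN_ALLOC);
}

The payoff of the persistent path is the variable sizing: assuming 8-byte stack entries and the default CONFIG_STACKDEPOT_MAX_FRAMES of 64, a persistent record that holds only 4 frames is roughly (64 - 4) * 8 = 480 bytes smaller than a fixed maximum-size record, and the new persistent_bytes counter tracks exactly this per-record footprint.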