Commit e06635e

Merge tag 'slab-for-6.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab updates from Vlastimil Babka:

 - Add new slab_strict_numa boot parameter to enforce per-object memory
   policies on top of slab folio policies, for systems where saving cost of
   remote accesses is more important than minimizing slab allocation
   overhead (Christoph Lameter)

 - Fix for freeptr_offset alignment check being too strict for m68k
   (Geert Uytterhoeven)

 - krealloc() fixes for not violating __GFP_ZERO guarantees on krealloc()
   when slub_debug (redzone and object tracking) is enabled (Feng Tang)

 - Fix a memory leak in case sysfs registration fails for a slab cache, and
   also no longer fail to create the cache in that case (Hyeonggon Yoo)

 - Fix handling of detected consistency problems (due to buggy slab user)
   with slub_debug enabled, so that it does not cause further list
   corruption bugs (yuan.gao)

 - Code cleanup and kerneldocs polishing (Zhen Lei, Vlastimil Babka)

* tag 'slab-for-6.13-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  slab: Fix too strict alignment check in create_cache()
  mm/slab: Allow cache creation to proceed even if sysfs registration fails
  mm/slub: Avoid list corruption when removing a slab from the full list
  mm/slub, kunit: Add testcase for krealloc redzone and zeroing
  mm/slub: Improve redzone check and zeroing for krealloc()
  mm/slub: Consider kfence case for get_orig_size()
  SLUB: Add support for per object memory policies
  mm, slab: add kerneldocs for common SLAB_ flags
  mm/slab: remove duplicate check in create_cache()
  mm/slub: Move krealloc() and related code to slub.c
  mm/kasan: Don't store metadata inside kmalloc object when slub_debug_orig_size is on
2 parents: f5f4745 + 9008fe8

File tree: 8 files changed (+324, -136 lines)


Documentation/admin-guide/kernel-parameters.txt

Lines changed: 10 additions & 0 deletions
@@ -6158,6 +6158,16 @@
 			For more information see Documentation/mm/slub.rst.
 			(slub_nomerge legacy name also accepted for now)
 
+	slab_strict_numa	[MM]
+			Support memory policies on a per object level
+			in the slab allocator. The default is for memory
+			policies to be applied at the folio level when
+			a new folio is needed or a partial folio is
+			retrieved from the lists. Increases overhead
+			in the slab fastpaths but gains more accurate
+			NUMA kernel object placement which helps with slow
+			interconnects in NUMA systems.
+
 	slram=		[HW,MTD]
 
 	smart2=		[HW]

Documentation/mm/slub.rst

Lines changed: 9 additions & 0 deletions
@@ -175,6 +175,15 @@ can be influenced by kernel parameters:
 	``slab_max_order`` to 0, what cause minimum possible order of
 	slabs allocation.
 
+``slab_strict_numa``
+	Enables the application of memory policies on each
+	allocation. This results in more accurate placement of
+	objects which may result in the reduction of accesses
+	to remote nodes. The default is to only apply memory
+	policies at the folio level when a new folio is acquired
+	or a folio is retrieved from the lists. Enabling this
+	option reduces the fastpath performance of the slab allocator.
+
 SLUB Debug output
 =================
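
Usage note (not part of the diff): slab_strict_numa is a boot parameter, so it is enabled by adding it to the kernel command line. A minimal illustration for a GRUB-based system follows; the file location and the existing options ("quiet" here) vary by distribution, so treat this as a sketch rather than an exact recipe.

    # /etc/default/grub (illustrative)
    GRUB_CMDLINE_LINUX="quiet slab_strict_numa"

After regenerating the bootloader configuration and rebooting, memory policies are applied per object in the slab fastpaths, trading some allocation overhead for more accurate NUMA placement, as described in the documentation above.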

include/linux/slab.h

Lines changed: 41 additions & 19 deletions
@@ -77,7 +77,17 @@ enum _slab_flag_bits {
 #define SLAB_POISON		__SLAB_FLAG_BIT(_SLAB_POISON)
 /* Indicate a kmalloc slab */
 #define SLAB_KMALLOC		__SLAB_FLAG_BIT(_SLAB_KMALLOC)
-/* Align objs on cache lines */
+/**
+ * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
+ *
+ * Sufficiently large objects are aligned on cache line boundary. For object
+ * size smaller than a half of cache line size, the alignment is on the half of
+ * cache line size. In general, if object size is smaller than 1/2^n of cache
+ * line size, the alignment is adjusted to 1/2^n.
+ *
+ * If explicit alignment is also requested by the respective
+ * &struct kmem_cache_args field, the greater of both is alignments is applied.
+ */
 #define SLAB_HWCACHE_ALIGN	__SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
 /* Use GFP_DMA memory */
 #define SLAB_CACHE_DMA		__SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
@@ -87,8 +97,8 @@ enum _slab_flag_bits {
 #define SLAB_STORE_USER		__SLAB_FLAG_BIT(_SLAB_STORE_USER)
 /* Panic if kmem_cache_create() fails */
 #define SLAB_PANIC		__SLAB_FLAG_BIT(_SLAB_PANIC)
-/*
- * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
+/**
+ * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
  *
  * This delays freeing the SLAB page by a grace period, it does _NOT_
  * delay object freeing. This means that if you do kmem_cache_free()
@@ -99,20 +109,22 @@ enum _slab_flag_bits {
  * stays valid, the trick to using this is relying on an independent
  * object validation pass. Something like:
  *
- *  begin:
- *   rcu_read_lock();
- *   obj = lockless_lookup(key);
- *   if (obj) {
- *     if (!try_get_ref(obj)) // might fail for free objects
- *       rcu_read_unlock();
- *       goto begin;
+ * ::
+ *
+ *  begin:
+ *   rcu_read_lock();
+ *   obj = lockless_lookup(key);
+ *   if (obj) {
+ *     if (!try_get_ref(obj)) // might fail for free objects
+ *       rcu_read_unlock();
+ *       goto begin;
  *
- *     if (obj->key != key) { // not the object we expected
- *       put_ref(obj);
- *       rcu_read_unlock();
- *       goto begin;
- *     }
- *   }
+ *     if (obj->key != key) { // not the object we expected
+ *       put_ref(obj);
+ *       rcu_read_unlock();
+ *       goto begin;
+ *     }
+ *   }
  * rcu_read_unlock();
  *
  * This is useful if we need to approach a kernel structure obliquely,
@@ -137,7 +149,6 @@ enum _slab_flag_bits {
  *
  * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
  */
-/* Defer freeing slabs to RCU */
 #define SLAB_TYPESAFE_BY_RCU	__SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
 /* Trace allocations and frees */
 #define SLAB_TRACE		__SLAB_FLAG_BIT(_SLAB_TRACE)
@@ -170,7 +181,12 @@ enum _slab_flag_bits {
 #else
 # define SLAB_FAILSLAB		__SLAB_FLAG_UNUSED
 #endif
-/* Account to memcg */
+/**
+ * define SLAB_ACCOUNT - Account allocations to memcg.
+ *
+ * All object allocations from this cache will be memcg accounted, regardless of
+ * __GFP_ACCOUNT being or not being passed to individual allocations.
+ */
 #ifdef CONFIG_MEMCG
 # define SLAB_ACCOUNT		__SLAB_FLAG_BIT(_SLAB_ACCOUNT)
 #else
@@ -197,7 +213,13 @@ enum _slab_flag_bits {
 #endif
 
 /* The following flags affect the page allocator grouping pages by mobility */
-/* Objects are reclaimable */
+/**
+ * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
+ *
+ * Use this flag for caches that have an associated shrinker. As a result, slab
+ * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by
+ * mobility, and are accounted in SReclaimable counter in /proc/meminfo
+ */
 #ifndef CONFIG_SLUB_TINY
 #define SLAB_RECLAIM_ACCOUNT	__SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
 #else
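
The halving rule in the new SLAB_HWCACHE_ALIGN kerneldoc above is compact, so here is a small, self-contained userspace sketch of the documented behaviour. It is not kernel code; the helper name hwcache_align_for() and the 64-byte cache line are assumptions made purely for illustration.

    #include <stdio.h>

    /*
     * Illustration only: the documented SLAB_HWCACHE_ALIGN rule. Halve the
     * cache line size while the object would still fit in half of the
     * current alignment (size is assumed to be non-zero).
     */
    static unsigned int hwcache_align_for(unsigned int size, unsigned int cache_line)
    {
            unsigned int align = cache_line;

            while (align > 1 && size <= align / 2)
                    align /= 2;

            return align;
    }

    int main(void)
    {
            /* Assuming a 64-byte cache line. */
            printf("40 -> %u, 24 -> %u, 10 -> %u\n",
                   hwcache_align_for(40, 64),
                   hwcache_align_for(24, 64),
                   hwcache_align_for(10, 64));
            return 0;
    }

For a 64-byte line this yields 64-byte alignment for a 40-byte object, 32 for 24 bytes and 16 for 10 bytes, matching the 1/2^n rule; per the kerneldoc, the kernel additionally applies the greater of this value and any alignment explicitly requested through struct kmem_cache_args.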

lib/slub_kunit.c

Lines changed: 42 additions & 0 deletions
@@ -192,6 +192,47 @@ static void test_leak_destroy(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 2, slab_errors);
 }
 
+static void test_krealloc_redzone_zeroing(struct kunit *test)
+{
+	u8 *p;
+	int i;
+	struct kmem_cache *s = test_kmem_cache_create("TestSlub_krealloc", 64,
+				SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
+
+	p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 48));
+	memset(p, 0xff, 48);
+
+	kasan_disable_current();
+	OPTIMIZER_HIDE_VAR(p);
+
+	/* Test shrink */
+	p = krealloc(p, 40, GFP_KERNEL | __GFP_ZERO);
+	for (i = 40; i < 64; i++)
+		KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE);
+
+	/* Test grow within the same 64B kmalloc object */
+	p = krealloc(p, 56, GFP_KERNEL | __GFP_ZERO);
+	for (i = 40; i < 56; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0);
+	for (i = 56; i < 64; i++)
+		KUNIT_EXPECT_EQ(test, p[i], SLUB_RED_ACTIVE);
+
+	validate_slab_cache(s);
+	KUNIT_EXPECT_EQ(test, 0, slab_errors);
+
+	memset(p, 0xff, 56);
+	/* Test grow with allocating a bigger 128B object */
+	p = krealloc(p, 112, GFP_KERNEL | __GFP_ZERO);
+	for (i = 0; i < 56; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0xff);
+	for (i = 56; i < 112; i++)
+		KUNIT_EXPECT_EQ(test, p[i], 0);
+
+	kfree(p);
+	kasan_enable_current();
+	kmem_cache_destroy(s);
+}
+
 static int test_init(struct kunit *test)
 {
 	slab_errors = 0;
@@ -214,6 +255,7 @@ static struct kunit_case test_cases[] = {
 	KUNIT_CASE(test_kmalloc_redzone_access),
 	KUNIT_CASE(test_kfree_rcu),
 	KUNIT_CASE(test_leak_destroy),
+	KUNIT_CASE(test_krealloc_redzone_zeroing),
 	{}
 };

mm/kasan/generic.c

Lines changed: 5 additions & 2 deletions
@@ -392,9 +392,12 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
 	 * 1. Object is SLAB_TYPESAFE_BY_RCU, which means that it can
 	 *    be touched after it was freed, or
 	 * 2. Object has a constructor, which means it's expected to
-	 *    retain its content until the next allocation.
+	 *    retain its content until the next allocation, or
+	 * 3. It is from a kmalloc cache which enables the debug option
+	 *    to store original size.
 	 */
-	if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor) {
+	if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor ||
+	    slub_debug_orig_size(cache)) {
 		cache->kasan_info.free_meta_offset = *size;
 		*size += sizeof(struct kasan_free_meta);
 		goto free_meta_added;

mm/slab.h

Lines changed: 11 additions & 0 deletions
@@ -73,6 +73,11 @@ struct slab {
 			struct {
 				unsigned inuse:16;
 				unsigned objects:15;
+				/*
+				 * If slab debugging is enabled then the
+				 * frozen bit can be reused to indicate
+				 * that the slab was corrupted
+				 */
 				unsigned frozen:1;
 			};
 		};
@@ -695,6 +700,12 @@ void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
 void __check_heap_object(const void *ptr, unsigned long n,
 			 const struct slab *slab, bool to_user);
 
+static inline bool slub_debug_orig_size(struct kmem_cache *s)
+{
+	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
+		(s->flags & SLAB_KMALLOC));
+}
+
 #ifdef CONFIG_SLUB_DEBUG
 void skip_orig_size_check(struct kmem_cache *s, const void *object);
 #endif

mm/slab_common.c

Lines changed: 14 additions & 89 deletions
@@ -222,15 +222,12 @@ static struct kmem_cache *create_cache(const char *name,
 	struct kmem_cache *s;
 	int err;
 
-	if (WARN_ON(args->useroffset + args->usersize > object_size))
-		args->useroffset = args->usersize = 0;
-
 	/* If a custom freelist pointer is requested make sure it's sane. */
 	err = -EINVAL;
 	if (args->use_freeptr_offset &&
 	    (args->freeptr_offset >= object_size ||
 	     !(flags & SLAB_TYPESAFE_BY_RCU) ||
-	     !IS_ALIGNED(args->freeptr_offset, sizeof(freeptr_t))))
+	     !IS_ALIGNED(args->freeptr_offset, __alignof__(freeptr_t))))
 		goto out;
 
 	err = -ENOMEM;
@@ -257,11 +254,23 @@
  * @object_size: The size of objects to be created in this cache.
  * @args: Additional arguments for the cache creation (see
  *        &struct kmem_cache_args).
- * @flags: See %SLAB_* flags for an explanation of individual @flags.
+ * @flags: See the desriptions of individual flags. The common ones are listed
+ *         in the description below.
  *
  * Not to be called directly, use the kmem_cache_create() wrapper with the same
  * parameters.
  *
+ * Commonly used @flags:
+ *
+ * &SLAB_ACCOUNT - Account allocations to memcg.
+ *
+ * &SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries.
+ *
+ * &SLAB_RECLAIM_ACCOUNT - Objects are reclaimable.
+ *
+ * &SLAB_TYPESAFE_BY_RCU - Slab page (not individual objects) freeing delayed
+ * by a grace period - see the full description before using.
+ *
  * Context: Cannot be called within a interrupt, but can be interrupted.
  *
  * Return: a pointer to the cache on success, NULL on failure.
@@ -1199,90 +1208,6 @@ module_init(slab_proc_init);
 
 #endif /* CONFIG_SLUB_DEBUG */
 
-static __always_inline __realloc_size(2) void *
-__do_krealloc(const void *p, size_t new_size, gfp_t flags)
-{
-	void *ret;
-	size_t ks;
-
-	/* Check for double-free before calling ksize. */
-	if (likely(!ZERO_OR_NULL_PTR(p))) {
-		if (!kasan_check_byte(p))
-			return NULL;
-		ks = ksize(p);
-	} else
-		ks = 0;
-
-	/* If the object still fits, repoison it precisely. */
-	if (ks >= new_size) {
-		/* Zero out spare memory. */
-		if (want_init_on_alloc(flags)) {
-			kasan_disable_current();
-			memset(kasan_reset_tag(p) + new_size, 0, ks - new_size);
-			kasan_enable_current();
-		}
-
-		p = kasan_krealloc((void *)p, new_size, flags);
-		return (void *)p;
-	}
-
-	ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
-	if (ret && p) {
-		/* Disable KASAN checks as the object's redzone is accessed. */
-		kasan_disable_current();
-		memcpy(ret, kasan_reset_tag(p), ks);
-		kasan_enable_current();
-	}
-
-	return ret;
-}
-
-/**
- * krealloc - reallocate memory. The contents will remain unchanged.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size
- * is 0 and @p is not a %NULL pointer, the object pointed to is freed.
- *
- * If __GFP_ZERO logic is requested, callers must ensure that, starting with the
- * initial memory allocation, every subsequent call to this API for the same
- * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that
- * __GFP_ZERO is not fully honored by this API.
- *
- * This is the case, since krealloc() only knows about the bucket size of an
- * allocation (but not the exact size it was allocated with) and hence
- * implements the following semantics for shrinking and growing buffers with
- * __GFP_ZERO.
- *
- *         new             bucket
- * 0       size             size
- * |--------|----------------|
- * |  keep  |      zero      |
- *
- * In any case, the contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes.
- *
- * Return: pointer to the allocated memory or %NULL in case of error
- */
-void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
-{
-	void *ret;
-
-	if (unlikely(!new_size)) {
-		kfree(p);
-		return ZERO_SIZE_PTR;
-	}
-
-	ret = __do_krealloc(p, new_size, flags);
-	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
-		kfree(p);
-
-	return ret;
-}
-EXPORT_SYMBOL(krealloc_noprof);
-
 /**
  * kfree_sensitive - Clear sensitive information in memory before freeing
  * @p: object to free memory of
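
The __GFP_ZERO rule spelled out in the relocated krealloc() kerneldoc (and exercised by the new kunit test above) is easy to get wrong from the caller side. Below is a minimal, illustrative caller sketch, not kernel source; the function name and buffer sizes are invented for the example, and it simply follows the documented contract of passing __GFP_ZERO on every reallocation of the same buffer.

    #include <linux/slab.h>

    /*
     * Illustration only (hypothetical helper): grow a buffer with krealloc()
     * while honouring the __GFP_ZERO rule documented above.
     */
    static void *grow_zeroed_buffer_example(void)
    {
            u8 *buf, *tmp;

            /* Initial allocation is already zeroed. */
            buf = kzalloc(48, GFP_KERNEL);
            if (!buf)
                    return NULL;

            /*
             * Every later krealloc() of this buffer also passes __GFP_ZERO,
             * so bytes beyond the old size read as zero after the grow.
             * Mixing in plain GFP_KERNEL here could leave stale bytes inside
             * the same kmalloc bucket, since krealloc() only knows the bucket
             * size, not the size originally requested.
             */
            tmp = krealloc(buf, 112, GFP_KERNEL | __GFP_ZERO);
            if (!tmp) {
                    kfree(buf);
                    return NULL;
            }

            return tmp;
    }

The error path keeps and then frees the old buffer explicitly, which is the usual idiom since krealloc() does not free the original allocation on failure.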
