Commit f7c80fa

Christoph Lameter authored and tehcaster committed
SLUB: Add support for per object memory policies
The old SLAB allocator used to support memory policies on a per allocation basis. In SLUB the memory policies are applied on a per page frame / folio basis. Doing so avoids having to check memory policies in critical code paths for kmalloc and friends.

This generally worked well on Intel/AMD/PowerPC because the interconnect technology is mature and can minimize the latencies through intelligent caching even if a small object is not placed optimally.

However, on ARM we see the emergence of new NUMA interconnect technology based more on embedded devices. Caching of remote content can currently be ineffective using the standard building blocks / mesh available on those platforms. Such architectures benefit if each slab object is individually placed according to memory policies and other restrictions.

This patch adds another kernel parameter:

    slab_strict_numa

If it is set, a static branch is activated that causes the hot paths of the allocator to evaluate the current memory allocation policy. Each object is then properly placed, at the price of extra processing, and SLUB no longer defers to the page allocator to apply memory policies at the folio level.

This patch improves the performance of memcached running on an Ampere Altra 2P system (ARM Neoverse N1 processor) by 3.6% due to the accurate placement of small kernel objects.

Tested-by: Huang Shijie <shijie@os.amperecomputing.com>
Signed-off-by: Christoph Lameter (Ampere) <cl@gentwo.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
1 parent 7045738 commit f7c80fa

File tree

3 files changed: +62 -0 lines changed

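The commit message above describes how, with the new slab_strict_numa boot parameter, SLUB evaluates the calling task's memory policy for each object instead of only when a folio is obtained from the page allocator. As a rough userspace illustration (not part of this commit), the sketch below installs an MPOL_BIND policy with set_mempolicy(2); the file name, the choice of node 0, and the availability of libnuma's <numaif.h> wrapper are assumptions.

/*
 * Illustrative only, not part of the patch: a task binds its memory to
 * NUMA node 0.  On a kernel booted with slab_strict_numa (the patch
 * logs "SLUB: Strict NUMA enabled." when the option takes effect),
 * SLUB consults this policy for each object allocated on the task's
 * behalf rather than only when a new folio is taken from the page
 * allocator.
 *
 * Assumes libnuma's <numaif.h>; build with:  gcc policy-demo.c -lnuma
 */
#include <numaif.h>
#include <stdio.h>

int main(void)
{
	unsigned long nodemask = 1UL << 0;	/* permit node 0 only */

	if (set_mempolicy(MPOL_BIND, &nodemask, 8 * sizeof(nodemask)) != 0) {
		perror("set_mempolicy");
		return 1;
	}

	/* Allocations the kernel makes for this task now follow MPOL_BIND. */
	return 0;
}

Without the boot parameter the default behaviour is unchanged: memory policies are still applied only at the folio level.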

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 10 additions & 0 deletions
@@ -6147,6 +6147,16 @@
 			For more information see Documentation/mm/slub.rst.
 			(slub_nomerge legacy name also accepted for now)
 
+	slab_strict_numa	[MM]
+			Support memory policies on a per object level
+			in the slab allocator. The default is for memory
+			policies to be applied at the folio level when
+			a new folio is needed or a partial folio is
+			retrieved from the lists. Increases overhead
+			in the slab fastpaths but gains more accurate
+			NUMA kernel object placement which helps with slow
+			interconnects in NUMA systems.
+
 	slram=		[HW,MTD]
 
 	smart2=		[HW]

Documentation/mm/slub.rst

Lines changed: 9 additions & 0 deletions
@@ -175,6 +175,15 @@ can be influenced by kernel parameters:
 	``slab_max_order`` to 0, what cause minimum possible order of
 	slabs allocation.
 
+``slab_strict_numa``
+	Enables the application of memory policies on each
+	allocation. This results in more accurate placement of
+	objects which may result in the reduction of accesses
+	to remote nodes. The default is to only apply memory
+	policies at the folio level when a new folio is acquired
+	or a folio is retrieved from the lists. Enabling this
+	option reduces the fastpath performance of the slab allocator.
+
 SLUB Debug output
 =================
 
mm/slub.c

Lines changed: 43 additions & 0 deletions
@@ -218,6 +218,10 @@ DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
 #endif /* CONFIG_SLUB_DEBUG */
 
+#ifdef CONFIG_NUMA
+static DEFINE_STATIC_KEY_FALSE(strict_numa);
+#endif
+
 /* Structure holding parameters for get_partial() call chain */
 struct partial_context {
 	gfp_t flags;
@@ -3956,6 +3960,28 @@ static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
 	object = c->freelist;
 	slab = c->slab;
 
+#ifdef CONFIG_NUMA
+	if (static_branch_unlikely(&strict_numa) &&
+			node == NUMA_NO_NODE) {
+
+		struct mempolicy *mpol = current->mempolicy;
+
+		if (mpol) {
+			/*
+			 * Special BIND rule support. If existing slab
+			 * is in permitted set then do not redirect
+			 * to a particular node.
+			 * Otherwise we apply the memory policy to get
+			 * the node we need to allocate on.
+			 */
+			if (mpol->mode != MPOL_BIND || !slab ||
+					!node_isset(slab_nid(slab), mpol->nodes))
+
+				node = mempolicy_slab_node();
+		}
+	}
+#endif
+
 	if (!USE_LOCKLESS_FAST_PATH() ||
 	    unlikely(!object || !slab || !node_match(slab, node))) {
 		object = __slab_alloc(s, gfpflags, node, addr, c, orig_size);
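To make the MPOL_BIND special case in the hunk above easier to follow, here is a small hypothetical userspace model of the decision (plain C, not kernel code): keep NUMA_NO_NODE, i.e. stay on the currently cached slab, when the policy is BIND and that slab already sits on a permitted node; otherwise ask the policy for a target node. The names fake_policy, choose_node and pick_policy_node are made up for this sketch.

#include <stdio.h>

#define NUMA_NO_NODE	(-1)
enum { FAKE_MPOL_BIND = 1, FAKE_MPOL_OTHER = 2 };

struct fake_policy {
	int mode;			/* FAKE_MPOL_BIND or FAKE_MPOL_OTHER */
	unsigned long nodes;		/* bitmask of permitted nodes */
};

/* Stand-in for mempolicy_slab_node(): pick the lowest permitted node. */
static int pick_policy_node(const struct fake_policy *pol)
{
	for (int n = 0; n < 64; n++)
		if (pol->nodes & (1UL << n))
			return n;
	return NUMA_NO_NODE;
}

/*
 * Mirror of the decision above: only redirect the allocation when the
 * policy is not BIND, there is no cached slab, or the cached slab's
 * node lies outside the permitted set.
 */
static int choose_node(const struct fake_policy *pol, int cached_slab_node)
{
	if (pol->mode != FAKE_MPOL_BIND || cached_slab_node == NUMA_NO_NODE ||
	    !(pol->nodes & (1UL << cached_slab_node)))
		return pick_policy_node(pol);
	return NUMA_NO_NODE;	/* keep using the cached slab */
}

int main(void)
{
	struct fake_policy bind01 = { FAKE_MPOL_BIND, 0x3 };	/* nodes 0-1 */

	printf("cached on node 1 -> %d\n", choose_node(&bind01, 1));	/* -1: keep slab */
	printf("cached on node 2 -> %d\n", choose_node(&bind01, 2));	/*  0: redirect   */
	return 0;
}

In the real hunk, mempolicy_slab_node() plays the role of pick_policy_node() and slab_nid(slab) the role of cached_slab_node.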
@@ -5602,6 +5628,23 @@ static int __init setup_slub_min_objects(char *str)
 __setup("slab_min_objects=", setup_slub_min_objects);
 __setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
 
+#ifdef CONFIG_NUMA
+static int __init setup_slab_strict_numa(char *str)
+{
+	if (nr_node_ids > 1) {
+		static_branch_enable(&strict_numa);
+		pr_info("SLUB: Strict NUMA enabled.\n");
+	} else {
+		pr_warn("slab_strict_numa parameter set on non NUMA system.\n");
+	}
+
+	return 1;
+}
+
+__setup("slab_strict_numa", setup_slab_strict_numa);
+#endif
+
+
 #ifdef CONFIG_HARDENED_USERCOPY
 /*
  * Rejects incorrectly sized objects and objects that are to be copied
