Skip to content

Commit 03add82

Browse files
committed
allowedips: allocate nodes in kmem_cache
The previous commit moved from O(n) to O(1) for removal, but in the process introduced an additional pointer member to a struct that increased the size from 60 to 68 bytes, putting nodes in the 128-byte slab. With deployed systems having as many as 2 million nodes, this represents a significant doubling in memory usage (128 MiB -> 256 MiB). Fix this by using our own kmem_cache, that's sized exactly right. This also makes wireguard's memory usage more transparent in tools like slabtop and /proc/slabinfo.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
1 parent b56d48c commit 03add82

File tree

3 files changed

+38
-13
lines changed

3 files changed

+38
-13
lines changed

src/allowedips.c

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#include "allowedips.h"
77
#include "peer.h"
88

9+
static struct kmem_cache *node_cache;
10+
911
static void swap_endian(u8 *dst, const u8 *src, u8 bits)
1012
{
1113
if (bits == 32) {
@@ -31,11 +33,6 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
3133
#define CHOOSE_NODE(parent, key) \
3234
parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
3335

34-
static void node_free_rcu(struct rcu_head *rcu)
35-
{
36-
kfree(container_of(rcu, struct allowedips_node, rcu));
37-
}
38-
3936
static void push_rcu(struct allowedips_node **stack,
4037
struct allowedips_node __rcu *p, unsigned int *len)
4138
{
@@ -45,6 +42,11 @@ static void push_rcu(struct allowedips_node **stack,
4542
}
4643
}
4744

45+
static void node_free_rcu(struct rcu_head *rcu)
46+
{
47+
kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu));
48+
}
49+
4850
static void root_free_rcu(struct rcu_head *rcu)
4951
{
5052
struct allowedips_node *node, *stack[128] = {
@@ -54,7 +56,7 @@ static void root_free_rcu(struct rcu_head *rcu)
5456
while (len > 0 && (node = stack[--len])) {
5557
push_rcu(stack, node->bit[0], &len);
5658
push_rcu(stack, node->bit[1], &len);
57-
kfree(node);
59+
kmem_cache_free(node_cache, node);
5860
}
5961
}
6062

@@ -169,7 +171,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
169171
return -EINVAL;
170172

171173
if (!rcu_access_pointer(*trie)) {
172-
node = kzalloc(sizeof(*node), GFP_KERNEL);
174+
node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
173175
if (unlikely(!node))
174176
return -ENOMEM;
175177
RCU_INIT_POINTER(node->peer, peer);
@@ -185,7 +187,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
185187
return 0;
186188
}
187189

188-
newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
190+
newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL);
189191
if (unlikely(!newnode))
190192
return -ENOMEM;
191193
RCU_INIT_POINTER(newnode->peer, peer);
@@ -218,10 +220,10 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
218220
return 0;
219221
}
220222

221-
node = kzalloc(sizeof(*node), GFP_KERNEL);
223+
node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
222224
if (unlikely(!node)) {
223225
list_del(&newnode->peer_list);
224-
kfree(newnode);
226+
kmem_cache_free(node_cache, newnode);
225227
return -ENOMEM;
226228
}
227229
INIT_LIST_HEAD(&node->peer_list);
@@ -311,7 +313,7 @@ void wg_allowedips_remove_by_peer(struct allowedips *table,
311313
if (child)
312314
child->parent_bit = node->parent_bit;
313315
*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
314-
kfree_rcu(node, rcu);
316+
call_rcu(&node->rcu, node_free_rcu);
315317

316318
/* TODO: Note that we currently don't walk up and down in order to
317319
* free any potential filler nodes. This means that this function
@@ -355,4 +357,16 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
355357
return NULL;
356358
}
357359

360+
int __init wg_allowedips_slab_init(void)
361+
{
362+
node_cache = KMEM_CACHE(allowedips_node, 0);
363+
return node_cache ? 0 : -ENOMEM;
364+
}
365+
366+
void wg_allowedips_slab_uninit(void)
367+
{
368+
rcu_barrier();
369+
kmem_cache_destroy(node_cache);
370+
}
371+
358372
#include "selftest/allowedips.c"

src/allowedips.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ struct allowedips_node {
1919
u8 bits[16] __aligned(__alignof(u64));
2020

2121
/* Keep rarely used members at bottom to be beyond cache line. */
22-
struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */
22+
struct allowedips_node *__rcu *parent_bit;
2323
union {
2424
struct list_head peer_list;
2525
struct rcu_head rcu;
@@ -53,4 +53,7 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
5353
bool wg_allowedips_selftest(void);
5454
#endif
5555

56+
int wg_allowedips_slab_init(void);
57+
void wg_allowedips_slab_uninit(void);
58+
5659
#endif /* _WG_ALLOWEDIPS_H */

src/main.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,15 @@ static int __init mod_init(void)
2626
(ret = curve25519_mod_init()))
2727
return ret;
2828

29+
ret = wg_allowedips_slab_init();
30+
if (ret < 0)
31+
goto err_allowedips;
32+
2933
#ifdef DEBUG
34+
ret = -ENOTRECOVERABLE;
3035
if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
3136
!wg_ratelimiter_selftest())
32-
return -ENOTRECOVERABLE;
37+
goto err_peer;
3338
#endif
3439
wg_noise_init();
3540

@@ -55,6 +60,8 @@ static int __init mod_init(void)
5560
err_device:
5661
wg_peer_uninit();
5762
err_peer:
63+
wg_allowedips_slab_uninit();
64+
err_allowedips:
5865
return ret;
5966
}
6067

@@ -63,6 +70,7 @@ static void __exit mod_exit(void)
6370
wg_genetlink_uninit();
6471
wg_device_uninit();
6572
wg_peer_uninit();
73+
wg_allowedips_slab_uninit();
6674
}
6775

6876
module_init(mod_init);

0 commit comments

Comments (0)