Skip to content

Commit 383461d

Browse files
committed
allowedips: free empty intermediate nodes when removing single node
When removing single nodes, it's possible that that node's parent is an empty intermediate node, in which case, it too should be removed. Otherwise the trie fills up and never is fully emptied, leading to gradual memory leaks over time for tries that are modified often. There was originally code to do this, but was removed during refactoring in 2016 and never reworked. Now that we have proper parent pointers from the previous commits, we can implement this properly. In order to reduce branching and expensive comparisons, we want to keep the double pointer for parent assignment (which lets us easily chain up to the root), but we still need to actually get the parent's base address. So encode the bit number into the last two bits of the pointer, and pack and unpack it as needed. This is a little bit clumsy but is the fastest and less memory wasteful of the compromises. Note that we align the root struct here to a minimum of 4, because it's embedded into a larger struct, and we're relying on having the bottom two bits for our flag, which would only be 16-bit aligned on m68k. The existing macro-based helpers were a bit unwieldy for adding the bit packing to, so this commit replaces them with safer and clearer ordinary functions. We add a test to the randomized/fuzzer part of the selftests, to free the randomized tries by-peer, refuzz it, and repeat, until it's supposed to be empty, and then then see if that actually resulted in the whole thing being emptied. That combined with kmemcheck should hopefully make sure this commit is doing what it should. Along the way this resulted in various other cleanups of the tests and fixes for recent graphviz. Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
1 parent 03add82 commit 383461d

File tree

3 files changed

+137
-131
lines changed

3 files changed

+137
-131
lines changed

src/allowedips.c

Lines changed: 58 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,11 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
3030
node->bitlen = bits;
3131
memcpy(node->bits, src, bits / 8U);
3232
}
33-
#define CHOOSE_NODE(parent, key) \
34-
parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
33+
34+
static inline u8 choose(struct allowedips_node *node, const u8 *key)
35+
{
36+
return (key[node->bit_at_a] >> node->bit_at_b) & 1;
37+
}
3538

3639
static void push_rcu(struct allowedips_node **stack,
3740
struct allowedips_node __rcu *p, unsigned int *len)
@@ -112,7 +115,7 @@ static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
112115
found = node;
113116
if (node->cidr == bits)
114117
break;
115-
node = rcu_dereference_bh(CHOOSE_NODE(node, key));
118+
node = rcu_dereference_bh(node->bit[choose(node, key)]);
116119
}
117120
return found;
118121
}
@@ -144,8 +147,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
144147
u8 cidr, u8 bits, struct allowedips_node **rnode,
145148
struct mutex *lock)
146149
{
147-
struct allowedips_node *node = rcu_dereference_protected(trie,
148-
lockdep_is_held(lock));
150+
struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
149151
struct allowedips_node *parent = NULL;
150152
bool exact = false;
151153

@@ -155,13 +157,24 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
155157
exact = true;
156158
break;
157159
}
158-
node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
159-
lockdep_is_held(lock));
160+
node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock));
160161
}
161162
*rnode = parent;
162163
return exact;
163164
}
164165

166+
static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
167+
{
168+
node->parent_bit_packed = (unsigned long)parent | bit;
169+
rcu_assign_pointer(*parent, node);
170+
}
171+
172+
static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node)
173+
{
174+
u8 bit = choose(parent, node->bits);
175+
connect_node(&parent->bit[bit], bit, node);
176+
}
177+
165178
static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
166179
u8 cidr, struct wg_peer *peer, struct mutex *lock)
167180
{
@@ -177,8 +190,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
177190
RCU_INIT_POINTER(node->peer, peer);
178191
list_add_tail(&node->peer_list, &peer->allowedips_list);
179192
copy_and_assign_cidr(node, key, cidr, bits);
180-
rcu_assign_pointer(node->parent_bit, trie);
181-
rcu_assign_pointer(*trie, node);
193+
connect_node(trie, 2, node);
182194
return 0;
183195
}
184196
if (node_placement(*trie, key, cidr, bits, &node, lock)) {
@@ -197,26 +209,22 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
197209
if (!node) {
198210
down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
199211
} else {
200-
down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
212+
const u8 bit = choose(node, key);
213+
down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock));
201214
if (!down) {
202-
rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
203-
rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
215+
connect_node(&node->bit[bit], bit, newnode);
204216
return 0;
205217
}
206218
}
207219
cidr = min(cidr, common_bits(down, key, bits));
208220
parent = node;
209221

210222
if (newnode->cidr == cidr) {
211-
rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
212-
rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
213-
if (!parent) {
214-
rcu_assign_pointer(newnode->parent_bit, trie);
215-
rcu_assign_pointer(*trie, newnode);
216-
} else {
217-
rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
218-
rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
219-
}
223+
choose_and_connect_node(newnode, down);
224+
if (!parent)
225+
connect_node(trie, 2, newnode);
226+
else
227+
choose_and_connect_node(parent, newnode);
220228
return 0;
221229
}
222230

@@ -229,17 +237,12 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
229237
INIT_LIST_HEAD(&node->peer_list);
230238
copy_and_assign_cidr(node, newnode->bits, cidr, bits);
231239

232-
rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
233-
rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
234-
rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
235-
rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
236-
if (!parent) {
237-
rcu_assign_pointer(node->parent_bit, trie);
238-
rcu_assign_pointer(*trie, node);
239-
} else {
240-
rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
241-
rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
242-
}
240+
choose_and_connect_node(node, down);
241+
choose_and_connect_node(node, newnode);
242+
if (!parent)
243+
connect_node(trie, 2, node);
244+
else
245+
choose_and_connect_node(parent, node);
243246
return 0;
244247
}
245248

@@ -297,7 +300,8 @@ int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
297300
void wg_allowedips_remove_by_peer(struct allowedips *table,
298301
struct wg_peer *peer, struct mutex *lock)
299302
{
300-
struct allowedips_node *node, *child, *tmp;
303+
struct allowedips_node *node, *child, **parent_bit, *parent, *tmp;
304+
bool free_parent;
301305

302306
if (list_empty(&peer->allowedips_list))
303307
return;
@@ -307,19 +311,29 @@ void wg_allowedips_remove_by_peer(struct allowedips *table,
307311
RCU_INIT_POINTER(node->peer, NULL);
308312
if (node->bit[0] && node->bit[1])
309313
continue;
310-
child = rcu_dereference_protected(
311-
node->bit[!rcu_access_pointer(node->bit[0])],
312-
lockdep_is_held(lock));
314+
child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])],
315+
lockdep_is_held(lock));
313316
if (child)
314-
child->parent_bit = node->parent_bit;
315-
*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
317+
child->parent_bit_packed = node->parent_bit_packed;
318+
parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL);
319+
*parent_bit = child;
320+
parent = (void *)parent_bit -
321+
offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]);
322+
free_parent = !rcu_access_pointer(node->bit[0]) &&
323+
!rcu_access_pointer(node->bit[1]) &&
324+
(node->parent_bit_packed & 3) <= 1 &&
325+
!rcu_access_pointer(parent->peer);
326+
if (free_parent)
327+
child = rcu_dereference_protected(
328+
parent->bit[!(node->parent_bit_packed & 1)],
329+
lockdep_is_held(lock));
316330
call_rcu(&node->rcu, node_free_rcu);
317-
318-
/* TODO: Note that we currently don't walk up and down in order to
319-
* free any potential filler nodes. This means that this function
320-
* doesn't free up as much as it could, which could be revisited
321-
* at some point.
322-
*/
331+
if (!free_parent)
332+
continue;
333+
if (child)
334+
child->parent_bit_packed = parent->parent_bit_packed;
335+
*(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child;
336+
call_rcu(&parent->rcu, node_free_rcu);
323337
}
324338
}
325339

src/allowedips.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ struct allowedips_node {
1919
u8 bits[16] __aligned(__alignof(u64));
2020

2121
/* Keep rarely used members at bottom to be beyond cache line. */
22-
struct allowedips_node *__rcu *parent_bit;
22+
unsigned long parent_bit_packed;
2323
union {
2424
struct list_head peer_list;
2525
struct rcu_head rcu;
@@ -30,7 +30,7 @@ struct allowedips {
3030
struct allowedips_node __rcu *root4;
3131
struct allowedips_node __rcu *root6;
3232
u64 seq;
33-
};
33+
} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. */
3434

3535
void wg_allowedips_init(struct allowedips *table);
3636
void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);

0 commit comments

Comments
 (0)