Commit 0de2046

kkdwivedi authored and Alexei Starovoitov committed
bpf: Implement verifier support for rqspinlock
Introduce verifier-side support for the rqspinlock kfuncs. The first step is to allow the bpf_res_spin_lock type to be defined in map values and allocated objects, so the BTF side is updated with a new BPF_RES_SPIN_LOCK field type to recognize and validate. An object cannot contain both bpf_spin_lock and bpf_res_spin_lock; only one of them (and, as before, at most one instance per object) may be present. bpf_res_spin_lock can also be used to protect objects that require lock protection for their kfuncs, like BPF rbtree and linked list.

The verifier plumbing to simulate the success and failure cases when calling the kfuncs is done by pushing a new verifier state onto the verifier state stack, which verifies the failure case upon calling the kfunc. The path where success is indicated creates all lock reference state and IRQ state (if necessary for the irqsave variants). In the failure case, the state clears registers r0-r5, sets the return value, and skips kfunc processing, proceeding to the next instruction.

The return value is marked as 0 for the success case and as [-MAX_ERRNO, -1] for the failure case. Hence, whenever the program checks the return value as 'if (ret)' or 'if (ret < 0)', the verifier never traverses such branches for success cases, and knows that the lock is not held in such cases.

We push the kfunc state in check_kfunc_call whenever rqspinlock kfuncs are invoked. We introduce a kfunc_class state to avoid mixing lock irqrestore kfuncs with IRQ state created by bpf_local_irq_save.

With all this infrastructure in place, these kfuncs become usable in programs while satisfying all safety properties required by the kernel.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20250316040541.108729-24-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
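
To make the success/failure contract above concrete, here is a minimal sketch of a BPF program using the resilient spin lock kfuncs. The kfunc prototypes, map layout, and program section below are assumptions made for illustration; they are not taken from this patch.

/*
 * Minimal sketch (illustration only). Assumes the kfuncs take the lock
 * pointer and that bpf_res_spin_lock() returns 0 on success and a negative
 * errno on failure, as described in the commit message.
 */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

struct val {
	struct bpf_res_spin_lock lock;	/* at most one lock field per object */
	__u64 counter;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct val);
} vals SEC(".maps");

/* Assumed kfunc declarations (normally provided via __ksym). */
extern int bpf_res_spin_lock(struct bpf_res_spin_lock *lock) __ksym;
extern void bpf_res_spin_unlock(struct bpf_res_spin_lock *lock) __ksym;

SEC("tc")
int use_res_lock(struct __sk_buff *ctx)
{
	struct val *v;
	int key = 0;

	v = bpf_map_lookup_elem(&vals, &key);
	if (!v)
		return 0;

	if (bpf_res_spin_lock(&v->lock))
		return 0;	/* failure branch: verifier knows the lock is not held */

	v->counter++;
	bpf_res_spin_unlock(&v->lock);	/* success branch must release the lock */
	return 0;
}

char _license[] SEC("license") = "GPL";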
1 parent 97eb35f commit 0de2046

5 files changed, +231 -45 lines changed

include/linux/bpf.h

Lines changed: 9 additions & 0 deletions
@@ -205,6 +205,7 @@ enum btf_field_type {
 	BPF_REFCOUNT = (1 << 9),
 	BPF_WORKQUEUE = (1 << 10),
 	BPF_UPTR = (1 << 11),
+	BPF_RES_SPIN_LOCK = (1 << 12),
 };
 
 typedef void (*btf_dtor_kfunc_t)(void *);
@@ -240,6 +241,7 @@ struct btf_record {
 	u32 cnt;
 	u32 field_mask;
 	int spin_lock_off;
+	int res_spin_lock_off;
 	int timer_off;
 	int wq_off;
 	int refcount_off;
@@ -315,6 +317,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 	switch (type) {
 	case BPF_SPIN_LOCK:
 		return "bpf_spin_lock";
+	case BPF_RES_SPIN_LOCK:
+		return "bpf_res_spin_lock";
 	case BPF_TIMER:
 		return "bpf_timer";
 	case BPF_WORKQUEUE:
@@ -347,6 +351,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
 	switch (type) {
 	case BPF_SPIN_LOCK:
 		return sizeof(struct bpf_spin_lock);
+	case BPF_RES_SPIN_LOCK:
+		return sizeof(struct bpf_res_spin_lock);
 	case BPF_TIMER:
 		return sizeof(struct bpf_timer);
 	case BPF_WORKQUEUE:
@@ -377,6 +383,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
 	switch (type) {
 	case BPF_SPIN_LOCK:
 		return __alignof__(struct bpf_spin_lock);
+	case BPF_RES_SPIN_LOCK:
+		return __alignof__(struct bpf_res_spin_lock);
 	case BPF_TIMER:
 		return __alignof__(struct bpf_timer);
 	case BPF_WORKQUEUE:
@@ -420,6 +428,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
 	case BPF_RB_ROOT:
 		/* RB_ROOT_CACHED 0-inits, no need to do anything after memset */
 	case BPF_SPIN_LOCK:
+	case BPF_RES_SPIN_LOCK:
 	case BPF_TIMER:
 	case BPF_WORKQUEUE:
 	case BPF_KPTR_UNREF:

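The btf_field_type helpers extended above are the single place generic code asks for the name, size, and alignment of a BTF-managed field. A small illustrative sketch of how they might be consumed; the caller and message format are made up, not part of the patch:

/* Illustration only: dump the properties of a BTF-managed field type.
 * For BPF_RES_SPIN_LOCK this now reports "bpf_res_spin_lock",
 * sizeof(struct bpf_res_spin_lock), and its alignment.
 */
static void describe_btf_field(enum btf_field_type type)
{
	pr_info("%s: size=%u align=%u\n",
		btf_field_type_name(type),
		btf_field_type_size(type),
		btf_field_type_align(type));
}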
include/linux/bpf_verifier.h

Lines changed: 13 additions & 3 deletions
@@ -115,6 +115,14 @@ struct bpf_reg_state {
 			int depth:30;
 		} iter;
 
+		/* For irq stack slots */
+		struct {
+			enum {
+				IRQ_NATIVE_KFUNC,
+				IRQ_LOCK_KFUNC,
+			} kfunc_class;
+		} irq;
+
 		/* Max size from any of the above. */
 		struct {
 			unsigned long raw1;
@@ -255,9 +263,11 @@ struct bpf_reference_state {
 	 * default to pointer reference on zero initialization of a state.
 	 */
 	enum ref_state_type {
-		REF_TYPE_PTR = 1,
-		REF_TYPE_IRQ = 2,
-		REF_TYPE_LOCK = 3,
+		REF_TYPE_PTR = (1 << 1),
+		REF_TYPE_IRQ = (1 << 2),
+		REF_TYPE_LOCK = (1 << 3),
+		REF_TYPE_RES_LOCK = (1 << 4),
+		REF_TYPE_RES_LOCK_IRQ = (1 << 5),
 	} type;
 	/* Track each reference created with a unique id, even if the same
 	 * instruction creates the reference multiple times (eg, via CALL).

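The kfunc_class member added to the irq stack-slot state lets the verifier keep IRQ flags saved by bpf_local_irq_save() (IRQ_NATIVE_KFUNC) separate from flags saved by the lock's irqsave kfunc (IRQ_LOCK_KFUNC), so the two cannot be restored through each other's kfuncs. Below is a sketch of what that pairing looks like from the program side; the kfunc prototypes and helper are assumptions for illustration, not code from this patch.

/* Assumed declarations; normally pulled in via __ksym. */
extern int bpf_res_spin_lock_irqsave(struct bpf_res_spin_lock *lock,
				     unsigned long *flags) __ksym;
extern void bpf_res_spin_unlock_irqrestore(struct bpf_res_spin_lock *lock,
					   unsigned long *flags) __ksym;

static __always_inline int bump_under_lock(struct bpf_res_spin_lock *lock,
					   __u64 *counter)
{
	unsigned long flags;

	if (bpf_res_spin_lock_irqsave(lock, &flags))
		return -1;	/* lock not acquired; no IRQ state to restore */
	(*counter)++;
	/* Flags saved by the lock kfunc must be restored by the matching
	 * unlock kfunc, not by bpf_local_irq_restore().
	 */
	bpf_res_spin_unlock_irqrestore(lock, &flags);
	return 0;
}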
kernel/bpf/btf.c

Lines changed: 24 additions & 2 deletions
@@ -3481,6 +3481,15 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_
 			goto end;
 		}
 	}
+	if (field_mask & BPF_RES_SPIN_LOCK) {
+		if (!strcmp(name, "bpf_res_spin_lock")) {
+			if (*seen_mask & BPF_RES_SPIN_LOCK)
+				return -E2BIG;
+			*seen_mask |= BPF_RES_SPIN_LOCK;
+			type = BPF_RES_SPIN_LOCK;
+			goto end;
+		}
+	}
 	if (field_mask & BPF_TIMER) {
 		if (!strcmp(name, "bpf_timer")) {
 			if (*seen_mask & BPF_TIMER)
@@ -3659,6 +3668,7 @@ static int btf_find_field_one(const struct btf *btf,
 
 	switch (field_type) {
 	case BPF_SPIN_LOCK:
+	case BPF_RES_SPIN_LOCK:
 	case BPF_TIMER:
 	case BPF_WORKQUEUE:
 	case BPF_LIST_NODE:
@@ -3952,6 +3962,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 		return ERR_PTR(-ENOMEM);
 
 	rec->spin_lock_off = -EINVAL;
+	rec->res_spin_lock_off = -EINVAL;
 	rec->timer_off = -EINVAL;
 	rec->wq_off = -EINVAL;
 	rec->refcount_off = -EINVAL;
@@ -3979,6 +3990,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 			/* Cache offset for faster lookup at runtime */
 			rec->spin_lock_off = rec->fields[i].offset;
 			break;
+		case BPF_RES_SPIN_LOCK:
+			WARN_ON_ONCE(rec->spin_lock_off >= 0);
+			/* Cache offset for faster lookup at runtime */
+			rec->res_spin_lock_off = rec->fields[i].offset;
+			break;
 		case BPF_TIMER:
 			WARN_ON_ONCE(rec->timer_off >= 0);
 			/* Cache offset for faster lookup at runtime */
@@ -4022,9 +4038,15 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 		rec->cnt++;
 	}
 
+	if (rec->spin_lock_off >= 0 && rec->res_spin_lock_off >= 0) {
+		ret = -EINVAL;
+		goto end;
+	}
+
 	/* bpf_{list_head, rb_node} require bpf_spin_lock */
 	if ((btf_record_has_field(rec, BPF_LIST_HEAD) ||
-	     btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) {
+	     btf_record_has_field(rec, BPF_RB_ROOT)) &&
+	    (rec->spin_lock_off < 0 && rec->res_spin_lock_off < 0)) {
 		ret = -EINVAL;
 		goto end;
 	}
@@ -5637,7 +5659,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
 
 		type = &tab->types[tab->cnt];
 		type->btf_id = i;
-		record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
+		record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
 						  BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT |
 						  BPF_KPTR, t->size);
 		/* The record cannot be unset, treat it as an error if so */

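To illustrate the two new checks in btf_parse_fields() above, the following sketch shows map-value layouts written from the BPF program side. The struct names and the __contains annotation convention are illustrative assumptions, not part of this patch.

/* Accepted: a bpf_res_spin_lock may now satisfy the lock requirement of
 * bpf_list_head / bpf_rb_root, just as bpf_spin_lock could before.
 */
struct elem {
	struct bpf_list_node node;
	__u64 data;
};

struct good_value {
	struct bpf_res_spin_lock lock;
	struct bpf_list_head head __contains(elem, node);
};

/* Rejected with -EINVAL while parsing fields: both lock types in one object. */
struct bad_value {
	struct bpf_spin_lock lock;
	struct bpf_res_spin_lock res_lock;
};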
kernel/bpf/syscall.c

Lines changed: 5 additions & 1 deletion
@@ -648,6 +648,7 @@ void btf_record_free(struct btf_record *rec)
 		case BPF_RB_ROOT:
 		case BPF_RB_NODE:
 		case BPF_SPIN_LOCK:
+		case BPF_RES_SPIN_LOCK:
 		case BPF_TIMER:
 		case BPF_REFCOUNT:
 		case BPF_WORKQUEUE:
@@ -700,6 +701,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
 		case BPF_RB_ROOT:
 		case BPF_RB_NODE:
 		case BPF_SPIN_LOCK:
+		case BPF_RES_SPIN_LOCK:
 		case BPF_TIMER:
 		case BPF_REFCOUNT:
 		case BPF_WORKQUEUE:
@@ -777,6 +779,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 
 		switch (fields[i].type) {
 		case BPF_SPIN_LOCK:
+		case BPF_RES_SPIN_LOCK:
 			break;
 		case BPF_TIMER:
 			bpf_timer_cancel_and_free(field_ptr);
@@ -1212,7 +1215,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
 		return -EINVAL;
 
 	map->record = btf_parse_fields(btf, value_type,
-				       BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
+				       BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
 				       BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
 				       map->value_size);
 	if (!IS_ERR_OR_NULL(map->record)) {
@@ -1231,6 +1234,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
 		case 0:
 			continue;
 		case BPF_SPIN_LOCK:
+		case BPF_RES_SPIN_LOCK:
 			if (map->map_type != BPF_MAP_TYPE_HASH &&
 			    map->map_type != BPF_MAP_TYPE_ARRAY &&
 			    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
