
Commit ad061cf

Daniel Borkmann says:

====================
pull-request: bpf 2022-10-03

We've added 10 non-merge commits during the last 23 day(s) which contain
a total of 14 files changed, 130 insertions(+), 69 deletions(-).

The main changes are:

1) Fix dynptr helper API to gate behind CAP_BPF given it was not intended
   for unprivileged BPF programs, from Kumar Kartikeya Dwivedi.

2) Fix need_wakeup flag inheritance from umem buffer pool for shared xsk
   sockets, from Jalal Mostafa.

3) Fix truncated last_member_type_id in btf_struct_resolve() which had a
   wrong storage type, from Lorenz Bauer.

4) Fix xsk back-pressure mechanism on tx when amount of produced
   descriptors to CQ is lower than what was grabbed from xsk tx ring,
   from Maciej Fijalkowski.

5) Fix wrong cgroup attach flags being displayed to effective progs,
   from Pu Lehui.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  xsk: Inherit need_wakeup flag for shared sockets
  bpf: Gate dynptr API behind CAP_BPF
  selftests/bpf: Adapt cgroup effective query uapi change
  bpftool: Fix wrong cgroup attach flags being assigned to effective progs
  bpf, cgroup: Reject prog_attach_flags array when effective query
  bpf: Ensure correct locking around vulnerable function find_vpid()
  bpf: btf: fix truncated last_member_type_id in btf_struct_resolve
  selftests/xsk: Add missing close() on netns fd
  xsk: Fix backpressure mechanism on Tx
  MAINTAINERS: Add include/linux/tnum.h to BPF CORE
====================

Link: https://lore.kernel.org/r/20221003201957.13149-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 3a4d061 + 60240bc commit ad061cf

14 files changed, 130 insertions(+), 69 deletions(-)

MAINTAINERS

Lines changed: 1 addition & 0 deletions
@@ -3825,6 +3825,7 @@ F:	kernel/bpf/dispatcher.c
 F:	kernel/bpf/trampoline.c
 F:	include/linux/bpf*
 F:	include/linux/filter.h
+F:	include/linux/tnum.h
 
 BPF [BTF]
 M:	Martin KaFai Lau <martin.lau@linux.dev>

include/net/xsk_buff_pool.h

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 			    struct xdp_umem *umem);
 int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
 		  u16 queue_id, u16 flags);
-int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
 			 struct net_device *dev, u16 queue_id);
 int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs);
 void xp_destroy(struct xsk_buff_pool *pool);

include/uapi/linux/bpf.h

Lines changed: 5 additions & 2 deletions
@@ -1233,7 +1233,7 @@ enum {
 
 /* Query effective (directly attached + inherited from ancestor cgroups)
  * programs that will be executed for events within a cgroup.
- * attach_flags with this flag are returned only for directly attached programs.
+ * attach_flags with this flag are always returned 0.
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
@@ -1432,7 +1432,10 @@ union bpf_attr {
 		__u32		attach_flags;
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
-		__aligned_u64	prog_attach_flags; /* output: per-program attach_flags */
+		/* output: per-program attach_flags.
+		 * not allowed to be set during effective query.
+		 */
+		__aligned_u64	prog_attach_flags;
 	} query;
 
 	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
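
The comment change above is the user-visible contract: with BPF_F_QUERY_EFFECTIVE set, attach_flags is reported as 0 and a prog_attach_flags array may not be supplied. Below is a minimal userspace sketch of an effective query under the updated rules; the helper name, attach type and buffer size are illustrative and not part of this commit.

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Query the effective (directly attached + inherited) programs for one
 * cgroup attach point. With BPF_F_QUERY_EFFECTIVE, attach_flags comes
 * back as 0 and prog_attach_flags must be left unset.
 */
static int query_effective_progs(int cgroup_fd, __u32 *prog_ids, __u32 *prog_cnt)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.query.target_fd = cgroup_fd;
	attr.query.attach_type = BPF_CGROUP_INET_INGRESS;
	attr.query.query_flags = BPF_F_QUERY_EFFECTIVE;
	attr.query.prog_ids = (__u64)(unsigned long)prog_ids;
	attr.query.prog_cnt = *prog_cnt;	/* in: capacity, out: count */
	/* attr.query.prog_attach_flags intentionally left 0 */

	if (syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr)) < 0)
		return -errno;

	*prog_cnt = attr.query.prog_cnt;
	return 0;
}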

kernel/bpf/btf.c

Lines changed: 1 addition & 1 deletion
@@ -3128,7 +3128,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env,
 	if (v->next_member) {
 		const struct btf_type *last_member_type;
 		const struct btf_member *last_member;
-		u16 last_member_type_id;
+		u32 last_member_type_id;
 
 		last_member = btf_type_member(v->t) + v->next_member - 1;
 		last_member_type_id = last_member->type;
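
The one-character type change above matters because BTF type IDs are 32-bit; storing last_member->type in a u16 silently truncates any ID above 65535, which large BTF blobs can reach. A standalone illustration of the failure mode (plain userspace C, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t type_id = 70000;	/* plausible ID in a large BTF */
	uint16_t truncated = type_id;	/* the old bug: only the low 16 bits survive */

	printf("u32: %u, stored in u16: %u\n", type_id, truncated);
	/* prints: u32: 70000, stored in u16: 4464 */
	return 0;
}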

kernel/bpf/cgroup.c

Lines changed: 18 additions & 10 deletions
@@ -1020,6 +1020,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 			      union bpf_attr __user *uattr)
 {
 	__u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
+	bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE;
 	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
 	enum bpf_attach_type type = attr->query.attach_type;
 	enum cgroup_bpf_attach_type from_atype, to_atype;
@@ -1029,8 +1030,12 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	int total_cnt = 0;
 	u32 flags;
 
+	if (effective_query && prog_attach_flags)
+		return -EINVAL;
+
 	if (type == BPF_LSM_CGROUP) {
-		if (attr->query.prog_cnt && prog_ids && !prog_attach_flags)
+		if (!effective_query && attr->query.prog_cnt &&
+		    prog_ids && !prog_attach_flags)
 			return -EINVAL;
 
 		from_atype = CGROUP_LSM_START;
@@ -1045,7 +1050,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	}
 
 	for (atype = from_atype; atype <= to_atype; atype++) {
-		if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+		if (effective_query) {
 			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
 							      lockdep_is_held(&cgroup_mutex));
 			total_cnt += bpf_prog_array_length(effective);
@@ -1054,6 +1059,8 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 		}
 	}
 
+	/* always output uattr->query.attach_flags as 0 during effective query */
+	flags = effective_query ? 0 : flags;
 	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
 		return -EFAULT;
 	if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
@@ -1068,7 +1075,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	}
 
 	for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
-		if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+		if (effective_query) {
 			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
 							      lockdep_is_held(&cgroup_mutex));
 			cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
@@ -1090,15 +1097,16 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 			if (++i == cnt)
 				break;
 		}
-	}
 
-	if (prog_attach_flags) {
-		flags = cgrp->bpf.flags[atype];
+		if (prog_attach_flags) {
+			flags = cgrp->bpf.flags[atype];
 
-		for (i = 0; i < cnt; i++)
-			if (copy_to_user(prog_attach_flags + i, &flags, sizeof(flags)))
-				return -EFAULT;
-		prog_attach_flags += cnt;
+			for (i = 0; i < cnt; i++)
+				if (copy_to_user(prog_attach_flags + i,
+						 &flags, sizeof(flags)))
+					return -EFAULT;
+			prog_attach_flags += cnt;
+		}
 	}
 
 	prog_ids += cnt;
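
The net effect for callers: a query that sets BPF_F_QUERY_EFFECTIVE while also passing a prog_attach_flags buffer is now rejected up front, since the effective array mixes programs inherited from ancestor cgroups and per-program attach flags are not reported for it. A rough userspace sketch of the rejected combination; names, attach type and sizes are again illustrative only.

#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Returns 1 if the kernel rejects the combination as expected. */
static int effective_query_rejects_attach_flags(int cgroup_fd)
{
	__u32 ids[16], flags[16];
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.query.target_fd = cgroup_fd;
	attr.query.attach_type = BPF_CGROUP_INET_EGRESS;
	attr.query.query_flags = BPF_F_QUERY_EFFECTIVE;
	attr.query.prog_ids = (__u64)(unsigned long)ids;
	attr.query.prog_attach_flags = (__u64)(unsigned long)flags;	/* now invalid */
	attr.query.prog_cnt = 16;

	if (syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr)) < 0)
		return errno == EINVAL;
	return 0;
}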

kernel/bpf/helpers.c

Lines changed: 14 additions & 14 deletions
@@ -1627,26 +1627,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ringbuf_discard_proto;
 	case BPF_FUNC_ringbuf_query:
 		return &bpf_ringbuf_query_proto;
-	case BPF_FUNC_ringbuf_reserve_dynptr:
-		return &bpf_ringbuf_reserve_dynptr_proto;
-	case BPF_FUNC_ringbuf_submit_dynptr:
-		return &bpf_ringbuf_submit_dynptr_proto;
-	case BPF_FUNC_ringbuf_discard_dynptr:
-		return &bpf_ringbuf_discard_dynptr_proto;
 	case BPF_FUNC_for_each_map_elem:
 		return &bpf_for_each_map_elem_proto;
 	case BPF_FUNC_loop:
 		return &bpf_loop_proto;
 	case BPF_FUNC_strncmp:
 		return &bpf_strncmp_proto;
-	case BPF_FUNC_dynptr_from_mem:
-		return &bpf_dynptr_from_mem_proto;
-	case BPF_FUNC_dynptr_read:
-		return &bpf_dynptr_read_proto;
-	case BPF_FUNC_dynptr_write:
-		return &bpf_dynptr_write_proto;
-	case BPF_FUNC_dynptr_data:
-		return &bpf_dynptr_data_proto;
 	default:
 		break;
 	}
@@ -1675,6 +1661,20 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_timer_cancel_proto;
 	case BPF_FUNC_kptr_xchg:
 		return &bpf_kptr_xchg_proto;
+	case BPF_FUNC_ringbuf_reserve_dynptr:
+		return &bpf_ringbuf_reserve_dynptr_proto;
+	case BPF_FUNC_ringbuf_submit_dynptr:
+		return &bpf_ringbuf_submit_dynptr_proto;
+	case BPF_FUNC_ringbuf_discard_dynptr:
+		return &bpf_ringbuf_discard_dynptr_proto;
+	case BPF_FUNC_dynptr_from_mem:
+		return &bpf_dynptr_from_mem_proto;
+	case BPF_FUNC_dynptr_read:
+		return &bpf_dynptr_read_proto;
+	case BPF_FUNC_dynptr_write:
+		return &bpf_dynptr_write_proto;
+	case BPF_FUNC_dynptr_data:
+		return &bpf_dynptr_data_proto;
 	default:
 		break;
 	}
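
Note that the dynptr cases are not deleted here; they move from the first switch (helpers available without privileges) into the second one, which is only reached after the capability check in bpf_base_func_proto(). A heavily abridged sketch of that structure, assuming the bpf_capable() guard that sits between the two switch statements in this kernel version:

/* Abridged shape of bpf_base_func_proto() after this change;
 * most cases and the later perfmon/tracing section are omitted.
 */
const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	/* ... helpers usable by unprivileged programs ... */
	case BPF_FUNC_ringbuf_query:
		return &bpf_ringbuf_query_proto;
	default:
		break;
	}

	if (!bpf_capable())
		return NULL;

	/* Helpers that require CAP_BPF; the dynptr and ringbuf-dynptr
	 * cases now live in this switch.
	 */
	switch (func_id) {
	case BPF_FUNC_dynptr_from_mem:
		return &bpf_dynptr_from_mem_proto;
	/* ... */
	default:
		break;
	}
	return NULL;
}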

kernel/bpf/syscall.c

Lines changed: 2 additions & 0 deletions
@@ -4395,7 +4395,9 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 	if (attr->task_fd_query.flags != 0)
 		return -EINVAL;
 
+	rcu_read_lock();
 	task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
+	rcu_read_unlock();
 	if (!task)
 		return -ENOENT;
 
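
find_vpid() walks RCU-protected data, so it must be called under rcu_read_lock() (or with tasklist_lock held); get_pid_task() takes its own reference on the task, which is why the read-side section can end right after the lookup. The same pattern in isolation, as an illustrative kernel-style helper (the function name is made up for the example):

#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

/* Resolve a pid number to a task while holding the RCU read lock only
 * for the lookup itself; get_pid_task() pins the task for us.
 */
static struct task_struct *task_from_pid_nr(pid_t nr)
{
	struct task_struct *task;

	rcu_read_lock();
	task = get_pid_task(find_vpid(nr), PIDTYPE_PID);
	rcu_read_unlock();

	return task;	/* caller must put_task_struct() when done */
}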

net/xdp/xsk.c

Lines changed: 13 additions & 13 deletions
@@ -355,16 +355,15 @@ static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entr
 	return nb_pkts;
 }
 
-u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
+u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 nb_pkts)
 {
 	struct xdp_sock *xs;
-	u32 nb_pkts;
 
 	rcu_read_lock();
 	if (!list_is_singular(&pool->xsk_tx_list)) {
 		/* Fallback to the non-batched version */
 		rcu_read_unlock();
-		return xsk_tx_peek_release_fallback(pool, max_entries);
+		return xsk_tx_peek_release_fallback(pool, nb_pkts);
 	}
 
 	xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
@@ -373,25 +372,26 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries)
 		goto out;
 	}
 
-	max_entries = xskq_cons_nb_entries(xs->tx, max_entries);
-	nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, max_entries);
-	if (!nb_pkts) {
-		xs->tx->queue_empty_descs++;
-		goto out;
-	}
+	nb_pkts = xskq_cons_nb_entries(xs->tx, nb_pkts);
 
 	/* This is the backpressure mechanism for the Tx path. Try to
 	 * reserve space in the completion queue for all packets, but
 	 * if there are fewer slots available, just process that many
 	 * packets. This avoids having to implement any buffering in
 	 * the Tx path.
 	 */
-	nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
+	nb_pkts = xskq_prod_nb_free(pool->cq, nb_pkts);
 	if (!nb_pkts)
 		goto out;
 
-	xskq_cons_release_n(xs->tx, max_entries);
+	nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, nb_pkts);
+	if (!nb_pkts) {
+		xs->tx->queue_empty_descs++;
+		goto out;
+	}
+
 	__xskq_cons_release(xs->tx);
+	xskq_prod_write_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
 	xs->sk.sk_write_space(&xs->sk);
 
 out:
@@ -954,8 +954,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 			goto out_unlock;
 		}
 
-		err = xp_assign_dev_shared(xs->pool, umem_xs->umem,
-					   dev, qid);
+		err = xp_assign_dev_shared(xs->pool, umem_xs, dev,
+					   qid);
 		if (err) {
 			xp_destroy(xs->pool);
 			xs->pool = NULL;
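
The reordering above is the core of the backpressure fix: the budget is first capped by what the Tx ring holds (xskq_cons_nb_entries()), then by the free space in the completion queue (xskq_prod_nb_free()), and only then are descriptors actually consumed, so nothing ever leaves the Tx ring without a guaranteed CQ slot. A toy userspace model of that clamping rule; all names and numbers are illustrative, not kernel code.

#include <stdio.h>

/* Never take more descriptors from the Tx ring than the completion
 * queue can absorb.
 */
static unsigned int clamp_tx_budget(unsigned int tx_entries,
				    unsigned int cq_free,
				    unsigned int requested)
{
	unsigned int budget = requested;

	if (budget > tx_entries)	/* xskq_cons_nb_entries() step */
		budget = tx_entries;
	if (budget > cq_free)		/* xskq_prod_nb_free() step */
		budget = cq_free;
	return budget;			/* only now consume descriptors */
}

int main(void)
{
	/* 32 packets requested, 20 queued on Tx, only 8 CQ slots free:
	 * just 8 descriptors are consumed, the rest stay on the Tx ring.
	 */
	printf("budget = %u\n", clamp_tx_budget(20, 8, 32));
	return 0;
}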

net/xdp/xsk_buff_pool.c

Lines changed: 3 additions & 2 deletions
@@ -212,17 +212,18 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
 	return err;
 }
 
-int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
+int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
 			 struct net_device *dev, u16 queue_id)
 {
 	u16 flags;
+	struct xdp_umem *umem = umem_xs->umem;
 
 	/* One fill and completion ring required for each queue id. */
 	if (!pool->fq || !pool->cq)
 		return -EINVAL;
 
 	flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY;
-	if (pool->uses_need_wakeup)
+	if (umem_xs->pool->uses_need_wakeup)
 		flags |= XDP_USE_NEED_WAKEUP;
 
 	return xp_assign_dev(pool, dev, queue_id, flags);

net/xdp/xsk_queue.h

Lines changed: 10 additions & 12 deletions
@@ -205,6 +205,11 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q,
 	return false;
 }
 
+static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
+{
+	q->cached_cons += cnt;
+}
+
 static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
 					    u32 max)
 {
@@ -226,6 +231,8 @@ static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff
 		cached_cons++;
 	}
 
+	/* Release valid plus any invalid entries */
+	xskq_cons_release_n(q, cached_cons - q->cached_cons);
 	return nb_entries;
 }
 
@@ -291,11 +298,6 @@ static inline void xskq_cons_release(struct xsk_queue *q)
 	q->cached_cons++;
 }
 
-static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt)
-{
-	q->cached_cons += cnt;
-}
-
 static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
 {
 	/* No barriers needed since data is not accessed */
@@ -350,21 +352,17 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
 	return 0;
 }
 
-static inline u32 xskq_prod_reserve_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
-					       u32 max)
+static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
+					      u32 nb_entries)
 {
 	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-	u32 nb_entries, i, cached_prod;
-
-	nb_entries = xskq_prod_nb_free(q, max);
+	u32 i, cached_prod;
 
 	/* A, matches D */
 	cached_prod = q->cached_prod;
 	for (i = 0; i < nb_entries; i++)
 		ring->desc[cached_prod++ & q->ring_mask] = descs[i].addr;
 	q->cached_prod = cached_prod;
-
-	return nb_entries;
 }
 
 static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
