Commit f16d411

Daniel Borkmann says:

====================
pull-request: bpf 2023-09-06

We've added 9 non-merge commits during the last 6 day(s) which contain
a total of 12 files changed, 189 insertions(+), 44 deletions(-).

The main changes are:

1) Fix bpf_sk_storage to address an invalid wait context lockdep report
   and another one to address missing omem uncharge, from Martin KaFai Lau.

2) Two BPF recursion detection related fixes, from Sebastian Andrzej Siewior.

3) Fix tailcall limit enforcement in trampolines for s390 JIT, from
   Ilya Leoshkevich.

4) Fix a sockmap refcount race where skbs in sk_psock_backlog can be
   referenced after user space side has already skb_consumed them,
   from John Fastabend.

5) Fix BPF CI flake/race wrt sockmap vsock write test where the transport
   endpoint is not connected, from Xu Kuohai.

6) Follow-up doc fix to address a cross-link warning, from Eduard Zingerman.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Check bpf_sk_storage has uncharged sk_omem_alloc
  bpf: bpf_sk_storage: Fix the missing uncharge in sk_omem_alloc
  bpf: bpf_sk_storage: Fix invalid wait context lockdep report
  s390/bpf: Pass through tail call counter in trampolines
  bpf: Assign bpf_tramp_run_ctx::saved_run_ctx before recursion check.
  bpf: Invoke __bpf_prog_exit_sleepable_recur() on recursion in kern_sys_bpf().
  bpf, sockmap: Fix skb refcnt race after locking changes
  docs/bpf: Fix "file doesn't exist" warnings in {llvm_reloc,btf}.rst
  selftests/bpf: Fix a CI failure caused by vsock write
====================

Link: https://lore.kernel.org/r/20230906095117.16941-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Merge commit f16d411 (2 parents: 1a961e7 + a96d1cf)

12 files changed, 189 insertions(+), 44 deletions(-)

Documentation/bpf/btf.rst

Lines changed: 1 addition & 1 deletion
@@ -803,7 +803,7 @@ structure when .BTF.ext is generated. All ``bpf_core_relo`` structures
 within a single ``btf_ext_info_sec`` describe relocations applied to
 section named by ``btf_ext_info_sec->sec_name_off``.
 
-See :ref:`Documentation/bpf/llvm_reloc <btf-co-re-relocations>`
+See :ref:`Documentation/bpf/llvm_reloc.rst <btf-co-re-relocations>`
 for more information on CO-RE relocations.
 
 4.2 .BTF_ids section

Documentation/bpf/llvm_reloc.rst

Lines changed: 1 addition & 1 deletion
@@ -250,7 +250,7 @@ CO-RE Relocations
 From object file point of view CO-RE mechanism is implemented as a set
 of CO-RE specific relocation records. These relocation records are not
 related to ELF relocations and are encoded in .BTF.ext section.
-See :ref:`Documentation/bpf/btf <BTF_Ext_Section>` for more
+See :ref:`Documentation/bpf/btf.rst <BTF_Ext_Section>` for more
 information on .BTF.ext structure.
 
 CO-RE relocations are applied to BPF instructions to update immediate

arch/s390/net/bpf_jit_comp.c

Lines changed: 10 additions & 0 deletions
@@ -2088,6 +2088,7 @@ struct bpf_tramp_jit {
          */
         int r14_off;            /* Offset of saved %r14 */
         int run_ctx_off;        /* Offset of struct bpf_tramp_run_ctx */
+        int tccnt_off;          /* Offset of saved tailcall counter */
         int do_fexit;           /* do_fexit: label */
 };
 
@@ -2258,12 +2259,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
         tjit->r14_off = alloc_stack(tjit, sizeof(u64));
         tjit->run_ctx_off = alloc_stack(tjit,
                                         sizeof(struct bpf_tramp_run_ctx));
+        tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
         /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
         tjit->stack_size -= STACK_FRAME_OVERHEAD;
         tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
 
         /* aghi %r15,-stack_size */
         EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+        /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
+        _EMIT6(0xd203f000 | tjit->tccnt_off,
+               0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
         /* stmg %r2,%rN,fwd_reg_args_off(%r15) */
         if (nr_reg_args)
                 EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
@@ -2400,6 +2405,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
                        (nr_stack_args * sizeof(u64) - 1) << 16 |
                        tjit->stack_args_off,
                        0xf000 | tjit->orig_stack_args_off);
+                /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+                _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
                 /* lgr %r1,%r8 */
                 EMIT4(0xb9040000, REG_1, REG_8);
                 /* %r1() */
@@ -2456,6 +2463,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
         if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
                 EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
                               tjit->retval_off);
+        /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+        _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
+               0xf000 | tjit->tccnt_off);
         /* aghi %r15,stack_size */
         EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
         /* Emit an expoline for the following indirect jump. */

kernel/bpf/bpf_local_storage.c

Lines changed: 15 additions & 34 deletions
@@ -553,7 +553,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                          void *value, u64 map_flags, gfp_t gfp_flags)
 {
         struct bpf_local_storage_data *old_sdata = NULL;
-        struct bpf_local_storage_elem *selem = NULL;
+        struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
         struct bpf_local_storage *local_storage;
         unsigned long flags;
         int err;
@@ -607,11 +607,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                 }
         }
 
-        if (gfp_flags == GFP_KERNEL) {
-                selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
-                if (!selem)
-                        return ERR_PTR(-ENOMEM);
-        }
+        /* A lookup has just been done before and concluded a new selem is
+         * needed. The chance of an unnecessary alloc is unlikely.
+         */
+        alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+        if (!alloc_selem)
+                return ERR_PTR(-ENOMEM);
 
         raw_spin_lock_irqsave(&local_storage->lock, flags);
 
@@ -623,13 +624,13 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                  * simple.
                  */
                 err = -EAGAIN;
-                goto unlock_err;
+                goto unlock;
         }
 
         old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
         err = check_flags(old_sdata, map_flags);
         if (err)
-                goto unlock_err;
+                goto unlock;
 
         if (old_sdata && (map_flags & BPF_F_LOCK)) {
                 copy_map_value_locked(&smap->map, old_sdata->data, value,
@@ -638,23 +639,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                 goto unlock;
         }
 
-        if (gfp_flags != GFP_KERNEL) {
-                /* local_storage->lock is held. Hence, we are sure
-                 * we can unlink and uncharge the old_sdata successfully
-                 * later. Hence, instead of charging the new selem now
-                 * and then uncharge the old selem later (which may cause
-                 * a potential but unnecessary charge failure), avoid taking
-                 * a charge at all here (the "!old_sdata" check) and the
-                 * old_sdata will not be uncharged later during
-                 * bpf_selem_unlink_storage_nolock().
-                 */
-                selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
-                if (!selem) {
-                        err = -ENOMEM;
-                        goto unlock_err;
-                }
-        }
-
+        alloc_selem = NULL;
         /* First, link the new selem to the map */
         bpf_selem_link_map(smap, selem);
 
@@ -665,20 +650,16 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
         if (old_sdata) {
                 bpf_selem_unlink_map(SELEM(old_sdata));
                 bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
-                                                false, false);
+                                                true, false);
         }
 
 unlock:
         raw_spin_unlock_irqrestore(&local_storage->lock, flags);
-        return SDATA(selem);
-
-unlock_err:
-        raw_spin_unlock_irqrestore(&local_storage->lock, flags);
-        if (selem) {
+        if (alloc_selem) {
                 mem_uncharge(smap, owner, smap->elem_size);
-                bpf_selem_free(selem, smap, true);
+                bpf_selem_free(alloc_selem, smap, true);
         }
-        return ERR_PTR(err);
+        return err ? ERR_PTR(err) : SDATA(selem);
 }
 
 static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
@@ -779,7 +760,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
                  * of the loop will set the free_cgroup_storage to true.
                  */
                 free_storage = bpf_selem_unlink_storage_nolock(
-                        local_storage, selem, false, true);
+                        local_storage, selem, true, true);
         }
         raw_spin_unlock_irqrestore(&local_storage->lock, flags);

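The bpf_local_storage change above allocates the new selem before taking local_storage->lock in all cases, publishes it under the lock, and frees (and uncharges) the preallocated element afterwards only if it was never published. The general shape of that pattern can be sketched with a small user-space analogue; the names here (cache_upsert, struct node, list_lock) are made up for illustration and this is not the kernel code.

/* User-space sketch: allocate before locking, publish under the lock,
 * free afterwards only if the preallocation was never published.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
        int key;
        int value;
        struct node *next;
};

static struct node *head;
static pthread_spinlock_t list_lock;

/* Insert key/value; if the key already exists, update it in place. */
static int cache_upsert(int key, int value)
{
        struct node *alloc_node, *n;

        /* Allocate before locking: malloc() may sleep or contend, which
         * must not happen while a (raw) spinlock is held. */
        alloc_node = malloc(sizeof(*alloc_node));
        if (!alloc_node)
                return -1;
        alloc_node->key = key;
        alloc_node->value = value;

        pthread_spin_lock(&list_lock);
        for (n = head; n; n = n->next) {
                if (n->key == key) {
                        n->value = value;       /* update in place */
                        break;
                }
        }
        if (!n) {
                alloc_node->next = head;        /* publish the new node */
                head = alloc_node;
                alloc_node = NULL;              /* ownership transferred */
        }
        pthread_spin_unlock(&list_lock);

        /* Only freed when the preallocated node was not used. */
        free(alloc_node);
        return 0;
}

int main(void)
{
        pthread_spin_init(&list_lock, PTHREAD_PROCESS_PRIVATE);
        cache_upsert(1, 100);
        cache_upsert(1, 200);   /* second call frees its unused preallocation */
        printf("head: key=%d value=%d\n", head->key, head->value);
        free(head);
        pthread_spin_destroy(&list_lock);
        return 0;
}
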
kernel/bpf/syscall.c

Lines changed: 1 addition & 1 deletion
@@ -5502,9 +5502,9 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
                 }
 
                 run_ctx.bpf_cookie = 0;
-                run_ctx.saved_run_ctx = NULL;
                 if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
                         /* recursion detected */
+                        __bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
                         bpf_prog_put(prog);
                         return -EBUSY;
                 }


kernel/bpf/trampoline.c

Lines changed: 2 additions & 3 deletions
@@ -926,13 +926,12 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
         migrate_disable();
         might_fault();
 
+        run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
         if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
                 bpf_prog_inc_misses_counter(prog);
                 return 0;
         }
-
-        run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
-
         return bpf_prog_start_time();
 }

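Taken together, the two hunks above make __bpf_prog_exit_sleepable_recur() safe to call on the recursion path: saved_run_ctx is assigned before the recursion check, so the common exit helper always restores a valid context. A minimal user-space sketch of that save-before-guard ordering follows; prog_enter, prog_exit, and this struct run_ctx are illustrative names, not the kernel API.

/* Sketch of the "save the previous context before the recursion check"
 * ordering, so the error path can share the normal exit path.
 */
#include <stdbool.h>
#include <stdio.h>

struct run_ctx {
        struct run_ctx *saved;  /* previous context, restored on exit */
        int cookie;
};

static __thread struct run_ctx *current_ctx;
static __thread int active;     /* per-thread recursion counter */

/* Returns false when recursion is detected. Either way, ->saved is
 * already valid, so the caller may use the single exit path below. */
static bool prog_enter(struct run_ctx *ctx)
{
        ctx->saved = current_ctx;       /* must happen before the guard */
        current_ctx = ctx;

        if (++active != 1)
                return false;           /* recursion detected */
        return true;
}

static void prog_exit(struct run_ctx *ctx)
{
        current_ctx = ctx->saved;       /* restore unconditionally */
        active--;
}

static void run_prog(int depth)
{
        struct run_ctx ctx = { .cookie = depth };

        if (!prog_enter(&ctx)) {
                /* Error path still goes through the common exit, which is
                 * only safe because ctx.saved was assigned above. */
                prog_exit(&ctx);
                return;
        }
        if (depth < 2)
                run_prog(depth + 1);    /* provoke the recursion check */
        prog_exit(&ctx);
}

int main(void)
{
        run_prog(0);
        printf("current_ctx restored to %p\n", (void *)current_ctx);
        return 0;
}
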
net/core/skmsg.c

Lines changed: 8 additions & 4 deletions
@@ -612,12 +612,18 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
                                u32 off, u32 len, bool ingress)
 {
+        int err = 0;
+
         if (!ingress) {
                 if (!sock_writeable(psock->sk))
                         return -EAGAIN;
                 return skb_send_sock(psock->sk, skb, off, len);
         }
-        return sk_psock_skb_ingress(psock, skb, off, len);
+        skb_get(skb);
+        err = sk_psock_skb_ingress(psock, skb, off, len);
+        if (err < 0)
+                kfree_skb(skb);
+        return err;
 }
 
 static void sk_psock_skb_state(struct sk_psock *psock,
@@ -685,9 +691,7 @@ static void sk_psock_backlog(struct work_struct *work)
                 } while (len);
 
                 skb = skb_dequeue(&psock->ingress_skb);
-                if (!ingress) {
-                        kfree_skb(skb);
-                }
+                kfree_skb(skb);
         }
 end:
         mutex_unlock(&psock->work_mutex);
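The skmsg change pins the skb with skb_get() before handing it to the ingress path and drops that reference on failure, while the backlog now always frees the skb it dequeues. A stripped-down user-space analogue of this reference-counting handoff is sketched below; buf_get/buf_put and struct buf are hypothetical stand-ins, not the skb API.

/* Analogue of the handoff rule: the queue owner always drops its own
 * reference; the handler takes an extra reference for the consumer and
 * drops it again if the consumer never took ownership.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct buf {
        atomic_int refcnt;
        int data;
};

static struct buf *buf_alloc(int data)
{
        struct buf *b = malloc(sizeof(*b));

        if (!b)
                return NULL;
        atomic_init(&b->refcnt, 1);
        b->data = data;
        return b;
}

static void buf_get(struct buf *b)
{
        atomic_fetch_add(&b->refcnt, 1);
}

static void buf_put(struct buf *b)
{
        if (atomic_fetch_sub(&b->refcnt, 1) == 1) {
                printf("freeing buf %d\n", b->data);
                free(b);
        }
}

/* Consumer takes ownership of one reference on success only. */
static int consume(struct buf *b, int fail)
{
        if (fail)
                return -1;
        printf("consumed buf %d\n", b->data);
        buf_put(b);             /* consumer drops the reference it was given */
        return 0;
}

static void handle(struct buf *b, int fail)
{
        int err;

        buf_get(b);             /* extra reference for the consumer */
        err = consume(b, fail);
        if (err < 0)
                buf_put(b);     /* consumer never took it; drop it here */
}

int main(void)
{
        struct buf *b = buf_alloc(42);

        handle(b, 0);   /* success: consumer drops its reference */
        handle(b, 1);   /* failure: handle() drops the extra reference */
        buf_put(b);     /* the queue owner always drops its own reference */
        return 0;
}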

tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "sk_storage_omem_uncharge.skel.h"
+
+void test_sk_storage_omem_uncharge(void)
+{
+        struct sk_storage_omem_uncharge *skel;
+        int sk_fd = -1, map_fd, err, value;
+        socklen_t optlen;
+
+        skel = sk_storage_omem_uncharge__open_and_load();
+        if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+                return;
+        map_fd = bpf_map__fd(skel->maps.sk_storage);
+
+        /* A standalone socket not binding to addr:port,
+         * so nentns is not needed.
+         */
+        sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
+        if (!ASSERT_GE(sk_fd, 0, "socket"))
+                goto done;
+
+        optlen = sizeof(skel->bss->cookie);
+        err = getsockopt(sk_fd, SOL_SOCKET, SO_COOKIE, &skel->bss->cookie, &optlen);
+        if (!ASSERT_OK(err, "getsockopt(SO_COOKIE)"))
+                goto done;
+
+        value = 0;
+        err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+        if (!ASSERT_OK(err, "bpf_map_update_elem(value=0)"))
+                goto done;
+
+        value = 0xdeadbeef;
+        err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+        if (!ASSERT_OK(err, "bpf_map_update_elem(value=0xdeadbeef)"))
+                goto done;
+
+        err = sk_storage_omem_uncharge__attach(skel);
+        if (!ASSERT_OK(err, "attach"))
+                goto done;
+
+        close(sk_fd);
+        sk_fd = -1;
+
+        ASSERT_EQ(skel->bss->cookie_found, 2, "cookie_found");
+        ASSERT_EQ(skel->bss->omem, 0, "omem");
+
+done:
+        sk_storage_omem_uncharge__destroy(skel);
+        if (sk_fd != -1)
+                close(sk_fd);
+}

tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h

Lines changed: 26 additions & 0 deletions
@@ -179,6 +179,32 @@
         __ret;                                                          \
 })
 
+static inline int poll_connect(int fd, unsigned int timeout_sec)
+{
+        struct timeval timeout = { .tv_sec = timeout_sec };
+        fd_set wfds;
+        int r, eval;
+        socklen_t esize = sizeof(eval);
+
+        FD_ZERO(&wfds);
+        FD_SET(fd, &wfds);
+
+        r = select(fd + 1, NULL, &wfds, NULL, &timeout);
+        if (r == 0)
+                errno = ETIME;
+        if (r != 1)
+                return -1;
+
+        if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
+                return -1;
+        if (eval != 0) {
+                errno = eval;
+                return -1;
+        }
+
+        return 0;
+}
+
 static inline int poll_read(int fd, unsigned int timeout_sec)
 {
         struct timeval timeout = { .tv_sec = timeout_sec };

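The new poll_connect() helper is meant to follow a non-blocking connect(): once connect() returns -1 with EINPROGRESS, the helper waits for the socket to become writable and then checks SO_ERROR. The vsock fix in the next file applies exactly this call after connect(). Below is a self-contained sketch of the same usage over a loopback TCP socket; the helper body is copied from the hunk above, and the surrounding setup is illustrative only.

/* Standalone usage sketch for poll_connect() with a non-blocking connect(). */
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <unistd.h>

static int poll_connect(int fd, unsigned int timeout_sec)
{
        struct timeval timeout = { .tv_sec = timeout_sec };
        fd_set wfds;
        int r, eval;
        socklen_t esize = sizeof(eval);

        FD_ZERO(&wfds);
        FD_SET(fd, &wfds);

        r = select(fd + 1, NULL, &wfds, NULL, &timeout);
        if (r == 0)
                errno = ETIME;
        if (r != 1)
                return -1;

        if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
                return -1;
        if (eval != 0) {
                errno = eval;
                return -1;
        }

        return 0;
}

int main(void)
{
        struct sockaddr_in addr = { .sin_family = AF_INET };
        socklen_t alen = sizeof(addr);
        int srv, cli;

        addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

        /* Local listener on an ephemeral port so the example is self-contained. */
        srv = socket(AF_INET, SOCK_STREAM, 0);
        if (srv < 0 || bind(srv, (struct sockaddr *)&addr, alen) ||
            listen(srv, 1) || getsockname(srv, (struct sockaddr *)&addr, &alen))
                return 1;

        cli = socket(AF_INET, SOCK_STREAM, 0);
        if (cli < 0 || fcntl(cli, F_SETFL, O_NONBLOCK) < 0)
                return 1;

        /* A non-blocking connect() typically returns -1/EINPROGRESS; wait for
         * it to complete before treating the socket as connected. */
        if (connect(cli, (struct sockaddr *)&addr, alen) < 0 &&
            errno != EINPROGRESS)
                return 1;
        if (poll_connect(cli, 3 /* seconds */) < 0) {
                perror("poll_connect");
                return 1;
        }
        printf("connected\n");
        close(cli);
        close(srv);
        return 0;
}
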
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c

Lines changed: 7 additions & 0 deletions
@@ -1452,11 +1452,18 @@ static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
         if (p < 0)
                 goto close_cli;
 
+        if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
+                FAIL_ERRNO("poll_connect");
+                goto close_acc;
+        }
+
         *v0 = p;
         *v1 = c;
 
         return 0;
 
+close_acc:
+        close(p);
 close_cli:
         close(c);
 close_srv:
