Skip to content

Commit 1ef09e1

Browse files
author
Martin KaFai Lau
committed
Merge branch 'bpf: Fix src IP addr related limitation in bpf_*_fib_lookup()'
Martynas Pumputis says: ==================== This patchset fixes a limitation of the bpf_*_fib_lookup() helpers that prevents them from being used in BPF dataplanes with network interfaces that have more than one IP address. See the first patch for more details. Thanks! * v2->v3: Address Martin KaFai Lau's feedback. * v1->v2: Use IPv6 stubs to fix compilation when CONFIG_IPV6=m. ==================== Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
2 parents 1be84ca + b0f7a8c commit 1ef09e1

File tree

6 files changed

+120
-7
lines changed

6 files changed

+120
-7
lines changed

include/net/ipv6_stubs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ struct ipv6_bpf_stub {
8585
sockptr_t optval, unsigned int optlen);
8686
int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
8787
sockptr_t optval, sockptr_t optlen);
88+
int (*ipv6_dev_get_saddr)(struct net *net,
89+
const struct net_device *dst_dev,
90+
const struct in6_addr *daddr,
91+
unsigned int prefs,
92+
struct in6_addr *saddr);
8893
};
8994
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
9095

include/uapi/linux/bpf.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3264,6 +3264,11 @@ union bpf_attr {
32643264
* and *params*->smac will not be set as output. A common
32653265
* use case is to call **bpf_redirect_neigh**\ () after
32663266
* doing **bpf_fib_lookup**\ ().
3267+
* **BPF_FIB_LOOKUP_SRC**
3268+
* Derive and set source IP addr in *params*->ipv{4,6}_src
3269+
* for the nexthop. If the src addr cannot be derived,
3270+
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
3271+
* case, *params*->dmac and *params*->smac are not set either.
32673272
*
32683273
* *ctx* is either **struct xdp_md** for XDP programs or
32693274
* **struct sk_buff** tc cls_act programs.
@@ -6964,6 +6969,7 @@ enum {
69646969
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
69656970
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
69666971
BPF_FIB_LOOKUP_TBID = (1U << 3),
6972+
BPF_FIB_LOOKUP_SRC = (1U << 4),
69676973
};
69686974

69696975
enum {
@@ -6976,6 +6982,7 @@ enum {
69766982
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
69776983
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
69786984
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
6985+
BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
69796986
};
69806987

69816988
struct bpf_fib_lookup {
@@ -7010,6 +7017,9 @@ struct bpf_fib_lookup {
70107017
__u32 rt_metric;
70117018
};
70127019

7020+
/* input: source address to consider for lookup
7021+
* output: source address result from lookup
7022+
*/
70137023
union {
70147024
__be32 ipv4_src;
70157025
__u32 ipv6_src[4]; /* in6_addr; network order */

net/core/filter.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5850,6 +5850,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
58505850
params->rt_metric = res.fi->fib_priority;
58515851
params->ifindex = dev->ifindex;
58525852

5853+
if (flags & BPF_FIB_LOOKUP_SRC)
5854+
params->ipv4_src = fib_result_prefsrc(net, &res);
5855+
58535856
/* xdp and cls_bpf programs are run in RCU-bh so
58545857
* rcu_read_lock_bh is not needed here
58555858
*/
@@ -5992,6 +5995,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
59925995
params->rt_metric = res.f6i->fib6_metric;
59935996
params->ifindex = dev->ifindex;
59945997

5998+
if (flags & BPF_FIB_LOOKUP_SRC) {
5999+
if (res.f6i->fib6_prefsrc.plen) {
6000+
*src = res.f6i->fib6_prefsrc.addr;
6001+
} else {
6002+
err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
6003+
&fl6.daddr, 0,
6004+
src);
6005+
if (err)
6006+
return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
6007+
}
6008+
}
6009+
59956010
if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
59966011
goto set_fwd_params;
59976012

@@ -6010,7 +6025,8 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
60106025
#endif
60116026

60126027
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
6013-
BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID)
6028+
BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
6029+
BPF_FIB_LOOKUP_SRC)
60146030

60156031
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
60166032
struct bpf_fib_lookup *, params, int, plen, u32, flags)

net/ipv6/af_inet6.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
10611061
.udp6_lib_lookup = __udp6_lib_lookup,
10621062
.ipv6_setsockopt = do_ipv6_setsockopt,
10631063
.ipv6_getsockopt = do_ipv6_getsockopt,
1064+
.ipv6_dev_get_saddr = ipv6_dev_get_saddr,
10641065
};
10651066

10661067
static int __init inet6_init(void)

tools/include/uapi/linux/bpf.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3264,6 +3264,11 @@ union bpf_attr {
32643264
* and *params*->smac will not be set as output. A common
32653265
* use case is to call **bpf_redirect_neigh**\ () after
32663266
* doing **bpf_fib_lookup**\ ().
3267+
* **BPF_FIB_LOOKUP_SRC**
3268+
* Derive and set source IP addr in *params*->ipv{4,6}_src
3269+
* for the nexthop. If the src addr cannot be derived,
3270+
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
3271+
* case, *params*->dmac and *params*->smac are not set either.
32673272
*
32683273
* *ctx* is either **struct xdp_md** for XDP programs or
32693274
* **struct sk_buff** tc cls_act programs.
@@ -6964,6 +6969,7 @@ enum {
69646969
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
69656970
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
69666971
BPF_FIB_LOOKUP_TBID = (1U << 3),
6972+
BPF_FIB_LOOKUP_SRC = (1U << 4),
69676973
};
69686974

69696975
enum {
@@ -6976,6 +6982,7 @@ enum {
69766982
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
69776983
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
69786984
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
6985+
BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
69796986
};
69806987

69816988
struct bpf_fib_lookup {
@@ -7010,6 +7017,9 @@ struct bpf_fib_lookup {
70107017
__u32 rt_metric;
70117018
};
70127019

7020+
/* input: source address to consider for lookup
7021+
* output: source address result from lookup
7022+
*/
70137023
union {
70147024
__be32 ipv4_src;
70157025
__u32 ipv6_src[4]; /* in6_addr; network order */

tools/testing/selftests/bpf/prog_tests/fib_lookup.c

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@
1111

1212
#define NS_TEST "fib_lookup_ns"
1313
#define IPV6_IFACE_ADDR "face::face"
14+
#define IPV6_IFACE_ADDR_SEC "cafe::cafe"
15+
#define IPV6_ADDR_DST "face::3"
1416
#define IPV6_NUD_FAILED_ADDR "face::1"
1517
#define IPV6_NUD_STALE_ADDR "face::2"
1618
#define IPV4_IFACE_ADDR "10.0.0.254"
19+
#define IPV4_IFACE_ADDR_SEC "10.1.0.254"
20+
#define IPV4_ADDR_DST "10.2.0.254"
1721
#define IPV4_NUD_FAILED_ADDR "10.0.0.1"
1822
#define IPV4_NUD_STALE_ADDR "10.0.0.2"
1923
#define IPV4_TBID_ADDR "172.0.0.254"
@@ -31,6 +35,7 @@ struct fib_lookup_test {
3135
const char *desc;
3236
const char *daddr;
3337
int expected_ret;
38+
const char *expected_src;
3439
int lookup_flags;
3540
__u32 tbid;
3641
__u8 dmac[6];
@@ -69,6 +74,22 @@ static const struct fib_lookup_test tests[] = {
6974
.daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
7075
.lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
7176
.dmac = DMAC_INIT2, },
77+
{ .desc = "IPv4 set src addr from netdev",
78+
.daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
79+
.expected_src = IPV4_IFACE_ADDR,
80+
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
81+
{ .desc = "IPv6 set src addr from netdev",
82+
.daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
83+
.expected_src = IPV6_IFACE_ADDR,
84+
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
85+
{ .desc = "IPv4 set prefsrc addr from route",
86+
.daddr = IPV4_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
87+
.expected_src = IPV4_IFACE_ADDR_SEC,
88+
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
89+
{ .desc = "IPv6 set prefsrc addr route",
90+
.daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
91+
.expected_src = IPV6_IFACE_ADDR_SEC,
92+
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
7293
};
7394

7495
static int ifindex;
@@ -97,6 +118,13 @@ static int setup_netns(void)
97118
SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
98119
SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
99120

121+
/* Setup for prefsrc IP addr selection */
122+
SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR_SEC);
123+
SYS(fail, "ip route add %s/32 dev veth1 src %s", IPV4_ADDR_DST, IPV4_IFACE_ADDR_SEC);
124+
125+
SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR_SEC);
126+
SYS(fail, "ip route add %s/128 dev veth1 src %s", IPV6_ADDR_DST, IPV6_IFACE_ADDR_SEC);
127+
100128
/* Setup for tbid lookup tests */
101129
SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR);
102130
SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET);
@@ -133,19 +161,25 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
133161

134162
if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
135163
params->family = AF_INET6;
136-
ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
137-
if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
138-
return -1;
164+
if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
165+
ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
166+
if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
167+
return -1;
168+
}
169+
139170
return 0;
140171
}
141172

142173
ret = inet_pton(AF_INET, test->daddr, &params->ipv4_dst);
143174
if (!ASSERT_EQ(ret, 1, "convert IP[46] address"))
144175
return -1;
145176
params->family = AF_INET;
146-
ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
147-
if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)"))
148-
return -1;
177+
178+
if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
179+
ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
180+
if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)"))
181+
return -1;
182+
}
149183

150184
return 0;
151185
}
@@ -156,6 +190,40 @@ static void mac_str(char *b, const __u8 *mac)
156190
mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
157191
}
158192

193+
static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src)
194+
{
195+
int ret;
196+
__u32 src6[4];
197+
__be32 src4;
198+
199+
switch (fib_params->family) {
200+
case AF_INET6:
201+
ret = inet_pton(AF_INET6, expected_src, src6);
202+
ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
203+
204+
ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src));
205+
if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) {
206+
char str_src6[64];
207+
208+
inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6,
209+
sizeof(str_src6));
210+
printf("ipv6 expected %s actual %s ", expected_src,
211+
str_src6);
212+
}
213+
214+
break;
215+
case AF_INET:
216+
ret = inet_pton(AF_INET, expected_src, &src4);
217+
ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
218+
219+
ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src");
220+
221+
break;
222+
default:
223+
PRINT_FAIL("invalid addr family: %d", fib_params->family);
224+
}
225+
}
226+
159227
void test_fib_lookup(void)
160228
{
161229
struct bpf_fib_lookup *fib_params;
@@ -207,6 +275,9 @@ void test_fib_lookup(void)
207275
ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret,
208276
"fib_lookup_ret");
209277

278+
if (tests[i].expected_src)
279+
assert_src_ip(fib_params, tests[i].expected_src);
280+
210281
ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
211282
if (!ASSERT_EQ(ret, 0, "dmac not match")) {
212283
char expected[18], actual[18];

0 commit comments

Comments
 (0)