From 83b9795ec96e7e326cc3dee4844a65b26b87604b Mon Sep 17 00:00:00 2001 From: Yoel Caspersen Date: Sun, 24 Oct 2021 15:02:15 +0200 Subject: [PATCH 1/7] Adding custom memcpy and memset functions to pass verifier checks Built-in memcpy and memset functions cause verifier to complain about misaligned stack access. To get around that (and be able to copy a dynamic number of bytes via memcpy), two custom functions are added. This version of the relay uses interface name + VLAN identifiers in option 82 circuit ID (left-aligned, length IF_NAMESIZE with trailing null bytes). Signed-off-by: Yoel Caspersen --- dhcp-relay/Makefile | 1 + dhcp-relay/dhcp-relay.h | 4 +- dhcp-relay/dhcp_kern_xdp.c | 101 +++++++++++++++++++++++-------------- 3 files changed, 65 insertions(+), 41 deletions(-) diff --git a/dhcp-relay/Makefile b/dhcp-relay/Makefile index 964f707..6a3f5f6 100644 --- a/dhcp-relay/Makefile +++ b/dhcp-relay/Makefile @@ -4,6 +4,7 @@ USER_TARGETS := dhcp_user_xdp BPF_TARGETS :=dhcp_kern_xdp EXTRA_DEPS := dhcp-relay.h #EXTRA_CFLAGS := $(if $(IPV6),-DIPV6) +EXTRA_CFLAGS := -fno-builtin LIB_DIR = ../lib diff --git a/dhcp-relay/dhcp-relay.h b/dhcp-relay/dhcp-relay.h index 09bca44..ab7fec9 100644 --- a/dhcp-relay/dhcp-relay.h +++ b/dhcp-relay/dhcp-relay.h @@ -10,7 +10,7 @@ #define DHO_DHCP_AGENT_OPTIONS 82 #define RAI_CIRCUIT_ID 1 #define RAI_REMOTE_ID 2 -#define RAI_OPTION_LEN 40 +#define RAI_OPTION_LEN IF_NAMESIZE #define VLAN_ASCII_MAX 4 /* Max bytes needed to store VLAN in ASCII format */ #define DHCP_SERVER_PORT 67 @@ -22,7 +22,7 @@ struct sub_option { __u8 option_id; __u8 len; - char val[IF_NAMESIZE]; + char val[RAI_OPTION_LEN]; }; /*structure for dhcp option 82 */ diff --git a/dhcp-relay/dhcp_kern_xdp.c b/dhcp-relay/dhcp_kern_xdp.c index 56cd040..d19bae2 100644 --- a/dhcp-relay/dhcp_kern_xdp.c +++ b/dhcp-relay/dhcp_kern_xdp.c @@ -8,6 +8,13 @@ #include #include "dhcp-relay.h" +#define bpf_printk(fmt, ...) 
\ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + /* * This map is for storing the DHCP relay configuration, including: * @@ -49,35 +56,63 @@ struct { __uint(max_entries, 16384); } client_vlans SEC(".maps"); +void memcpy_var(void *to, void *from, __u64 len) { + __u8 *t8 = to, *f8 = from; + int i; + + for (i = 0; i < len; i++) { + *t8++ = *f8++; + } + +} + +void memset_var(void *d, __u8 c, __u64 len) { + __u8 *d8 = d; + int i; + + for (i = 0; i < len; i++) { + *d8++ = c; + } + +} + /* Inserts DHCP option 82 into the received DHCP packet * at the specified offset. */ static __always_inline int write_dhcp_option_82(void *ctx, int offset, struct collect_vlans *vlans, char *dev) { + struct dhcp_option_82 option; - + option.t = DHO_DHCP_AGENT_OPTIONS; option.len = sizeof (struct sub_option) + sizeof (struct sub_option); option.circuit_id.option_id = RAI_CIRCUIT_ID; - option.circuit_id.len = sizeof(option.circuit_id.val); + option.circuit_id.len = sizeof (option.circuit_id.val); + option.remote_id.option_id = RAI_REMOTE_ID; + option.remote_id.len = sizeof (option.remote_id.val); + + /* Initialize val arrays */ + memset_var(option.circuit_id.val, 0, sizeof (option.circuit_id.val)); + memset_var(option.remote_id.val, '*', sizeof (option.remote_id.val)); + //memset(option.circuit_id.val, 0, sizeof (option.circuit_id.val)); + //memset(option.remote_id.val, '*', sizeof (option.remote_id.val)); /* Reconstruct VLAN device name * Convert VLAN tags to ASCII from right to left, starting with * inner VLAN tag. - * Device name is 16 characters long and prepended with dash, e.g.: - * ----ens6f0.83.20 - * We avoid null bytes to ensure compatibility with DHCP servers that - * interpret null as a string terminator. + * Device name is up to 16 characters long - remaining buffer space + * contains null bytes. 
*/ - - char buf[IF_NAMESIZE]; - memset(buf, '-', sizeof (buf)); + + char buf[RAI_OPTION_LEN]; + memset(buf, 0, sizeof (buf)); - int c = VLAN_ASCII_MAX; /* We will need 4 bytes at most */ - int i = IF_NAMESIZE - 1; + int c = VLAN_ASCII_MAX; /* We will need 4 bytes at most */ + int i = RAI_OPTION_LEN - 1; __u16 inner_vlan = vlans->id[1]; __u16 outer_vlan = vlans->id[0]; - + + /* Convert inner VLAN to ASCII */ for (c = VLAN_ASCII_MAX; c > 0; c--) { buf[i--] = (inner_vlan % 10) + '0'; inner_vlan /= 10; @@ -85,9 +120,10 @@ static __always_inline int write_dhcp_option_82(void *ctx, int offset, break; } } - + buf[i--] = '.'; - + + /* Convert outer VLAN to ASCII */ for (c = VLAN_ASCII_MAX; c > 0; c--) { buf[i--] = (outer_vlan % 10) + '0'; outer_vlan /= 10; @@ -95,31 +131,25 @@ static __always_inline int write_dhcp_option_82(void *ctx, int offset, break; } } - + buf[i--] = '.'; - - for (c = IF_NAMESIZE - 1; c >= 0; c--) { - if (dev[c] != 0) { + /* Append interface name */ + for (c = RAI_OPTION_LEN - 1; c >= 0; c--) { + if (dev[c] != 0) buf[i--] = dev[c]; - } - - if (i < 0) { - break; - } - - } - - if(sizeof(option.circuit_id.val) == sizeof(buf)) { - memcpy(option.circuit_id.val, buf, sizeof(buf)); + if (i < 0) + break; } - /* Initialize remote ID */ - memset(option.remote_id.val, 0, sizeof(option.remote_id.val)); - option.remote_id.option_id = RAI_REMOTE_ID; - option.remote_id.len = sizeof(option.remote_id.val); + i++; + /* Copy resulting interface name to circuit_id */ + if (sizeof (option.circuit_id.val) == sizeof (buf)) { + memcpy_var(option.circuit_id.val, buf + i, sizeof (buf) - i); + } + return xdp_store_bytes(ctx, offset, &option, sizeof (option), 0); } @@ -166,13 +196,6 @@ static __always_inline int calc_ip_csum(struct iphdr *oldip, struct iphdr *ip, */ //static __u8 buf[static_offset + VLAN_MAX_DEPTH * sizeof (struct vlan_hdr)]; -#define bpf_printk(fmt, ...) 
\ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - /* XDP program for parsing the DHCP packet and inserting the option 82*/ SEC(XDP_PROG_SEC) int xdp_dhcp_relay(struct xdp_md *ctx) { From 8d91b1409e3050da7e1fb3c3a9672ce95883e982 Mon Sep 17 00:00:00 2001 From: Yoel Caspersen Date: Mon, 25 Oct 2021 22:23:16 +0200 Subject: [PATCH 2/7] Upstream direction works with QinQ packets only When allowing packets with 1 VLAN tag only, verifier complains about BPF program being too large. Compiled with LLVM 13. Signed-off-by: Yoel Caspersen --- dhcp-relay/dhcp-relay.h | 2 ++ dhcp-relay/dhcp_kern_xdp.c | 47 +++++++++++++++++++++++++------------- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/dhcp-relay/dhcp-relay.h b/dhcp-relay/dhcp-relay.h index ab7fec9..00cd7ea 100644 --- a/dhcp-relay/dhcp-relay.h +++ b/dhcp-relay/dhcp-relay.h @@ -18,6 +18,8 @@ #define DHCP_REQUEST 1 #define DHCP_REPLY 2 +#define MAX_LOOPS 40 + /* Structure for sub-options in option 82 */ struct sub_option { __u8 option_id; diff --git a/dhcp-relay/dhcp_kern_xdp.c b/dhcp-relay/dhcp_kern_xdp.c index d19bae2..27df8ba 100644 --- a/dhcp-relay/dhcp_kern_xdp.c +++ b/dhcp-relay/dhcp_kern_xdp.c @@ -60,7 +60,7 @@ void memcpy_var(void *to, void *from, __u64 len) { __u8 *t8 = to, *f8 = from; int i; - for (i = 0; i < len; i++) { + for (i = 0; i < len && i < MAX_LOOPS; i++) { *t8++ = *f8++; } @@ -70,7 +70,7 @@ void memset_var(void *d, __u8 c, __u64 len) { __u8 *d8 = d; int i; - for (i = 0; i < len; i++) { + for (i = 0; i < len && i < MAX_LOOPS; i++) { *d8++ = c; } @@ -84,6 +84,8 @@ static __always_inline int write_dhcp_option_82(void *ctx, int offset, struct dhcp_option_82 option; + static __u8 buf[RAI_OPTION_LEN]; + option.t = DHO_DHCP_AGENT_OPTIONS; option.len = sizeof (struct sub_option) + sizeof (struct sub_option); option.circuit_id.option_id = RAI_CIRCUIT_ID; @@ -104,15 +106,16 @@ static __always_inline int write_dhcp_option_82(void *ctx, int 
offset, * contains null bytes. */ - char buf[RAI_OPTION_LEN]; memset(buf, 0, sizeof (buf)); int c = VLAN_ASCII_MAX; /* We will need 4 bytes at most */ int i = RAI_OPTION_LEN - 1; + __u16 inner_vlan = vlans->id[1]; __u16 outer_vlan = vlans->id[0]; /* Convert inner VLAN to ASCII */ +#pragma unroll VLAN_ASCII_MAX for (c = VLAN_ASCII_MAX; c > 0; c--) { buf[i--] = (inner_vlan % 10) + '0'; inner_vlan /= 10; @@ -124,6 +127,7 @@ static __always_inline int write_dhcp_option_82(void *ctx, int offset, buf[i--] = '.'; /* Convert outer VLAN to ASCII */ +#pragma unroll VLAN_ASCII_MAX for (c = VLAN_ASCII_MAX; c > 0; c--) { buf[i--] = (outer_vlan % 10) + '0'; outer_vlan /= 10; @@ -132,10 +136,10 @@ static __always_inline int write_dhcp_option_82(void *ctx, int offset, } } - buf[i--] = '.'; /* Append interface name */ +#pragma unroll RAI_OPTION_LEN for (c = RAI_OPTION_LEN - 1; c >= 0; c--) { if (dev[c] != 0) buf[i--] = dev[c]; @@ -148,8 +152,9 @@ static __always_inline int write_dhcp_option_82(void *ctx, int offset, /* Copy resulting interface name to circuit_id */ if (sizeof (option.circuit_id.val) == sizeof (buf)) { memcpy_var(option.circuit_id.val, buf + i, sizeof (buf) - i); + //memcpy_var(option.circuit_id.val, buf, sizeof (buf)); } - + return xdp_store_bytes(ctx, offset, &option, sizeof (option), 0); } @@ -206,10 +211,10 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { int res = bpf_xdp_adjust_tail(ctx, delta); if (res != 0) { bpf_printk("Cannot tail extend packet, delta %i - error code %i", delta, res); - return XDP_ABORTED; + return XDP_PASS; } - bpf_printk("Tail extended packet by %i bytes", delta); + //bpf_printk("Tail extended packet by %i bytes", delta); void *data_end = (void *) (long) ctx->data_end; void *data = (void *) (long) ctx->data; @@ -240,27 +245,34 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { int key = 0; int len = 0; - if (data + 1 > data_end) - return XDP_ABORTED; + if (data + 1 > data_end) { + bpf_printk("Empty packet\n"); + goto out; + } nh.pos = data; 
ether_type = parse_ethhdr_vlan(&nh, data_end, &eth, &vlans); /* check for valid ether type */ if (ether_type < 0) { - bpf_printk("Cannot determine ethertype"); - rc = XDP_ABORTED; + bpf_printk("Cannot determine ethertype\n"); goto out; } + if (ether_type != bpf_htons(ETH_P_IP)) { - bpf_printk("Ethertype %#x is not ETH_P_IP", bpf_ntohs(ether_type)); + //bpf_printk("Ethertype %x is not ETH_P_IP\n", bpf_ntohs(ether_type)); goto out; } - bpf_printk("Ethertype %x", bpf_ntohs(ether_type)); - + bpf_printk("Ethertype %x\n", bpf_ntohs(ether_type)); + /* Check at least two vlan tags are present */ + if (vlans.id[0] == 0) { + bpf_printk("No outer VLAN tag set\n"); + goto out; + } + if (vlans.id[1] == 0) { - bpf_printk("No VLAN tags set"); + bpf_printk("No inner VLAN tag set\n"); goto out; } @@ -287,6 +299,8 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { /* Increase UDP length header */ udp->len += bpf_htons(delta); + + udp->check = 0; /* Read DHCP server IP from config map */ key = 0; @@ -317,6 +331,7 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { // goto out; /* Increment offset by 4 bytes for each VLAN (to accomodate VLAN headers */ +#pragma unroll VLAN_MAX_DEPTH for (i = 0; i < VLAN_MAX_DEPTH; i++) { if (vlans.id[i]) { @@ -449,7 +464,7 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { bpf_printk("Could not write DHCP option 82 at offset %i", option_offset); return XDP_ABORTED; } - + /* Set END option */ /* Verifier check */ From 17f1da535f6d27c95e826eceb5e5993de87e52df Mon Sep 17 00:00:00 2001 From: Yoel Caspersen Date: Fri, 5 Nov 2021 14:36:44 +0100 Subject: [PATCH 3/7] First working prototype (IPv4 DHCP relay works in both directions) In this version of the relay, we assume that client requests are received on a double tagged VLAN interface and server replies are received on a single tagged VLAN interface.
Test scripts are added: - test.sh (configures BNG and sets L3 config) - cleanup_test.sh (removes BNG configuration) - trace.sh (opens trace pipe for debugging) The code for adding interface name is disabled because it makes the verifier complain about the BPF program being too large. To-do: - Fix verifier issue to include interface name in option 82 - For server replies, erase option 82 instead of (partial) overwrite Signed-off-by: Yoel Caspersen --- dhcp-relay/cleanup_test.sh | 27 +++ dhcp-relay/dhcp-relay.h | 5 +- dhcp-relay/dhcp_kern_xdp.c | 398 ++++++++++++++++++++++++------------- dhcp-relay/test.sh | 65 ++++++ dhcp-relay/trace.sh | 3 + 5 files changed, 356 insertions(+), 142 deletions(-) create mode 100755 dhcp-relay/cleanup_test.sh create mode 100755 dhcp-relay/test.sh create mode 100755 dhcp-relay/trace.sh diff --git a/dhcp-relay/cleanup_test.sh b/dhcp-relay/cleanup_test.sh new file mode 100755 index 0000000..d7ae11e --- /dev/null +++ b/dhcp-relay/cleanup_test.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# -x + +OUTER_VLAN=83 +INNER_VLAN=20 +UPLINK_VLAN=84 +IF="ens6f0" + +DHCP_SERVER="185.107.12.59" +IPADDR_BNG="194.45.77.57" +IPADDR_UPLINK="185.107.12.99" +CLIENT_IP="194.45.77.59" + +echo "Unloading XDP program" +./dhcp_user_xdp -i $IF -d $DHCP_SERVER -s $IPADDR_BNG -u + +echo "Deleting VLAN interfaces" +# Delete inner VLAN interface +ip link del $IF.$OUTER_VLAN.$INNER_VLAN +# Delete outer VLAN interface +ip link del $IF.$OUTER_VLAN + +echo "Deleting uplink interface" +ip link del $IF.$UPLINK_VLAN + +# Remove BNG address from loopback interface +ip addr del $IPADDR_BNG/29 dev lo \ No newline at end of file diff --git a/dhcp-relay/dhcp-relay.h b/dhcp-relay/dhcp-relay.h index 00cd7ea..32b84bb 100644 --- a/dhcp-relay/dhcp-relay.h +++ b/dhcp-relay/dhcp-relay.h @@ -18,7 +18,10 @@ #define DHCP_REQUEST 1 #define DHCP_REPLY 2 -#define MAX_LOOPS 40 +#define MAX_LOOPS 20 +#define U16_ASCII_LEN 5 /* Max value: 65535 */ + +#define IP_ADDR_BCAST 0xFFFFFFFF /* 255.255.255.255 in 
hex */ /* Structure for sub-options in option 82 */ struct sub_option { diff --git a/dhcp-relay/dhcp_kern_xdp.c b/dhcp-relay/dhcp_kern_xdp.c index 27df8ba..df74750 100644 --- a/dhcp-relay/dhcp_kern_xdp.c +++ b/dhcp-relay/dhcp_kern_xdp.c @@ -56,36 +56,93 @@ struct { __uint(max_entries, 16384); } client_vlans SEC(".maps"); -void memcpy_var(void *to, void *from, __u64 len) { +static int memcpy_var(void *to, void *from, __u8 len) { __u8 *t8 = to, *f8 = from; int i; - for (i = 0; i < len && i < MAX_LOOPS; i++) { - *t8++ = *f8++; + if (len > MAX_LOOPS) { + return -1; } - + + for (i = 0; i < len; i++) { + + if (i > MAX_LOOPS) { + return -1; + } + + *t8++ = *f8++; + + } + + + if (i == MAX_LOOPS) + return -1; + + return 0; + } -void memset_var(void *d, __u8 c, __u64 len) { - __u8 *d8 = d; - int i; +static int u16_to_ascii(char *buf, __u8 offset, __u16 num) { + + __u8 i; +#pragma unroll U16_ASCII_LEN + for (i = offset; i > 0; i--) { + + buf[i - 1] = (num % 10) + '0'; + num /= 10; + if (num == 0) + break; - for (i = 0; i < len && i < MAX_LOOPS; i++) { - *d8++ = c; } - + + if (i > 1) { + i--; + buf[i - 1] = '.'; + } + + return i - 1; + +} + +static int str_len(char *buf) { + + __u8 i = 0; + for (i = 0; i < IF_NAMESIZE; i++) + if (buf[i] == 0) + break; + + return i; + +} + +static int copy_dev_name(char *buf, __u8 offset, char dev[IF_NAMESIZE]) { + + __u8 dev_len = 0; + + dev_len = str_len(dev); + + /* Check if we have enough space in buffer */ + if ((offset - dev_len) < 0) { + return -1; + } + + offset -= dev_len; + + memcpy_var(buf + offset, dev, dev_len); + + return offset; } /* Inserts DHCP option 82 into the received DHCP packet * at the specified offset. 
*/ static __always_inline int write_dhcp_option_82(void *ctx, int offset, - struct collect_vlans *vlans, char *dev) { - - struct dhcp_option_82 option; - - static __u8 buf[RAI_OPTION_LEN]; - + struct collect_vlans *vlans, char dev[IF_NAMESIZE]) { + + struct dhcp_option_82 option = {0}; + + static __u8 buf[RAI_OPTION_LEN] = {0}; + option.t = DHO_DHCP_AGENT_OPTIONS; option.len = sizeof (struct sub_option) + sizeof (struct sub_option); option.circuit_id.option_id = RAI_CIRCUIT_ID; @@ -93,12 +150,6 @@ static __always_inline int write_dhcp_option_82(void *ctx, int offset, option.remote_id.option_id = RAI_REMOTE_ID; option.remote_id.len = sizeof (option.remote_id.val); - /* Initialize val arrays */ - memset_var(option.circuit_id.val, 0, sizeof (option.circuit_id.val)); - memset_var(option.remote_id.val, '*', sizeof (option.remote_id.val)); - //memset(option.circuit_id.val, 0, sizeof (option.circuit_id.val)); - //memset(option.remote_id.val, '*', sizeof (option.remote_id.val)); - /* Reconstruct VLAN device name * Convert VLAN tags to ASCII from right to left, starting with * inner VLAN tag. @@ -106,55 +157,49 @@ static __always_inline int write_dhcp_option_82(void *ctx, int offset, * contains null bytes. 
*/ - memset(buf, 0, sizeof (buf)); - - int c = VLAN_ASCII_MAX; /* We will need 4 bytes at most */ - int i = RAI_OPTION_LEN - 1; - + int i = RAI_OPTION_LEN; + + // Start with interface name + /*int dev_len = str_len(dev, IF_NAMESIZE); + if (dev_len < sizeof (option.circuit_id.val)) { + memcpy_var(option.circuit_id.val, dev, dev_len); + }*/ + __u16 inner_vlan = vlans->id[1]; __u16 outer_vlan = vlans->id[0]; - /* Convert inner VLAN to ASCII */ -#pragma unroll VLAN_ASCII_MAX - for (c = VLAN_ASCII_MAX; c > 0; c--) { - buf[i--] = (inner_vlan % 10) + '0'; - inner_vlan /= 10; - if (inner_vlan == 0) { - break; + if (inner_vlan != 0) { + + /* Convert inner VLAN to ASCII */ + i = u16_to_ascii(buf, RAI_OPTION_LEN, inner_vlan); + if (i < 0) { + return -1; } + } - buf[i--] = '.'; + if (outer_vlan != 0) { - /* Convert outer VLAN to ASCII */ -#pragma unroll VLAN_ASCII_MAX - for (c = VLAN_ASCII_MAX; c > 0; c--) { - buf[i--] = (outer_vlan % 10) + '0'; - outer_vlan /= 10; - if (outer_vlan == 0) { - break; + /* Convert outer VLAN to ASCII */ + i = u16_to_ascii(buf, i, outer_vlan); + if (i < 0) { + return -1; } + } - buf[i--] = '.'; +// FIXME: Verifier complains about BPF program being too large when this +// function is enabled +// i = copy_dev_name(buf, i, dev); +// if (i < 0) +// return -1; - /* Append interface name */ -#pragma unroll RAI_OPTION_LEN - for (c = RAI_OPTION_LEN - 1; c >= 0; c--) { - if (dev[c] != 0) - buf[i--] = dev[c]; - if (i < 0) - break; - } - - i++; - - /* Copy resulting interface name to circuit_id */ if (sizeof (option.circuit_id.val) == sizeof (buf)) { + + /* Copy right-aligned VLAN text to left-aligned buffer */ memcpy_var(option.circuit_id.val, buf + i, sizeof (buf) - i); - //memcpy_var(option.circuit_id.val, buf, sizeof (buf)); } - + return xdp_store_bytes(ctx, offset, &option, sizeof (option), 0); } @@ -171,10 +216,10 @@ static __always_inline int write_dhcp_option_255(void *ctx, int offset) { /* Calculates the IP checksum */ static __always_inline int 
calc_ip_csum(struct iphdr *oldip, struct iphdr *ip, - __u32 oldcsum) { + __u32 oldcsum) { __u32 size = sizeof (struct iphdr); __u32 csum = bpf_csum_diff((__be32 *) oldip, size, (__be32 *) ip, size, - ~oldcsum); + ~oldcsum); __u32 sum = (csum >> 16) + (csum & 0xffff); sum += (sum >> 16); return sum; @@ -189,7 +234,7 @@ static __always_inline int calc_ip_csum(struct iphdr *oldip, struct iphdr *ip, sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct udphdr) + \ offsetof(struct dhcp_packet, options) -/* Delta value to be adjusted at xdp head*/ +/* Delta value for tail adjustment */ #define delta sizeof(struct dhcp_option_82) #ifndef DHCP_MAX_OPTIONS @@ -205,8 +250,6 @@ static __always_inline int calc_ip_csum(struct iphdr *oldip, struct iphdr *ip, SEC(XDP_PROG_SEC) int xdp_dhcp_relay(struct xdp_md *ctx) { - bpf_printk("\n"); - /* Tail extend packet */ int res = bpf_xdp_adjust_tail(ctx, delta); if (res != 0) { @@ -236,7 +279,8 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { __u8 option_length = 0; __u64 client_mac = 0; char *dev; - int i = 0; + __u8 i = 0; + __u8 head_adjusted = 0; /* These keep track of the next header type and iterator pointer */ struct hdr_cursor nh; @@ -246,7 +290,6 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { int len = 0; if (data + 1 > data_end) { - bpf_printk("Empty packet\n"); goto out; } @@ -254,32 +297,31 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { ether_type = parse_ethhdr_vlan(&nh, data_end, &eth, &vlans); /* check for valid ether type */ if (ether_type < 0) { - bpf_printk("Cannot determine ethertype\n"); + bpf_printk("Cannot determine ethertype"); goto out; } - + if (ether_type != bpf_htons(ETH_P_IP)) { - //bpf_printk("Ethertype %x is not ETH_P_IP\n", bpf_ntohs(ether_type)); + //bpf_printk("Ethertype %x is not ETH_P_IP", bpf_ntohs(ether_type)); goto out; } - bpf_printk("Ethertype %x\n", bpf_ntohs(ether_type)); - - /* Check at least two vlan tags are present */ + /* Check at least one vlan tag is present */ if (vlans.id[0] == 0) { - 
bpf_printk("No outer VLAN tag set\n"); + bpf_printk("No outer VLAN tag set"); goto out; } - + if (vlans.id[1] == 0) { - bpf_printk("No inner VLAN tag set\n"); - goto out; + bpf_printk("No inner VLAN tag set"); + //goto out; } h_proto = parse_iphdr(&nh, data_end, &ip); /* Only handle fixed-size IP header due to static copy */ if (h_proto != IPPROTO_UDP || ip->ihl > 5) { + bpf_printk("Not UDP"); goto out; } @@ -291,15 +333,17 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { goto out; /* Handle DHCP packets only */ - if (udp->dest != bpf_htons(DHCP_SERVER_PORT) && udp->dest != bpf_htons(DHCP_CLIENT_PORT)) + if (udp->dest != bpf_htons(DHCP_SERVER_PORT) && udp->dest != bpf_htons(DHCP_CLIENT_PORT)) { + bpf_printk("Not DHCP"); goto out; + } /* Increase IP length header */ ip->tot_len += bpf_htons(delta); /* Increase UDP length header */ udp->len += bpf_htons(delta); - + udp->check = 0; /* Read DHCP server IP from config map */ @@ -326,9 +370,8 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { if (dev == NULL) goto out; - /* Copy headers of packet to buf */ - //if (xdp_load_bytes(ctx, 0, buf, static_offset)) - // goto out; + //memcpy(dev_name, dev, IF_NAMESIZE); + //dev_len = str_len(dev_name, IF_NAMESIZE); /* Increment offset by 4 bytes for each VLAN (to accomodate VLAN headers */ #pragma unroll VLAN_MAX_DEPTH @@ -337,9 +380,6 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { bpf_printk("Found VLAN tag %i at depth %i", vlans.id[i], i); - /* For each VLAN present, copy 4 bytes of DHCP options to buffer */ - //if (xdp_load_bytes(ctx, offset, buf + offset, 4)) - // goto out; offset += 4; vlan_length += 4; } @@ -355,6 +395,16 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { } dhcp = data + vlan_length + dhcp_offset; + /* Check hops */ + if (dhcp->hops > 16) { + bpf_printk("Max hops exceeded, discarding packet"); + rc = XDP_ABORTED; + goto out; + } + + /* Increment hops */ + dhcp->hops++; + /* Store client MAC */ if (dhcp->chaddr + ETH_ALEN > data_end) { goto out; @@ -364,11 +414,11 @@ int 
xdp_dhcp_relay(struct xdp_md *ctx) { bpf_printk("Parsing DHCP packet, opcode %i, hops %i", dhcp->op, dhcp->hops); if (dhcp->op == DHCP_REQUEST && (eth->h_dest[0] == 0xff - && eth->h_dest[1] == 0xff - && eth->h_dest[2] == 0xff - && eth->h_dest[3] == 0xff - && eth->h_dest[4] == 0xff - && eth->h_dest[5] == 0xff)) { + && eth->h_dest[1] == 0xff + && eth->h_dest[2] == 0xff + && eth->h_dest[3] == 0xff + && eth->h_dest[4] == 0xff + && eth->h_dest[5] == 0xff)) { /* Request from client received as broadcast */ @@ -395,25 +445,16 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { } else if (dhcp->op == DHCP_REPLY && (eth->h_dest[0] != 0xff - || eth->h_dest[1] != 0xff - || eth->h_dest[2] != 0xff - || eth->h_dest[3] != 0xff - || eth->h_dest[4] != 0xff - || eth->h_dest[5] != 0xff)) { + || eth->h_dest[1] != 0xff + || eth->h_dest[2] != 0xff + || eth->h_dest[3] != 0xff + || eth->h_dest[4] != 0xff + || eth->h_dest[5] != 0xff)) { /* Response from server received as unicast */ bpf_printk("Unicast packet received, opcode %i, hops %i", dhcp->op, dhcp->hops); - /* FIXME: Add code for reply packets - * Basically: - * - Set dest and src MAC - * - Add VLAN tags - * - Remove option 82 - * - Use XDP_TX (or XDP_REDIRECT) to send the response - * to the end user - */ - struct collect_vlans *new_vlans; new_vlans = bpf_map_lookup_elem(&client_vlans, &client_mac); if (new_vlans == NULL) { @@ -421,19 +462,89 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { goto out; } - bpf_printk("Found map entry for MAC %i", client_mac); + bpf_printk("Found map entry for MAC %x", client_mac); - } + /* Set destination MAC */ + memcpy(eth->h_dest, dhcp->chaddr, ETH_ALEN); - /* Check hops */ - if (dhcp->hops > 16) { - bpf_printk("Max hops exceeded, discarding packet"); - rc = XDP_ABORTED; - goto out; - } + /* Set source MAC */ + memcpy(eth->h_source, relay_hwaddr, ETH_ALEN); - /* Increment hops */ - dhcp->hops++; + /* Set destination IP */ + ip->daddr = IP_ADDR_BCAST; + + /* Set source IP */ + ip->saddr = *relay_agent_ip; + + 
/* Add / replace VLAN tags */ + if (vlans.id[0] != 0) { + bpf_printk("Outer VLAN %i found, will change to %i", vlans.id[0], new_vlans->id[0]); + + struct vlan_hdr *outer_vlh = data + ETH_HLEN; + outer_vlh->h_vlan_TCI = bpf_htons((bpf_ntohs(outer_vlh->h_vlan_TCI) & 0xf000) | new_vlans->id[0]); + + } + + if (vlans.id[1] != 0) { + bpf_printk("Inner VLAN %i found, will change to %i", vlans.id[1], new_vlans->id[1]); + + struct vlan_hdr *inner_vlh = data + ETH_HLEN + sizeof (struct vlan_hdr); + inner_vlh->h_vlan_TCI = bpf_htons((bpf_ntohs(inner_vlh->h_vlan_TCI) & 0xf000) | new_vlans->id[1]); + + } else { + bpf_printk("Inner VLAN not found, will insert %i", new_vlans->id[1]); + + /* Adjust header by -4 bytes to make space for VLAN header */ + if (bpf_xdp_adjust_head(ctx, -(int) sizeof (struct vlan_hdr))) { + bpf_printk("Cannot head-adjust packet by %i bytes, aborting", -(int) sizeof (struct vlan_hdr)); + rc = XDP_ABORTED; + goto out; + } + + bpf_printk("Head-adjusted packet by %i bytes", -(int) sizeof (struct vlan_hdr)); + + head_adjusted = 1; + + data_end = (void *) (long) ctx->data_end; + data = (void *) (long) ctx->data; + + /* Verifier check */ + if (data + ETH_ALEN + ETH_ALEN + sizeof (struct vlan_hdr) + sizeof (struct vlan_hdr) > data_end) { + rc = XDP_ABORTED; + goto out; + } + + /* Move MAC address headers + outer VLAN tag to beginning of packet */ + memmove(data, data + sizeof (struct vlan_hdr), ETH_ALEN + ETH_ALEN + sizeof (struct vlan_hdr)); + + bpf_printk("Moved %i bytes from offset %i to offset %i", ETH_ALEN + ETH_ALEN + sizeof (struct vlan_hdr), sizeof (struct vlan_hdr), 0); + + /* Make new inner VLAN header (copy from outer VLAN header) */ + memcpy(data + ETH_ALEN + ETH_ALEN + sizeof (struct vlan_hdr), data + ETH_ALEN + ETH_ALEN, sizeof (struct vlan_hdr)); + + bpf_printk("Copied %i bytes from offset %i to offset %i", sizeof (struct vlan_hdr), ETH_ALEN + ETH_ALEN, ETH_ALEN + ETH_ALEN + sizeof (struct vlan_hdr)); + + bpf_printk("Will modify VLAN header at 
offset %i", ETH_ALEN + ETH_ALEN + sizeof (struct vlan_hdr)); + + struct vlan_hdr *vlh = data + ETH_ALEN + ETH_ALEN + sizeof (struct vlan_hdr) + 2; + vlh->h_vlan_TCI = bpf_htons((bpf_ntohs(vlh->h_vlan_TCI) & 0xf000) | new_vlans->id[1]); + + offset += sizeof (struct vlan_hdr); + vlan_length += sizeof (struct vlan_hdr); + + /* Parse DHCP packet */ + if (data + vlan_length + dhcp_offset + sizeof (dhcp) > data_end) { + goto out; + } + dhcp = data + vlan_length + dhcp_offset; + + bpf_printk("Inserted VLAN header"); + + } + + rc = XDP_TX; + + } /* Check if we exceed boundaries to make verifier happy */ if (data + offset > data_end) @@ -451,20 +562,33 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { if (pos + 1 > data_end) break; + /* Read option code */ option_code = *pos; bpf_printk("Got option code %i at offset %i, hex %x", option_code, option_offset, option_offset); + if (option_code == 82 && dhcp->op == DHCP_REPLY) { + + bpf_printk("Will erase DHCP option 82"); + + /* FIXME: Erase options 82 + 255 and set new option 255 */ + + *pos = 255; + break; + + } + if (option_code == 255) { - bpf_printk("Going to write DHCP option at offset %i", option_offset); + bpf_printk("Going to write DHCP option 82 at offset %i", option_offset); /* Insert Option 82 before END option */ if (write_dhcp_option_82(ctx, option_offset, &vlans, dev)) { bpf_printk("Could not write DHCP option 82 at offset %i", option_offset); - return XDP_ABORTED; + //return XDP_ABORTED; + break; } - + /* Set END option */ /* Verifier check */ @@ -476,57 +600,50 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { if (write_dhcp_option_255(ctx, option_offset)) { bpf_printk("Could not write DHCP option 255 at offset %i", option_offset); - return XDP_ABORTED; + //return XDP_ABORTED; + break; } bpf_printk("Wrote DHCP option 255 at offset %i, returning XDP_PASS", option_offset); break; } + pos++; + /* Verifier check */ + if (pos + 1 > data_end) { + break; + } + option_length = *pos; option_offset += option_length + 2; + /* 
Verifier check */ if (pos + 1 > data_end) { break; } pos++; + /* Verifier check */ if (pos + option_length > data_end) { break; } + + /* Skip option value (go to next option) */ pos += option_length; } - //return XDP_PASS; - - /* Copy stored headers from buf to context */ - /*if (xdp_store_bytes(ctx, 0, buf, static_offset, 0)) { - - bpf_printk("xdp_store_bytes(ctx, 0, buf, %i) failed", static_offset); - return XDP_ABORTED; - }*/ - - - /* make space for option 82 - copy DHCP options after increasing offset */ - /*if (offset > static_offset) { - offset = static_offset; - for (i = 0; i < VLAN_MAX_DEPTH; i++) { - if (vlans.id[i]) {*/ - /* */ - /*if (xdp_store_bytes(ctx, offset, buf + offset, - 4, 0)) - return XDP_ABORTED; - offset += 4; -} + /* Adjust IP offset for VLAN header when VLAN header has been added */ + if (head_adjusted) { + ip = data + ip_offset + sizeof (struct vlan_hdr); + } else { + ip = data + ip_offset; } - }*/ - - ip = data + ip_offset; - if (ip + 1 > data_end) + if (ip + 1 > data_end) { return XDP_ABORTED; + } /* Overwrite the destination IP in IP header */ ip->daddr = *dhcp_srv_ip; @@ -537,7 +654,6 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { /* Re-calculate ip checksum */ __u32 sum = calc_ip_csum(&oldip, ip, oldip.check); ip->check = ~sum; - rc = XDP_PASS; goto out; diff --git a/dhcp-relay/test.sh b/dhcp-relay/test.sh new file mode 100755 index 0000000..dc5f026 --- /dev/null +++ b/dhcp-relay/test.sh @@ -0,0 +1,65 @@ +#!/bin/bash -x + +OUTER_VLAN=83 +INNER_VLAN=20 +UPLINK_VLAN=84 +IF="ens6f0" + +DHCP_SERVER="185.107.12.59" +IPADDR_BNG="194.45.77.57" +IPADDR_UPLINK="185.107.12.99" +UPLINK_GW="185.107.12.97" +CLIENT_IP="194.45.77.59" + +echo "Setting up VLAN interfaces" +ethtool -K $IF txvlan off +ethtool -K $IF rxvlan off +# Increase MTU to allow second VLAN tag (QinQ) +ip link set dev $IF mtu 1504 +ip link set dev $IF up +# Set outer VLAN interface +ip link add link $IF name $IF.$OUTER_VLAN type vlan id $OUTER_VLAN +ip link set $IF.$OUTER_VLAN up + 
+CLIENT_IF=$IF.$OUTER_VLAN.$INNER_VLAN + +# Set inner VLAN interface +ip link add link $IF.$OUTER_VLAN name $CLIENT_IF type vlan id $INNER_VLAN +ip link set $CLIENT_IF up + +# Set accept_local for VLAN interface +echo 1 > /proc/sys/net/ipv4/conf/$CLIENT_IF/accept_local + +# Disable reverse path filtering for VLAN interface +echo 0 > /proc/sys/net/ipv4/conf/$CLIENT_IF/rp_filter + +# Enable ARP proxy for VLAN interface +echo 1 > /proc/sys/net/ipv4/conf/$CLIENT_IF/proxy_arp + +# Insert /32 route to client +ip route add $CLIENT_IP/32 dev $CLIENT_IF + +# Set IP forwarding +echo 1 > /proc/sys/net/ipv4/ip_forward + +# Set L3 config for BNG interface +ip addr add $IPADDR_BNG/29 dev lo + +# Create upstream interface +ip link add link $IF name $IF.$UPLINK_VLAN type vlan id $UPLINK_VLAN +ip link set dev $IF.$UPLINK_VLAN +ip link set $IF.$UPLINK_VLAN up + +# Disable RP filtering globally to receive DHCP requests through unnumbered +# interface +echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter + +# Set L3 config for upstream interface +ip addr add $IPADDR_UPLINK/28 dev $IF.$UPLINK_VLAN +ip route replace default via $UPLINK_GW + +echo "Compiling XDP program" +make + +echo "Launching XDP program" +./dhcp_user_xdp -i $IF -d $DHCP_SERVER -s $IPADDR_UPLINK \ No newline at end of file diff --git a/dhcp-relay/trace.sh b/dhcp-relay/trace.sh new file mode 100755 index 0000000..d364282 --- /dev/null +++ b/dhcp-relay/trace.sh @@ -0,0 +1,3 @@ +#!/bin/bash -x + +cat /sys/kernel/debug/tracing/trace_pipe \ No newline at end of file From 813cfa271928f8482d0d3788eae6df6d452ee9cf Mon Sep 17 00:00:00 2001 From: Yoel Caspersen Date: Tue, 9 Nov 2021 16:35:37 +0100 Subject: [PATCH 4/7] Working prototype (IPv4) with interface name in Option 82 Circuit ID This version works with kernel 5.15 and LLVM 13. 
Improvements: - Interface name is added to Option 82 Circuit ID - On server replies, Option 82 is wiped completely - Char array parameters replaced with structs Functions that previously used char arrays as function parameters now use structs instead - otherwise the verifier cannot perform proper boundary checks. To-do: - IPv6 support - Multiple interface support Signed-off-by: Yoel Caspersen --- dhcp-relay/dhcp-relay.h | 5 + dhcp-relay/dhcp_kern_xdp.c | 187 +++++++++++++++++++++---------------- dhcp-relay/dhcp_user_xdp.c | 88 +++-------------- 3 files changed, 129 insertions(+), 151 deletions(-) diff --git a/dhcp-relay/dhcp-relay.h b/dhcp-relay/dhcp-relay.h index 32b84bb..8345786 100644 --- a/dhcp-relay/dhcp-relay.h +++ b/dhcp-relay/dhcp-relay.h @@ -43,6 +43,11 @@ struct dhcp_option_255 { __u8 t; }; +struct dev_name { + char name[IF_NAMESIZE]; + __u8 len; +}; + struct dhcp_packet { __u8 op; /* 0: Message opcode/type */ __u8 htype; /* 1: Hardware addr type (net/if_types.h) */ diff --git a/dhcp-relay/dhcp_kern_xdp.c b/dhcp-relay/dhcp_kern_xdp.c index df74750..d54b2ab 100644 --- a/dhcp-relay/dhcp_kern_xdp.c +++ b/dhcp-relay/dhcp_kern_xdp.c @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include /* IF_NAMESIZE */ #include #include -#include /* IF_NAMESIZE */ #include #include #include @@ -56,80 +56,107 @@ struct { __uint(max_entries, 16384); } client_vlans SEC(".maps"); -static int memcpy_var(void *to, void *from, __u8 len) { - __u8 *t8 = to, *f8 = from; - int i; - - if (len > MAX_LOOPS) { - return -1; - } - - for (i = 0; i < len; i++) { - - if (i > MAX_LOOPS) { - return -1; - } - - *t8++ = *f8++; - - } +//static int memcpy_var(void *to, void *from, __u8 len) { +// __u8 *t8 = to, *f8 = from; +// int i; +// +// if (len > RAI_OPTION_LEN) { +// return -1; +// } +// +// for (i = 0; i < len; i++) { +// +// if (i > RAI_OPTION_LEN) { +// return -1; +// } +// +// *t8++ = *f8++; +// +// } +// +// +// if (i == RAI_OPTION_LEN) +// return -1; +// +// return 0; 
+// +//} +int u16_to_ascii(struct sub_option *opt, __u8 offset, __u16 num) { - if (i == MAX_LOOPS) + if (opt == NULL) return -1; - return 0; - -} + if (offset > RAI_OPTION_LEN) + return -1; -static int u16_to_ascii(char *buf, __u8 offset, __u16 num) { + if (offset < U16_ASCII_LEN) + return -1; __u8 i; -#pragma unroll U16_ASCII_LEN - for (i = offset; i > 0; i--) { + //#pragma unroll U16_ASCII_LEN + for (i = offset - 1; i > 0; i--) { - buf[i - 1] = (num % 10) + '0'; + if (i == 0) + break; + + opt->val[i] = (__u8) (num % 10) + '0'; num /= 10; if (num == 0) break; } - if (i > 1) { - i--; - buf[i - 1] = '.'; + if (i > 0) { + opt->val[--i] = '.'; } - return i - 1; + if (i > RAI_OPTION_LEN) + return -1; + + return i; } -static int str_len(char *buf) { +int str_len(struct dev_name *dev) { + + if (dev == NULL) + return -1; __u8 i = 0; - for (i = 0; i < IF_NAMESIZE; i++) - if (buf[i] == 0) + for (i = 0; i < RAI_OPTION_LEN; i++) + if (dev->name[i] == 0) break; return i; } -static int copy_dev_name(char *buf, __u8 offset, char dev[IF_NAMESIZE]) { +int copy_dev_name(struct sub_option *dest, __u8 offset, struct dev_name *dev) { - __u8 dev_len = 0; + if (dest == NULL) + return -1; - dev_len = str_len(dev); - - /* Check if we have enough space in buffer */ - if ((offset - dev_len) < 0) { + if (dev == NULL) return -1; + + __u8 i; + + /* Copy device name and left-align VLAN part*/ +#pragma unroll RAI_OPTION_LEN + for (i = 0; i < RAI_OPTION_LEN; i++) { + + if (i < dev->len) { + /* Copy device name */ + dest->val[i] = dev->name[i]; + } else if (offset < RAI_OPTION_LEN) { + /* Move VLAN part (all bytes from offset and up) */ + dest->val[i] = dest->val[offset]; + dest->val[offset++] = 0; + } + } - - offset -= dev_len; - - memcpy_var(buf + offset, dev, dev_len); - + return offset; } @@ -137,12 +164,10 @@ static int copy_dev_name(char *buf, __u8 offset, char dev[IF_NAMESIZE]) { * at the specified offset. 
*/ static __always_inline int write_dhcp_option_82(void *ctx, int offset, - struct collect_vlans *vlans, char dev[IF_NAMESIZE]) { + struct collect_vlans *vlans, struct dev_name dev) { struct dhcp_option_82 option = {0}; - static __u8 buf[RAI_OPTION_LEN] = {0}; - option.t = DHO_DHCP_AGENT_OPTIONS; option.len = sizeof (struct sub_option) + sizeof (struct sub_option); option.circuit_id.option_id = RAI_CIRCUIT_ID; @@ -159,19 +184,13 @@ static __always_inline int write_dhcp_option_82(void *ctx, int offset, int i = RAI_OPTION_LEN; - // Start with interface name - /*int dev_len = str_len(dev, IF_NAMESIZE); - if (dev_len < sizeof (option.circuit_id.val)) { - memcpy_var(option.circuit_id.val, dev, dev_len); - }*/ - __u16 inner_vlan = vlans->id[1]; __u16 outer_vlan = vlans->id[0]; if (inner_vlan != 0) { /* Convert inner VLAN to ASCII */ - i = u16_to_ascii(buf, RAI_OPTION_LEN, inner_vlan); + i = u16_to_ascii(&option.circuit_id, RAI_OPTION_LEN, inner_vlan); if (i < 0) { return -1; } @@ -181,24 +200,17 @@ static __always_inline int write_dhcp_option_82(void *ctx, int offset, if (outer_vlan != 0) { /* Convert outer VLAN to ASCII */ - i = u16_to_ascii(buf, i, outer_vlan); + i = u16_to_ascii(&option.circuit_id, i, outer_vlan); if (i < 0) { return -1; } } -// FIXME: Verifier complains about BPF program being too large when this -// function is enabled -// i = copy_dev_name(buf, i, dev); -// if (i < 0) -// return -1; - - if (sizeof (option.circuit_id.val) == sizeof (buf)) { - - /* Copy right-aligned VLAN text to left-aligned buffer */ - memcpy_var(option.circuit_id.val, buf + i, sizeof (buf) - i); - } + /* Insert device name and left-align circuit ID */ + i = copy_dev_name(&option.circuit_id, i, &dev); + if (i < 0) + return -1; return xdp_store_bytes(ctx, offset, &option, sizeof (option), 0); } @@ -267,6 +279,7 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { struct iphdr oldip; struct udphdr *udp; struct dhcp_packet *dhcp; + struct dev_name dev = {0}; __u32 *dhcp_srv_ip; __u32 
*relay_agent_ip; __u64 *relay_hwaddr; @@ -278,7 +291,7 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { __u8 option_code = 0; __u8 option_length = 0; __u64 client_mac = 0; - char *dev; + char *dev_config; __u8 i = 0; __u8 head_adjusted = 0; @@ -295,9 +308,9 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { nh.pos = data; ether_type = parse_ethhdr_vlan(&nh, data_end, &eth, &vlans); - /* check for valid ether type */ + /* Check for valid EtherType */ if (ether_type < 0) { - bpf_printk("Cannot determine ethertype"); + bpf_printk("Cannot determine EtherType"); goto out; } @@ -314,7 +327,6 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { if (vlans.id[1] == 0) { bpf_printk("No inner VLAN tag set"); - //goto out; } h_proto = parse_iphdr(&nh, data_end, &ip); @@ -366,12 +378,12 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { /* Read device name from device map */ key = 0; - dev = bpf_map_lookup_elem(&device_name, &key); - if (dev == NULL) + dev_config = bpf_map_lookup_elem(&device_name, &key); + if (dev_config == NULL) goto out; - //memcpy(dev_name, dev, IF_NAMESIZE); - //dev_len = str_len(dev_name, IF_NAMESIZE); + memcpy(dev.name, dev_config, RAI_OPTION_LEN); + dev.len = str_len(&dev); /* Increment offset by 4 bytes for each VLAN (to accomodate VLAN headers */ #pragma unroll VLAN_MAX_DEPTH @@ -513,7 +525,7 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { rc = XDP_ABORTED; goto out; } - + /* Move MAC address headers + outer VLAN tag to beginning of packet */ memmove(data, data + sizeof (struct vlan_hdr), ETH_ALEN + ETH_ALEN + sizeof (struct vlan_hdr)); @@ -552,10 +564,12 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { option_offset = offset; + __u8 n = 0; + __u8 *pos = (__u8 *) (data + option_offset); /* Loop through all DHCP options */ -#pragma unroll DHCP_MAX_OPTIONS + //#pragma unroll DHCP_MAX_OPTIONS for (i = 0; i < DHCP_MAX_OPTIONS; i++) { /* Verifier check */ @@ -571,9 +585,26 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { bpf_printk("Will erase DHCP option 82"); - /* FIXME: Erase options 82 + 255 and
set new option 255 */ - + /* Set new option 255 */ *pos = 255; + + /* Increment pointer 2nd byte of option 82 */ + pos++; + + /* Verifier check */ + if (pos + 1 > data_end) + break; + + /* Erase remainder of option 82 */ + for (n = 0; n < sizeof (struct dhcp_option_82); n++) { + + if (pos + 1 > data_end) + break; + + *pos++ = 0; + + } + break; } diff --git a/dhcp-relay/dhcp_user_xdp.c b/dhcp-relay/dhcp_user_xdp.c index 2aeb56e..a2ea5e5 100644 --- a/dhcp-relay/dhcp_user_xdp.c +++ b/dhcp-relay/dhcp_user_xdp.c @@ -89,63 +89,8 @@ int xdp_link_attach(int ifindex, __u32 xdp_flags, int prog_fd) { return 0; } -/* User program takes two or three arguments - * interface name, relay server IP and prog - * unload flag - */ int main(int argc, char **argv) { - /*char device[500] = "ens6f0np0"; - char o82[30] = { 0 }; - - int outer_vlan = 80; - int inner_vlan = 25; - - char str[30] = {0}; // large enough for an int even on 64-bit - int i = 30; - int c = 0; - - for(c = 4; c > 0; c--) { - str[i--] = (inner_vlan % 10) + '0'; - inner_vlan /= 10; - if(inner_vlan == 0) { - break; - } - } - - str[i--] = '.'; - - for(c = 4; c > 0; c--) { - str[i--] = (outer_vlan % 10) + '0'; - outer_vlan /= 10; - if(outer_vlan == 0) { - break; - } - } - - str[i--] = '.'; - - int y; - for(y = sizeof(device) - 1; y >= 0; y--) { - if(device[y] != 0) { - str[i] = device[y]; - i--; - } - } - - printf("i is %i\n", i); - - memset(o82, 0, 30); - memcpy(o82, str + i + 1, 30 - i); - - printf("The number was: %s\n", str + i + 1); - - printf("Option 82: %s\n", o82); - - printf("Option 82 length was %i\n", 30 - i); - - return 0;*/ - char filename[256] = "dhcp_kern_xdp.o"; int prog_fd, err; int opt; @@ -171,7 +116,7 @@ int main(int argc, char **argv) { while ((opt = getopt_long(argc, argv, "hui:d:m:s:", options, NULL)) != -1) { switch (opt) { - case 'i': + case 'i': /* Physical interface */ strncpy(dev, optarg, IF_NAMESIZE); dev[IF_NAMESIZE - 1] = '\0'; ifindex = if_nametoindex(dev); @@ -180,7 +125,7 @@ int main(int 
argc, char **argv) { return -EINVAL; } break; - case 'd': // DHCP server address + case 'd': /* DHCP server address */ if (inet_aton(optarg, &dhcp_server_addr) == 0) { fprintf(stderr, "Couldn't validate DHCP server IP address:%s\n", @@ -189,7 +134,7 @@ int main(int argc, char **argv) { } dhcp_server_addr_set = true; break; - case 's': // Relay agent address + case 's': /* Relay agent address */ if (inet_aton(optarg, &relay_agent_addr) == 0) { fprintf(stderr, "Couldn't validate relay agent IP address:%s\n", @@ -198,7 +143,7 @@ int main(int argc, char **argv) { } relay_agent_addr_set = true; break; - case 'm': + case 'm': /* Mode: skb or native */ if (strcmp(optarg, "skb") == 0) { xdp_flags = XDP_FLAGS_SKB_MODE; } else if (strcmp(optarg, "drv") != 0) { @@ -207,10 +152,10 @@ int main(int argc, char **argv) { } break; - case 'u': + case 'u': /* Unload XDP program */ do_unload = 1; break; - case 'h': + case 'h': /* Help menu */ print_usage(argv); exit(0); default: @@ -235,7 +180,7 @@ int main(int argc, char **argv) { if (do_unload) return xdp_link_detach(ifindex, xdp_flags); - // Find MAC address of interface + /* Find MAC address of interface */ fd = socket(AF_INET, SOCK_DGRAM, 0); ifr.ifr_addr.sa_family = AF_INET; @@ -250,7 +195,7 @@ int main(int argc, char **argv) { __u64 hwaddr = 0; memcpy(&hwaddr, (unsigned char *) ifr.ifr_hwaddr.sa_data, 6); - //display mac address + /* Display MAC address */ printf("Using device %s MAC: %.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n", dev, mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); /* Load the BPF-ELF object file and get back first BPF_prog FD */ @@ -265,9 +210,7 @@ int main(int argc, char **argv) { return -1; } - /* read the map from prog object file and update the real - * server IP to the map - */ + /* Open server config map */ map = bpf_object__find_map_by_name(obj, SERVER_MAP); err = libbpf_get_error(map); if (err) { @@ -282,7 +225,7 @@ int main(int argc, char **argv) { exit(-1); } - // Set DHCP server address + /* Set DHCP server 
address */ key = 0; err = bpf_map_update_elem(map_fd, &key, &dhcp_server_addr.s_addr, BPF_ANY); if (err) { @@ -291,7 +234,7 @@ int main(int argc, char **argv) { exit(-1); } - // Set relay agent IP address + /* Set relay agent IP address */ key = 1; err = bpf_map_update_elem(map_fd, &key, &relay_agent_addr.s_addr, BPF_ANY); if (err) { @@ -300,7 +243,7 @@ int main(int argc, char **argv) { exit(-1); } - // Set relay agent MAC address + /* Set relay agent MAC address */ key = 2; err = bpf_map_update_elem(map_fd, &key, &hwaddr, BPF_ANY); if (err) { @@ -310,9 +253,7 @@ int main(int argc, char **argv) { } - /* read the map from prog object file and update the real - * server IP to the map - */ + /* Open device map */ device_map = bpf_object__find_map_by_name(obj, DEVICE_MAP); err = libbpf_get_error(device_map); if (err) { @@ -327,7 +268,7 @@ int main(int argc, char **argv) { exit(-1); } - // Set device name in map + /* Set device name in map */ key = 0; err = bpf_map_update_elem(device_map_fd, &key, dev, BPF_ANY); if (err) { @@ -336,6 +277,7 @@ int main(int argc, char **argv) { exit(-1); } + /* Attach XDP program to interface */ err = xdp_link_attach(ifindex, xdp_flags, prog_fd); if (err) return err; From f26620260ae25eee733b9cf77cda63dc1493970d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 13 Nov 2021 00:00:40 +0100 Subject: [PATCH 5/7] Improve readability of write_dhcp_option_82() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes several changes to improve the readability of write_dhcp_option_82(): The main change is in reworking the string helpers to be smaller and more modular, and using a separate buffer for stringifying the VLAN tags before copying them to the DHCP option field. Get rid of str_len() and the len member of struct dev_name. To aid in this, the whole containing function is made global (i.e., a separate BPF subprog) instead of the helpers. 
In addition, the local variables are renamed to be more descriptive, and the option struct is initialised using struct initialisation syntax. Signed-off-by: Toke Høiland-Jørgensen --- dhcp-relay/dhcp-relay.h | 1 - dhcp-relay/dhcp_kern_xdp.c | 187 ++++++++++++------------------------- 2 files changed, 58 insertions(+), 130 deletions(-) diff --git a/dhcp-relay/dhcp-relay.h b/dhcp-relay/dhcp-relay.h index 8345786..cbda2f0 100644 --- a/dhcp-relay/dhcp-relay.h +++ b/dhcp-relay/dhcp-relay.h @@ -45,7 +45,6 @@ struct dhcp_option_255 { struct dev_name { char name[IF_NAMESIZE]; - __u8 len; }; struct dhcp_packet { diff --git a/dhcp-relay/dhcp_kern_xdp.c b/dhcp-relay/dhcp_kern_xdp.c index d54b2ab..6b45d36 100644 --- a/dhcp-relay/dhcp_kern_xdp.c +++ b/dhcp-relay/dhcp_kern_xdp.c @@ -56,163 +56,93 @@ struct { __uint(max_entries, 16384); } client_vlans SEC(".maps"); -//static int memcpy_var(void *to, void *from, __u8 len) { -// __u8 *t8 = to, *f8 = from; -// int i; -// -// if (len > RAI_OPTION_LEN) { -// return -1; -// } -// -// for (i = 0; i < len; i++) { -// -// if (i > RAI_OPTION_LEN) { -// return -1; -// } -// -// *t8++ = *f8++; -// -// } -// -// -// if (i == RAI_OPTION_LEN) -// return -1; -// -// return 0; -// -//} - -int u16_to_ascii(struct sub_option *opt, __u8 offset, __u16 num) { - - if (opt == NULL) - return -1; - - if (offset > RAI_OPTION_LEN) - return -1; +static int u16_to_ascii(struct dev_name *buf, __u8 end_offset, __u16 num) { + int i; - if (offset < U16_ASCII_LEN) + if (buf == NULL || + end_offset > sizeof(buf->name) || + end_offset < U16_ASCII_LEN) return -1; - __u8 i; - //#pragma unroll U16_ASCII_LEN - for (i = offset - 1; i > 0; i--) { - - if (i == 0) - break; - - opt->val[i] = (__u8) (num % 10) + '0'; + for (i = end_offset - 1; i >= 0; i--) { + buf->name[i] = (__u8) (num % 10) + '0'; num /= 10; - if (num == 0) + if (!num) break; - } - if (i > 0) { - opt->val[--i] = '.'; - } - - if (i > RAI_OPTION_LEN) - return -1; - return i; - } -int str_len(struct dev_name 
*dev) { +static int copy_dev_name(struct sub_option *dest, __u8 wr_offset, + struct dev_name *dev, __u8 rd_offset) { + int i; - if (dev == NULL) + if (dest == NULL || dev == NULL || wr_offset > sizeof(dest->val) - 1) return -1; - __u8 i = 0; - for (i = 0; i < RAI_OPTION_LEN; i++) - if (dev->name[i] == 0) + for (i = 0; i < sizeof(dest->val) - wr_offset - 1; i++) { + if (i + rd_offset > sizeof(dev->name) - 1 || + !dev->name[i + rd_offset]) break; - return i; - -} - -int copy_dev_name(struct sub_option *dest, __u8 offset, struct dev_name *dev) { - - if (dest == NULL) - return -1; - - if (dev == NULL) - return -1; - - __u8 i; - - /* Copy device name and left-align VLAN part*/ -#pragma unroll RAI_OPTION_LEN - for (i = 0; i < RAI_OPTION_LEN; i++) { - - if (i < dev->len) { - /* Copy device name */ - dest->val[i] = dev->name[i]; - } else if (offset < RAI_OPTION_LEN) { - /* Move VLAN part (all bytes from offset and up) */ - dest->val[i] = dest->val[offset]; - dest->val[offset++] = 0; - } - + dest->val[i + wr_offset] = dev->name[i + rd_offset]; } - return offset; + return i; } /* Inserts DHCP option 82 into the received DHCP packet * at the specified offset. */ -static __always_inline int write_dhcp_option_82(void *ctx, int offset, - struct collect_vlans *vlans, struct dev_name dev) { - - struct dhcp_option_82 option = {0}; - - option.t = DHO_DHCP_AGENT_OPTIONS; - option.len = sizeof (struct sub_option) + sizeof (struct sub_option); - option.circuit_id.option_id = RAI_CIRCUIT_ID; - option.circuit_id.len = sizeof (option.circuit_id.val); - option.remote_id.option_id = RAI_REMOTE_ID; - option.remote_id.len = sizeof (option.remote_id.val); - - /* Reconstruct VLAN device name - * Convert VLAN tags to ASCII from right to left, starting with - * inner VLAN tag. - * Device name is up to 16 characters long - remaining buffer space - * contains null bytes. 
- */ - - int i = RAI_OPTION_LEN; - - __u16 inner_vlan = vlans->id[1]; - __u16 outer_vlan = vlans->id[0]; +int write_dhcp_option_82(struct xdp_md *ctx, int pkt_offset, + struct collect_vlans *vlans, struct dev_name *dev) { + + __u16 inner_vlan, outer_vlan; + struct dev_name buf = {}; + int len, vlan_offset; + struct dhcp_option_82 option = { + .t = DHO_DHCP_AGENT_OPTIONS, + .len = sizeof (struct sub_option) + sizeof (struct sub_option), + .circuit_id.option_id = RAI_CIRCUIT_ID, + .circuit_id.len = sizeof (option.circuit_id.val), + .remote_id.option_id = RAI_REMOTE_ID, + .remote_id.len = sizeof (option.remote_id.val) + }; + + if (!ctx || !vlans || !dev) + return -1; - if (inner_vlan != 0) { + inner_vlan = vlans->id[1]; + outer_vlan = vlans->id[0]; - /* Convert inner VLAN to ASCII */ - i = u16_to_ascii(&option.circuit_id, RAI_OPTION_LEN, inner_vlan); - if (i < 0) { - return -1; - } + if (!inner_vlan || !outer_vlan) + return -1; - } + /* The u16_to_ascii function works backwards from the end of the buffer, + * so start out at the end of the string. 
+ */ + vlan_offset = sizeof(buf.name); - if (outer_vlan != 0) { + vlan_offset = u16_to_ascii(&buf, vlan_offset, inner_vlan); + if (vlan_offset < 0) + return vlan_offset; + buf.name[--vlan_offset] = '.'; - /* Convert outer VLAN to ASCII */ - i = u16_to_ascii(&option.circuit_id, i, outer_vlan); - if (i < 0) { - return -1; - } + vlan_offset = u16_to_ascii(&buf, vlan_offset, outer_vlan); + if (vlan_offset < 0) + return vlan_offset; + buf.name[--vlan_offset] = '.'; - } + len = copy_dev_name(&option.circuit_id, 0, dev, 0); + if (len < 0) + return len; - /* Insert device name and left-align circuit ID */ - i = copy_dev_name(&option.circuit_id, i, &dev); - if (i < 0) - return -1; + len = copy_dev_name(&option.circuit_id, len, &buf, vlan_offset); + if (len < 0) + return len; - return xdp_store_bytes(ctx, offset, &option, sizeof (option), 0); + return xdp_store_bytes(ctx, pkt_offset, &option, sizeof(option), 0); } /* Inserts DHCP option 255 into the received dhcp packet @@ -383,7 +313,6 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { goto out; memcpy(dev.name, dev_config, RAI_OPTION_LEN); - dev.len = str_len(&dev); /* Increment offset by 4 bytes for each VLAN (to accomodate VLAN headers */ #pragma unroll VLAN_MAX_DEPTH @@ -614,7 +543,7 @@ int xdp_dhcp_relay(struct xdp_md *ctx) { bpf_printk("Going to write DHCP option 82 at offset %i", option_offset); /* Insert Option 82 before END option */ - if (write_dhcp_option_82(ctx, option_offset, &vlans, dev)) { + if (write_dhcp_option_82(ctx, option_offset, &vlans, &dev)) { bpf_printk("Could not write DHCP option 82 at offset %i", option_offset); //return XDP_ABORTED; break; From b970472bc6be8036b429a4d3835ff55b0c6b15b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 13 Nov 2021 00:05:48 +0100 Subject: [PATCH 6/7] dhcp_user_xdp: Fix indentation of switch cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The case statements of the option parsing 
'switch' had grown an extra level of indentation. Get rid of that. Signed-off-by: Toke Høiland-Jørgensen --- dhcp-relay/dhcp_user_xdp.c | 92 +++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/dhcp-relay/dhcp_user_xdp.c b/dhcp-relay/dhcp_user_xdp.c index a2ea5e5..803e55e 100644 --- a/dhcp-relay/dhcp_user_xdp.c +++ b/dhcp-relay/dhcp_user_xdp.c @@ -113,54 +113,54 @@ int main(int argc, char **argv) { unsigned char *mac; struct ifreq ifr; - while ((opt = getopt_long(argc, argv, "hui:d:m:s:", options, NULL)) != - -1) { + while ((opt = getopt_long(argc, argv, "hui:d:m:s:", options, NULL)) != -1) { switch (opt) { - case 'i': /* Physical interface */ - strncpy(dev, optarg, IF_NAMESIZE); - dev[IF_NAMESIZE - 1] = '\0'; - ifindex = if_nametoindex(dev); - if (ifindex <= 0) { - printf("Couldn't find ifname:%s \n", dev); - return -EINVAL; - } - break; - case 'd': /* DHCP server address */ - if (inet_aton(optarg, &dhcp_server_addr) == 0) { - fprintf(stderr, - "Couldn't validate DHCP server IP address:%s\n", - optarg); - return -EINVAL; - } - dhcp_server_addr_set = true; - break; - case 's': /* Relay agent address */ - if (inet_aton(optarg, &relay_agent_addr) == 0) { - fprintf(stderr, - "Couldn't validate relay agent IP address:%s\n", - optarg); - return -EINVAL; - } - relay_agent_addr_set = true; - break; - case 'm': /* Mode: skb or native */ - if (strcmp(optarg, "skb") == 0) { - xdp_flags = XDP_FLAGS_SKB_MODE; - } else if (strcmp(optarg, "drv") != 0) { - fprintf(stderr, "Invalid mode: %s\n", optarg); - return -EINVAL; - } - - break; - case 'u': /* Unload XDP program */ - do_unload = 1; - break; - case 'h': /* Help menu */ - print_usage(argv); - exit(0); - default: - fprintf(stderr, "Unknown option %s\n", argv[optind]); + case 'i': /* Physical interface */ + strncpy(dev, optarg, IF_NAMESIZE); + dev[IF_NAMESIZE - 1] = '\0'; + ifindex = if_nametoindex(dev); + if (ifindex <= 0) { + printf("Couldn't find ifname:%s \n", dev); + return 
-EINVAL; + } + break; + case 'd': /* DHCP server address */ + if (inet_aton(optarg, &dhcp_server_addr) == 0) { + fprintf(stderr, + "Couldn't validate DHCP server IP address:%s\n", + optarg); + return -EINVAL; + } + dhcp_server_addr_set = true; + break; + case 's': /* Relay agent address */ + if (inet_aton(optarg, &relay_agent_addr) == 0) { + fprintf(stderr, + "Couldn't validate relay agent IP address:%s\n", + optarg); + return -EINVAL; + } + relay_agent_addr_set = true; + break; + case 'm': /* Mode: skb or native */ + if (strcmp(optarg, "skb") == 0) { + xdp_flags = XDP_FLAGS_SKB_MODE; + } else if (strcmp(optarg, "drv") != 0) { + fprintf(stderr, "Invalid mode: %s\n", optarg); return -EINVAL; + } + + break; + case 'u': /* Unload XDP program */ + do_unload = 1; + break; + + case 'h': /* Help menu */ + print_usage(argv); + exit(0); + default: + fprintf(stderr, "Unknown option %s\n", argv[optind]); + return -EINVAL; } } From fd2b7509805e2308c2b0ba01f677b7257ad09227 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 13 Nov 2021 00:07:30 +0100 Subject: [PATCH 7/7] dhcp_user_xdp: Add -v option to enable verbose logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a -v option to turn on verbose logging in libbpf; this makes it possible to see which relocations libbpf is making, which is useful to see which functions get turned into separate sub functions. 
Signed-off-by: Toke Høiland-Jørgensen --- dhcp-relay/dhcp_user_xdp.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/dhcp-relay/dhcp_user_xdp.c b/dhcp-relay/dhcp_user_xdp.c index 803e55e..3ba4dc7 100644 --- a/dhcp-relay/dhcp_user_xdp.c +++ b/dhcp-relay/dhcp_user_xdp.c @@ -28,6 +28,7 @@ static const struct option options[] = { { "relay-agent-address", required_argument, NULL, 's'}, { "mode", required_argument, NULL, 'm'}, { "unload", no_argument, NULL, 'u'}, + { "verbose", no_argument, NULL, 'v'}, { 0, 0, NULL, 0} }; @@ -89,6 +90,13 @@ int xdp_link_attach(int ifindex, __u32 xdp_flags, int prog_fd) { return 0; } +static int libbpf_print_func(enum libbpf_print_level level, const char *format, + va_list args) +{ + return vfprintf(stderr, format, args); +} + + int main(int argc, char **argv) { char filename[256] = "dhcp_kern_xdp.o"; @@ -113,7 +121,7 @@ int main(int argc, char **argv) { unsigned char *mac; struct ifreq ifr; - while ((opt = getopt_long(argc, argv, "hui:d:m:s:", options, NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "huvi:d:m:s:", options, NULL)) != -1) { switch (opt) { case 'i': /* Physical interface */ strncpy(dev, optarg, IF_NAMESIZE); @@ -155,6 +163,10 @@ int main(int argc, char **argv) { do_unload = 1; break; + case 'v': /* Verbose libbpf logging */ + libbpf_set_print(libbpf_print_func); + break; + case 'h': /* Help menu */ print_usage(argv); exit(0);