Commit 93b1e055 authored by Jakub Kicinski
Browse files
Martin KaFai Lau says:

====================
pull-request: bpf-next 2025-03-06

We've added 6 non-merge commits during the last 13 day(s) which contain
a total of 6 files changed, 230 insertions(+), 56 deletions(-).

The main changes are:

1) Add XDP metadata support for tun driver, from Marcus Wichelmann.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  selftests/bpf: Fix file descriptor assertion in open_tuntap helper
  selftests/bpf: Add test for XDP metadata support in tun driver
  selftests/bpf: Refactor xdp_context_functional test and bpf program
  selftests/bpf: Move open_tuntap to network helpers
  net: tun: Enable transfer of XDP metadata to skb
  net: tun: Enable XDP metadata support
====================

Link: https://patch.msgid.link/20250307055335.441298-1-martin.lau@linux.dev


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 7ae495a5 72aad21d
Loading
Loading
Loading
Loading
+23 −5
Original line number Diff line number Diff line
@@ -1535,7 +1535,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,

static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
				       struct page_frag *alloc_frag, char *buf,
				       int buflen, int len, int pad)
				       int buflen, int len, int pad,
				       int metasize)
{
	struct sk_buff *skb = build_skb(buf, buflen);

@@ -1544,6 +1545,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile,

	skb_reserve(skb, pad);
	skb_put(skb, len);
	if (metasize)
		skb_metadata_set(skb, metasize);
	skb_set_owner_w(skb, tfile->socket.sk);

	get_page(alloc_frag->page);
@@ -1603,6 +1606,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
	char *buf;
	size_t copied;
	int pad = TUN_RX_PAD;
	int metasize = 0;
	int err = 0;

	rcu_read_lock();
@@ -1630,7 +1634,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
	if (hdr->gso_type || !xdp_prog) {
		*skb_xdp = 1;
		return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
				       pad);
				       pad, metasize);
	}

	*skb_xdp = 0;
@@ -1644,7 +1648,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
		u32 act;

		xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
		xdp_prepare_buff(&xdp, buf, pad, len, false);
		xdp_prepare_buff(&xdp, buf, pad, len, true);

		act = bpf_prog_run_xdp(xdp_prog, &xdp);
		if (act == XDP_REDIRECT || act == XDP_TX) {
@@ -1665,12 +1669,18 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,

		pad = xdp.data - xdp.data_hard_start;
		len = xdp.data_end - xdp.data;

		/* It is known that the xdp_buff was prepared with metadata
		 * support, so the metasize will never be negative.
		 */
		metasize = xdp.data - xdp.data_meta;
	}
	bpf_net_ctx_clear(bpf_net_ctx);
	rcu_read_unlock();
	local_bh_enable();

	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad);
	return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad,
			       metasize);

out:
	bpf_net_ctx_clear(bpf_net_ctx);
@@ -2353,6 +2363,7 @@ static int tun_xdp_one(struct tun_struct *tun,
	struct sk_buff_head *queue;
	u32 rxhash = 0, act;
	int buflen = hdr->buflen;
	int metasize = 0;
	int ret = 0;
	bool skb_xdp = false;
	struct page *page;
@@ -2368,7 +2379,6 @@ static int tun_xdp_one(struct tun_struct *tun,
		}

		xdp_init_buff(xdp, buflen, &tfile->xdp_rxq);
		xdp_set_data_meta_invalid(xdp);

		act = bpf_prog_run_xdp(xdp_prog, xdp);
		ret = tun_xdp_act(tun, xdp_prog, xdp, act);
@@ -2408,6 +2418,14 @@ static int tun_xdp_one(struct tun_struct *tun,
	skb_reserve(skb, xdp->data - xdp->data_hard_start);
	skb_put(skb, xdp->data_end - xdp->data);

	/* The externally provided xdp_buff may have no metadata support, which
	 * is marked by xdp->data_meta being xdp->data + 1. This will lead to a
	 * metasize of -1 and is the reason why the condition checks for > 0.
	 */
	metasize = xdp->data - xdp->data_meta;
	if (metasize > 0)
		skb_metadata_set(skb, metasize);

	if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) {
		atomic_long_inc(&tun->rx_frame_errors);
		kfree_skb(skb);
+28 −0
Original line number Diff line number Diff line
@@ -548,6 +548,34 @@ void close_netns(struct nstoken *token)
	free(token);
}

/*
 * Open /dev/net/tun and bind the descriptor to the interface @dev_name.
 *
 * @dev_name: interface name handed to TUNSETIFF; at most IFNAMSIZ - 1 bytes
 *            of it are used, the rest is truncated.
 * @need_mac: request IFF_TAP when true, IFF_TUN otherwise.
 *
 * IFF_NO_PI is always requested. The descriptor is put into non-blocking
 * mode before it is handed back.
 *
 * Returns the file descriptor on success. Returns -1 when open(), the
 * TUNSETIFF ioctl or fcntl() fails; a descriptor that was already opened is
 * closed first, and the failing step is reported through the ASSERT_* macros.
 */
int open_tuntap(const char *dev_name, bool need_mac)
{
	int err = 0;
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)"))
		return -1;

	/* Only the name and the flags are filled in below; clear everything
	 * else so the ioctl never sees leftover stack contents.
	 */
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
	strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
	ifr.ifr_name[IFNAMSIZ - 1] = '\0';

	err = ioctl(fd, TUNSETIFF, &ifr);
	if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
		close(fd);
		return -1;
	}

	err = fcntl(fd, F_SETFL, O_NONBLOCK);
	if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
		close(fd);
		return -1;
	}

	return fd;
}

int get_socket_local_port(int sock_fd)
{
	struct sockaddr_storage addr;
+3 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
typedef __u16 __sum16;
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_tun.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/ethtool.h>
@@ -85,6 +86,8 @@ int get_socket_local_port(int sock_fd);
int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);

int open_tuntap(const char *dev_name, bool need_mac);

struct nstoken;
/**
 * open_netns() - Switch to specified network namespace by name.
+0 −29
Original line number Diff line number Diff line
@@ -5,7 +5,6 @@

#include <time.h>
#include <net/if.h>
#include <linux/if_tun.h>
#include <linux/icmp.h>

#include "test_progs.h"
@@ -37,34 +36,6 @@ static inline int netns_delete(void)
	return system("ip netns del " NETNS ">/dev/null 2>&1");
}

/*
 * Open /dev/net/tun and bind the descriptor to the interface @dev_name.
 *
 * @dev_name: interface name handed to TUNSETIFF; at most IFNAMSIZ - 1 bytes
 *            of it are used, the rest is truncated.
 * @need_mac: request IFF_TAP when true, IFF_TUN otherwise.
 *
 * IFF_NO_PI is always requested. The descriptor is put into non-blocking
 * mode before it is handed back.
 *
 * Returns the file descriptor on success. Returns -1 when open(), the
 * TUNSETIFF ioctl or fcntl() fails; a descriptor that was already opened is
 * closed first, and the failing step is reported through the ASSERT_* macros.
 */
static int open_tuntap(const char *dev_name, bool need_mac)
{
	int err = 0;
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	/* 0 is a valid descriptor, so only negative values mean failure. */
	if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)"))
		return -1;

	/* Only the name and the flags are filled in below; clear everything
	 * else so the ioctl never sees leftover stack contents.
	 */
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
	strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
	ifr.ifr_name[IFNAMSIZ - 1] = '\0';

	err = ioctl(fd, TUNSETIFF, &ifr);
	if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
		close(fd);
		return -1;
	}

	err = fcntl(fd, F_SETFL, O_NONBLOCK);
	if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
		close(fd);
		return -1;
	}

	return fd;
}

#define ICMP_PAYLOAD_SIZE     100

/* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */
+138 −7
Original line number Diff line number Diff line
@@ -4,12 +4,20 @@
#include "test_xdp_context_test_run.skel.h"
#include "test_xdp_meta.skel.h"

#define TX_ADDR "10.0.0.1"
#define RX_ADDR "10.0.0.2"
#define RX_NAME "veth0"
#define TX_NAME "veth1"
#define TX_NETNS "xdp_context_tx"
#define RX_NETNS "xdp_context_rx"
#define TAP_NAME "tap0"
#define TAP_NETNS "xdp_context_tuntap"

/* Fixed, recognisable byte pattern used as the packet payload by the tests in
 * this file: it is copied behind a zeroed ethernet header when a test packet
 * is built, and compared against the contents of the test_result map
 * afterwards.
 */
#define TEST_PAYLOAD_LEN 32
static const __u8 test_payload[TEST_PAYLOAD_LEN] = {
	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
	0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
};

void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
			    __u32 data_meta, __u32 data, __u32 data_end,
@@ -112,7 +120,59 @@ void test_xdp_context_test_run(void)
	test_xdp_context_test_run__destroy(skel);
}

void test_xdp_context_functional(void)
/*
 * Transmit one raw ethernet frame carrying test_payload on the interface
 * identified by @ifindex.
 *
 * The frame is sent through an AF_PACKET/SOCK_RAW socket that only lives for
 * the duration of this call.
 *
 * Returns 0 when the whole frame was sent, -1 when the socket could not be
 * created or sendto() did not report the full frame length.
 */
static int send_test_packet(int ifindex)
{
	struct sockaddr_ll dest = {
		.sll_family = PF_PACKET,
		.sll_ifindex = ifindex,
		.sll_halen = ETH_ALEN,
	};
	__u8 frame[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
	/* The ethernet header content does not matter for this test, so an
	 * all-zero header is good enough.
	 */
	struct ethhdr hdr = { 0 };
	int sent, fd, ret = -1;

	memcpy(frame, &hdr, sizeof(hdr));
	memcpy(frame + sizeof(hdr), test_payload, TEST_PAYLOAD_LEN);

	fd = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
	if (!ASSERT_GE(fd, 0, "socket"))
		return -1;

	sent = sendto(fd, frame, sizeof(frame), 0, (struct sockaddr *)&dest,
		      sizeof(dest));
	if (ASSERT_EQ(sent, sizeof(frame), "sendto"))
		ret = 0;

	/* Single exit for both outcomes once the socket exists. */
	close(fd);
	return ret;
}

/*
 * Check that element 0 of the skeleton's test_result map holds exactly the
 * bytes of test_payload.
 *
 * A failed lookup is reported and ends the check early; otherwise the stored
 * value is compared against the payload and a mismatch is reported through
 * ASSERT_MEMEQ.
 */
static void assert_test_result(struct test_xdp_meta *skel)
{
	__u8 stored[TEST_PAYLOAD_LEN];
	__u32 key = 0;
	int err;

	err = bpf_map__lookup_elem(skel->maps.test_result, &key, sizeof(key),
				   stored, sizeof(stored), BPF_ANY);
	if (ASSERT_OK(err, "lookup test_result"))
		ASSERT_MEMEQ(stored, test_payload, TEST_PAYLOAD_LEN,
			     "test_result map contains test payload");
}

void test_xdp_context_veth(void)
{
	LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
	LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
@@ -120,7 +180,7 @@ void test_xdp_context_functional(void)
	struct bpf_program *tc_prog, *xdp_prog;
	struct test_xdp_meta *skel = NULL;
	struct nstoken *nstoken = NULL;
	int rx_ifindex;
	int rx_ifindex, tx_ifindex;
	int ret;

	tx_ns = netns_new(TX_NETNS, false);
@@ -138,7 +198,6 @@ void test_xdp_context_functional(void)
	if (!ASSERT_OK_PTR(nstoken, "setns rx_ns"))
		goto close;

	SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME);
	SYS(close, "ip link set dev " RX_NAME " up");

	skel = test_xdp_meta__open_and_load();
@@ -179,9 +238,17 @@ void test_xdp_context_functional(void)
	if (!ASSERT_OK_PTR(nstoken, "setns tx_ns"))
		goto close;

	SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME);
	SYS(close, "ip link set dev " TX_NAME " up");
	ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping");

	tx_ifindex = if_nametoindex(TX_NAME);
	if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx"))
		goto close;

	ret = send_test_packet(tx_ifindex);
	if (!ASSERT_OK(ret, "send_test_packet"))
		goto close;

	assert_test_result(skel);

close:
	close_netns(nstoken);
@@ -190,3 +257,67 @@ void test_xdp_context_functional(void)
	netns_free(tx_ns);
}

/*
 * Functional test for the tap device path.
 *
 * Steps: create a network namespace, open the tap device TAP_NAME in it and
 * bring it up, load the test_xdp_meta skeleton, attach its ing_cls program at
 * TC ingress and its ing_xdp program as XDP program on the tap interface,
 * write one frame (zeroed ethernet header followed by test_payload) into the
 * tap file descriptor and finally check that the test_result map contains the
 * payload.
 *
 * The BPF programs themselves live elsewhere; this function only relies on
 * them filling test_result when the frame is processed.
 *
 * All resources are released at the "close" label. Nothing is detached
 * explicitly before the namespace is freed.
 */
void test_xdp_context_tuntap(void)
{
	LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
	LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
	struct netns_obj *ns = NULL;
	struct test_xdp_meta *skel = NULL;
	__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
	/* The ethernet header is not relevant for this test and doesn't need to
	 * be meaningful.
	 */
	struct ethhdr eth = { 0 };
	int tap_fd = -1;
	int tap_ifindex;
	int ret;

	ns = netns_new(TAP_NETNS, true);
	if (!ASSERT_OK_PTR(ns, "create and open ns"))
		return;

	tap_fd = open_tuntap(TAP_NAME, true);
	if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
		goto close;

	SYS(close, "ip link set dev " TAP_NAME " up");

	skel = test_xdp_meta__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
		goto close;

	/* if_nametoindex() reports failure by returning 0, never a negative
	 * value, so the index has to be strictly positive.
	 */
	tap_ifindex = if_nametoindex(TAP_NAME);
	if (!ASSERT_GT(tap_ifindex, 0, "if_nametoindex"))
		goto close;

	tc_hook.ifindex = tap_ifindex;
	ret = bpf_tc_hook_create(&tc_hook);
	if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
		goto close;

	tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls);
	ret = bpf_tc_attach(&tc_hook, &tc_opts);
	if (!ASSERT_OK(ret, "bpf_tc_attach"))
		goto close;

	ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp),
			     0, NULL);
	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
		goto close;

	memcpy(packet, &eth, sizeof(eth));
	memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);

	/* Writing to the tap descriptor injects the frame into the device. */
	ret = write(tap_fd, packet, sizeof(packet));
	if (!ASSERT_EQ(ret, sizeof(packet), "write packet"))
		goto close;

	assert_test_result(skel);

close:
	if (tap_fd >= 0)
		close(tap_fd);
	test_xdp_meta__destroy(skel);
	netns_free(ns);
}
Loading