Commit 403fae59 authored by Jakub Sitnicki's avatar Jakub Sitnicki Committed by Martin KaFai Lau
Browse files

selftests/bpf: Cover metadata access from a modified skb clone



Demonstrate that, when processing an skb clone, the metadata gets truncated
if the program contains a direct write to either the payload or the
metadata, due to an implicit unclone in the prologue, and otherwise the
dynptr to the metadata is limited to being read-only.

Signed-off-by: default avatarJakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: default avatarMartin KaFai Lau <martin.lau@kernel.org>
Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-9-8a39e636e0fb@cloudflare.com
parent bd1b51b3
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -61,6 +61,7 @@ CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
CONFIG_NET_ACT_GACT=y
CONFIG_NET_ACT_MIRRED=y
CONFIG_NET_ACT_SKBMOD=y
CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
+113 −10
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#define TX_NETNS "xdp_context_tx"
#define RX_NETNS "xdp_context_rx"
#define TAP_NAME "tap0"
#define DUMMY_NAME "dum0"
#define TAP_NETNS "xdp_context_tuntap"

#define TEST_PAYLOAD_LEN 32
@@ -156,6 +157,22 @@ static int send_test_packet(int ifindex)
	return -1;
}

static int write_test_packet(int tap_fd)
{
	__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
	int n;

	/* The ethernet header doesn't need to be valid for this test */
	memset(packet, 0, sizeof(struct ethhdr));
	memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN);

	n = write(tap_fd, packet, sizeof(packet));
	if (!ASSERT_EQ(n, sizeof(packet), "write packet"))
		return -1;

	return 0;
}

static void assert_test_result(const struct bpf_map *result_map)
{
	int err;
@@ -276,7 +293,6 @@ static void test_tuntap(struct bpf_program *xdp_prog,
	LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
	LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
	struct netns_obj *ns = NULL;
	__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
	int tap_fd = -1;
	int tap_ifindex;
	int ret;
@@ -322,19 +338,82 @@ static void test_tuntap(struct bpf_program *xdp_prog,
	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
		goto close;

	/* The ethernet header is not relevant for this test and doesn't need to
	 * be meaningful.
	 */
	struct ethhdr eth = { 0 };
	ret = write_test_packet(tap_fd);
	if (!ASSERT_OK(ret, "write_test_packet"))
		goto close;

	memcpy(packet, &eth, sizeof(eth));
	memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
	assert_test_result(result_map);

close:
	if (tap_fd >= 0)
		close(tap_fd);
	netns_free(ns);
}

/* Write a packet to a tap dev and copy it to ingress of a dummy dev */
static void test_tuntap_mirred(struct bpf_program *xdp_prog,
			       struct bpf_program *tc_prog,
			       bool *test_pass)
{
	LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
	LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
	struct netns_obj *ns = NULL;
	int dummy_ifindex;
	int tap_fd = -1;
	int tap_ifindex;
	int ret;

	ret = write(tap_fd, packet, sizeof(packet));
	if (!ASSERT_EQ(ret, sizeof(packet), "write packet"))
	*test_pass = false;

	ns = netns_new(TAP_NETNS, true);
	if (!ASSERT_OK_PTR(ns, "netns_new"))
		return;

	/* Setup dummy interface */
	SYS(close, "ip link add name " DUMMY_NAME " type dummy");
	SYS(close, "ip link set dev " DUMMY_NAME " up");

	dummy_ifindex = if_nametoindex(DUMMY_NAME);
	if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex"))
		goto close;

	assert_test_result(result_map);
	tc_hook.ifindex = dummy_ifindex;
	ret = bpf_tc_hook_create(&tc_hook);
	if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
		goto close;

	tc_opts.prog_fd = bpf_program__fd(tc_prog);
	ret = bpf_tc_attach(&tc_hook, &tc_opts);
	if (!ASSERT_OK(ret, "bpf_tc_attach"))
		goto close;

	/* Setup TAP interface */
	tap_fd = open_tuntap(TAP_NAME, true);
	if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
		goto close;

	SYS(close, "ip link set dev " TAP_NAME " up");

	tap_ifindex = if_nametoindex(TAP_NAME);
	if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
		goto close;

	ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL);
	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
		goto close;

	/* Copy all packets received from TAP to dummy ingress */
	SYS(close, "tc qdisc add dev " TAP_NAME " clsact");
	SYS(close, "tc filter add dev " TAP_NAME " ingress "
		   "protocol all matchall "
		   "action mirred ingress mirror dev " DUMMY_NAME);

	/* Receive a packet on TAP */
	ret = write_test_packet(tap_fd);
	if (!ASSERT_OK(ret, "write_test_packet"))
		goto close;

	ASSERT_TRUE(*test_pass, "test_pass");

close:
	if (tap_fd >= 0)
@@ -385,6 +464,30 @@ void test_xdp_context_tuntap(void)
			    skel->progs.ing_cls_dynptr_offset_oob,
			    skel->progs.ing_cls,
			    skel->maps.test_result);
	if (test__start_subtest("clone_data_meta_empty_on_data_write"))
		test_tuntap_mirred(skel->progs.ing_xdp,
				   skel->progs.clone_data_meta_empty_on_data_write,
				   &skel->bss->test_pass);
	if (test__start_subtest("clone_data_meta_empty_on_meta_write"))
		test_tuntap_mirred(skel->progs.ing_xdp,
				   skel->progs.clone_data_meta_empty_on_meta_write,
				   &skel->bss->test_pass);
	if (test__start_subtest("clone_dynptr_empty_on_data_slice_write"))
		test_tuntap_mirred(skel->progs.ing_xdp,
				   skel->progs.clone_dynptr_empty_on_data_slice_write,
				   &skel->bss->test_pass);
	if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write"))
		test_tuntap_mirred(skel->progs.ing_xdp,
				   skel->progs.clone_dynptr_empty_on_meta_slice_write,
				   &skel->bss->test_pass);
	if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write"))
		test_tuntap_mirred(skel->progs.ing_xdp,
				   skel->progs.clone_dynptr_rdonly_before_data_dynptr_write,
				   &skel->bss->test_pass);
	if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write"))
		test_tuntap_mirred(skel->progs.ing_xdp,
				   skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write,
				   &skel->bss->test_pass);

	test_xdp_meta__destroy(skel);
}
+191 −0
Original line number Diff line number Diff line
@@ -26,6 +26,8 @@ struct {
	__uint(value_size, META_SIZE);
} test_result SEC(".maps");

bool test_pass;

SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
@@ -301,4 +303,193 @@ int ing_xdp(struct xdp_md *ctx)
	return XDP_PASS;
}

/*
 * Check that skb->data_meta..skb->data is empty if prog writes to packet
 * _payload_ using packet pointers. Applies only to cloned skbs.
 */
SEC("tc")
int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx)
{
	struct ethhdr *eth = ctx_ptr(ctx, data);

	if (eth + 1 > ctx_ptr(ctx, data_end))
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
		goto out;

	/* Expect no metadata */
	if (ctx->data_meta != ctx->data)
		goto out;

	/* Packet write to trigger unclone in prologue */
	eth->h_proto = 42;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that skb->data_meta..skb->data is empty if prog writes to packet
 * _metadata_ using packet pointers. Applies only to cloned skbs.
 */
SEC("tc")
int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx)
{
	struct ethhdr *eth = ctx_ptr(ctx, data);
	__u8 *md = ctx_ptr(ctx, data_meta);

	if (eth + 1 > ctx_ptr(ctx, data_end))
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
		goto out;

	if (md + 1 > ctx_ptr(ctx, data)) {
		/* Expect no metadata */
		test_pass = true;
	} else {
		/* Metadata write to trigger unclone in prologue */
		*md = 42;
	}
out:
	return TC_ACT_SHOT;
}

/*
 * Check that skb_meta dynptr is writable but empty if prog writes to packet
 * _payload_ using a dynptr slice. Applies only to cloned skbs.
 */
SEC("tc")
int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	struct ethhdr *eth;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
		goto out;

	/* Expect no metadata */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
		goto out;

	/* Packet write to trigger unclone in prologue */
	eth->h_proto = 42;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that skb_meta dynptr is writable but empty if prog writes to packet
 * _metadata_ using a dynptr slice. Applies only to cloned skbs.
 */
SEC("tc")
int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	const struct ethhdr *eth;
	__u8 *md;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
		goto out;

	/* Expect no metadata */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
		goto out;

	/* Metadata write to trigger unclone in prologue */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
	if (md)
		*md = 42;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that skb_meta dynptr is read-only before prog writes to packet payload
 * using dynptr_write helper. Applies only to cloned skbs.
 */
SEC("tc")
int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	const struct ethhdr *eth;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
		goto out;

	/* Expect read-only metadata before unclone */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
		goto out;

	/* Helper write to payload will unclone the packet */
	bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);

	/* Expect no metadata after unclone */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0)
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that skb_meta dynptr is read-only if prog writes to packet
 * metadata using dynptr_write helper. Applies only to cloned skbs.
 */
SEC("tc")
int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	const struct ethhdr *eth;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (eth->h_proto != 0)
		goto out;

	/* Expect read-only metadata */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
		goto out;

	/* Metadata write. Expect failure. */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL)
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

char _license[] SEC("license") = "GPL";