Commit f0709263 authored by Michal Luczaj's avatar Michal Luczaj Committed by Martin KaFai Lau
Browse files

selftests/bpf: Add selftest for sockmap/hashmap redirection



Test redirection logic. All supported and unsupported redirect combinations
are tested for success and failure respectively.

BPF_MAP_TYPE_SOCKMAP
BPF_MAP_TYPE_SOCKHASH
	x
sk_msg-to-egress
sk_msg-to-ingress
sk_skb-to-egress
sk_skb-to-ingress
	x
AF_INET, SOCK_STREAM
AF_INET6, SOCK_STREAM
AF_INET, SOCK_DGRAM
AF_INET6, SOCK_DGRAM
AF_UNIX, SOCK_STREAM
AF_UNIX, SOCK_DGRAM
AF_VSOCK, SOCK_STREAM
AF_VSOCK, SOCK_SEQPACKET

Suggested-by: default avatarJakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: default avatarMichal Luczaj <mhal@rbox.co>
Signed-off-by: default avatarMartin KaFai Lau <martin.lau@kernel.org>
Acked-by: default avatarJohn Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/r/20250515-selftests-sockmap-redir-v3-5-a1ea723f7e7e@rbox.co
parent f266905b
Loading
Loading
Loading
Loading
+465 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * Test for sockmap/sockhash redirection.
 *
 * BPF_MAP_TYPE_SOCKMAP
 * BPF_MAP_TYPE_SOCKHASH
 *	x
 * sk_msg-to-egress
 * sk_msg-to-ingress
 * sk_skb-to-egress
 * sk_skb-to-ingress
 *	x
 * AF_INET, SOCK_STREAM
 * AF_INET6, SOCK_STREAM
 * AF_INET, SOCK_DGRAM
 * AF_INET6, SOCK_DGRAM
 * AF_UNIX, SOCK_STREAM
 * AF_UNIX, SOCK_DGRAM
 * AF_VSOCK, SOCK_STREAM
 * AF_VSOCK, SOCK_SEQPACKET
 */

#include <errno.h>
#include <error.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <linux/string.h>
#include <linux/vm_sockets.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include "linux/const.h"
#include "test_progs.h"
#include "sockmap_helpers.h"
#include "test_sockmap_redir.skel.h"

/* The meaning of SUPPORTED is "will redirect packet as expected".
 */
#define SUPPORTED		_BITUL(0)

/* Note on sk_skb-to-ingress ->af_vsock:
 *
 * Peer socket may receive the packet some time after the return from sendmsg().
 * In a typical usage scenario, recvmsg() will block until the redirected packet
 * appears in the destination queue, or timeout if the packet was dropped. By
 * that point, the verdict map has already been updated to reflect what has
 * happened.
 *
 * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg()
 * takes place. Which means we may race the execution of the verdict logic and
 * read map_verd before it has been updated, i.e. we might observe
 * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1.
 *
 * This confuses the selftest logic: if there was no packet dropped, where's the
 * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0
 * (which implies the verdict program has not been ran) just re-read the verdict
 * map again.
 */
#define UNSUPPORTED_RACY_VERD	_BITUL(1)

enum prog_type {
	SK_MSG_EGRESS,
	SK_MSG_INGRESS,
	SK_SKB_EGRESS,
	SK_SKB_INGRESS,
};

enum {
	SEND_INNER = 0,
	SEND_OUTER,
};

enum {
	RECV_INNER = 0,
	RECV_OUTER,
};

struct maps {
	int in;
	int out;
	int verd;
};

struct combo_spec {
	enum prog_type prog_type;
	const char *in, *out;
};

struct redir_spec {
	const char *name;
	int idx_send;
	int idx_recv;
	enum prog_type prog_type;
};

struct socket_spec {
	int family;
	int sotype;
	int send_flags;
	int in[2];
	int out[2];
};

static int socket_spec_pairs(struct socket_spec *s)
{
	return create_socket_pairs(s->family, s->sotype,
				   &s->in[0], &s->out[0],
				   &s->in[1], &s->out[1]);
}

static void socket_spec_close(struct socket_spec *s)
{
	xclose(s->in[0]);
	xclose(s->in[1]);
	xclose(s->out[0]);
	xclose(s->out[1]);
}

static void get_redir_params(struct redir_spec *redir,
			     struct test_sockmap_redir *skel, int *prog_fd,
			     enum bpf_attach_type *attach_type,
			     int *redirect_flags)
{
	enum prog_type type = redir->prog_type;
	struct bpf_program *prog;
	bool sk_msg;

	sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS;
	prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict;

	*prog_fd = bpf_program__fd(prog);
	*attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT;

	if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS)
		*redirect_flags = BPF_F_INGRESS;
	else
		*redirect_flags = 0;
}

static void try_recv(const char *prefix, int fd, int flags, bool expect_success)
{
	ssize_t n;
	char buf;

	errno = 0;
	n = recv(fd, &buf, 1, flags);
	if (n < 0 && expect_success)
		FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n);
	if (!n && !expect_success)
		FAIL("%s: expected failure: retval=%zd", prefix, n);
}

static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out,
			       int sd_recv, int map_verd, int status)
{
	unsigned int drop, pass;
	char recv_buf;
	ssize_t n;

get_verdict:
	if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) ||
	    xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass))
		return;

	if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) {
		sched_yield();
		goto get_verdict;
	}

	if (pass != 0) {
		FAIL("unsupported: wanted verdict pass 0, have %u", pass);
		return;
	}

	/* If nothing was dropped, packet should have reached the peer */
	if (drop == 0) {
		errno = 0;
		n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
		if (n != 1)
			FAIL_ERRNO("unsupported: packet missing, retval=%zd", n);
	}

	/* Ensure queues are empty */
	try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false);
	if (sd_in != sd_send)
		try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false);

	try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false);
	if (sd_recv != sd_out)
		try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false);
}

static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer,
				 int sd_in, int sd_out, int sd_recv,
				 struct maps *maps, int status)
{
	unsigned int drop, pass;
	char *send_buf = "ab";
	char recv_buf = '\0';
	ssize_t n, len = 1;

	/* Zero out the verdict map */
	if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) ||
	    xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY))
		return;

	if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST))
		return;

	if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST))
		goto del_in;

	/* Last byte is OOB data when send_flags has MSG_OOB bit set */
	if (send_flags & MSG_OOB)
		len++;
	n = send(sd_send, send_buf, len, send_flags);
	if (n >= 0 && n < len)
		FAIL("incomplete send");
	if (n < 0) {
		/* sk_msg redirect combo not supported? */
		if (status & SUPPORTED || errno != EACCES)
			FAIL_ERRNO("send");
		goto out;
	}

	if (!(status & SUPPORTED)) {
		handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv,
				   maps->verd, status);
		goto out;
	}

	errno = 0;
	n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
	if (n != 1) {
		FAIL_ERRNO("recv_timeout()");
		goto out;
	}

	/* Check verdict _after_ recv(); af_vsock may need time to catch up */
	if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) ||
	    xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass))
		goto out;

	if (drop != 0 || pass != 1)
		FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u",
		     drop, pass);

	if (recv_buf != send_buf[0])
		FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]);

	if (send_flags & MSG_OOB) {
		/* Fail reading OOB while in sockmap */
		try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out,
			 MSG_OOB | MSG_DONTWAIT, false);

		/* Remove sd_out from sockmap */
		xbpf_map_delete_elem(maps->out, &u32(0));

		/* Check that OOB was dropped on redirect */
		try_recv("recv(sd_out, MSG_OOB)", sd_out,
			 MSG_OOB | MSG_DONTWAIT, false);

		goto del_in;
	}
out:
	xbpf_map_delete_elem(maps->out, &u32(0));
del_in:
	xbpf_map_delete_elem(maps->in, &u32(0));
}

static int is_redir_supported(enum prog_type type, const char *in,
			      const char *out)
{
	/* Matching based on strings returned by socket_kind_to_str():
	 * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq
	 * Plus a wildcard: any
	 * Not in use: u_seq, v_dgr
	 */
	struct combo_spec *c, combos[] = {
		/* Send to local: TCP -> any, but vsock */
		{ SK_MSG_INGRESS,	"tcp",	"tcp"	},
		{ SK_MSG_INGRESS,	"tcp",	"udp"	},
		{ SK_MSG_INGRESS,	"tcp",	"u_str"	},
		{ SK_MSG_INGRESS,	"tcp",	"u_dgr"	},

		/* Send to egress: TCP -> TCP */
		{ SK_MSG_EGRESS,	"tcp",	"tcp"	},

		/* Ingress to egress: any -> any */
		{ SK_SKB_EGRESS,	"any",	"any"	},

		/* Ingress to local: any -> any, but vsock */
		{ SK_SKB_INGRESS,	"any",	"tcp"	},
		{ SK_SKB_INGRESS,	"any",	"udp"	},
		{ SK_SKB_INGRESS,	"any",	"u_str"	},
		{ SK_SKB_INGRESS,	"any",	"u_dgr"	},
	};

	for (c = combos; c < combos + ARRAY_SIZE(combos); c++) {
		if (c->prog_type == type &&
		    (!strcmp(c->in, "any") || strstarts(in, c->in)) &&
		    (!strcmp(c->out, "any") || strstarts(out, c->out)))
			return SUPPORTED;
	}

	return 0;
}

static int get_support_status(enum prog_type type, const char *in,
			      const char *out)
{
	int status = is_redir_supported(type, in, out);

	if (type == SK_SKB_INGRESS && strstarts(out, "v_"))
		status |= UNSUPPORTED_RACY_VERD;

	return status;
}

static void test_socket(enum bpf_map_type type, struct redir_spec *redir,
			struct maps *maps, struct socket_spec *s_in,
			struct socket_spec *s_out)
{
	int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status;
	const char *in_str, *out_str;
	char s[MAX_TEST_NAME];

	fd_in = s_in->in[0];
	fd_out = s_out->out[0];
	fd_send = s_in->in[redir->idx_send];
	fd_peer = s_in->in[redir->idx_send ^ 1];
	fd_recv = s_out->out[redir->idx_recv];
	flags = s_in->send_flags;

	in_str = socket_kind_to_str(fd_in);
	out_str = socket_kind_to_str(fd_out);
	status = get_support_status(redir->prog_type, in_str, out_str);

	snprintf(s, sizeof(s),
		 "%-4s %-17s %-5s %s %-5s%6s",
		 /* hash sk_skb-to-ingress u_str → v_str (OOB) */
		 type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash",
		 redir->name,
		 in_str,
		 status & SUPPORTED ? "→" : " ",
		 out_str,
		 (flags & MSG_OOB) ? "(OOB)" : "");

	if (!test__start_subtest(s))
		return;

	test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv,
			     maps, status);
}

static void test_redir(enum bpf_map_type type, struct redir_spec *redir,
		       struct maps *maps)
{
	struct socket_spec *s, sockets[] = {
		{ AF_INET, SOCK_STREAM },
		// { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */
		{ AF_INET6, SOCK_STREAM },
		{ AF_INET, SOCK_DGRAM },
		{ AF_INET6, SOCK_DGRAM },
		{ AF_UNIX, SOCK_STREAM },
		{ AF_UNIX, SOCK_STREAM, MSG_OOB },
		{ AF_UNIX, SOCK_DGRAM },
		// { AF_UNIX, SOCK_SEQPACKET},	/* Unsupported BPF_MAP_UPDATE_ELEM */
		{ AF_VSOCK, SOCK_STREAM },
		// { AF_VSOCK, SOCK_DGRAM },	/* Unsupported socket() */
		{ AF_VSOCK, SOCK_SEQPACKET },
	};

	for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
		if (socket_spec_pairs(s))
			goto out;

	/* Intra-proto */
	for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
		test_socket(type, redir, maps, s, s);

	/* Cross-proto */
	for (int i = 0; i < ARRAY_SIZE(sockets); i++) {
		for (int j = 0; j < ARRAY_SIZE(sockets); j++) {
			struct socket_spec *out = &sockets[j];
			struct socket_spec *in = &sockets[i];

			/* Skip intra-proto and between variants */
			if (out->send_flags ||
			    (in->family == out->family &&
			     in->sotype == out->sotype))
				continue;

			test_socket(type, redir, maps, in, out);
		}
	}
out:
	while (--s >= sockets)
		socket_spec_close(s);
}

static void test_map(enum bpf_map_type type)
{
	struct redir_spec *r, redirs[] = {
		{ "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS },
		{ "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS },
		{ "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS },
		{ "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS },
	};

	for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) {
		enum bpf_attach_type attach_type;
		struct test_sockmap_redir *skel;
		struct maps maps;
		int prog_fd;

		skel = test_sockmap_redir__open_and_load();
		if (!skel) {
			FAIL("open_and_load");
			return;
		}

		switch (type) {
		case BPF_MAP_TYPE_SOCKMAP:
			maps.in = bpf_map__fd(skel->maps.nop_map);
			maps.out = bpf_map__fd(skel->maps.sock_map);
			break;
		case BPF_MAP_TYPE_SOCKHASH:
			maps.in = bpf_map__fd(skel->maps.nop_hash);
			maps.out = bpf_map__fd(skel->maps.sock_hash);
			break;
		default:
			FAIL("Unsupported bpf_map_type");
			return;
		}

		skel->bss->redirect_type = type;
		maps.verd = bpf_map__fd(skel->maps.verdict_map);
		get_redir_params(r, skel, &prog_fd, &attach_type,
				 &skel->bss->redirect_flags);

		if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0))
			return;

		test_redir(type, r, &maps);

		if (xbpf_prog_detach2(prog_fd, maps.in, attach_type))
			return;

		test_sockmap_redir__destroy(skel);
	}
}

void serial_test_sockmap_redir(void)
{
	test_map(BPF_MAP_TYPE_SOCKMAP);
	test_map(BPF_MAP_TYPE_SOCKHASH);
}