Commit d137af87 authored by Martin KaFai Lau
Browse files

Merge branch 'netkit: Add option for scrubbing skb meta data'

Daniel Borkmann says:

=====================
This series is to add a NETKIT_SCRUB_NONE mode such that
the netkit device will not scrub the skb->{mark, priority} before
running the netkit bpf prog. This will allow the netkit bpf prog to
implement different policies based on the skb->{mark, priority}.

With the default mode, NETKIT_SCRUB_DEFAULT, the netkit device scrubs
skb->{mark, priority} before calling the netkit bpf prog whenever the
peer device resides in a different network namespace. This is the
existing behavior of the netkit device, so this change does not
affect existing netkit users.
=====================

Link: https://lore.kernel.org/r/20241004101335.117711-1-daniel@iogearbox.net


Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents 8f5b408d 716fa7da
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -920,6 +920,13 @@ definitions:
      - name: l2
      - name: l3

  -
    name: netkit-scrub
    type: enum
    entries:
      - name: none
      - name: default

attribute-sets:
  -
    name: link-attrs
@@ -2147,6 +2154,14 @@ attribute-sets:
        name: mode
        type: u32
        enum: netkit-mode
      -
        name: scrub
        type: u32
        enum: netkit-scrub
      -
        name: peer-scrub
        type: u32
        enum: netkit-scrub

sub-messages:
  -
+57 −34
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ struct netkit {
	struct net_device __rcu *peer;
	struct bpf_mprog_entry __rcu *active;
	enum netkit_action policy;
	enum netkit_scrub scrub;
	struct bpf_mprog_bundle	bundle;

	/* Needed in slow-path */
@@ -50,12 +51,24 @@ netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
	return ret;
}

static void netkit_prep_forward(struct sk_buff *skb, bool xnet)
static void netkit_xnet(struct sk_buff *skb)
{
	skb_scrub_packet(skb, xnet);
	skb->priority = 0;
	skb->mark = 0;
}

/* Prepare @skb for being handed over to the peer device.
 *
 * Unconditionally calls skb_scrub_packet() with xnet == false,
 * nf_skip_egress() and skb_reset_mac_header(). When @xnet is set
 * (the caller passes it as "device and peer live in different
 * network namespaces"), ipvs_reset() and skb_clear_tstamp() are
 * applied on top, and skb->{mark,priority} are zeroed through
 * netkit_xnet() only if @xnet_scrub is set as well.
 */
static void netkit_prep_forward(struct sk_buff *skb,
				bool xnet, bool xnet_scrub)
{
	skb_scrub_packet(skb, false);
	nf_skip_egress(skb, true);
	skb_reset_mac_header(skb);

	if (xnet) {
		ipvs_reset(skb);
		skb_clear_tstamp(skb);
		/* mark/priority scrubbing is opt-out per device */
		if (xnet_scrub)
			netkit_xnet(skb);
	}
}

static struct netkit *netkit_priv(const struct net_device *dev)
@@ -80,7 +93,8 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
		     !pskb_may_pull(skb, ETH_HLEN) ||
		     skb_orphan_frags(skb, GFP_ATOMIC)))
		goto drop;
	netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
	netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)),
			    nk->scrub);
	eth_skb_pkt_type(skb, peer);
	skb->dev = peer;
	entry = rcu_dereference(nk->active);
@@ -297,20 +311,6 @@ static int netkit_check_policy(int policy, struct nlattr *tb,
	}
}

/* Validate a requested netkit device mode.
 *
 * Returns 0 when @mode is NETKIT_L2 or NETKIT_L3. Any other value
 * yields -EINVAL, with an extended-ack message attached to the
 * offending attribute @tb.
 */
static int netkit_check_mode(int mode, struct nlattr *tb,
			     struct netlink_ext_ack *extack)
{
	if (mode == NETKIT_L2 || mode == NETKIT_L3)
		return 0;

	NL_SET_ERR_MSG_ATTR(extack, tb,
			    "Provided device mode can only be L2 or L3");
	return -EINVAL;
}

static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
@@ -332,8 +332,10 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
			   struct netlink_ext_ack *extack)
{
	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr;
	enum netkit_action default_prim = NETKIT_PASS;
	enum netkit_action default_peer = NETKIT_PASS;
	enum netkit_action policy_prim = NETKIT_PASS;
	enum netkit_action policy_peer = NETKIT_PASS;
	enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT;
	enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
	enum netkit_mode mode = NETKIT_L3;
	unsigned char ifname_assign_type;
	struct ifinfomsg *ifmp = NULL;
@@ -344,13 +346,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
	int err;

	if (data) {
		if (data[IFLA_NETKIT_MODE]) {
			attr = data[IFLA_NETKIT_MODE];
			mode = nla_get_u32(attr);
			err = netkit_check_mode(mode, attr, extack);
			if (err < 0)
				return err;
		}
		if (data[IFLA_NETKIT_MODE])
			mode = nla_get_u32(data[IFLA_NETKIT_MODE]);
		if (data[IFLA_NETKIT_PEER_INFO]) {
			attr = data[IFLA_NETKIT_PEER_INFO];
			ifmp = nla_data(attr);
@@ -362,17 +359,21 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
				return err;
			tbp = peer_tb;
		}
		if (data[IFLA_NETKIT_SCRUB])
			scrub_prim = nla_get_u32(data[IFLA_NETKIT_SCRUB]);
		if (data[IFLA_NETKIT_PEER_SCRUB])
			scrub_peer = nla_get_u32(data[IFLA_NETKIT_PEER_SCRUB]);
		if (data[IFLA_NETKIT_POLICY]) {
			attr = data[IFLA_NETKIT_POLICY];
			default_prim = nla_get_u32(attr);
			err = netkit_check_policy(default_prim, attr, extack);
			policy_prim = nla_get_u32(attr);
			err = netkit_check_policy(policy_prim, attr, extack);
			if (err < 0)
				return err;
		}
		if (data[IFLA_NETKIT_PEER_POLICY]) {
			attr = data[IFLA_NETKIT_PEER_POLICY];
			default_peer = nla_get_u32(attr);
			err = netkit_check_policy(default_peer, attr, extack);
			policy_peer = nla_get_u32(attr);
			err = netkit_check_policy(policy_peer, attr, extack);
			if (err < 0)
				return err;
		}
@@ -409,7 +410,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,

	nk = netkit_priv(peer);
	nk->primary = false;
	nk->policy = default_peer;
	nk->policy = policy_peer;
	nk->scrub = scrub_peer;
	nk->mode = mode;
	bpf_mprog_bundle_init(&nk->bundle);

@@ -434,7 +436,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,

	nk = netkit_priv(dev);
	nk->primary = true;
	nk->policy = default_prim;
	nk->policy = policy_prim;
	nk->scrub = scrub_prim;
	nk->mode = mode;
	bpf_mprog_bundle_init(&nk->bundle);

@@ -874,6 +877,18 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
		return -EACCES;
	}

	if (data[IFLA_NETKIT_SCRUB]) {
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB],
				    "netkit scrubbing cannot be changed after device creation");
		return -EACCES;
	}

	if (data[IFLA_NETKIT_PEER_SCRUB]) {
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB],
				    "netkit scrubbing cannot be changed after device creation");
		return -EACCES;
	}

	if (data[IFLA_NETKIT_PEER_INFO]) {
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO],
				    "netkit peer info cannot be changed after device creation");
@@ -908,8 +923,10 @@ static size_t netkit_get_size(const struct net_device *dev)
{
	return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */
	       nla_total_size(sizeof(u8))  + /* IFLA_NETKIT_PRIMARY */
	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_SCRUB */
	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */
	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */
	       nla_total_size(sizeof(u8))  + /* IFLA_NETKIT_PRIMARY */
	       0;
}

@@ -924,11 +941,15 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
		return -EMSGSIZE;
	if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode))
		return -EMSGSIZE;
	if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub))
		return -EMSGSIZE;

	if (peer) {
		nk = netkit_priv(peer);
		if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy))
			return -EMSGSIZE;
		if (nla_put_u32(skb, IFLA_NETKIT_PEER_SCRUB, nk->scrub))
			return -EMSGSIZE;
	}

	return 0;
@@ -936,9 +957,11 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)

static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
	[IFLA_NETKIT_PEER_INFO]		= { .len = sizeof(struct ifinfomsg) },
	[IFLA_NETKIT_MODE]		= NLA_POLICY_MAX(NLA_U32, NETKIT_L3),
	[IFLA_NETKIT_POLICY]		= { .type = NLA_U32 },
	[IFLA_NETKIT_MODE]		= { .type = NLA_U32 },
	[IFLA_NETKIT_PEER_POLICY]	= { .type = NLA_U32 },
	[IFLA_NETKIT_SCRUB]		= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
	[IFLA_NETKIT_PEER_SCRUB]	= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
	[IFLA_NETKIT_PRIMARY]		= { .type = NLA_REJECT,
					    .reject_message = "Primary attribute is read-only" },
};
+15 −0
Original line number Diff line number Diff line
@@ -1292,6 +1292,19 @@ enum netkit_mode {
	NETKIT_L3,
};

/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
 * the BPF program if attached. This also means the latter can
 * consume the two fields if they were populated earlier.
 *
 * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before
 * invoking the attached BPF program when the peer device resides
 * in a different network namespace. This is the default behavior.
 *
 * The mode is carried per device in the IFLA_NETKIT_SCRUB and
 * IFLA_NETKIT_PEER_SCRUB attributes.
 */
enum netkit_scrub {
	NETKIT_SCRUB_NONE,	/* leave skb->{mark,priority} untouched */
	NETKIT_SCRUB_DEFAULT,	/* zero skb->{mark,priority} across netns */
};

enum {
	IFLA_NETKIT_UNSPEC,
	IFLA_NETKIT_PEER_INFO,
@@ -1299,6 +1312,8 @@ enum {
	IFLA_NETKIT_POLICY,
	IFLA_NETKIT_PEER_POLICY,
	IFLA_NETKIT_MODE,
	IFLA_NETKIT_SCRUB,
	IFLA_NETKIT_PEER_SCRUB,
	__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX	(__IFLA_NETKIT_MAX - 1)
+552 −1

File changed.

Preview size limit exceeded, changes collapsed.

+85 −9
Original line number Diff line number Diff line
@@ -14,6 +14,8 @@
#include "netlink_helpers.h"
#include "tc_helpers.h"

/* Socket mark and priority that __send_icmp() sets through SO_MARK and
 * SO_PRIORITY. The scrub test expects to read them back unchanged only
 * with NETKIT_SCRUB_NONE, and 0 otherwise.
 */
#define MARK		42
#define PRIO		0xeb9f
#define ICMP_ECHO	8

struct icmphdr {
@@ -33,7 +35,7 @@ struct iplink_req {
};

static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
			 bool same_netns)
			 bool same_netns, int scrub, int peer_scrub)
{
	struct rtnl_handle rth = { .fd = -1 };
	struct iplink_req req = {};
@@ -58,6 +60,8 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
	data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
	addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy);
	addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy);
	addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub);
	addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub);
	addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
	addattr_nest_end(&req.n, data);
	addattr_nest_end(&req.n, linkinfo);
@@ -118,9 +122,9 @@ static void destroy_netkit(void)

static int __send_icmp(__u32 dest)
{
	int sock, ret, mark = MARK, prio = PRIO;
	struct sockaddr_in addr;
	struct icmphdr icmp;
	int sock, ret;

	ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0");
	if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)"))
@@ -135,6 +139,15 @@ static int __send_icmp(__u32 dest)
	if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)"))
		goto out;

	ret = setsockopt(sock, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
	if (!ASSERT_OK(ret, "setsockopt(SO_MARK)"))
		goto out;

	ret = setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
			 &prio, sizeof(prio));
	if (!ASSERT_OK(ret, "setsockopt(SO_PRIORITY)"))
		goto out;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(dest);
@@ -171,7 +184,8 @@ void serial_test_tc_netkit_basic(void)
	int err, ifindex;

	err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
			    &ifindex, false);
			    &ifindex, false, NETKIT_SCRUB_DEFAULT,
			    NETKIT_SCRUB_DEFAULT);
	if (err)
		return;

@@ -285,7 +299,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target)
	int err, ifindex;

	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
			    &ifindex, false);
			    &ifindex, false, NETKIT_SCRUB_DEFAULT,
			    NETKIT_SCRUB_DEFAULT);
	if (err)
		return;

@@ -413,7 +428,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target)
	int err, ifindex;

	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
			    &ifindex, false);
			    &ifindex, false, NETKIT_SCRUB_DEFAULT,
			    NETKIT_SCRUB_DEFAULT);
	if (err)
		return;

@@ -527,7 +543,8 @@ void serial_test_tc_netkit_device(void)
	int err, ifindex, ifindex2;

	err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS,
			    &ifindex, true);
			    &ifindex, true, NETKIT_SCRUB_DEFAULT,
			    NETKIT_SCRUB_DEFAULT);
	if (err)
		return;

@@ -638,7 +655,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target)
	int err, ifindex;

	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
			    &ifindex, false);
			    &ifindex, false, NETKIT_SCRUB_DEFAULT,
			    NETKIT_SCRUB_DEFAULT);
	if (err)
		return;

@@ -715,7 +733,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode)
	struct bpf_link *link;

	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
			    &ifindex, true);
			    &ifindex, true, NETKIT_SCRUB_DEFAULT,
			    NETKIT_SCRUB_DEFAULT);
	if (err)
		return;

@@ -779,3 +798,60 @@ void serial_test_tc_netkit_pkt_type(void)
	serial_test_tc_netkit_pkt_type_mode(NETKIT_L2);
	serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
}

/* Check what an attached BPF program observes for a given scrub mode.
 *
 * Creates an L2 netkit pair with NETKIT_PASS policy on both ends and
 * @scrub applied to both the primary and the peer device, attaches
 * tc8 as BPF_NETKIT_PRIMARY, sends one ICMP packet and then compares
 * the mark/prio values found in the skeleton's bss: MARK/PRIO are
 * expected for NETKIT_SCRUB_NONE, 0 for any other mode.
 *
 * NOTE(review): presumably tc8 stores skb->mark and skb->priority in
 * bss and send_icmp() goes through __send_icmp(), which sets SO_MARK
 * and SO_PRIORITY; neither is visible here, confirm.
 */
static void serial_test_tc_netkit_scrub_type(int scrub)
{
	LIBBPF_OPTS(bpf_netkit_opts, optl);
	struct test_tc_link *skel;
	struct bpf_link *link;
	int err, ifindex;

	/* same_netns is false here; NOTE(review): presumably this puts the
	 * peer into another netns so the scrub path is exercised, confirm.
	 */
	err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
			    &ifindex, false, scrub, scrub);
	if (err)
		return;

	skel = test_tc_link__open();
	if (!ASSERT_OK_PTR(skel, "skel_open"))
		goto cleanup;

	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc8,
		  BPF_NETKIT_PRIMARY), 0, "tc8_attach_type");

	err = test_tc_link__load(skel);
	if (!ASSERT_OK(err, "skel_load"))
		goto cleanup;

	/* Nothing is attached on either side before the link is created. */
	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);

	ASSERT_EQ(skel->bss->seen_tc8, false, "seen_tc8");

	link = bpf_program__attach_netkit(skel->progs.tc8, ifindex, &optl);
	if (!ASSERT_OK_PTR(link, "link_attach"))
		goto cleanup;

	/* Hand the link to the skeleton so that destroying the skeleton in
	 * the cleanup path also covers it.
	 */
	skel->links.tc8 = link;

	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);

	tc_skel_reset_all_seen(skel);
	ASSERT_EQ(send_icmp(), 0, "icmp_pkt");

	/* The program must have run; mark/prio survive only without scrub. */
	ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8");
	ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark");
	ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio");
cleanup:
	test_tc_link__destroy(skel);

	/* After teardown no program may remain attached to the device. */
	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
	destroy_netkit();
}

/* Run the scrub test once per scrub mode: first with
 * NETKIT_SCRUB_DEFAULT, then with NETKIT_SCRUB_NONE.
 */
void serial_test_tc_netkit_scrub(void)
{
	static const int scrub_modes[] = {
		NETKIT_SCRUB_DEFAULT,
		NETKIT_SCRUB_NONE,
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(scrub_modes) / sizeof(scrub_modes[0]); idx++)
		serial_test_tc_netkit_scrub_type(scrub_modes[idx]);
}
Loading