Commit 39ab2064 authored by Paolo Abeni
Browse files
Daniel Borkmann says:

====================
pull-request: bpf-next 2024-10-14

The following pull-request contains BPF updates for your *net-next* tree.

We've added 21 non-merge commits during the last 18 day(s) which contain
a total of 21 files changed, 1185 insertions(+), 127 deletions(-).

The main changes are:

1) Put xsk sockets on a struct diet and add various cleanups. Overall, this helps
   to bump performance by 12% for some workloads, from Maciej Fijalkowski.

2) Extend BPF selftests to increase coverage of XDP features in combination
   with BPF cpumap, from Alexis Lothoré (eBPF Foundation).

3) Extend netkit with an option to delegate skb->{mark,priority} scrubbing to
   its BPF program, from Daniel Borkmann.

4) Make the bpf_get_netns_cookie() helper available also to tc(x) BPF programs,
   from Mahe Tardy.

5) Extend BPF selftests covering a BPF program setting socket options per MPTCP
   subflow, from Geliang Tang and Nicolas Rybowski.

bpf-next-for-netdev

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (21 commits)
  xsk: Use xsk_buff_pool directly for cq functions
  xsk: Wrap duplicated code to function
  xsk: Carry a copy of xdp_zc_max_segs within xsk_buff_pool
  xsk: Get rid of xdp_buff_xsk::orig_addr
  xsk: s/free_list_node/list_node/
  xsk: Get rid of xdp_buff_xsk::xskb_list_node
  selftests/bpf: check program redirect in xdp_cpumap_attach
  selftests/bpf: make xdp_cpumap_attach keep redirect prog attached
  selftests/bpf: fix bpf_map_redirect call for cpu map test
  selftests/bpf: add tcx netns cookie tests
  bpf: add get_netns_cookie helper to tc programs
  selftests/bpf: add missing header include for htons
  selftests/bpf: Extend netkit tests to validate skb meta data
  tools: Sync if_link.h uapi tooling header
  netkit: Add add netkit scrub support to rt_link.yaml
  netkit: Simplify netkit mode over to use NLA_POLICY_MAX
  netkit: Add option for scrubbing skb meta data
  bpf: Remove unused macro
  selftests/bpf: Add mptcp subflow subtest
  selftests/bpf: Add getsockopt to inspect mptcp subflow
  ...
====================

Link: https://patch.msgid.link/20241014211110.16562-1-daniel@iogearbox.net


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents de306f00 e6c4047f
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -920,6 +920,13 @@ definitions:
      - name: l2
      - name: l3

  -
    name: netkit-scrub
    type: enum
    entries:
      - name: none
      - name: default

attribute-sets:
  -
    name: link-attrs
@@ -2151,6 +2158,14 @@ attribute-sets:
        name: mode
        type: u32
        enum: netkit-mode
      -
        name: scrub
        type: u32
        enum: netkit-scrub
      -
        name: peer-scrub
        type: u32
        enum: netkit-scrub

sub-messages:
  -
+1 −1
Original line number Diff line number Diff line
@@ -16300,7 +16300,7 @@ F: include/net/mptcp.h
F:	include/trace/events/mptcp.h
F:	include/uapi/linux/mptcp*.h
F:	net/mptcp/
F:	tools/testing/selftests/bpf/*/*mptcp*.c
F:	tools/testing/selftests/bpf/*/*mptcp*.[ch]
F:	tools/testing/selftests/net/mptcp/
NETWORKING [TCP]
+57 −34
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ struct netkit {
	struct net_device __rcu *peer;
	struct bpf_mprog_entry __rcu *active;
	enum netkit_action policy;
	enum netkit_scrub scrub;
	struct bpf_mprog_bundle	bundle;

	/* Needed in slow-path */
@@ -50,12 +51,24 @@ netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
	return ret;
}

static void netkit_prep_forward(struct sk_buff *skb, bool xnet)
static void netkit_xnet(struct sk_buff *skb)
{
	skb_scrub_packet(skb, xnet);
	skb->priority = 0;
	skb->mark = 0;
}

/*
 * Prepare an skb before it is handed over to the netkit peer device.
 *
 * @skb:        buffer about to be forwarded
 * @xnet:       true when the hop crosses network namespaces; the xmit path
 *              passes !net_eq(dev_net(dev), dev_net(peer))
 * @xnet_scrub: when true (and @xnet is true) netkit_xnet() is run as well;
 *              the xmit path passes the device's nk->scrub setting
 *
 * The first three steps run unconditionally. Everything after the !xnet
 * check only runs for cross-namespace traffic.
 */
static void netkit_prep_forward(struct sk_buff *skb,
				bool xnet, bool xnet_scrub)
{
	/*
	 * Always called with xnet == false here; the cross-namespace steps
	 * are performed explicitly below so that part of them can be made
	 * conditional on xnet_scrub.
	 */
	skb_scrub_packet(skb, false);
	nf_skip_egress(skb, true);
	skb_reset_mac_header(skb);
	/* Same-namespace forwarding needs nothing further. */
	if (!xnet)
		return;
	ipvs_reset(skb);
	skb_clear_tstamp(skb);
	/* Optional part of the cross-namespace handling, see netkit_xnet(). */
	if (xnet_scrub)
		netkit_xnet(skb);
}

static struct netkit *netkit_priv(const struct net_device *dev)
@@ -80,7 +93,8 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
		     !pskb_may_pull(skb, ETH_HLEN) ||
		     skb_orphan_frags(skb, GFP_ATOMIC)))
		goto drop;
	netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
	netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)),
			    nk->scrub);
	eth_skb_pkt_type(skb, peer);
	skb->dev = peer;
	entry = rcu_dereference(nk->active);
@@ -297,20 +311,6 @@ static int netkit_check_policy(int policy, struct nlattr *tb,
	}
}

/*
 * Validate a netkit device mode value.
 *
 * @mode:   mode value to check
 * @tb:     netlink attribute handed to NL_SET_ERR_MSG_ATTR() on failure
 * @extack: extended ack handed to NL_SET_ERR_MSG_ATTR() on failure
 *
 * Returns 0 when @mode is NETKIT_L2 or NETKIT_L3, -EINVAL otherwise.
 */
static int netkit_check_mode(int mode, struct nlattr *tb,
			     struct netlink_ext_ack *extack)
{
	switch (mode) {
	case NETKIT_L2:
	case NETKIT_L3:
		return 0;
	default:
		/* Any other value is rejected with an error reported via extack. */
		NL_SET_ERR_MSG_ATTR(extack, tb,
				    "Provided device mode can only be L2 or L3");
		return -EINVAL;
	}
}

static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
@@ -332,8 +332,10 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
			   struct netlink_ext_ack *extack)
{
	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr;
	enum netkit_action default_prim = NETKIT_PASS;
	enum netkit_action default_peer = NETKIT_PASS;
	enum netkit_action policy_prim = NETKIT_PASS;
	enum netkit_action policy_peer = NETKIT_PASS;
	enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT;
	enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
	enum netkit_mode mode = NETKIT_L3;
	unsigned char ifname_assign_type;
	struct ifinfomsg *ifmp = NULL;
@@ -344,13 +346,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
	int err;

	if (data) {
		if (data[IFLA_NETKIT_MODE]) {
			attr = data[IFLA_NETKIT_MODE];
			mode = nla_get_u32(attr);
			err = netkit_check_mode(mode, attr, extack);
			if (err < 0)
				return err;
		}
		if (data[IFLA_NETKIT_MODE])
			mode = nla_get_u32(data[IFLA_NETKIT_MODE]);
		if (data[IFLA_NETKIT_PEER_INFO]) {
			attr = data[IFLA_NETKIT_PEER_INFO];
			ifmp = nla_data(attr);
@@ -362,17 +359,21 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
				return err;
			tbp = peer_tb;
		}
		if (data[IFLA_NETKIT_SCRUB])
			scrub_prim = nla_get_u32(data[IFLA_NETKIT_SCRUB]);
		if (data[IFLA_NETKIT_PEER_SCRUB])
			scrub_peer = nla_get_u32(data[IFLA_NETKIT_PEER_SCRUB]);
		if (data[IFLA_NETKIT_POLICY]) {
			attr = data[IFLA_NETKIT_POLICY];
			default_prim = nla_get_u32(attr);
			err = netkit_check_policy(default_prim, attr, extack);
			policy_prim = nla_get_u32(attr);
			err = netkit_check_policy(policy_prim, attr, extack);
			if (err < 0)
				return err;
		}
		if (data[IFLA_NETKIT_PEER_POLICY]) {
			attr = data[IFLA_NETKIT_PEER_POLICY];
			default_peer = nla_get_u32(attr);
			err = netkit_check_policy(default_peer, attr, extack);
			policy_peer = nla_get_u32(attr);
			err = netkit_check_policy(policy_peer, attr, extack);
			if (err < 0)
				return err;
		}
@@ -409,7 +410,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,

	nk = netkit_priv(peer);
	nk->primary = false;
	nk->policy = default_peer;
	nk->policy = policy_peer;
	nk->scrub = scrub_peer;
	nk->mode = mode;
	bpf_mprog_bundle_init(&nk->bundle);

@@ -434,7 +436,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,

	nk = netkit_priv(dev);
	nk->primary = true;
	nk->policy = default_prim;
	nk->policy = policy_prim;
	nk->scrub = scrub_prim;
	nk->mode = mode;
	bpf_mprog_bundle_init(&nk->bundle);

@@ -874,6 +877,18 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
		return -EACCES;
	}

	if (data[IFLA_NETKIT_SCRUB]) {
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB],
				    "netkit scrubbing cannot be changed after device creation");
		return -EACCES;
	}

	if (data[IFLA_NETKIT_PEER_SCRUB]) {
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB],
				    "netkit scrubbing cannot be changed after device creation");
		return -EACCES;
	}

	if (data[IFLA_NETKIT_PEER_INFO]) {
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO],
				    "netkit peer info cannot be changed after device creation");
@@ -908,8 +923,10 @@ static size_t netkit_get_size(const struct net_device *dev)
{
	return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */
	       nla_total_size(sizeof(u8))  + /* IFLA_NETKIT_PRIMARY */
	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_SCRUB */
	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */
	       nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */
	       nla_total_size(sizeof(u8))  + /* IFLA_NETKIT_PRIMARY */
	       0;
}

@@ -924,11 +941,15 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
		return -EMSGSIZE;
	if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode))
		return -EMSGSIZE;
	if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub))
		return -EMSGSIZE;

	if (peer) {
		nk = netkit_priv(peer);
		if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy))
			return -EMSGSIZE;
		if (nla_put_u32(skb, IFLA_NETKIT_PEER_SCRUB, nk->scrub))
			return -EMSGSIZE;
	}

	return 0;
@@ -936,9 +957,11 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)

static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
	[IFLA_NETKIT_PEER_INFO]		= { .len = sizeof(struct ifinfomsg) },
	[IFLA_NETKIT_MODE]		= NLA_POLICY_MAX(NLA_U32, NETKIT_L3),
	[IFLA_NETKIT_POLICY]		= { .type = NLA_U32 },
	[IFLA_NETKIT_MODE]		= { .type = NLA_U32 },
	[IFLA_NETKIT_PEER_POLICY]	= { .type = NLA_U32 },
	[IFLA_NETKIT_SCRUB]		= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
	[IFLA_NETKIT_PEER_SCRUB]	= NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
	[IFLA_NETKIT_PRIMARY]		= { .type = NLA_REJECT,
					    .reject_message = "Primary attribute is read-only" },
};
+7 −7
Original line number Diff line number Diff line
@@ -126,8 +126,8 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
	if (likely(!xdp_buff_has_frags(xdp)))
		goto out;

	list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
		list_del(&pos->xskb_list_node);
	list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
		list_del(&pos->list_node);
		xp_free(pos);
	}

@@ -140,7 +140,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
{
	struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);

	list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list);
	list_add_tail(&frag->list_node, &frag->pool->xskb_list);
}

static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
@@ -150,9 +150,9 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
	struct xdp_buff_xsk *frag;

	frag = list_first_entry_or_null(&xskb->pool->xskb_list,
					struct xdp_buff_xsk, xskb_list_node);
					struct xdp_buff_xsk, list_node);
	if (frag) {
		list_del(&frag->xskb_list_node);
		list_del(&frag->list_node);
		ret = &frag->xdp;
	}

@@ -163,7 +163,7 @@ static inline void xsk_buff_del_tail(struct xdp_buff *tail)
{
	struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);

	list_del(&xskb->xskb_list_node);
	list_del(&xskb->list_node);
}

static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
@@ -172,7 +172,7 @@ static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
	struct xdp_buff_xsk *frag;

	frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
			       xskb_list_node);
			       list_node);
	return &frag->xdp;
}

+13 −10
Original line number Diff line number Diff line
@@ -28,9 +28,7 @@ struct xdp_buff_xsk {
	dma_addr_t dma;
	dma_addr_t frame_dma;
	struct xsk_buff_pool *pool;
	u64 orig_addr;
	struct list_head free_list_node;
	struct list_head xskb_list_node;
	struct list_head list_node;
};

#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
@@ -78,6 +76,7 @@ struct xsk_buff_pool {
	u32 chunk_size;
	u32 chunk_shift;
	u32 frame_len;
	u32 xdp_zc_max_segs;
	u8 tx_metadata_len; /* inherited from umem */
	u8 cached_need_wakeup;
	bool uses_need_wakeup;
@@ -120,7 +119,6 @@ void xp_free(struct xdp_buff_xsk *xskb);
static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool,
				     u64 addr)
{
	xskb->orig_addr = addr;
	xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
}

@@ -222,14 +220,19 @@ static inline void xp_release(struct xdp_buff_xsk *xskb)
		xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
}

static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb,
				struct xsk_buff_pool *pool)
{
	u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
	u64 orig_addr = xskb->xdp.data - pool->addrs;
	u64 offset;

	offset += xskb->pool->headroom;
	if (!xskb->pool->unaligned)
		return xskb->orig_addr + offset;
	return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
	if (!pool->unaligned)
		return orig_addr;

	offset = xskb->xdp.data - xskb->xdp.data_hard_start;
	orig_addr -= offset;
	offset += pool->headroom;
	return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}

static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool)
Loading