Commit 7fc2bf8d authored by Jakub Kicinski
Browse files
Martin KaFai Lau says:

====================
pull-request: bpf-next 2025-11-10

We've added 19 non-merge commits during the last 3 day(s) which contain
a total of 22 files changed, 1345 insertions(+), 197 deletions(-).

The main changes are:

1) Preserve skb metadata after a TC BPF program has changed the skb,
   from Jakub Sitnicki.
   This allows a TC program at the end of a TC filter chain to still see
   the skb metadata, even if another TC program at the front of the chain
   has changed the skb using BPF helpers.

2) Initial af_smc bpf_struct_ops support to control the smc specific
   syn/synack options, from D. Wythe.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  bpf/selftests: Add selftest for bpf_smc_hs_ctrl
  net/smc: bpf: Introduce generic hook for handshake flow
  bpf: Export necessary symbols for modules with struct_ops
  selftests/bpf: Cover skb metadata access after bpf_skb_change_proto
  selftests/bpf: Cover skb metadata access after change_head/tail helper
  selftests/bpf: Cover skb metadata access after bpf_skb_adjust_room
  selftests/bpf: Cover skb metadata access after vlan push/pop helper
  selftests/bpf: Expect unclone to preserve skb metadata
  selftests/bpf: Dump skb metadata on verification failure
  selftests/bpf: Verify skb metadata in BPF instead of userspace
  bpf: Make bpf_skb_change_head helper metadata-safe
  bpf: Make bpf_skb_change_proto helper metadata-safe
  bpf: Make bpf_skb_adjust_room metadata-safe
  bpf: Make bpf_skb_vlan_push helper metadata-safe
  bpf: Make bpf_skb_vlan_pop helper metadata-safe
  vlan: Make vlan_remove_tag return nothing
  bpf: Unclone skb head on bpf_dynptr_write to skb metadata
  net: Preserve metadata on pskb_expand_head
  net: Helper to move packet data and metadata after skb_push/pull
====================

Link: https://patch.msgid.link/20251110232427.3929291-1-martin.lau@linux.dev


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 38f073a7 67f4cfb5
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -1781,6 +1781,8 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
		      void *buf, unsigned long len, bool flush);
int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset,
			       const void *from, u32 len, u64 flags);
void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset);
#else /* CONFIG_NET */
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
@@ -1817,6 +1819,13 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi
{
}

/*
 * Fallback stub for __bpf_skb_meta_store_bytes(): performs no store and
 * always fails with -EOPNOTSUPP. All arguments are ignored.
 *
 * NOTE(review): this appears to sit in the !CONFIG_NET branch of the
 * surrounding #ifdef -- confirm against the full header.
 */
static inline int __bpf_skb_meta_store_bytes(struct sk_buff *skb, u32 offset,
					     const void *from, u32 len,
					     u64 flags)
{
	return -EOPNOTSUPP;
}

static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
{
	return ERR_PTR(-EOPNOTSUPP);
+6 −7
Original line number Diff line number Diff line
@@ -355,16 +355,17 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb,
					  __be16 vlan_proto, u16 vlan_tci,
					  unsigned int mac_len)
{
	const u8 meta_len = mac_len > ETH_TLEN ? skb_metadata_len(skb) : 0;
	struct vlan_ethhdr *veth;

	if (skb_cow_head(skb, VLAN_HLEN) < 0)
	if (skb_cow_head(skb, meta_len + VLAN_HLEN) < 0)
		return -ENOMEM;

	skb_push(skb, VLAN_HLEN);

	/* Move the mac header sans proto to the beginning of the new header. */
	if (likely(mac_len > ETH_TLEN))
		memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN);
		skb_postpush_data_move(skb, VLAN_HLEN, mac_len - ETH_TLEN);
	if (skb_mac_header_was_set(skb))
		skb->mac_header -= VLAN_HLEN;

@@ -731,18 +732,16 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
 *
 * Expects the skb to contain a VLAN tag in the payload, and to have skb->data
 * pointing at the MAC header.
 *
 * Returns: a new pointer to skb->data, or NULL on failure to pull.
 */
static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci)
static inline void vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci)
{
	struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);

	*vlan_tci = ntohs(vhdr->h_vlan_TCI);

	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
	vlan_set_encap_proto(skb, vhdr);
	return __skb_pull(skb, VLAN_HLEN);
	__skb_pull(skb, VLAN_HLEN);
	skb_postpull_data_move(skb, VLAN_HLEN, 2 * ETH_ALEN);
}

/**
+75 −0
Original line number Diff line number Diff line
@@ -4564,6 +4564,81 @@ static inline void skb_metadata_clear(struct sk_buff *skb)
	skb_metadata_set(skb, 0);
}

/**
 * skb_data_move - Shift skb metadata and packet data after a push or pull.
 * @skb: packet to operate on
 * @len: signed distance &sk_buff->data has already moved; the post-pull
 *       wrapper passes a positive value, the post-push wrapper a negated one
 * @n: number of packet bytes, counted from the pre-push/pull &sk_buff->data,
 *     to memmove(); may be zero
 *
 * Nothing is done when @len is zero, or when there is neither metadata nor
 * packet data (@n == 0) to move.
 *
 * When metadata is present it is expected to end exactly at the pre-push/pull
 * &sk_buff->data and to be no longer than the current headroom. If both hold,
 * the metadata and the @n bytes following it are moved by @len in a single
 * memmove(). If either does not hold, a one-time warning is issued, the
 * metadata is cleared, and only the @n packet bytes are moved.
 *
 * Callers should normally use skb_postpull_data_move() or
 * skb_postpush_data_move() rather than this helper.
 */
static inline void skb_data_move(struct sk_buff *skb, const int len,
				 const unsigned int n)
{
	const u8 meta_len = skb_metadata_len(skb);

	/* Data pointer did not move: nothing to relocate. */
	if (!len)
		return;

	if (meta_len) {
		u8 *const meta_end = skb_metadata_end(skb);
		u8 *const meta_start = meta_end - meta_len;

		/* Common case: metadata sits right before the old data. */
		if (!WARN_ON_ONCE(meta_end + len != skb->data ||
				  meta_len > skb_headroom(skb))) {
			/* Metadata and data are contiguous; move them as one. */
			memmove(meta_start + len, meta_start, meta_len + n);
			return;
		}

		/* Layout is not what we expect: drop metadata, keep the data. */
		skb_metadata_clear(skb);
	} else if (!n) {
		/* No metadata and no packet bytes requested. */
		return;
	}

	/* Packet bytes only: pre-move data started at skb->data - len. */
	memmove(skb->data, skb->data - len, n);
}

/**
 * skb_postpull_data_move - Relocate packet data and metadata once skb_pull()
 *                          has been applied.
 * @skb: packet to operate on
 * @len: number of bytes that were pulled from &sk_buff->data
 * @n: number of bytes, starting at the pre-pull &sk_buff->data, to memmove()
 *
 * Forwards to skb_data_move() with @len as a positive offset; see that
 * helper for the exact semantics and failure handling.
 */
static inline void skb_postpull_data_move(struct sk_buff *skb,
					  const unsigned int len,
					  const unsigned int n)
{
	int shift;

	/* skb_data_move() takes a signed length; flag values that won't fit. */
	DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);

	/* After a pull the old data start lies @len bytes below skb->data. */
	shift = len;
	skb_data_move(skb, shift, n);
}

/**
 * skb_postpush_data_move - Relocate packet data and metadata once skb_push()
 *                          has been applied.
 * @skb: packet to operate on
 * @len: number of bytes that were pushed onto &sk_buff->data
 * @n: number of bytes, starting at the pre-push &sk_buff->data, to memmove()
 *
 * Forwards to skb_data_move() with @len negated; see that helper for the
 * exact semantics and failure handling.
 */
static inline void skb_postpush_data_move(struct sk_buff *skb,
					  const unsigned int len,
					  const unsigned int n)
{
	int shift;

	/* skb_data_move() takes a signed length; flag values that won't fit. */
	DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);

	/* After a push the old data start lies @len bytes above skb->data. */
	shift = -len;
	skb_data_move(skb, shift, n);
}

struct sk_buff *skb_clone_sk(struct sk_buff *skb);

#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
+3 −0
Original line number Diff line number Diff line
@@ -17,6 +17,9 @@ struct netns_smc {
#ifdef CONFIG_SYSCTL
	struct ctl_table_header		*smc_hdr;
#endif
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
	struct smc_hs_ctrl __rcu	*hs_ctrl;
#endif /* CONFIG_SMC_HS_CTRL_BPF */
	unsigned int			sysctl_autocorking_size;
	unsigned int			sysctl_smcr_buf_type;
	int				sysctl_smcr_testlink_time;
+53 −0
Original line number Diff line number Diff line
@@ -17,6 +17,8 @@
#include <linux/wait.h>
#include <linux/dibs.h>

struct tcp_sock;
struct inet_request_sock;
struct sock;

#define SMC_MAX_PNETID_LEN	16	/* Max. length of PNET id */
@@ -50,4 +52,55 @@ struct smcd_dev {
	u8 going_away : 1;
};

#define SMC_HS_CTRL_NAME_MAX 16

/* Flag bits for SMC handshake control ops (SMC_HS_CTRL_* namespace). */
enum {
	/* ops can be inherited from init_net */
	SMC_HS_CTRL_FLAG_INHERITABLE = 0x1,

	/* Mask of every flag bit defined above. */
	SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE,
};

/*
 * struct smc_hs_ctrl - named set of callbacks consulted when SMC options are
 * computed for SYN and SYN-ACK packets.
 *
 * smc_call_hsbpf() reads a per-netns pointer to one of these under RCU and
 * invokes the requested callback when it is non-NULL.
 */
struct smc_hs_ctrl {
	/* private */

	/* NOTE(review): presumably links registered controllers -- confirm */
	struct list_head list;
	/* NOTE(review): presumably the module providing these ops -- confirm */
	struct module *owner;

	/* public */

	/* unique name */
	char name[SMC_HS_CTRL_NAME_MAX];
	/* NOTE(review): presumably a mask of SMC_HS_CTRL_FLAG_* bits -- confirm */
	int flags;

	/* Invoked before computing the SMC option for SYN packets.
	 * The return value controls whether the SMC option is set:
	 * return 0 to disable SMC, or any other value to enable it.
	 */
	int (*syn_option)(struct tcp_sock *tp);

	/* Invoked before setting up SMC options for SYN-ACK packets.
	 * The return value controls whether to respond with SMC options:
	 * return 0 to disable SMC, or any other value to enable it.
	 */
	int (*synack_option)(const struct tcp_sock *tp,
			     struct inet_request_sock *ireq);
};

/*
 * smc_call_hsbpf - invoke an optional smc_hs_ctrl callback for a socket.
 * @init_val: value returned when no callback is run; also fixes the result type
 * @tp: socket pointer; cast to struct sock * to look up its network namespace
 *      and passed as the first argument of the callback
 * @func: name of the struct smc_hs_ctrl member to call
 * @...: extra arguments forwarded to the callback after @tp
 *
 * With CONFIG_SMC_HS_CTRL_BPF enabled, the netns' hs_ctrl pointer is
 * dereferenced inside an RCU read-side critical section. If a controller is
 * attached and provides @func, the result is that callback's return value;
 * otherwise it is @init_val. Without the config option the macro simply
 * yields @init_val.
 *
 * @tp is evaluated exactly once in both variants, and the temporaries use
 * double-underscore names so they cannot capture identifiers that appear in
 * the caller's argument expressions.
 */
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
#define smc_call_hsbpf(init_val, tp, func, ...) ({				\
	typeof(init_val) __ret = (init_val);					\
	typeof(tp) __tp = (tp);							\
	struct smc_hs_ctrl *__ctrl;						\
	rcu_read_lock();							\
	__ctrl = rcu_dereference(sock_net((struct sock *)(__tp))->smc.hs_ctrl);	\
	if (__ctrl && __ctrl->func)						\
		__ret = __ctrl->func(__tp, ##__VA_ARGS__);			\
	rcu_read_unlock();							\
	__ret;									\
})
#else
#define smc_call_hsbpf(init_val, tp, ...)  ({ (void)(tp); (init_val); })
#endif /* CONFIG_SMC_HS_CTRL_BPF */

#endif	/* _SMC_H */
Loading