Commit 32155c6f authored by Jakub Kicinski
Browse files
Daniel Borkmann says:

====================
pull-request: bpf-next 2025-06-27

We've added 6 non-merge commits during the last 8 day(s) which contain
a total of 6 files changed, 120 insertions(+), 20 deletions(-).

The main changes are:

1) Fix RCU usage in task_cls_state() for BPF programs using helpers like
   bpf_get_cgroup_classid_curr() outside of networking, from Charalampos
   Mitrodimas.

2) Fix a sockmap race between map_update and a pending workqueue from
   an earlier map_delete freeing the old psock where both pointed to the
   same psock->sk, from Jiayuan Chen.

3) Fix a data corruption issue when using bpf_msg_pop_data() in kTLS which
   failed to recalculate the ciphertext length, also from Jiayuan Chen.

4) Remove xdp_redirect_map{,_err} trace events since they are unused and
   also hide XDP trace events under CONFIG_BPF_SYSCALL, from Steven Rostedt.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  xdp: tracing: Hide some xdp events under CONFIG_BPF_SYSCALL
  xdp: Remove unused events xdp_redirect_map and xdp_redirect_map_err
  net, bpf: Fix RCU usage in task_cls_state() for BPF programs
  selftests/bpf: Add test to cover ktls with bpf_msg_pop_data
  bpf, ktls: Fix data corruption when using bpf_msg_pop_data() in ktls
  bpf, sockmap: Fix psock incorrectly pointing to sk
====================

Link: https://patch.msgid.link/20250626230111.24772-1-daniel@iogearbox.net


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 4cd9d227 16f3c7ad
Loading
Loading
Loading
Loading
+2 −19
Original line number Diff line number Diff line
@@ -168,25 +168,7 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \
	 trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index)

/* not used anymore, but kept around so as not to break old programs */
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 const void *tgt, int err,
		 enum bpf_map_type map_type,
		 u32 map_id, u32 index),
	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);

DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
	TP_PROTO(const struct net_device *dev,
		 const struct bpf_prog *xdp,
		 const void *tgt, int err,
		 enum bpf_map_type map_type,
		 u32 map_id, u32 index),
	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
);

#ifdef CONFIG_BPF_SYSCALL
TRACE_EVENT(xdp_cpumap_kthread,

	TP_PROTO(int map_id, unsigned int processed,  unsigned int drops,
@@ -300,6 +282,7 @@ TRACE_EVENT(xdp_devmap_xmit,
		  __entry->sent, __entry->drops,
		  __entry->err)
);
#endif /* CONFIG_BPF_SYSCALL */

/* Expect users already include <net/xdp.h>, but not xdp_priv.h */
#include <net/xdp_priv.h>
+3 −1
Original line number Diff line number Diff line
@@ -21,7 +21,9 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state
struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
	return css_cls_state(task_css_check(p, net_cls_cgrp_id,
					    rcu_read_lock_bh_held()));
					    rcu_read_lock_held() ||
					    rcu_read_lock_bh_held() ||
					    rcu_read_lock_trace_held()));
}
EXPORT_SYMBOL_GPL(task_cls_state);

+7 −0
Original line number Diff line number Diff line
@@ -656,6 +656,13 @@ static void sk_psock_backlog(struct work_struct *work)
	bool ingress;
	int ret;

	/* If sk is quickly removed from the map and then added back, the old
	 * psock should not be scheduled, because there are now two psocks
	 * pointing to the same sk.
	 */
	if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
		return;

	/* Increment the psock refcnt to synchronize with close(fd) path in
	 * sock_map_close(), ensuring we wait for backlog thread completion
	 * before sk_socket freed. If refcnt increment fails, it indicates
+13 −0
Original line number Diff line number Diff line
@@ -872,6 +872,19 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
		delta = msg->sg.size;
		psock->eval = sk_psock_msg_verdict(sk, psock, msg);
		delta -= msg->sg.size;

		if ((s32)delta > 0) {
			/* It indicates that we executed bpf_msg_pop_data(),
			 * causing the plaintext data size to decrease.
			 * Therefore the encrypted data size also needs to
			 * correspondingly decrease. We only need to subtract
			 * delta to calculate the new ciphertext length since
			 * ktls does not support block encryption.
			 */
			struct sk_msg *enc = &ctx->open_rec->msg_encrypted;

			sk_msg_trim(sk, enc, enc->sg.size - delta);
		}
	}
	if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
	    !enospc && !full_record) {
+91 −0
Original line number Diff line number Diff line
@@ -314,6 +314,95 @@ static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push)
	test_sockmap_ktls__destroy(skel);
}

/* Send a fixed message over a kTLS socket that sits in a sockmap with an
 * sk_msg verdict program attached, once per entry of a pop policy table,
 * and check that the peer receives the message minus the popped range.
 *
 * The policy is handed to the BPF program through its pop_start/pop_end
 * globals; presumably the program feeds them to bpf_msg_pop_data() (the
 * program itself is not visible here).
 */
static void test_sockmap_ktls_tx_pop(int family, int sotype)
{
	/* 36 visible characters plus an explicit NUL: all 37 bytes are sent */
	char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0";
	struct {
		int	pop_start;
		int	pop_len;
	} pop_policy[] = {
		/* trim the start */
		{0, 2},
		{0, 10},
		{1, 2},
		{1, 10},
		/* trim the end */
		{35, 2},
		/* New entries should be added before this line */
		{-1, -1},
	};
	struct test_sockmap_ktls *skel;
	char rcv[50] = {0};
	int c = 0, p = 0;
	int key = 1;
	int prog_fd, map_fd;
	int n_sent, n_recv;
	int idx, src, dst;
	int err;

	skel = test_sockmap_ktls__open_and_load();
	if (!ASSERT_TRUE(skel, "open ktls skel"))
		return;

	err = create_pair(family, sotype, &c, &p);
	if (!ASSERT_OK(err, "create_pair()"))
		goto out;

	prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
	map_fd = bpf_map__fd(skel->maps.sock_map);

	err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
	if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
		goto out;

	/* The sending socket goes into the map before kTLS is set up. */
	err = bpf_map_update_elem(map_fd, &key, &c, BPF_NOEXIST);
	if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
		goto out;

	err = init_ktls_pairs(c, p);
	if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
		goto out;

	/* Walk the table until the {-1, -1} sentinel. */
	for (idx = 0; pop_policy[idx].pop_start >= 0; idx++) {
		int first = pop_policy[idx].pop_start;
		int len = pop_policy[idx].pop_len;

		/* Note: the program's pop_end global is given the pop length. */
		skel->bss->pop_start = first;
		skel->bss->pop_end = len;

		n_sent = send(c, msg, sizeof(msg), 0);
		if (!ASSERT_EQ(n_sent, sizeof(msg), "send(msg)"))
			goto out;

		n_recv = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
		if (!ASSERT_EQ(n_recv, sizeof(msg) - len, "pop len mismatch"))
			goto out;

		/* Compare what was sent with what arrived, e.g. for {11, 9}:
		 *
		 *   msg: 0123456789a bcdefghij klmnopqrstuvwxyz
		 *                    |       |
		 *                    popped data
		 *
		 * src indexes msg, dst indexes rcv; dst only advances for
		 * bytes that were not popped.
		 */
		for (src = 0, dst = 0; src < sizeof(msg); src++) {
			/* bytes inside [first, first + len) never reach the peer */
			if (src >= first && src < first + len)
				continue;

			if (!ASSERT_EQ(msg[src], rcv[dst], "data mismatch"))
				goto out;
			dst++;
		}
	}

out:
	/* c and p start at 0 and are only closed once create_pair() set them */
	if (c)
		close(c);
	if (p)
		close(p);
	test_sockmap_ktls__destroy(skel);
}

static void run_tests(int family, enum bpf_map_type map_type)
{
	int map;
@@ -338,6 +427,8 @@ static void run_ktls_test(int family, int sotype)
		test_sockmap_ktls_tx_cork(family, sotype, true);
	if (test__start_subtest("tls tx egress with no buf"))
		test_sockmap_ktls_tx_no_buf(family, sotype, true);
	if (test__start_subtest("tls tx with pop"))
		test_sockmap_ktls_tx_pop(family, sotype);
}

void test_sockmap_ktls(void)
Loading