Commit 9bf412d4 authored by Martin KaFai Lau
Browse files

Merge branch 'bpf-fix-wrong-copied_seq-calculation-and-add-tests'

Jiayuan Chen says:

====================
A previous commit described in this topic
http://lore.kernel.org/bpf/20230523025618.113937-9-john.fastabend@gmail.com
directly updated 'sk->copied_seq' in the tcp_eat_skb() function when the
action of a BPF program was SK_REDIRECT. For other actions, like SK_PASS,
the update logic for 'sk->copied_seq' was moved to
tcp_bpf_recvmsg_parser() to ensure the accuracy of the 'fionread' feature.

That commit works for a single stream_verdict scenario, as it also
modified 'sk_data_ready->sk_psock_verdict_data_ready->tcp_read_skb'
to remove updating 'sk->copied_seq'.

However, for programs where both stream_parser and stream_verdict are
active (strparser purpose), tcp_read_sock() was used instead of
tcp_read_skb() (sk_data_ready->strp_data_ready->tcp_read_sock).
tcp_read_sock() still updates 'sk->copied_seq', leading to duplicated
updates.

In summary, for strparser + SK_PASS, copied_seq is redundantly calculated
in both tcp_read_sock() and tcp_bpf_recvmsg_parser().

The issue causes incorrect copied_seq calculations, which prevent
correct data reads from the recv() interface in user-land.

We also added test cases for bpf + strparser and separated them from
sockmap_basic, as strparser has more encapsulation and parsing
capabilities compared to sockmap.
====================

Link: https://patch.msgid.link/20250122100917.49845-1-mrpre@163.com


Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents bc27c52e 6fcfe96e
Loading
Loading
Loading
Loading
+8 −1
Original line number Diff line number Diff line
@@ -112,7 +112,7 @@ Functions
Callbacks
=========

There are six callbacks:
There are seven callbacks:

    ::

@@ -182,6 +182,13 @@ There are six callbacks:
    the length of the message. skb->len - offset may be greater
    than full_len since strparser does not trim the skb.

    ::

	int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
                     sk_read_actor_t recv_actor);

    The read_sock callback is used by strparser instead of
    sock->ops->read_sock, if provided.

    ::

	int (*read_sock_done)(struct strparser *strp, int err);
+2 −0
Original line number Diff line number Diff line
@@ -91,6 +91,8 @@ struct sk_psock {
	struct sk_psock_progs		progs;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
	struct strparser		strp;
	u32				copied_seq;
	u32				ingress_bytes;
#endif
	struct sk_buff_head		ingress_skb;
	struct list_head		ingress_msg;
+2 −0
Original line number Diff line number Diff line
@@ -43,6 +43,8 @@ struct strparser;
struct strp_callbacks {
	int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
	void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
	int (*read_sock)(struct strparser *strp, read_descriptor_t *desc,
			 sk_read_actor_t recv_actor);
	int (*read_sock_done)(struct strparser *strp, int err);
	void (*abort_parser)(struct strparser *strp, int err);
	void (*lock)(struct strparser *strp);
+8 −0
Original line number Diff line number Diff line
@@ -729,6 +729,9 @@ void tcp_get_info(struct sock *, struct tcp_info *);
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
		  sk_read_actor_t recv_actor);
int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
			sk_read_actor_t recv_actor, bool noack,
			u32 *copied_seq);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off);
void tcp_read_done(struct sock *sk, size_t len);
@@ -2599,6 +2602,11 @@ struct sk_psock;
#ifdef CONFIG_BPF_SYSCALL
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#ifdef CONFIG_BPF_STREAM_PARSER
struct strparser;
int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
			   sk_read_actor_t recv_actor);
#endif /* CONFIG_BPF_STREAM_PARSER */
#endif /* CONFIG_BPF_SYSCALL */

#ifdef CONFIG_INET
+7 −0
Original line number Diff line number Diff line
@@ -549,6 +549,9 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
			return num_sge;
	}

#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
	psock->ingress_bytes += len;
#endif
	copied = len;
	msg->sg.start = 0;
	msg->sg.size = copied;
@@ -1144,6 +1147,10 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
	if (!ret)
		sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);

	if (sk_is_tcp(sk)) {
		psock->strp.cb.read_sock = tcp_bpf_strp_read_sock;
		psock->copied_seq = tcp_sk(sk)->copied_seq;
	}
	return ret;
}

Loading