Commit e076cd7b authored by Martin KaFai Lau
Browse files

Merge branch 'selftests/bpf: new MPTCP subflow subtest'

Matthieu Baerts says:

====================
In this series from Geliang, modifying MPTCP BPF selftests, we have:

- A new MPTCP subflow BPF program setting socket options per subflow: it
  looks better to have this old test program in the BPF selftests to
  track regressions and to serve as an example.

  Note: Nicolas is no longer working at Tessares, but he did this work
  while working for them, and his email address is no longer available.

- A new hook in the same BPF program to do the verification step.

- A new MPTCP BPF subtest validating the new BPF program added in the
  first patch, with the help of the new hook added in the second patch.

---
Changes in v7:
- Patch 2/3: use 'can_loop' instead of 'cond_break'. (Martin)
- Patch 3/3: use bpf_program__attach_cgroup(). (Martin)
- Link to v6: https://lore.kernel.org/r/20240911-upstream-bpf-next-20240506-mptcp-subflow-test-v6-0-7872294c466b@kernel.org

Changes in v6:
- Patch 3/3: use usleep() instead of sleep()
- Series: rebased on top of bpf-next/net
- Link to v5: https://lore.kernel.org/r/20240910-upstream-bpf-next-20240506-mptcp-subflow-test-v5-0-2c664a7da47c@kernel.org

Changes in v5:
- See the individual changelog for more details about them
- Patch 1/3: set TCP on the 2nd subflow
- Patch 2/3: new
- Patch 3/3: use the BPF program from patch 2/3 to do the validation
             instead of using ss.
- Series: rebased on top of bpf-next/net
- Link to v4: https://lore.kernel.org/r/20240805-upstream-bpf-next-20240506-mptcp-subflow-test-v4-0-2b4ca6994993@kernel.org

Changes in v4:
- Drop former patch 2/3: MPTCP's pm_nl_ctl requires a new header file:
  - I will check later if it is possible to avoid having duplicated
    header files in tools/include/uapi, but no need to block this series
    for that. Patch 2/3 can be added later if needed.
- Patch 2/2: skip the test if 'ip mptcp' is not available.
- Link to v3: https://lore.kernel.org/r/20240703-upstream-bpf-next-20240506-mptcp-subflow-test-v3-0-ebdc2d494049@kernel.org

Changes in v3:
- Sorry for the delay between v2 and v3, this series was conflicting
  with the "add netns helpers", but it looks like it is on hold:
  https://lore.kernel.org/cover.1715821541.git.tanggeliang@kylinos.cn
- Patch 1/3 includes "bpf_tracing_net.h", introduced in between.
- New patch 2/3: "selftests/bpf: Add mptcp pm_nl_ctl link".
- Patch 3/3: use the tool introduced in patch 2/3 + SYS_NOFAIL() helper.
- Link to v2: https://lore.kernel.org/r/20240509-upstream-bpf-next-20240506-mptcp-subflow-test-v2-0-4048c2948665@kernel.org

Changes in v2:
- Previous patches 1/4 and 2/4 have been dropped from this series:
  - 1/4: "selftests/bpf: Handle SIGINT when creating netns":
    - A new version, more generic and no longer specific to MPTCP BPF
      selftest will be sent later, as part of a new series. (Alexei)
  - 2/4: "selftests/bpf: Add RUN_MPTCP_TEST macro":
    - Removed, not to hide helper functions in macros. (Alexei)
- The commit message of patch 1/2 has been clarified to avoid some
  possible confusion spotted by Alexei.
- Link to v1: https://lore.kernel.org/r/20240507-upstream-bpf-next-20240506-mptcp-subflow-test-v1-0-e2bcbdf49857@kernel.org



---
Geliang Tang (2):
      selftests/bpf: Add getsockopt to inspect mptcp subflow
      selftests/bpf: Add mptcp subflow subtest
====================

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents c824deb1 9b85f11e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -16281,7 +16281,7 @@ F: include/net/mptcp.h
F:	include/trace/events/mptcp.h
F:	include/uapi/linux/mptcp*.h
F:	net/mptcp/
F:	tools/testing/selftests/bpf/*/*mptcp*.c
F:	tools/testing/selftests/bpf/*/*mptcp*.[ch]
F:	tools/testing/selftests/net/mptcp/
NETWORKING [TCP]
+121 −0
Original line number Diff line number Diff line
@@ -5,12 +5,17 @@
#include <linux/const.h>
#include <netinet/in.h>
#include <test_progs.h>
#include <unistd.h>
#include "cgroup_helpers.h"
#include "network_helpers.h"
#include "mptcp_sock.skel.h"
#include "mptcpify.skel.h"
#include "mptcp_subflow.skel.h"

#define NS_TEST "mptcp_ns"
#define ADDR_1	"10.0.1.1"
#define ADDR_2	"10.0.1.2"
#define PORT_1	10001

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
@@ -335,10 +340,126 @@ static void test_mptcpify(void)
	close(cgroup_fd);
}

/*
 * Prepare the network topology used by the subflow subtest.
 *
 * Inside the NS_TEST namespace: create the veth1/veth2 pair, assign ADDR_1
 * to veth1 and ADDR_2 to veth2, bring both links up, then register ADDR_2
 * as an MPTCP endpoint.
 *
 * @flags: text appended verbatim to the "ip mptcp endpoint add" command.
 *         It is only read, hence the const qualifier (callers pass string
 *         literals).
 *
 * Returns 0 on success and -1 on failure. A failing SYS() command is
 * expected to jump to the 'fail' label. When the "ip mptcp" command itself
 * fails, the test is marked as skipped before -1 is returned.
 */
static int endpoint_init(const char *flags)
{
	SYS(fail, "ip -net %s link add veth1 type veth peer name veth2", NS_TEST);
	SYS(fail, "ip -net %s addr add %s/24 dev veth1", NS_TEST, ADDR_1);
	SYS(fail, "ip -net %s link set dev veth1 up", NS_TEST);
	SYS(fail, "ip -net %s addr add %s/24 dev veth2", NS_TEST, ADDR_2);
	SYS(fail, "ip -net %s link set dev veth2 up", NS_TEST);
	/* A failure here is reported as a skip rather than as a test failure */
	if (SYS_NOFAIL("ip -net %s mptcp endpoint add %s %s", NS_TEST, ADDR_2, flags)) {
		printf("'ip mptcp' not supported, skip this test.\n");
		test__skip();
		goto fail;
	}

	return 0;
fail:
	return -1;
}

/*
 * Poll the MPTCP_INFO socket option of @fd until it reports at least one
 * subflow. Gives up silently after 50 attempts spaced 0.1s apart, so the
 * caller cannot tell a timeout from a success.
 *
 * Only one byte of MPTCP_INFO data is requested and it is interpreted as the
 * subflow counter.
 * NOTE(review): this relies on the counter being the first byte of
 * struct mptcp_info -- confirm against linux/mptcp.h.
 */
static void wait_for_new_subflows(int fd)
{
	socklen_t len;
	u8 subflows;
	int err, i;

	/* Wait max 5 sec for new subflows to be created */
	for (i = 0; i < 50; i++) {
		/*
		 * The length is a value-result argument that getsockopt() may
		 * rewrite, so restore the buffer size before every call.
		 */
		len = sizeof(subflows);
		err = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &subflows, &len);
		/* 'subflows' is only read when the call succeeded */
		if (!err && subflows > 0)
			break;

		usleep(100000); /* 0.1s */
	}
}

/*
 * Run one client/server MPTCP exchange and validate it through getsockopt().
 *
 * A server is started on ADDR_1:PORT_1 and a client connects to it and sends
 * one byte. After waiting for additional subflows, two options are read back
 * from the client socket:
 *  - SO_MARK, which must be 0;
 *  - TCP_CONGESTION, which must match the name given by get_msk_ca_name().
 * Both sockets are closed before returning.
 */
static void run_subflow(void)
{
	char expected_cc[TCP_CA_NAME_MAX];
	char msk_cc[TCP_CA_NAME_MAX];
	int server_fd, client_fd;
	unsigned int mark;
	socklen_t optlen;
	int err;

	server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0);
	if (!ASSERT_OK_FD(server_fd, "start_mptcp_server"))
		return;

	client_fd = connect_to_fd(server_fd, 0);
	if (!ASSERT_OK_FD(client_fd, "connect_to_fd"))
		goto out_server;

	/* Push some data, then give new subflows time to show up */
	send_byte(client_fd);
	wait_for_new_subflows(client_fd);

	/* Mark as seen on the client socket itself */
	optlen = sizeof(mark);
	err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &optlen);
	if (ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)"))
		ASSERT_EQ(mark, 0, "mark");

	/* Congestion control as seen on the client socket itself */
	optlen = sizeof(msk_cc);
	err = getsockopt(client_fd, SOL_TCP, TCP_CONGESTION, msk_cc, &optlen);
	if (ASSERT_OK(err, "getsockopt(client_fd, TCP_CONGESTION)")) {
		get_msk_ca_name(expected_cc);
		ASSERT_STREQ(msk_cc, expected_cc, "cc");
	}

	close(client_fd);
out_server:
	close(server_fd);
}

/*
 * "subflow" subtest driver.
 *
 * Joins the "/mptcp_subflow" cgroup, loads the mptcp_subflow skeleton, stores
 * this process' PID in the skeleton's 'pid' variable, attaches both BPF
 * programs to the cgroup, creates the test netns and its endpoint, then runs
 * run_subflow(). Everything acquired is released in reverse order, whatever
 * step failed.
 */
static void test_subflow(void)
{
	struct mptcp_subflow *skel;
	struct nstoken *ns;
	int cg_fd;

	cg_fd = test__join_cgroup("/mptcp_subflow");
	if (!ASSERT_OK_FD(cg_fd, "join_cgroup: mptcp_subflow"))
		return;

	skel = mptcp_subflow__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_subflow"))
		goto out_cgroup;

	/* Lets the getsockopt hook ignore calls from other processes */
	skel->bss->pid = getpid();

	skel->links.mptcp_subflow =
		bpf_program__attach_cgroup(skel->progs.mptcp_subflow, cg_fd);
	if (!ASSERT_OK_PTR(skel->links.mptcp_subflow, "attach mptcp_subflow"))
		goto out_skel;

	skel->links._getsockopt_subflow =
		bpf_program__attach_cgroup(skel->progs._getsockopt_subflow, cg_fd);
	if (!ASSERT_OK_PTR(skel->links._getsockopt_subflow, "attach _getsockopt_subflow"))
		goto out_skel;

	ns = create_netns();
	if (!ASSERT_OK_PTR(ns, "create_netns: mptcp_subflow"))
		goto out_skel;

	/* Only exchange traffic once the endpoint is in place */
	if (endpoint_init("subflow") >= 0)
		run_subflow();

	cleanup_netns(ns);
out_skel:
	mptcp_subflow__destroy(skel);
out_cgroup:
	close(cg_fd);
}

/*
 * Entry point of the "mptcp" test: each subtest ("base", "mptcpify",
 * "subflow") is run only when test__start_subtest() returns non-zero for
 * its name.
 */
void test_mptcp(void)
{
	if (test__start_subtest("base"))
		test_base();
	if (test__start_subtest("mptcpify"))
		test_mptcpify();
	if (test__start_subtest("subflow"))
		test_subflow();
}
+42 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __MPTCP_BPF_H__
#define __MPTCP_BPF_H__

#include "bpf_experimental.h"

/* list helpers from include/linux/list.h */
/*
 * Tell whether @list is the list head @head itself.
 * Returns 1 when both pointers are equal, 0 otherwise.
 */
static inline int list_is_head(const struct list_head *list,
			       const struct list_head *head)
{
	return head == list;
}

/* Get the 'type' structure embedding the list node 'ptr' as its 'member' */
#define list_entry(ptr, type, member)					\
	container_of(ptr, type, member)

/* Get the entry linked right after the node 'ptr' (the first one for a head) */
#define list_first_entry(ptr, type, member)				\
	list_entry((ptr)->next, type, member)

/* Get the entry following 'pos' in the list */
#define list_next_entry(pos, member)					\
	list_entry((pos)->member.next, typeof(*(pos)), member)

/*
 * Test whether 'pos' is in fact the list head, i.e. the walk is over.
 * 'pos' is parenthesized so that any pointer expression can be passed.
 */
#define list_entry_is_head(pos, head, member)				\
	list_is_head(&(pos)->member, (head))

/*
 * Walk every entry of the list 'head', using 'pos' as the cursor.
 * small difference: 'can_loop' has been added in the conditions
 */
#define list_for_each_entry(pos, head, member)				\
	for (pos = list_first_entry(head, typeof(*(pos)), member);	\
	     !list_entry_is_head(pos, head, member) && can_loop;	\
	     pos = list_next_entry(pos, member))

/* mptcp helpers from protocol.h */
/*
 * Iterate over the entries linked on (__msk)->conn_list through their 'node'
 * member, with __subflow as the cursor. Expands to list_for_each_entry(), so
 * the loop condition also includes 'can_loop'.
 */
#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)

/* Return the socket stored in the 'tcp_sock' member of @subflow */
static __always_inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
	struct sock *ssk = subflow->tcp_sock;

	return ssk;
}

#endif
+128 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020, Tessares SA. */
/* Copyright (c) 2024, Kylin Software */

/* vmlinux.h, bpf_helpers.h and other 'define' */
#include "bpf_tracing_net.h"
#include "mptcp_bpf.h"

/* License string placed in the "license" section */
char _license[] SEC("license") = "GPL";

/*
 * Congestion control name set on the subflow whose mark is 2, and the name
 * the getsockopt check expects to find on that subflow.
 */
char cc[TCP_CA_NAME_MAX] = "reno";
/*
 * Only getsockopt() calls whose bpf_get_current_pid_tgid() >> 32 equals this
 * value are checked; the userspace test stores getpid() here.
 */
int pid;

/*
 * Associate a subflow counter to each token.
 * Hash map with a __u32 key (the MPTCP token) and a __u32 value (the number
 * of connect callbacks seen for that token), limited to 100 entries.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 100);
} mptcp_sf SEC(".maps");

/*
 * sockops program: number the subflows of each MPTCP connection and tag them.
 *
 * Acts only on BPF_SOCK_OPS_TCP_CONNECT_CB, and only when
 * bpf_skc_to_mptcp_sock() yields an MPTCP socket for skops->sk. The counter
 * stored in mptcp_sf under the connection token gives the subflow its
 * SO_MARK (1 for the first connect seen, 2 for the next one, ...). The
 * subflow marked 2 additionally gets its congestion control set to 'cc'.
 *
 * Always returns 1, whatever happened.
 */
SEC("sockops")
int mptcp_subflow(struct bpf_sock_ops *skops)
{
	__u32 init = 1, key, mark, *cnt;
	struct mptcp_sock *msk;
	struct bpf_sock *sk;
	int err;

	/* Only the connect callback is of interest */
	if (skops->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
		return 1;

	sk = skops->sk;
	if (!sk)
		return 1;

	/* NULL here means no MPTCP socket could be derived from sk: ignore */
	msk = bpf_skc_to_mptcp_sock(sk);
	if (!msk)
		return 1;

	key = msk->token;
	cnt = bpf_map_lookup_elem(&mptcp_sf, &key);
	if (cnt) {
		/* A new subflow is added to an existing MPTCP connection */
		__sync_fetch_and_add(cnt, 1);
		/*
		 * NOTE(review): the counter is re-read after the atomic add,
		 * i.e. in a separate step -- confirm that concurrent connects
		 * on the same token are not a concern here.
		 */
		mark = *cnt;
	} else {
		/* A new MPTCP connection is just initiated and this is its primary subflow */
		/* The return value of the map update is not checked */
		bpf_map_update_elem(&mptcp_sf, &key, &init, BPF_ANY);
		mark = init;
	}

	/* Set the mark of the subflow's socket based on appearance order */
	err = bpf_setsockopt(skops, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
	if (err < 0)
		return 1;
	/* Second subflow only: switch its congestion control; 'err' is not checked afterwards */
	if (mark == 2)
		err = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, cc, TCP_CA_NAME_MAX);

	return 1;
}

/*
 * Verify the marks of all subflows of @msk: walking the subflow list in
 * order, the n-th subflow (counting from 1) must carry sk_mark == n.
 * On the first mismatch, ctx->retval is set to -2 and the walk stops.
 *
 * Always returns 1.
 */
static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
{
	struct mptcp_subflow_context *subflow;
	int i = 0;

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk;

		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
							   struct mptcp_subflow_context));

		/* 'i' is incremented before the comparison: expected marks start at 1 */
		if (ssk->sk_mark != ++i) {
			ctx->retval = -2;
			break;
		}
	}

	return 1;
}

/*
 * Verify the congestion control of the subflow marked 2: for every subflow
 * of @msk with sk_mark == 2, the name found in icsk_ca_ops must be equal to
 * 'cc' over the full TCP_CA_NAME_MAX bytes. On the first mismatch,
 * ctx->retval is set to -2 and the walk stops. Subflows with another mark
 * are not checked.
 *
 * Always returns 1.
 */
static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
{
	struct mptcp_subflow_context *subflow;

	mptcp_for_each_subflow(msk, subflow) {
		struct inet_connection_sock *icsk;
		struct sock *ssk;

		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
							   struct mptcp_subflow_context));
		/* Same socket, viewed as an inet_connection_sock to reach icsk_ca_ops */
		icsk = bpf_core_cast(ssk, struct inet_connection_sock);

		if (ssk->sk_mark == 2 &&
		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) {
			ctx->retval = -2;
			break;
		}
	}

	return 1;
}

/*
 * cgroup/getsockopt program used as the verification step of the test.
 *
 * Calls are left untouched unless they come from the process identified by
 * 'pid', target a socket whose protocol is IPPROTO_MPTCP, and ask for
 * either SOL_SOCKET/SO_MARK or SOL_TCP/TCP_CONGESTION.
 *
 * For the calls it handles, ctx->retval is set to:
 *  -1 when msk->pm.subflows is not exactly 1;
 *  -2 when the per-subflow mark or congestion control check fails.
 * Otherwise ctx->retval is left as is.
 *
 * Always returns 1.
 */
SEC("cgroup/getsockopt")
int _getsockopt_subflow(struct bpf_sockopt *ctx)
{
	struct bpf_sock *sk = ctx->sk;
	struct mptcp_sock *msk;

	/* The upper 32 bits of the helper's result are compared against 'pid' */
	if (bpf_get_current_pid_tgid() >> 32 != pid)
		return 1;

	if (!sk || sk->protocol != IPPROTO_MPTCP ||
	    (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) &&
	     !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)))
		return 1;

	msk = bpf_core_cast(sk, struct mptcp_sock);
	if (msk->pm.subflows != 1) {
		ctx->retval = -1;
		return 1;
	}

	/* Only the two level/optname pairs accepted above can reach this point */
	if (ctx->optname == SO_MARK)
		return _check_getsockopt_subflow_mark(msk, ctx);
	return _check_getsockopt_subflow_cc(msk, ctx);
}