Commit 55c85901 authored by Jakub Kicinski
Browse files
Martin KaFai Lau says:

====================
pull-request: bpf-next 2024-11-14

We've added 9 non-merge commits during the last 4 day(s) which contain
a total of 3 files changed, 226 insertions(+), 84 deletions(-).

The main changes are:

1) Fixes to bpf_msg_push/pop_data and test_sockmap. The changes have a
   dependency on other changes in the bpf-next/net branch,
   from Zijian Zhang.

2) Drop the netns code from the mptcp test and reuse the common helpers
   in test_progs, from Geliang Tang.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  bpf, sockmap: Fix sk_msg_reset_curr
  bpf, sockmap: Several fixes to bpf_msg_pop_data
  bpf, sockmap: Several fixes to bpf_msg_push_data
  selftests/bpf: Add more tests for test_txmsg_push_pop in test_sockmap
  selftests/bpf: Add push/pop checking for msg_verify_data in test_sockmap
  selftests/bpf: Fix total_bytes in msg_loop_rx in test_sockmap
  selftests/bpf: Fix SENDPAGE data logic in test_sockmap
  selftests/bpf: Add txmsg_pass to pull/push/pop in test_sockmap
  selftests/bpf: Drop netns helpers in mptcp
====================

Link: https://patch.msgid.link/20241114202832.3187927-1-martin.lau@linux.dev


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 66dae28a 141b4d6a
Loading
Loading
Loading
Loading
+51 −37
Original line number Diff line number Diff line
@@ -2604,18 +2604,16 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)

static void sk_msg_reset_curr(struct sk_msg *msg)
{
	u32 i = msg->sg.start;
	u32 len = 0;

	do {
		len += sk_msg_elem(msg, i)->length;
		sk_msg_iter_var_next(i);
		if (len >= msg->sg.size)
			break;
	} while (i != msg->sg.end);
	if (!msg->sg.size) {
		msg->sg.curr = msg->sg.start;
		msg->sg.copybreak = 0;
	} else {
		u32 i = msg->sg.end;

		sk_msg_iter_var_prev(i);
		msg->sg.curr = i;
	msg->sg.copybreak = 0;
		msg->sg.copybreak = msg->sg.data[i].length;
	}
}

static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
@@ -2778,7 +2776,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
		sk_msg_iter_var_next(i);
	} while (i != msg->sg.end);

	if (start >= offset + l)
	if (start > offset + l)
		return -EINVAL;

	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
@@ -2803,6 +2801,8 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,

		raw = page_address(page);

		if (i == msg->sg.end)
			sk_msg_iter_var_prev(i);
		psge = sk_msg_elem(msg, i);
		front = start - offset;
		back = psge->length - front;
@@ -2819,7 +2819,13 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
		}

		put_page(sg_page(psge));
	} else if (start - offset) {
		new = i;
		goto place_new;
	}

	if (start - offset) {
		if (i == msg->sg.end)
			sk_msg_iter_var_prev(i);
		psge = sk_msg_elem(msg, i);
		rsge = sk_msg_elem_cpy(msg, i);

@@ -2830,24 +2836,29 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
		sk_msg_iter_var_next(i);
		sg_unmark_end(psge);
		sg_unmark_end(&rsge);
		sk_msg_iter_next(msg, end);
	}

	/* Slot(s) to place newly allocated data */
	sk_msg_iter_next(msg, end);
	new = i;
	sk_msg_iter_var_next(i);

	/* Shift one or two slots as needed */
	if (!copy) {
		sge = sk_msg_elem_cpy(msg, i);
	if (i == msg->sg.end) {
		if (!rsge.length)
			goto place_new;
		sk_msg_iter_next(msg, end);
		goto place_new;
	}

		sk_msg_iter_var_next(i);
	/* Shift one or two slots as needed */
	sge = sk_msg_elem_cpy(msg, new);
	sg_unmark_end(&sge);
		sk_msg_iter_next(msg, end);

	nsge = sk_msg_elem_cpy(msg, i);
	if (rsge.length) {
		sk_msg_iter_var_next(i);
		nnsge = sk_msg_elem_cpy(msg, i);
		sk_msg_iter_next(msg, end);
	}

	while (i != msg->sg.end) {
@@ -2861,8 +2872,8 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
			nsge = sk_msg_elem_cpy(msg, i);
		}
	}
	}

place_new:
	/* Place newly allocated data buffer */
	sk_mem_charge(msg->sk, len);
	msg->sg.size += len;
@@ -2891,8 +2902,10 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {

static void sk_msg_shift_left(struct sk_msg *msg, int i)
{
	struct scatterlist *sge = sk_msg_elem(msg, i);
	int prev;

	put_page(sg_page(sge));
	do {
		prev = i;
		sk_msg_iter_var_next(i);
@@ -2929,6 +2942,9 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
	if (unlikely(flags))
		return -EINVAL;

	if (unlikely(len == 0))
		return 0;

	/* First find the starting scatterlist element */
	i = msg->sg.start;
	do {
@@ -2941,7 +2957,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
	} while (i != msg->sg.end);

	/* Bounds checks: start and pop must be inside message */
	if (start >= offset + l || last >= msg->sg.size)
	if (start >= offset + l || last > msg->sg.size)
		return -EINVAL;

	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
@@ -2970,12 +2986,12 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
	 */
	if (start != offset) {
		struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
		int a = start;
		int a = start - offset;
		int b = sge->length - pop - a;

		sk_msg_iter_var_next(i);

		if (pop < sge->length - a) {
		if (b > 0) {
			if (space) {
				sge->length = a;
				sk_msg_shift_right(msg, i);
@@ -2994,7 +3010,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
				if (unlikely(!page))
					return -ENOMEM;

				sge->length = a;
				orig = sg_page(sge);
				from = sg_virt(sge);
				to = page_address(page);
@@ -3004,7 +3019,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
				put_page(orig);
			}
			pop = 0;
		} else if (pop >= sge->length - a) {
		} else {
			pop -= (sge->length - a);
			sge->length = a;
		}
@@ -3038,7 +3053,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
			pop -= sge->length;
			sk_msg_shift_left(msg, i);
		}
		sk_msg_iter_var_next(i);
	}

	sk_mem_uncharge(msg->sk, len - pop);
+12 −30
Original line number Diff line number Diff line
@@ -69,24 +69,6 @@ struct mptcp_storage {
	char ca_name[TCP_CA_NAME_MAX];
};

/*
 * Set up the NS_TEST network namespace for a test run.
 *
 * Issues "ip netns add" for NS_TEST, brings the loopback device inside it
 * up, and then hands back the token produced by open_netns(NS_TEST).
 *
 * Returns NULL when either SYS() invocation branches to the fail label
 * (presumably because the shell command failed -- confirm against the
 * SYS() definition); otherwise returns the open_netns() result unchanged.
 */
static struct nstoken *create_netns(void)
{
	struct nstoken *token = NULL;

	SYS(fail, "ip netns add %s", NS_TEST);
	SYS(fail, "ip -net %s link set dev lo up", NS_TEST);

	token = open_netns(NS_TEST);
fail:
	/* Still NULL here if one of the commands above bailed out. */
	return token;
}

/*
 * Tear down what create_netns() set up.
 *
 * @nstoken: token to pass to close_netns(); may be NULL, in which case
 *           close_netns() is not called.
 *
 * The "ip netns del" command for NS_TEST is issued unconditionally through
 * SYS_NOFAIL(), whether or not a token was supplied.
 */
static void cleanup_netns(struct nstoken *nstoken)
{
	if (nstoken != NULL) {
		close_netns(nstoken);
	}

	SYS_NOFAIL("ip netns del %s", NS_TEST);
}

static int start_mptcp_server(int family, const char *addr_str, __u16 port,
			      int timeout_ms)
{
@@ -206,15 +188,15 @@ static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)

static void test_base(void)
{
	struct nstoken *nstoken = NULL;
	struct netns_obj *netns = NULL;
	int server_fd, cgroup_fd;

	cgroup_fd = test__join_cgroup("/mptcp");
	if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
		return;

	nstoken = create_netns();
	if (!ASSERT_OK_PTR(nstoken, "create_netns"))
	netns = netns_new(NS_TEST, true);
	if (!ASSERT_OK_PTR(netns, "netns_new"))
		goto fail;

	/* without MPTCP */
@@ -237,7 +219,7 @@ static void test_base(void)
	close(server_fd);

fail:
	cleanup_netns(nstoken);
	netns_free(netns);
	close(cgroup_fd);
}

@@ -322,21 +304,21 @@ static int run_mptcpify(int cgroup_fd)

static void test_mptcpify(void)
{
	struct nstoken *nstoken = NULL;
	struct netns_obj *netns = NULL;
	int cgroup_fd;

	cgroup_fd = test__join_cgroup("/mptcpify");
	if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
		return;

	nstoken = create_netns();
	if (!ASSERT_OK_PTR(nstoken, "create_netns"))
	netns = netns_new(NS_TEST, true);
	if (!ASSERT_OK_PTR(netns, "netns_new"))
		goto fail;

	ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify");

fail:
	cleanup_netns(nstoken);
	netns_free(netns);
	close(cgroup_fd);
}

@@ -414,7 +396,7 @@ static void run_subflow(void)
static void test_subflow(void)
{
	struct mptcp_subflow *skel;
	struct nstoken *nstoken;
	struct netns_obj *netns;
	int cgroup_fd;

	cgroup_fd = test__join_cgroup("/mptcp_subflow");
@@ -437,8 +419,8 @@ static void test_subflow(void)
	if (!ASSERT_OK_PTR(skel->links._getsockopt_subflow, "attach _getsockopt_subflow"))
		goto skel_destroy;

	nstoken = create_netns();
	if (!ASSERT_OK_PTR(nstoken, "create_netns: mptcp_subflow"))
	netns = netns_new(NS_TEST, true);
	if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_subflow"))
		goto skel_destroy;

	if (endpoint_init("subflow") < 0)
@@ -447,7 +429,7 @@ static void test_subflow(void)
	run_subflow();

close_netns:
	cleanup_netns(nstoken);
	netns_free(netns);
skel_destroy:
	mptcp_subflow__destroy(skel);
close_cgroup:
+163 −17
Original line number Diff line number Diff line
@@ -88,6 +88,10 @@ int ktls;
int peek_flag;
int skb_use_parser;
int txmsg_omit_skb_parser;
int verify_push_start;
int verify_push_len;
int verify_pop_start;
int verify_pop_len;

static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
@@ -420,16 +424,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
{
	bool drop = opt->drop_expected;
	unsigned char k = 0;
	int i, j, fp;
	FILE *file;
	int i, fp;

	file = tmpfile();
	if (!file) {
		perror("create file for sendpage");
		return 1;
	}
	for (i = 0; i < iov_length * cnt; i++, k++)
	for (i = 0; i < cnt; i++, k = 0) {
		for (j = 0; j < iov_length; j++, k++)
			fwrite(&k, sizeof(char), 1, file);
	}
	fflush(file);
	fseek(file, 0, SEEK_SET);

@@ -512,12 +518,41 @@ static int msg_alloc_iov(struct msghdr *msg,
	return -ENOMEM;
}

/* TODO: Add verification logic for push, pull and pop data */
/*
 * Precompute the push/pop parameters consumed by msg_verify_data().
 *
 * Fills verify_push_start/verify_push_len and verify_pop_start/verify_pop_len
 * from the txmsg_start_push/txmsg_end_push and txmsg_start_pop/txmsg_pop
 * settings. The start offsets are always copied through as-is. The lengths
 * are copied as-is too, unless both lengths are non-zero and the byte ranges
 * [start_push, start_push + end_push - 1] and [start_pop, start_pop + pop - 1]
 * intersect; in that case the length of the intersection is subtracted from
 * both verify lengths (clamped at zero).
 */
static void msg_verify_date_prep(void)
{
	int last_pushed = txmsg_start_push + txmsg_end_push - 1;
	int last_popped = txmsg_start_pop + txmsg_pop - 1;
	int shared;

	/* Start from the plain settings; only the lengths may shrink below. */
	verify_push_start = txmsg_start_push;
	verify_pop_start = txmsg_start_pop;
	verify_push_len = txmsg_end_push;
	verify_pop_len = txmsg_pop;

	/* Nothing to reconcile unless both a push and a pop are configured... */
	if (!txmsg_end_push || !txmsg_pop)
		return;

	/* ...and their ranges actually intersect. */
	if (txmsg_start_push > last_popped || txmsg_start_pop > last_pushed)
		return;

	/* Length of the intersection, measured from whichever range starts later. */
	if (txmsg_start_push < txmsg_start_pop)
		shared = min(last_pushed - txmsg_start_pop + 1, txmsg_pop);
	else
		shared = min(last_popped - txmsg_start_push + 1, txmsg_end_push);

	verify_push_len = max(txmsg_end_push - shared, 0);
	verify_pop_len = max(txmsg_pop - shared, 0);
}

static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz,
				 unsigned char *k_p, int *bytes_cnt_p)
			   unsigned char *k_p, int *bytes_cnt_p,
			   int *check_cnt_p, int *push_p)
{
	int i, j, bytes_cnt = *bytes_cnt_p;
	int bytes_cnt = *bytes_cnt_p, check_cnt = *check_cnt_p, push = *push_p;
	unsigned char k = *k_p;
	int i, j;

	for (i = 0, j = 0; i < msg->msg_iovlen && size; i++, j = 0) {
		unsigned char *d = msg->msg_iov[i].iov_base;
@@ -536,6 +571,37 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz,
		}

		for (; j < msg->msg_iov[i].iov_len && size; j++) {
			if (push > 0 &&
			    check_cnt == verify_push_start + verify_push_len - push) {
				int skipped;
revisit_push:
				skipped = push;
				if (j + push >= msg->msg_iov[i].iov_len)
					skipped = msg->msg_iov[i].iov_len - j;
				push -= skipped;
				size -= skipped;
				j += skipped - 1;
				check_cnt += skipped;
				continue;
			}

			if (verify_pop_len > 0 && check_cnt == verify_pop_start) {
				bytes_cnt += verify_pop_len;
				check_cnt += verify_pop_len;
				k += verify_pop_len;

				if (bytes_cnt == chunk_sz) {
					k = 0;
					bytes_cnt = 0;
					check_cnt = 0;
					push = verify_push_len;
				}

				if (push > 0 &&
				    check_cnt == verify_push_start + verify_push_len - push)
					goto revisit_push;
			}

			if (d[j] != k++) {
				fprintf(stderr,
					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
@@ -543,15 +609,20 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz,
				return -EDATAINTEGRITY;
			}
			bytes_cnt++;
			check_cnt++;
			if (bytes_cnt == chunk_sz) {
				k = 0;
				bytes_cnt = 0;
				check_cnt = 0;
				push = verify_push_len;
			}
			size--;
		}
	}
	*k_p = k;
	*bytes_cnt_p = bytes_cnt;
	*check_cnt_p = check_cnt;
	*push_p = push;
	return 0;
}

@@ -604,12 +675,14 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
		}
		clock_gettime(CLOCK_MONOTONIC, &s->end);
	} else {
		float total_bytes, txmsg_pop_total, txmsg_push_total;
		int slct, recvp = 0, recv, max_fd = fd;
		float total_bytes, txmsg_pop_total;
		int fd_flags = O_NONBLOCK;
		struct timeval timeout;
		unsigned char k = 0;
		int bytes_cnt = 0;
		int check_cnt = 0;
		int push = 0;
		fd_set w;

		fcntl(fd, fd_flags);
@@ -623,12 +696,22 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
		 * This is really only useful for testing edge cases in code
		 * paths.
		 */
		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
		if (txmsg_apply)
		total_bytes = (float)iov_length * (float)cnt;
		if (!opt->sendpage)
			total_bytes *= (float)iov_count;
		if (txmsg_apply) {
			txmsg_push_total = txmsg_end_push * (total_bytes / txmsg_apply);
			txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
		else
		} else {
			txmsg_push_total = txmsg_end_push * cnt;
			txmsg_pop_total = txmsg_pop * cnt;
		}
		total_bytes += txmsg_push_total;
		total_bytes -= txmsg_pop_total;
		if (data) {
			msg_verify_date_prep();
			push = verify_push_len;
		}
		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
		if (err < 0)
			perror("recv start time");
@@ -701,10 +784,11 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,

			if (data) {
				int chunk_sz = opt->sendpage ?
						iov_length * cnt :
						iov_length :
						iov_length * iov_count;

				errno = msg_verify_data(&msg, recv, chunk_sz, &k, &bytes_cnt);
				errno = msg_verify_data(&msg, recv, chunk_sz, &k, &bytes_cnt,
							&check_cnt, &push);
				if (errno) {
					perror("data verify msg failed");
					goto out_errno;
@@ -714,7 +798,9 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
								recvp,
								chunk_sz,
								&k,
								&bytes_cnt);
								&bytes_cnt,
								&check_cnt,
								&push);
					if (errno) {
						perror("data verify msg_peek failed");
						goto out_errno;
@@ -796,8 +882,6 @@ static int sendmsg_test(struct sockmap_options *opt)

	rxpid = fork();
	if (rxpid == 0) {
		if (txmsg_pop || txmsg_start_pop)
			iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
		if (opt->drop_expected || txmsg_ktls_skb_drop)
			_exit(0);

@@ -1466,8 +1550,8 @@ static void test_send_many(struct sockmap_options *opt, int cgrp)

static void test_send_large(struct sockmap_options *opt, int cgrp)
{
	opt->iov_length = 256;
	opt->iov_count = 1024;
	opt->iov_length = 8192;
	opt->iov_count = 32;
	opt->rate = 2;
	test_exec(cgrp, opt);
}
@@ -1596,11 +1680,13 @@ static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
{
	/* Test basic start/end */
	txmsg_pass = 1;
	txmsg_start = 1;
	txmsg_end = 2;
	test_send(opt, cgrp);

	/* Test >4k pull */
	txmsg_pass = 1;
	txmsg_start = 4096;
	txmsg_end = 9182;
	test_send_large(opt, cgrp);
@@ -1628,12 +1714,16 @@ static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)

static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
{
	bool data = opt->data_test;

	/* Test basic pop */
	txmsg_pass = 1;
	txmsg_start_pop = 1;
	txmsg_pop = 2;
	test_send_many(opt, cgrp);

	/* Test pop with >4k */
	txmsg_pass = 1;
	txmsg_start_pop = 4096;
	txmsg_pop = 4096;
	test_send_large(opt, cgrp);
@@ -1644,6 +1734,12 @@ static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
	txmsg_pop = 2;
	test_send_many(opt, cgrp);

	/* TODO: Test for pop + cork should be different,
	 * - It makes the layout of the received data difficult
	 * - It makes it hard to calculate the total_bytes in the recvmsg
	 * Temporarily skip the data integrity test for this case now.
	 */
	opt->data_test = false;
	/* Test pop + cork */
	txmsg_redir = 0;
	txmsg_cork = 512;
@@ -1657,16 +1753,21 @@ static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
	txmsg_start_pop = 1;
	txmsg_pop = 2;
	test_send_many(opt, cgrp);
	opt->data_test = data;
}

static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
{
	bool data = opt->data_test;

	/* Test basic push */
	txmsg_pass = 1;
	txmsg_start_push = 1;
	txmsg_end_push = 1;
	test_send(opt, cgrp);

	/* Test push 4kB >4k */
	txmsg_pass = 1;
	txmsg_start_push = 4096;
	txmsg_end_push = 4096;
	test_send_large(opt, cgrp);
@@ -1677,21 +1778,66 @@ static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
	txmsg_end_push = 2;
	test_send_many(opt, cgrp);

	/* TODO: Test for push + cork should be different,
	 * - It makes the layout of the received data difficult
	 * - It makes it hard to calculate the total_bytes in the recvmsg
	 * Temporarily skip the data integrity test for this case now.
	 */
	opt->data_test = false;
	/* Test push + cork */
	txmsg_redir = 0;
	txmsg_cork = 512;
	txmsg_start_push = 1;
	txmsg_end_push = 2;
	test_send_many(opt, cgrp);
	opt->data_test = data;
}

/*
 * Run test_send_large() over a fixed set of combined push + pop settings.
 *
 * @cgrp: cgroup fd forwarded to test_send_large()
 * @opt:  sockmap options forwarded to test_send_large()
 *
 * Before every run txmsg_pass is set to 1 and the four push/pop globals are
 * loaded from the table below. The first four rows use push and pop ranges
 * that overlap; the last two use ranges that do not.
 */
static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
{
	static const struct {
		int start_push;
		int end_push;
		int start_pop;
		int pop;
	} cases[] = {
		/* push/pop range overlapping */
		{  1, 10,  5,  4 },
		{  1, 10,  5, 16 },
		{  5,  4,  1, 10 },
		{  5, 16,  1, 10 },
		/* push/pop range non-overlapping */
		{  1, 10, 16,  4 },
		{ 16, 10,  5,  4 },
	};
	unsigned int n;

	for (n = 0; n < sizeof(cases) / sizeof(cases[0]); n++) {
		txmsg_pass = 1;
		txmsg_start_push = cases[n].start_push;
		txmsg_end_push = cases[n].end_push;
		txmsg_start_pop = cases[n].start_pop;
		txmsg_pop = cases[n].pop;
		test_send_large(opt, cgrp);
	}
}

static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)