Commit 6855b9be authored by Paolo Abeni's avatar Paolo Abeni
Browse files

Merge branch 'mptcp-pm-prep-work-for-new-ops-and-sysctl-knobs'



Matthieu Baerts says:

====================
mptcp: pm: prep work for new ops and sysctl knobs

Here are a few cleanups, preparation work for the new PM ops, and sysctl
knobs.

- Patch 1: reorg: move generic NL code used by all PMs to pm_netlink.c.

- Patch 2: use kmemdup() instead of kmalloc + copy.

- Patch 3: small cleanup to use pm var instead of msk->pm.

- Patch 4: reorg: id_avail_bitmap is only used by the in-kernel PM.

- Patch 5: use struct_group to easily reset a subset of PM data vars.

- Patch 6: introduce the minimal skeleton for the new PM ops.

- Patch 7: register in-kernel and userspace PM ops.

- Patch 8: new net.mptcp.path_manager sysctl knob, deprecating pm_type.

- Patch 9: map the new path_manager sysctl knob with pm_type.

- Patch 10: map the old pm_type sysctl knob with path_manager.

- Patch 11: new net.mptcp.available_path_managers sysctl knob.

- Patch 12: new test to validate path_manager and pm_type mapping.

Signed-off-by: default avatarMatthieu Baerts (NGI0) <matttbe@kernel.org>
====================

Link: https://patch.msgid.link/20250313-net-next-mptcp-pm-ops-intro-v1-0-f4e4a88efc50@kernel.org


Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 27b91800 9cf0128e
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -30,6 +30,10 @@ allow_join_initial_addr_port - BOOLEAN

	Default: 1

available_path_managers - STRING
	Shows the available path managers choices that are registered. More
	path managers may be available, but not loaded.

available_schedulers - STRING
	Shows the available schedulers choices that are registered. More packet
	schedulers may be available, but not loaded.
@@ -72,6 +76,23 @@ enabled - BOOLEAN

	Default: 1 (enabled)

path_manager - STRING
	Set the default path manager name to use for each new MPTCP
	socket. In-kernel path management will control subflow
	connections and address advertisements according to
	per-namespace values configured over the MPTCP netlink
	API. Userspace path management puts per-MPTCP-connection subflow
	connection decisions and address advertisements under control of
	a privileged userspace program, at the cost of more netlink
	traffic to propagate all of the related events and commands.

	This is a per-namespace sysctl.

	* "kernel"          - In-kernel path manager
	* "userspace"       - Userspace path manager

	Default: "kernel"

pm_type - INTEGER
	Set the default path manager type to use for each new MPTCP
	socket. In-kernel path management will control subflow
@@ -84,6 +105,8 @@ pm_type - INTEGER

	This is a per-namespace sysctl.

	Deprecated since v6.15, use path_manager instead.

	* 0 - In-kernel path manager
	* 1 - Userspace path manager

+14 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@

struct mptcp_info;
struct mptcp_sock;
struct mptcp_pm_addr_entry;
struct seq_file;

/* MPTCP sk_buff extension data */
@@ -121,6 +122,19 @@ struct mptcp_sched_ops {
	void (*release)(struct mptcp_sock *msk);
} ____cacheline_aligned_in_smp;

#define MPTCP_PM_NAME_MAX	16
#define MPTCP_PM_MAX		128
#define MPTCP_PM_BUF_MAX	(MPTCP_PM_NAME_MAX * MPTCP_PM_MAX)

struct mptcp_pm_ops {
	char			name[MPTCP_PM_NAME_MAX];
	struct module		*owner;
	struct list_head	list;

	void (*init)(struct mptcp_sock *msk);
	void (*release)(struct mptcp_sock *msk);
} ____cacheline_aligned_in_smp;

#ifdef CONFIG_MPTCP
void mptcp_init(void);

+112 −1
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ struct mptcp_pernet {
	u8 allow_join_initial_addr_port;
	u8 pm_type;
	char scheduler[MPTCP_SCHED_NAME_MAX];
	char path_manager[MPTCP_PM_NAME_MAX];
};

static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -83,6 +84,11 @@ int mptcp_get_pm_type(const struct net *net)
	return mptcp_get_pernet(net)->pm_type;
}

const char *mptcp_get_path_manager(const struct net *net)
{
	return mptcp_get_pernet(net)->path_manager;
}

const char *mptcp_get_scheduler(const struct net *net)
{
	return mptcp_get_pernet(net)->scheduler;
@@ -101,6 +107,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
	pernet->stale_loss_cnt = 4;
	pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
	strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler));
	strscpy(pernet->path_manager, "kernel", sizeof(pernet->path_manager));
}

#ifdef CONFIG_SYSCTL
@@ -174,6 +181,96 @@ static int proc_blackhole_detect_timeout(const struct ctl_table *table,
	return ret;
}

static int mptcp_set_path_manager(char *path_manager, const char *name)
{
	struct mptcp_pm_ops *pm_ops;
	int ret = 0;

	rcu_read_lock();
	pm_ops = mptcp_pm_find(name);
	if (pm_ops)
		strscpy(path_manager, name, MPTCP_PM_NAME_MAX);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

static int proc_path_manager(const struct ctl_table *ctl, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	struct mptcp_pernet *pernet = container_of(ctl->data,
						   struct mptcp_pernet,
						   path_manager);
	char (*path_manager)[MPTCP_PM_NAME_MAX] = ctl->data;
	char pm_name[MPTCP_PM_NAME_MAX];
	const struct ctl_table tbl = {
		.data = pm_name,
		.maxlen = MPTCP_PM_NAME_MAX,
	};
	int ret;

	strscpy(pm_name, *path_manager, MPTCP_PM_NAME_MAX);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0) {
		ret = mptcp_set_path_manager(*path_manager, pm_name);
		if (ret == 0) {
			u8 pm_type = __MPTCP_PM_TYPE_NR;

			if (strncmp(pm_name, "kernel", MPTCP_PM_NAME_MAX) == 0)
				pm_type = MPTCP_PM_TYPE_KERNEL;
			else if (strncmp(pm_name, "userspace", MPTCP_PM_NAME_MAX) == 0)
				pm_type = MPTCP_PM_TYPE_USERSPACE;
			pernet->pm_type = pm_type;
		}
	}

	return ret;
}

static int proc_pm_type(const struct ctl_table *ctl, int write,
			void *buffer, size_t *lenp, loff_t *ppos)
{
	struct mptcp_pernet *pernet = container_of(ctl->data,
						   struct mptcp_pernet,
						   pm_type);
	int ret;

	ret = proc_dou8vec_minmax(ctl, write, buffer, lenp, ppos);
	if (write && ret == 0) {
		u8 pm_type = READ_ONCE(*(u8 *)ctl->data);
		char *pm_name = "";

		if (pm_type == MPTCP_PM_TYPE_KERNEL)
			pm_name = "kernel";
		else if (pm_type == MPTCP_PM_TYPE_USERSPACE)
			pm_name = "userspace";
		mptcp_set_path_manager(pernet->path_manager, pm_name);
	}

	return ret;
}

static int proc_available_path_managers(const struct ctl_table *ctl,
					int write, void *buffer,
					size_t *lenp, loff_t *ppos)
{
	struct ctl_table tbl = { .maxlen = MPTCP_PM_BUF_MAX, };
	int ret;

	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
	if (!tbl.data)
		return -ENOMEM;

	mptcp_pm_get_available(tbl.data, MPTCP_PM_BUF_MAX);
	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	kfree(tbl.data);

	return ret;
}

static struct ctl_table mptcp_sysctl_table[] = {
	{
		.procname = "enabled",
@@ -218,7 +315,7 @@ static struct ctl_table mptcp_sysctl_table[] = {
		.procname = "pm_type",
		.maxlen = sizeof(u8),
		.mode = 0644,
		.proc_handler = proc_dou8vec_minmax,
		.proc_handler = proc_pm_type,
		.extra1       = SYSCTL_ZERO,
		.extra2       = &mptcp_pm_type_max
	},
@@ -253,6 +350,18 @@ static struct ctl_table mptcp_sysctl_table[] = {
		.mode = 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname = "path_manager",
		.maxlen	= MPTCP_PM_NAME_MAX,
		.mode = 0644,
		.proc_handler = proc_path_manager,
	},
	{
		.procname = "available_path_managers",
		.maxlen	= MPTCP_PM_BUF_MAX,
		.mode = 0444,
		.proc_handler = proc_available_path_managers,
	},
};

static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -278,6 +387,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
	table[8].data = &pernet->close_timeout;
	table[9].data = &pernet->blackhole_timeout;
	table[10].data = &pernet->syn_retrans_before_tcp_fallback;
	table[11].data = &pernet->path_manager;
	/* table[12] is for available_path_managers which is read-only info */

	hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
				     ARRAY_SIZE(mptcp_sysctl_table));
+81 −16
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@
 */
#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/rculist.h>
#include <linux/spinlock.h>
#include "protocol.h"
#include "mib.h"

@@ -18,6 +20,9 @@ struct mptcp_pm_add_entry {
	struct mptcp_sock	*sock;
};

static DEFINE_SPINLOCK(mptcp_pm_list_lock);
static LIST_HEAD(mptcp_pm_list);

/* path manager helpers */

/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses,
@@ -511,13 +516,13 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
	 * be sure to serve this event only once.
	 */
	if (READ_ONCE(pm->work_pending) &&
	    !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
	    !(pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
		mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);

	if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
	if ((pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
		announce = true;

	msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
	pm->status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
	spin_unlock_bh(&pm->lock);

	if (announce)
@@ -978,10 +983,7 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk)
	u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
	struct mptcp_pm_data *pm = &msk->pm;

	pm->add_addr_signaled = 0;
	pm->add_addr_accepted = 0;
	pm->local_addr_used = 0;
	pm->subflows = 0;
	memset(&pm->reset, 0, sizeof(pm->reset));
	pm->rm_list_tx.nr = 0;
	pm->rm_list_rx.nr = 0;
	WRITE_ONCE(pm->pm_type, pm_type);
@@ -1000,16 +1002,9 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk)
			   !!mptcp_pm_get_add_addr_accept_max(msk) &&
			   subflows_allowed);
		WRITE_ONCE(pm->accept_subflow, subflows_allowed);
	} else {
		WRITE_ONCE(pm->work_pending, 0);
		WRITE_ONCE(pm->accept_addr, 0);
		WRITE_ONCE(pm->accept_subflow, 0);
	}

	WRITE_ONCE(pm->addr_signal, 0);
	WRITE_ONCE(pm->remote_deny_join_id0, false);
	pm->status = 0;
	bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
		bitmap_fill(pm->id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	}
}

void mptcp_pm_data_init(struct mptcp_sock *msk)
@@ -1022,5 +1017,75 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)

void __init mptcp_pm_init(void)
{
	mptcp_pm_kernel_register();
	mptcp_pm_userspace_register();
	mptcp_pm_nl_init();
}

/* Must be called with rcu read lock held */
struct mptcp_pm_ops *mptcp_pm_find(const char *name)
{
	struct mptcp_pm_ops *pm_ops;

	list_for_each_entry_rcu(pm_ops, &mptcp_pm_list, list) {
		if (!strcmp(pm_ops->name, name))
			return pm_ops;
	}

	return NULL;
}

int mptcp_pm_validate(struct mptcp_pm_ops *pm_ops)
{
	return 0;
}

int mptcp_pm_register(struct mptcp_pm_ops *pm_ops)
{
	int ret;

	ret = mptcp_pm_validate(pm_ops);
	if (ret)
		return ret;

	spin_lock(&mptcp_pm_list_lock);
	if (mptcp_pm_find(pm_ops->name)) {
		spin_unlock(&mptcp_pm_list_lock);
		return -EEXIST;
	}
	list_add_tail_rcu(&pm_ops->list, &mptcp_pm_list);
	spin_unlock(&mptcp_pm_list_lock);

	pr_debug("%s registered\n", pm_ops->name);
	return 0;
}

void mptcp_pm_unregister(struct mptcp_pm_ops *pm_ops)
{
	/* skip unregistering the default path manager */
	if (WARN_ON_ONCE(pm_ops == &mptcp_pm_kernel))
		return;

	spin_lock(&mptcp_pm_list_lock);
	list_del_rcu(&pm_ops->list);
	spin_unlock(&mptcp_pm_list_lock);
}

/* Build string with list of available path manager values.
 * Similar to tcp_get_available_congestion_control()
 */
void mptcp_pm_get_available(char *buf, size_t maxlen)
{
	struct mptcp_pm_ops *pm_ops;
	size_t offs = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(pm_ops, &mptcp_pm_list, list) {
		offs += snprintf(buf + offs, maxlen - offs, "%s%s",
				 offs == 0 ? "" : " ", pm_ops->name);

		if (WARN_ON_ONCE(offs >= maxlen))
			break;
	}
	rcu_read_unlock();
}
+9 −7
Original line number Diff line number Diff line
@@ -710,11 +710,10 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk,
		return ret;

	/* address not found, add to local list */
	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	entry = kmemdup(skc, sizeof(*skc), GFP_ATOMIC);
	if (!entry)
		return -ENOMEM;

	*entry = *skc;
	entry->addr.port = 0;
	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false);
	if (ret < 0)
@@ -817,13 +816,12 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
		return -EINVAL;
	}

	entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT);
	entry = kmemdup(&addr, sizeof(addr), GFP_KERNEL_ACCOUNT);
	if (!entry) {
		GENL_SET_ERR_MSG(info, "can't allocate addr");
		return -ENOMEM;
	}

	*entry = addr;
	if (entry->addr.port) {
		ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry);
		if (ret) {
@@ -1400,11 +1398,15 @@ static struct pernet_operations mptcp_pm_pernet_ops = {
	.size = sizeof(struct pm_nl_pernet),
};

void __init mptcp_pm_nl_init(void)
struct mptcp_pm_ops mptcp_pm_kernel = {
	.name			= "kernel",
	.owner			= THIS_MODULE,
};

void __init mptcp_pm_kernel_register(void)
{
	if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0)
		panic("Failed to register MPTCP PM pernet subsystem.\n");

	if (genl_register_family(&mptcp_genl_family))
		panic("Failed to register MPTCP PM netlink family\n");
	mptcp_pm_register(&mptcp_pm_kernel);
}
Loading