Commit 43c7ce69 authored by Kuniyuki Iwashima, committed by Paolo Abeni
Browse files

rtnetlink: Protect struct rtnl_link_ops with SRCU.



Once RTNL is replaced with rtnl_net_lock(), we need a mechanism to
guarantee that rtnl_link_ops is alive during inflight RTM_NEWLINK
even when its module is being unloaded.

Let's use SRCU to protect ops.

rtnl_link_ops_get() now iterates link_ops under RCU and returns
SRCU-protected ops pointer.  The caller must call rtnl_link_ops_put()
to release the pointer after the use.

Also, __rtnl_link_unregister() unlinks the ops first and calls
synchronize_srcu() to wait for inflight RTM_NEWLINK requests to
complete.

Note that link_ops needs to be protected by its dedicated lock
when RTNL is removed.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parent 0d3008d1
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
#define __NET_RTNETLINK_H

#include <linux/rtnetlink.h>
#include <linux/srcu.h>
#include <net/netlink.h>

typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
@@ -69,7 +70,8 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
/**
 *	struct rtnl_link_ops - rtnetlink link operations
 *
 *	@list: Used internally
 *	@list: Used internally, protected by RTNL and SRCU
 *	@srcu: Used internally
 *	@kind: Identifier
 *	@netns_refund: Physical device, move to init_net on netns exit
 *	@maxtype: Highest device specific netlink attribute number
@@ -100,6 +102,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
 */
struct rtnl_link_ops {
	struct list_head	list;
	struct srcu_struct	srcu;

	const char		*kind;

+61 −22
Original line number Diff line number Diff line
@@ -457,15 +457,29 @@ EXPORT_SYMBOL_GPL(__rtnl_unregister_many);

static LIST_HEAD(link_ops);

static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index)
{
	const struct rtnl_link_ops *ops;
	struct rtnl_link_ops *ops;

	rcu_read_lock();

	list_for_each_entry_rcu(ops, &link_ops, list) {
		if (!strcmp(ops->kind, kind)) {
			*srcu_index = srcu_read_lock(&ops->srcu);
			goto unlock;
		}
	}

	ops = NULL;
unlock:
	rcu_read_unlock();

	list_for_each_entry(ops, &link_ops, list) {
		if (!strcmp(ops->kind, kind))
	return ops;
}
	return NULL;

/* Leave the SRCU read-side critical section on @ops->srcu.
 *
 * @srcu_index is the value srcu_read_lock() returned, as stored by
 * rtnl_link_ops_get() through its srcu_index argument.
 */
static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index)
{
	srcu_read_unlock(&ops->srcu, srcu_index);
}

/**
@@ -480,8 +494,16 @@ static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
 */
int __rtnl_link_register(struct rtnl_link_ops *ops)
{
	if (rtnl_link_ops_get(ops->kind))
	struct rtnl_link_ops *tmp;
	int err;

	/* When RTNL is removed, add lock for link_ops. */
	ASSERT_RTNL();

	list_for_each_entry(tmp, &link_ops, list) {
		if (!strcmp(ops->kind, tmp->kind))
			return -EEXIST;
	}

	/* The check for alloc/setup is here because if ops
	 * does not have that filled up, it is not possible
@@ -491,7 +513,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
	if ((ops->alloc || ops->setup) && !ops->dellink)
		ops->dellink = unregister_netdevice_queue;

	list_add_tail(&ops->list, &link_ops);
	err = init_srcu_struct(&ops->srcu);
	if (err)
		return err;

	list_add_tail_rcu(&ops->list, &link_ops);

	return 0;
}
EXPORT_SYMBOL_GPL(__rtnl_link_register);
@@ -542,11 +569,13 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
{
	struct net *net;

	for_each_net(net) {
	list_del_rcu(&ops->list);
	synchronize_srcu(&ops->srcu);
	cleanup_srcu_struct(&ops->srcu);

	for_each_net(net)
		__rtnl_kill_links(net, ops);
}
	list_del(&ops->list);
}
EXPORT_SYMBOL_GPL(__rtnl_link_unregister);

/* Return with the rtnl_lock held when there are no network
@@ -2158,10 +2187,11 @@ static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
	[IFLA_XDP_PROG_ID]	= { .type = NLA_U32 },
};

static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
static struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla,
						  int *ops_srcu_index)
{
	const struct rtnl_link_ops *ops = NULL;
	struct nlattr *linfo[IFLA_INFO_MAX + 1];
	struct rtnl_link_ops *ops = NULL;

	if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla, ifla_info_policy, NULL) < 0)
		return NULL;
@@ -2170,7 +2200,7 @@ static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla
		char kind[MODULE_NAME_LEN];

		nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
		ops = rtnl_link_ops_get(kind);
		ops = rtnl_link_ops_get(kind, ops_srcu_index);
	}

	return ops;
@@ -2290,8 +2320,8 @@ static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,

static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct rtnl_link_ops *kind_ops = NULL;
	struct netlink_ext_ack *extack = cb->extack;
	struct rtnl_link_ops *kind_ops = NULL;
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	unsigned int flags = NLM_F_MULTI;
@@ -2302,6 +2332,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
	struct net *tgt_net = net;
	u32 ext_filter_mask = 0;
	struct net_device *dev;
	int ops_srcu_index;
	int master_idx = 0;
	int netnsid = -1;
	int err, i;
@@ -2335,7 +2366,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
			master_idx = nla_get_u32(tb[i]);
			break;
		case IFLA_LINKINFO:
			kind_ops = linkinfo_to_kind_ops(tb[i]);
			kind_ops = linkinfo_to_kind_ops(tb[i], &ops_srcu_index);
			break;
		default:
			if (cb->strict_check) {
@@ -2361,6 +2392,10 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
		if (err < 0)
			break;
	}

	if (kind_ops)
		rtnl_link_ops_put(kind_ops, ops_srcu_index);

	cb->seq = tgt_net->dev_base_seq;
	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
	if (netnsid >= 0)
@@ -3747,8 +3782,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct nlattr **tb, **linkinfo, **data = NULL;
	const struct rtnl_link_ops *ops = NULL;
	struct rtnl_link_ops *ops = NULL;
	struct rtnl_newlink_tbs *tbs;
	int ops_srcu_index;
	int ret;

	tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
@@ -3780,13 +3816,13 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
		char kind[MODULE_NAME_LEN];

		nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
		ops = rtnl_link_ops_get(kind);
		ops = rtnl_link_ops_get(kind, &ops_srcu_index);
#ifdef CONFIG_MODULES
		if (!ops) {
			__rtnl_unlock();
			request_module("rtnl-link-%s", kind);
			rtnl_lock();
			ops = rtnl_link_ops_get(kind);
			ops = rtnl_link_ops_get(kind, &ops_srcu_index);
		}
#endif
	}
@@ -3800,7 +3836,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
							  linkinfo[IFLA_INFO_DATA],
							  ops->policy, extack);
			if (ret < 0)
				goto free;
				goto put_ops;

			data = tbs->attr;
		}
@@ -3808,12 +3844,15 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
		if (ops->validate) {
			ret = ops->validate(tb, data, extack);
			if (ret < 0)
				goto free;
				goto put_ops;
		}
	}

	ret = __rtnl_newlink(skb, nlh, ops, tbs, data, extack);

put_ops:
	if (ops)
		rtnl_link_ops_put(ops, ops_srcu_index);
free:
	kfree(tbs);
	return ret;