Commit c1dacb45 authored by Jakub Kicinski
Browse files

Merge branch 'nexthop-convert-rtm_-new-del-nexthop-to-per-netns-rtnl'

Kuniyuki Iwashima says:

====================
nexthop: Convert RTM_{NEW,DEL}NEXTHOP to per-netns RTNL.

Patches 1 - 5 move some validation for RTM_NEWNEXTHOP so that it can be
called without RTNL.

Patches 6 & 7 convert RTM_NEWNEXTHOP and RTM_DELNEXTHOP to per-netns RTNL.

Note that RTM_GETNEXTHOP and RTM_GETNEXTHOPBUCKET are not touched in
this series.

rtm_get_nexthop() can be easily converted to RCU, but rtm_dump_nexthop()
needs more work due to the left-to-right rbtree walk, which looks prone
to node deletion and tree rotation without a retry mechanism.

v1: https://lore.kernel.org/netdev/20250318233240.53946-1-kuniyu@amazon.com/
====================

Link: https://patch.msgid.link/20250319230743.65267-1-kuniyu@amazon.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents b709857e 29c8e323
Loading
Loading
Loading
Loading
+112 −71
Original line number Diff line number Diff line
@@ -1272,10 +1272,8 @@ static int nh_check_attr_group(struct net *net,
			       u16 nh_grp_type, struct netlink_ext_ack *extack)
{
	unsigned int len = nla_len(tb[NHA_GROUP]);
	u8 nh_family = AF_UNSPEC;
	struct nexthop_grp *nhg;
	unsigned int i, j;
	u8 nhg_fdb = 0;

	if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
		NL_SET_ERR_MSG(extack,
@@ -1307,10 +1305,41 @@ static int nh_check_attr_group(struct net *net,
		}
	}

	if (tb[NHA_FDB])
		nhg_fdb = 1;
	nhg = nla_data(tb[NHA_GROUP]);
	for (i = 0; i < len; ++i) {
	for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
		if (!tb[i])
			continue;
		switch (i) {
		case NHA_HW_STATS_ENABLE:
		case NHA_FDB:
			continue;
		case NHA_RES_GROUP:
			if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
				continue;
			break;
		}
		NL_SET_ERR_MSG(extack,
			       "No other attributes can be set in nexthop groups");
		return -EINVAL;
	}

	return 0;
}

static int nh_check_attr_group_rtnl(struct net *net, struct nlattr *tb[],
				    struct netlink_ext_ack *extack)
{
	u8 nh_family = AF_UNSPEC;
	struct nexthop_grp *nhg;
	unsigned int len;
	unsigned int i;
	u8 nhg_fdb;

	len = nla_len(tb[NHA_GROUP]) / sizeof(*nhg);
	nhg = nla_data(tb[NHA_GROUP]);
	nhg_fdb = !!tb[NHA_FDB];

	for (i = 0; i < len; i++) {
		struct nexthop *nh;
		bool is_fdb_nh;

@@ -1330,22 +1359,6 @@ static int nh_check_attr_group(struct net *net,
			return -EINVAL;
		}
	}
	for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
		if (!tb[i])
			continue;
		switch (i) {
		case NHA_HW_STATS_ENABLE:
		case NHA_FDB:
			continue;
		case NHA_RES_GROUP:
			if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
				continue;
			break;
		}
		NL_SET_ERR_MSG(extack,
			       "No other attributes can be set in nexthop groups");
		return -EINVAL;
	}

	return 0;
}
@@ -2679,9 +2692,6 @@ static struct nexthop *nexthop_create_group(struct net *net,
	int err;
	int i;

	if (WARN_ON(!num_nh))
		return ERR_PTR(-EINVAL);

	nh = nexthop_alloc();
	if (!nh)
		return ERR_PTR(-ENOMEM);
@@ -2915,11 +2925,6 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
	struct nexthop *nh;
	int err;

	if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
		NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
		return ERR_PTR(-EINVAL);
	}

	if (!cfg->nh_id) {
		cfg->nh_id = nh_find_unused_id(net);
		if (!cfg->nh_id) {
@@ -3016,19 +3021,13 @@ static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg,
}

static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
			    struct nlmsghdr *nlh, struct nh_config *cfg,
			    struct nlmsghdr *nlh, struct nlattr **tb,
			    struct nh_config *cfg,
			    struct netlink_ext_ack *extack)
{
	struct nhmsg *nhm = nlmsg_data(nlh);
	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
	int err;

	err = nlmsg_parse(nlh, sizeof(*nhm), tb,
			  ARRAY_SIZE(rtm_nh_policy_new) - 1,
			  rtm_nh_policy_new, extack);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (nhm->resvd || nhm->nh_scope) {
		NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
@@ -3093,7 +3092,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
			NL_SET_ERR_MSG(extack, "Invalid group type");
			goto out;
		}
		err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb),

		err = nh_check_attr_group(net, tb, ARRAY_SIZE(rtm_nh_policy_new),
					  cfg->nh_grp_type, extack);
		if (err)
			goto out;
@@ -3126,25 +3126,6 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
		goto out;
	}

	if (!cfg->nh_fdb && tb[NHA_OIF]) {
		cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
		if (cfg->nh_ifindex)
			cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);

		if (!cfg->dev) {
			NL_SET_ERR_MSG(extack, "Invalid device index");
			goto out;
		} else if (!(cfg->dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
			err = -ENETDOWN;
			goto out;
		} else if (!netif_carrier_ok(cfg->dev)) {
			NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
			err = -ENETDOWN;
			goto out;
		}
	}

	err = -EINVAL;
	if (tb[NHA_GATEWAY]) {
		struct nlattr *gwa = tb[NHA_GATEWAY];
@@ -3188,7 +3169,7 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,

		cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
		err = lwtunnel_valid_encap_type(cfg->nh_encap_type,
						extack, true);
						extack, false);
		if (err < 0)
			goto out;

@@ -3207,22 +3188,76 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
	return err;
}

/*
 * Second-stage validation of an RTM_NEWNEXTHOP request; the caller in this
 * file invokes it between rtnl_net_lock() and rtnl_net_unlock().
 *
 * When NHA_GROUP is present only nh_check_attr_group_rtnl() is run and
 * NHA_OIF is not looked at.  Otherwise, if NHA_OIF is present, the ifindex
 * is stored in @cfg, the device is looked up in @net, and the request is
 * rejected unless the device exists, is IFF_UP and has carrier.
 *
 * Returns 0 on success, -EINVAL for an unknown device index, -ENETDOWN for
 * a device that is down or without carrier, or the group check's result.
 */
static int rtm_to_nh_config_rtnl(struct net *net, struct nlattr **tb,
				 struct nh_config *cfg,
				 struct netlink_ext_ack *extack)
{
	if (tb[NHA_GROUP])
		return nh_check_attr_group_rtnl(net, tb, extack);

	/* Nothing device-related to validate without an output interface. */
	if (!tb[NHA_OIF])
		return 0;

	cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);

	/* An ifindex of 0 skips the lookup and leaves cfg->dev untouched. */
	if (cfg->nh_ifindex)
		cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);

	if (!cfg->dev) {
		NL_SET_ERR_MSG(extack, "Invalid device index");
		return -EINVAL;
	}

	if (!(cfg->dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
		return -ENETDOWN;
	}

	if (!netif_carrier_ok(cfg->dev)) {
		NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
		return -ENETDOWN;
	}

	return 0;
}

/*
 * RTM_NEWNEXTHOP handler.
 *
 * Attribute parsing, rtm_to_nh_config() and the "replace requires an id"
 * check all run before any lock is taken.  The function then acquires the
 * per-netns RTNL itself via rtnl_net_lock() and, under it, runs
 * rtm_to_nh_config_rtnl() followed by nexthop_add().
 *
 * Every failure before rtnl_net_lock() must leave through "out" so that
 * rtnl_net_unlock() is only reached with the lock held.
 *
 * Returns 0 on success or a negative errno.
 */
static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
			   struct netlink_ext_ack *extack)
{
	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
	struct net *net = sock_net(skb->sk);
	struct nh_config cfg;
	struct nexthop *nh;
	int err;

	/* tb[] must be populated before anything below dereferences it. */
	err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
			  ARRAY_SIZE(rtm_nh_policy_new) - 1,
			  rtm_nh_policy_new, extack);
	if (err < 0)
		goto out;

	err = rtm_to_nh_config(net, skb, nlh, tb, &cfg, extack);
	if (err)
		goto out;

	if (cfg.nlflags & NLM_F_REPLACE && !cfg.nh_id) {
		NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
		err = -EINVAL;
		goto out;
	}

	rtnl_net_lock(net);

	err = rtm_to_nh_config_rtnl(net, tb, &cfg, extack);
	if (err)
		goto unlock;

	nh = nexthop_add(net, &cfg, extack);
	if (IS_ERR(nh))
		err = PTR_ERR(nh);

unlock:
	rtnl_net_unlock(net);
out:
	return err;
}

@@ -3279,13 +3314,17 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
	if (err)
		return err;

	nh = nexthop_find_by_id(net, id);
	if (!nh)
		return -ENOENT;
	rtnl_net_lock(net);

	nh = nexthop_find_by_id(net, id);
	if (nh)
		remove_nexthop(net, nh, &nlinfo);
	else
		err = -ENOENT;

	return 0;
	rtnl_net_unlock(net);

	return err;
}

/* rtnl */
@@ -4037,18 +4076,20 @@ static struct pernet_operations nexthop_net_ops = {
};

/*
 * rtnetlink handler table for nexthop messages.
 *
 * Every RTM_NEWNEXTHOP / RTM_DELNEXTHOP entry carries RTNL_FLAG_DOIT_PERNET;
 * those handlers call rtnl_net_lock() themselves.  The RTM_GETNEXTHOP and
 * RTM_GETNEXTHOPBUCKET entries carry no flags.
 */
static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = {
	{.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop,
	 .flags = RTNL_FLAG_DOIT_PERNET},
	{.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop,
	 .flags = RTNL_FLAG_DOIT_PERNET},
	{.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop,
	 .dumpit = rtm_dump_nexthop},
	{.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket,
	 .dumpit = rtm_dump_nexthop_bucket},
	{.protocol = PF_INET, .msgtype = RTM_NEWNEXTHOP,
	 .doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET},
	{.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP,
	 .dumpit = rtm_dump_nexthop},
	{.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP,
	 .doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET},
	{.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP,
	 .dumpit = rtm_dump_nexthop},
};