Commit 07519648 authored by Ido Schimmel, committed by Jakub Kicinski
Browse files

vrf: Remove unnecessary RCU protection around dst entries



During initialization of a VRF device, the VRF driver creates two dst
entries (for IPv4 and IPv6). They are attached to locally generated
packets that are transmitted out of the VRF ports (via the
l3mdev_l3_out() hook). Their purpose is to redirect packets towards the
VRF device instead of having the packets egress directly out of the VRF
ports. This is useful, for example, when a queuing discipline is
configured on the VRF device.

In order to avoid a NULL pointer dereference, commit b0e95ccd ("net:
vrf: protect changes to private data with rcu") made the pointers to the
dst entries RCU protected. As far as I can tell, this was needed because
back then the dst entries were released (and the pointers reset to NULL)
before removing the VRF ports.

Later on, commit f630c38e ("vrf: fix bug_on triggered by rx when
destroying a vrf") moved the removal of the VRF ports to the VRF
device's dellink() callback. As such, the tear down sequence of a VRF
device looks as follows:

1. VRF ports are removed.
2. VRF device is unregistered.
    a. Device is closed.
    b. An RCU grace period passes.
    c. ndo_uninit() is called.
        i. dst entries are released.

Given the above, the Tx path will always see the same fully initialized
dst entries and will never race with the ndo_uninit() callback.

Therefore, there is no need to make the pointers to the dst entries RCU
protected. Remove the RCU protection as well as the unnecessary NULL
checks in the Tx path.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20260326203233.1128554-4-idosch@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 50504e25
Loading
Loading
Loading
Loading
+12 −44
Original line number Diff line number Diff line
@@ -112,8 +112,8 @@ struct netns_vrf {
};

struct net_vrf {
	struct rtable __rcu	*rth;
	struct rt6_info	__rcu	*rt6;
	struct rtable		*rth;
	struct rt6_info		*rt6;
#if IS_ENABLED(CONFIG_IPV6)
	struct fib6_table	*fib6_table;
#endif
@@ -648,26 +648,13 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
					    struct sk_buff *skb)
{
	struct net_vrf *vrf = netdev_priv(vrf_dev);
	struct dst_entry *dst = NULL;
	struct rt6_info *rt6;

	rcu_read_lock();

	rt6 = rcu_dereference(vrf->rt6);
	if (likely(rt6)) {
		dst = &rt6->dst;
		dst_hold(dst);
	}

	rcu_read_unlock();

	if (unlikely(!dst)) {
		vrf_tx_error(vrf_dev, skb);
		return NULL;
	}
	rt6 = vrf->rt6;
	dst_hold(&rt6->dst);

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);
	skb_dst_set(skb, &rt6->dst);

	return skb;
}
@@ -750,10 +737,7 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
/* holding rtnl */
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
	struct rt6_info *rt6 = rtnl_dereference(vrf->rt6);

	RCU_INIT_POINTER(vrf->rt6, NULL);
	synchronize_rcu();
	struct rt6_info *rt6 = vrf->rt6;

	if (rt6) {
		dst_dev_put(&rt6->dst);
@@ -784,7 +768,7 @@ static int vrf_rt6_create(struct net_device *dev)

	rt6->dst.output	= vrf_output6;

	rcu_assign_pointer(vrf->rt6, rt6);
	vrf->rt6 = rt6;

	rc = 0;
out:
@@ -870,26 +854,13 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
					   struct sk_buff *skb)
{
	struct net_vrf *vrf = netdev_priv(vrf_dev);
	struct dst_entry *dst = NULL;
	struct rtable *rth;

	rcu_read_lock();

	rth = rcu_dereference(vrf->rth);
	if (likely(rth)) {
		dst = &rth->dst;
		dst_hold(dst);
	}

	rcu_read_unlock();

	if (unlikely(!dst)) {
		vrf_tx_error(vrf_dev, skb);
		return NULL;
	}
	rth = vrf->rth;
	dst_hold(&rth->dst);

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);
	skb_dst_set(skb, &rth->dst);

	return skb;
}
@@ -989,10 +960,7 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
/* holding rtnl */
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
	struct rtable *rth = rtnl_dereference(vrf->rth);

	RCU_INIT_POINTER(vrf->rth, NULL);
	synchronize_rcu();
	struct rtable *rth = vrf->rth;

	dst_dev_put(&rth->dst);
	dst_release(&rth->dst);
@@ -1013,7 +981,7 @@ static int vrf_rtable_create(struct net_device *dev)

	rth->dst.output	= vrf_output;

	rcu_assign_pointer(vrf->rth, rth);
	vrf->rth = rth;

	return 0;
}