Commit c353e898 authored by Paolo Abeni, committed by Jakub Kicinski
Browse files

net: introduce per netns packet chains



Currently network taps unbound to any interface are linked in the
global ptype_all list, affecting the performance in all the network
namespaces.

Add per-netns ptype chains, so that in the mentioned case only
the netns owning the packet socket(s) is affected.

While at it, drop the global ptype_all list: no in-kernel user
registers a tap on "any" type without specifying either the target
device or the target namespace (and IMHO doing that would not make
any sense).

Note that this adds a conditional in the fast path (to check for
the per-netns ptype_specific list) and increases the dataset size by
a cacheline (owing to the per-netns lists).

Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumaze@google.com>
Link: https://patch.msgid.link/ae405f98875ee87f8150c460ad162de7e466f8a7.1742494826.git.pabeni@redhat.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 29abdf66
Loading
Loading
Loading
Loading
+11 −1
Original line number Diff line number Diff line
@@ -4278,7 +4278,17 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev,
	return 0;
}

bool dev_nit_active(struct net_device *dev);
bool dev_nit_active_rcu(const struct net_device *dev);
/**
 * dev_nit_active - RCU-locking wrapper around dev_nit_active_rcu()
 * @dev: network device handed unchanged to dev_nit_active_rcu()
 *
 * Takes the RCU read lock, calls dev_nit_active_rcu() and drops the lock
 * again before returning, so the caller does not have to hold it.
 *
 * Return: the value returned by dev_nit_active_rcu() for @dev.
 */
static inline bool dev_nit_active(const struct net_device *dev)
{
	bool ret;

	/* The result is sampled under the lock; the lock is released before
	 * the value is handed back to the caller.
	 */
	rcu_read_lock();
	ret = dev_nit_active_rcu(dev);
	rcu_read_unlock();
	return ret;
}

void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);

static inline void __dev_put(struct net_device *dev)
+0 −1
Original line number Diff line number Diff line
@@ -23,7 +23,6 @@ struct net_hotdata {
	struct net_offload	udpv6_offload;
#endif
	struct list_head	offload_base;
	struct list_head	ptype_all;
	struct kmem_cache	*skbuff_cache;
	struct kmem_cache	*skbuff_fclone_cache;
	struct kmem_cache	*skb_small_head_cache;
+3 −0
Original line number Diff line number Diff line
@@ -83,6 +83,9 @@ struct net {
	struct llist_node	defer_free_list;
	struct llist_node	cleanup_list;	/* namespaces on death row */

	struct list_head ptype_all;
	struct list_head ptype_specific;

#ifdef CONFIG_KEYS
	struct key_tag		*key_domain;	/* Key domain of operation tag */
#endif
+41 −12
Original line number Diff line number Diff line
@@ -572,10 +572,18 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)

static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return pt->dev ? &pt->dev->ptype_all : &net_hotdata.ptype_all;
	else
		return pt->dev ? &pt->dev->ptype_specific :
	if (pt->type == htons(ETH_P_ALL)) {
		if (!pt->af_packet_net && !pt->dev)
			return NULL;

		return pt->dev ? &pt->dev->ptype_all :
				 &pt->af_packet_net->ptype_all;
	}

	if (pt->dev)
		return &pt->dev->ptype_specific;

	return pt->af_packet_net ? &pt->af_packet_net->ptype_specific :
				 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}

@@ -596,6 +604,9 @@ void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);

	if (WARN_ON_ONCE(!head))
		return;

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);
	spin_unlock(&ptype_lock);
@@ -620,6 +631,9 @@ void __dev_remove_pack(struct packet_type *pt)
	struct list_head *head = ptype_head(pt);
	struct packet_type *pt1;

	if (!head)
		return;

	spin_lock(&ptype_lock);

	list_for_each_entry(pt1, head, list) {
@@ -2441,16 +2455,21 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
}

/**
 * dev_nit_active - return true if any network interface taps are in use
 * dev_nit_active_rcu - return true if any network interface taps are in use
 *
 * The caller must hold the RCU lock
 *
 * @dev: network device to check for the presence of taps
 */
bool dev_nit_active(struct net_device *dev)
bool dev_nit_active_rcu(const struct net_device *dev)
{
	return !list_empty(&net_hotdata.ptype_all) ||
	/* Callers may hold either RCU or RCU BH lock */
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());

	return !list_empty(&dev_net(dev)->ptype_all) ||
	       !list_empty(&dev->ptype_all);
}
EXPORT_SYMBOL_GPL(dev_nit_active);
EXPORT_SYMBOL_GPL(dev_nit_active_rcu);

/*
 *	Support routine. Sends outgoing frames to any network
@@ -2459,11 +2478,12 @@ EXPORT_SYMBOL_GPL(dev_nit_active);

void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct list_head *ptype_list = &net_hotdata.ptype_all;
	struct packet_type *ptype, *pt_prev = NULL;
	struct list_head *ptype_list;
	struct sk_buff *skb2 = NULL;

	rcu_read_lock();
	ptype_list = &dev_net_rcu(dev)->ptype_all;
again:
	list_for_each_entry_rcu(ptype, ptype_list, list) {
		if (READ_ONCE(ptype->ignore_outgoing))
@@ -2507,7 +2527,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
		pt_prev = ptype;
	}

	if (ptype_list == &net_hotdata.ptype_all) {
	if (ptype_list != &dev->ptype_all) {
		ptype_list = &dev->ptype_all;
		goto again;
	}
@@ -3752,7 +3772,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
	unsigned int len;
	int rc;

	if (dev_nit_active(dev))
	if (dev_nit_active_rcu(dev))
		dev_queue_xmit_nit(skb, dev);

	len = skb->len;
@@ -5696,7 +5716,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
	if (pfmemalloc)
		goto skip_taps;

	list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) {
	list_for_each_entry_rcu(ptype, &dev_net_rcu(skb->dev)->ptype_all,
				list) {
		if (pt_prev)
			ret = deliver_skb(skb, pt_prev, orig_dev);
		pt_prev = ptype;
@@ -5808,6 +5829,14 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
		deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
				       &ptype_base[ntohs(type) &
						   PTYPE_HASH_MASK]);

		/* orig_dev and skb->dev could belong to different netns;
		 * Even in such case we need to traverse only the list
		 * coming from skb->dev, as the ptype owner (packet socket)
		 * will use dev_net(skb->dev) to do namespace filtering.
		 */
		deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
				       &dev_net_rcu(skb->dev)->ptype_specific);
	}

	deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+0 −1
Original line number Diff line number Diff line
@@ -7,7 +7,6 @@

struct net_hotdata net_hotdata __cacheline_aligned = {
	.offload_base = LIST_HEAD_INIT(net_hotdata.offload_base),
	.ptype_all = LIST_HEAD_INIT(net_hotdata.ptype_all),
	.gro_normal_batch = 8,

	.netdev_budget = 300,
Loading