Commit 47e8dbb6, authored by Eric Dumazet, committed by Jakub Kicinski
Browse files

net/sched: do not reset queues in graft operations



The following typical script is extremely disruptive,
because each graft operation calls dev_deactivate(),
which resets all the queues of the device.

QPARAM="limit 100000 flow_limit 1000 buckets 4096"
TXQS=64
for ETH in eth1
do
 tc qd del dev $ETH root 2>/dev/null
 tc qd add dev $ETH root handle 1: mq
 for i in `seq 1 $TXQS`
 do
   slot=$( printf %x $(( i )) )
   tc qd add dev $ETH parent 1:$slot fq $QPARAM
 done
done

One can add "ip link set dev $ETH down/up" to reduce the disruption time:

QPARAM="limit 100000 flow_limit 1000 buckets 4096"
TXQS=64
for ETH in eth1
do
 ip link set dev $ETH down
 tc qd del dev $ETH root 2>/dev/null
 tc qd add dev $ETH root handle 1: mq
 for i in `seq 1 $TXQS`
 do
   slot=$( printf %x $(( i )) )
   tc qd add dev $ETH parent 1:$slot fq $QPARAM
 done
 ip link set dev $ETH up
done

Or we can add a @reset_needed flag to dev_deactivate() and
dev_deactivate_many().

This flag is set to true at device dismantle or linkwatch_do_dev(),
and to false for graft operations.

In the future, we might only stop one queue instead of the whole
device, i.e. call dev_deactivate_queue() instead of dev_deactivate().

I think the problem (quadratic behavior) was added in commit
2fb541c8 ("net: sch_generic: aviod concurrent reset and enqueue op
for lockless qdisc") but this does not look serious enough to deserve
risky backports.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yunsheng Lin <linyunsheng@huawei.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Link: https://patch.msgid.link/20260307163430.470644-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 82f36517
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -710,8 +710,8 @@ void dev_qdisc_change_real_num_tx(struct net_device *dev,
void dev_init_scheduler(struct net_device *dev);
void dev_shutdown(struct net_device *dev);
void dev_activate(struct net_device *dev);
void dev_deactivate(struct net_device *dev);
void dev_deactivate_many(struct list_head *head);
void dev_deactivate(struct net_device *dev, bool reset_needed);
void dev_deactivate_many(struct list_head *head, bool reset_needed);
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc);
void qdisc_reset(struct Qdisc *qdisc);
+1 −1
Original line number Diff line number Diff line
@@ -1756,7 +1756,7 @@ static void __dev_close_many(struct list_head *head)
		smp_mb__after_atomic(); /* Commit netif_running(). */
	}

	dev_deactivate_many(head);
	dev_deactivate_many(head, true);

	list_for_each_entry(dev, head, close_list) {
		const struct net_device_ops *ops = dev->netdev_ops;
+1 −1
Original line number Diff line number Diff line
@@ -181,7 +181,7 @@ static void linkwatch_do_dev(struct net_device *dev)
		if (netif_carrier_ok(dev))
			dev_activate(dev);
		else
			dev_deactivate(dev);
			dev_deactivate(dev, true);

		netif_state_change(dev);
	}
+1 −1
Original line number Diff line number Diff line
@@ -1120,7 +1120,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);
			dev_deactivate(dev, false);

		qdisc_offload_graft_root(dev, new, old, extack);

+12 −8
Original line number Diff line number Diff line
@@ -1370,11 +1370,12 @@ static bool some_qdisc_is_busy(struct net_device *dev)
/**
 * 	dev_deactivate_many - deactivate transmissions on several devices
 * 	@head: list of devices to deactivate
 *	@reset_needed: qdisc should be reset if true.
 *
 *	This function returns only when all outstanding transmissions
 *	have completed, unless all devices are in dismantle phase.
 */
void dev_deactivate_many(struct list_head *head)
void dev_deactivate_many(struct list_head *head, bool reset_needed)
{
	bool sync_needed = false;
	struct net_device *dev;
@@ -1393,11 +1394,14 @@ void dev_deactivate_many(struct list_head *head)
	if (sync_needed)
		synchronize_net();

	if (reset_needed) {
		list_for_each_entry(dev, head, close_list) {
			netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);

			if (dev_ingress_queue(dev))
			dev_reset_queue(dev, dev_ingress_queue(dev), NULL);
				dev_reset_queue(dev, dev_ingress_queue(dev),
						NULL);
		}
	}

	/* Wait for outstanding qdisc_run calls. */
@@ -1412,12 +1416,12 @@ void dev_deactivate_many(struct list_head *head)
	}
}

void dev_deactivate(struct net_device *dev)
void dev_deactivate(struct net_device *dev, bool reset_needed)
{
	LIST_HEAD(single);

	list_add(&dev->close_list, &single);
	dev_deactivate_many(&single);
	dev_deactivate_many(&single, reset_needed);
	list_del(&single);
}
EXPORT_SYMBOL(dev_deactivate);
@@ -1473,7 +1477,7 @@ int dev_qdisc_change_tx_queue_len(struct net_device *dev)
	int ret = 0;

	if (up)
		dev_deactivate(dev);
		dev_deactivate(dev, false);

	for (i = 0; i < dev->num_tx_queues; i++) {
		ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]);
Loading