Commit ffe4ccd3 authored by Eric Dumazet, committed by Paolo Abeni
Browse files

net: add net.core.qdisc_max_burst



In the blamed commit, I added a check against the temporary queue
built in __dev_xmit_skb(). The idea was to drop packets early,
before any spinlock was acquired.

if (unlikely(defer_count > READ_ONCE(q->limit))) {
	kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
	return NET_XMIT_DROP;
}

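It turned out that the HTB qdisc has a zero q->limit, because
HTB limits packets on a per-class basis. With q->limit == 0, the
check above triggers as soon as defer_count is incremented, and
some of our tests became flaky.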

Add a new sysctl, net.core.qdisc_max_burst, to control
how many packets can be stored in the temporary lockless queue.

Also add a new QDISC_BURST_DROP drop reason to better diagnose
future issues.

Thanks Neal !

Fixes: 100dfa74 ("net: dev_queue_xmit() llist adoption")
Reported-and-bisected-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Link: https://patch.msgid.link/20260107104159.3669285-1-edumazet@google.com


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parent dfdf7746
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -303,6 +303,14 @@ netdev_max_backlog
Maximum number of packets, queued on the INPUT side, when the interface
receives packets faster than kernel can process them.

qdisc_max_burst
---------------

Maximum number of packets that can be temporarily stored in the lockless
queue before reaching the qdisc. Packets arriving once this limit is
exceeded are dropped.

Default: 1000

netdev_rss_key
--------------

+6 −0
Original line number Diff line number Diff line
@@ -67,6 +67,7 @@
	FN(TC_EGRESS)			\
	FN(SECURITY_HOOK)		\
	FN(QDISC_DROP)			\
	FN(QDISC_BURST_DROP)		\
	FN(QDISC_OVERLIMIT)		\
	FN(QDISC_CONGESTED)		\
	FN(CAKE_FLOOD)			\
@@ -374,6 +375,11 @@ enum skb_drop_reason {
	 * failed to enqueue to current qdisc)
	 */
	SKB_DROP_REASON_QDISC_DROP,
	/**
	 * @SKB_DROP_REASON_QDISC_BURST_DROP: dropped when net.core.qdisc_max_burst
	 * limit is hit.
	 */
	SKB_DROP_REASON_QDISC_BURST_DROP,
	/**
	 * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc
	 * instance exceeds its total buffer size limit.
+1 −0
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@ struct net_hotdata {
	int			netdev_budget_usecs;
	int			tstamp_prequeue;
	int			max_backlog;
	int			qdisc_max_burst;
	int			dev_tx_weight;
	int			dev_rx_weight;
	int			sysctl_max_skb_frags;
+3 −3
Original line number Diff line number Diff line
@@ -4203,8 +4203,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
	do {
		if (first_n && !defer_count) {
			defer_count = atomic_long_inc_return(&q->defer_count);
			if (unlikely(defer_count > READ_ONCE(q->limit))) {
				kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_DROP);
			if (unlikely(defer_count > READ_ONCE(net_hotdata.qdisc_max_burst))) {
				kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_BURST_DROP);
				return NET_XMIT_DROP;
			}
		}
@@ -4222,7 +4222,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
	ll_list = llist_del_all(&q->defer_list);
	/* There is a small race because we clear defer_count not atomically
	 * with the prior llist_del_all(). This means defer_list could grow
	 * over q->limit.
	 * over qdisc_max_burst.
	 */
	atomic_long_set(&q->defer_count, 0);

+1 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ struct net_hotdata net_hotdata __cacheline_aligned = {

	.tstamp_prequeue = 1,
	.max_backlog = 1000,
	.qdisc_max_burst = 1000,
	.dev_tx_weight = 64,
	.dev_rx_weight = 64,
	.sysctl_max_skb_frags = MAX_SKB_FRAGS,
Loading