Commit 01857fc7 authored by Jakub Kicinski
Browse files

Merge branch 'net-sched-refactor-qdisc-drop-reasons-into-dedicated-tracepoint'

Jesper Dangaard Brouer says:

====================
net: sched: refactor qdisc drop reasons into dedicated tracepoint

This series refactors qdisc drop reason handling by introducing a dedicated
enum qdisc_drop_reason and trace_qdisc_drop tracepoint, providing qdisc
layer drop diagnostics with direct qdisc context visibility.

Background:
-----------
Identifying which qdisc dropped a packet via skb_drop_reason is difficult.
Normally, the kfree_skb tracepoint caller "location" hints at the dropping
code, but qdisc drops happen at a central point (__dev_queue_xmit), making
this unusable. As a workaround, commits 5765c7f6 ("net_sched: sch_fq:
add three drop_reason") and a42d71e3 ("net_sched: sch_cake: Add drop
reasons") encoded qdisc names directly in the drop reason enums.

This series provides a cleaner solution by creating a dedicated qdisc
tracepoint that naturally includes qdisc context (handle, parent, kind).

Solution:
---------
Create a new tracepoint trace_qdisc_drop that builds on top of existing
trace_qdisc_enqueue infrastructure. It includes qdisc handle, parent,
qdisc kind (name), and device information directly.

The existing SKB_DROP_REASON_QDISC_DROP is retained for backwards
compatibility via kfree_skb_reason(). The qdisc-specific drop reasons
(QDISC_DROP_*) provide fine-grained detail via the new tracepoint.
The enum uses subsystem encoding (offset by SKB_DROP_REASON_SUBSYS_QDISC)
to catch type mismatches during debugging.

This implements the alternative approach described in:
https://lore.kernel.org/all/6be17a08-f8aa-4f91-9bd0-d9e1f0a92d90@kernel.org/
====================

Link: https://patch.msgid.link/177211325634.3011628.9343837509740374154.stgit@firesoul


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 52d534aa 67713dff
Loading
Loading
Loading
Loading
+4 −44
Original line number Diff line number Diff line
@@ -68,12 +68,6 @@
	FN(SECURITY_HOOK)		\
	FN(QDISC_DROP)			\
	FN(QDISC_BURST_DROP)		\
	FN(QDISC_OVERLIMIT)		\
	FN(QDISC_CONGESTED)		\
	FN(CAKE_FLOOD)			\
	FN(FQ_BAND_LIMIT)		\
	FN(FQ_HORIZON_LIMIT)		\
	FN(FQ_FLOW_LIMIT)		\
	FN(CPU_BACKLOG)			\
	FN(XDP)				\
	FN(TC_INGRESS)			\
@@ -127,7 +121,6 @@
	FN(CANFD_RX_INVALID_FRAME)	\
	FN(CANXL_RX_INVALID_FRAME)	\
	FN(PFMEMALLOC)	\
	FN(DUALPI2_STEP_DROP)		\
	FN(PSP_INPUT)			\
	FN(PSP_OUTPUT)			\
	FNe(MAX)
@@ -371,8 +364,10 @@ enum skb_drop_reason {
	/** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */
	SKB_DROP_REASON_SECURITY_HOOK,
	/**
	 * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting (
	 * failed to enqueue to current qdisc)
	 * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc during enqueue or
	 * dequeue. More specific drop reasons are available via the
	 * qdisc:qdisc_drop tracepoint, which also provides qdisc handle
	 * and name for identifying the source.
	 */
	SKB_DROP_REASON_QDISC_DROP,
	/**
@@ -380,36 +375,6 @@ enum skb_drop_reason {
	 * limit is hit.
	 */
	SKB_DROP_REASON_QDISC_BURST_DROP,
	/**
	 * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc
	 * instance exceeds its total buffer size limit.
	 */
	SKB_DROP_REASON_QDISC_OVERLIMIT,
	/**
	 * @SKB_DROP_REASON_QDISC_CONGESTED: dropped by a qdisc AQM algorithm
	 * due to congestion.
	 */
	SKB_DROP_REASON_QDISC_CONGESTED,
	/**
	 * @SKB_DROP_REASON_CAKE_FLOOD: dropped by the flood protection part of
	 * CAKE qdisc AQM algorithm (BLUE).
	 */
	SKB_DROP_REASON_CAKE_FLOOD,
	/**
	 * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band
	 * limit is reached.
	 */
	SKB_DROP_REASON_FQ_BAND_LIMIT,
	/**
	 * @SKB_DROP_REASON_FQ_HORIZON_LIMIT: dropped by fq qdisc when packet
	 * timestamp is too far in the future.
	 */
	SKB_DROP_REASON_FQ_HORIZON_LIMIT,
	/**
	 * @SKB_DROP_REASON_FQ_FLOW_LIMIT: dropped by fq qdisc when a flow
	 * exceeds its limits.
	 */
	SKB_DROP_REASON_FQ_FLOW_LIMIT,
	/**
	 * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU
	 * backlog queue. This can be caused by backlog queue full (see
@@ -613,11 +578,6 @@ enum skb_drop_reason {
	 * reached a path or socket not eligible for use of memory reserves
	 */
	SKB_DROP_REASON_PFMEMALLOC,
	/**
	 * @SKB_DROP_REASON_DUALPI2_STEP_DROP: dropped by the step drop
	 * threshold of DualPI2 qdisc.
	 */
	SKB_DROP_REASON_DUALPI2_STEP_DROP,
	/** @SKB_DROP_REASON_PSP_INPUT: PSP input checks failed */
	SKB_DROP_REASON_PSP_INPUT,
	/** @SKB_DROP_REASON_PSP_OUTPUT: PSP output checks failed */
+114 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-or-later */

#ifndef _LINUX_DROPREASON_QDISC_H
#define _LINUX_DROPREASON_QDISC_H
#include <net/dropreason.h>

/*
 * X-macro listing every qdisc drop reason by its short name, i.e. without
 * the QDISC_DROP_ prefix. The caller supplies FN(), which is applied to
 * every entry except the last, and FNe(), which is applied to the final
 * MAX entry, so the same list can be expanded into different forms (for
 * example separator-terminated entries followed by an unterminated one).
 *
 * Each name listed here is pasted onto QDISC_DROP_ by the expansions, so
 * it needs a matching QDISC_DROP_<name> enumerator in
 * enum qdisc_drop_reason.
 */
#define DEFINE_QDISC_DROP_REASON(FN, FNe)	\
	FN(UNSPEC)			\
	FN(GENERIC)			\
	FN(OVERLIMIT)			\
	FN(CONGESTED)			\
	FN(MAXFLOWS)			\
	FN(FLOOD_PROTECTION)		\
	FN(BAND_LIMIT)		\
	FN(HORIZON_LIMIT)		\
	FN(FLOW_LIMIT)			\
	FN(L4S_STEP_NON_ECN)		\
	FNe(MAX)

#undef FN
#undef FNe
#define FN(reason)	QDISC_DROP_##reason,
#define FNe(reason)	QDISC_DROP_##reason

/**
 * enum qdisc_drop_reason - reason why a qdisc dropped a packet
 *
 * Qdisc-specific drop reasons for packet drops that occur within the
 * traffic control (TC) queueing discipline layer. These reasons provide
 * detailed diagnostics about why packets were dropped by various qdisc
 * algorithms, enabling fine-grained monitoring and troubleshooting of
 * queue behavior.
 *
 * Apart from QDISC_DROP_UNSPEC, all enumerators are numbered consecutively
 * starting right after the subsystem base value __QDISC_DROP_REASON. Keep
 * the list of names in DEFINE_QDISC_DROP_REASON() in sync when adding or
 * removing an enumerator.
 */
enum qdisc_drop_reason {
	/**
	 * @QDISC_DROP_UNSPEC: unspecified/invalid qdisc drop reason.
	 * Value 0 is analogous to SKB_NOT_DROPPED_YET in
	 * enum skb_drop_reason and is used for catching zero-initialized
	 * drop_reason fields.
	 */
	QDISC_DROP_UNSPEC = 0,
	/**
	 * @__QDISC_DROP_REASON: subsystem base value for qdisc drop reasons
	 * (SKB_DROP_REASON_SUBSYS_QDISC shifted into the subsystem bits).
	 * Not a drop reason itself; the enumerators that follow continue
	 * counting from this value.
	 */
	__QDISC_DROP_REASON = SKB_DROP_REASON_SUBSYS_QDISC <<
				SKB_DROP_REASON_SUBSYS_SHIFT,
	/**
	 * @QDISC_DROP_GENERIC: generic/default qdisc drop, used when no
	 * more specific reason applies
	 */
	QDISC_DROP_GENERIC,
	/**
	 * @QDISC_DROP_OVERLIMIT: packet dropped because the qdisc queue
	 * length exceeded its configured limit (sch->limit). This typically
	 * indicates the queue is full and cannot accept more packets.
	 */
	QDISC_DROP_OVERLIMIT,
	/**
	 * @QDISC_DROP_CONGESTED: packet dropped due to active congestion
	 * control algorithms (e.g., CoDel, PIE, RED) detecting network
	 * congestion. The qdisc proactively dropped the packet to signal
	 * congestion to the sender and prevent bufferbloat.
	 */
	QDISC_DROP_CONGESTED,
	/**
	 * @QDISC_DROP_MAXFLOWS: packet dropped because the qdisc's flow
	 * tracking table is full and no free slots are available to allocate
	 * for a new flow. This indicates flow table exhaustion in flow-based
	 * qdiscs that maintain per-flow state (e.g., SFQ).
	 */
	QDISC_DROP_MAXFLOWS,
	/**
	 * @QDISC_DROP_FLOOD_PROTECTION: packet dropped by flood protection
	 * mechanism detecting unresponsive flows (potential DoS/flood).
	 * Used by qdiscs implementing probabilistic drop algorithms like
	 * BLUE (e.g., CAKE's Cobalt AQM).
	 */
	QDISC_DROP_FLOOD_PROTECTION,
	/**
	 * @QDISC_DROP_BAND_LIMIT: packet dropped because the priority band's
	 * limit was reached. Used by qdiscs with priority bands that have
	 * per-band packet limits (e.g., FQ).
	 */
	QDISC_DROP_BAND_LIMIT,
	/**
	 * @QDISC_DROP_HORIZON_LIMIT: packet dropped because its timestamp
	 * is too far in the future (beyond the configured horizon).
	 * Used by qdiscs with time-based scheduling (e.g., FQ).
	 */
	QDISC_DROP_HORIZON_LIMIT,
	/**
	 * @QDISC_DROP_FLOW_LIMIT: packet dropped because an individual flow
	 * exceeded its per-flow packet/depth limit. Used by FQ and SFQ qdiscs
	 * to enforce per-flow fairness and prevent a single flow from
	 * monopolizing queue resources.
	 */
	QDISC_DROP_FLOW_LIMIT,
	/**
	 * @QDISC_DROP_L4S_STEP_NON_ECN: DualPI2 qdisc dropped a non-ECN-capable
	 * packet because the L4S queue delay exceeded the step threshold.
	 * Since the packet cannot be ECN-marked, it must be dropped to signal
	 * congestion. See RFC 9332 for the DualQ Coupled AQM step mechanism.
	 */
	QDISC_DROP_L4S_STEP_NON_ECN,
	/**
	 * @QDISC_DROP_MAX: the maximum of qdisc drop reasons, which
	 * shouldn't be used as a real 'reason' - only for tracing code gen
	 */
	QDISC_DROP_MAX,
};

#undef FN
#undef FNe

#endif
+6 −0
Original line number Diff line number Diff line
@@ -23,6 +23,12 @@ enum skb_drop_reason_subsys {
	 */
	SKB_DROP_REASON_SUBSYS_OPENVSWITCH,

	/**
	 * @SKB_DROP_REASON_SUBSYS_QDISC: TC qdisc drop reasons,
	 * see include/net/dropreason-qdisc.h
	 */
	SKB_DROP_REASON_SUBSYS_QDISC,

	/** @SKB_DROP_REASON_SUBSYS_NUM: number of subsystems defined */
	SKB_DROP_REASON_SUBSYS_NUM
};
+30 −13
Original line number Diff line number Diff line
@@ -20,12 +20,15 @@
#include <net/rtnetlink.h>
#include <net/flow_offload.h>
#include <linux/xarray.h>
#include <net/dropreason-qdisc.h>

struct Qdisc_ops;
struct qdisc_walker;
struct tcf_walker;
struct module;
struct bpf_flow_keys;
struct Qdisc;
struct netdev_queue;

struct qdisc_rate_table {
	struct tc_ratespec rate;
@@ -1106,36 +1109,50 @@ static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
	return cb;
}

/* TC classifier accessors - use enum skb_drop_reason */
static inline enum skb_drop_reason
tcf_get_drop_reason(const struct sk_buff *skb)
{
	return tc_skb_cb(skb)->drop_reason;
	return (enum skb_drop_reason)tc_skb_cb(skb)->drop_reason;
}

static inline void tcf_set_drop_reason(const struct sk_buff *skb,
				       enum skb_drop_reason reason)
{
	tc_skb_cb(skb)->drop_reason = reason;
	tc_skb_cb(skb)->drop_reason = (enum qdisc_drop_reason)reason;
}

static inline void tcf_kfree_skb_list(struct sk_buff *skb)
/* Qdisc accessors - use enum qdisc_drop_reason */
static inline enum qdisc_drop_reason
tcf_get_qdisc_drop_reason(const struct sk_buff *skb)
{
	while (unlikely(skb)) {
		struct sk_buff *next = skb->next;
	return tc_skb_cb(skb)->drop_reason;
}

		prefetch(next);
		kfree_skb_reason(skb, tcf_get_drop_reason(skb));
		skb = next;
static inline void tcf_set_qdisc_drop_reason(const struct sk_buff *skb,
					     enum qdisc_drop_reason reason)
{
	tc_skb_cb(skb)->drop_reason = reason;
}

/*
 * Out-of-line worker behind tcf_kfree_skb_list(); the definition lives
 * outside this header.
 * NOTE(review): presumably frees every skb on the list and reports each
 * drop against @q/@txq/@dev - confirm against the definition.
 */
void __tcf_kfree_skb_list(struct sk_buff *skb, struct Qdisc *q,
			  struct netdev_queue *txq, struct net_device *dev);

/**
 * tcf_kfree_skb_list - pass a list of dropped skbs to the free helper
 * @skb: head of the skb list; may be NULL, in which case nothing is done
 * @q: qdisc handed through to __tcf_kfree_skb_list()
 * @txq: netdev queue handed through to __tcf_kfree_skb_list()
 * @dev: net device handed through to __tcf_kfree_skb_list()
 *
 * Inline wrapper that keeps the empty-list case (hinted as the common one)
 * free of a function call; all real work is done out of line.
 */
static inline void tcf_kfree_skb_list(struct sk_buff *skb, struct Qdisc *q,
				      struct netdev_queue *txq,
				      struct net_device *dev)
{
	/* Nothing queued for freeing: bail out without calling the helper. */
	if (!unlikely(skb))
		return;

	__tcf_kfree_skb_list(skb, q, txq, dev);
}

static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb,
				      enum skb_drop_reason reason)
				      enum qdisc_drop_reason reason)
{
	DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS));
	DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK);

	tcf_set_drop_reason(skb, reason);
	tcf_set_qdisc_drop_reason(skb, reason);
	skb->next = q->to_free;
	q->to_free = skb;
}
@@ -1312,9 +1329,9 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,

static inline int qdisc_drop_reason(struct sk_buff *skb, struct Qdisc *sch,
				    struct sk_buff **to_free,
				    enum skb_drop_reason reason)
				    enum qdisc_drop_reason reason)
{
	tcf_set_drop_reason(skb, reason);
	tcf_set_qdisc_drop_reason(skb, reason);
	return qdisc_drop(skb, sch, to_free);
}

+51 −0
Original line number Diff line number Diff line
@@ -74,6 +74,57 @@ TRACE_EVENT(qdisc_enqueue,
		  __entry->ifindex, __entry->handle, __entry->parent, __entry->skbaddr)
);

#undef FN
#undef FNe
#define FN(reason)	TRACE_DEFINE_ENUM(QDISC_DROP_##reason);
#define FNe(reason)	TRACE_DEFINE_ENUM(QDISC_DROP_##reason);
DEFINE_QDISC_DROP_REASON(FN, FNe)

#undef FN
#undef FNe
#define FN(reason)	{ QDISC_DROP_##reason, #reason },
#define FNe(reason)	{ QDISC_DROP_##reason, #reason }

/*
 * qdisc:qdisc_drop - tracepoint for a packet dropped by a qdisc.
 *
 * Records the qdisc and txq pointers, the skb address, the device ifindex
 * (0 when @dev is NULL), the qdisc handle and parent, the qdisc kind string
 * taken from qdisc->ops->id, and the qdisc drop reason. Only ifindex, kind,
 * handle, parent, skbaddr and reason appear in the printed output.
 *
 * @qdisc is dereferenced unconditionally (handle, parent, ops->id), so it
 * must not be NULL. The reason is printed by name through the value/name
 * table produced by expanding DEFINE_QDISC_DROP_REASON() with the FN()/FNe()
 * definitions in effect at this point.
 */
TRACE_EVENT(qdisc_drop,

	TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq,
		 struct net_device *dev, struct sk_buff *skb,
		 enum qdisc_drop_reason reason),

	TP_ARGS(qdisc, txq, dev, skb, reason),

	TP_STRUCT__entry(
		__field(struct Qdisc *, qdisc)
		__field(const struct netdev_queue *, txq)
		__field(void *,	skbaddr)
		__field(int, ifindex)
		__field(u32, handle)
		__field(u32, parent)
		__field(enum qdisc_drop_reason, reason)
		__string(kind, qdisc->ops->id)
	),

	TP_fast_assign(
		__entry->qdisc = qdisc;
		__entry->txq	 = txq;
		__entry->skbaddr = skb;
		/* dev may be NULL; report ifindex 0 in that case */
		__entry->ifindex = dev ? dev->ifindex : 0;
		__entry->handle	 = qdisc->handle;
		__entry->parent	 = qdisc->parent;
		__entry->reason	 = reason;
		__assign_str(kind);
	),

	TP_printk("drop ifindex=%d kind=%s handle=0x%X parent=0x%X skbaddr=%p reason=%s",
		  __entry->ifindex, __get_str(kind), __entry->handle,
		  __entry->parent, __entry->skbaddr,
		  __print_symbolic(__entry->reason,
				   DEFINE_QDISC_DROP_REASON(FN, FNe)))
);

#undef FN
#undef FNe

TRACE_EVENT(qdisc_reset,

	TP_PROTO(struct Qdisc *q),
Loading