Commit 65e4efa0 authored by David S. Miller
Browse files

Merge branch 'net-drop-rx-socket-tracepoint'



Yan Zhai says:

====================
net: pass receive socket to drop tracepoint

We set up our production packet drop monitoring around the kfree_skb
tracepoint. While this tracepoint is extremely valuable for diagnosing
critical problems, it also has some limitations with drops on the local
receive path: this tracepoint can only inspect the dropped skb itself,
but such an skb might not carry enough information to:

1. determine in which netns/container this skb gets dropped
2. determine by which socket/service this skb ought to be received

The 1st issue is because skb->dev is the only member field with a valid
netns reference. But skb->dev can get cleared or reused. For example,
tcp_v4_rcv will clear skb->dev and in later processing it might be reused
for the OFO tree.

The 2nd issue is because there is no reference on an skb that reliably
points to a receiving socket. skb->sk usually points to the local
sending socket, and it only points to a receive socket briefly after
the early demux stage, yet the socket can get stolen later. For certain
drop reasons like TCP OFO_MERGE, Zerowindow, UDP at PROTO_MEM error, etc.,
it is hard to infer which receiving socket is impacted. This cannot be
overcome by simply looking at the packet header, because of
complications like sk lookup programs. In the past, single-purpose
tracepoints like trace_udp_fail_queue_rcv_skb, trace_sock_rcvqueue_full,
etc. were added as needed to provide more visibility. This could be
handled in a more generic way.

In this change set we propose a new 'sk_skb_reason_drop' call as a drop-in
replacement for kfree_skb_reason on various local input paths. It accepts
an extra receiving socket argument. Both issues above can be resolved
via this new argument.

V4->V5: rename rx_skaddr to rx_sk to be more clear visually, suggested
by Jesper Dangaard Brouer.

V3->V4: adjusted the TP_STRUCT field order to align better, suggested by
Steven Rostedt.

V2->V3: fixed drop_monitor function signatures; fixed a few uninitialized
sk variables; added a few missing report tags from test bots (also noticed
by Dan Carpenter and Simon Horman).

V1->V2: instead of using skb->cb, directly add the needed argument to
trace_kfree_skb tracepoint. Also renamed functions as Eric Dumazet
suggested.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 6f46fc9b e2e7d78d
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -1251,8 +1251,14 @@ static inline bool skb_data_unref(const struct sk_buff *skb,
	return true;
}

void __fix_address
kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
void __fix_address sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb,
				      enum skb_drop_reason reason);

static inline void
kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{
	sk_skb_reason_drop(NULL, skb, reason);
}

/**
 *	kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
+7 −4
Original line number Diff line number Diff line
@@ -24,13 +24,14 @@ DEFINE_DROP_REASON(FN, FN)
TRACE_EVENT(kfree_skb,

	TP_PROTO(struct sk_buff *skb, void *location,
		 enum skb_drop_reason reason),
		 enum skb_drop_reason reason, struct sock *rx_sk),

	TP_ARGS(skb, location, reason),
	TP_ARGS(skb, location, reason, rx_sk),

	TP_STRUCT__entry(
		__field(void *,		skbaddr)
		__field(void *,		location)
		__field(void *,		rx_sk)
		__field(unsigned short,	protocol)
		__field(enum skb_drop_reason,	reason)
	),
@@ -38,12 +39,14 @@ TRACE_EVENT(kfree_skb,
	TP_fast_assign(
		__entry->skbaddr = skb;
		__entry->location = location;
		__entry->rx_sk = rx_sk;
		__entry->protocol = ntohs(skb->protocol);
		__entry->reason = reason;
	),

	TP_printk("skbaddr=%p protocol=%u location=%pS reason: %s",
		  __entry->skbaddr, __entry->protocol, __entry->location,
	TP_printk("skbaddr=%p rx_sk=%p protocol=%u location=%pS reason: %s",
		  __entry->skbaddr, __entry->rx_sk, __entry->protocol,
		  __entry->location,
		  __print_symbolic(__entry->reason,
				   DEFINE_DROP_REASON(FN, FNe)))
);
+1 −1
Original line number Diff line number Diff line
@@ -5234,7 +5234,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
				trace_consume_skb(skb, net_tx_action);
			else
				trace_kfree_skb(skb, net_tx_action,
						get_kfree_skb_cb(skb)->reason);
						get_kfree_skb_cb(skb)->reason, NULL);

			if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
				__kfree_skb(skb);
+6 −3
Original line number Diff line number Diff line
@@ -109,7 +109,8 @@ static u32 net_dm_queue_len = 1000;
struct net_dm_alert_ops {
	void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
				void *location,
				enum skb_drop_reason reason);
				enum skb_drop_reason reason,
				struct sock *rx_sk);
	void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
				int work, int budget);
	void (*work_item_func)(struct work_struct *work);
@@ -264,7 +265,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location)

static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
				void *location,
				enum skb_drop_reason reason)
				enum skb_drop_reason reason,
				struct sock *rx_sk)
{
	trace_drop_common(skb, location);
}
@@ -491,7 +493,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
					      struct sk_buff *skb,
					      void *location,
					      enum skb_drop_reason reason)
					      enum skb_drop_reason reason,
					      struct sock *rx_sk)
{
	ktime_t tstamp = ktime_get_real();
	struct per_cpu_dm_data *data;
+12 −10
Original line number Diff line number Diff line
@@ -1190,7 +1190,8 @@ void __kfree_skb(struct sk_buff *skb)
EXPORT_SYMBOL(__kfree_skb);

static __always_inline
bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
bool __sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb,
			  enum skb_drop_reason reason)
{
	if (unlikely(!skb_unref(skb)))
		return false;
@@ -1203,26 +1204,27 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
	if (reason == SKB_CONSUMED)
		trace_consume_skb(skb, __builtin_return_address(0));
	else
		trace_kfree_skb(skb, __builtin_return_address(0), reason);
		trace_kfree_skb(skb, __builtin_return_address(0), reason, sk);
	return true;
}

/**
 *	kfree_skb_reason - free an sk_buff with special reason
 *	sk_skb_reason_drop - free an sk_buff with special reason
 *	@sk: the socket to receive @skb, or NULL if not applicable
 *	@skb: buffer to free
 *	@reason: reason why this skb is dropped
 *
 *	Drop a reference to the buffer and free it if the usage count has
 *	hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
 *	tracepoint.
 *	Drop a reference to the buffer and free it if the usage count has hit
 *	zero. Meanwhile, pass the receiving socket and drop reason to
 *	'kfree_skb' tracepoint.
 */
void __fix_address
kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason)
{
	if (__kfree_skb_reason(skb, reason))
	if (__sk_skb_reason_drop(sk, skb, reason))
		__kfree_skb(skb);
}
EXPORT_SYMBOL(kfree_skb_reason);
EXPORT_SYMBOL(sk_skb_reason_drop);

#define KFREE_SKB_BULK_SIZE	16

@@ -1261,7 +1263,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
	while (segs) {
		struct sk_buff *next = segs->next;

		if (__kfree_skb_reason(segs, reason)) {
		if (__sk_skb_reason_drop(NULL, segs, reason)) {
			skb_poison_list(segs);
			kfree_skb_add_bulk(segs, &sa, reason);
		}
Loading