Commit a6f19063 authored by Jesper Dangaard Brouer, committed by Jakub Kicinski
Browse files

net: track pfmemalloc drops via SKB_DROP_REASON_PFMEMALLOC



Add a new SKB drop reason (SKB_DROP_REASON_PFMEMALLOC) to track packets
dropped due to memory pressure. In production environments, we've observed
memory exhaustion reported by memory layer stack traces, but these drops
were not properly tracked in the SKB drop reason infrastructure.

While most network code paths now properly report pfmemalloc drops, some
protocol-specific socket implementations still use sk_filter() without
drop reason tracking:
- Bluetooth L2CAP sockets
- CAIF sockets
- IUCV sockets
- Netlink sockets
- SCTP sockets
- Unix domain sockets

These remaining cases represent less common paths and could be converted
in a follow-up patch if needed. The current implementation provides
significantly improved observability into memory pressure events in the
network stack, especially for key protocols like TCP and UDP, helping to
diagnose problems in production environments.

Reported-by: Matt Fleming <mfleming@cloudflare.com>
Signed-off-by: Jesper Dangaard Brouer <hawk@kernel.org>
Link: https://patch.msgid.link/175268316579.2407873.11634752355644843509.stgit@firesoul


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 8b7ab8eb
Loading
Loading
Loading
Loading
+2 −4
Original line number Diff line number Diff line
@@ -1002,8 +1002,8 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun,
/* Net device start xmit */
static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
{
	enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct tun_struct *tun = netdev_priv(dev);
	enum skb_drop_reason drop_reason;
	int txq = skb->queue_mapping;
	struct netdev_queue *queue;
	struct tun_file *tfile;
@@ -1032,10 +1032,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
	}

	if (tfile->socket.sk->sk_filter &&
	    sk_filter(tfile->socket.sk, skb)) {
		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
	    sk_filter_reason(tfile->socket.sk, skb, &drop_reason))
		goto drop;
	}

	len = run_ebpf_filter(tun, skb, len);
	if (len == 0) {
+12 −2
Original line number Diff line number Diff line
@@ -1073,10 +1073,20 @@ bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
	return set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
}

int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap,
		       enum skb_drop_reason *reason);

/*
 * sk_filter - convenience wrapper around sk_filter_trim_cap() with the cap
 * argument fixed at 1, for callers that do not consume a drop reason.
 *
 * The result of sk_filter_trim_cap() is returned unchanged.
 */
static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	return sk_filter_trim_cap(sk, skb, 1);
	/* Throwaway slot for the reason out-parameter; never read afterwards. */
	enum skb_drop_reason ignore_reason;

	return sk_filter_trim_cap(sk, skb, 1, &ignore_reason);
}

/*
 * sk_filter_reason - like sk_filter(), but forwards the caller's @reason
 * pointer to sk_filter_trim_cap() instead of a throwaway local.
 *
 * The cap argument is fixed at 1 and the result of sk_filter_trim_cap() is
 * returned unchanged.
 *
 * NOTE(review): presumably sk_filter_trim_cap() stores the drop reason in
 * *@reason when it rejects the packet - confirm against its definition,
 * which is not visible here.
 */
static inline int sk_filter_reason(struct sock *sk, struct sk_buff *skb,
				   enum skb_drop_reason *reason)
{
	return sk_filter_trim_cap(sk, skb, 1, reason);
}

struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);
+6 −0
Original line number Diff line number Diff line
@@ -125,6 +125,7 @@
	FN(CAN_RX_INVALID_FRAME)	\
	FN(CANFD_RX_INVALID_FRAME)	\
	FN(CANXL_RX_INVALID_FRAME)	\
	FN(PFMEMALLOC)	\
	FNe(MAX)

/**
@@ -598,6 +599,11 @@ enum skb_drop_reason {
	 * non conform CAN-XL frame (or device is unable to receive CAN frames)
	 */
	SKB_DROP_REASON_CANXL_RX_INVALID_FRAME,
	/**
	 * @SKB_DROP_REASON_PFMEMALLOC: packet allocated from memory reserve
	 * reached a path or socket not eligible for use of memory reserves
	 */
	SKB_DROP_REASON_PFMEMALLOC,
	/**
	 * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
	 * shouldn't be used as a real 'reason' - only for tracing code gen
+1 −1
Original line number Diff line number Diff line
@@ -1559,7 +1559,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
		     enum skb_drop_reason *reason);


int tcp_filter(struct sock *sk, struct sk_buff *skb);
int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason);
void tcp_set_state(struct sock *sk, int state);
void tcp_done(struct sock *sk);
int tcp_abort(struct sock *sk, int err);
+6 −2
Original line number Diff line number Diff line
@@ -5749,6 +5749,7 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
				    struct packet_type **ppt_prev)
{
	enum skb_drop_reason drop_reason = SKB_DROP_REASON_UNHANDLED_PROTO;
	struct packet_type *ptype, *pt_prev;
	rx_handler_func_t *rx_handler;
	struct sk_buff *skb = *pskb;
@@ -5840,8 +5841,10 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
#endif
	skb_reset_redirect(skb);
skip_classify:
	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
	if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) {
		drop_reason = SKB_DROP_REASON_PFMEMALLOC;
		goto drop;
	}

	if (skb_vlan_tag_present(skb)) {
		if (pt_prev) {
@@ -5946,7 +5949,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
			dev_core_stats_rx_dropped_inc(skb->dev);
		else
			dev_core_stats_rx_nohandler_inc(skb->dev);
		kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);

		kfree_skb_reason(skb, drop_reason);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
Loading