Commit e2d072d6 authored by Alexei Starovoitov
Browse files

Merge branch 'fix-bpf_link-grace-period-wait-for-tracepoints'

Kumar Kartikeya Dwivedi says:

====================
Fix bpf_link grace period wait for tracepoints

A recent change to non-faultable tracepoints switched from
preempt-disabled critical sections to SRCU-fast, which breaks
assumptions in the bpf_link_free() path. Use call_srcu() to fix the
breakage.

Changelog:
----------
v3 -> v4
v3: https://lore.kernel.org/bpf/20260331005215.2813492-1-memxor@gmail.com

 * Introduce call_tracepoint_unregister_{atomic,syscall} instead. (Alexei, Steven)

v2 -> v3
v2: https://lore.kernel.org/bpf/20260330143102.1265391-1-memxor@gmail.com

 * Introduce and switch to call_tracepoint_unregister_non_faultable(). (Steven)
 * Address Andrii's comment and add Acked-by. (Andrii)
 * Drop rcu_trace_implies_rcu_gp() conversion. (Alexei)

v1 -> v2
v1: https://lore.kernel.org/bpf/20260330032124.3141001-1-memxor@gmail.com

 * Add Reviewed-by tags. (Paul, Puranjay)
 * Adjust commit descriptions and comments to clarify intent. (Puranjay)
====================

Link: https://patch.msgid.link/20260331211021.1632902-1-memxor@gmail.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents a8502a79 c76fef7d
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -1854,6 +1854,10 @@ struct bpf_link_ops {
	 * target hook is sleepable, we'll go through tasks trace RCU GP and
	 * then "classic" RCU GP; this need for chaining tasks trace and
	 * classic RCU GPs is designated by setting bpf_link->sleepable flag
	 *
	 * For non-sleepable tracepoint links we go through an SRCU GP instead,
	 * since RCU is not used in that case. Sleepable tracepoints still
	 * follow the scheme above.
	 */
	void (*dealloc_deferred)(struct bpf_link *link);
	int (*detach)(struct bpf_link *link);
+20 −0
Original line number Diff line number Diff line
@@ -122,6 +122,22 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
{
	return tp->ext && tp->ext->faultable;
}
/*
 * Queue @func on @rcu via call_srcu() against tracepoint_srcu.
 *
 * This is the grace period wait to use when tearing down users of
 * non-faultable tracepoints, e.g., those used in atomic context.
 */
static inline void
call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
{
	call_srcu(&tracepoint_srcu, rcu, func);
}
/*
 * Queue @func on @rcu via call_rcu_tasks_trace().
 *
 * This is the grace period wait to use when tearing down users of
 * faultable tracepoints, e.g., those used in syscall context.
 */
static inline void
call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
{
	call_rcu_tasks_trace(rcu, func);
}
#else
/* No-op variant used in the #else branch: there is nothing to wait for. */
static inline void tracepoint_synchronize_unregister(void)
{
}
@@ -129,6 +145,10 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp)
{
	return false;
}
/*
 * No-op variant used in the #else branch.
 *
 * NOTE(review): @func is never invoked and @rcu is never queued here, so a
 * caller relying on the callback for cleanup would not get it; presumably no
 * caller can reach this stub in that configuration - confirm.
 */
static inline void
call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func)
{
}
/*
 * No-op variant used in the #else branch.
 *
 * NOTE(review): @func is never invoked and @rcu is never queued here, so a
 * caller relying on the callback for cleanup would not get it; presumably no
 * caller can reach this stub in that configuration - confirm.
 */
static inline void
call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func)
{
}
#endif

#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
+23 −2
Original line number Diff line number Diff line
@@ -3261,6 +3261,18 @@ static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
	bpf_link_dealloc(link);
}

static bool bpf_link_is_tracepoint(struct bpf_link *link)
{
	/*
	 * Only these combinations support a tracepoint bpf_link.
	 * BPF_LINK_TYPE_TRACING raw_tp progs are hardcoded to use
	 * bpf_raw_tp_link_lops and thus dealloc_deferred(), see
	 * bpf_raw_tp_link_attach().
	 */
	return link->type == BPF_LINK_TYPE_RAW_TRACEPOINT ||
	       (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP);
}

static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
{
	if (rcu_trace_implies_rcu_gp())
@@ -3279,16 +3291,25 @@ static void bpf_link_free(struct bpf_link *link)
	if (link->prog)
		ops->release(link);
	if (ops->dealloc_deferred) {
		/* Schedule BPF link deallocation, which will only then
		/*
		 * Schedule BPF link deallocation, which will only then
		 * trigger putting BPF program refcount.
		 * If underlying BPF program is sleepable or BPF link's target
		 * attach hookpoint is sleepable or otherwise requires RCU GPs
		 * to ensure link and its underlying BPF program is not
		 * reachable anymore, we need to first wait for RCU tasks
		 * trace sync, and then go through "classic" RCU grace period
		 * trace sync, and then go through "classic" RCU grace period.
		 *
		 * For tracepoint BPF links, we need to go through SRCU grace
		 * period wait instead when non-faultable tracepoint is used. We
		 * don't need to chain SRCU grace period waits, however, for the
		 * faultable case, since it exclusively uses RCU Tasks Trace.
		 */
		if (link->sleepable || (link->prog && link->prog->sleepable))
			call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
		/* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. */
		else if (bpf_link_is_tracepoint(link))
			call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
		else
			call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
	} else if (ops->dealloc) {