Commit 90661365 authored by Peter Zijlstra's avatar Peter Zijlstra
Browse files

perf: Unify perf_event_free_task() / perf_event_exit_task_context()



Both perf_event_free_task() and perf_event_exit_task_context() are
very similar, except perf_event_exit_task_context() is a little more
generic / makes less assumptions.

Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarRavi Bangoria <ravi.bangoria@amd.com>
Link: https://lkml.kernel.org/r/20250307193723.274039710@infradead.org
parent 3e8671e0
Loading
Loading
Loading
Loading
+25 −68
Original line number Diff line number Diff line
@@ -13742,13 +13742,11 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
	perf_event_wakeup(event);
}

static void perf_event_exit_task_context(struct task_struct *child)
static void perf_event_exit_task_context(struct task_struct *child, bool exit)
{
	struct perf_event_context *child_ctx, *clone_ctx = NULL;
	struct perf_event *child_event, *next;

	WARN_ON_ONCE(child != current);

	child_ctx = perf_pin_task_context(child);
	if (!child_ctx)
		return;
@@ -13771,6 +13769,7 @@ static void perf_event_exit_task_context(struct task_struct *child)
	 * in.
	 */
	raw_spin_lock_irq(&child_ctx->lock);
	if (exit)
		task_ctx_sched_out(child_ctx, NULL, EVENT_ALL);

	/*
@@ -13780,7 +13779,7 @@ static void perf_event_exit_task_context(struct task_struct *child)
	RCU_INIT_POINTER(child->perf_event_ctxp, NULL);
	put_ctx(child_ctx); /* cannot be last */
	WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
	put_task_struct(current); /* cannot be last */
	put_task_struct(child); /* cannot be last */

	clone_ctx = unclone_ctx(child_ctx);
	raw_spin_unlock_irq(&child_ctx->lock);
@@ -13793,6 +13792,7 @@ static void perf_event_exit_task_context(struct task_struct *child)
	 * won't get any samples after PERF_RECORD_EXIT. We can however still
	 * get a few PERF_RECORD_READ events.
	 */
	if (exit)
		perf_event_task(child, child_ctx, 0);

	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
@@ -13800,6 +13800,23 @@ static void perf_event_exit_task_context(struct task_struct *child)

	mutex_unlock(&child_ctx->mutex);

	if (!exit) {
		/*
		 * perf_event_release_kernel() could still have a reference on
		 * this context. In that case we must wait for these events to
		 * have been freed (in particular all their references to this
		 * task must've been dropped).
		 *
		 * Without this copy_process() will unconditionally free this
		 * task (irrespective of its reference count) and
		 * _free_event()'s put_task_struct(event->hw.target) will be a
		 * use-after-free.
		 *
		 * Wait for all events to drop their context reference.
		 */
		wait_var_event(&child_ctx->refcount,
			       refcount_read(&child_ctx->refcount) == 1);
	}
	put_ctx(child_ctx);
}

@@ -13827,7 +13844,7 @@ void perf_event_exit_task(struct task_struct *child)
	}
	mutex_unlock(&child->perf_event_mutex);

	perf_event_exit_task_context(child);
	perf_event_exit_task_context(child, true);

	/*
	 * The perf_event_exit_task_context calls perf_event_task
@@ -13844,25 +13861,6 @@ void perf_event_exit_task(struct task_struct *child)
	detach_task_ctx_data(child);
}

static void perf_free_event(struct perf_event *event,
			    struct perf_event_context *ctx)
{
	struct perf_event *parent = event->parent;

	if (WARN_ON_ONCE(!parent))
		return;

	mutex_lock(&parent->child_mutex);
	list_del_init(&event->child_list);
	mutex_unlock(&parent->child_mutex);

	raw_spin_lock_irq(&ctx->lock);
	perf_group_detach(event);
	list_del_event(event, ctx);
	raw_spin_unlock_irq(&ctx->lock);
	put_event(event);
}

/*
 * Free a context as created by inheritance by perf_event_init_task() below,
 * used by fork() in case of fail.
@@ -13872,48 +13870,7 @@ static void perf_free_event(struct perf_event *event,
 */
void perf_event_free_task(struct task_struct *task)
{
	struct perf_event_context *ctx;
	struct perf_event *event, *tmp;

	ctx = rcu_access_pointer(task->perf_event_ctxp);
	if (!ctx)
		return;

	mutex_lock(&ctx->mutex);
	raw_spin_lock_irq(&ctx->lock);
	/*
	 * Destroy the task <-> ctx relation and mark the context dead.
	 *
	 * This is important because even though the task hasn't been
	 * exposed yet the context has been (through child_list).
	 */
	RCU_INIT_POINTER(task->perf_event_ctxp, NULL);
	WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
	put_task_struct(task); /* cannot be last */
	raw_spin_unlock_irq(&ctx->lock);


	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
		perf_free_event(event, ctx);

	mutex_unlock(&ctx->mutex);

	/*
	 * perf_event_release_kernel() could've stolen some of our
	 * child events and still have them on its free_list. In that
	 * case we must wait for these events to have been freed (in
	 * particular all their references to this task must've been
	 * dropped).
	 *
	 * Without this copy_process() will unconditionally free this
	 * task (irrespective of its reference count) and
	 * _free_event()'s put_task_struct(event->hw.target) will be a
	 * use-after-free.
	 *
	 * Wait for all events to drop their context reference.
	 */
	wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
	put_ctx(ctx); /* must be last */
	perf_event_exit_task_context(task, false);
}

void perf_event_delayed_put(struct task_struct *task)