Commit bd2da08d authored by Kan Liang, committed by Peter Zijlstra
Browse files

perf: Clean up pmu specific data



The pmu specific data is saved in task_struct now. Remove it from event
context structure.

Remove swap_task_ctx() as well.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250314172700.438923-7-kan.liang@linux.intel.com
parent 1fbc6c8e
Loading
Loading
Loading
Loading
+0 −12
Original line number Diff line number Diff line
@@ -501,16 +501,6 @@ struct pmu {
	 */
	struct kmem_cache		*task_ctx_cache;

	/*
	 * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data)
	 * can be synchronized using this function. See Intel LBR callstack support
	 * implementation and Perf core context switch handling callbacks for usage
	 * examples.
	 */
	void (*swap_task_ctx)		(struct perf_event_pmu_context *prev_epc,
					 struct perf_event_pmu_context *next_epc);
					/* optional */

	/*
	 * Set up pmu-private data structures for an AUX area
	 */
@@ -933,7 +923,6 @@ struct perf_event_pmu_context {
	atomic_t			refcount; /* event <-> epc */
	struct rcu_head			rcu_head;

	void				*task_ctx_data; /* pmu specific data */
	/*
	 * Set when one or more (plausibly active) event can't be scheduled
	 * due to pmu overcommit or pmu constraints, except tolerant to
@@ -981,7 +970,6 @@ struct perf_event_context {
	int				nr_user;
	int				is_active;

	int				nr_task_data;
	int				nr_stat;
	int				nr_freq;
	int				rotate_disable;
+3 −73
Original line number Diff line number Diff line
@@ -1254,20 +1254,6 @@ static void get_ctx(struct perf_event_context *ctx)
	refcount_inc(&ctx->refcount);
}

/*
 * Allocate a PMU specific task context data object from the PMU's
 * task_ctx_cache via kmem_cache_zalloc(GFP_KERNEL).
 *
 * Returns NULL when the PMU has no task_ctx_cache; otherwise returns
 * whatever kmem_cache_zalloc() hands back.
 */
static void *alloc_task_ctx_data(struct pmu *pmu)
{
	struct kmem_cache *cache = pmu->task_ctx_cache;

	if (!cache)
		return NULL;

	return kmem_cache_zalloc(cache, GFP_KERNEL);
}

/*
 * Hand a PMU specific task context data object back to the PMU's
 * task_ctx_cache.
 *
 * Nothing is done when the PMU has no task_ctx_cache or when
 * @task_ctx_data is NULL.
 */
static void free_task_ctx_data(struct pmu *pmu, void *task_ctx_data)
{
	if (!pmu->task_ctx_cache || !task_ctx_data)
		return;

	kmem_cache_free(pmu->task_ctx_cache, task_ctx_data);
}

static void free_ctx(struct rcu_head *head)
{
	struct perf_event_context *ctx;
@@ -3577,42 +3563,6 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
	}
}

/*
 * Walk two lists in lockstep: @cur1 iterates over @list1 while @cur2
 * iterates over @list2, both linked through @link. The walk stops as
 * soon as either cursor reaches its list head, so surplus entries on
 * the longer list are not visited.
 */
#define double_list_for_each_entry(cur1, cur2, list1, list2, link)	\
	for (cur1 = list_first_entry(list1, typeof(*cur1), link),	\
	     cur2 = list_first_entry(list2, typeof(*cur2), link);	\
	     !(list_entry_is_head(cur1, list1, link) ||			\
	       list_entry_is_head(cur2, list2, link));			\
	     cur1 = list_next_entry(cur1, link),			\
	     cur2 = list_next_entry(cur2, link))

/*
 * Exchange the PMU specific task data between two task contexts,
 * pairing up the entries of their pmu_ctx_list lists in list order.
 *
 * Nothing is done when @prev_ctx carries no task data (nr_task_data is
 * zero). A pair whose PMUs differ raises a one-time warning and is
 * skipped.
 */
static void perf_event_swap_task_ctx_data(struct perf_event_context *prev_ctx,
					  struct perf_event_context *next_ctx)
{
	struct perf_event_pmu_context *out_epc, *in_epc;

	if (prev_ctx->nr_task_data == 0)
		return;

	double_list_for_each_entry(out_epc, in_epc,
				   &prev_ctx->pmu_ctx_list, &next_ctx->pmu_ctx_list,
				   pmu_ctx_entry) {
		if (WARN_ON_ONCE(out_epc->pmu != in_epc->pmu))
			continue;

		/*
		 * Without a PMU hook a plain pointer exchange is all that
		 * is needed.
		 */
		if (!out_epc->pmu->swap_task_ctx) {
			swap(out_epc->task_ctx_data, in_epc->task_ctx_data);
			continue;
		}

		/*
		 * The PMU asked for its own synchronization of the task
		 * data; Intel LBR call stack profiling is one such user.
		 */
		out_epc->pmu->swap_task_ctx(out_epc, in_epc);
	}
}

static void perf_ctx_sched_task_cb(struct perf_event_context *ctx,
				   struct task_struct *task, bool sched_in)
{
@@ -3687,16 +3637,15 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
			WRITE_ONCE(next_ctx->task, task);

			perf_ctx_sched_task_cb(ctx, task, false);
			perf_event_swap_task_ctx_data(ctx, next_ctx);

			perf_ctx_enable(ctx, false);

			/*
			 * RCU_INIT_POINTER here is safe because we've not
			 * modified the ctx and the above modification of
			 * ctx->task and ctx->task_ctx_data are immaterial
			 * since those values are always verified under
			 * ctx->lock which we're now holding.
			 * ctx->task is immaterial since this value is
			 * always verified under ctx->lock which we're now
			 * holding.
			 */
			RCU_INIT_POINTER(task->perf_event_ctxp, next_ctx);
			RCU_INIT_POINTER(next->perf_event_ctxp, ctx);
@@ -5005,7 +4954,6 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
		     struct perf_event *event)
{
	struct perf_event_pmu_context *new = NULL, *pos = NULL, *epc;
	void *task_ctx_data = NULL;

	if (!ctx->task) {
		/*
@@ -5038,14 +4986,6 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
	if (!new)
		return ERR_PTR(-ENOMEM);

	if (event->attach_state & PERF_ATTACH_TASK_DATA) {
		task_ctx_data = alloc_task_ctx_data(pmu);
		if (!task_ctx_data) {
			kfree(new);
			return ERR_PTR(-ENOMEM);
		}
	}

	__perf_init_event_pmu_context(new, pmu);

	/*
@@ -5080,14 +5020,7 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
	epc->ctx = ctx;

found_epc:
	if (task_ctx_data && !epc->task_ctx_data) {
		epc->task_ctx_data = task_ctx_data;
		task_ctx_data = NULL;
		ctx->nr_task_data++;
	}
	raw_spin_unlock_irq(&ctx->lock);

	free_task_ctx_data(pmu, task_ctx_data);
	kfree(new);

	return epc;
@@ -5103,7 +5036,6 @@ static void free_cpc_rcu(struct rcu_head *head)
	struct perf_cpu_pmu_context *cpc =
		container_of(head, typeof(*cpc), epc.rcu_head);

	kfree(cpc->epc.task_ctx_data);
	kfree(cpc);
}

@@ -5111,7 +5043,6 @@ static void free_epc_rcu(struct rcu_head *head)
{
	struct perf_event_pmu_context *epc = container_of(head, typeof(*epc), rcu_head);

	kfree(epc->task_ctx_data);
	kfree(epc);
}

@@ -14103,7 +14034,6 @@ inherit_event(struct perf_event *parent_event,
	if (is_orphaned_event(parent_event) ||
	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
		mutex_unlock(&parent_event->child_mutex);
		/* task_ctx_data is freed with child_ctx */
		free_event(child_event);
		return NULL;
	}