Commit 4eabf533 authored by Peter Zijlstra, committed by Ingo Molnar
Browse files

perf/core: Detach 'struct perf_cpu_pmu_context' and 'struct pmu' lifetimes



In preparation for being able to unregister a PMU with existing events,
it becomes important to detach struct perf_cpu_pmu_context lifetimes
from that of struct pmu.

Notably struct perf_cpu_pmu_context embeds a struct perf_event_pmu_context
that can stay referenced until the last event goes.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20241104135518.760214287@infradead.org
parent 0983593f
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -343,7 +343,7 @@ struct pmu {
	 */
	unsigned int			scope;

	struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
	struct perf_cpu_pmu_context __percpu **cpu_pmu_context;
	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
	int				task_ctx_nr;
	int				hrtimer_interval_ms;
@@ -922,7 +922,7 @@ struct perf_event_pmu_context {
	struct list_head		pinned_active;
	struct list_head		flexible_active;

	/* Used to avoid freeing per-cpu perf_event_pmu_context */
	/* Used to identify the per-cpu perf_event_pmu_context */
	unsigned int			embedded : 1;

	unsigned int			nr_events;
+47 −9
Original line number Diff line number Diff line
@@ -1219,7 +1219,7 @@ static int perf_mux_hrtimer_restart_ipi(void *arg)

/*
 * Return the perf_cpu_pmu_context belonging to @pmu for the CPU this code
 * is executing on.
 *
 * NOTE(review): this is a diff rendering without +/- markers, so both the
 * pre-change and post-change return lines are shown. The second one, which
 * dereferences the per-CPU slot, appears to be the post-change form (it
 * matches pmu->cpu_pmu_context becoming a per-CPU array of pointers) --
 * confirm against the tree.
 */
static __always_inline struct perf_cpu_pmu_context *this_cpc(struct pmu *pmu)
{
	return this_cpu_ptr(pmu->cpu_pmu_context);
	return *this_cpu_ptr(pmu->cpu_pmu_context);
}

void perf_pmu_disable(struct pmu *pmu)
@@ -5007,11 +5007,14 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
		 */
		struct perf_cpu_pmu_context *cpc;

		cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
		cpc = *per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
		epc = &cpc->epc;
		raw_spin_lock_irq(&ctx->lock);
		if (!epc->ctx) {
			atomic_set(&epc->refcount, 1);
			/*
			 * One extra reference for the pmu; see perf_pmu_free().
			 */
			atomic_set(&epc->refcount, 2);
			epc->embedded = 1;
			list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
			epc->ctx = ctx;
@@ -5087,6 +5090,15 @@ static void get_pmu_ctx(struct perf_event_pmu_context *epc)
	WARN_ON_ONCE(!atomic_inc_not_zero(&epc->refcount));
}

/*
 * RCU callback that frees a perf_cpu_pmu_context.
 *
 * @head: the rcu_head embedded in the context's epc member
 *        (cpc->epc.rcu_head); put_pmu_ctx() passes it to call_rcu() for
 *        embedded perf_event_pmu_contexts.
 */
static void free_cpc_rcu(struct rcu_head *head)
{
	/* Recover the enclosing perf_cpu_pmu_context from the nested rcu_head. */
	struct perf_cpu_pmu_context *cpc =
		container_of(head, typeof(*cpc), epc.rcu_head);

	/* Release the embedded epc's task_ctx_data before the container itself. */
	kfree(cpc->epc.task_ctx_data);
	kfree(cpc);
}

static void free_epc_rcu(struct rcu_head *head)
{
	struct perf_event_pmu_context *epc = container_of(head, typeof(*epc), rcu_head);
@@ -5121,8 +5133,10 @@ static void put_pmu_ctx(struct perf_event_pmu_context *epc)

	raw_spin_unlock_irqrestore(&ctx->lock, flags);

	if (epc->embedded)
	if (epc->embedded) {
		call_rcu(&epc->rcu_head, free_cpc_rcu);
		return;
	}

	call_rcu(&epc->rcu_head, free_epc_rcu);
}
@@ -11752,7 +11766,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
	cpus_read_lock();
	for_each_online_cpu(cpu) {
		struct perf_cpu_pmu_context *cpc;
		cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
		cpc = *per_cpu_ptr(pmu->cpu_pmu_context, cpu);
		cpc->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);

		cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpc);
@@ -11925,8 +11939,26 @@ static void perf_pmu_free(struct pmu *pmu)
		device_del(pmu->dev);
		put_device(pmu->dev);
	}

	if (pmu->cpu_pmu_context) {
		int cpu;

		for_each_possible_cpu(cpu) {
			struct perf_cpu_pmu_context *cpc;

			cpc = *per_cpu_ptr(pmu->cpu_pmu_context, cpu);
			if (!cpc)
				continue;
			if (cpc->epc.embedded) {
				/* refcount managed */
				put_pmu_ctx(&cpc->epc);
				continue;
			}
			kfree(cpc);
		}
		free_percpu(pmu->cpu_pmu_context);
	}
}

DEFINE_FREE(pmu_unregister, struct pmu *, if (_T) perf_pmu_free(_T))

@@ -11964,14 +11996,20 @@ int perf_pmu_register(struct pmu *_pmu, const char *name, int type)
			return ret;
	}

	pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context);
	pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context *);
	if (!pmu->cpu_pmu_context)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct perf_cpu_pmu_context *cpc;
		struct perf_cpu_pmu_context *cpc =
			kmalloc_node(sizeof(struct perf_cpu_pmu_context),
				     GFP_KERNEL | __GFP_ZERO,
				     cpu_to_node(cpu));

		if (!cpc)
			return -ENOMEM;

		cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
		*per_cpu_ptr(pmu->cpu_pmu_context, cpu) = cpc;
		__perf_init_event_pmu_context(&cpc->epc, pmu);
		__perf_mux_hrtimer_init(cpc, cpu);
	}