perf: Optimize perf_cgroup_switch() (f06cc667) · Commits · git / linux-nf

include/linux/perf_event.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -878,6 +878,7 @@ struct perf_event_pmu_context {
		unsigned int embedded : 1;

		unsigned int nr_events;
		unsigned int nr_cgroups;

		atomic_t refcount; /* event <-> epc */
		struct rcu_head rcu_head;

kernel/events/core.c

+60 −55

Original line number	Diff line number	Diff line
		@@ -375,6 +375,7 @@ enum event_type_t {
		EVENT_TIME = 0x4,
		/* see ctx_resched() for details */
		EVENT_CPU = 0x8,
		EVENT_CGROUP = 0x10,
		EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
		};

		@@ -684,21 +685,27 @@ do { \
		___p; \
		})

		static void perf_ctx_disable(struct perf_event_context *ctx)
		static void perf_ctx_disable(struct perf_event_context *ctx, bool cgroup)
		{
		struct perf_event_pmu_context *pmu_ctx;

		list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
		list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
		if (cgroup && !pmu_ctx->nr_cgroups)
		continue;
		perf_pmu_disable(pmu_ctx->pmu);
		}
		}

		static void perf_ctx_enable(struct perf_event_context *ctx)
		static void perf_ctx_enable(struct perf_event_context *ctx, bool cgroup)
		{
		struct perf_event_pmu_context *pmu_ctx;

		list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
		list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
		if (cgroup && !pmu_ctx->nr_cgroups)
		continue;
		perf_pmu_enable(pmu_ctx->pmu);
		}
		}

		static void ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type);
		static void ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type);
		@@ -856,9 +863,9 @@ static void perf_cgroup_switch(struct task_struct *task)
		return;

		perf_ctx_lock(cpuctx, cpuctx->task_ctx);
		perf_ctx_disable(&cpuctx->ctx);
		perf_ctx_disable(&cpuctx->ctx, true);

		ctx_sched_out(&cpuctx->ctx, EVENT_ALL);
		ctx_sched_out(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
		/*
		* must not be done before ctxswout due
		* to update_cgrp_time_from_cpuctx() in
		@@ -870,9 +877,9 @@ static void perf_cgroup_switch(struct task_struct *task)
		* perf_cgroup_set_timestamp() in ctx_sched_in()
		* to not have to pass task around
		*/
		ctx_sched_in(&cpuctx->ctx, EVENT_ALL);
		ctx_sched_in(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);

		perf_ctx_enable(&cpuctx->ctx);
		perf_ctx_enable(&cpuctx->ctx, true);
		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
		}

		@@ -965,6 +972,8 @@ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ct
		if (!is_cgroup_event(event))
		return;

		event->pmu_ctx->nr_cgroups++;

		/*
		* Because cgroup events are always per-cpu events,
		* @ctx == &cpuctx->ctx.
		@@ -985,6 +994,8 @@ perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *c
		if (!is_cgroup_event(event))
		return;

		event->pmu_ctx->nr_cgroups--;

		/*
		* Because cgroup events are always per-cpu events,
		* @ctx == &cpuctx->ctx.
		@@ -2677,9 +2688,9 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,

		event_type &= EVENT_ALL;

		perf_ctx_disable(&cpuctx->ctx);
		perf_ctx_disable(&cpuctx->ctx, false);
		if (task_ctx) {
		perf_ctx_disable(task_ctx);
		perf_ctx_disable(task_ctx, false);
		task_ctx_sched_out(task_ctx, event_type);
		}

		@@ -2697,9 +2708,9 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,

		perf_event_sched_in(cpuctx, task_ctx);

		perf_ctx_enable(&cpuctx->ctx);
		perf_ctx_enable(&cpuctx->ctx, false);
		if (task_ctx)
		perf_ctx_enable(task_ctx);
		perf_ctx_enable(task_ctx, false);
		}

		void perf_pmu_resched(struct pmu *pmu)
		@@ -3244,6 +3255,9 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
		struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
		struct perf_event_pmu_context *pmu_ctx;
		int is_active = ctx->is_active;
		bool cgroup = event_type & EVENT_CGROUP;

		event_type &= ~EVENT_CGROUP;

		lockdep_assert_held(&ctx->lock);

		@@ -3290,9 +3304,12 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)

		is_active ^= ctx->is_active; /* changed bits */

		list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
		list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
		if (cgroup && !pmu_ctx->nr_cgroups)
		continue;
		__pmu_ctx_sched_out(pmu_ctx, is_active);
		}
		}

		/*
		* Test whether two contexts are equivalent, i.e. whether they have both been
		@@ -3482,7 +3499,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
		raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
		if (context_equiv(ctx, next_ctx)) {

		perf_ctx_disable(ctx);
		perf_ctx_disable(ctx, false);

		/* PMIs are disabled; ctx->nr_pending is stable. */
		if (local_read(&ctx->nr_pending) ||
		@@ -3502,7 +3519,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
		perf_ctx_sched_task_cb(ctx, false);
		perf_event_swap_task_ctx_data(ctx, next_ctx);

		perf_ctx_enable(ctx);
		perf_ctx_enable(ctx, false);

		/*
		* RCU_INIT_POINTER here is safe because we've not
		@@ -3526,13 +3543,13 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)

		if (do_switch) {
		raw_spin_lock(&ctx->lock);
		perf_ctx_disable(ctx);
		perf_ctx_disable(ctx, false);

		inside_switch:
		perf_ctx_sched_task_cb(ctx, false);
		task_ctx_sched_out(ctx, EVENT_ALL);

		perf_ctx_enable(ctx);
		perf_ctx_enable(ctx, false);
		raw_spin_unlock(&ctx->lock);
		}
		}
		@@ -3818,47 +3835,32 @@ static int merge_sched_in(struct perf_event *event, void *data)
		return 0;
		}

		static void ctx_pinned_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
		static void pmu_groups_sched_in(struct perf_event_context *ctx,
		struct perf_event_groups *groups,
		struct pmu *pmu)
		{
		struct perf_event_pmu_context *pmu_ctx;
		int can_add_hw = 1;

		if (pmu) {
		visit_groups_merge(ctx, &ctx->pinned_groups,
		smp_processor_id(), pmu,
		merge_sched_in, &can_add_hw);
		} else {
		list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
		can_add_hw = 1;
		visit_groups_merge(ctx, &ctx->pinned_groups,
		smp_processor_id(), pmu_ctx->pmu,
		visit_groups_merge(ctx, groups, smp_processor_id(), pmu,
		merge_sched_in, &can_add_hw);
		}
		}
		}

		static void ctx_flexible_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
		static void ctx_groups_sched_in(struct perf_event_context *ctx,
		struct perf_event_groups *groups,
		bool cgroup)
		{
		struct perf_event_pmu_context *pmu_ctx;
		int can_add_hw = 1;

		if (pmu) {
		visit_groups_merge(ctx, &ctx->flexible_groups,
		smp_processor_id(), pmu,
		merge_sched_in, &can_add_hw);
		} else {
		list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
		can_add_hw = 1;
		visit_groups_merge(ctx, &ctx->flexible_groups,
		smp_processor_id(), pmu_ctx->pmu,
		merge_sched_in, &can_add_hw);
		}
		if (cgroup && !pmu_ctx->nr_cgroups)
		continue;
		pmu_groups_sched_in(ctx, groups, pmu_ctx->pmu);
		}
		}

		static void __pmu_ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
		static void __pmu_ctx_sched_in(struct perf_event_context *ctx,
		struct pmu *pmu)
		{
		ctx_flexible_sched_in(ctx, pmu);
		pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu);
		}

		static void
		@@ -3866,6 +3868,9 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
		{
		struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
		int is_active = ctx->is_active;
		bool cgroup = event_type & EVENT_CGROUP;

		event_type &= ~EVENT_CGROUP;

		lockdep_assert_held(&ctx->lock);

		@@ -3898,11 +3903,11 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
		* in order to give them the best chance of going on.
		*/
		if (is_active & EVENT_PINNED)
		ctx_pinned_sched_in(ctx, NULL);
		ctx_groups_sched_in(ctx, &ctx->pinned_groups, cgroup);

		/* Then walk through the lower prio flexible groups */
		if (is_active & EVENT_FLEXIBLE)
		ctx_flexible_sched_in(ctx, NULL);
		ctx_groups_sched_in(ctx, &ctx->flexible_groups, cgroup);
		}

		static void perf_event_context_sched_in(struct task_struct *task)
		@@ -3917,11 +3922,11 @@ static void perf_event_context_sched_in(struct task_struct *task)

		if (cpuctx->task_ctx == ctx) {
		perf_ctx_lock(cpuctx, ctx);
		perf_ctx_disable(ctx);
		perf_ctx_disable(ctx, false);

		perf_ctx_sched_task_cb(ctx, true);

		perf_ctx_enable(ctx);
		perf_ctx_enable(ctx, false);
		perf_ctx_unlock(cpuctx, ctx);
		goto rcu_unlock;
		}
		@@ -3934,7 +3939,7 @@ static void perf_event_context_sched_in(struct task_struct *task)
		if (!ctx->nr_events)
		goto unlock;

		perf_ctx_disable(ctx);
		perf_ctx_disable(ctx, false);
		/*
		* We want to keep the following priority order:
		* cpu pinned (that don't need to move), task pinned,
		@@ -3944,7 +3949,7 @@ static void perf_event_context_sched_in(struct task_struct *task)
		* events, no need to flip the cpuctx's events around.
		*/
		if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) {
		perf_ctx_disable(&cpuctx->ctx);
		perf_ctx_disable(&cpuctx->ctx, false);
		ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE);
		}

		@@ -3953,9 +3958,9 @@ static void perf_event_context_sched_in(struct task_struct *task)
		perf_ctx_sched_task_cb(cpuctx->task_ctx, true);

		if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
		perf_ctx_enable(&cpuctx->ctx);
		perf_ctx_enable(&cpuctx->ctx, false);

		perf_ctx_enable(ctx);
		perf_ctx_enable(ctx, false);

		unlock:
		perf_ctx_unlock(cpuctx, ctx);