Commit 36e40df3 authored by Anna-Maria Behnsen's avatar Anna-Maria Behnsen Committed by Thomas Gleixner
Browse files

timer_migration: Add tracepoints



The timer pull logic needs proper debugging aids. Add tracepoints so the
hierarchical idle machinery can be diagnosed.

Signed-off-by: default avatarAnna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240222103403.31923-1-anna-maria@linutronix.de
parent 7ee98877
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -17499,6 +17499,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F:	fs/timerfd.c
F:	include/linux/time_namespace.h
F:	include/linux/timer*
F:	include/trace/events/timer*
F:	kernel/time/*timer*
F:	kernel/time/namespace.c
+298 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */

#undef TRACE_SYSTEM
#define TRACE_SYSTEM timer_migration

#if !defined(_TRACE_TIMER_MIGRATION_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_TIMER_MIGRATION_H

#include <linux/tracepoint.h>

/* Group events */
TRACE_EVENT(tmigr_group_set,

	TP_PROTO(struct tmigr_group *group),

	TP_ARGS(group),

	TP_STRUCT__entry(
		__field( void *,	group		)
		__field( unsigned int,	lvl		)
		__field( unsigned int,	numa_node	)
	),

	TP_fast_assign(
		__entry->group		= group;
		__entry->lvl		= group->level;
		__entry->numa_node	= group->numa_node;
	),

	TP_printk("group=%p lvl=%d numa=%d",
		  __entry->group, __entry->lvl, __entry->numa_node)
);

TRACE_EVENT(tmigr_connect_child_parent,

	TP_PROTO(struct tmigr_group *child),

	TP_ARGS(child),

	TP_STRUCT__entry(
		__field( void *,	child		)
		__field( void *,	parent		)
		__field( unsigned int,	lvl		)
		__field( unsigned int,	numa_node	)
		__field( unsigned int,	num_children	)
		__field( u32,		childmask	)
	),

	TP_fast_assign(
		__entry->child		= child;
		__entry->parent		= child->parent;
		__entry->lvl		= child->parent->level;
		__entry->numa_node	= child->parent->numa_node;
		__entry->num_children	= child->parent->num_children;
		__entry->childmask	= child->childmask;
	),

	TP_printk("group=%p childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
		  __entry->child,  __entry->childmask, __entry->parent,
		  __entry->lvl, __entry->numa_node, __entry->num_children)
);

TRACE_EVENT(tmigr_connect_cpu_parent,

	TP_PROTO(struct tmigr_cpu *tmc),

	TP_ARGS(tmc),

	TP_STRUCT__entry(
		__field( void *,	parent		)
		__field( unsigned int,	cpu		)
		__field( unsigned int,	lvl		)
		__field( unsigned int,	numa_node	)
		__field( unsigned int,	num_children	)
		__field( u32,		childmask	)
	),

	TP_fast_assign(
		__entry->parent		= tmc->tmgroup;
		__entry->cpu		= tmc->cpuevt.cpu;
		__entry->lvl		= tmc->tmgroup->level;
		__entry->numa_node	= tmc->tmgroup->numa_node;
		__entry->num_children	= tmc->tmgroup->num_children;
		__entry->childmask	= tmc->childmask;
	),

	TP_printk("cpu=%d childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
		  __entry->cpu,	 __entry->childmask, __entry->parent,
		  __entry->lvl, __entry->numa_node, __entry->num_children)
);

DECLARE_EVENT_CLASS(tmigr_group_and_cpu,

	TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),

	TP_ARGS(group, state, childmask),

	TP_STRUCT__entry(
		__field( void *,	group		)
		__field( void *,	parent		)
		__field( unsigned int,	lvl		)
		__field( unsigned int,	numa_node	)
		__field( u32,		childmask	)
		__field( u8,		active		)
		__field( u8,		migrator	)
	),

	TP_fast_assign(
		__entry->group		= group;
		__entry->parent		= group->parent;
		__entry->lvl		= group->level;
		__entry->numa_node	= group->numa_node;
		__entry->childmask	= childmask;
		__entry->active		= state.active;
		__entry->migrator	= state.migrator;
	),

	TP_printk("group=%p lvl=%d numa=%d active=%0x migrator=%0x "
		  "parent=%p childmask=%0x",
		  __entry->group, __entry->lvl, __entry->numa_node,
		  __entry->active, __entry->migrator,
		  __entry->parent, __entry->childmask)
);

DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_inactive,

	TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),

	TP_ARGS(group, state, childmask)
);

DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_active,

	TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),

	TP_ARGS(group, state, childmask)
);

/* CPU events*/
DECLARE_EVENT_CLASS(tmigr_cpugroup,

	TP_PROTO(struct tmigr_cpu *tmc),

	TP_ARGS(tmc),

	TP_STRUCT__entry(
		__field( u64,		wakeup	)
		__field( void *,	parent	)
		__field( unsigned int,	cpu	)

	),

	TP_fast_assign(
		__entry->wakeup		= tmc->wakeup;
		__entry->parent		= tmc->tmgroup;
		__entry->cpu		= tmc->cpuevt.cpu;
	),

	TP_printk("cpu=%d parent=%p wakeup=%llu", __entry->cpu, __entry->parent, __entry->wakeup)
);

DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_new_timer,

	TP_PROTO(struct tmigr_cpu *tmc),

	TP_ARGS(tmc)
);

DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_active,

	TP_PROTO(struct tmigr_cpu *tmc),

	TP_ARGS(tmc)
);

DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_online,

	TP_PROTO(struct tmigr_cpu *tmc),

	TP_ARGS(tmc)
);

DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_offline,

	TP_PROTO(struct tmigr_cpu *tmc),

	TP_ARGS(tmc)
);

DEFINE_EVENT(tmigr_cpugroup, tmigr_handle_remote_cpu,

	TP_PROTO(struct tmigr_cpu *tmc),

	TP_ARGS(tmc)
);

DECLARE_EVENT_CLASS(tmigr_idle,

	TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),

	TP_ARGS(tmc, nextevt),

	TP_STRUCT__entry(
		__field( u64,		nextevt)
		__field( u64,		wakeup)
		__field( void *,	parent)
		__field( unsigned int,	cpu)
	),

	TP_fast_assign(
		__entry->nextevt	= nextevt;
		__entry->wakeup		= tmc->wakeup;
		__entry->parent		= tmc->tmgroup;
		__entry->cpu		= tmc->cpuevt.cpu;
	),

	TP_printk("cpu=%d parent=%p nextevt=%llu wakeup=%llu",
		  __entry->cpu, __entry->parent, __entry->nextevt, __entry->wakeup)
);

DEFINE_EVENT(tmigr_idle, tmigr_cpu_idle,

	TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),

	TP_ARGS(tmc, nextevt)
);

DEFINE_EVENT(tmigr_idle, tmigr_cpu_new_timer_idle,

	TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),

	TP_ARGS(tmc, nextevt)
);

TRACE_EVENT(tmigr_update_events,

	TP_PROTO(struct tmigr_group *child, struct tmigr_group *group,
		 union tmigr_state childstate,	union tmigr_state groupstate,
		 u64 nextevt),

	TP_ARGS(child, group, childstate, groupstate, nextevt),

	TP_STRUCT__entry(
		__field( void *,	child			)
		__field( void *,	group			)
		__field( u64,		nextevt			)
		__field( u64,		group_next_expiry	)
		__field( u64,		child_evt_expiry	)
		__field( unsigned int,	group_lvl		)
		__field( unsigned int,	child_evtcpu		)
		__field( u8,		child_active		)
		__field( u8,		group_active		)
	),

	TP_fast_assign(
		__entry->child			= child;
		__entry->group			= group;
		__entry->nextevt		= nextevt;
		__entry->group_next_expiry	= group->next_expiry;
		__entry->child_evt_expiry	= child ? child->groupevt.nextevt.expires : 0;
		__entry->group_lvl		= group->level;
		__entry->child_evtcpu		= child ? child->groupevt.cpu : 0;
		__entry->child_active		= childstate.active;
		__entry->group_active		= groupstate.active;
	),

	TP_printk("child=%p group=%p group_lvl=%d child_active=%0x group_active=%0x "
		  "nextevt=%llu next_expiry=%llu child_evt_expiry=%llu child_evtcpu=%d",
		  __entry->child, __entry->group, __entry->group_lvl, __entry->child_active,
		  __entry->group_active,
		  __entry->nextevt, __entry->group_next_expiry, __entry->child_evt_expiry,
		  __entry->child_evtcpu)
);

TRACE_EVENT(tmigr_handle_remote,

	TP_PROTO(struct tmigr_group *group),

	TP_ARGS(group),

	TP_STRUCT__entry(
		__field( void * ,	group	)
		__field( unsigned int ,	lvl	)
	),

	TP_fast_assign(
		__entry->group		= group;
		__entry->lvl		= group->level;
	),

	TP_printk("group=%p lvl=%d",
		   __entry->group, __entry->lvl)
);

#endif /*  _TRACE_TIMER_MIGRATION_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
+26 −0
Original line number Diff line number Diff line
@@ -14,6 +14,9 @@
#include "timer_migration.h"
#include "tick-internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/timer_migration.h>

/*
 * The timer migration mechanism is built on a hierarchy of groups. The
 * lowest level group contains CPUs, the next level groups of CPU groups
@@ -663,6 +666,8 @@ static bool tmigr_active_up(struct tmigr_group *group,
	 */
	group->groupevt.ignore = true;

	trace_tmigr_group_set_cpu_active(group, newstate, childmask);

	return walk_done;
}

@@ -672,6 +677,8 @@ static void __tmigr_cpu_activate(struct tmigr_cpu *tmc)

	data.childmask = tmc->childmask;

	trace_tmigr_cpu_active(tmc);

	tmc->cpuevt.ignore = true;
	WRITE_ONCE(tmc->wakeup, KTIME_MAX);

@@ -829,6 +836,9 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
		data->firstexp = tmigr_next_groupevt_expires(group);
	}

	trace_tmigr_update_events(child, group, childstate, groupstate,
				  nextexp);

unlock:
	raw_spin_unlock(&group->lock);

@@ -863,6 +873,8 @@ static u64 tmigr_new_timer(struct tmigr_cpu *tmc, u64 nextexp)
	if (tmc->remote)
		return KTIME_MAX;

	trace_tmigr_cpu_new_timer(tmc);

	tmc->cpuevt.ignore = false;
	data.remote = false;

@@ -904,6 +916,8 @@ static void tmigr_handle_remote_cpu(unsigned int cpu, u64 now,
		return;
	}

	trace_tmigr_handle_remote_cpu(tmc);

	tmc->remote = true;
	WRITE_ONCE(tmc->wakeup, KTIME_MAX);

@@ -984,6 +998,7 @@ static bool tmigr_handle_remote_up(struct tmigr_group *group,

	childmask = data->childmask;

	trace_tmigr_handle_remote(group);
again:
	/*
	 * Handle the group only if @childmask is the migrator or if the
@@ -1206,6 +1221,7 @@ u64 tmigr_cpu_new_timer(u64 nextexp)
	 */
	WRITE_ONCE(tmc->wakeup, ret);

	trace_tmigr_cpu_new_timer_idle(tmc, nextexp);
	raw_spin_unlock(&tmc->lock);
	return ret;
}
@@ -1298,6 +1314,8 @@ static bool tmigr_inactive_up(struct tmigr_group *group,
	 */
	WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent);

	trace_tmigr_group_set_cpu_inactive(group, newstate, childmask);

	return walk_done;
}

@@ -1350,6 +1368,7 @@ u64 tmigr_cpu_deactivate(u64 nextexp)
	 */
	WRITE_ONCE(tmc->wakeup, ret);

	trace_tmigr_cpu_idle(tmc, nextexp);
	raw_spin_unlock(&tmc->lock);
	return ret;
}
@@ -1467,6 +1486,7 @@ static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node,

	/* Setup successful. Add it to the hierarchy */
	list_add(&group->list, &tmigr_level_list[lvl]);
	trace_tmigr_group_set(group);
	return group;
}

@@ -1484,6 +1504,8 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
	raw_spin_unlock(&parent->lock);
	raw_spin_unlock_irq(&child->lock);

	trace_tmigr_connect_child_parent(child);

	/*
	 * To prevent inconsistent states, active children need to be active in
	 * the new parent as well. Inactive children are already marked inactive
@@ -1575,6 +1597,8 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node)

			raw_spin_unlock_irq(&group->lock);

			trace_tmigr_connect_cpu_parent(tmc);

			/* There are no children that need to be connected */
			continue;
		} else {
@@ -1642,6 +1666,7 @@ static int tmigr_cpu_online(unsigned int cpu)
		WRITE_ONCE(tmc->wakeup, KTIME_MAX);
	}
	raw_spin_lock_irq(&tmc->lock);
	trace_tmigr_cpu_online(tmc);
	tmc->idle = timer_base_is_idle();
	if (!tmc->idle)
		__tmigr_cpu_activate(tmc);
@@ -1681,6 +1706,7 @@ static int tmigr_cpu_offline(unsigned int cpu)
	 * offline; Therefore nextevt value is set to KTIME_MAX
	 */
	firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX);
	trace_tmigr_cpu_offline(tmc);
	raw_spin_unlock_irq(&tmc->lock);

	if (firstexp != KTIME_MAX) {