Commit 26f80681, authored by Gabriele Monaco, committed by Steven Rostedt (Google)
Browse files

sched: Add sched tracepoints for RV task model

Add the following tracepoints:
* sched_entry(bool preempt, ip)
    Called while entering __schedule
* sched_exit(bool is_switch, ip)
    Called while exiting __schedule
* sched_set_state(task, state)
    Called when a task changes its state (to and from running)

These tracepoints are useful to describe the Linux task model and are
adapted from the patches by Daniel Bristot de Oliveira
(https://bristot.me/linux-task-model/).

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Link: https://lore.kernel.org/20250305140406.350227-2-gmonaco@redhat.com


Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
parent 41a4d2d3
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
#ifndef _LINUX_RV_H
#define _LINUX_RV_H

#define MAX_DA_NAME_LEN	24
#define MAX_DA_NAME_LEN	32

#ifdef CONFIG_RV
/*
+16 −0
Original line number Diff line number Diff line
@@ -46,6 +46,7 @@
#include <linux/rv.h>
#include <linux/livepatch_sched.h>
#include <linux/uidgid_types.h>
#include <linux/tracepoint-defs.h>
#include <asm/kmap_size.h>

/* task_struct member predeclarations (sorted alphabetically): */
@@ -186,6 +187,12 @@ struct user_event_mm;
# define debug_rtlock_wait_restore_state()	do { } while (0)
#endif

/*
 * trace_set_current_state() - fire the sched_set_state_tp tracepoint, if enabled.
 * @state_value: the state value handed to the tracepoint.
 *
 * The tracepoint_enabled() test guards the call, so the out-of-line
 * __trace_set_current_state() wrapper is only invoked while
 * sched_set_state_tp is enabled.
 */
#define trace_set_current_state(state_value)                     \
	do {                                                     \
		if (tracepoint_enabled(sched_set_state_tp))      \
			__trace_set_current_state(state_value); \
	} while (0)

/*
 * set_current_state() includes a barrier so that the write of current->__state
 * is correctly serialised wrt the caller's subsequent test of whether to
@@ -226,12 +233,14 @@ struct user_event_mm;
/*
 * __set_current_state() - store @state_value into current->__state using
 * WRITE_ONCE().
 *
 * The debug and tracing hooks are invoked before the store, so the
 * tracing hook runs while current->__state still holds the previous value.
 * Note that @state_value is expanded more than once.
 */
#define __set_current_state(state_value)				\
	do {								\
		debug_normal_state_change((state_value));		\
		trace_set_current_state(state_value);			\
		WRITE_ONCE(current->__state, (state_value));		\
	} while (0)

/*
 * set_current_state() - store @state_value into current->__state using
 * smp_store_mb(), i.e. the variant that includes a barrier.
 *
 * The debug and tracing hooks are invoked before the store, so the
 * tracing hook runs while current->__state still holds the previous value.
 * Note that @state_value is expanded more than once.
 */
#define set_current_state(state_value)					\
	do {								\
		debug_normal_state_change((state_value));		\
		trace_set_current_state(state_value);			\
		smp_store_mb(current->__state, (state_value));		\
	} while (0)

@@ -247,6 +256,7 @@ struct user_event_mm;
									\
		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
		debug_special_state_change((state_value));		\
		trace_set_current_state(state_value);			\
		WRITE_ONCE(current->__state, (state_value));		\
		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
	} while (0)
@@ -282,6 +292,7 @@ struct user_event_mm;
		raw_spin_lock(&current->pi_lock);			\
		current->saved_state = current->__state;		\
		debug_rtlock_wait_set_state();				\
		trace_set_current_state(TASK_RTLOCK_WAIT);		\
		WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT);		\
		raw_spin_unlock(&current->pi_lock);			\
	} while (0);
@@ -291,6 +302,7 @@ struct user_event_mm;
		lockdep_assert_irqs_disabled();				\
		raw_spin_lock(&current->pi_lock);			\
		debug_rtlock_wait_restore_state();			\
		trace_set_current_state(current->saved_state);		\
		WRITE_ONCE(current->__state, current->saved_state);	\
		current->saved_state = TASK_RUNNING;			\
		raw_spin_unlock(&current->pi_lock);			\
@@ -327,6 +339,10 @@ extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
extern void io_schedule(void);

/*
 * Declare the sched_set_state_tp tracepoint and the out-of-line wrapper
 * that fires it, so that the state-setting macros in this header can
 * trace state changes through trace_set_current_state().
 */
DECLARE_TRACEPOINT(sched_set_state_tp);
extern void __trace_set_current_state(int state_value);

/**
 * struct prev_cputime - snapshot of system and user cputime
 * @utime: time spent in user mode
+13 −0
Original line number Diff line number Diff line
@@ -824,6 +824,19 @@ DECLARE_TRACE(sched_compute_energy_tp,
		 unsigned long max_util, unsigned long busy_time),
	TP_ARGS(p, dst_cpu, energy, max_util, busy_time));

/*
 * sched_entry_tp - fired at the start of __schedule().
 * @preempt: true when the scheduling mode is above SM_NONE.
 * @ip:      address supplied by the call site (CALLER_ADDR0).
 */
DECLARE_TRACE(sched_entry_tp,
	TP_PROTO(bool preempt, unsigned long ip),
	TP_ARGS(preempt, ip));

/*
 * sched_exit_tp - fired at the end of __schedule(), and from
 * schedule_tail() for a newly created task returning from schedule for
 * the first time.
 * @is_switch: true when prev != next in __schedule(); schedule_tail()
 *             always passes true.
 * @ip:        address supplied by the call site (CALLER_ADDR0).
 */
DECLARE_TRACE(sched_exit_tp,
	TP_PROTO(bool is_switch, unsigned long ip),
	TP_ARGS(is_switch, ip));

/*
 * sched_set_state_tp - fired when a task's state is about to change.
 * @tsk:   task whose state is being set.
 * @state: the new state value.
 *
 * The condition restricts the event to transitions between a zero and a
 * non-zero state value: it compares the truthiness of the task's present
 * __state with that of @state, so changes between two non-zero states are
 * not reported. (Presumably zero corresponds to TASK_RUNNING -- confirm
 * against the state definitions.)
 */
DECLARE_TRACE_CONDITION(sched_set_state_tp,
	TP_PROTO(struct task_struct *tsk, int state),
	TP_ARGS(tsk, state),
	TP_CONDITION(!!(tsk->__state) != !!state));

#endif /* _TRACE_SCHED_H */

/* This part must be outside protection */
+22 −1
Original line number Diff line number Diff line
@@ -491,6 +491,16 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }

#endif /* CONFIG_SCHED_CORE */

/*
 * The tracepoint and the wrapper below are both exported, since tracing
 * of state changes may need to happen from modules.
 */
EXPORT_TRACEPOINT_SYMBOL(sched_set_state_tp);

/*
 * __trace_set_current_state - fire sched_set_state_tp for the current task.
 * @state_value: state value forwarded to the tracepoint.
 *
 * Do not call directly; use the helper macro trace_set_current_state,
 * which first checks whether the tracepoint is enabled.
 */
void __trace_set_current_state(int state_value)
{
	trace_sched_set_state_tp(current, state_value);
}
EXPORT_SYMBOL(__trace_set_current_state);

/*
 * Serialization rules:
 *
@@ -5307,6 +5317,12 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
	 */

	finish_task_switch(prev);
	/*
	 * This is a special case: the newly created task has just
	 * switched the context for the first time. It is returning from
	 * schedule for the first time in this path.
	 */
	trace_sched_exit_tp(true, CALLER_ADDR0);
	preempt_enable();

	if (current->set_child_tid)
@@ -6650,12 +6666,15 @@ static void __sched notrace __schedule(int sched_mode)
	 * as a preemption by schedule_debug() and RCU.
	 */
	bool preempt = sched_mode > SM_NONE;
	bool is_switch = false;
	unsigned long *switch_count;
	unsigned long prev_state;
	struct rq_flags rf;
	struct rq *rq;
	int cpu;

	trace_sched_entry_tp(preempt, CALLER_ADDR0);

	cpu = smp_processor_id();
	rq = cpu_rq(cpu);
	prev = rq->curr;
@@ -6723,7 +6742,8 @@ static void __sched notrace __schedule(int sched_mode)
	rq->last_seen_need_resched_ns = 0;
#endif

	if (likely(prev != next)) {
	is_switch = prev != next;
	if (likely(is_switch)) {
		rq->nr_switches++;
		/*
		 * RCU users of rcu_dereference(rq->curr) may not see
@@ -6768,6 +6788,7 @@ static void __sched notrace __schedule(int sched_mode)
		__balance_callbacks(rq);
		raw_spin_rq_unlock_irq(rq);
	}
	trace_sched_exit_tp(is_switch, CALLER_ADDR0);
}

void __noreturn do_task_dead(void)
+1 −1
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#define MAX_DESCRIPTION 1024
#define MAX_DA_NAME_LEN	24
#define MAX_DA_NAME_LEN	32

struct monitor {
	char name[MAX_DA_NAME_LEN];