Commit 6ea082b1 authored by Tomas Glozar's avatar Tomas Glozar Committed by Steven Rostedt (Google)
Browse files

rtla/timerlat: Add action on threshold feature

Extend the functionality provided by the -t/--trace option, which
triggers saving the contents of a tracefs buffer after tracing is
stopped, to support implementing arbitrary actions.

A new option, --on-threshold, is added, taking an argument
that further specifies the action. Actions added in this patch are:

- trace[,file=<filename>]: Saves tracefs buffer, optionally taking a
filename.
- signal,num=<sig>,pid=<pid>: Sends signal to process. "parent" might
be specified instead of number to send signal to parent process.
- shell,command=<command>: Execute shell command.

Multiple actions may be specified and will be executed in order,
including multiple actions of the same type. Trace output requested via
-t and -a now adds a trace action to the end of the list.

If an action fails, the following actions are not executed. For
example, this command:

$ rtla timerlat -T 20 --on-threshold trace \
--on-threshold shell,command="grep ipi_send timerlat_trace.txt" \
--on-threshold signal,num=2,pid=parent

will send signal 2 (SIGINT) to parent process, but only if saved trace
contains the text "ipi_send".

This way, the feature can be used for flexible reactions on latency
spikes, and allows combining rtla with other tooling like perf.

Cc: John Kacur <jkacur@redhat.com>
Cc: Luis Goncalves <lgoncalv@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Chang Yin <cyin@redhat.com>
Cc: Costa Shulyupin <costa.shul@redhat.com>
Cc: Crystal Wood <crwood@redhat.com>
Cc: Gabriele Monaco <gmonaco@redhat.com>
Link: https://lore.kernel.org/20250626123405.1496931-3-tglozar@redhat.com


Signed-off-by: default avatarTomas Glozar <tglozar@redhat.com>
Signed-off-by: default avatarSteven Rostedt (Google) <rostedt@goodmis.org>
parent 8b6cbcac
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
rtla-y += trace.o
rtla-y += utils.o
rtla-y += actions.o
rtla-y += osnoise.o
rtla-y += osnoise_top.o
rtla-y += osnoise_hist.o
+235 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>

#include "actions.h"
#include "trace.h"
#include "utils.h"

/*
 * actions_init - initialize struct actions
 */
void
actions_init(struct actions *self)
{
	self->size = action_default_size;
	self->list = calloc(self->size, sizeof(struct action));
	self->len = 0;

	memset(&self->present, 0, sizeof(self->present));

	/* This has to be set by the user */
	self->trace_output_inst = NULL;
}

/*
 * actions_destroy - destroy struct actions
 */
void
actions_destroy(struct actions *self)
{
	/* Free any action-specific data */
	for (struct action *action = self->list; action < self->list + self->len; action++) {
		if (action->type == ACTION_SHELL)
			free(action->command);
		if (action->type == ACTION_TRACE_OUTPUT)
			free(action->trace_output);
	}

	/* Free action list */
	free(self->list);
}

/*
 * actions_new - Get pointer to new action
 */
static struct action *
actions_new(struct actions *self)
{
	if (self->size >= self->len) {
		self->size *= 2;
		self->list = realloc(self->list, self->size * sizeof(struct action));
	}

	return &self->list[self->len++];
}

/*
 * actions_add_trace_output - add an action to output trace
 */
int
actions_add_trace_output(struct actions *self, const char *trace_output)
{
	struct action *action = actions_new(self);

	self->present[ACTION_TRACE_OUTPUT] = true;
	action->type = ACTION_TRACE_OUTPUT;
	action->trace_output = calloc(strlen(trace_output) + 1, sizeof(char));
	if (!action->trace_output)
		return -1;
	strcpy(action->trace_output, trace_output);

	return 0;
}

/*
 * actions_add_trace_output - add an action to send signal to a process
 */
int
actions_add_signal(struct actions *self, int signal, int pid)
{
	struct action *action = actions_new(self);

	self->present[ACTION_SIGNAL] = true;
	action->type = ACTION_SIGNAL;
	action->signal = signal;
	action->pid = pid;

	return 0;
}

/*
 * actions_add_shell - add an action to execute a shell command
 */
int
actions_add_shell(struct actions *self, const char *command)
{
	struct action *action = actions_new(self);

	self->present[ACTION_SHELL] = true;
	action->type = ACTION_SHELL;
	action->command = calloc(strlen(command) + 1, sizeof(char));
	if (!action->command)
		return -1;
	strcpy(action->command, command);

	return 0;
}

/*
 * actions_parse - add an action based on text specification
 */
int
actions_parse(struct actions *self, const char *trigger)
{
	enum action_type type = ACTION_NONE;
	char *token;
	char trigger_c[strlen(trigger)];

	/* For ACTION_SIGNAL */
	int signal = 0, pid = 0;

	/* For ACTION_TRACE_OUTPUT */
	char *trace_output;

	strcpy(trigger_c, trigger);
	token = strtok(trigger_c, ",");

	if (strcmp(token, "trace") == 0)
		type = ACTION_TRACE_OUTPUT;
	else if (strcmp(token, "signal") == 0)
		type = ACTION_SIGNAL;
	else if (strcmp(token, "shell") == 0)
		type = ACTION_SHELL;
	else
		/* Invalid trigger type */
		return -1;

	token = strtok(NULL, ",");

	switch (type) {
	case ACTION_TRACE_OUTPUT:
		/* Takes no argument */
		if (token == NULL)
			trace_output = "timerlat_trace.txt";
		else {
			if (strlen(token) > 5 && strncmp(token, "file=", 5) == 0) {
				trace_output = token + 5;
			} else {
				/* Invalid argument */
				return -1;
			}

			token = strtok(NULL, ",");
			if (token != NULL)
				/* Only one argument allowed */
				return -1;
		}
		return actions_add_trace_output(self, trace_output);
	case ACTION_SIGNAL:
		/* Takes two arguments, num (signal) and pid */
		while (token != NULL) {
			if (strlen(token) > 4 && strncmp(token, "num=", 4) == 0) {
				signal = atoi(token + 4);
			} else if (strlen(token) > 4 && strncmp(token, "pid=", 4) == 0) {
				if (strncmp(token + 4, "parent", 7) == 0)
					pid = -1;
				else
					pid = atoi(token + 4);
			} else {
				/* Invalid argument */
				return -1;
			}

			token = strtok(NULL, ",");
		}

		if (!signal || !pid)
			/* Missing argument */
			return -1;

		return actions_add_signal(self, signal, pid);
	case ACTION_SHELL:
		if (token == NULL)
			return -1;
		if (strlen(token) > 8 && strncmp(token, "command=", 8) == 0)
			return actions_add_shell(self, token + 8);
		return -1;
	default:
		return -1;
	}
}

/*
 * actions_perform - perform all actions
 */
int
actions_perform(const struct actions *self)
{
	int pid, retval;
	const struct action *action;

	for (action = self->list; action < self->list + self->len; action++) {
		switch (action->type) {
		case ACTION_TRACE_OUTPUT:
			retval = save_trace_to_file(self->trace_output_inst, action->trace_output);
			if (retval) {
				err_msg("Error saving trace\n");
				return retval;
			}
			break;
		case ACTION_SIGNAL:
			if (action->pid == -1)
				pid = getppid();
			else
				pid = action->pid;
			retval = kill(pid, action->signal);
			if (retval) {
				err_msg("Error sending signal\n");
				return retval;
			}
			break;
		case ACTION_SHELL:
			retval = system(action->command);
			if (retval)
				return retval;
			break;
		default:
			break;
		}
	}

	return 0;
}
+49 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#include <tracefs.h>
#include <stdbool.h>

enum action_type {
	ACTION_NONE = 0,
	ACTION_TRACE_OUTPUT,
	ACTION_SIGNAL,
	ACTION_SHELL,
	ACTION_FIELD_N
};

struct action {
	enum action_type type;
	union {
		struct {
			/* For ACTION_TRACE_OUTPUT */
			char *trace_output;
		};
		struct {
			/* For ACTION_SIGNAL */
			int signal;
			int pid;
		};
		struct {
			/* For ACTION_SHELL */
			char *command;
		};
	};
};

static const int action_default_size = 8;

struct actions {
	struct action *list;
	int len, size;
	bool present[ACTION_FIELD_N];

	/* External dependencies */
	struct tracefs_instance *trace_output_inst;
};

void actions_init(struct actions *self);
void actions_destroy(struct actions *self);
int actions_add_trace_output(struct actions *self, const char *trace_output);
int actions_add_signal(struct actions *self, int signal, int pid);
int actions_add_shell(struct actions *self, const char *command);
int actions_parse(struct actions *self, const char *trigger);
int actions_perform(const struct actions *self);
+2 −1
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include "actions.h"
#include "osnoise.h"

/*
@@ -22,7 +23,6 @@ struct timerlat_params {
	/* Common params */
	char			*cpus;
	cpu_set_t		monitored_cpus;
	char			*trace_output;
	char			*cgroup_name;
	unsigned long long	runtime;
	long long		stop_us;
@@ -48,6 +48,7 @@ struct timerlat_params {
	struct sched_attr	sched_param;
	struct trace_events	*events;
	enum timerlat_tracing_mode mode;
	struct actions actions;
	union {
		struct {
			/* top only */
+27 −10
Original line number Diff line number Diff line
@@ -757,6 +757,7 @@ static void timerlat_hist_usage(char *usage)
		"	     --warm-up s: let the workload run for s seconds before collecting data",
		"	     --trace-buffer-size kB: set the per-cpu trace buffer size in kB",
		"	     --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency",
		"	     --on-threshold <action>: define action to be executed at latency threshold, multiple are allowed",
		NULL,
	};

@@ -786,11 +787,14 @@ static struct timerlat_params
	int auto_thresh;
	int retval;
	int c;
	char *trace_output = NULL;

	params = calloc(1, sizeof(*params));
	if (!params)
		exit(1);

	actions_init(&params->actions);

	/* disabled by default */
	params->dma_latency = -1;

@@ -841,6 +845,7 @@ static struct timerlat_params
			{"warm-up",		required_argument,	0, '\2'},
			{"trace-buffer-size",	required_argument,	0, '\3'},
			{"deepest-idle-state",	required_argument,	0, '\4'},
			{"on-threshold",	required_argument,	0, '\5'},
			{0, 0, 0, 0}
		};

@@ -866,7 +871,7 @@ static struct timerlat_params
			params->print_stack = auto_thresh;

			/* set trace */
			params->trace_output = "timerlat_trace.txt";
			trace_output = "timerlat_trace.txt";

			break;
		case 'c':
@@ -956,13 +961,13 @@ static struct timerlat_params
		case 't':
			if (optarg) {
				if (optarg[0] == '=')
					params->trace_output = &optarg[1];
					trace_output = &optarg[1];
				else
					params->trace_output = &optarg[0];
					trace_output = &optarg[0];
			} else if (optind < argc && argv[optind][0] != '-')
				params->trace_output = argv[optind];
				trace_output = argv[optind];
			else
				params->trace_output = "timerlat_trace.txt";
				trace_output = "timerlat_trace.txt";
			break;
		case 'u':
			params->user_workload = 1;
@@ -1032,11 +1037,21 @@ static struct timerlat_params
		case '\4':
			params->deepest_idle_state = get_llong_from_str(optarg);
			break;
		case '\5':
			retval = actions_parse(&params->actions, optarg);
			if (retval) {
				err_msg("Invalid action %s\n", optarg);
				exit(EXIT_FAILURE);
			}
			break;
		default:
			timerlat_hist_usage("Invalid option");
		}
	}

	if (trace_output)
		actions_add_trace_output(&params->actions, trace_output);

	if (geteuid()) {
		err_msg("rtla needs root permission\n");
		exit(EXIT_FAILURE);
@@ -1061,7 +1076,8 @@ static struct timerlat_params
	 * If auto-analysis or trace output is enabled, switch from BPF mode to
	 * mixed mode
	 */
	if (params->mode == TRACING_MODE_BPF && params->trace_output && !params->no_aa)
	if (params->mode == TRACING_MODE_BPF &&
	    (params->actions.present[ACTION_TRACE_OUTPUT] || !params->no_aa))
		params->mode = TRACING_MODE_MIXED;

	return params;
@@ -1254,12 +1270,13 @@ int timerlat_hist_main(int argc, char *argv[])
		}
	}

	if (params->trace_output) {
	if (params->actions.present[ACTION_TRACE_OUTPUT]) {
		record = osnoise_init_trace_tool("timerlat");
		if (!record) {
			err_msg("Failed to enable the trace instance\n");
			goto out_free;
		}
		params->actions.trace_output_inst = record->trace.inst;

		if (params->events) {
			retval = trace_events_enable(&record->trace, params->events);
@@ -1325,7 +1342,7 @@ int timerlat_hist_main(int argc, char *argv[])
	 * tracing while enabling other instances. The trace instance is the
	 * one with most valuable information.
	 */
	if (params->trace_output)
	if (params->actions.present[ACTION_TRACE_OUTPUT])
		trace_instance_start(&record->trace);
	if (!params->no_aa)
		trace_instance_start(&aa->trace);
@@ -1395,8 +1412,7 @@ int timerlat_hist_main(int argc, char *argv[])
		if (!params->no_aa)
			timerlat_auto_analysis(params->stop_us, params->stop_total_us);

		save_trace_to_file(record ? record->trace.inst : NULL,
				   params->trace_output);
		actions_perform(&params->actions);
		return_value = FAILED;
	}

@@ -1418,6 +1434,7 @@ int timerlat_hist_main(int argc, char *argv[])
	osnoise_destroy_tool(aa);
	osnoise_destroy_tool(record);
	osnoise_destroy_tool(tool);
	actions_destroy(&params->actions);
	if (params->mode != TRACING_MODE_TRACEFS)
		timerlat_bpf_destroy();
	free(params);
Loading