Commit e34293dd authored by Tomas Glozar, committed by Steven Rostedt (Google)
Browse files

rtla/timerlat: Add BPF skeleton to collect samples

Add BPF program that attaches to the osnoise:timerlat_sample tracepoint
and collects both the summary and the histogram (if requested) into BPF
maps (one map of each kind per context).

The program is designed to be used for both timerlat-top and
timerlat-hist. When used with timerlat-top, the "entries" parameter is
set to zero, which prevents the BPF program from recording histogram
entries. In that case, the maps for histograms do not have to be
created, as the BPF verifier will identify the code using them as
unreachable.

An IRQ or thread latency threshold might be supplied to stop recording
if hit, similar to the timerlat tracer threshold, which stops ftrace
tracing if hit. A BPF ringbuffer is used to signal threshold overflow to
userspace. In aa-only mode, this is the only function of the BPF
program.

Cc: John Kacur <jkacur@redhat.com>
Cc: Luis Goncalves <lgoncalv@redhat.com>
Cc: Gabriele Monaco <gmonaco@redhat.com>
Cc: Clark Williams <williams@redhat.com>
Link: https://lore.kernel.org/20250218145859.27762-5-tglozar@redhat.com


Signed-off-by: Tomas Glozar <tglozar@redhat.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
parent 9dc3766e
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -4,3 +4,4 @@ rtla-static
fixdep
feature
FEATURE-DUMP
*.skel.h
+13 −1
Original line number Diff line number Diff line
@@ -73,6 +73,17 @@ CFLAGS += $(INCLUDES) $(LIB_INCLUDES)

export CFLAGS OUTPUT srctree

# src/timerlat.skel.h is produced in both branches because $(RTLA_IN)
# depends on it. With BUILD_BPF_SKEL=1 the BPF program is compiled by clang
# for the bpf target and bpftool generates the skeleton from the object;
# otherwise a placeholder header containing only a comment is written.
ifeq ($(BUILD_BPF_SKEL),1)
src/timerlat.bpf.o: src/timerlat.bpf.c
	$(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -c $(filter %.c,$^) -o $@

src/timerlat.skel.h: src/timerlat.bpf.o
	$(QUIET_GENSKEL)$(BPFTOOL) gen skeleton $< > $@
else
src/timerlat.skel.h:
	$(Q)echo '/* BPF skeleton is disabled */' > src/timerlat.skel.h
endif

$(RTLA): $(RTLA_IN)
	$(QUIET_LINK)$(CC) $(LDFLAGS) -o $(RTLA) $(RTLA_IN) $(EXTLIBS)

@@ -83,7 +94,7 @@ static: $(RTLA_IN)
rtla.%: fixdep FORCE
	make -f $(srctree)/tools/build/Makefile.build dir=. $@

$(RTLA_IN): fixdep FORCE
$(RTLA_IN): fixdep FORCE src/timerlat.skel.h
	make $(build)=rtla

clean: doc_clean fixdep-clean
@@ -91,6 +102,7 @@ clean: doc_clean fixdep-clean
	$(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
	$(Q)rm -f rtla rtla-static fixdep FEATURE-DUMP rtla-*
	$(Q)rm -rf feature
	$(Q)rm -f src/timerlat.bpf.o src/timerlat.skel.h
check: $(RTLA)
	RTLA=$(RTLA) prove -o -f tests/
.PHONY: FORCE clean check
+1 −0
Original line number Diff line number Diff line
@@ -8,4 +8,5 @@ rtla-y += timerlat_top.o
rtla-y += timerlat_hist.o
rtla-y += timerlat_u.o
rtla-y += timerlat_aa.o
rtla-y += timerlat_bpf.o
rtla-y += rtla.o
+149 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
#include "timerlat_bpf.h"

#define nosubprog __always_inline
#define MAX_ENTRIES_DEFAULT 4096

char LICENSE[] SEC("license") = "GPL";

/*
 * Arguments of the osnoise:timerlat_sample tracepoint as read by
 * handle_timerlat_sample(). Only the two fields used by this program are
 * declared.
 * NOTE(review): preserve_access_index presumably lets the loader relocate
 * the field offsets against the running kernel's layout - confirm.
 */
struct trace_event_raw_timerlat_sample {
	/* Raw latency; the handler divides it by 1000 to obtain microseconds */
	unsigned long long timer_latency;
	/* 0 selects the IRQ maps, 1 the thread maps, any other value the user maps */
	int context;
} __attribute__((preserve_access_index));

/*
 * Per-CPU latency histograms, one map per context (IRQ, thread, user).
 * The key is the bucket index computed in handle_timerlat_sample() and the
 * value is the number of samples counted in that bucket.
 *
 * MAX_ENTRIES_DEFAULT is only the compiled-in size: rtla resizes the maps
 * to the requested number of entries before loading, or disables their
 * creation when no histogram is requested.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, MAX_ENTRIES_DEFAULT);
	__type(key, unsigned int);
	__type(value, unsigned long long);
} hist_irq SEC(".maps"), hist_thread SEC(".maps"), hist_user SEC(".maps");

/*
 * Per-CPU summary values, one map per context (IRQ, thread, user).
 * Each map has SUMMARY_FIELD_N slots indexed by the SUMMARY_* constants;
 * update_summary() maintains the current, min, max, count, sum and
 * overflow slots.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, SUMMARY_FIELD_N);
	__type(key, unsigned int);
	__type(value, unsigned long long);
} summary_irq SEC(".maps"), summary_thread SEC(".maps"), summary_user SEC(".maps");

/*
 * Ring buffer used only to notify userspace that a latency threshold was
 * hit. The record written by set_stop_tracing() is a single zero int, so
 * the payload itself carries no information.
 * NOTE(review): max_entries is 1 rather than a page-size multiple;
 * presumably libbpf rounds the size up at load time - confirm.
 */
struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 1);
} signal_stop_tracing SEC(".maps");

/*
 * Params to be set by rtla.
 * The values below are only defaults; rtla writes the real ones through the
 * skeleton's rodata before the program is loaded.
 */
/* Width of one histogram bucket: bucket = scaled latency / bucket_size */
const volatile int bucket_size = 1;
/* Divisor applied to the raw tracepoint latency before it is recorded */
const volatile int output_divisor = 1000;
/* Number of histogram buckets; 0 disables histogram recording */
const volatile int entries = 256;
/* IRQ (context 0) stop threshold, compared against raw latency / 1000; 0 = off */
const volatile int irq_threshold;
/* Thread (context 1) stop threshold, compared against raw latency / 1000; 0 = off */
const volatile int thread_threshold;
/* When set, update_summary() records nothing */
const volatile bool aa_only;

/* Set to 1 by set_stop_tracing(); once non-zero, the handler ignores samples */
int stop_tracing;

/*
 * map_get - read the value stored under key in map
 *
 * Returns the stored value, or 0 when the lookup yields no element.
 */
nosubprog unsigned long long map_get(void *map,
				     unsigned int key)
{
	unsigned long long *slot = bpf_map_lookup_elem(map, &key);

	if (slot)
		return *slot;

	return 0;
}

/*
 * map_set - store value under key in map
 *
 * Calls bpf_map_update_elem() with BPF_ANY. The helper's return value is
 * discarded, so a failed update is silently ignored.
 */
nosubprog void map_set(void *map,
		       unsigned int key,
		       unsigned long long value)
{
	bpf_map_update_elem(map, &key, &value, BPF_ANY);
}

/*
 * map_increment - add one to the value stored under key in map
 *
 * Implemented as a map_get() followed by a map_set(); a key with no element
 * is read as 0.
 */
nosubprog void map_increment(void *map,
			     unsigned int key)
{
	unsigned long long bumped = map_get(map, key) + 1;

	map_set(map, key, bumped);
}

/*
 * update_main_hist - count one sample in a histogram map
 *
 * Does nothing when histograms are disabled (entries == 0) or when bucket
 * lies at or beyond the configured number of entries (overflow).
 */
nosubprog void update_main_hist(void *map,
				int bucket)
{
	/* Record only when a histogram exists and the bucket fits into it */
	if (entries != 0 && bucket < entries)
		map_increment(map, bucket);
}

/*
 * update_summary - fold one sample into a summary map
 *
 * map:     one of the summary_* maps
 * latency: latency value to record (already scaled by the caller)
 * bucket:  histogram bucket of the sample, used only for overflow counting
 *
 * Updates the CURRENT, OVERFLOW, MAX, MIN, COUNT and SUM slots. Nothing is
 * recorded in auto-analysis-only mode.
 */
nosubprog void update_summary(void *map,
			      unsigned long long latency,
			      int bucket)
{
	if (aa_only)
		/* Auto-analysis only, nothing to be done here */
		return;

	map_set(map, SUMMARY_CURRENT, latency);

	/*
	 * NOTE(review): with entries == 0 every non-negative bucket satisfies
	 * this test, so SUMMARY_OVERFLOW is incremented for each sample -
	 * confirm that consumers ignore the overflow slot in that mode.
	 */
	if (bucket >= entries)
		/* Overflow */
		map_increment(map, SUMMARY_OVERFLOW);

	if (latency > map_get(map, SUMMARY_MAX))
		map_set(map, SUMMARY_MAX, latency);

	/* The first sample (COUNT still 0) always initialises MIN */
	if (latency < map_get(map, SUMMARY_MIN) || map_get(map, SUMMARY_COUNT) == 0)
		map_set(map, SUMMARY_MIN, latency);

	/* COUNT must be bumped only after the MIN check above has read it */
	map_increment(map, SUMMARY_COUNT);
	map_set(map, SUMMARY_SUM, map_get(map, SUMMARY_SUM) + latency);
}

/*
 * set_stop_tracing - stop sample processing and notify userspace
 *
 * Sets the stop_tracing flag checked at the top of the handler and writes
 * one record to the signal_stop_tracing ring buffer. The record content is
 * a zero int; the return value of bpf_ringbuf_output() is not checked.
 */
nosubprog void set_stop_tracing(void)
{
	int value = 0;

	/* Suppress further sample processing */
	stop_tracing = 1;

	/* Signal to userspace */
	bpf_ringbuf_output(&signal_stop_tracing, &value, sizeof(value), 0);
}

/*
 * handle_timerlat_sample - record one osnoise:timerlat_sample event
 *
 * The raw latency is scaled by output_divisor for recording and by 1000 for
 * the threshold comparison. Context 0 goes to the IRQ maps, context 1 to the
 * thread maps and any other context to the user maps. Only the IRQ and
 * thread contexts have a stop threshold. Once stop_tracing is set, samples
 * are dropped. Always returns 0.
 */
SEC("tp/osnoise/timerlat_sample")
int handle_timerlat_sample(struct trace_event_raw_timerlat_sample *tp_args)
{
	unsigned long long scaled, usecs;
	int slot;

	if (stop_tracing)
		return 0;

	scaled = tp_args->timer_latency / output_divisor;
	usecs = tp_args->timer_latency / 1000;
	slot = scaled / bucket_size;

	switch (tp_args->context) {
	case 0:
		/* IRQ context */
		update_main_hist(&hist_irq, slot);
		update_summary(&summary_irq, scaled, slot);

		if (irq_threshold != 0 && usecs >= irq_threshold)
			set_stop_tracing();
		break;
	case 1:
		/* Thread context */
		update_main_hist(&hist_thread, slot);
		update_summary(&summary_thread, scaled, slot);

		if (thread_threshold != 0 && usecs >= thread_threshold)
			set_stop_tracing();
		break;
	default:
		/* Everything else is accounted as user context, no threshold */
		update_main_hist(&hist_user, slot);
		update_summary(&summary_user, scaled, slot);
		break;
	}

	return 0;
}
+166 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#ifdef HAVE_BPF_SKEL
#include "timerlat.h"
#include "timerlat_bpf.h"
#include "timerlat.skel.h"

/* Skeleton instance used by every timerlat_bpf_* function in this file; assigned by timerlat_bpf_init() */
static struct timerlat_bpf *bpf;

/*
 * timerlat_bpf_init - load and initialize BPF program to collect timerlat data
 *
 * Opens the skeleton, copies the options from params into the program's
 * read-only data, sizes or disables the maps that are not needed, then loads
 * the program. Attaching is done separately by timerlat_bpf_attach().
 *
 * Returns 0 on success, 1 if the skeleton could not be opened, or the error
 * returned by timerlat_bpf__load(). On failure the file-scope skeleton
 * pointer is left NULL.
 */
int timerlat_bpf_init(struct timerlat_params *params)
{
	int err;

	debug_msg("Loading BPF program\n");

	bpf = timerlat_bpf__open();
	if (!bpf)
		return 1;

	/* Pass common options */
	bpf->rodata->output_divisor = params->output_divisor;
	bpf->rodata->entries = params->entries;
	bpf->rodata->irq_threshold = params->stop_us;
	bpf->rodata->thread_threshold = params->stop_total_us;
	bpf->rodata->aa_only = params->aa_only;

	if (params->entries != 0) {
		/* Pass histogram options */
		bpf->rodata->bucket_size = params->bucket_size;

		/* Set histogram array sizes */
		bpf_map__set_max_entries(bpf->maps.hist_irq, params->entries);
		bpf_map__set_max_entries(bpf->maps.hist_thread, params->entries);
		bpf_map__set_max_entries(bpf->maps.hist_user, params->entries);
	} else {
		/* No entries, disable histogram */
		bpf_map__set_autocreate(bpf->maps.hist_irq, false);
		bpf_map__set_autocreate(bpf->maps.hist_thread, false);
		bpf_map__set_autocreate(bpf->maps.hist_user, false);
	}

	if (params->aa_only) {
		/* Auto-analysis only, disable summary */
		bpf_map__set_autocreate(bpf->maps.summary_irq, false);
		bpf_map__set_autocreate(bpf->maps.summary_thread, false);
		bpf_map__set_autocreate(bpf->maps.summary_user, false);
	}

	/* Load and verify BPF program */
	err = timerlat_bpf__load(bpf);
	if (err) {
		timerlat_bpf__destroy(bpf);
		/* Do not leave a pointer to the destroyed skeleton behind */
		bpf = NULL;
		return err;
	}

	return 0;
}

/*
 * timerlat_bpf_attach - attach BPF program to collect timerlat data
 *
 * Returns the result of timerlat_bpf__attach() on the skeleton set up by
 * timerlat_bpf_init().
 */
int timerlat_bpf_attach(void)
{
	debug_msg("Attaching BPF program\n");

	return timerlat_bpf__attach(bpf);
}

/*
 * timerlat_bpf_detach - detach BPF program to collect timerlat data
 *
 * Calls timerlat_bpf__detach() on the skeleton; the skeleton itself is not
 * freed here, see timerlat_bpf_destroy().
 */
void timerlat_bpf_detach(void)
{
	timerlat_bpf__detach(bpf);
}

/*
 * timerlat_bpf_destroy - destroy BPF program to collect timerlat data
 *
 * Calls timerlat_bpf__destroy() on the skeleton. The file-scope pointer is
 * not reset, so no other timerlat_bpf_* function may be used afterwards
 * without a new timerlat_bpf_init().
 */
void timerlat_bpf_destroy(void)
{
	timerlat_bpf__destroy(bpf);
}

/*
 * handle_rb_event - ring buffer callback passed to ring_buffer__new()
 *
 * The record content is ignored; all arguments are unused and 0 is returned
 * unconditionally.
 */
static int handle_rb_event(void *ctx, void *data, size_t data_sz)
{
	return 0;
}

/*
 * timerlat_bpf_wait - wait until tracing is stopped or signal
 *
 * Polls the signal_stop_tracing ring buffer once. timeout is multiplied by
 * 1000 before being handed to ring_buffer__poll(), i.e. it is expressed in
 * seconds.
 *
 * Returns the value of ring_buffer__poll(), or -1 if the ring buffer
 * manager could not be created.
 */
int timerlat_bpf_wait(int timeout)
{
	struct ring_buffer *rb;
	int retval;

	rb = ring_buffer__new(bpf_map__fd(bpf->maps.signal_stop_tracing),
			      handle_rb_event, NULL, NULL);
	if (!rb)
		/* Polling a NULL ring buffer would dereference it */
		return -1;

	retval = ring_buffer__poll(rb, timeout * 1000);
	ring_buffer__free(rb);

	return retval;
}

/*
 * get_value - read one key from the IRQ, thread and user maps
 *
 * Each value_* buffer receives sizeof(long long) * cpus bytes from the
 * matching map. The maps are read in the order IRQ, thread, user and the
 * first failing lookup ends the sequence.
 *
 * Returns 0 on success or the error from bpf_map__lookup_elem().
 */
static int get_value(struct bpf_map *map_irq,
		     struct bpf_map *map_thread,
		     struct bpf_map *map_user,
		     int key,
		     long long *value_irq,
		     long long *value_thread,
		     long long *value_user,
		     int cpus)
{
	struct bpf_map *sources[] = { map_irq, map_thread, map_user };
	long long *targets[] = { value_irq, value_thread, value_user };
	unsigned int i;
	int err;

	for (i = 0; i < sizeof(sources) / sizeof(sources[0]); i++) {
		err = bpf_map__lookup_elem(sources[i], &key,
					   sizeof(unsigned int), targets[i],
					   sizeof(long long) * cpus, 0);
		if (err)
			return err;
	}

	return 0;
}

/*
 * timerlat_bpf_get_hist_value - get value from BPF hist map
 *
 * key:     histogram bucket to read
 * value_*: output buffers for the IRQ, thread and user histograms; each
 *          receives sizeof(long long) * cpus bytes
 * cpus:    number of per-CPU slots the buffers can hold
 *
 * Returns 0 on success or the error from the first failing map lookup.
 */
int timerlat_bpf_get_hist_value(int key,
				long long *value_irq,
				long long *value_thread,
				long long *value_user,
				int cpus)
{
	return get_value(bpf->maps.hist_irq,
			 bpf->maps.hist_thread,
			 bpf->maps.hist_user,
			 key, value_irq, value_thread, value_user, cpus);
}

/*
 * timerlat_bpf_get_summary_value - get value from BPF summary map
 *
 * key:     summary field to read
 * value_*: output buffers for the IRQ, thread and user summaries; each
 *          receives sizeof(long long) * cpus bytes
 * cpus:    number of per-CPU slots the buffers can hold
 *
 * Returns 0 on success or the error from the first failing map lookup.
 */
int timerlat_bpf_get_summary_value(enum summary_field key,
				   long long *value_irq,
				   long long *value_thread,
				   long long *value_user,
				   int cpus)
{
	return get_value(bpf->maps.summary_irq,
			 bpf->maps.summary_thread,
			 bpf->maps.summary_user,
			 key, value_irq, value_thread, value_user, cpus);
}
#endif /* HAVE_BPF_SKEL */
Loading