Commit dc6d2bc2 authored by Ian Rogers, committed by Namhyung Kim
Browse files

perf sample: Make user_regs and intr_regs optional

The struct regs_dump contains 512 bytes of cache_regs, meaning the two
regs_dump values in perf_sample (user_regs and intr_regs) contribute
1088 bytes of its total size of 1384 bytes. Initializing this much
memory has a cost, reported by Tavian
Barnes <tavianator@tavianator.com> as about 2.5% when running `perf
script --itrace=i0`:
https://lore.kernel.org/lkml/d841b97b3ad2ca8bcab07e4293375fb7c32dfce7.1736618095.git.tavianator@tavianator.com/



Adrian Hunter <adrian.hunter@intel.com> replied that the zero
initialization was necessary and couldn't simply be removed.

This patch aims to strike a middle ground of still zeroing the
perf_sample, but removing 79% of its size by making user_regs and
intr_regs optional pointers to zalloc-ed memory. To support the
allocation, accessors are created for user_regs and intr_regs. To
support correct cleanup, perf_sample__init and perf_sample__exit
functions are created and added throughout the code base.

Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250113194345.1537821-1-irogers@google.com


Signed-off-by: Namhyung Kim <namhyung@kernel.org>
parent 08d9e883
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@ static int sample_ustack(struct perf_sample *sample,
int test__arch_unwind_sample(struct perf_sample *sample,
			     struct thread *thread)
{
	struct regs_dump *regs = &sample->user_regs;
	struct regs_dump *regs = perf_sample__user_regs(sample);
	u64 *buf;

	buf = malloc(sizeof(u64) * PERF_REGS_MAX);
+1 −1
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
	struct unwind_info *ui = arg;
	struct regs_dump *user_regs = &ui->sample->user_regs;
	struct regs_dump *user_regs = perf_sample__user_regs(ui->sample);
	Dwarf_Word dwarf_regs[17];
	unsigned nregs;

+3 −1
Original line number Diff line number Diff line
@@ -1917,9 +1917,10 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
					u16 misc_flag)
{
	struct perf_sample_id *sid;
	struct perf_sample sample = {};
	struct perf_sample sample;
	int id_hdr_size;

	perf_sample__init(&sample, /*all=*/true);
	lost->lost = lost_count;
	if (evsel->core.ids) {
		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
@@ -1931,6 +1932,7 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
	lost->header.size = sizeof(*lost) + id_hdr_size;
	lost->header.misc = misc_flag;
	record__write(rec, NULL, lost, lost->header.size);
	perf_sample__exit(&sample);
}

static void record__read_lost_samples(struct record *rec)
+8 −2
Original line number Diff line number Diff line
@@ -783,14 +783,20 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen,
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
				      struct perf_event_attr *attr, const char *arch, FILE *fp)
{
	return perf_sample__fprintf_regs(&sample->intr_regs,
	if (!sample->intr_regs)
		return 0;

	return perf_sample__fprintf_regs(perf_sample__intr_regs(sample),
					 attr->sample_regs_intr, arch, fp);
}

static int perf_sample__fprintf_uregs(struct perf_sample *sample,
				      struct perf_event_attr *attr, const char *arch, FILE *fp)
{
	return perf_sample__fprintf_regs(&sample->user_regs,
	if (!sample->user_regs)
		return 0;

	return perf_sample__fprintf_regs(perf_sample__user_regs(sample),
					 attr->sample_regs_user, arch, fp);
}

+6 −2
Original line number Diff line number Diff line
@@ -1157,6 +1157,7 @@ static int deliver_event(struct ordered_events *qe,
		return 0;
	}

	perf_sample__init(&sample, /*all=*/false);
	ret = evlist__parse_sample(evlist, event, &sample);
	if (ret) {
		pr_err("Can't parse sample, err = %d\n", ret);
@@ -1167,8 +1168,10 @@ static int deliver_event(struct ordered_events *qe,
	assert(evsel != NULL);

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evswitch__discard(&top->evswitch, evsel))
			return 0;
		if (evswitch__discard(&top->evswitch, evsel)) {
			ret = 0;
			goto next_event;
		}
		++top->samples;
	}

@@ -1219,6 +1222,7 @@ static int deliver_event(struct ordered_events *qe,

	ret = 0;
next_event:
	perf_sample__exit(&sample);
	return ret;
}

Loading