Commit f9db3a38 authored by Xu Kuohai, committed by Andrii Nakryiko
Browse files

selftests/bpf/benchs: Add overwrite mode benchmark for BPF ring buffer



Add a --rb-overwrite option to benchmark the BPF ring buffer in overwrite mode.
Since libbpf does not yet support consuming from a ring buffer in overwrite
mode, also add a --rb-bench-producer option to benchmark the producer directly,
without a consumer.

Benchmarks on an x86_64 and an arm64 CPU are shown below for reference.

- AMD EPYC 9654 (x86_64)

Ringbuf, multi-producer contention in overwrite mode, no consumer
=================================================================
rb-prod nr_prod 1    32.180 ± 0.033M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 2    9.617 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 3    8.810 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 4    9.272 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 8    9.173 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 12   3.086 ± 0.032M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 16   2.945 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 20   2.519 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 24   2.545 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 28   2.363 ± 0.024M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 32   2.357 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 36   2.267 ± 0.011M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 40   2.284 ± 0.020M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 44   2.215 ± 0.025M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 48   2.193 ± 0.023M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 52   2.208 ± 0.024M/s (drops 0.000 ± 0.000M/s)

- HiSilicon Kunpeng 920 (arm64)

Ringbuf, multi-producer contention in overwrite mode, no consumer
=================================================================
rb-prod nr_prod 1    14.478 ± 0.006M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 2    21.787 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 3    6.045 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 4    5.352 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 8    4.850 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 12   3.542 ± 0.016M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 16   3.509 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 20   3.171 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 24   3.154 ± 0.014M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 28   2.974 ± 0.015M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 32   3.167 ± 0.014M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 36   2.903 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 40   2.866 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 44   2.914 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 48   2.806 ± 0.012M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 52   2.840 ± 0.012M/s (drops 0.000 ± 0.000M/s)

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20251018035738.4039621-4-xukuohai@huaweicloud.com
parent 8f7a86ec
Loading
Loading
Loading
Loading
+59 −6
Original line number Diff line number Diff line
@@ -19,6 +19,8 @@ static struct {
	int ringbuf_sz; /* per-ringbuf, in bytes */
	bool ringbuf_use_output; /* use slower output API */
	int perfbuf_sz; /* per-CPU size, in pages */
	bool overwrite;
	bool bench_producer;
} args = {
	.back2back = false,
	.batch_cnt = 500,
@@ -27,6 +29,8 @@ static struct {
	.ringbuf_sz = 512 * 1024,
	.ringbuf_use_output = false,
	.perfbuf_sz = 128,
	.overwrite = false,
	.bench_producer = false,
};

enum {
@@ -35,6 +39,8 @@ enum {
	ARG_RB_BATCH_CNT = 2002,
	ARG_RB_SAMPLED = 2003,
	ARG_RB_SAMPLE_RATE = 2004,
	ARG_RB_OVERWRITE = 2005,
	ARG_RB_BENCH_PRODUCER = 2006,
};

static const struct argp_option opts[] = {
@@ -43,6 +49,8 @@ static const struct argp_option opts[] = {
	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
	{ "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
	{ "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
	{},
};

@@ -72,6 +80,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
			argp_usage(state);
		}
		break;
	case ARG_RB_OVERWRITE:
		args.overwrite = true;
		break;
	case ARG_RB_BENCH_PRODUCER:
		args.bench_producer = true;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
@@ -95,8 +109,33 @@ static inline void bufs_trigger_batch(void)

static void bufs_validate(void)
{
	if (env.consumer_cnt != 1) {
		fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
	if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
		fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
		exit(1);
	}

	if (args.overwrite && !args.bench_producer) {
		fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
		exit(1);
	}

	if (args.bench_producer && env.consumer_cnt != 0) {
		fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
		exit(1);
	}

	if (args.bench_producer && args.back2back) {
		fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
		exit(1);
	}

	if (args.bench_producer && args.sampled) {
		fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
		exit(1);
	}

	if (!args.bench_producer && env.consumer_cnt != 1) {
		fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
		exit(1);
	}

@@ -128,12 +167,17 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	/*
	 * Fetch and reset the per-interval counters.
	 *
	 * With --rb-bench-producer the benchmark runs without a consumer, so
	 * hits are taken from the counter that the BPF program increments
	 * after each successful submit/output. Otherwise they come from
	 * buf_hits.value (NOTE(review): presumably bumped by the user-space
	 * sample callback; confirm against its definition).
	 */
	if (args.bench_producer)
		res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
	else
		res->hits = atomic_swap(&buf_hits.value, 0);
	/* Drops are read from the BPF program's counter in both modes. */
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
	__u32 flags;
	struct bpf_map *ringbuf;
	struct ringbuf_bench *skel;

	setup_libbpf();
@@ -146,12 +190,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)

	skel->rodata->batch_cnt = args.batch_cnt;
	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
	skel->rodata->bench_producer = args.bench_producer;

	if (args.sampled)
		/* record data + header take 16 bytes */
		skel->rodata->wakeup_data_size = args.sample_rate * 16;

	bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
	ringbuf = skel->maps.ringbuf;
	if (args.overwrite) {
		flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
		bpf_map__set_map_flags(ringbuf, flags);
	}

	bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);

	if (ringbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
@@ -171,10 +222,12 @@ static void ringbuf_libbpf_setup(void)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
	struct bpf_link *link;
	int map_fd;

	ctx->skel = ringbuf_setup_skeleton();
	ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
					buf_process_sample, NULL, NULL);

	map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
	ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL);
	if (!ctx->ringbuf) {
		fprintf(stderr, "failed to create ringbuf\n");
		exit(1);
+4 −0
Original line number Diff line number Diff line
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
	summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
done

# Producer-only runs in overwrite mode, swept over the producer count.
# --rb-overwrite is only accepted together with --rb-bench-producer, which in
# turn is rejected unless the consumer count is zero.
# NOTE(review): this uses $RUN_BENCH rather than the $RUN_RB_BENCH used by the
# loop above, presumably because the latter adds a consumer; confirm against
# its definition earlier in this script.
header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
	summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
done
+11 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook

#include <stdbool.h>
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
@@ -14,9 +15,11 @@ struct {

const volatile int batch_cnt = 0;
const volatile long use_output = 0;
const volatile bool bench_producer = false;

long sample_val = 42;
long dropped __attribute__((aligned(128))) = 0;
long hits __attribute__((aligned(128))) = 0;

const volatile long wakeup_data_size = 0;

@@ -24,6 +27,9 @@ static __always_inline long get_flags()
{
	long sz;

	if (bench_producer)
		return BPF_RB_NO_WAKEUP;

	if (!wakeup_data_size)
		return 0;

@@ -47,6 +53,8 @@ int bench_ringbuf(void *ctx)
				*sample = sample_val;
				flags = get_flags();
				bpf_ringbuf_submit(sample, flags);
				if (bench_producer)
					__sync_add_and_fetch(&hits, 1);
			}
		}
	} else {
@@ -55,6 +63,9 @@ int bench_ringbuf(void *ctx)
			if (bpf_ringbuf_output(&ringbuf, &sample_val,
					       sizeof(sample_val), flags))
				__sync_add_and_fetch(&dropped, 1);
			else if (bench_producer)
				__sync_add_and_fetch(&hits, 1);

		}
	}
	return 0;