Commit 4c229f33 authored by Andrii Nakryiko
Browse files

Merge branch 'libbpf-fix-usdt-sib-argument-handling-causing-unrecognized-register-error'

Jiawei Zhao says:

====================
libbpf: fix USDT SIB argument handling causing unrecognized register error

When using GCC on x86-64 to compile a USDT program with -O1 or higher
optimization, the compiler will generate SIB (Scale-Index-Base) addressing
mode for global array accesses, e.g. "1@-96(%rbp,%rax,8)".

The current USDT implementation in libbpf cannot parse this argument format,
causing `bpf_program__attach_usdt()` to fail with -ENOENT
(unrecognized register).

This patch series adds support for SIB addressing mode in USDT probes.
The main changes include:
- add correct handling logic for SIB-addressed arguments in
  `parse_usdt_arg`.
- add a usdt_o2 test case to cover SIB addressing mode.

Testing shows that the SIB probe correctly generates 8@(%rcx,%rax,8)
argument spec and passes all validation checks.

The modification history of this patch series:
Change since v1:
- refactor the code to make it more readable
- modify the commit message to explain why and how

Change since v2:
- fix the `scale` uninitialized error

Change since v3:
- force -O2 optimization for usdt.test.o to generate SIB addressing usdt
  and pass all test cases.

Change since v4:
- split the patch into two parts, one for the fix and the other for the
  test

Change since v5:
- Only enable optimization for x86 architecture to generate SIB addressing
  usdt argument spec.

Change since v6:
- Add a usdt_o2 test case to cover SIB addressing mode.
- Reinstate the usdt.c test case.

Change since v7:
- Refactor modifications to __bpf_usdt_arg_spec to avoid increasing its size,
  achieving better compatibility
- Fix some minor code style issues
- Refactor the usdt_o2 test case, removing semaphore and adding GCC attribute
  to force -O2 optimization

Change since v8:
- Refactor the usdt_o2 test case, using assembly to force SIB addressing mode.

Change since v9:
- Only enable the usdt_o2 test case on x86_64 and i386 architectures since the
  SIB addressing mode is only supported on x86_64 and i386.

Change since v10:
- Replace `__attribute__((optimize("O2")))` with `#pragma GCC optimize("O1")`
  to fix the issue where the optimized compilation condition works improperly.
- Rename the usdt_o2 test case and the related files to usdt_o1, since O1-level
  optimization is enough to generate a SIB-addressed USDT argument spec.

Change since v11:
- Replace `STAP_PROBE1` with `STAP_PROBE_ASM`
- Use bit fields instead of bit shifting operations
- Merge the usdt_o1 test case into the usdt test case

Change since v12:
- This patch is the same as v12, but with a new version number.

Change since v13(resolve some review comments):
- https://lore.kernel.org/bpf/CAEf4BzZWd2zUC=U6uGJFF3EMZ7zWGLweQAG3CJWTeHy-5yFEPw@mail.gmail.com/
- https://lore.kernel.org/bpf/CAEf4Bzbs3hV_Q47+d93tTX13WkrpkpOb4=U04mZCjHyZg4aVdw@mail.gmail.com/

Change since v14:
- fix a typo in __bpf_usdt_arg_spec

Change since v15(resolve some review comments):
- https://lore.kernel.org/bpf/CAEf4BzaxuYijEfQMDFZ+CQdjxLuDZiesUXNA-SiopS+5+VxRaA@mail.gmail.com/
- https://lore.kernel.org/bpf/CAEf4BzaHi5kpuJ6OVvDU62LT5g0qHbWYMfb_FBQ3iuvvUF9fag@mail.gmail.com/
- https://lore.kernel.org/bpf/d438bf3a-a9c9-4d34-b814-63f2e9bb3a85@linux.dev/
====================

Link: https://patch.msgid.link/20250827053128.1301287-1-phoenix500526@163.com


Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
parents d3abefe8 69424097
Loading
Loading
Loading
Loading
+42 −2
Original line number Diff line number Diff line
@@ -34,13 +34,32 @@ enum __bpf_usdt_arg_type {
	BPF_USDT_ARG_CONST,
	BPF_USDT_ARG_REG,
	BPF_USDT_ARG_REG_DEREF,
	BPF_USDT_ARG_SIB,
};

/*
 * This struct layout is designed specifically to be backwards/forward
 * compatible between libbpf versions for ARG_CONST, ARG_REG, and
 * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
 */
struct __bpf_usdt_arg_spec {
	/* u64 scalar interpreted depending on arg_type, see below */
	__u64 val_off;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* arg location case, see bpf_usdt_arg() for details */
	enum __bpf_usdt_arg_type arg_type;
	enum __bpf_usdt_arg_type arg_type: 8;
	/* index register offset within struct pt_regs */
	__u16 idx_reg_off: 12;
	/* scale factor for index register (1, 2, 4, or 8) */
	__u16 scale_bitshift: 4;
	/* reserved for future use, keeps reg_off offset stable */
	__u8 __reserved: 8;
#else
	__u8 __reserved: 8;
	__u16 idx_reg_off: 12;
	__u16 scale_bitshift: 4;
	enum __bpf_usdt_arg_type arg_type: 8;
#endif
	/* offset of referenced register within struct pt_regs */
	short reg_off;
	/* whether arg should be interpreted as signed value */
@@ -149,7 +168,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
{
	struct __bpf_usdt_spec *spec;
	struct __bpf_usdt_arg_spec *arg_spec;
	unsigned long val;
	unsigned long val, idx;
	int err, spec_id;

	*res = 0;
@@ -202,6 +221,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
			return err;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		val >>= arg_spec->arg_bitshift;
#endif
		break;
	case BPF_USDT_ARG_SIB:
		/* Arg is in memory addressed by SIB (Scale-Index-Base) mode
		 * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
		 * fetch the base register contents and the index register
		 * contents from pt_regs. Then we calculate the final address
		 * as base + (index * scale) + offset, and do a user-space
		 * probe read to fetch the argument value.
		 */
		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
		if (err)
			return err;
		err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
		if (err)
			return err;
		err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
		if (err)
			return err;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		val >>= arg_spec->arg_bitshift;
#endif
		break;
	default:
+57 −5
Original line number Diff line number Diff line
@@ -200,12 +200,23 @@ enum usdt_arg_type {
	USDT_ARG_CONST,
	USDT_ARG_REG,
	USDT_ARG_REG_DEREF,
	USDT_ARG_SIB,
};

/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
struct usdt_arg_spec {
	__u64 val_off;
	enum usdt_arg_type arg_type;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	enum usdt_arg_type arg_type: 8;
	__u16	idx_reg_off: 12;
	__u16	scale_bitshift: 4;
	__u8 __reserved: 8;     /* keep reg_off offset stable */
#else
	__u8 __reserved: 8;     /* keep reg_off offset stable */
	__u16	idx_reg_off: 12;
	__u16	scale_bitshift: 4;
	enum usdt_arg_type arg_type: 8;
#endif
	short reg_off;
	bool arg_signed;
	char arg_bitshift;
@@ -1283,11 +1294,51 @@ static int calc_pt_regs_off(const char *reg_name)

static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
	char reg_name[16];
	int len, reg_off;
	long off;
	char reg_name[16] = {0}, idx_reg_name[16] = {0};
	int len, reg_off, idx_reg_off, scale = 1;
	long off = 0;

	if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n",
		   arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 ||
		sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n",
		       arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 ||
		sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n",
		       arg_sz, &off, reg_name, idx_reg_name, &len) == 4 ||
		sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n",
		       arg_sz, reg_name, idx_reg_name, &len) == 3
		) {
		/*
		 * Scale Index Base case:
		 * 1@-96(%rbp,%rax,8)
		 * 1@(%rbp,%rax,8)
		 * 1@-96(%rbp,%rax)
		 * 1@(%rbp,%rax)
		 */
		arg->arg_type = USDT_ARG_SIB;
		arg->val_off = off;

	if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
		reg_off = calc_pt_regs_off(reg_name);
		if (reg_off < 0)
			return reg_off;
		arg->reg_off = reg_off;

		idx_reg_off = calc_pt_regs_off(idx_reg_name);
		if (idx_reg_off < 0)
			return idx_reg_off;
		arg->idx_reg_off = idx_reg_off;

		/* validate scale factor and set fields directly */
		switch (scale) {
		case 1: arg->scale_bitshift = 0; break;
		case 2: arg->scale_bitshift = 1; break;
		case 4: arg->scale_bitshift = 2; break;
		case 8: arg->scale_bitshift = 3; break;
		default:
			pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale);
			return -EINVAL;
		}
	} else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
				arg_sz, &off, reg_name, &len) == 3) {
		/* Memory dereference case, e.g., -4@-20(%rbp) */
		arg->arg_type = USDT_ARG_REG_DEREF;
		arg->val_off = off;
@@ -1306,6 +1357,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
	} else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
		/* Register read case, e.g., -4@%eax */
		arg->arg_type = USDT_ARG_REG;
		/* register read has no memory offset */
		arg->val_off = 0;

		reg_off = calc_pt_regs_off(reg_name);
+81 −2
Original line number Diff line number Diff line
@@ -40,12 +40,72 @@ static void __always_inline trigger_func(int x) {
	}
}

#if defined(__x86_64__) || defined(__i386__)
/*
 * SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)"
 * - 'size' is the size in bytes of the array element, and its sign indicates
 *   whether the type is signed (negative) or unsigned (positive).
 * - 'base_reg' is the register holding the base address, normally rdx or edx
 * - 'index_reg' is the register holding the index, normally rax or eax
 * - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the
 *    size of the element type.
 *
 * For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be:
 * - size: -2 (negative because 'short' is signed)
 * - scale: 2 (since sizeof(short) == 2)
 *
 * The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386
 */
/* Backing data for the SIB probe argument: signed 2-byte elements. */
static volatile short array[] = {-1, -2, -3, -4};

/*
 * USDT argument spec passed to STAP_PROBE_ASM: a signed 2-byte value located
 * at base + index * 2, spelled with the 64-bit register names on x86_64 and
 * the 32-bit names otherwise. The '%' is doubled because the spec ends up
 * inside an extended-asm template that has operands, where "%%" denotes a
 * literal '%'.
 */
#if defined(__x86_64__)
#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2)
#else
#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2)
#endif

/*
 * NOTE(review): placed in the ".probes" section, presumably as the USDT
 * semaphore for the test:usdt_sib probe - confirm that the probe emitted via
 * STAP_PROBE_ASM actually references it.
 */
unsigned short test_usdt_sib_semaphore SEC(".probes");

/*
 * Fire the "test:usdt_sib" USDT probe once, with a single argument described
 * by USDT_SIB_ARG_SPEC (SIB-addressed element of 'array' at index 0).
 */
static void trigger_sib_spec(void)
{
	/*
	 * Force SIB addressing with inline assembly.
	 *
	 * You must compile with -std=gnu99 or -std=c99 to use the
	 * STAP_PROBE_ASM macro.
	 *
	 * The STAP_PROBE_ASM macro generates a quoted string that is used as
	 * the template of the asm statement below. USDT_SIB_ARG_SPEC is
	 * embedded verbatim as the probe's argument spec, so the probe site
	 * is created exactly where this asm statement is emitted.
	 * It works fine with gcc/clang.
	 *
	 * Register constraints:
	 * - "d"(array): binds the address of 'array' to %rdx (or %edx)
	 * - "a"(0): binds the constant 0 to %rax (or %eax)
	 * These ensure that the base and index registers named in
	 * USDT_SIB_ARG_SPEC hold the expected values when the probe fires,
	 * i.e. the argument resolves to array[0].
	 *
	 * The "memory" clobber prevents the compiler from reordering memory
	 * accesses around the probe point, ensuring that the probe behavior
	 * is predictable and consistent.
	 */
	asm volatile(
		STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC)
		:
		: "d"(array), "a"(0)
		: "memory"
	);
}
#endif

static void subtest_basic_usdt(void)
{
	LIBBPF_OPTS(bpf_usdt_opts, opts);
	struct test_usdt *skel;
	struct test_usdt__bss *bss;
	int err, i;
	const __u64 expected_cookie = 0xcafedeadbeeffeed;

	skel = test_usdt__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -59,20 +119,29 @@ static void subtest_basic_usdt(void)
		goto cleanup;

	/* usdt0 won't be auto-attached */
	opts.usdt_cookie = 0xcafedeadbeeffeed;
	opts.usdt_cookie = expected_cookie;
	skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
						     0 /*self*/, "/proc/self/exe",
						     "test", "usdt0", &opts);
	if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
		goto cleanup;

#if defined(__x86_64__) || defined(__i386__)
	opts.usdt_cookie = expected_cookie;
	skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib,
							 0 /*self*/, "/proc/self/exe",
							 "test", "usdt_sib", &opts);
	if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link"))
		goto cleanup;
#endif

	trigger_func(1);

	ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
	ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
	ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");

	ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
	ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie");
	ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
	ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
	ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
@@ -156,6 +225,16 @@ static void subtest_basic_usdt(void)
	ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
	ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");

#if defined(__x86_64__) || defined(__i386__)
	trigger_sib_spec();
	ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called");
	ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie");
	ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt");
	ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg");
	ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret");
	ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size");
#endif

cleanup:
	test_usdt__destroy(skel);
}
+31 −0
Original line number Diff line number Diff line
@@ -107,4 +107,35 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
	return 0;
}

/* Results recorded by the usdt_sib program for user space to check. */
/* number of times usdt_sib ran for the test process */
int usdt_sib_called;
/* value returned by bpf_usdt_cookie() */
u64 usdt_sib_cookie;
/* value returned by bpf_usdt_arg_cnt() */
int usdt_sib_arg_cnt;
/* return code of bpf_usdt_arg() for argument 0 */
int usdt_sib_arg_ret;
/* argument 0, truncated to short */
short usdt_sib_arg;
/* value returned by bpf_usdt_arg_size() for argument 0 */
int usdt_sib_arg_size;

/*
 * Handler for the SIB-addressed USDT probe. The probe only exists on
 * x86-related architectures, so this program is attached manually by the
 * test; auto-attach would panic tests under other architectures.
 *
 * Records the call count, cookie, argument count, and the fetch result,
 * value and size of argument 0 into the usdt_sib_* globals.
 */
SEC("usdt")
int usdt_sib(struct pt_regs *ctx)
{
	long arg_val;

	/* ignore hits that do not come from the test process */
	if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
		return 0;

	__sync_fetch_and_add(&usdt_sib_called, 1);

	/* probe-level metadata */
	usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
	usdt_sib_cookie = bpf_usdt_cookie(ctx);

	/* fetch argument 0 and keep both the status and the narrowed value */
	usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &arg_val);
	usdt_sib_arg = (short)arg_val;
	usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);

	return 0;
}

char _license[] SEC("license") = "GPL";