Commit 590016ad authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'fix-bpf-multi-uprobe-pid-filtering-logic'

Andrii Nakryiko says:

====================
Fix BPF multi-uprobe PID filtering logic

It turns out that current implementation of multi-uprobe PID filtering logic
is broken. It filters by thread, while the promise is filtering by process.
Patch #1 fixes the logic trivially. The rest is testing and mitigations that
are necessary for libbpf to not break users of USDT programs.

v1->v2:
  - fix selftest in last patch (CI);
  - use semicolon in patch #3 (Jiri).
====================

Link: https://lore.kernel.org/r/20240521163401.3005045-1-andrii@kernel.org


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 44382b3e 198034a8
Loading
Loading
Loading
Loading
+4 −6
Original line number Diff line number Diff line
@@ -3295,7 +3295,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
	struct bpf_run_ctx *old_run_ctx;
	int err = 0;

	if (link->task && current != link->task)
	if (link->task && current->mm != link->task->mm)
		return 0;

	if (sleepable)
@@ -3396,8 +3396,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
	upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
	uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
	cnt = attr->link_create.uprobe_multi.cnt;
	pid = attr->link_create.uprobe_multi.pid;

	if (!upath || !uoffsets || !cnt)
	if (!upath || !uoffsets || !cnt || pid < 0)
		return -EINVAL;
	if (cnt > MAX_UPROBE_MULTI_CNT)
		return -E2BIG;
@@ -3421,11 +3422,8 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
		goto error_path_put;
	}

	pid = attr->link_create.uprobe_multi.pid;
	if (pid) {
		rcu_read_lock();
		task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
		rcu_read_unlock();
		task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
		if (!task) {
			err = -ESRCH;
			goto error_path_put;
+30 −1
Original line number Diff line number Diff line
@@ -392,11 +392,40 @@ static int probe_uprobe_multi_link(int token_fd)
	link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
	err = -errno; /* close() can clobber errno */

	if (link_fd >= 0 || err != -EBADF) {
		close(link_fd);
		close(prog_fd);
		return 0;
	}

	/* Initial multi-uprobe support in kernel didn't handle PID filtering
	 * correctly (it was doing thread filtering, not process filtering).
	 * So now we'll detect if PID filtering logic was fixed, and, if not,
	 * we'll pretend multi-uprobes are not supported, if not.
	 * Multi-uprobes are used in USDT attachment logic, and we need to be
	 * conservative here, because multi-uprobe selection happens early at
	 * load time, while the use of PID filtering is known late at
	 * attachment time, at which point it's too late to undo multi-uprobe
	 * selection.
	 *
	 * Creating uprobe with pid == -1 for (invalid) '/' binary will fail
	 * early with -EINVAL on kernels with fixed PID filtering logic;
	 * otherwise -ESRCH would be returned if passed correct binary path
	 * (but we'll just get -BADF, of course).
	 */
	link_opts.uprobe_multi.pid = -1; /* invalid PID */
	link_opts.uprobe_multi.path = "/"; /* invalid path */
	link_opts.uprobe_multi.offsets = &offset;
	link_opts.uprobe_multi.cnt = 1;

	link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
	err = -errno; /* close() can clobber errno */

	if (link_fd >= 0)
		close(link_fd);
	close(prog_fd);

	return link_fd < 0 && err == -EBADF;
	return link_fd < 0 && err == -EINVAL;
}

static int probe_kern_bpf_cookie(int token_fd)
+126 −8
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#include <unistd.h>
#include <pthread.h>
#include <test_progs.h>
#include "uprobe_multi.skel.h"
#include "uprobe_multi_bench.skel.h"
#include "uprobe_multi_usdt.skel.h"
#include "bpf/libbpf_internal.h"
#include "testing_helpers.h"
#include "../sdt.h"

static char test_data[] = "test_data";

@@ -25,9 +27,17 @@ noinline void uprobe_multi_func_3(void)
	asm volatile ("");
}

noinline void usdt_trigger(void)
{
	STAP_PROBE(test, pid_filter_usdt);
}

struct child {
	int go[2];
	int c2p[2]; /* child -> parent channel */
	int pid;
	int tid;
	pthread_t thread;
};

static void release_child(struct child *child)
@@ -38,6 +48,10 @@ static void release_child(struct child *child)
		return;
	close(child->go[1]);
	close(child->go[0]);
	if (child->thread)
		pthread_join(child->thread, NULL);
	close(child->c2p[0]);
	close(child->c2p[1]);
	if (child->pid > 0)
		waitpid(child->pid, &child_status, 0);
}
@@ -63,7 +77,7 @@ static struct child *spawn_child(void)
	if (pipe(child.go))
		return NULL;

	child.pid = fork();
	child.pid = child.tid = fork();
	if (child.pid < 0) {
		release_child(&child);
		errno = EINVAL;
@@ -82,6 +96,7 @@ static struct child *spawn_child(void)
		uprobe_multi_func_1();
		uprobe_multi_func_2();
		uprobe_multi_func_3();
		usdt_trigger();

		exit(errno);
	}
@@ -89,6 +104,67 @@ static struct child *spawn_child(void)
	return &child;
}

static void *child_thread(void *ctx)
{
	struct child *child = ctx;
	int c = 0, err;

	child->tid = syscall(SYS_gettid);

	/* let parent know we are ready */
	err = write(child->c2p[1], &c, 1);
	if (err != 1)
		pthread_exit(&err);

	/* wait for parent's kick */
	err = read(child->go[0], &c, 1);
	if (err != 1)
		pthread_exit(&err);

	uprobe_multi_func_1();
	uprobe_multi_func_2();
	uprobe_multi_func_3();
	usdt_trigger();

	err = 0;
	pthread_exit(&err);
}

static struct child *spawn_thread(void)
{
	static struct child child;
	int c, err;

	/* pipe to notify child to execute the trigger functions */
	if (pipe(child.go))
		return NULL;
	/* pipe to notify parent that child thread is ready */
	if (pipe(child.c2p)) {
		close(child.go[0]);
		close(child.go[1]);
		return NULL;
	}

	child.pid = getpid();

	err = pthread_create(&child.thread, NULL, child_thread, &child);
	if (err) {
		err = -errno;
		close(child.go[0]);
		close(child.go[1]);
		close(child.c2p[0]);
		close(child.c2p[1]);
		errno = -err;
		return NULL;
	}

	err = read(child.c2p[0], &c, 1);
	if (!ASSERT_EQ(err, 1, "child_thread_ready"))
		return NULL;

	return &child;
}

static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child)
{
	skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
@@ -103,14 +179,22 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
	 * passed at the probe attach.
	 */
	skel->bss->pid = child ? 0 : getpid();
	skel->bss->expect_pid = child ? child->pid : 0;

	if (child)
		kick_child(child);

	/* trigger all probes */
	/* trigger all probes, if we are testing child *process*, just to make
	 * sure that PID filtering doesn't let through activations from wrong
	 * PIDs; when we test child *thread*, we don't want to do this to
	 * avoid double counting number of triggering events
	 */
	if (!child || !child->thread) {
		uprobe_multi_func_1();
		uprobe_multi_func_2();
		uprobe_multi_func_3();
		usdt_trigger();
	}

	if (child)
		kick_child(child);

	/*
	 * There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123]
@@ -126,8 +210,12 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child

	ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result");

	if (child)
	ASSERT_FALSE(skel->bss->bad_pid_seen, "bad_pid_seen");

	if (child) {
		ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid");
		ASSERT_EQ(skel->bss->child_tid, child->tid, "uprobe_multi_child_tid");
	}
}

static void test_skel_api(void)
@@ -190,8 +278,24 @@ __test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_mul
	if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi"))
		goto cleanup;

	/* Attach (uprobe-backed) USDTs */
	skel->links.usdt_pid = bpf_program__attach_usdt(skel->progs.usdt_pid, pid, binary,
							"test", "pid_filter_usdt", NULL);
	if (!ASSERT_OK_PTR(skel->links.usdt_pid, "attach_usdt_pid"))
		goto cleanup;

	skel->links.usdt_extra = bpf_program__attach_usdt(skel->progs.usdt_extra, -1, binary,
							  "test", "pid_filter_usdt", NULL);
	if (!ASSERT_OK_PTR(skel->links.usdt_extra, "attach_usdt_extra"))
		goto cleanup;

	uprobe_multi_test_run(skel, child);

	ASSERT_FALSE(skel->bss->bad_pid_seen_usdt, "bad_pid_seen_usdt");
	if (child) {
		ASSERT_EQ(skel->bss->child_pid_usdt, child->pid, "usdt_multi_child_pid");
		ASSERT_EQ(skel->bss->child_tid_usdt, child->tid, "usdt_multi_child_tid");
	}
cleanup:
	uprobe_multi__destroy(skel);
}
@@ -210,6 +314,13 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi
		return;

	__test_attach_api(binary, pattern, opts, child);

	/* pid filter (thread) */
	child = spawn_thread();
	if (!ASSERT_OK_PTR(child, "spawn_thread"))
		return;

	__test_attach_api(binary, pattern, opts, child);
}

static void test_attach_api_pattern(void)
@@ -397,7 +508,7 @@ static void test_attach_api_fails(void)
	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
	if (!ASSERT_ERR(link_fd, "link_fd"))
		goto cleanup;
	ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong");
	ASSERT_EQ(link_fd, -EINVAL, "pid_is_wrong");

cleanup:
	if (link_fd >= 0)
@@ -495,6 +606,13 @@ static void test_link_api(void)
		return;

	__test_link_api(child);

	/* pid filter (thread) */
	child = spawn_thread();
	if (!ASSERT_OK_PTR(child, "spawn_thread"))
		return;

	__test_link_api(child);
}

static void test_bench_attach_uprobe(void)
+46 −4
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>
#include <bpf/usdt.bpf.h>

char _license[] SEC("license") = "GPL";

@@ -22,6 +22,13 @@ __u64 uprobe_multi_sleep_result = 0;

int pid = 0;
int child_pid = 0;
int child_tid = 0;
int child_pid_usdt = 0;
int child_tid_usdt = 0;

int expect_pid = 0;
bool bad_pid_seen = false;
bool bad_pid_seen_usdt = false;

bool test_cookie = false;
void *user_ptr = 0;
@@ -36,11 +43,19 @@ static __always_inline bool verify_sleepable_user_copy(void)

static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep)
{
	child_pid = bpf_get_current_pid_tgid() >> 32;
	__u64 cur_pid_tgid = bpf_get_current_pid_tgid();
	__u32 cur_pid;

	if (pid && child_pid != pid)
	cur_pid = cur_pid_tgid >> 32;
	if (pid && cur_pid != pid)
		return;

	if (expect_pid && cur_pid != expect_pid)
		bad_pid_seen = true;

	child_pid = cur_pid_tgid >> 32;
	child_tid = (__u32)cur_pid_tgid;

	__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
	__u64 addr = bpf_get_func_ip(ctx);

@@ -97,5 +112,32 @@ int uretprobe_sleep(struct pt_regs *ctx)
SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uprobe_extra(struct pt_regs *ctx)
{
	/* we need this one just to mix PID-filtered and global uprobes */
	return 0;
}

SEC("usdt")
int usdt_pid(struct pt_regs *ctx)
{
	__u64 cur_pid_tgid = bpf_get_current_pid_tgid();
	__u32 cur_pid;

	cur_pid = cur_pid_tgid >> 32;
	if (pid && cur_pid != pid)
		return 0;

	if (expect_pid && cur_pid != expect_pid)
		bad_pid_seen_usdt = true;

	child_pid_usdt = cur_pid_tgid >> 32;
	child_tid_usdt = (__u32)cur_pid_tgid;

	return 0;
}

SEC("usdt")
int usdt_extra(struct pt_regs *ctx)
{
	/* we need this one just to mix PID-filtered and global USDT probes */
	return 0;
}