Commit f4efc73b authored by Alexei Starovoitov
Browse files

Merge branch 'introduce-kfuncs-for-memory-reads-into-dynptrs'

Mykyta Yatsenko says:

====================
Introduce kfuncs for memory reads into dynptrs

From: Mykyta Yatsenko <yatsenko@meta.com>

This patch adds new kfuncs that enable reading variable-length
user or kernel data directly into dynptrs.
These kfuncs provide a way to perform dynamically-sized reads
while maintaining memory safety. Unlike existing
`bpf_probe_read_{user|kernel}` APIs, which are limited to constant-sized
reads, these new kfuncs allow for more flexible data access.

v4 -> v5
 * Fix pointer annotations, use __user where necessary, cast where needed

v3 -> v4
 * Added pid filtering in selftests

v2 -> v3
 * Add KF_TRUSTED_ARGS for kfuncs that take pointer to task_struct
 as an argument
 * Remove checks for non-NULL task, where it was not necessary
 * Added comments on constants used in selftests, etc.

v1 -> v2
 * Renaming helper functions to use "user_str" instead of "user_data_str"
 suffix

====================

Link: https://patch.msgid.link/20250512205348.191079-1-mykyta.yatsenko5@gmail.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents fd5fd538 c61bcd29
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -1349,6 +1349,20 @@ u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset,
		       void *src, u32 len, u64 flags);
void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
			    void *buffer__opt, u32 buffer__szk);

/*
 * Check that the byte range [offset, offset + len) lies inside the dynptr.
 *
 * Returns 0 when the range fits and -E2BIG when it does not.
 */
static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
{
	u32 avail = __bpf_dynptr_size(ptr);

	/*
	 * len is compared against the size first, so "avail - len" below
	 * can never wrap around.
	 */
	if (len <= avail && offset <= avail - len)
		return 0;

	return -E2BIG;
}

#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+10 −12
Original line number Diff line number Diff line
@@ -1714,16 +1714,6 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
	memset(ptr, 0, sizeof(*ptr));
}

/*
 * Check that the byte range [offset, offset + len) lies inside the dynptr.
 *
 * Returns 0 when the range fits and -E2BIG when it does not.
 */
static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
{
	u32 avail = __bpf_dynptr_size(ptr);

	/*
	 * len is compared against the size first, so "avail - len" below
	 * can never wrap around.
	 */
	if (len <= avail && offset <= avail - len)
		return 0;

	return -E2BIG;
}

BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
{
	int err;
@@ -1810,7 +1800,7 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
	.arg5_type	= ARG_ANYTHING,
};

static int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
		       u32 len, u64 flags)
{
	enum bpf_dynptr_type type;
@@ -3388,6 +3378,14 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_local_irq_save)
BTF_ID_FLAGS(func, bpf_local_irq_restore)
BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr)
BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr)
BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr)
BTF_ID_FLAGS(func, bpf_probe_read_kernel_str_dynptr)
BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
BTF_KFUNCS_END(common_btf_ids)

static const struct btf_kfunc_id_set common_kfunc_set = {
+194 −0
Original line number Diff line number Diff line
@@ -3466,6 +3466,142 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)

late_initcall(bpf_kprobe_multi_kfuncs_init);

/*
 * Common signature of the copy callbacks used by the dynptr copy helpers.
 *
 * A callback copies up to @size bytes from the untrusted address @src into
 * @dst. @tsk optionally names the task whose memory is read; callbacks that
 * always read from the current context ignore it.
 *
 * Callers of data callbacks treat any nonzero return as failure. String
 * callbacks are expected to return the number of bytes copied, including the
 * NUL terminator, or a negative error code.
 */
typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);

/*
 * Copy a NUL-terminated string from @unsafe_src into @dptr, starting at
 * dynptr offset @doff and producing at most @size bytes.
 *
 * This helper is __always_inline so that @str_copy_fn is known at every call
 * site: the compiler can then emit direct calls into the specific callback
 * implementations (copy_user_str_sleepable, copy_user_str_nofault, and so on)
 * instead of indirect calls, which are expensive on some kernel
 * configurations.
 *
 * Returns what @str_copy_fn reports when the destination can be written
 * directly; on the chunked path, the total number of bytes written including
 * the NUL terminator, -E2BIG if the range does not pass the dynptr bounds
 * check, or the error of the failing copy/write step.
 */
static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size,
						 const void *unsafe_src,
						 copy_fn_t str_copy_fn,
						 struct task_struct *tsk)
{
	struct bpf_dynptr_kern *kdst = (struct bpf_dynptr_kern *)dptr;
	char bounce[256];
	void *direct;
	u32 pos = 0;

	/* Fast path: the destination range is directly writable. */
	direct = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
	if (likely(direct))
		return str_copy_fn(direct, unsafe_src, size, tsk);

	if (bpf_dynptr_check_off_len(kdst, doff, size))
		return -E2BIG;

	/* Slow path: stage the string through an on-stack bounce buffer. */
	while (pos < size) {
		u32 want = min_t(u32, sizeof(bounce), size - pos);
		int got, err;

		/*
		 * str_copy_fn is expected to return the count of copied
		 * bytes, including the zero terminator.
		 */
		got = str_copy_fn(bounce, unsafe_src + pos, want, tsk);
		if (got < 0)
			return got;

		err = __bpf_dynptr_write(kdst, doff + pos, bounce, got, 0);
		if (err)
			return err;

		/* Short chunk, or only room for the NUL: we are done. */
		if (got < want || want == 1)
			return pos + got;

		/*
		 * Advance by one byte less than the chunk so that the next
		 * chunk overwrites the NUL that ended this one.
		 */
		pos += want - 1;
	}
	return pos;
}

/*
 * Copy @size bytes from @unsafe_src into @dptr at dynptr offset @doff, using
 * @copy_fn to perform the actual (possibly faulting) read.
 *
 * Returns what @copy_fn reports when the destination can be written directly;
 * on the chunked path, 0 on success, -E2BIG if the range does not pass the
 * dynptr bounds check, or the nonzero result of the failing copy/write step.
 */
static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff,
					     u32 size, const void *unsafe_src,
					     copy_fn_t copy_fn, struct task_struct *tsk)
{
	struct bpf_dynptr_kern *kdst = (struct bpf_dynptr_kern *)dptr;
	char bounce[256];
	void *direct;
	u32 pos = 0;

	/* Fast path: the destination range is directly writable. */
	direct = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
	if (likely(direct))
		return copy_fn(direct, unsafe_src, size, tsk);

	if (bpf_dynptr_check_off_len(kdst, doff, size))
		return -E2BIG;

	/* Slow path: move the data in chunks through an on-stack buffer. */
	while (pos < size) {
		u32 want = min_t(u32, sizeof(bounce), size - pos);
		int err;

		err = copy_fn(bounce, unsafe_src + pos, want, tsk);
		if (!err)
			err = __bpf_dynptr_write(kdst, doff + pos, bounce, want, 0);
		if (err)
			return err;

		pos += want;
	}
	return 0;
}

/*
 * copy_fn_t callback: read @size bytes of user memory at @unsafe_src into
 * @dst via copy_from_user_nofault(). @tsk is unused.
 */
static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src,
						  u32 size, struct task_struct *tsk)
{
	const void __user *usrc = (const void __user *)unsafe_src;

	return copy_from_user_nofault(dst, usrc, size);
}

/*
 * copy_fn_t callback: read @size bytes of user memory at @unsafe_src into
 * @dst.
 *
 * When @tsk is NULL the data is read from the current task with
 * copy_from_user(); otherwise it is read from @tsk's address space with
 * access_process_vm().
 *
 * Returns 0 on success and -EFAULT if fewer than @size bytes could be read.
 */
static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src,
						    u32 size, struct task_struct *tsk)
{
	int ret;

	if (!tsk) { /* Read from the current task */
		/*
		 * copy_from_user() returns the number of bytes that could
		 * NOT be copied, not an errno. Translate any shortfall into
		 * -EFAULT so callers never see a positive "error" value.
		 */
		if (copy_from_user(dst, (const void __user *)unsafe_src, size))
			return -EFAULT;
		return 0;
	}

	/* access_process_vm() returns the number of bytes transferred. */
	ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0);
	if (ret != size)
		return -EFAULT;
	return 0;
}

/*
 * copy_fn_t callback: read @size bytes of kernel memory at @unsafe_src into
 * @dst via copy_from_kernel_nofault(). @tsk is unused.
 */
static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src,
						    u32 size, struct task_struct *tsk)
{
	int rc = copy_from_kernel_nofault(dst, unsafe_src, size);

	return rc;
}

/*
 * copy_fn_t callback: read a string of at most @size bytes from user memory
 * at @unsafe_src into @dst via strncpy_from_user_nofault(). @tsk is unused.
 */
static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src,
						 u32 size, struct task_struct *tsk)
{
	const void __user *usrc = (const void __user *)unsafe_src;

	return strncpy_from_user_nofault(dst, usrc, size);
}

/*
 * copy_fn_t callback: read a string of at most @size bytes (terminator
 * included) from user memory at @unsafe_src into @dst.
 *
 * When @tsk is NULL the string is read from the current task with
 * strncpy_from_user(); otherwise it is read from @tsk with
 * copy_remote_vm_str().
 *
 * Returns 0 for a zero-sized request, a negative error from the underlying
 * copy, or the underlying copy's result plus one (presumably the string
 * length plus its NUL terminator).
 */
static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src,
						   u32 size, struct task_struct *tsk)
{
	int copied;

	if (unlikely(!size))
		return 0;

	if (!tsk) {
		/* Leave one byte of room for the terminator added below. */
		copied = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1);
		if (copied < 0)
			return copied;

		/* strncpy_from_user does not guarantee NUL termination */
		((char *)dst)[copied] = '\0';
		return copied + 1;
	}

	copied = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0);
	if (copied < 0)
		return copied;

	return copied + 1;
}

/*
 * copy_fn_t callback: read a string of at most @size bytes from kernel memory
 * at @unsafe_src into @dst via strncpy_from_kernel_nofault(). @tsk is unused.
 */
static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src,
						   u32 size, struct task_struct *tsk)
{
	int rc = strncpy_from_kernel_nofault(dst, unsafe_src, size);

	return rc;
}

__bpf_kfunc_start_defs();

__bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
@@ -3477,4 +3613,62 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid
	return bpf_send_signal_common(sig, type, task, value);
}

/**
 * bpf_probe_read_user_dynptr() - read user memory into a dynptr without faulting
 * @dptr: destination dynptr
 * @off: offset in the destination dynptr
 * @size: number of bytes to copy
 * @unsafe_ptr__ign: untrusted user-space source address
 *
 * Return: the result of __bpf_dynptr_copy() using copy_user_data_nofault().
 */
__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off,
					   u32 size, const void __user *unsafe_ptr__ign)
{
	const void *src = (const void *)unsafe_ptr__ign;

	return __bpf_dynptr_copy(dptr, off, size, src,
				 copy_user_data_nofault, NULL);
}

/**
 * bpf_probe_read_kernel_dynptr() - read kernel memory into a dynptr without faulting
 * @dptr: destination dynptr
 * @off: offset in the destination dynptr
 * @size: number of bytes to copy
 * @unsafe_ptr__ign: untrusted kernel source address
 *
 * Return: the result of __bpf_dynptr_copy() using copy_kernel_data_nofault().
 */
__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off,
					     u32 size, const void *unsafe_ptr__ign)
{
	const void *src = unsafe_ptr__ign;

	return __bpf_dynptr_copy(dptr, off, size, src,
				 copy_kernel_data_nofault, NULL);
}

/**
 * bpf_probe_read_user_str_dynptr() - read a user string into a dynptr without faulting
 * @dptr: destination dynptr
 * @off: offset in the destination dynptr
 * @size: maximum number of bytes to copy
 * @unsafe_ptr__ign: untrusted user-space source address
 *
 * Return: the result of __bpf_dynptr_copy_str() using copy_user_str_nofault().
 */
__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
					       u32 size, const void __user *unsafe_ptr__ign)
{
	const void *src = (const void *)unsafe_ptr__ign;

	return __bpf_dynptr_copy_str(dptr, off, size, src,
				     copy_user_str_nofault, NULL);
}

/**
 * bpf_probe_read_kernel_str_dynptr() - read a kernel string into a dynptr without faulting
 * @dptr: destination dynptr
 * @off: offset in the destination dynptr
 * @size: maximum number of bytes to copy
 * @unsafe_ptr__ign: untrusted kernel source address
 *
 * Return: the result of __bpf_dynptr_copy_str() using copy_kernel_str_nofault().
 */
__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off,
						 u32 size, const void *unsafe_ptr__ign)
{
	const void *src = unsafe_ptr__ign;

	return __bpf_dynptr_copy_str(dptr, off, size, src,
				     copy_kernel_str_nofault, NULL);
}

/**
 * bpf_copy_from_user_dynptr() - read user memory of the current task into a dynptr
 * @dptr: destination dynptr
 * @off: offset in the destination dynptr
 * @size: number of bytes to copy
 * @unsafe_ptr__ign: untrusted user-space source address
 *
 * Passes a NULL task to copy_user_data_sleepable(), which therefore reads
 * from the current task.
 *
 * Return: the result of __bpf_dynptr_copy() using copy_user_data_sleepable().
 */
__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off,
					  u32 size, const void __user *unsafe_ptr__ign)
{
	const void *src = (const void *)unsafe_ptr__ign;

	return __bpf_dynptr_copy(dptr, off, size, src,
				 copy_user_data_sleepable, NULL);
}

/**
 * bpf_copy_from_user_str_dynptr() - read a user string of the current task into a dynptr
 * @dptr: destination dynptr
 * @off: offset in the destination dynptr
 * @size: maximum number of bytes to copy
 * @unsafe_ptr__ign: untrusted user-space source address
 *
 * Passes a NULL task to copy_user_str_sleepable(), which therefore reads
 * from the current task.
 *
 * Return: the result of __bpf_dynptr_copy_str() using copy_user_str_sleepable().
 */
__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
					      u32 size, const void __user *unsafe_ptr__ign)
{
	const void *src = (const void *)unsafe_ptr__ign;

	return __bpf_dynptr_copy_str(dptr, off, size, src,
				     copy_user_str_sleepable, NULL);
}

/**
 * bpf_copy_from_user_task_dynptr() - read user memory of a given task into a dynptr
 * @dptr: destination dynptr
 * @off: offset in the destination dynptr
 * @size: number of bytes to copy
 * @unsafe_ptr__ign: untrusted user-space source address in @tsk
 * @tsk: task whose memory is read
 *
 * Return: the result of __bpf_dynptr_copy() using copy_user_data_sleepable().
 */
__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off,
					       u32 size, const void __user *unsafe_ptr__ign,
					       struct task_struct *tsk)
{
	const void *src = (const void *)unsafe_ptr__ign;

	return __bpf_dynptr_copy(dptr, off, size, src,
				 copy_user_data_sleepable, tsk);
}

/**
 * bpf_copy_from_user_task_str_dynptr() - read a user string of a given task into a dynptr
 * @dptr: destination dynptr
 * @off: offset in the destination dynptr
 * @size: maximum number of bytes to copy
 * @unsafe_ptr__ign: untrusted user-space source address in @tsk
 * @tsk: task whose memory is read
 *
 * Return: the result of __bpf_dynptr_copy_str() using copy_user_str_sleepable().
 */
__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off,
						   u32 size, const void __user *unsafe_ptr__ign,
						   struct task_struct *tsk)
{
	const void *src = (const void *)unsafe_ptr__ign;

	return __bpf_dynptr_copy_str(dptr, off, size, src,
				     copy_user_str_sleepable, tsk);
}

__bpf_kfunc_end_defs();
+1 −0
Original line number Diff line number Diff line
# TEMPORARY
# Alphabetical order
dynptr/test_probe_read_user_str_dynptr # disabled until https://patchwork.kernel.org/project/linux-mm/patch/20250422131449.57177-1-mykyta.yatsenko5@gmail.com/ makes it into the bpf-next
get_stack_raw_tp    # spams with kernel warnings until next bpf -> bpf-next merge
stacktrace_build_id
stacktrace_build_id_nmi
+13 −0
Original line number Diff line number Diff line
@@ -33,10 +33,19 @@ static struct {
	{"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
	{"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
	{"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
	{"test_probe_read_user_dynptr", SETUP_XDP_PROG},
	{"test_probe_read_kernel_dynptr", SETUP_XDP_PROG},
	{"test_probe_read_user_str_dynptr", SETUP_XDP_PROG},
	{"test_probe_read_kernel_str_dynptr", SETUP_XDP_PROG},
	{"test_copy_from_user_dynptr", SETUP_SYSCALL_SLEEP},
	{"test_copy_from_user_str_dynptr", SETUP_SYSCALL_SLEEP},
	{"test_copy_from_user_task_dynptr", SETUP_SYSCALL_SLEEP},
	{"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP},
};

static void verify_success(const char *prog_name, enum test_setup_type setup_type)
{
	char user_data[384] = {[0 ... 382] = 'a', '\0'};
	struct dynptr_success *skel;
	struct bpf_program *prog;
	struct bpf_link *link;
@@ -58,6 +67,10 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
	if (!ASSERT_OK(err, "dynptr_success__load"))
		goto cleanup;

	skel->bss->user_ptr = user_data;
	skel->data->test_len[0] = sizeof(user_data);
	memcpy(skel->bss->expected_str, user_data, sizeof(user_data));

	switch (setup_type) {
	case SETUP_SYSCALL_SLEEP:
		link = bpf_program__attach(prog);
Loading