Commit 2146f7fe authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'allocate-bpf-trampoline-on-bpf_prog_pack'

Song Liu says:

====================
Allocate bpf trampoline on bpf_prog_pack

This set enables allocating bpf trampoline from bpf_prog_pack on x86. The
majority of this work, however, is the refactoring of trampoline code.
This is needed because we need to handle 4 archs and 2 users (trampoline
and struct_ops).

1/7 through 6/7 refactors trampoline code. A few helpers are added.
7/7 finally let bpf trampoline on x86 use bpf_prog_pack.

Changes in v7:
1. Use kvmalloc for rw_image in x86/arch_prepare_bpf_trampoline. (Alexei)
2. Add comment to explain why we cannot use kvmalloc in
   x86/arch_bpf_trampoline_size. (Alexei)

Changes in v6:
1. Rebase.
2. Add Acked-by and Tested-by from Jiri Olsa and Björn Töpel.

Changes in v5:
1. Adjust size of trampoline ksym. (Jiri)
2. Use "unsigned int size" arg in image management helpers.(Daniel)

Changes in v4:
1. Dropped 1/8 in v3, which is already merged in bpf-next.
2. Add Reviewed-by from Björn Töpel.

Changes in v3:
1. Fix bug in s390. (Thanks to Ilya Leoshkevich).
2. Fix build error in riscv. (kernel test robot).

Changes in v2:
1. Add missing changes in net/bpf/bpf_dummy_struct_ops.c.
2. Reduce one dry run in arch_prepare_bpf_trampoline. (Xu Kuohai)
3. Other small fixes.
====================

Link: https://lore.kernel.org/r/20231206224054.492250-1-song@kernel.org


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 7065eefb 3ba026fc
Loading
Loading
Loading
Loading
+37 −18
Original line number Diff line number Diff line
@@ -1828,7 +1828,7 @@ static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
 *
 */
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
			      struct bpf_tramp_links *tlinks, void *orig_call,
			      struct bpf_tramp_links *tlinks, void *func_addr,
			      int nregs, u32 flags)
{
	int i;
@@ -1926,7 +1926,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,

	if (flags & BPF_TRAMP_F_IP_ARG) {
		/* save ip address of the traced function */
		emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx);
		emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx);
		emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
	}

@@ -2026,18 +2026,10 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
	return ctx->idx;
}

int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *orig_call)
static int btf_func_model_nregs(const struct btf_func_model *m)
{
	int i, ret;
	int nregs = m->nr_args;
	int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
	struct jit_ctx ctx = {
		.image = NULL,
		.idx = 0,
	};
	int i;

	/* extra registers needed for struct argument */
	for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
@@ -2046,22 +2038,49 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
			nregs += (m->arg_size[i] + 7) / 8 - 1;
	}

	return nregs;
}

int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *func_addr)
{
	struct jit_ctx ctx = {
		.image = NULL,
		.idx = 0,
	};
	struct bpf_tramp_image im;
	int nregs, ret;

	nregs = btf_func_model_nregs(m);
	/* the first 8 registers are used for arguments */
	if (nregs > 8)
		return -ENOTSUPP;

	ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
	if (ret < 0)
		return ret;

	if (ret > max_insns)
		return -EFBIG;
	return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
}

	ctx.image = image;
	ctx.idx = 0;
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	int ret, nregs;
	struct jit_ctx ctx = {
		.image = image,
		.idx = 0,
	};

	nregs = btf_func_model_nregs(m);
	/* the first 8 registers are used for arguments */
	if (nregs > 8)
		return -ENOTSUPP;

	jit_fill_hole(image, (unsigned int)(image_end - image));
	ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);

	if (ret > 0 && validate_code(&ctx) < 0)
		ret = -EINVAL;
+15 −10
Original line number Diff line number Diff line
@@ -1029,23 +1029,28 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
	return ret;
}

int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *func_addr)
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *func_addr)
{
	int ret;
	struct bpf_tramp_image im;
	struct rv_jit_context ctx;
	int ret;

	ctx.ninsns = 0;
	ctx.insns = NULL;
	ctx.ro_insns = NULL;
	ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
	if (ret < 0)
		return ret;
	ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx);

	if (ninsns_rvoff(ret) > (long)image_end - (long)image)
		return -EFBIG;
	return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
}

int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	int ret;
	struct rv_jit_context ctx;

	ctx.ninsns = 0;
	/*
+34 −22
Original line number Diff line number Diff line
@@ -2637,6 +2637,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
	return 0;
}

int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *orig_call)
{
	struct bpf_tramp_image im;
	struct bpf_tramp_jit tjit;
	int ret;

	memset(&tjit, 0, sizeof(tjit));

	ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
					    tlinks, orig_call);

	return ret < 0 ? ret : tjit.common.prg;
}

int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
@@ -2644,19 +2659,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
{
	struct bpf_tramp_jit tjit;
	int ret;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
	/* Compute offsets, check whether the code fits. */
	memset(&tjit, 0, sizeof(tjit));
		} else {
			/* Generate the code. */
			tjit.common.prg = 0;
			tjit.common.prg_buf = image;
		}
	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
					    tlinks, func_addr);

	if (ret < 0)
		return ret;
	if (tjit.common.prg > (char *)image_end - (char *)image)
@@ -2665,9 +2673,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
		 * BPF_MAX_TRAMP_LINKS.
		 */
		return -E2BIG;
	}

	return tjit.common.prg;
	tjit.common.prg = 0;
	tjit.common.prg_buf = image;
	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
					    tlinks, func_addr);

	return ret < 0 ? ret : tjit.common.prg;
}

bool bpf_jit_supports_subprog_tailcalls(void)
+104 −26
Original line number Diff line number Diff line
@@ -2198,7 +2198,8 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog,

static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
			   struct bpf_tramp_link *l, int stack_size,
			   int run_ctx_off, bool save_ret)
			   int run_ctx_off, bool save_ret,
			   void *image, void *rw_image)
{
	u8 *prog = *pprog;
	u8 *jmp_insn;
@@ -2226,7 +2227,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
	else
		EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);

	if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
	if (emit_rsb_call(&prog, bpf_trampoline_enter(p), image + (prog - (u8 *)rw_image)))
		return -EINVAL;
	/* remember prog start time returned by __bpf_prog_enter */
	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
@@ -2250,7 +2251,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
			       (long) p->insnsi >> 32,
			       (u32) (long) p->insnsi);
	/* call JITed bpf program or interpreter */
	if (emit_rsb_call(&prog, p->bpf_func, prog))
	if (emit_rsb_call(&prog, p->bpf_func, image + (prog - (u8 *)rw_image)))
		return -EINVAL;

	/*
@@ -2277,7 +2278,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
		EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off);
	else
		EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
	if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
	if (emit_rsb_call(&prog, bpf_trampoline_exit(p), image + (prog - (u8 *)rw_image)))
		return -EINVAL;

	*pprog = prog;
@@ -2312,14 +2313,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)

static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
		      struct bpf_tramp_links *tl, int stack_size,
		      int run_ctx_off, bool save_ret)
		      int run_ctx_off, bool save_ret,
		      void *image, void *rw_image)
{
	int i;
	u8 *prog = *pprog;

	for (i = 0; i < tl->nr_links; i++) {
		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
				    run_ctx_off, save_ret))
				    run_ctx_off, save_ret, image, rw_image))
			return -EINVAL;
	}
	*pprog = prog;
@@ -2328,7 +2330,8 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,

static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
			      struct bpf_tramp_links *tl, int stack_size,
			      int run_ctx_off, u8 **branches)
			      int run_ctx_off, u8 **branches,
			      void *image, void *rw_image)
{
	u8 *prog = *pprog;
	int i;
@@ -2339,7 +2342,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
	emit_mov_imm32(&prog, false, BPF_REG_0, 0);
	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
	for (i = 0; i < tl->nr_links; i++) {
		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true))
		if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
				    image, rw_image))
			return -EINVAL;

		/* mod_ret prog stored return value into [rbp - 8]. Emit:
@@ -2422,7 +2426,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 * add rsp, 8                      // skip eth_type_trans's frame
 * ret                             // return to its caller
 */
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
					 void *rw_image_end, void *image,
					 const struct btf_func_model *m, u32 flags,
					 struct bpf_tramp_links *tlinks,
					 void *func_addr)
@@ -2521,7 +2526,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
		orig_call += X86_PATCH_SIZE;
	}

	prog = image;
	prog = rw_image;

	EMIT_ENDBR();
	/*
@@ -2563,7 +2568,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/* arg1: mov rdi, im */
		emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
		if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) {
		if (emit_rsb_call(&prog, __bpf_tramp_enter,
				  image + (prog - (u8 *)rw_image))) {
			ret = -EINVAL;
			goto cleanup;
		}
@@ -2571,7 +2577,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i

	if (fentry->nr_links)
		if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
			       flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
			return -EINVAL;

	if (fmod_ret->nr_links) {
@@ -2581,7 +2587,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
			return -ENOMEM;

		if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
				       run_ctx_off, branches)) {
				       run_ctx_off, branches, image, rw_image)) {
			ret = -EINVAL;
			goto cleanup;
		}
@@ -2602,14 +2608,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
			EMIT2(0xff, 0xd3); /* call *rbx */
		} else {
			/* call original function */
			if (emit_rsb_call(&prog, orig_call, prog)) {
			if (emit_rsb_call(&prog, orig_call, image + (prog - (u8 *)rw_image))) {
				ret = -EINVAL;
				goto cleanup;
			}
		}
		/* remember return value in a stack for bpf prog to access */
		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
		im->ip_after_call = prog;
		im->ip_after_call = image + (prog - (u8 *)rw_image);
		memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
		prog += X86_PATCH_SIZE;
	}
@@ -2625,12 +2631,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
		 * aligned address of do_fexit.
		 */
		for (i = 0; i < fmod_ret->nr_links; i++)
			emit_cond_near_jump(&branches[i], prog, branches[i],
					    X86_JNE);
			emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image),
					    image + (branches[i] - (u8 *)rw_image), X86_JNE);
	}

	if (fexit->nr_links)
		if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) {
		if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
			       false, image, rw_image)) {
			ret = -EINVAL;
			goto cleanup;
		}
@@ -2643,10 +2650,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
	 * restored to R0.
	 */
	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = prog;
		im->ip_epilogue = image + (prog - (u8 *)rw_image);
		/* arg1: mov rdi, im */
		emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
		if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) {
		if (emit_rsb_call(&prog, __bpf_tramp_exit, image + (prog - (u8 *)rw_image))) {
			ret = -EINVAL;
			goto cleanup;
		}
@@ -2665,19 +2672,90 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
	if (flags & BPF_TRAMP_F_SKIP_FRAME)
		/* skip our return address and return to parent */
		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
	emit_return(&prog, prog);
	emit_return(&prog, image + (prog - (u8 *)rw_image));
	/* Make sure the trampoline generation logic doesn't overflow */
	if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
	if (WARN_ON_ONCE(prog > (u8 *)rw_image_end - BPF_INSN_SAFETY)) {
		ret = -EFAULT;
		goto cleanup;
	}
	ret = prog - (u8 *)image;
	ret = prog - (u8 *)rw_image + BPF_INSN_SAFETY;

cleanup:
	kfree(branches);
	return ret;
}

void *arch_alloc_bpf_trampoline(unsigned int size)
{
	return bpf_prog_pack_alloc(size, jit_fill_hole);
}

void arch_free_bpf_trampoline(void *image, unsigned int size)
{
	bpf_prog_pack_free(image, size);
}

void arch_protect_bpf_trampoline(void *image, unsigned int size)
{
}

void arch_unprotect_bpf_trampoline(void *image, unsigned int size)
{
}

int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
				const struct btf_func_model *m, u32 flags,
				struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	void *rw_image, *tmp;
	int ret;
	u32 size = image_end - image;

	/* rw_image doesn't need to be in module memory range, so we can
	 * use kvmalloc.
	 */
	rw_image = kvmalloc(size, GFP_KERNEL);
	if (!rw_image)
		return -ENOMEM;

	ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
					    flags, tlinks, func_addr);
	if (ret < 0)
		goto out;

	tmp = bpf_arch_text_copy(image, rw_image, size);
	if (IS_ERR(tmp))
		ret = PTR_ERR(tmp);
out:
	kvfree(rw_image);
	return ret;
}

int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *func_addr)
{
	struct bpf_tramp_image im;
	void *image;
	int ret;

	/* Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
	 * This will NOT cause fragmentation in direct map, as we do not
	 * call set_memory_*() on this buffer.
	 *
	 * We cannot use kvmalloc here, because we need image to be in
	 * module memory range.
	 */
	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!image)
		return -ENOMEM;

	ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
					    m, flags, tlinks, func_addr);
	bpf_jit_free_exec(image);
	return ret;
}

static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, u8 *buf)
{
	u8 *jg_reloc, *prog = *pprog;
+11 −3
Original line number Diff line number Diff line
@@ -1098,10 +1098,17 @@ struct bpf_tramp_run_ctx;
 *      fexit = a set of program to run after original function
 */
struct bpf_tramp_image;
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
				const struct btf_func_model *m, u32 flags,
				struct bpf_tramp_links *tlinks,
				void *orig_call);
				void *func_addr);
void *arch_alloc_bpf_trampoline(unsigned int size);
void arch_free_bpf_trampoline(void *image, unsigned int size);
void arch_protect_bpf_trampoline(void *image, unsigned int size);
void arch_unprotect_bpf_trampoline(void *image, unsigned int size);
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
			     struct bpf_tramp_links *tlinks, void *func_addr);

u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
					     struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
@@ -1134,6 +1141,7 @@ enum bpf_tramp_prog_type {

struct bpf_tramp_image {
	void *image;
	int size;
	struct bpf_ksym ksym;
	struct percpu_ref pcref;
	void *ip_after_call;
@@ -1318,7 +1326,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
				struct bpf_prog *to);
/* Called only from JIT-enabled code, so there's no need for stubs. */
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
Loading