Commit 6c17a882, authored by Puranjay Mohan, committed by Daniel Borkmann
Browse files

bpf, arm64: JIT support for private stack



The private stack is allocated in bpf_int_jit_compile() with 16-byte
alignment. It includes additional guard regions to detect stack
overflows and underflows at runtime.

Memory layout:

              +------------------------------------------------------+
              |                                                      |
              |  16 bytes padding (overflow guard - stack top)       |
              |  [ detects writes beyond top of stack ]              |
     BPF FP ->+------------------------------------------------------+
              |                                                      |
              |  BPF private stack (sized by verifier)               |
              |  [ 16-byte aligned ]                                 |
              |                                                      |
BPF PRIV SP ->+------------------------------------------------------+
              |                                                      |
              |  16 bytes padding (underflow guard - stack bottom)   |
              |  [ detects accesses before start of stack ]          |
              |                                                      |
              +------------------------------------------------------+

On detection of an overflow or underflow, the kernel emits messages
like:

    BPF private stack overflow/underflow detected for prog <prog_name>

After commit bd737fcb ("bpf, arm64: Get rid of fpb"), JITed BPF
programs use the stack in two ways:

1. Via the BPF frame pointer (top of stack), using negative offsets.
2. Via the stack pointer (bottom of stack), using positive offsets in
   LDR/STR instructions.

When a private stack is used, ARM64 callee-saved register x27 replaces
the stack pointer. The BPF frame pointer usage remains unchanged, but
it now points to the top of the private stack.

Relevant tests (enabled in the following patch):

 #415/1   struct_ops_private_stack/private_stack:OK
 #415/2   struct_ops_private_stack/private_stack_fail:OK
 #415/3   struct_ops_private_stack/private_stack_recur:OK
 #415     struct_ops_private_stack:OK
 #549/1   verifier_private_stack/Private stack, single prog:OK
 #549/2   verifier_private_stack/Private stack, subtree > MAX_BPF_STACK:OK
 #549/3   verifier_private_stack/No private stack:OK
 #549/4   verifier_private_stack/Private stack, callback:OK
 #549/5   verifier_private_stack/Private stack, exception in main prog:OK
 #549/6   verifier_private_stack/Private stack, exception in subprog:OK
 #549/7   verifier_private_stack/Private stack, async callback, not nested:OK
 #549/8   verifier_private_stack/Private stack, async callback, potential nesting:OK
 #549     verifier_private_stack:OK
 Summary: 2/11 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/bpf/20250724120257.7299-3-puranjay@kernel.org
parent 3ba58312
Loading
Loading
Loading
Loading
+121 −12
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)

#define check_imm(bits, imm) do {				\
@@ -68,6 +69,8 @@ static const int bpf2a64[] = {
	[TCCNT_PTR] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
	/* callee saved register for private stack pointer */
	[PRIVATE_SP] = A64_R(27),
	/* callee saved register for kern_vm_start address */
	[ARENA_VM_START] = A64_R(28),
};
@@ -86,6 +89,7 @@ struct jit_ctx {
	u64 user_vm_start;
	u64 arena_vm_start;
	bool fp_used;
	bool priv_sp_used;
	bool write;
};

@@ -98,6 +102,10 @@ struct bpf_plt {
#define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)

/*
 * Private stack guard regions.
 *
 * PRIV_STACK_GUARD_SZ is the size, in bytes, of the guard area placed at
 * each end of the per-CPU private stack allocation (the allocation is the
 * rounded-up stack depth plus two guards).
 *
 * PRIV_STACK_GUARD_VAL is the 64-bit pattern stored in every guard word when
 * the stack is allocated and compared again when the program is freed; a
 * mismatch is reported as a private stack overflow/underflow.
 */
#define PRIV_STACK_GUARD_SZ    16
#define PRIV_STACK_GUARD_VAL   0xEB9F12345678eb9fULL

static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
	if (ctx->image != NULL && ctx->write)
@@ -387,8 +395,11 @@ static void find_used_callee_regs(struct jit_ctx *ctx)
	if (reg_used & 8)
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];

	if (reg_used & 16)
	if (reg_used & 16) {
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
		if (ctx->priv_sp_used)
			ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
	}

	if (ctx->arena_vm_start)
		ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
@@ -462,6 +473,19 @@ static void pop_callee_regs(struct jit_ctx *ctx)
	}
}

/*
 * Emit code that turns the per-CPU pointer @ptr into an address usable on
 * the CPU executing the JITed program, leaving the result in @dst_reg.
 *
 * The generated sequence loads @ptr as a 64-bit immediate, reads TPIDR_EL2
 * (when the ARM64_HAS_VIRT_HOST_EXTN capability is set) or TPIDR_EL1
 * (otherwise) into TMP_REG_1, and adds the two together.
 * NOTE(review): this relies on that system register holding the running
 * CPU's per-CPU offset - confirm against the arm64 percpu implementation.
 *
 * TMP_REG_1 is clobbered by the emitted code.
 */
static void emit_percpu_ptr(const u8 dst_reg, void __percpu *ptr,
			    struct jit_ctx *ctx)
{
	const u8 off_reg = bpf2a64[TMP_REG_1];
	const bool use_el2 = cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN);

	/* dst_reg = (u64)ptr */
	emit_a64_mov_i64(dst_reg, (__force const u64)ptr, ctx);

	/* off_reg = TPIDR_EL2 or TPIDR_EL1, depending on the capability */
	emit(use_el2 ? A64_MRS_TPIDR_EL2(off_reg) : A64_MRS_TPIDR_EL1(off_reg),
	     ctx);

	/* dst_reg += off_reg */
	emit(A64_ADD(1, dst_reg, dst_reg, off_reg), ctx);
}

#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)

@@ -477,6 +501,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
	const bool is_main_prog = !bpf_is_subprog(prog);
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const u8 priv_sp = bpf2a64[PRIVATE_SP];
	void __percpu *priv_stack_ptr;
	const int idx0 = ctx->idx;
	int cur_offset;

@@ -552,15 +578,23 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
		emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
	}

	if (ctx->fp_used)
		/* Set up BPF prog stack base register */
		emit(A64_MOV(1, fp, A64_SP), ctx);

	/* Stack must be multiples of 16B */
	ctx->stack_size = round_up(prog->aux->stack_depth, 16);

	if (ctx->fp_used) {
		if (ctx->priv_sp_used) {
			/* Set up private stack pointer */
			priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
			emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
			emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
		} else {
			/* Set up BPF prog stack base register */
			emit(A64_MOV(1, fp, A64_SP), ctx);
		}
	}

	/* Set up function call stack */
	if (ctx->stack_size)
	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	if (ctx->arena_vm_start)
@@ -624,7 +658,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
	emit(A64_STR64I(tcc, ptr, 0), ctx);

	/* restore SP */
	if (ctx->stack_size)
	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	pop_callee_regs(ctx);
@@ -992,7 +1026,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
	const u8 ptr = bpf2a64[TCCNT_PTR];

	/* We're done with BPF stack */
	if (ctx->stack_size)
	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	pop_callee_regs(ctx);
@@ -1121,6 +1155,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const u8 priv_sp = bpf2a64[PRIVATE_SP];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const int i = insn - ctx->prog->insnsi;
@@ -1565,7 +1600,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
			src = tmp2;
		}
		if (src == fp) {
			src_adj = A64_SP;
			src_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			src_adj = src;
@@ -1655,7 +1690,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
			dst = tmp2;
		}
		if (dst == fp) {
			dst_adj = A64_SP;
			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
@@ -1717,7 +1752,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
			dst = tmp2;
		}
		if (dst == fp) {
			dst_adj = A64_SP;
			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
@@ -1860,6 +1895,39 @@ static inline void bpf_flush_icache(void *start, void *end)
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

/*
 * Fill the guard words of a freshly allocated private stack.
 *
 * @priv_stack_ptr: per-CPU base of the whole allocation, guards included.
 * @alloc_size:     total size of the allocation in bytes.
 *
 * For every possible CPU, PRIV_STACK_GUARD_VAL is written into the first two
 * u64 words of the allocation and into the two u64 words that start
 * PRIV_STACK_GUARD_SZ bytes before its end.
 */
static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
{
	/* u64 index of the guard that occupies the tail of the allocation */
	const int tail_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
	u64 *base;
	int cpu, word;

	for_each_possible_cpu(cpu) {
		base = per_cpu_ptr(priv_stack_ptr, cpu);

		/* each guard is two u64 words: one at each end of the buffer */
		for (word = 0; word < 2; word++) {
			base[word] = PRIV_STACK_GUARD_VAL;
			base[tail_idx + word] = PRIV_STACK_GUARD_VAL;
		}
	}
}

/*
 * Verify that the guard words of a private stack are still intact.
 *
 * @priv_stack_ptr: per-CPU base of the whole allocation, guards included.
 * @alloc_size:     total size of the allocation in bytes.
 * @prog:           program that owns the stack; only used for the name in
 *                  the error message.
 *
 * For every possible CPU, the first two u64 words of the allocation and the
 * two u64 words starting PRIV_STACK_GUARD_SZ bytes before its end are
 * compared against PRIV_STACK_GUARD_VAL. The first CPU with a mismatch
 * triggers a single pr_err() and ends the scan; nothing is returned to the
 * caller.
 */
static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
				   struct bpf_prog *prog)
{
	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
	u64 *stack_ptr;

	for_each_possible_cpu(cpu) {
		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
		    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
		    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
			/* "%s" only: a stray 'x' used to follow the prog name */
			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
			       bpf_jit_get_prog_name(prog));
			/* one report is enough; skip the remaining CPUs */
			break;
		}
	}
}

struct arm64_jit_data {
	struct bpf_binary_header *header;
	u8 *ro_image;
@@ -1872,9 +1940,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
	int image_size, prog_size, extable_size, extable_align, extable_offset;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	struct bpf_binary_header *ro_header;
	struct bpf_binary_header *ro_header = NULL;
	struct arm64_jit_data *jit_data;
	void __percpu *priv_stack_ptr = NULL;
	bool was_classic = bpf_prog_was_classic(prog);
	int priv_stack_alloc_sz;
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct jit_ctx ctx;
@@ -1906,6 +1976,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
		}
		prog->aux->jit_data = jit_data;
	}
	priv_stack_ptr = prog->aux->priv_stack_ptr;
	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
		/* Allocate actual private stack size with verifier-calculated
		 * stack size plus two memory guards to protect overflow and
		 * underflow.
		 */
		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
				      2 * PRIV_STACK_GUARD_SZ;
		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
		if (!priv_stack_ptr) {
			prog = orig_prog;
			goto out_priv_stack;
		}

		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
		prog->aux->priv_stack_ptr = priv_stack_ptr;
	}
	if (jit_data->ctx.offset) {
		ctx = jit_data->ctx;
		ro_image_ptr = jit_data->ro_image;
@@ -1929,6 +2016,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);

	if (priv_stack_ptr)
		ctx.priv_sp_used = true;

	/* Pass 1: Estimate the maximum image size.
	 *
	 * BPF line info needs ctx->offset[i] to be the offset of
@@ -2068,7 +2158,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
			ctx.offset[i] *= AARCH64_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
out_off:
		if (!ro_header && priv_stack_ptr) {
			free_percpu(priv_stack_ptr);
			prog->aux->priv_stack_ptr = NULL;
		}
		kvfree(ctx.offset);
out_priv_stack:
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
@@ -2087,6 +2182,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
	goto out_off;
}

/*
 * Capability predicate: this JIT can place a program's BPF stack in a
 * separately allocated private stack, so it unconditionally reports true.
 */
bool bpf_jit_supports_private_stack(void)
{
	return true;
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
@@ -2932,6 +3032,8 @@ void bpf_jit_free(struct bpf_prog *prog)
	if (prog->jited) {
		struct arm64_jit_data *jit_data = prog->aux->jit_data;
		struct bpf_binary_header *hdr;
		void __percpu *priv_stack_ptr;
		int priv_stack_alloc_sz;

		/*
		 * If we fail the final pass of JIT (from jit_subprogs),
@@ -2945,6 +3047,13 @@ void bpf_jit_free(struct bpf_prog *prog)
		}
		hdr = bpf_jit_binary_pack_hdr(prog);
		bpf_jit_binary_pack_free(hdr, NULL);
		priv_stack_ptr = prog->aux->priv_stack_ptr;
		if (priv_stack_ptr) {
			priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
					      2 * PRIV_STACK_GUARD_SZ;
			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
			free_percpu(prog->aux->priv_stack_ptr);
		}
		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
	}