Commit 0a91336e authored by Huacai Chen's avatar Huacai Chen
Browse files

Merge tag 'bpf-next-6.17' into loongarch-next

LoongArch architecture changes for 6.17 have many bpf features such as
trampoline, so merge 'bpf-next-6.17' to create a base to make bpf work
well.
parents 038d61fd cd7c97f4
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -611,9 +611,10 @@ Q: I have added a new BPF instruction to the kernel, how can I integrate
it into LLVM?

A: LLVM has a ``-mcpu`` selector for the BPF back end in order to allow
the selection of BPF instruction set extensions. By default the
``generic`` processor target is used, which is the base instruction set
(v1) of BPF.
the selection of BPF instruction set extensions. Before LLVM 20, the
``generic`` processor target was used by default, which is the base
instruction set (v1) of BPF. Since LLVM 20, the default processor target
has changed to instruction set v3.

LLVM has an option to select ``-mcpu=probe`` where it will probe the host
kernel for supported BPF instruction set extensions and selects the
+3 −3
Original line number Diff line number Diff line
@@ -350,9 +350,9 @@ Underflow and overflow are allowed during arithmetic operations, meaning
the 64-bit or 32-bit value will wrap. If BPF program execution would
result in division by zero, the destination register is instead set to zero.
Otherwise, for ``ALU64``, if execution would result in ``LLONG_MIN``
dividing -1, the desination register is instead set to ``LLONG_MIN``. For
``ALU``, if execution would result in ``INT_MIN`` dividing -1, the
desination register is instead set to ``INT_MIN``.
divided by -1, the destination register is instead set to ``LLONG_MIN``. For
``ALU``, if execution would result in ``INT_MIN`` divided by -1, the
destination register is instead set to ``INT_MIN``.

If execution would result in modulo by zero, for ``ALU64`` the value of
the destination register is unchanged whereas for ``ALU`` the upper
+5 −0
Original line number Diff line number Diff line
@@ -325,4 +325,9 @@
#define A64_MRS_SP_EL0(Rt) \
	aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_SP_EL0)

/*
 * Barriers
 *
 * Encodings are built from the aarch64_insn_get_*() helpers. A64_DSB_NSH
 * ORs (0x7 << 8) into the DSB base encoding; presumably this is the CRm
 * option selecting the non-shareable domain -- confirm against the Arm ARM.
 */
#define A64_SB		aarch64_insn_get_sb_value()
#define A64_DSB_NSH	(aarch64_insn_get_dsb_base_value() | (0x7 << 8))
#define A64_ISB		aarch64_insn_get_isb_value()

#endif /* _BPF_JIT_H */
+140 −27
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)

#define check_imm(bits, imm) do {				\
@@ -68,6 +69,8 @@ static const int bpf2a64[] = {
	[TCCNT_PTR] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
	/* callee saved register for private stack pointer */
	[PRIVATE_SP] = A64_R(27),
	/* callee saved register for kern_vm_start address */
	[ARENA_VM_START] = A64_R(28),
};
@@ -86,6 +89,7 @@ struct jit_ctx {
	u64 user_vm_start;
	u64 arena_vm_start;
	bool fp_used;
	bool priv_sp_used;
	bool write;
};

@@ -98,6 +102,10 @@ struct bpf_plt {
#define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)

/*
 * Memory size/value to protect private stack overflow/underflow.
 *
 * PRIV_STACK_GUARD_SZ bytes are reserved at each end of the per-CPU private
 * stack allocation and filled with PRIV_STACK_GUARD_VAL (two u64 words per
 * guard). The guards are written by priv_stack_init_guard() and verified by
 * priv_stack_check_guard().
 */
#define PRIV_STACK_GUARD_SZ    16
#define PRIV_STACK_GUARD_VAL   0xEB9F12345678eb9fULL

static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
	if (ctx->image != NULL && ctx->write)
@@ -387,8 +395,11 @@ static void find_used_callee_regs(struct jit_ctx *ctx)
	if (reg_used & 8)
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];

	if (reg_used & 16)
	if (reg_used & 16) {
		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
		if (ctx->priv_sp_used)
			ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
	}

	if (ctx->arena_vm_start)
		ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
@@ -412,6 +423,7 @@ static void push_callee_regs(struct jit_ctx *ctx)
		emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
		emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
		emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
		ctx->fp_used = true;
	} else {
		find_used_callee_regs(ctx);
		for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
@@ -461,6 +473,19 @@ static void pop_callee_regs(struct jit_ctx *ctx)
	}
}

/*
 * Emit instructions that materialise in @dst_reg the address of the per-CPU
 * object @ptr for the CPU executing the JITed code.
 *
 * @dst_reg: A64 register receiving the resulting address
 * @ptr:     per-CPU pointer whose value is loaded as a 64-bit immediate
 * @ctx:     JIT context the instructions are emitted into
 *
 * The sequence is: load the immediate, read TPIDR_EL2 (when the CPU has
 * ARM64_HAS_VIRT_HOST_EXTN) or TPIDR_EL1 otherwise into TMP_REG_1, then add
 * the two. Presumably the TPIDR register holds this CPU's per-CPU offset --
 * confirm against the arm64 percpu implementation.
 */
static void emit_percpu_ptr(const u8 dst_reg, void __percpu *ptr,
			    struct jit_ctx *ctx)
{
	const u8 off_reg = bpf2a64[TMP_REG_1];
	u32 mrs_insn;

	/* dst_reg = raw per-CPU pointer value */
	emit_a64_mov_i64(dst_reg, (__force const u64)ptr, ctx);

	/* off_reg = TPIDR_EL2 or TPIDR_EL1, depending on the CPU capability */
	if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
		mrs_insn = A64_MRS_TPIDR_EL2(off_reg);
	else
		mrs_insn = A64_MRS_TPIDR_EL1(off_reg);
	emit(mrs_insn, ctx);

	/* dst_reg += off_reg */
	emit(A64_ADD(1, dst_reg, dst_reg, off_reg), ctx);
}

#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)

@@ -476,6 +501,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
	const bool is_main_prog = !bpf_is_subprog(prog);
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const u8 priv_sp = bpf2a64[PRIVATE_SP];
	void __percpu *priv_stack_ptr;
	const int idx0 = ctx->idx;
	int cur_offset;

@@ -551,15 +578,23 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
		emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
	}

	if (ctx->fp_used)
		/* Set up BPF prog stack base register */
		emit(A64_MOV(1, fp, A64_SP), ctx);

	/* Stack must be multiples of 16B */
	ctx->stack_size = round_up(prog->aux->stack_depth, 16);

	if (ctx->fp_used) {
		if (ctx->priv_sp_used) {
			/* Set up private stack pointer */
			priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
			emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
			emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
		} else {
			/* Set up BPF prog stack base register */
			emit(A64_MOV(1, fp, A64_SP), ctx);
		}
	}

	/* Set up function call stack */
	if (ctx->stack_size)
	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	if (ctx->arena_vm_start)
@@ -623,7 +658,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
	emit(A64_STR64I(tcc, ptr, 0), ctx);

	/* restore SP */
	if (ctx->stack_size)
	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	pop_callee_regs(ctx);
@@ -991,7 +1026,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
	const u8 ptr = bpf2a64[TCCNT_PTR];

	/* We're done with BPF stack */
	if (ctx->stack_size)
	if (ctx->stack_size && !ctx->priv_sp_used)
		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	pop_callee_regs(ctx);
@@ -1120,6 +1155,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const u8 priv_sp = bpf2a64[PRIVATE_SP];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const int i = insn - ctx->prog->insnsi;
@@ -1564,7 +1600,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
			src = tmp2;
		}
		if (src == fp) {
			src_adj = A64_SP;
			src_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			src_adj = src;
@@ -1630,17 +1666,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
			return ret;
		break;

	/* speculation barrier */
	/* speculation barrier against v1 and v4 */
	case BPF_ST | BPF_NOSPEC:
		/*
		 * Nothing required here.
		 *
		 * In case of arm64, we rely on the firmware mitigation of
		 * Speculative Store Bypass as controlled via the ssbd kernel
		 * parameter. Whenever the mitigation is enabled, it works
		 * for all of the kernel code with no need to provide any
		 * additional instructions.
		 */
		if (alternative_has_cap_likely(ARM64_HAS_SB)) {
			emit(A64_SB, ctx);
		} else {
			emit(A64_DSB_NSH, ctx);
			emit(A64_ISB, ctx);
		}
		break;

	/* ST: *(size *)(dst + off) = imm */
@@ -1657,7 +1690,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
			dst = tmp2;
		}
		if (dst == fp) {
			dst_adj = A64_SP;
			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
@@ -1719,7 +1752,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
			dst = tmp2;
		}
		if (dst == fp) {
			dst_adj = A64_SP;
			dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
			off_adj = off + ctx->stack_size;
		} else {
			dst_adj = dst;
@@ -1862,6 +1895,39 @@ static inline void bpf_flush_icache(void *start, void *end)
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

/*
 * Write the guard pattern into both ends of every possible CPU's private
 * stack allocation.
 *
 * @priv_stack_ptr: per-CPU base of the private stack allocation
 * @alloc_size:     size in bytes of each per-CPU allocation, including both
 *                  PRIV_STACK_GUARD_SZ guard regions
 *
 * Each guard is two u64 words of PRIV_STACK_GUARD_VAL: one pair at the start
 * of the allocation and one pair in its last PRIV_STACK_GUARD_SZ bytes.
 */
static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
{
	/* u64 index of the first word of the trailing guard */
	const int tail_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
	int cpu;

	for_each_possible_cpu(cpu) {
		u64 *head = per_cpu_ptr(priv_stack_ptr, cpu);
		u64 *tail = head + tail_idx;

		head[0] = PRIV_STACK_GUARD_VAL;
		head[1] = PRIV_STACK_GUARD_VAL;
		tail[0] = PRIV_STACK_GUARD_VAL;
		tail[1] = PRIV_STACK_GUARD_VAL;
	}
}

/*
 * Verify that the guard words at both ends of every possible CPU's private
 * stack still hold PRIV_STACK_GUARD_VAL.
 *
 * @priv_stack_ptr: per-CPU base of the private stack allocation
 * @alloc_size:     size in bytes of each per-CPU allocation, including both
 *                  PRIV_STACK_GUARD_SZ guard regions
 * @prog:           program owning the stack; only used to name it in the log
 *
 * Logs a single error for the first CPU whose guards were overwritten and
 * stops scanning; nothing is returned to the caller.
 */
static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
				   struct bpf_prog *prog)
{
	/* u64 index of the first word of the trailing guard */
	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
	u64 *stack_ptr;

	for_each_possible_cpu(cpu) {
		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
		    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
		    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
			/* "%s" only: a trailing 'x' would be glued onto the prog name */
			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
			       bpf_jit_get_prog_name(prog));
			break;
		}
	}
}

struct arm64_jit_data {
	struct bpf_binary_header *header;
	u8 *ro_image;
@@ -1874,9 +1940,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
	int image_size, prog_size, extable_size, extable_align, extable_offset;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	struct bpf_binary_header *ro_header;
	struct bpf_binary_header *ro_header = NULL;
	struct arm64_jit_data *jit_data;
	void __percpu *priv_stack_ptr = NULL;
	bool was_classic = bpf_prog_was_classic(prog);
	int priv_stack_alloc_sz;
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct jit_ctx ctx;
@@ -1908,6 +1976,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
		}
		prog->aux->jit_data = jit_data;
	}
	priv_stack_ptr = prog->aux->priv_stack_ptr;
	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
		/* Allocate actual private stack size with verifier-calculated
		 * stack size plus two memory guards to protect overflow and
		 * underflow.
		 */
		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
				      2 * PRIV_STACK_GUARD_SZ;
		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
		if (!priv_stack_ptr) {
			prog = orig_prog;
			goto out_priv_stack;
		}

		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
		prog->aux->priv_stack_ptr = priv_stack_ptr;
	}
	if (jit_data->ctx.offset) {
		ctx = jit_data->ctx;
		ro_image_ptr = jit_data->ro_image;
@@ -1931,6 +2016,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);

	if (priv_stack_ptr)
		ctx.priv_sp_used = true;

	/* Pass 1: Estimate the maximum image size.
	 *
	 * BPF line info needs ctx->offset[i] to be the offset of
@@ -2070,7 +2158,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
			ctx.offset[i] *= AARCH64_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
out_off:
		if (!ro_header && priv_stack_ptr) {
			free_percpu(priv_stack_ptr);
			prog->aux->priv_stack_ptr = NULL;
		}
		kvfree(ctx.offset);
out_priv_stack:
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
@@ -2089,6 +2182,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
	goto out_off;
}

/* Report that this JIT supports BPF private stacks: always returns true. */
bool bpf_jit_supports_private_stack(void)
{
	return true;
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
@@ -2243,11 +2341,6 @@ static int calc_arg_aux(const struct btf_func_model *m,

	/* the rest arguments are passed through stack */
	for (; i < m->nr_args; i++) {
		/* We can not know for sure about exact alignment needs for
		 * struct passed on stack, so deny those
		 */
		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
			return -ENOTSUPP;
		stack_slots = (m->arg_size[i] + 7) / 8;
		a->bstack_for_args += stack_slots * 8;
		a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
@@ -2911,6 +3004,17 @@ bool bpf_jit_supports_percpu_insn(void)
	return true;
}

/* Tell the caller that nospec insns against Spectre v4 can be skipped.
 *
 * In case of arm64, we rely on the firmware mitigation of Speculative
 * Store Bypass as controlled via the ssbd kernel parameter. Whenever
 * the mitigation is enabled, it works for all of the kernel code with
 * no need to provide any additional instructions. Therefore, skip
 * inserting nospec insns against Spectre v4.
 *
 * Always returns true.
 */
bool bpf_jit_bypass_spec_v4(void)
{
	return true;
}

bool bpf_jit_inlines_helper_call(s32 imm)
{
	switch (imm) {
@@ -2928,6 +3032,8 @@ void bpf_jit_free(struct bpf_prog *prog)
	if (prog->jited) {
		struct arm64_jit_data *jit_data = prog->aux->jit_data;
		struct bpf_binary_header *hdr;
		void __percpu *priv_stack_ptr;
		int priv_stack_alloc_sz;

		/*
		 * If we fail the final pass of JIT (from jit_subprogs),
@@ -2941,6 +3047,13 @@ void bpf_jit_free(struct bpf_prog *prog)
		}
		hdr = bpf_jit_binary_pack_hdr(prog);
		bpf_jit_binary_pack_free(hdr, NULL);
		priv_stack_ptr = prog->aux->priv_stack_ptr;
		if (priv_stack_ptr) {
			priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
					      2 * PRIV_STACK_GUARD_SZ;
			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
			free_percpu(prog->aux->priv_stack_ptr);
		}
		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
	}

+59 −20
Original line number Diff line number Diff line
@@ -370,6 +370,23 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
	return 0;
}

/*
 * Decide whether Spectre v1 barriers can be skipped for JITed programs.
 *
 * On E500 and BOOK3S_64 builds the barrier is required (false is returned)
 * only when both SEC_FTR_FAVOUR_SECURITY and SEC_FTR_BNDS_CHK_SPEC_BAR are
 * enabled. On every other configuration this returns true.
 */
bool bpf_jit_bypass_spec_v1(void)
{
#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
	/* Security not favoured: no barrier needed */
	if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY))
		return true;

	/* Bypass unless the bounds-check speculation barrier is enabled */
	return !security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
#else
	return true;
#endif
}

bool bpf_jit_bypass_spec_v4(void)
{
	return !(security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
		 security_ftr_enabled(SEC_FTR_STF_BARRIER) &&
		 stf_barrier_type_get() != STF_BARRIER_NONE);
}

/*
 * We spill into the redzone always, even if the bpf program has its own stackframe.
 * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local()
@@ -397,6 +414,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
		       u32 *addrs, int pass, bool extra_pass)
{
	enum stf_barrier_type stf_barrier = stf_barrier_type_get();
	bool sync_emitted, ori31_emitted;
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
	int i, ret;
@@ -789,20 +807,36 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code

		/*
		 * BPF_ST NOSPEC (speculation barrier)
		 *
		 * The following must act as a barrier against both Spectre v1
		 * and v4 if we requested both mitigations. Therefore, also emit
		 * 'isync; sync' on E500 or 'ori31' on BOOK3S_64 in addition to
		 * the insns needed for a Spectre v4 barrier.
		 *
		 * If we requested only !bypass_spec_v1 OR only !bypass_spec_v4,
		 * we can skip the respective other barrier type as an
		 * optimization.
		 */
		case BPF_ST | BPF_NOSPEC:
			if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) ||
					!security_ftr_enabled(SEC_FTR_STF_BARRIER))
				break;

			sync_emitted = false;
			ori31_emitted = false;
			if (IS_ENABLED(CONFIG_PPC_E500) &&
			    !bpf_jit_bypass_spec_v1()) {
				EMIT(PPC_RAW_ISYNC());
				EMIT(PPC_RAW_SYNC());
				sync_emitted = true;
			}
			if (!bpf_jit_bypass_spec_v4()) {
				switch (stf_barrier) {
				case STF_BARRIER_EIEIO:
					EMIT(PPC_RAW_EIEIO() | 0x02000000);
					break;
				case STF_BARRIER_SYNC_ORI:
					if (!sync_emitted)
						EMIT(PPC_RAW_SYNC());
					EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0));
					EMIT(PPC_RAW_ORI(_R31, _R31, 0));
					ori31_emitted = true;
					break;
				case STF_BARRIER_FALLBACK:
					ctx->seen |= SEEN_FUNC;
@@ -813,6 +847,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
				case STF_BARRIER_NONE:
					break;
				}
			}
			if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
			    !bpf_jit_bypass_spec_v1() &&
			    !ori31_emitted)
				EMIT(PPC_RAW_ORI(_R31, _R31, 0));
			break;

		/*
Loading