Commit 685c6d5b authored by David S. Miller's avatar David S. Miller
Browse files
Alexei Starovoitov says:

====================
The following pull-request contains BPF updates for your *net-next* tree.

We've added 73 non-merge commits during the last 9 day(s) which contain
a total of 79 files changed, 5275 insertions(+), 600 deletions(-).

The main changes are:

1) Basic BTF validation in libbpf, from Andrii Nakryiko.

2) bpf_assert(), bpf_throw(), exceptions in bpf progs, from Kumar Kartikeya Dwivedi.

3) next_thread cleanups, from Oleg Nesterov.

4) Add mcpu=v4 support to arm32, from Puranjay Mohan.

5) Add support for __percpu pointers in bpf progs, from Yonghong Song.

6) Fix bpf tailcall interaction with bpf trampoline, from Leon Hwang.

7) Raise irq_work in bpf_mem_alloc while irqs are disabled to improve refill probabablity, from Hou Tao.

Please consider pulling these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git



Thanks a lot!

Also thanks to reporters, reviewers and testers of commits in this pull-request:

Alan Maguire, Andrey Konovalov, Dave Marchevsky, "Eric W. Biederman",
Jiri Olsa, Maciej Fijalkowski, Quentin Monnet, Russell King (Oracle),
Song Liu, Stanislav Fomichev, Yonghong Song
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents fbb49deb ec6f1b4d
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -113,7 +113,7 @@ Flags
  used by ``eth_get_headlen`` to estimate length of all headers for GRO.
* ``BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL`` - tells BPF flow dissector to
  stop parsing as soon as it reaches IPv6 flow label; used by
  ``___skb_get_hash`` and ``__skb_get_hash_symmetric`` to get flow hash.
  ``___skb_get_hash`` to get flow hash.
* ``BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP`` - tells BPF flow dissector to stop
  parsing as soon as it reaches encapsulated headers; used by routing
  infrastructure.
+21 −0
Original line number Diff line number Diff line
@@ -42,6 +42,19 @@ definitions:
        doc:
          This feature informs if netdev implements non-linear XDP buffer
          support in ndo_xdp_xmit callback.
  -
    type: flags
    name: xdp-rx-metadata
    render-max: true
    entries:
      -
        name: timestamp
        doc:
          Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
      -
        name: hash
        doc:
          Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().

attribute-sets:
  -
@@ -68,6 +81,13 @@ attribute-sets:
        type: u32
        checks:
          min: 1
      -
        name: xdp-rx-metadata-features
        doc: Bitmask of supported XDP receive metadata features.
             See Documentation/networking/xdp-rx-metadata.rst for more details.
        type: u64
        enum: xdp-rx-metadata
        enum-as-flags: true

operations:
  list:
@@ -84,6 +104,7 @@ operations:
            - ifindex
            - xdp-features
            - xdp-zc-max-segs
            - xdp-rx-metadata-features
      dump:
        reply: *dev-all
    -
+7 −0
Original line number Diff line number Diff line
@@ -105,6 +105,13 @@ bpf_tail_call
Adding programs that access metadata kfuncs to the ``BPF_MAP_TYPE_PROG_ARRAY``
is currently not supported.

Supported Devices
=================

It is possible to query which kfunc the particular netdev implements via
netlink. See ``xdp-rx-metadata-features`` attribute set in
``Documentation/netlink/specs/netdev.yaml``.

Example
=======

+3 −2
Original line number Diff line number Diff line
@@ -3596,9 +3596,10 @@ F: Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml
F:	drivers/iio/accel/bma400*
BPF JIT for ARM
M:	Shubham Bansal <illusionist.neo@gmail.com>
M:	Russell King <linux@armlinux.org.uk>
M:	Puranjay Mohan <puranjay12@gmail.com>
L:	bpf@vger.kernel.org
S:	Odd Fixes
S:	Maintained
F:	arch/arm/net/
BPF JIT for ARM64
+260 −20
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
/*
 * Just-In-Time compiler for eBPF filters on 32bit ARM
 *
 * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com>
 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 */
@@ -15,6 +16,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
#include <linux/math64.h>

#include <asm/cacheflush.h>
#include <asm/hwcap.h>
@@ -228,6 +230,44 @@ static u32 jit_mod32(u32 dividend, u32 divisor)
	return dividend % divisor;
}

static s32 jit_sdiv32(s32 dividend, s32 divisor)
{
	return dividend / divisor;
}

static s32 jit_smod32(s32 dividend, s32 divisor)
{
	return dividend % divisor;
}

/* Wrappers for 64-bit div/mod */
static u64 jit_udiv64(u64 dividend, u64 divisor)
{
	return div64_u64(dividend, divisor);
}

static u64 jit_mod64(u64 dividend, u64 divisor)
{
	u64 rem;

	div64_u64_rem(dividend, divisor, &rem);
	return rem;
}

static s64 jit_sdiv64(s64 dividend, s64 divisor)
{
	return div64_s64(dividend, divisor);
}

static s64 jit_smod64(s64 dividend, s64 divisor)
{
	u64 q;

	q = div64_s64(dividend, divisor);

	return dividend - q * divisor;
}

static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
	inst |= (cond << 28);
@@ -333,6 +373,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8)
#define ARM_LDRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
#define ARM_LDRH_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)

#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off)
#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off)

#define ARM_STR_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
#define ARM_STRB_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)
#define ARM_STRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
@@ -474,17 +517,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
	return to - from - 2;
}

static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
{
	const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
	const s8 *tmp = bpf2a32[TMP_REG_1];
	u32 dst;

#if __LINUX_ARM_ARCH__ == 7
	if (elf_hwcap & HWCAP_IDIVA) {
		if (op == BPF_DIV)
			emit(ARM_UDIV(rd, rm, rn), ctx);
		else {
			emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
		if (op == BPF_DIV) {
			emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx);
		} else {
			emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx);
			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
		}
		return;
@@ -512,8 +556,19 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
	emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);

	/* Call appropriate function */
	emit_mov_i(ARM_IP, op == BPF_DIV ?
		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
	if (sign) {
		if (op == BPF_DIV)
			dst = (u32)jit_sdiv32;
		else
			dst = (u32)jit_smod32;
	} else {
		if (op == BPF_DIV)
			dst = (u32)jit_udiv32;
		else
			dst = (u32)jit_mod32;
	}

	emit_mov_i(ARM_IP, dst, ctx);
	emit_blx_r(ARM_IP, ctx);

	/* Restore caller-saved registers from stack */
@@ -530,6 +585,78 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}

static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx,
				  u8 op, u8 sign)
{
	u32 dst;

	/* Push caller-saved registers on stack */
	emit(ARM_PUSH(CALLER_MASK), ctx);

	/*
	 * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in
	 * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack,
	 * we can recover them later after returning from the function call.
	 */
	if (rm[1] != ARM_R0 || rn[1] != ARM_R2) {
		/*
		 * Move Rm to {R1, R0} if it is not already there.
		 */
		if (rm[1] != ARM_R0) {
			if (rn[1] == ARM_R0)
				emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
			emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
			emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
			if (rn[1] == ARM_R0) {
				emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
				goto cont;
			}
		}
		/*
		 * Move Rn to {R3, R2} if it is not already there.
		 */
		if (rn[1] != ARM_R2) {
			emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
			emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
		}
	}

cont:

	/* Call appropriate function */
	if (sign) {
		if (op == BPF_DIV)
			dst = (u32)jit_sdiv64;
		else
			dst = (u32)jit_smod64;
	} else {
		if (op == BPF_DIV)
			dst = (u32)jit_udiv64;
		else
			dst = (u32)jit_mod64;
	}

	emit_mov_i(ARM_IP, dst, ctx);
	emit_blx_r(ARM_IP, ctx);

	/* Save return value */
	if (rd[1] != ARM_R0) {
		emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
		emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
	}

	/* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */
	if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
		emit(ARM_POP(CALLER_MASK), ctx);
	} else if (rd[1] != ARM_R0) {
		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
	} else {
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
	}
}

/* Is the translated BPF register on stack? */
static bool is_stacked(s8 reg)
{
@@ -744,12 +871,16 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
}

/* dst = src (4 bytes)*/
static inline void emit_a32_mov_r(const s8 dst, const s8 src,
static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
				  struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	s8 rt;

	rt = arm_bpf_get_reg32(src, tmp[0], ctx);
	if (off && off != 32) {
		emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
		emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
	}
	arm_bpf_put_reg32(dst, rt, ctx);
}

@@ -758,15 +889,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
				  const s8 src[],
				  struct jit_ctx *ctx) {
	if (!is64) {
		emit_a32_mov_r(dst_lo, src_lo, ctx);
		emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
		if (!ctx->prog->aux->verifier_zext)
			/* Zero out high 4 bytes */
			emit_a32_mov_i(dst_hi, 0, ctx);
	} else if (__LINUX_ARM_ARCH__ < 6 &&
		   ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
		/* complete 8 byte move */
		emit_a32_mov_r(dst_lo, src_lo, ctx);
		emit_a32_mov_r(dst_hi, src_hi, ctx);
		emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
		emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
	} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
		const u8 *tmp = bpf2a32[TMP_REG_1];

@@ -782,6 +913,24 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
	}
}

/* dst = (signed)src */
static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
				      struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *rt;

	rt = arm_bpf_get_reg64(dst, tmp, ctx);

	emit_a32_mov_r(dst_lo, src_lo, off, ctx);
	if (!is64) {
		if (!ctx->prog->aux->verifier_zext)
			/* Zero out high 4 bytes */
			emit_a32_mov_i(dst_hi, 0, ctx);
	} else {
		emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
	}
}

/* Shift operations */
static inline void emit_a32_alu_i(const s8 dst, const u32 val,
				struct jit_ctx *ctx, const u8 op) {
@@ -1026,6 +1175,24 @@ static bool is_ldst_imm(s16 off, const u8 size)
	return -off_max <= off && off <= off_max;
}

static bool is_ldst_imm8(s16 off, const u8 size)
{
	s16 off_max = 0;

	switch (size) {
	case BPF_B:
		off_max = 0xff;
		break;
	case BPF_W:
		off_max = 0xfff;
		break;
	case BPF_H:
		off_max = 0xff;
		break;
	}
	return -off_max <= off && off <= off_max;
}

/* *(size *)(dst + off) = src */
static inline void emit_str_r(const s8 dst, const s8 src[],
			      s16 off, struct jit_ctx *ctx, const u8 sz){
@@ -1105,6 +1272,50 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
	arm_bpf_put_reg64(dst, rd, ctx);
}

/* dst = *(signed size*)(src + off) */
static inline void emit_ldsx_r(const s8 dst[], const s8 src,
			       s16 off, struct jit_ctx *ctx, const u8 sz){
	const s8 *tmp = bpf2a32[TMP_REG_1];
	const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
	s8 rm = src;
	int add_off;

	if (!is_ldst_imm8(off, sz)) {
		/*
		 * offset does not fit in the load/store immediate,
		 * construct an ADD instruction to apply the offset.
		 */
		add_off = imm8m(off);
		if (add_off > 0) {
			emit(ARM_ADD_I(tmp[0], src, add_off), ctx);
			rm = tmp[0];
		} else {
			emit_a32_mov_i(tmp[0], off, ctx);
			emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
			rm = tmp[0];
		}
		off = 0;
	}

	switch (sz) {
	case BPF_B:
		/* Load a Byte with sign extension*/
		emit(ARM_LDRSB_I(rd[1], rm, off), ctx);
		break;
	case BPF_H:
		/* Load a HalfWord with sign extension*/
		emit(ARM_LDRSH_I(rd[1], rm, off), ctx);
		break;
	case BPF_W:
		/* Load a Word*/
		emit(ARM_LDR_I(rd[1], rm, off), ctx);
		break;
	}
	/* Carry the sign extension to upper 32 bits */
	emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
	arm_bpf_put_reg64(dst, rd, ctx);
}

/* Arithmatic Operation */
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
			     const u8 rn, struct jit_ctx *ctx, u8 op,
@@ -1385,6 +1596,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
				emit_a32_mov_i(dst_hi, 0, ctx);
				break;
			}
			if (insn->off)
				emit_a32_movsx_r64(is64, insn->off, dst, src, ctx);
			else
				emit_a32_mov_r64(is64, dst, src, ctx);
			break;
		case BPF_K:
@@ -1461,7 +1675,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
			rt = src_lo;
			break;
		}
		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
		arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_a32_mov_i(dst_hi, 0, ctx);
@@ -1470,7 +1684,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		goto notyet;
		rd = arm_bpf_get_reg64(dst, tmp2, ctx);
		switch (BPF_SRC(code)) {
		case BPF_X:
			rs = arm_bpf_get_reg64(src, tmp, ctx);
			break;
		case BPF_K:
			rs = tmp;
			emit_a32_mov_se_i64(is64, rs, imm, ctx);
			break;
		}
		emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off);
		arm_bpf_put_reg64(dst, rd, ctx);
		break;
	/* dst = dst << imm */
	/* dst = dst >> imm */
	/* dst = dst >> imm (signed) */
@@ -1545,10 +1771,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
		break;
	/* dst = htole(dst) */
	/* dst = htobe(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
	case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */
	/* dst = bswap(dst) */
	case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
		rd = arm_bpf_get_reg64(dst, tmp, ctx);
		if (BPF_SRC(code) == BPF_FROM_LE)
		if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64)
			goto emit_bswap_uxt;
		switch (imm) {
		case 16:
@@ -1603,7 +1831,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
	/* LDSX: dst = *(signed size *)(src + off) */
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
		rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
		if (BPF_MODE(insn->code) == BPF_MEMSX)
			emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code));
		else
			emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
		break;
	/* speculation barrier */
@@ -1761,10 +1996,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
		break;
	/* JMP OFF */
	case BPF_JMP | BPF_JA:
	case BPF_JMP32 | BPF_JA:
	{
		if (off == 0)
			break;
		if (BPF_CLASS(code) == BPF_JMP32 && imm != 0)
			jmp_offset = bpf2a32_offset(i + imm, i, ctx);
		else if (BPF_CLASS(code) == BPF_JMP && off != 0)
			jmp_offset = bpf2a32_offset(i + off, i, ctx);
		else
			break;

		check_imm24(jmp_offset);
		emit(ARM_B(jmp_offset), ctx);
		break;
Loading