Commit c4ab64e6 authored by Alexei Starovoitov
Browse files

Merge branch 'arm32-bpf-add-support-for-cpuv4-insns'

Puranjay Mohan says:

====================
arm32, bpf: add support for cpuv4 insns

Changes in V2 -> V3
- Added comments at places where there could be confusion.
- In the patch for DIV64, fix the if-else case that would never run.
- In the same patch use a single instruction to POP caller saved regs.
- Add a patch to change maintainership of ARM32 BPF JIT.

Changes in V1 -> V2:
- Fix coding style issues.
- Don't use tmp variable for src in emit_ldsx_r() as it is redundant.
- Optimize emit_ldsx_r() when offset can fit in immediate.

Add the support for cpuv4 instructions for ARM32 BPF JIT. 64-bit division
was not supported earlier so this series adds 64-bit DIV, SDIV, MOD, SMOD
instructions as well.

This series needs any one of the patches from [1] to disable zero-extension
for BPF_MEMSX to support ldsx.

The relevant selftests have passed, except ldsx_insn, which needs fentry:

Tested on BeagleBone Black (ARMv7-A):

[root@alarm del]# echo 1 > /proc/sys/net/core/bpf_jit_enable
[root@alarm del]# ./test_progs -a verifier_sdiv,verifier_movsx,verifier_ldsx,verifier_gotol,verifier_bswap
#337/1   verifier_bswap/BSWAP, 16:OK
#337/2   verifier_bswap/BSWAP, 16 @unpriv:OK
#337/3   verifier_bswap/BSWAP, 32:OK
#337/4   verifier_bswap/BSWAP, 32 @unpriv:OK
#337/5   verifier_bswap/BSWAP, 64:OK
#337/6   verifier_bswap/BSWAP, 64 @unpriv:OK
#337     verifier_bswap:OK
#351/1   verifier_gotol/gotol, small_imm:OK
#351/2   verifier_gotol/gotol, small_imm @unpriv:OK
#351     verifier_gotol:OK
#359/1   verifier_ldsx/LDSX, S8:OK
#359/2   verifier_ldsx/LDSX, S8 @unpriv:OK
#359/3   verifier_ldsx/LDSX, S16:OK
#359/4   verifier_ldsx/LDSX, S16 @unpriv:OK
#359/5   verifier_ldsx/LDSX, S32:OK
#359/6   verifier_ldsx/LDSX, S32 @unpriv:OK
#359/7   verifier_ldsx/LDSX, S8 range checking, privileged:OK
#359/8   verifier_ldsx/LDSX, S16 range checking:OK
#359/9   verifier_ldsx/LDSX, S16 range checking @unpriv:OK
#359/10  verifier_ldsx/LDSX, S32 range checking:OK
#359/11  verifier_ldsx/LDSX, S32 range checking @unpriv:OK
#359     verifier_ldsx:OK
#370/1   verifier_movsx/MOV32SX, S8:OK
#370/2   verifier_movsx/MOV32SX, S8 @unpriv:OK
#370/3   verifier_movsx/MOV32SX, S16:OK
#370/4   verifier_movsx/MOV32SX, S16 @unpriv:OK
#370/5   verifier_movsx/MOV64SX, S8:OK
#370/6   verifier_movsx/MOV64SX, S8 @unpriv:OK
#370/7   verifier_movsx/MOV64SX, S16:OK
#370/8   verifier_movsx/MOV64SX, S16 @unpriv:OK
#370/9   verifier_movsx/MOV64SX, S32:OK
#370/10  verifier_movsx/MOV64SX, S32 @unpriv:OK
#370/11  verifier_movsx/MOV32SX, S8, range_check:OK
#370/12  verifier_movsx/MOV32SX, S8, range_check @unpriv:OK
#370/13  verifier_movsx/MOV32SX, S16, range_check:OK
#370/14  verifier_movsx/MOV32SX, S16, range_check @unpriv:OK
#370/15  verifier_movsx/MOV32SX, S16, range_check 2:OK
#370/16  verifier_movsx/MOV32SX, S16, range_check 2 @unpriv:OK
#370/17  verifier_movsx/MOV64SX, S8, range_check:OK
#370/18  verifier_movsx/MOV64SX, S8, range_check @unpriv:OK
#370/19  verifier_movsx/MOV64SX, S16, range_check:OK
#370/20  verifier_movsx/MOV64SX, S16, range_check @unpriv:OK
#370/21  verifier_movsx/MOV64SX, S32, range_check:OK
#370/22  verifier_movsx/MOV64SX, S32, range_check @unpriv:OK
#370/23  verifier_movsx/MOV64SX, S16, R10 Sign Extension:OK
#370/24  verifier_movsx/MOV64SX, S16, R10 Sign Extension @unpriv:OK
#370     verifier_movsx:OK
#382/1   verifier_sdiv/SDIV32, non-zero imm divisor, check 1:OK
#382/2   verifier_sdiv/SDIV32, non-zero imm divisor, check 1 @unpriv:OK
#382/3   verifier_sdiv/SDIV32, non-zero imm divisor, check 2:OK
#382/4   verifier_sdiv/SDIV32, non-zero imm divisor, check 2 @unpriv:OK
#382/5   verifier_sdiv/SDIV32, non-zero imm divisor, check 3:OK
#382/6   verifier_sdiv/SDIV32, non-zero imm divisor, check 3 @unpriv:OK
#382/7   verifier_sdiv/SDIV32, non-zero imm divisor, check 4:OK
#382/8   verifier_sdiv/SDIV32, non-zero imm divisor, check 4 @unpriv:OK
#382/9   verifier_sdiv/SDIV32, non-zero imm divisor, check 5:OK
#382/10  verifier_sdiv/SDIV32, non-zero imm divisor, check 5 @unpriv:OK
#382/11  verifier_sdiv/SDIV32, non-zero imm divisor, check 6:OK
#382/12  verifier_sdiv/SDIV32, non-zero imm divisor, check 6 @unpriv:OK
#382/13  verifier_sdiv/SDIV32, non-zero imm divisor, check 7:OK
#382/14  verifier_sdiv/SDIV32, non-zero imm divisor, check 7 @unpriv:OK
#382/15  verifier_sdiv/SDIV32, non-zero imm divisor, check 8:OK
#382/16  verifier_sdiv/SDIV32, non-zero imm divisor, check 8 @unpriv:OK
#382/17  verifier_sdiv/SDIV32, non-zero reg divisor, check 1:OK
#382/18  verifier_sdiv/SDIV32, non-zero reg divisor, check 1 @unpriv:OK
#382/19  verifier_sdiv/SDIV32, non-zero reg divisor, check 2:OK
#382/20  verifier_sdiv/SDIV32, non-zero reg divisor, check 2 @unpriv:OK
#382/21  verifier_sdiv/SDIV32, non-zero reg divisor, check 3:OK
#382/22  verifier_sdiv/SDIV32, non-zero reg divisor, check 3 @unpriv:OK
#382/23  verifier_sdiv/SDIV32, non-zero reg divisor, check 4:OK
#382/24  verifier_sdiv/SDIV32, non-zero reg divisor, check 4 @unpriv:OK
#382/25  verifier_sdiv/SDIV32, non-zero reg divisor, check 5:OK
#382/26  verifier_sdiv/SDIV32, non-zero reg divisor, check 5 @unpriv:OK
#382/27  verifier_sdiv/SDIV32, non-zero reg divisor, check 6:OK
#382/28  verifier_sdiv/SDIV32, non-zero reg divisor, check 6 @unpriv:OK
#382/29  verifier_sdiv/SDIV32, non-zero reg divisor, check 7:OK
#382/30  verifier_sdiv/SDIV32, non-zero reg divisor, check 7 @unpriv:OK
#382/31  verifier_sdiv/SDIV32, non-zero reg divisor, check 8:OK
#382/32  verifier_sdiv/SDIV32, non-zero reg divisor, check 8 @unpriv:OK
#382/33  verifier_sdiv/SDIV64, non-zero imm divisor, check 1:OK
#382/34  verifier_sdiv/SDIV64, non-zero imm divisor, check 1 @unpriv:OK
#382/35  verifier_sdiv/SDIV64, non-zero imm divisor, check 2:OK
#382/36  verifier_sdiv/SDIV64, non-zero imm divisor, check 2 @unpriv:OK
#382/37  verifier_sdiv/SDIV64, non-zero imm divisor, check 3:OK
#382/38  verifier_sdiv/SDIV64, non-zero imm divisor, check 3 @unpriv:OK
#382/39  verifier_sdiv/SDIV64, non-zero imm divisor, check 4:OK
#382/40  verifier_sdiv/SDIV64, non-zero imm divisor, check 4 @unpriv:OK
#382/41  verifier_sdiv/SDIV64, non-zero imm divisor, check 5:OK
#382/42  verifier_sdiv/SDIV64, non-zero imm divisor, check 5 @unpriv:OK
#382/43  verifier_sdiv/SDIV64, non-zero imm divisor, check 6:OK
#382/44  verifier_sdiv/SDIV64, non-zero imm divisor, check 6 @unpriv:OK
#382/45  verifier_sdiv/SDIV64, non-zero reg divisor, check 1:OK
#382/46  verifier_sdiv/SDIV64, non-zero reg divisor, check 1 @unpriv:OK
#382/47  verifier_sdiv/SDIV64, non-zero reg divisor, check 2:OK
#382/48  verifier_sdiv/SDIV64, non-zero reg divisor, check 2 @unpriv:OK
#382/49  verifier_sdiv/SDIV64, non-zero reg divisor, check 3:OK
#382/50  verifier_sdiv/SDIV64, non-zero reg divisor, check 3 @unpriv:OK
#382/51  verifier_sdiv/SDIV64, non-zero reg divisor, check 4:OK
#382/52  verifier_sdiv/SDIV64, non-zero reg divisor, check 4 @unpriv:OK
#382/53  verifier_sdiv/SDIV64, non-zero reg divisor, check 5:OK
#382/54  verifier_sdiv/SDIV64, non-zero reg divisor, check 5 @unpriv:OK
#382/55  verifier_sdiv/SDIV64, non-zero reg divisor, check 6:OK
#382/56  verifier_sdiv/SDIV64, non-zero reg divisor, check 6 @unpriv:OK
#382/57  verifier_sdiv/SMOD32, non-zero imm divisor, check 1:OK
#382/58  verifier_sdiv/SMOD32, non-zero imm divisor, check 1 @unpriv:OK
#382/59  verifier_sdiv/SMOD32, non-zero imm divisor, check 2:OK
#382/60  verifier_sdiv/SMOD32, non-zero imm divisor, check 2 @unpriv:OK
#382/61  verifier_sdiv/SMOD32, non-zero imm divisor, check 3:OK
#382/62  verifier_sdiv/SMOD32, non-zero imm divisor, check 3 @unpriv:OK
#382/63  verifier_sdiv/SMOD32, non-zero imm divisor, check 4:OK
#382/64  verifier_sdiv/SMOD32, non-zero imm divisor, check 4 @unpriv:OK
#382/65  verifier_sdiv/SMOD32, non-zero imm divisor, check 5:OK
#382/66  verifier_sdiv/SMOD32, non-zero imm divisor, check 5 @unpriv:OK
#382/67  verifier_sdiv/SMOD32, non-zero imm divisor, check 6:OK
#382/68  verifier_sdiv/SMOD32, non-zero imm divisor, check 6 @unpriv:OK
#382/69  verifier_sdiv/SMOD32, non-zero reg divisor, check 1:OK
#382/70  verifier_sdiv/SMOD32, non-zero reg divisor, check 1 @unpriv:OK
#382/71  verifier_sdiv/SMOD32, non-zero reg divisor, check 2:OK
#382/72  verifier_sdiv/SMOD32, non-zero reg divisor, check 2 @unpriv:OK
#382/73  verifier_sdiv/SMOD32, non-zero reg divisor, check 3:OK
#382/74  verifier_sdiv/SMOD32, non-zero reg divisor, check 3 @unpriv:OK
#382/75  verifier_sdiv/SMOD32, non-zero reg divisor, check 4:OK
#382/76  verifier_sdiv/SMOD32, non-zero reg divisor, check 4 @unpriv:OK
#382/77  verifier_sdiv/SMOD32, non-zero reg divisor, check 5:OK
#382/78  verifier_sdiv/SMOD32, non-zero reg divisor, check 5 @unpriv:OK
#382/79  verifier_sdiv/SMOD32, non-zero reg divisor, check 6:OK
#382/80  verifier_sdiv/SMOD32, non-zero reg divisor, check 6 @unpriv:OK
#382/81  verifier_sdiv/SMOD64, non-zero imm divisor, check 1:OK
#382/82  verifier_sdiv/SMOD64, non-zero imm divisor, check 1 @unpriv:OK
#382/83  verifier_sdiv/SMOD64, non-zero imm divisor, check 2:OK
#382/84  verifier_sdiv/SMOD64, non-zero imm divisor, check 2 @unpriv:OK
#382/85  verifier_sdiv/SMOD64, non-zero imm divisor, check 3:OK
#382/86  verifier_sdiv/SMOD64, non-zero imm divisor, check 3 @unpriv:OK
#382/87  verifier_sdiv/SMOD64, non-zero imm divisor, check 4:OK
#382/88  verifier_sdiv/SMOD64, non-zero imm divisor, check 4 @unpriv:OK
#382/89  verifier_sdiv/SMOD64, non-zero imm divisor, check 5:OK
#382/90  verifier_sdiv/SMOD64, non-zero imm divisor, check 5 @unpriv:OK
#382/91  verifier_sdiv/SMOD64, non-zero imm divisor, check 6:OK
#382/92  verifier_sdiv/SMOD64, non-zero imm divisor, check 6 @unpriv:OK
#382/93  verifier_sdiv/SMOD64, non-zero imm divisor, check 7:OK
#382/94  verifier_sdiv/SMOD64, non-zero imm divisor, check 7 @unpriv:OK
#382/95  verifier_sdiv/SMOD64, non-zero imm divisor, check 8:OK
#382/96  verifier_sdiv/SMOD64, non-zero imm divisor, check 8 @unpriv:OK
#382/97  verifier_sdiv/SMOD64, non-zero reg divisor, check 1:OK
#382/98  verifier_sdiv/SMOD64, non-zero reg divisor, check 1 @unpriv:OK
#382/99  verifier_sdiv/SMOD64, non-zero reg divisor, check 2:OK
#382/100 verifier_sdiv/SMOD64, non-zero reg divisor, check 2 @unpriv:OK
#382/101 verifier_sdiv/SMOD64, non-zero reg divisor, check 3:OK
#382/102 verifier_sdiv/SMOD64, non-zero reg divisor, check 3 @unpriv:OK
#382/103 verifier_sdiv/SMOD64, non-zero reg divisor, check 4:OK
#382/104 verifier_sdiv/SMOD64, non-zero reg divisor, check 4 @unpriv:OK
#382/105 verifier_sdiv/SMOD64, non-zero reg divisor, check 5:OK
#382/106 verifier_sdiv/SMOD64, non-zero reg divisor, check 5 @unpriv:OK
#382/107 verifier_sdiv/SMOD64, non-zero reg divisor, check 6:OK
#382/108 verifier_sdiv/SMOD64, non-zero reg divisor, check 6 @unpriv:OK
#382/109 verifier_sdiv/SMOD64, non-zero reg divisor, check 7:OK
#382/110 verifier_sdiv/SMOD64, non-zero reg divisor, check 7 @unpriv:OK
#382/111 verifier_sdiv/SMOD64, non-zero reg divisor, check 8:OK
#382/112 verifier_sdiv/SMOD64, non-zero reg divisor, check 8 @unpriv:OK
#382/113 verifier_sdiv/SDIV32, zero divisor:OK
#382/114 verifier_sdiv/SDIV32, zero divisor @unpriv:OK
#382/115 verifier_sdiv/SDIV64, zero divisor:OK
#382/116 verifier_sdiv/SDIV64, zero divisor @unpriv:OK
#382/117 verifier_sdiv/SMOD32, zero divisor:OK
#382/118 verifier_sdiv/SMOD32, zero divisor @unpriv:OK
#382/119 verifier_sdiv/SMOD64, zero divisor:OK
#382/120 verifier_sdiv/SMOD64, zero divisor @unpriv:OK
#382     verifier_sdiv:OK
Summary: 5/163 PASSED, 0 SKIPPED, 0 FAILED

As the selftests don't compile for 32-bit architectures without
modifications due to long being 32-bit,
I have added new tests to lib/test_bpf.c for cpuv4 insns, all are passing:

test_bpf: Summary: 1052 PASSED, 0 FAILED, [891/1040 JIT'ed]
test_bpf: test_tail_calls: Summary: 10 PASSED, 0 FAILED, [10/10 JIT'ed]
test_bpf: test_skb_segment: Summary: 2 PASSED, 0 FAILED

[1] https://lore.kernel.org/all/mb61p5y4u3ptd.fsf@amazon.com/
====================

Link: https://lore.kernel.org/r/20230907230550.1417590-1-puranjay12@gmail.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 9b2b8633 9b31b4f1
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -3596,9 +3596,10 @@ F: Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml
F:	drivers/iio/accel/bma400*
BPF JIT for ARM
M:	Shubham Bansal <illusionist.neo@gmail.com>
M:	Russell King <linux@armlinux.org.uk>
M:	Puranjay Mohan <puranjay12@gmail.com>
L:	bpf@vger.kernel.org
S:	Odd Fixes
S:	Maintained
F:	arch/arm/net/
BPF JIT for ARM64
+260 −20
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
/*
 * Just-In-Time compiler for eBPF filters on 32bit ARM
 *
 * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com>
 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 */
@@ -15,6 +16,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
#include <linux/math64.h>

#include <asm/cacheflush.h>
#include <asm/hwcap.h>
@@ -228,6 +230,44 @@ static u32 jit_mod32(u32 dividend, u32 divisor)
	return dividend % divisor;
}

/*
 * Signed 32-bit division helper whose address is loaded and called by the
 * code that emit_udivmod() generates for a signed BPF_DIV.
 *
 * @dividend: value to be divided
 * @divisor:  value to divide by (a zero divisor is not handled here)
 *
 * Returns the truncated quotient. INT_MIN / -1 does not fit in s32 and is
 * undefined behaviour in C, so a divisor of -1 is handled by negating the
 * dividend in unsigned arithmetic, which wraps INT_MIN back to INT_MIN.
 */
static s32 jit_sdiv32(s32 dividend, s32 divisor)
{
	if (divisor == -1)
		return (s32)(0U - (u32)dividend);

	return dividend / divisor;
}

/*
 * Signed 32-bit remainder helper whose address is loaded and called by the
 * code that emit_udivmod() generates for a signed BPF_MOD.
 *
 * @dividend: value to be divided
 * @divisor:  value to divide by (a zero divisor is not handled here)
 *
 * Returns the remainder, which takes the sign of the dividend. Anything
 * modulo -1 is 0; that case is answered directly because INT_MIN % -1 is
 * undefined behaviour in C (the implied quotient overflows s32).
 */
static s32 jit_smod32(s32 dividend, s32 divisor)
{
	if (divisor == -1)
		return 0;

	return dividend % divisor;
}

/* Wrappers for 64-bit div/mod */
/*
 * Unsigned 64-bit division helper; emit_udivmod64() loads its address and
 * calls it from JITed code. Thin wrapper around div64_u64().
 */
static u64 jit_udiv64(u64 dividend, u64 divisor)
{
	const u64 quotient = div64_u64(dividend, divisor);

	return quotient;
}

/*
 * Unsigned 64-bit remainder helper; emit_udivmod64() loads its address and
 * calls it from JITed code.
 */
static u64 jit_mod64(u64 dividend, u64 divisor)
{
	u64 remainder;

	/* Only the remainder written through the pointer is of interest here */
	div64_u64_rem(dividend, divisor, &remainder);

	return remainder;
}

/*
 * Signed 64-bit division helper; emit_udivmod64() loads its address and
 * calls it from JITed code. Thin wrapper around div64_s64().
 */
static s64 jit_sdiv64(s64 dividend, s64 divisor)
{
	const s64 quotient = div64_s64(dividend, divisor);

	return quotient;
}

/*
 * Signed 64-bit remainder helper; emit_udivmod64() loads its address and
 * calls it from JITed code. The remainder is derived from the quotient as
 * dividend - quotient * divisor.
 */
static s64 jit_smod64(s64 dividend, s64 divisor)
{
	/*
	 * The quotient is held in a u64, so the multiply and subtract below
	 * are carried out in unsigned 64-bit arithmetic before the result is
	 * converted back to s64 on return.
	 */
	u64 quot = div64_s64(dividend, divisor);

	return dividend - quot * divisor;
}

static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
	inst |= (cond << 28);
@@ -333,6 +373,9 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8)
#define ARM_LDRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off)
#define ARM_LDRH_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off)

/*
 * Sign-extending halfword (LDRSH) and byte (LDRSB) loads with an immediate
 * offset. Both are encoded through arm_bpf_ldst_imm8(), the same helper used
 * for LDRH/LDRD above.
 */
#define ARM_LDRSH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSH_I, rt, rn, off)
#define ARM_LDRSB_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRSB_I, rt, rn, off)

#define ARM_STR_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off)
#define ARM_STRB_I(rt, rn, off)	arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off)
#define ARM_STRD_I(rt, rn, off)	arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off)
@@ -474,17 +517,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
	return to - from - 2;
}

static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op, u8 sign)
{
	const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
	const s8 *tmp = bpf2a32[TMP_REG_1];
	u32 dst;

#if __LINUX_ARM_ARCH__ == 7
	if (elf_hwcap & HWCAP_IDIVA) {
		if (op == BPF_DIV)
			emit(ARM_UDIV(rd, rm, rn), ctx);
		else {
			emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
		if (op == BPF_DIV) {
			emit(sign ? ARM_SDIV(rd, rm, rn) : ARM_UDIV(rd, rm, rn), ctx);
		} else {
			emit(sign ? ARM_SDIV(ARM_IP, rm, rn) : ARM_UDIV(ARM_IP, rm, rn), ctx);
			emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
		}
		return;
@@ -512,8 +556,19 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
	emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);

	/* Call appropriate function */
	emit_mov_i(ARM_IP, op == BPF_DIV ?
		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
	if (sign) {
		if (op == BPF_DIV)
			dst = (u32)jit_sdiv32;
		else
			dst = (u32)jit_smod32;
	} else {
		if (op == BPF_DIV)
			dst = (u32)jit_udiv32;
		else
			dst = (u32)jit_mod32;
	}

	emit_mov_i(ARM_IP, dst, ctx);
	emit_blx_r(ARM_IP, ctx);

	/* Restore caller-saved registers from stack */
@@ -530,6 +585,78 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
		emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}

/*
 * Emit a 64-bit divide or modulo as a call to one of the jit_*64() helpers.
 *
 * @rd:   ARM register pair receiving the result
 * @rm:   ARM register pair holding the dividend
 * @rn:   ARM register pair holding the divisor
 * @ctx:  JIT context the instructions are emitted into
 * @op:   BPF_DIV selects division; any other value selects modulo
 * @sign: non-zero selects the signed helper, zero the unsigned one
 *
 * For every pair, index [1] is the register that ends up in R0/R2 and
 * index [0] the one that ends up in R1/R3.
 */
static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx,
				  u8 op, u8 sign)
{
	u32 dst;

	/* Push caller-saved registers on stack */
	/* NOTE(review): CALLER_MASK presumably covers R0-R3; defined outside this view */
	emit(ARM_PUSH(CALLER_MASK), ctx);

	/*
	 * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in
	 * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack,
	 * we can recover them later after returning from the function call.
	 */
	if (rm[1] != ARM_R0 || rn[1] != ARM_R2) {
		/*
		 * Move Rm to {R1, R0} if it is not already there.
		 */
		if (rm[1] != ARM_R0) {
			/*
			 * The divisor currently lives in {R1, R0}, which the moves
			 * below overwrite: park it on the stack first.
			 */
			if (rn[1] == ARM_R0)
				emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
			emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
			emit(ARM_MOV_R(ARM_R0, rm[1]), ctx);
			if (rn[1] == ARM_R0) {
				/* Pop the parked divisor straight into {R3, R2} */
				emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
				goto cont;
			}
		}
		/*
		 * Move Rn to {R3, R2} if it is not already there.
		 */
		if (rn[1] != ARM_R2) {
			emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
			emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
		}
	}

cont:

	/* Call appropriate function */
	if (sign) {
		if (op == BPF_DIV)
			dst = (u32)jit_sdiv64;
		else
			dst = (u32)jit_smod64;
	} else {
		if (op == BPF_DIV)
			dst = (u32)jit_udiv64;
		else
			dst = (u32)jit_mod64;
	}

	/* Load the helper's address into IP and branch-with-link to it */
	emit_mov_i(ARM_IP, dst, ctx);
	emit_blx_r(ARM_IP, ctx);

	/* Save return value */
	/* The helper's result is taken from {R1, R0}; copy it out unless Rd is that pair */
	if (rd[1] != ARM_R0) {
		emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
		emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
	}

	/* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */
	if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
		/* Rd is neither pair: restore everything that was pushed */
		emit(ARM_POP(CALLER_MASK), ctx);
	} else if (rd[1] != ARM_R0) {
		/* Rd is {R3, R2}: restore R0/R1, then step SP over the two words not popped */
		emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
	} else {
		/* Rd is {R1, R0}: step SP over the first two saved words, then restore R2/R3 */
		emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
		emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
	}
}

/* Is the translated BPF register on stack? */
static bool is_stacked(s8 reg)
{
@@ -744,12 +871,16 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
}

/* dst = src (4 bytes)*/
static inline void emit_a32_mov_r(const s8 dst, const s8 src,
static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
				  struct jit_ctx *ctx) {
	/*
	 * Move one 32-bit word from src to dst. When off is a width narrower
	 * than 32 (non-zero and not 32), the low 'off' bits of src are
	 * sign-extended to 32 bits on the way; off == 0 or off == 32 is a
	 * plain move.
	 */
	const s8 *tmp = bpf2a32[TMP_REG_1];
	s8 rt;
	s8 rd;

	rt = arm_bpf_get_reg32(src, tmp[0], ctx);
	if (off && off != 32) {
		/*
		 * rt may be the source register itself when src is not
		 * stacked, so never shift it in place: that would corrupt the
		 * source BPF register. Build the extended value in dst, or in
		 * the scratch register when dst lives on the stack.
		 */
		rd = is_stacked(dst) ? tmp[0] : dst;
		emit(ARM_LSL_I(rd, rt, 32 - off), ctx);
		emit(ARM_ASR_I(rd, rd, 32 - off), ctx);
		rt = rd;
	}
	arm_bpf_put_reg32(dst, rt, ctx);
}

@@ -758,15 +889,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
				  const s8 src[],
				  struct jit_ctx *ctx) {
	if (!is64) {
		emit_a32_mov_r(dst_lo, src_lo, ctx);
		emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
		if (!ctx->prog->aux->verifier_zext)
			/* Zero out high 4 bytes */
			emit_a32_mov_i(dst_hi, 0, ctx);
	} else if (__LINUX_ARM_ARCH__ < 6 &&
		   ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
		/* complete 8 byte move */
		emit_a32_mov_r(dst_lo, src_lo, ctx);
		emit_a32_mov_r(dst_hi, src_hi, ctx);
		emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
		emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
	} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
		const u8 *tmp = bpf2a32[TMP_REG_1];

@@ -782,6 +913,24 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
	}
}

/*
 * dst = (signed)src
 *
 * @is64: true for a 64-bit move (high word becomes the sign of the low word),
 *        false for a 32-bit move (high word is zeroed unless the verifier
 *        already inserts the zero extension)
 * @off:  width in bits of the value to sign-extend; 0 or 32 means the low
 *        word is copied unchanged
 * @dst:  destination register pair, [0] = high word, [1] = low word
 * @src:  source register pair, same layout
 *
 * The low word is built in dst_lo, or in the scratch register when dst is
 * stacked, so the source register is never modified. The high word is
 * derived from the freshly extended low word and is written back with
 * arm_bpf_put_reg32() so a stacked destination is updated as well.
 */
static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
				      struct jit_ctx *ctx) {
	const s8 *tmp = bpf2a32[TMP_REG_1];
	s8 rs;
	s8 rd;
	s8 rd_hi;

	rd = is_stacked(dst_lo) ? tmp[1] : dst_lo;
	/* rs is src_lo itself when it is in a register, otherwise it is loaded into rd */
	rs = arm_bpf_get_reg32(src_lo, rd, ctx);

	if (off && off != 32) {
		/* Sign extend the low 'off' bits of rs into rd, leaving rs intact */
		emit(ARM_LSL_I(rd, rs, 32 - off), ctx);
		emit(ARM_ASR_I(rd, rd, 32 - off), ctx);
	} else {
		/* Full word: nothing to extend in the low half */
		rd = rs;
	}
	arm_bpf_put_reg32(dst_lo, rd, ctx);

	if (!is64) {
		if (!ctx->prog->aux->verifier_zext)
			/* Zero out high 4 bytes */
			emit_a32_mov_i(dst_hi, 0, ctx);
	} else {
		/* Replicate the sign bit of the new low word across the high word */
		rd_hi = is_stacked(dst_hi) ? tmp[0] : dst_hi;
		emit(ARM_ASR_I(rd_hi, rd, 31), ctx);
		arm_bpf_put_reg32(dst_hi, rd_hi, ctx);
	}
}

/* Shift operations */
static inline void emit_a32_alu_i(const s8 dst, const u32 val,
				struct jit_ctx *ctx, const u8 op) {
@@ -1026,6 +1175,24 @@ static bool is_ldst_imm(s16 off, const u8 size)
	return -off_max <= off && off <= off_max;
}

/*
 * Tell whether @off can be used directly as the immediate offset of the load
 * that emit_ldsx_r() emits for an access of @size.
 *
 * Byte and halfword accesses accept offsets within +/-0xff, word accesses
 * within +/-0xfff. Any other size only accepts an offset of 0.
 */
static bool is_ldst_imm8(s16 off, const u8 size)
{
	s16 limit;

	if (size == BPF_W)
		limit = 0xfff;
	else if (size == BPF_B || size == BPF_H)
		limit = 0xff;
	else
		limit = 0;

	return off >= -limit && off <= limit;
}

/* *(size *)(dst + off) = src */
static inline void emit_str_r(const s8 dst, const s8 src[],
			      s16 off, struct jit_ctx *ctx, const u8 sz){
@@ -1105,6 +1272,50 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src,
	arm_bpf_put_reg64(dst, rd, ctx);
}

/*
 * dst = *(signed size *)(src + off)
 *
 * @dst: destination register pair, [0] = high word, [1] = low word
 * @src: ARM register holding the base address
 * @off: byte offset applied to the base
 * @ctx: JIT context the instructions are emitted into
 * @sz:  BPF_B, BPF_H or BPF_W
 *
 * The loaded value lands in the low word and its sign bit is then spread
 * over the high word.
 */
static inline void emit_ldsx_r(const s8 dst[], const s8 src,
			       s16 off, struct jit_ctx *ctx, const u8 sz){
	const s8 *scratch = bpf2a32[TMP_REG_1];
	const s8 *rd = dst;
	s8 base = src;
	int enc;

	/* A stacked destination is assembled in the scratch pair and stored at the end */
	if (is_stacked(dst_lo))
		rd = scratch;

	if (!is_ldst_imm8(off, sz)) {
		/*
		 * The offset is too large for the load's immediate field:
		 * fold it into the base address with an ADD and load from
		 * offset 0 instead.
		 */
		enc = imm8m(off);
		if (enc > 0) {
			emit(ARM_ADD_I(scratch[0], src, enc), ctx);
		} else {
			emit_a32_mov_i(scratch[0], off, ctx);
			emit(ARM_ADD_R(scratch[0], scratch[0], src), ctx);
		}
		base = scratch[0];
		off = 0;
	}

	if (sz == BPF_B) {
		/* Byte, sign-extended by the load itself */
		emit(ARM_LDRSB_I(rd[1], base, off), ctx);
	} else if (sz == BPF_H) {
		/* Halfword, sign-extended by the load itself */
		emit(ARM_LDRSH_I(rd[1], base, off), ctx);
	} else if (sz == BPF_W) {
		/* Full word, nothing to extend in the low half */
		emit(ARM_LDR_I(rd[1], base, off), ctx);
	}

	/* High word = sign of the low word */
	emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
	arm_bpf_put_reg64(dst, rd, ctx);
}

/* Arithmetic Operation */
static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
			     const u8 rn, struct jit_ctx *ctx, u8 op,
@@ -1385,6 +1596,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
				emit_a32_mov_i(dst_hi, 0, ctx);
				break;
			}
			if (insn->off)
				emit_a32_movsx_r64(is64, insn->off, dst, src, ctx);
			else
				emit_a32_mov_r64(is64, dst, src, ctx);
			break;
		case BPF_K:
@@ -1461,7 +1675,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
			rt = src_lo;
			break;
		}
		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code));
		emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code), off);
		arm_bpf_put_reg32(dst_lo, rd_lo, ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_a32_mov_i(dst_hi, 0, ctx);
@@ -1470,7 +1684,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		goto notyet;
		rd = arm_bpf_get_reg64(dst, tmp2, ctx);
		switch (BPF_SRC(code)) {
		case BPF_X:
			rs = arm_bpf_get_reg64(src, tmp, ctx);
			break;
		case BPF_K:
			rs = tmp;
			emit_a32_mov_se_i64(is64, rs, imm, ctx);
			break;
		}
		emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off);
		arm_bpf_put_reg64(dst, rd, ctx);
		break;
	/* dst = dst << imm */
	/* dst = dst >> imm */
	/* dst = dst >> imm (signed) */
@@ -1545,10 +1771,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
		break;
	/* dst = htole(dst) */
	/* dst = htobe(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	case BPF_ALU | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
	case BPF_ALU | BPF_END | BPF_FROM_BE: /* also BPF_TO_BE */
	/* dst = bswap(dst) */
	case BPF_ALU64 | BPF_END | BPF_FROM_LE: /* also BPF_TO_LE */
		rd = arm_bpf_get_reg64(dst, tmp, ctx);
		if (BPF_SRC(code) == BPF_FROM_LE)
		if (BPF_SRC(code) == BPF_FROM_LE && BPF_CLASS(code) != BPF_ALU64)
			goto emit_bswap_uxt;
		switch (imm) {
		case 16:
@@ -1603,7 +1831,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
	/* LDSX: dst = *(signed size *)(src + off) */
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
		rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
		if (BPF_MODE(insn->code) == BPF_MEMSX)
			emit_ldsx_r(dst, rn, off, ctx, BPF_SIZE(code));
		else
			emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
		break;
	/* speculation barrier */
@@ -1761,10 +1996,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
		break;
	/* JMP OFF */
	case BPF_JMP | BPF_JA:
	case BPF_JMP32 | BPF_JA:
	{
		if (off == 0)
			break;
		if (BPF_CLASS(code) == BPF_JMP32 && imm != 0)
			jmp_offset = bpf2a32_offset(i + imm, i, ctx);
		else if (BPF_CLASS(code) == BPF_JMP && off != 0)
			jmp_offset = bpf2a32_offset(i + off, i, ctx);
		else
			break;

		check_imm24(jmp_offset);
		emit(ARM_B(jmp_offset), ctx);
		break;
+4 −0
Original line number Diff line number Diff line
@@ -79,9 +79,11 @@
#define ARM_INST_LDST__IMM12	0x00000fff
#define ARM_INST_LDRB_I		0x05500000
#define ARM_INST_LDRB_R		0x07d00000
#define ARM_INST_LDRSB_I	0x015000d0
#define ARM_INST_LDRD_I		0x014000d0
#define ARM_INST_LDRH_I		0x015000b0
#define ARM_INST_LDRH_R		0x019000b0
#define ARM_INST_LDRSH_I	0x015000f0
#define ARM_INST_LDR_I		0x05100000
#define ARM_INST_LDR_R		0x07900000

@@ -137,6 +139,7 @@
#define ARM_INST_TST_I		0x03100000

#define ARM_INST_UDIV		0x0730f010
#define ARM_INST_SDIV		0x0710f010

#define ARM_INST_UMULL		0x00800090

@@ -265,6 +268,7 @@
#define ARM_TST_I(rn, imm)	_AL3_I(ARM_INST_TST, 0, rn, imm)

#define ARM_UDIV(rd, rn, rm)	(ARM_INST_UDIV | (rd) << 16 | (rn) | (rm) << 8)
#define ARM_SDIV(rd, rn, rm)	(ARM_INST_SDIV | (rd) << 16 | (rn) | (rm) << 8)

#define ARM_UMULL(rd_lo, rd_hi, rn, rm)	(ARM_INST_UMULL | (rd_hi) << 16 \
					 | (rd_lo) << 12 | (rm) << 8 | rn)
+46 −4
Original line number Diff line number Diff line
@@ -117,21 +117,25 @@ struct ctl_table_header;

/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */

#define BPF_ALU64_IMM(OP, DST, IMM)				\
#define BPF_ALU64_IMM_OFF(OP, DST, IMM, OFF)			\
	((struct bpf_insn) {					\
		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
		.dst_reg = DST,					\
		.src_reg = 0,					\
		.off   = 0,					\
		.off   = OFF,					\
		.imm   = IMM })
#define BPF_ALU64_IMM(OP, DST, IMM)				\
	BPF_ALU64_IMM_OFF(OP, DST, IMM, 0)

#define BPF_ALU32_IMM(OP, DST, IMM)				\
#define BPF_ALU32_IMM_OFF(OP, DST, IMM, OFF)			\
	((struct bpf_insn) {					\
		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
		.dst_reg = DST,					\
		.src_reg = 0,					\
		.off   = 0,					\
		.off   = OFF,					\
		.imm   = IMM })
#define BPF_ALU32_IMM(OP, DST, IMM)				\
	BPF_ALU32_IMM_OFF(OP, DST, IMM, 0)

/* Endianness conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */

@@ -143,6 +147,16 @@ struct ctl_table_header;
		.off   = 0,					\
		.imm   = LEN })

/*
 * Byte Swap, bswap16/32/64: dst_reg = bswap(dst_reg)
 *
 * Builds a BPF_END instruction in the BPF_ALU64 class with the BPF_TO_LE
 * source bit. LEN is the swap width in bits and is carried in .imm.
 */

#define BPF_BSWAP(DST, LEN)					\
	((struct bpf_insn) {					\
		.code  = BPF_ALU64 | BPF_END | BPF_SRC(BPF_TO_LE),	\
		.dst_reg = DST,					\
		.src_reg = 0,					\
		.off   = 0,					\
		.imm   = LEN })

/* Short form of mov, dst_reg = src_reg */

#define BPF_MOV64_REG(DST, SRC)					\
@@ -179,6 +193,24 @@ struct ctl_table_header;
		.off   = 0,					\
		.imm   = IMM })

/*
 * Short form of movsx, dst_reg = (s8,s16,s32)src_reg
 *
 * Same encoding as a register-to-register BPF_MOV, with the width in bits of
 * the value to sign-extend carried in .off (OFF).
 */

/* 64-bit variant: BPF_ALU64 class */
#define BPF_MOVSX64_REG(DST, SRC, OFF)				\
	((struct bpf_insn) {					\
		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
		.dst_reg = DST,					\
		.src_reg = SRC,					\
		.off   = OFF,					\
		.imm   = 0 })

/* 32-bit variant: BPF_ALU class */
#define BPF_MOVSX32_REG(DST, SRC, OFF)				\
	((struct bpf_insn) {					\
		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
		.dst_reg = DST,					\
		.src_reg = SRC,					\
		.off   = OFF,					\
		.imm   = 0 })

/* Special form of mov32, used for doing explicit zero extension on dst. */
#define BPF_ZEXT_REG(DST)					\
	((struct bpf_insn) {					\
@@ -263,6 +295,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
		.off   = OFF,					\
		.imm   = 0 })

/*
 * Memory load, dst_reg = *(signed size *) (src_reg + off16)
 *
 * Builds a BPF_LDX instruction in BPF_MEMSX mode. SIZE selects the access
 * width and OFF is the offset added to src_reg.
 */

#define BPF_LDX_MEMSX(SIZE, DST, SRC, OFF)			\
	((struct bpf_insn) {					\
		.code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEMSX,	\
		.dst_reg = DST,					\
		.src_reg = SRC,					\
		.off   = OFF,					\
		.imm   = 0 })

/* Memory store, *(uint *) (dst_reg + off16) = src_reg */

#define BPF_STX_MEM(SIZE, DST, SRC, OFF)			\
+371 −0
Original line number Diff line number Diff line
@@ -5111,6 +5111,104 @@ static struct bpf_test tests[] = {
		{ },
		{ { 0, 0xffffffff } }
	},
	/* MOVSX32 */
	{
		"ALU_MOVSX | BPF_B",
		.u.insns_int = {
			BPF_LD_IMM64(R2, 0x00000000ffffffefLL),
			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
			BPF_MOVSX32_REG(R1, R3, 8),
			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
			BPF_MOV32_IMM(R0, 2),
			BPF_EXIT_INSN(),
			BPF_MOV32_IMM(R0, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x1 } },
	},
	{
		"ALU_MOVSX | BPF_H",
		.u.insns_int = {
			BPF_LD_IMM64(R2, 0x00000000ffffbeefLL),
			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
			BPF_MOVSX32_REG(R1, R3, 16),
			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
			BPF_MOV32_IMM(R0, 2),
			BPF_EXIT_INSN(),
			BPF_MOV32_IMM(R0, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x1 } },
	},
	{
		"ALU_MOVSX | BPF_W",
		.u.insns_int = {
			BPF_LD_IMM64(R2, 0x00000000deadbeefLL),
			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
			BPF_MOVSX32_REG(R1, R3, 32),
			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
			BPF_MOV32_IMM(R0, 2),
			BPF_EXIT_INSN(),
			BPF_MOV32_IMM(R0, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x1 } },
	},
	/* MOVSX64 REG */
	{
		"ALU64_MOVSX | BPF_B",
		.u.insns_int = {
			BPF_LD_IMM64(R2, 0xffffffffffffffefLL),
			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
			BPF_MOVSX64_REG(R1, R3, 8),
			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
			BPF_MOV32_IMM(R0, 2),
			BPF_EXIT_INSN(),
			BPF_MOV32_IMM(R0, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x1 } },
	},
	{
		"ALU64_MOVSX | BPF_H",
		.u.insns_int = {
			BPF_LD_IMM64(R2, 0xffffffffffffbeefLL),
			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
			BPF_MOVSX64_REG(R1, R3, 16),
			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
			BPF_MOV32_IMM(R0, 2),
			BPF_EXIT_INSN(),
			BPF_MOV32_IMM(R0, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x1 } },
	},
	{
		"ALU64_MOVSX | BPF_W",
		.u.insns_int = {
			BPF_LD_IMM64(R2, 0xffffffffdeadbeefLL),
			BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL),
			BPF_MOVSX64_REG(R1, R3, 32),
			BPF_JMP_REG(BPF_JEQ, R2, R1, 2),
			BPF_MOV32_IMM(R0, 2),
			BPF_EXIT_INSN(),
			BPF_MOV32_IMM(R0, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x1 } },
	},
	/* BPF_ALU | BPF_ADD | BPF_X */
	{
		"ALU_ADD_X: 1 + 2 = 3",
@@ -6105,6 +6203,106 @@ static struct bpf_test tests[] = {
		{ },
		{ { 0, 2 } },
	},
	/* BPF_ALU | BPF_DIV | BPF_X off=1 (SDIV) */
	{
		"ALU_SDIV_X: -6 / 2 = -3",
		.u.insns_int = {
			BPF_LD_IMM64(R0, -6),
			BPF_ALU32_IMM(BPF_MOV, R1, 2),
			BPF_ALU32_REG_OFF(BPF_DIV, R0, R1, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, -3 } },
	},
	/* BPF_ALU | BPF_DIV | BPF_K off=1 (SDIV) */
	{
		"ALU_SDIV_K: -6 / 2 = -3",
		.u.insns_int = {
			BPF_LD_IMM64(R0, -6),
			BPF_ALU32_IMM_OFF(BPF_DIV, R0, 2, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, -3 } },
	},
	/* BPF_ALU64 | BPF_DIV | BPF_X off=1 (SDIV64) */
	{
		"ALU64_SDIV_X: -6 / 2 = -3",
		.u.insns_int = {
			BPF_LD_IMM64(R0, -6),
			BPF_ALU32_IMM(BPF_MOV, R1, 2),
			BPF_ALU64_REG_OFF(BPF_DIV, R0, R1, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, -3 } },
	},
	/* BPF_ALU64 | BPF_DIV | BPF_K off=1 (SDIV64) */
	{
		"ALU64_SDIV_K: -6 / 2 = -3",
		.u.insns_int = {
			BPF_LD_IMM64(R0, -6),
			BPF_ALU64_IMM_OFF(BPF_DIV, R0, 2, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, -3 } },
	},
	/* BPF_ALU | BPF_MOD | BPF_X off=1 (SMOD) */
	{
		"ALU_SMOD_X: -7 % 2 = -1",
		.u.insns_int = {
			BPF_LD_IMM64(R0, -7),
			BPF_ALU32_IMM(BPF_MOV, R1, 2),
			BPF_ALU32_REG_OFF(BPF_MOD, R0, R1, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, -1 } },
	},
	/* BPF_ALU | BPF_MOD | BPF_K off=1 (SMOD) */
	{
		"ALU_SMOD_K: -7 % 2 = -1",
		.u.insns_int = {
			BPF_LD_IMM64(R0, -7),
			BPF_ALU32_IMM_OFF(BPF_MOD, R0, 2, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, -1 } },
	},
	/* BPF_ALU64 | BPF_MOD | BPF_X off=1 (SMOD64) */
	{
		"ALU64_SMOD_X: -7 % 2 = -1",
		.u.insns_int = {
			BPF_LD_IMM64(R0, -7),
			BPF_ALU32_IMM(BPF_MOV, R1, 2),
			BPF_ALU64_REG_OFF(BPF_MOD, R0, R1, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, -1 } },
	},
	/*
	 * BPF_ALU64 | BPF_MOD | BPF_K off=1 (SMOD64)
	 *
	 * Immediate-divisor variant; named _K so it can be told apart from
	 * the register-divisor (_X) test in the test log.
	 */
	{
		"ALU64_SMOD_K: -7 % 2 = -1",
		.u.insns_int = {
			BPF_LD_IMM64(R0, -7),
			BPF_ALU64_IMM_OFF(BPF_MOD, R0, 2, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, -1 } },
	},
	/* BPF_ALU | BPF_AND | BPF_X */
	{
		"ALU_AND_X: 3 & 2 = 2",
@@ -7837,6 +8035,104 @@ static struct bpf_test tests[] = {
		{ },
		{ { 0, (u32) (cpu_to_le64(0xfedcba9876543210ULL) >> 32) } },
	},
	/*
	 * BSWAP
	 *
	 * Each test loads 0x0123456789abcdef into R0, applies BPF_BSWAP with
	 * the given width and exits. The expected value is spelled out in the
	 * test name.
	 */
	{
		"BSWAP 16: 0x0123456789abcdef -> 0xefcd",
		.u.insns_int = {
			BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
			BPF_BSWAP(R0, 16),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0xefcd } },
	},
	{
		"BSWAP 32: 0x0123456789abcdef -> 0xefcdab89",
		.u.insns_int = {
			BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
			BPF_BSWAP(R0, 32),
			/*
			 * Add bits 63:32 of the swapped value to its low word.
			 * They are expected to be zero (see the note on the
			 * ADD), so a non-zero upper half would change the
			 * result.
			 */
			BPF_ALU64_REG(BPF_MOV, R1, R0),
			BPF_ALU64_IMM(BPF_RSH, R1, 32),
			BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0xefcdab89 } },
	},
	/*
	 * The 64-bit swap is checked in two halves: this test expects the low
	 * word of the swapped value, the next one shifts the high word down.
	 */
	{
		"BSWAP 64: 0x0123456789abcdef -> 0x67452301",
		.u.insns_int = {
			BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
			BPF_BSWAP(R0, 64),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x67452301 } },
	},
	{
		"BSWAP 64: 0x0123456789abcdef >> 32 -> 0xefcdab89",
		.u.insns_int = {
			BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
			BPF_BSWAP(R0, 64),
			/* bring the high word of the swapped value down */
			BPF_ALU64_IMM(BPF_RSH, R0, 32),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0xefcdab89 } },
	},
	/*
	 * BSWAP, reversed
	 *
	 * Same four checks as the BSWAP tests above, using the input pattern
	 * 0xfedcba9876543210 instead.
	 */
	{
		"BSWAP 16: 0xfedcba9876543210 -> 0x1032",
		.u.insns_int = {
			BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
			BPF_BSWAP(R0, 16),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x1032 } },
	},
	{
		"BSWAP 32: 0xfedcba9876543210 -> 0x10325476",
		.u.insns_int = {
			BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
			BPF_BSWAP(R0, 32),
			/*
			 * Add bits 63:32 of the swapped value to its low word.
			 * They are expected to be zero (see the note on the
			 * ADD), so a non-zero upper half would change the
			 * result.
			 */
			BPF_ALU64_REG(BPF_MOV, R1, R0),
			BPF_ALU64_IMM(BPF_RSH, R1, 32),
			BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x10325476 } },
	},
	/*
	 * The 64-bit swap is checked in two halves: this test expects the low
	 * word of the swapped value, the next one shifts the high word down.
	 */
	{
		"BSWAP 64: 0xfedcba9876543210 -> 0x98badcfe",
		.u.insns_int = {
			BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
			BPF_BSWAP(R0, 64),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x98badcfe } },
	},
	{
		"BSWAP 64: 0xfedcba9876543210 >> 32 -> 0x10325476",
		.u.insns_int = {
			BPF_LD_IMM64(R0, 0xfedcba9876543210ULL),
			BPF_BSWAP(R0, 64),
			/* bring the high word of the swapped value down */
			BPF_ALU64_IMM(BPF_RSH, R0, 32),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0x10325476 } },
	},
	/* BPF_LDX_MEM B/H/W/DW */
	{
		"BPF_LDX_MEM | BPF_B, base",
@@ -8228,6 +8524,67 @@ static struct bpf_test tests[] = {
		{ { 32, 0 } },
		.stack_depth = 0,
	},
	/*
	 * BPF_LDX_MEMSX B/H/W
	 *
	 * Each test stores a 64-bit pattern (R1) in the 8-byte slot at R10 - 8,
	 * reloads its least significant byte/half-word/word with a
	 * sign-extending load and compares the result with the expected 64-bit
	 * value in R2. On a match R0 is cleared, so the expected result is 0.
	 * On a mismatch the JNE skips the clearing MOV and R0 still holds the
	 * loaded value at exit.
	 *
	 * The load offset depends on endianness because the least significant
	 * bytes sit at the start of the slot on little-endian and at its end
	 * on big-endian.
	 */
	{
		"BPF_LDX_MEMSX | BPF_B",
		.u.insns_int = {
			BPF_LD_IMM64(R1, 0xdead0000000000f0ULL),
			/* 0xf0 sign-extended from 8 to 64 bits */
			BPF_LD_IMM64(R2, 0xfffffffffffffff0ULL),
			BPF_STX_MEM(BPF_DW, R10, R1, -8),
#ifdef __BIG_ENDIAN
			BPF_LDX_MEMSX(BPF_B, R0, R10, -1),
#else
			BPF_LDX_MEMSX(BPF_B, R0, R10, -8),
#endif
			BPF_JMP_REG(BPF_JNE, R0, R2, 1),
			BPF_ALU64_IMM(BPF_MOV, R0, 0),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0 } },
		.stack_depth = 8,
	},
	{
		"BPF_LDX_MEMSX | BPF_H",
		.u.insns_int = {
			BPF_LD_IMM64(R1, 0xdead00000000f123ULL),
			/* 0xf123 sign-extended from 16 to 64 bits */
			BPF_LD_IMM64(R2, 0xfffffffffffff123ULL),
			BPF_STX_MEM(BPF_DW, R10, R1, -8),
#ifdef __BIG_ENDIAN
			BPF_LDX_MEMSX(BPF_H, R0, R10, -2),
#else
			BPF_LDX_MEMSX(BPF_H, R0, R10, -8),
#endif
			BPF_JMP_REG(BPF_JNE, R0, R2, 1),
			BPF_ALU64_IMM(BPF_MOV, R0, 0),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0 } },
		.stack_depth = 8,
	},
	{
		"BPF_LDX_MEMSX | BPF_W",
		.u.insns_int = {
			BPF_LD_IMM64(R1, 0x00000000deadbeefULL),
			/* 0xdeadbeef sign-extended from 32 to 64 bits */
			BPF_LD_IMM64(R2, 0xffffffffdeadbeefULL),
			BPF_STX_MEM(BPF_DW, R10, R1, -8),
#ifdef __BIG_ENDIAN
			BPF_LDX_MEMSX(BPF_W, R0, R10, -4),
#else
			BPF_LDX_MEMSX(BPF_W, R0, R10, -8),
#endif
			BPF_JMP_REG(BPF_JNE, R0, R2, 1),
			BPF_ALU64_IMM(BPF_MOV, R0, 0),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 0 } },
		.stack_depth = 8,
	},
	/* BPF_STX_MEM B/H/W/DW */
	{
		"BPF_STX_MEM | BPF_B",
@@ -9474,6 +9831,20 @@ static struct bpf_test tests[] = {
		{ },
		{ { 0, 1 } },
	},
	/*
	 * BPF_JMP32 | BPF_JA
	 *
	 * Unconditional jump in the JMP32 class. R0 starts at 0; the jump must
	 * skip the first exit so that R0 is set to 1 before the program exits.
	 * Falling through instead would exit with 0 and fail the test.
	 */
	{
		"JMP32_JA: Unconditional jump: if (true) return 1",
		.u.insns_int = {
			BPF_ALU32_IMM(BPF_MOV, R0, 0),
			/*
			 * NOTE(review): the jump distance of 1 is the macro's
			 * third argument and the last argument is 0; presumably
			 * JMP32 | JA takes its target from imm rather than off.
			 * Confirm against the BPF_JMP32_IMM definition.
			 */
			BPF_JMP32_IMM(BPF_JA, 0, 1, 0),
			BPF_EXIT_INSN(),
			BPF_ALU32_IMM(BPF_MOV, R0, 1),
			BPF_EXIT_INSN(),
		},
		INTERNAL,
		{ },
		{ { 0, 1 } },
	},
	/* BPF_JMP | BPF_JSLT | BPF_K */
	{
		"JMP_JSLT_K: Signed jump: if (-2 < -1) return 1",
Loading