Commit 6efbea77 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull arm64 fixes from Will Deacon:

 - Disable software tag-based KASAN when compiling with GCC, as
   functions are incorrectly instrumented leading to a crash early
   during boot

 - Fix pkey configuration for kernel threads when POE is enabled

 - Fix invalid memory accesses in uprobes when targetting load-literal
   instructions

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  kasan: Disable Software Tag-Based KASAN with GCC
  Documentation/protection-keys: add AArch64 to documentation
  arm64: set POR_EL0 for kernel threads
  arm64: probes: Fix uprobes for big-endian kernels
  arm64: probes: Fix simulate_ldr*_literal()
  arm64: probes: Remove broken LDR (literal) uprobe support
parents c16e5c94 7aed6a2c
Loading
Loading
Loading
Loading
+30 −8
Original line number Diff line number Diff line
@@ -12,7 +12,10 @@ Pkeys Userspace (PKU) is a feature which can be found on:
        * Intel server CPUs, Skylake and later
        * Intel client CPUs, Tiger Lake (11th Gen Core) and later
        * Future AMD CPUs
        * arm64 CPUs implementing the Permission Overlay Extension (FEAT_S1POE)

x86_64
======
Pkeys work by dedicating 4 previously Reserved bits in each page table entry to
a "protection key", giving 16 possible keys.

@@ -28,6 +31,22 @@ register. The feature is only available in 64-bit mode, even though there is
theoretically space in the PAE PTEs.  These permissions are enforced on data
access only and have no effect on instruction fetches.

arm64
=====

Pkeys use 3 bits in each page table entry, to encode a "protection key index",
giving 8 possible keys.

Protections for each key are defined with a per-CPU user-writable system
register (POR_EL0).  This is a 64-bit register encoding read, write and execute
overlay permissions for each protection key index.

Being a CPU register, POR_EL0 is inherently thread-local, potentially giving
each thread a different set of protections from every other thread.

Unlike x86_64, the protection key permissions also apply to instruction
fetches.

Syscalls
========

@@ -38,11 +57,10 @@ There are 3 system calls which directly interact with pkeys::
	int pkey_mprotect(unsigned long start, size_t len,
			  unsigned long prot, int pkey);

Before a pkey can be used, it must first be allocated with
pkey_alloc().  An application calls the WRPKRU instruction
directly in order to change access permissions to memory covered
with a key.  In this example WRPKRU is wrapped by a C function
called pkey_set().
Before a pkey can be used, it must first be allocated with pkey_alloc().  An
application writes to the architecture specific CPU register directly in order
to change access permissions to memory covered with a key.  In this example
this is wrapped by a C function called pkey_set().
::

	int real_prot = PROT_READ|PROT_WRITE;
@@ -64,9 +82,9 @@ is no longer in use::
	munmap(ptr, PAGE_SIZE);
	pkey_free(pkey);

.. note:: pkey_set() is a wrapper for the RDPKRU and WRPKRU instructions.
          An example implementation can be found in
          tools/testing/selftests/x86/protection_keys.c.
.. note:: pkey_set() is a wrapper around writing to the CPU register.
          Example implementations can be found in
          tools/testing/selftests/mm/pkey-{arm64,powerpc,x86}.h

Behavior
========
@@ -96,3 +114,7 @@ with a read()::
The kernel will send a SIGSEGV in both cases, but si_code will be set
to SEGV_PKERR when violating protection keys versus SEGV_ACCERR when
the plain mprotect() permissions are violated.

Note that kernel accesses from a kthread (such as io_uring) will use a default
value for the protection key register and so will not be consistent with
userspace's value of the register or mprotect().
+3 −5
Original line number Diff line number Diff line
@@ -10,11 +10,9 @@
#include <asm/insn.h>
#include <asm/probes.h>

#define MAX_UINSN_BYTES		AARCH64_INSN_SIZE

#define UPROBE_SWBP_INSN	cpu_to_le32(BRK64_OPCODE_UPROBES)
#define UPROBE_SWBP_INSN_SIZE	AARCH64_INSN_SIZE
#define UPROBE_XOL_SLOT_BYTES	MAX_UINSN_BYTES
#define UPROBE_XOL_SLOT_BYTES	AARCH64_INSN_SIZE

typedef __le32 uprobe_opcode_t;

@@ -23,8 +21,8 @@ struct arch_uprobe_task {

struct arch_uprobe {
	union {
		u8 insn[MAX_UINSN_BYTES];
		u8 ixol[MAX_UINSN_BYTES];
		__le32 insn;
		__le32 ixol;
	};
	struct arch_probe_insn api;
	bool simulate;
+11 −5
Original line number Diff line number Diff line
@@ -99,10 +99,6 @@ arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
	    aarch64_insn_is_blr(insn) ||
	    aarch64_insn_is_ret(insn)) {
		api->handler = simulate_br_blr_ret;
	} else if (aarch64_insn_is_ldr_lit(insn)) {
		api->handler = simulate_ldr_literal;
	} else if (aarch64_insn_is_ldrsw_lit(insn)) {
		api->handler = simulate_ldrsw_literal;
	} else {
		/*
		 * Instruction cannot be stepped out-of-line and we don't
@@ -140,6 +136,17 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
	probe_opcode_t insn = le32_to_cpu(*addr);
	probe_opcode_t *scan_end = NULL;
	unsigned long size = 0, offset = 0;
	struct arch_probe_insn *api = &asi->api;

	if (aarch64_insn_is_ldr_lit(insn)) {
		api->handler = simulate_ldr_literal;
		decoded = INSN_GOOD_NO_SLOT;
	} else if (aarch64_insn_is_ldrsw_lit(insn)) {
		api->handler = simulate_ldrsw_literal;
		decoded = INSN_GOOD_NO_SLOT;
	} else {
		decoded = arm_probe_decode_insn(insn, &asi->api);
	}

	/*
	 * If there's a symbol defined in front of and near enough to
@@ -157,7 +164,6 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
		else
			scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
	}
	decoded = arm_probe_decode_insn(insn, &asi->api);

	if (decoded != INSN_REJECTED && scan_end)
		if (is_probed_address_atomic(addr - 1, scan_end))
+7 −11
Original line number Diff line number Diff line
@@ -171,17 +171,15 @@ simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs)
void __kprobes
simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
{
	u64 *load_addr;
	unsigned long load_addr;
	int xn = opcode & 0x1f;
	int disp;

	disp = ldr_displacement(opcode);
	load_addr = (u64 *) (addr + disp);
	load_addr = addr + ldr_displacement(opcode);

	if (opcode & (1 << 30))	/* x0-x30 */
		set_x_reg(regs, xn, *load_addr);
		set_x_reg(regs, xn, READ_ONCE(*(u64 *)load_addr));
	else			/* w0-w30 */
		set_w_reg(regs, xn, *load_addr);
		set_w_reg(regs, xn, READ_ONCE(*(u32 *)load_addr));

	instruction_pointer_set(regs, instruction_pointer(regs) + 4);
}
@@ -189,14 +187,12 @@ simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
void __kprobes
simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs)
{
	s32 *load_addr;
	unsigned long load_addr;
	int xn = opcode & 0x1f;
	int disp;

	disp = ldr_displacement(opcode);
	load_addr = (s32 *) (addr + disp);
	load_addr = addr + ldr_displacement(opcode);

	set_x_reg(regs, xn, *load_addr);
	set_x_reg(regs, xn, READ_ONCE(*(s32 *)load_addr));

	instruction_pointer_set(regs, instruction_pointer(regs) + 4);
}
+2 −2
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
	else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
		return -EINVAL;

	insn = *(probe_opcode_t *)(&auprobe->insn[0]);
	insn = le32_to_cpu(auprobe->insn);

	switch (arm_probe_decode_insn(insn, &auprobe->api)) {
	case INSN_REJECTED:
@@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
	if (!auprobe->simulate)
		return false;

	insn = *(probe_opcode_t *)(&auprobe->insn[0]);
	insn = le32_to_cpu(auprobe->insn);
	addr = instruction_pointer(regs);

	if (auprobe->api.handler)
Loading