Commit 83ef4a37 authored by Catalin Marinas's avatar Catalin Marinas
Browse files

Merge branch 'for-next/pkey-signal' into for-next/core

* for-next/pkey-signal:
  : Bring arm64 pkey signal delivery in line with the x86 behaviour
  selftests/mm: Fix unused function warning for aarch64_write_signal_pkey()
  selftests/mm: Define PKEY_UNRESTRICTED for pkey_sighandler_tests
  selftests/mm: Enable pkey_sighandler_tests on arm64
  selftests/mm: Use generic pkey register manipulation
  arm64: signal: Remove unused macro
  arm64: signal: Remove unnecessary check when saving POE state
  arm64: signal: Improve POR_EL0 handling to avoid uaccess failures
  firmware: arm_sdei: Fix the input parameter of cpuhp_remove_state()
  Revert "kasan: Disable Software Tag-Based KASAN with GCC"
  kasan: Fix Software Tag-Based KASAN with GCC
  kasan: Disable Software Tag-Based KASAN with GCC
  Documentation/protection-keys: add AArch64 to documentation
  arm64: set POR_EL0 for kernel threads

# Conflicts:
#	arch/arm64/kernel/signal.c
parents 437330d9 929bbc16
Loading
Loading
Loading
Loading
+30 −8
Original line number Diff line number Diff line
@@ -12,7 +12,10 @@ Pkeys Userspace (PKU) is a feature which can be found on:
        * Intel server CPUs, Skylake and later
        * Intel client CPUs, Tiger Lake (11th Gen Core) and later
        * Future AMD CPUs
        * arm64 CPUs implementing the Permission Overlay Extension (FEAT_S1POE)

x86_64
======
Pkeys work by dedicating 4 previously Reserved bits in each page table entry to
a "protection key", giving 16 possible keys.

@@ -28,6 +31,22 @@ register. The feature is only available in 64-bit mode, even though there is
theoretically space in the PAE PTEs.  These permissions are enforced on data
access only and have no effect on instruction fetches.

arm64
=====

Pkeys use 3 bits in each page table entry to encode a "protection key index",
giving 8 possible keys.

Protections for each key are defined with a per-CPU user-writable system
register (POR_EL0).  This is a 64-bit register encoding read, write and execute
overlay permissions for each protection key index.

Being a CPU register, POR_EL0 is inherently thread-local, potentially giving
each thread a different set of protections from every other thread.

Unlike x86_64, the protection key permissions also apply to instruction
fetches.

Syscalls
========

@@ -38,11 +57,10 @@ There are 3 system calls which directly interact with pkeys::
	int pkey_mprotect(unsigned long start, size_t len,
			  unsigned long prot, int pkey);

Before a pkey can be used, it must first be allocated with
pkey_alloc().  An application calls the WRPKRU instruction
directly in order to change access permissions to memory covered
with a key.  In this example WRPKRU is wrapped by a C function
called pkey_set().
Before a pkey can be used, it must first be allocated with pkey_alloc().  An
application writes to the architecture-specific CPU register directly in order
to change access permissions to memory covered with a key.  In this example
the register write is wrapped by a C function called pkey_set().
::

	int real_prot = PROT_READ|PROT_WRITE;
@@ -64,9 +82,9 @@ is no longer in use::
	munmap(ptr, PAGE_SIZE);
	pkey_free(pkey);

.. note:: pkey_set() is a wrapper for the RDPKRU and WRPKRU instructions.
          An example implementation can be found in
          tools/testing/selftests/x86/protection_keys.c.
.. note:: pkey_set() is a wrapper around writing to the CPU register.
          Example implementations can be found in
          tools/testing/selftests/mm/pkey-{arm64,powerpc,x86}.h

Behavior
========
@@ -96,3 +114,7 @@ with a read()::
The kernel will send a SIGSEGV in both cases, but si_code will be set
to SEGV_PKERR when violating protection keys versus SEGV_ACCERR when
the plain mprotect() permissions are violated.

Note that kernel accesses from a kthread (such as io_uring) will use a default
value for the protection key register and so will not be consistent with
userspace's value of the register or mprotect().
+3 −0
Original line number Diff line number Diff line
@@ -466,6 +466,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)

		p->thread.cpu_context.x19 = (unsigned long)args->fn;
		p->thread.cpu_context.x20 = (unsigned long)args->fn_arg;

		if (system_supports_poe())
			p->thread.por_el0 = POR_EL0_INIT;
	}
	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
	p->thread.cpu_context.sp = (unsigned long)childregs;
+79 −16
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include <linux/ratelimit.h>
#include <linux/rseq.h>
#include <linux/syscalls.h>
#include <linux/pkeys.h>

#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
@@ -72,10 +73,62 @@ struct rt_sigframe_user_layout {
	unsigned long end_offset;
};

#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16)
/*
 * Holds any EL0-controlled state that influences unprivileged memory accesses.
 * This includes both accesses done in userspace and uaccess done in the kernel.
 *
 * This state needs to be carefully managed to ensure that it doesn't cause
 * uaccess to fail when setting up the signal frame, and the signal handler
 * itself also expects a well-defined state when entered.
 */
struct user_access_state {
	/* POR_EL0 value to be written to, or taken from, the signal frame */
	u64 por_el0;
};

#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16)
#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16)

/*
 * Record the current user access state in ua_state, then lift every
 * restriction it imposes.
 *
 * When POE is not supported this is a no-op and ua_state is left untouched.
 */
static void save_reset_user_access_state(struct user_access_state *ua_state)
{
	u64 unrestricted = 0;
	int key;

	if (!system_supports_poe())
		return;

	/* Grant POE_RXW in the POR_EL0 field of each protection key. */
	for (key = 0; key < arch_max_pkey(); key++)
		unrestricted |= POE_RXW << (key * POR_BITS_PER_PKEY);

	ua_state->por_el0 = read_sysreg_s(SYS_POR_EL0);
	write_sysreg_s(unrestricted, SYS_POR_EL0);

	/* Make sure the new value is seen by any uaccess that follows. */
	isb();
}

/*
 * Install the user access state that the signal handler is entered with.
 *
 * Callers must not perform any uaccess once this function has been called.
 */
static void set_handler_user_access_state(void)
{
	if (!system_supports_poe())
		return;

	write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
}

/*
 * Put back the user access state previously stored in ua_state.
 *
 * Callers must not perform any uaccess once this function has been called.
 */
static void restore_user_access_state(const struct user_access_state *ua_state)
{
	if (!system_supports_poe())
		return;

	write_sysreg_s(ua_state->por_el0, SYS_POR_EL0);
}

static void init_user_layout(struct rt_sigframe_user_layout *user)
{
	const size_t reserved_size =
@@ -269,18 +322,20 @@ static int restore_fpmr_context(struct user_ctxs *user)
	return err;
}

static int preserve_poe_context(struct poe_context __user *ctx)
static int preserve_poe_context(struct poe_context __user *ctx,
				const struct user_access_state *ua_state)
{
	int err = 0;

	__put_user_error(POE_MAGIC, &ctx->head.magic, err);
	__put_user_error(sizeof(*ctx), &ctx->head.size, err);
	__put_user_error(read_sysreg_s(SYS_POR_EL0), &ctx->por_el0, err);
	__put_user_error(ua_state->por_el0, &ctx->por_el0, err);

	return err;
}

static int restore_poe_context(struct user_ctxs *user)
static int restore_poe_context(struct user_ctxs *user,
			       struct user_access_state *ua_state)
{
	u64 por_el0;
	int err = 0;
@@ -290,7 +345,7 @@ static int restore_poe_context(struct user_ctxs *user)

	__get_user_error(por_el0, &(user->poe->por_el0), err);
	if (!err)
		write_sysreg_s(por_el0, SYS_POR_EL0);
		ua_state->por_el0 = por_el0;

	return err;
}
@@ -946,7 +1001,8 @@ static int parse_user_sigframe(struct user_ctxs *user,
}

static int restore_sigframe(struct pt_regs *regs,
			    struct rt_sigframe __user *sf)
			    struct rt_sigframe __user *sf,
			    struct user_access_state *ua_state)
{
	sigset_t set;
	int i, err;
@@ -998,7 +1054,7 @@ static int restore_sigframe(struct pt_regs *regs,
		err = restore_zt_context(&user);

	if (err == 0 && system_supports_poe() && user.poe)
		err = restore_poe_context(&user);
		err = restore_poe_context(&user, ua_state);

	return err;
}
@@ -1059,6 +1115,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
{
	struct pt_regs *regs = current_pt_regs();
	struct rt_sigframe __user *frame;
	struct user_access_state ua_state;

	/* Always make any pending restarted system calls return -EINTR */
	current->restart_block.fn = do_no_restart_syscall;
@@ -1075,7 +1132,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
	if (!access_ok(frame, sizeof (*frame)))
		goto badframe;

	if (restore_sigframe(regs, frame))
	if (restore_sigframe(regs, frame, &ua_state))
		goto badframe;

	if (gcs_restore_signal())
@@ -1084,6 +1141,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
	if (restore_altstack(&frame->uc.uc_stack))
		goto badframe;

	restore_user_access_state(&ua_state);

	return regs->regs[0];

badframe:
@@ -1198,7 +1257,8 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
}

static int setup_sigframe(struct rt_sigframe_user_layout *user,
			  struct pt_regs *regs, sigset_t *set)
			  struct pt_regs *regs, sigset_t *set,
			  const struct user_access_state *ua_state)
{
	int i, err = 0;
	struct rt_sigframe __user *sf = user->sigframe;
@@ -1262,14 +1322,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
		err |= preserve_fpmr_context(fpmr_ctx);
	}

	if (system_supports_poe() && err == 0 && user->poe_offset) {
	if (system_supports_poe() && err == 0) {
		struct poe_context __user *poe_ctx =
			apply_user_offset(user, user->poe_offset);

		err |= preserve_poe_context(poe_ctx);
		err |= preserve_poe_context(poe_ctx, ua_state);
	}


	/* ZA state if present */
	if (system_supports_sme() && err == 0 && user->za_offset) {
		struct za_context __user *za_ctx =
@@ -1447,9 +1506,6 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig,
		sme_smstop();
	}

	if (system_supports_poe())
		write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);

	if (ksig->ka.sa.sa_flags & SA_RESTORER)
		sigtramp = ksig->ka.sa.sa_restorer;
	else
@@ -1465,6 +1521,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
{
	struct rt_sigframe_user_layout user;
	struct rt_sigframe __user *frame;
	struct user_access_state ua_state;
	int err = 0;

	fpsimd_signal_preserve_current_state();
@@ -1472,13 +1529,14 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
	if (get_sigframe(&user, ksig, regs))
		return 1;

	save_reset_user_access_state(&ua_state);
	frame = user.sigframe;

	__put_user_error(0, &frame->uc.uc_flags, err);
	__put_user_error(NULL, &frame->uc.uc_link, err);

	err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
	err |= setup_sigframe(&user, regs, set);
	err |= setup_sigframe(&user, regs, set, &ua_state);
	if (err == 0) {
		err = setup_return(regs, ksig, &user, usig);
		if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
@@ -1488,6 +1546,11 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
		}
	}

	if (err == 0)
		set_handler_user_access_state();
	else
		restore_user_access_state(&ua_state);

	return err;
}

+1 −1
Original line number Diff line number Diff line
@@ -763,7 +763,7 @@ static int sdei_device_freeze(struct device *dev)
	int err;

	/* unregister private events */
	cpuhp_remove_state(sdei_entry_point);
	cpuhp_remove_state(sdei_hp_state);

	err = sdei_unregister_shared();
	if (err)
+4 −0
Original line number Diff line number Diff line
@@ -80,7 +80,11 @@
#define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
#endif

/*
 * Select the spelling of the "do not instrument" attribute: the "hwaddress"
 * no_sanitize form when __SANITIZE_HWADDRESS__ is defined, and the plain
 * __no_sanitize_address__ attribute otherwise.
 */
#if defined(__SANITIZE_HWADDRESS__)
#define __no_sanitize_address __attribute__((__no_sanitize__("hwaddress")))
#else
#define __no_sanitize_address __attribute__((__no_sanitize_address__))
#endif

#if defined(__SANITIZE_THREAD__)
#define __no_sanitize_thread __attribute__((__no_sanitize_thread__))
Loading