Commit 53a08704 authored by Will Deacon
Browse files

Merge branch 'for-next/sme-fixes' into for-next/core

* for-next/sme-fixes: (35 commits)
  arm64/fpsimd: Allow CONFIG_ARM64_SME to be selected
  arm64/fpsimd: ptrace: Gracefully handle errors
  arm64/fpsimd: ptrace: Mandate SVE payload for streaming-mode state
  arm64/fpsimd: ptrace: Do not present register data for inactive mode
  arm64/fpsimd: ptrace: Save task state before generating SVE header
  arm64/fpsimd: ptrace/prctl: Ensure VL changes leave task in a valid state
  arm64/fpsimd: ptrace/prctl: Ensure VL changes do not resurrect stale data
  arm64/fpsimd: Make clone() compatible with ZA lazy saving
  arm64/fpsimd: Clear PSTATE.SM during clone()
  arm64/fpsimd: Consistently preserve FPSIMD state during clone()
  arm64/fpsimd: Remove redundant task->mm check
  arm64/fpsimd: signal: Use SMSTOP behaviour in setup_return()
  arm64/fpsimd: Add task_smstop_sm()
  arm64/fpsimd: Factor out {sve,sme}_state_size() helpers
  arm64/fpsimd: Clarify sve_sync_*() functions
  arm64/fpsimd: ptrace: Consistently handle partial writes to NT_ARM_(S)SVE
  arm64/fpsimd: signal: Consistently read FPSIMD context
  arm64/fpsimd: signal: Mandate SVE payload for streaming-mode state
  arm64/fpsimd: signal: Clear PSTATE.SM when restoring FPSIMD frame only
  arm64/fpsimd: Do not discard modified SVE state
  ...
parents f92f17a7 33c4618d
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -69,8 +69,8 @@ model features for SME is included in Appendix A.
  vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
  used for SVE vectors.

* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
  in which case it is set to 0.
* On thread creation PSTATE.ZA and TPIDR2_EL0 are preserved unless CLONE_VM
  is specified, in which case PSTATE.ZA is set to 0 and TPIDR2_EL0 is set to 0.

2.  Vector lengths
------------------
@@ -115,7 +115,7 @@ be zeroed.
5.  Signal handling
-------------------

* Signal handlers are invoked with streaming mode and ZA disabled.
* Signal handlers are invoked with PSTATE.SM=0, PSTATE.ZA=0, and TPIDR2_EL0=0.

* A new signal frame record TPIDR2_MAGIC is added formatted as a struct
  tpidr2_context to allow access to TPIDR2_EL0 from signal handlers.
@@ -241,7 +241,7 @@ prctl(PR_SME_SET_VL, unsigned long arg)
      length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
      does not constitute a change to the vector length for this purpose.

    * Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared.
    * Changing the vector length causes PSTATE.ZA to be cleared.
      Calling PR_SME_SET_VL with vl equal to the thread's current vector
      length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag,
      does not constitute a change to the vector length for this purpose.
+0 −1
Original line number Diff line number Diff line
@@ -2285,7 +2285,6 @@ config ARM64_SME
	bool "ARM Scalable Matrix Extension support"
	default y
	depends on ARM64_SVE
	depends on BROKEN
	help
	  The Scalable Matrix Extension (SME) is an extension to the AArch64
	  execution state which utilises a substantial subset of the SVE
+8 −6
Original line number Diff line number Diff line
@@ -371,12 +371,14 @@
/*
 * ISS values for SME traps
 */

#define ESR_ELx_SME_ISS_SME_DISABLED	0
#define ESR_ELx_SME_ISS_ILL		1
#define ESR_ELx_SME_ISS_SM_DISABLED	2
#define ESR_ELx_SME_ISS_ZA_DISABLED	3
#define ESR_ELx_SME_ISS_ZT_DISABLED	4
/* The SMTC field occupies bits [2:0] of the ISS for SME traps. */
#define ESR_ELx_SME_ISS_SMTC_MASK		GENMASK(2, 0)
/* Extract the SMTC field from an ESR value. */
#define ESR_ELx_SME_ISS_SMTC(esr)		((esr) & ESR_ELx_SME_ISS_SMTC_MASK)

/* Values of the SMTC field, to be compared against ESR_ELx_SME_ISS_SMTC(esr). */
#define ESR_ELx_SME_ISS_SMTC_SME_DISABLED	0
#define ESR_ELx_SME_ISS_SMTC_ILL		1
#define ESR_ELx_SME_ISS_SMTC_SM_DISABLED	2
#define ESR_ELx_SME_ISS_SMTC_ZA_DISABLED	3
#define ESR_ELx_SME_ISS_SMTC_ZT_DISABLED	4

/* ISS field definitions for MOPS exceptions */
#define ESR_ELx_MOPS_ISS_MEM_INST	(UL(1) << 24)
+47 −17
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#define __ASM_FP_H

#include <asm/errno.h>
#include <asm/percpu.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
@@ -76,7 +77,6 @@ extern void fpsimd_load_state(struct user_fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);

extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
@@ -93,9 +93,12 @@ struct cpu_fp_state {
	enum fp_type to_save;
};

DECLARE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);

extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);

extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_current_state(void);
extern void fpsimd_save_and_flush_cpu_state(void);

static inline bool thread_sm_enabled(struct thread_struct *thread)
@@ -108,6 +111,8 @@ static inline bool thread_za_enabled(struct thread_struct *thread)
	return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
}

extern void task_smstop_sm(struct task_struct *task);

/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
#define VL_ARCH_MAX 0x100

@@ -195,10 +200,8 @@ struct vl_info {

extern void sve_alloc(struct task_struct *task, bool flush);
extern void fpsimd_release_task(struct task_struct *task);
extern void fpsimd_sync_to_sve(struct task_struct *task);
extern void fpsimd_force_sync_to_sve(struct task_struct *task);
extern void sve_sync_to_fpsimd(struct task_struct *task);
extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
extern void fpsimd_sync_from_effective_state(struct task_struct *task);
extern void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task);

extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
				 unsigned long vl, unsigned long flags);
@@ -292,14 +295,29 @@ static inline bool sve_vq_available(unsigned int vq)
	return vq_available(ARM64_VEC_SVE, vq);
}

size_t sve_state_size(struct task_struct const *task);
/*
 * Size in bytes of the SVE register area (SVE_SIG_REGS_SIZE) for the larger
 * of the two supplied vector lengths, so that the result is big enough for
 * either of them.
 */
static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
{
	unsigned int widest_vl = max(sve_vl, sme_vl);

	return SVE_SIG_REGS_SIZE(sve_vq_from_vl(widest_vl));
}

/*
 * Number of bytes of memory needed to hold the full SVE state of @task,
 * based on the vector lengths currently configured for that task.
 */
static inline size_t sve_state_size(struct task_struct const *task)
{
	return __sve_state_size(task_get_sve_vl(task), task_get_sme_vl(task));
}

#else /* ! CONFIG_ARM64_SVE */

static inline void sve_alloc(struct task_struct *task, bool flush) { }
static inline void fpsimd_release_task(struct task_struct *task) { }
static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
static inline void fpsimd_sync_from_effective_state(struct task_struct *task) { }
static inline void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { }

static inline int sve_max_virtualisable_vl(void)
{
@@ -333,6 +351,11 @@ static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }

/* Stub: no storage is required, whatever vector lengths are passed in. */
static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl) { return 0; }

static inline size_t sve_state_size(struct task_struct const *task)
{
	return 0;
@@ -385,6 +408,16 @@ extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);
extern void sme_suspend_exit(void);

/*
 * Size in bytes of the SME specific state for vector length @sme_vl: the ZA
 * storage (ZA_SIG_REGS_SIZE), plus ZT_SIG_REG_SIZE when the system supports
 * SME2.
 */
static inline size_t __sme_state_size(unsigned int sme_vl)
{
	size_t za_bytes = ZA_SIG_REGS_SIZE(sve_vq_from_vl(sme_vl));

	if (!system_supports_sme2())
		return za_bytes;

	return za_bytes + ZT_SIG_REG_SIZE;
}

/*
 * Return how many bytes of memory are required to store the full SME
 * specific state for task, given task's currently configured vector
@@ -392,15 +425,7 @@ extern void sme_suspend_exit(void);
 */
static inline size_t sme_state_size(struct task_struct const *task)
{
	/*
	 * The size calculation lives in __sme_state_size() so that it can
	 * also be used when only a vector length is known; look up the
	 * task's SME vector length and delegate to it.
	 */
	return __sme_state_size(task_get_sme_vl(task));
}

#else
@@ -421,6 +446,11 @@ static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }
static inline void sme_suspend_exit(void) { }

/* Stub: no storage is required, whatever vector length is passed in. */
static inline size_t __sme_state_size(unsigned int sme_vl) { return 0; }

static inline size_t sme_state_size(struct task_struct const *task)
{
	return 0;
+35 −11
Original line number Diff line number Diff line
@@ -393,20 +393,16 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 * As per the ABI exit SME streaming mode and clear the SVE state not
 * shared with FPSIMD on syscall entry.
 */
static inline void fp_user_discard(void)
static inline void fpsimd_syscall_enter(void)
{
	/*
	 * If SME is active then exit streaming mode.  If ZA is active
	 * then flush the SVE registers but leave userspace access to
	 * both SVE and SME enabled, otherwise disable SME for the
	 * task and fall through to disabling SVE too.  This means
	 * that after a syscall we never have any streaming mode
	 * register state to track, if this changes the KVM code will
	 * need updating.
	 */
	/* Ensure PSTATE.SM is clear, but leave PSTATE.ZA as-is. */
	if (system_supports_sme())
		sme_smstop_sm();

	/*
	 * The CPU is not in streaming mode. If non-streaming SVE is not
	 * supported, there is no SVE state that needs to be discarded.
	 */
	if (!system_supports_sve())
		return;

@@ -416,6 +412,33 @@ static inline void fp_user_discard(void)
		sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
		sve_flush_live(true, sve_vq_minus_one);
	}

	/*
	 * Any live non-FPSIMD SVE state has been zeroed. Allow
	 * fpsimd_save_user_state() to lazily discard SVE state until either
	 * the live state is unbound or fpsimd_syscall_exit() is called.
	 */
	__this_cpu_write(fpsimd_last_state.to_save, FP_STATE_FPSIMD);
}

/*
 * Counterpart of fpsimd_syscall_enter(): stop the lazy discarding of SVE
 * state that was enabled on syscall entry.
 */
static __always_inline void fpsimd_syscall_exit(void)
{
	/*
	 * At this point the current task's user FPSIMD/SVE/SME state is bound
	 * to this CPU, and fpsimd_last_state.to_save holds one of:
	 *
	 * - FP_STATE_FPSIMD, when the state has not been reloaded on this CPU
	 *   since fpsimd_syscall_enter().
	 *
	 * - FP_STATE_CURRENT, when the state has been reloaded on this CPU at
	 *   any point.
	 *
	 * Writing FP_STATE_CURRENT in both cases ends lazy discarding. Without
	 * SVE support the value is left untouched.
	 */
	if (system_supports_sve())
		__this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT);
}

UNHANDLED(el1t, 64, sync)
@@ -739,10 +762,11 @@ static void noinstr el0_svc(struct pt_regs *regs)
{
	enter_from_user_mode(regs);
	cortex_a76_erratum_1463225_svc_handler();
	fp_user_discard();
	fpsimd_syscall_enter();
	local_daif_restore(DAIF_PROCCTX);
	do_el0_svc(regs);
	exit_to_user_mode(regs);
	fpsimd_syscall_exit();
}

static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
Loading