Commit 5d26eaae authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvmarm-fixes-6.18-1' of...

Merge tag 'kvmarm-fixes-6.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 fixes for 6.18, take #1

Improvements and bug fixes:

- Fix the handling of ZCR_EL2 in NV VMs
  (20250926194108.84093-1-oliver.upton@linux.dev)

- Pick the correct translation regime when doing a PTW on
  the back of a SEA (20250926224246.731748-1-oliver.upton@linux.dev)

- Prevent userspace from injecting an event into a vcpu that isn't
  initialised yet (20250930085237.108326-1-oliver.upton@linux.dev)

- Move timer save/restore to the sysreg handling code, fixing EL2 timer
  access in the process (20250929160458.33517881-1-maz@kernel.org)

- Add FGT-based trapping of MDSCR_EL1 to reduce the overhead of debug
  (20250924235150.617451-1-oliver.upton@linux.dev)

- Fix trapping configuration when the host isn't GICv3
  (20251007160704.1673584-1-sascha.bischoff@arm.com)

- Improve the detection of HCR_EL2.E2H being RES1
  (20251009121239.29370-1-maz@kernel.org)

- Drop a spurious 'break' statement in the S1 PTW
  (20250930135621.162050-1-osama.abdelkader@gmail.com)

- Don't try to access SPE when owned by EL3
  (20251010174707.1684200-1-mukesh.ojha@oss.qualcomm.com)

Documentation updates:

- Document the failure modes of event injection
  (20250930233620.124607-1-oliver.upton@linux.dev)

- Document that a GICv3 guest can be created on a GICv5 host
  with FEAT_GCIE_LEGACY (20251007154848.1640444-1-sascha.bischoff@arm.com)

Selftest improvements:

- Add a selftest for the effective value of HCR_EL2.AMO
  (20250926224454.734066-1-oliver.upton@linux.dev)

- Address build warning in the timer selftest when building
  with clang (20250926155838.2612205-1-seanjc@google.com)

- Teach irq_fd selftests about non-x86 architectures
  (20250930193301.119859-1-oliver.upton@linux.dev)

- Add missing sysregs to the set_id_regs selftest
  (20251012154352.61133-1-zenghui.yu@linux.dev)

- Fix vcpu allocation in the vgic_lpi_stress selftest
  (20251008154520.54801-1-zenghui.yu@linux.dev)

- Correctly enable interrupts in the vgic_lpi_stress selftest
  (20251007195254.260539-1-oliver.upton@linux.dev)
parents 3a866087 ca88ecdc
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -1229,6 +1229,9 @@ It is not possible to read back a pending external abort (injected via
KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered
directly to the virtual CPU.

Calling this ioctl on a vCPU that hasn't been initialized will return
-ENOEXEC.

::

  struct kvm_vcpu_events {
@@ -1309,6 +1312,8 @@ exceptions by manipulating individual registers using the KVM_SET_ONE_REG API.

See KVM_GET_VCPU_EVENTS for the data structure.

Calling this ioctl on a vCPU that hasn't been initialized will return
-ENOEXEC.

4.33 KVM_GET_DEBUGREGS
----------------------
+2 −1
Original line number Diff line number Diff line
@@ -13,7 +13,8 @@ will act as the VM interrupt controller, requiring emulated user-space devices
to inject interrupts to the VGIC instead of directly to CPUs.  It is not
possible to create both a GICv3 and GICv2 on the same VM.

Creating a guest GICv3 device requires a host GICv3 as well.
Creating a guest GICv3 device requires a GICv3 host, or a GICv5 host with
support for FEAT_GCIE_LEGACY.


Groups:
+32 −6
Original line number Diff line number Diff line
@@ -24,22 +24,48 @@
	 * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it
	 * can reset into an UNKNOWN state and might not read as 1 until it has
	 * been initialized explicitly.
	 *
	 * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
	 * don't advertise it (they predate this relaxation).
	 *
	 * Initialize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H
	 * indicating whether the CPU is running in E2H mode.
	 */
	mrs_s	x1, SYS_ID_AA64MMFR4_EL1
	sbfx	x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
	cmp	x1, #0
	b.ge	.LnVHE_\@
	b.lt	.LnE2H0_\@

	/*
	 * Unfortunately, HCR_EL2.E2H can be RES1 even if not advertised
	 * as such via ID_AA64MMFR4_EL1.E2H0:
	 *
	 * - Fruity CPUs predate the !FEAT_E2H0 relaxation, and seem to
	 *   have HCR_EL2.E2H implemented as RAO/WI.
	 *
	 * - On CPUs that lack FEAT_FGT, a hypervisor can't trap guest
	 *   reads of ID_AA64MMFR4_EL1 to advertise !FEAT_E2H0. NV
	 *   guests on these hosts can write to HCR_EL2.E2H without
	 *   trapping to the hypervisor, but these writes have no
	 *   functional effect.
	 *
	 * Handle both cases by checking for an essential VHE property
	 * (system register remapping) to decide whether we're
	 * effectively VHE-only or not.
	 */
	msr_hcr_el2 x0		// Setup HCR_EL2 as nVHE
	isb
	mov	x1, #1		// Write something to FAR_EL1
	msr	far_el1, x1
	isb
	mov	x1, #2		// Try to overwrite it via FAR_EL2
	msr	far_el2, x1
	isb
	mrs	x1, far_el1	// If we see the latest write in FAR_EL1,
	cmp	x1, #2		// we can safely assume we are VHE only.
	b.ne	.LnVHE_\@	// Otherwise, we know that nVHE works.

.LnE2H0_\@:
	orr	x0, x0, #HCR_E2H
.LnVHE_\@:
	msr_hcr_el2 x0
	isb
.LnVHE_\@:
.endm

.macro __init_el2_sctlr
+50 −0
Original line number Diff line number Diff line
@@ -816,6 +816,11 @@ struct kvm_vcpu_arch {
	u64 hcrx_el2;
	u64 mdcr_el2;

	struct {
		u64 r;
		u64 w;
	} fgt[__NR_FGT_GROUP_IDS__];

	/* Exception Information */
	struct kvm_vcpu_fault_info fault;

@@ -1600,6 +1605,51 @@ static inline bool kvm_arch_has_irq_bypass(void)
void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt);
void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1);
void check_feature_map(void);
void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu);

/*
 * Map a fine-grained-trap (FGT) register identifier to the id of the
 * group it belongs to.
 *
 * The read and write flavours of a register pair (e.g. HFGRTR_EL2 and
 * HFGWTR_EL2) map to the same group; registers that have no write
 * counterpart listed here (HFGITR_EL2, HAFGRTR_EL2, HFGITR2_EL2) map
 * to a group of their own.
 *
 * Any other register hits BUILD_BUG_ON(1) in the default case.
 * NOTE(review): this presumably relies on @reg being a compile-time
 * constant so that the always-inlined switch folds away and the
 * default branch is only kept (and breaks the build) for unsupported
 * registers -- confirm against the callers.
 */
static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg reg)
{
	switch (reg) {
	case HFGRTR_EL2:
	case HFGWTR_EL2:
		return HFGRTR_GROUP;
	case HFGITR_EL2:
		return HFGITR_GROUP;
	case HDFGRTR_EL2:
	case HDFGWTR_EL2:
		return HDFGRTR_GROUP;
	case HAFGRTR_EL2:
		return HAFGRTR_GROUP;
	case HFGRTR2_EL2:
	case HFGWTR2_EL2:
		return HFGRTR2_GROUP;
	case HFGITR2_EL2:
		return HFGITR2_GROUP;
	case HDFGRTR2_EL2:
	case HDFGWTR2_EL2:
		return HDFGRTR2_GROUP;
	default:
		/* Not an FGT register handled above: no group to return. */
		BUILD_BUG_ON(1);
	}
}

/*
 * vcpu_fgt(vcpu, reg) - pointer to the per-vcpu FGT value backing @reg.
 *
 * Evaluates (as a statement expression) to a u64 pointer into
 * (vcpu)->arch.fgt[], indexed by the group id that
 * __fgt_reg_to_group_id() returns for @reg:
 *
 *  - for the write registers HFGWTR_EL2, HDFGWTR_EL2, HFGWTR2_EL2 and
 *    HDFGWTR2_EL2, the pointer designates the group's 'w' member;
 *  - for every other register, it designates the group's 'r' member.
 *
 * @reg is expanded twice (once for the group lookup, once in the
 * switch), so it should not be an expression with side effects.
 */
#define vcpu_fgt(vcpu, reg)						\
	({								\
		enum fgt_group_id id = __fgt_reg_to_group_id(reg);	\
		u64 *p;							\
		switch (reg) {						\
		case HFGWTR_EL2:					\
		case HDFGWTR_EL2:					\
		case HFGWTR2_EL2:					\
		case HDFGWTR2_EL2:					\
			p = &(vcpu)->arch.fgt[id].w;			\
			break;						\
		default:						\
			p = &(vcpu)->arch.fgt[id].r;			\
			break;						\
		}							\
									\
		p;							\
	})

#endif /* __ARM64_KVM_HOST_H__ */
+14 −91
Original line number Diff line number Diff line
@@ -66,7 +66,7 @@ static int nr_timers(struct kvm_vcpu *vcpu)

u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
@@ -85,7 +85,7 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt)

u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
@@ -104,7 +104,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)

static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
@@ -126,7 +126,7 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)

static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
@@ -146,16 +146,6 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
	}
}

/*
 * Publish a new counter offset for the timer context @ctxt.
 *
 * When the context has a VM offset location attached, @offset is stored
 * there with WRITE_ONCE(). Otherwise nothing is stored, and a warning
 * naming the timer index is emitted if the caller asked for a non-zero
 * offset.
 */
static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
{
	if (ctxt->offset.vm_offset) {
		WRITE_ONCE(*ctxt->offset.vm_offset, offset);
		return;
	}

	/* No backing storage: only a zero offset can be honoured silently. */
	WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
}

u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
@@ -343,7 +333,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = ctx->vcpu;
	vcpu = timer_context_to_vcpu(ctx);

	trace_kvm_timer_hrtimer_expire(ctx);

@@ -436,8 +426,9 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
	 *
	 * But hey, it's fast, right?
	 */
	if (is_hyp_ctxt(ctx->vcpu) &&
	    (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) {
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
	if (is_hyp_ctxt(vcpu) &&
	    (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) {
		unsigned long val = timer_get_ctl(ctx);
		__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
		timer_set_ctl(ctx, val);
@@ -470,7 +461,7 @@ static void timer_emulate(struct arch_timer_context *ctx)
	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
		kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx);

	kvm_timer_update_status(ctx, should_fire);

@@ -498,7 +489,7 @@ static void set_cntpoff(u64 cntpoff)

static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

@@ -609,7 +600,7 @@ static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)

static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

@@ -668,7 +659,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo

static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = ctx->vcpu;
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
	bool phys_active = false;

	/*
@@ -677,7 +668,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
@@ -1063,7 +1054,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->vcpu = vcpu;
	ctxt->timer_id = timerid;

	if (timerid == TIMER_VTIMER)
		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
@@ -1121,49 +1112,6 @@ void kvm_timer_cpu_down(void)
		disable_percpu_irq(host_ptimer_irq);
}

/*
 * Write one of the timer registers identified by @regid.
 *
 * CTL and CVAL writes for the virtual and physical timers are forwarded
 * to kvm_arm_timer_write(). CNT writes are turned into a counter offset
 * (current physical counter minus @value) and are silently ignored when
 * the VM has KVM_ARCH_FLAG_VM_COUNTER_OFFSET set.
 *
 * Returns 0 for any register handled here, -1 for an unknown @regid.
 */
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
	struct arch_timer_context *timer;

	switch (regid) {
	/* Virtual timer */
	case KVM_REG_ARM_TIMER_CTL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		return 0;
	case KVM_REG_ARM_TIMER_CNT:
		if (test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			     &vcpu->kvm->arch.flags))
			return 0;
		timer = vcpu_vtimer(vcpu);
		timer_set_offset(timer, kvm_phys_timer_read() - value);
		return 0;
	case KVM_REG_ARM_TIMER_CVAL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		return 0;

	/* Physical timer */
	case KVM_REG_ARM_PTIMER_CTL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		return 0;
	case KVM_REG_ARM_PTIMER_CNT:
		if (test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			     &vcpu->kvm->arch.flags))
			return 0;
		timer = vcpu_ptimer(vcpu);
		timer_set_offset(timer, kvm_phys_timer_read() - value);
		return 0;
	case KVM_REG_ARM_PTIMER_CVAL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		return 0;
	}

	/* Not a timer register handled above. */
	return -1;
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
@@ -1180,31 +1128,6 @@ static u64 read_timer_ctl(struct arch_timer_context *timer)
	return ctl;
}

/*
 * Read one of the timer registers identified by @regid.
 *
 * CTL, CNT and CVAL of the virtual and physical timers are obtained
 * through kvm_arm_timer_read(). Any other @regid yields (u64)-1.
 */
u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	u64 val = (u64)-1;

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		val = kvm_arm_timer_read(vcpu, vcpu_vtimer(vcpu), TIMER_REG_CTL);
		break;
	case KVM_REG_ARM_TIMER_CNT:
		val = kvm_arm_timer_read(vcpu, vcpu_vtimer(vcpu), TIMER_REG_CNT);
		break;
	case KVM_REG_ARM_TIMER_CVAL:
		val = kvm_arm_timer_read(vcpu, vcpu_vtimer(vcpu), TIMER_REG_CVAL);
		break;
	case KVM_REG_ARM_PTIMER_CTL:
		val = kvm_arm_timer_read(vcpu, vcpu_ptimer(vcpu), TIMER_REG_CTL);
		break;
	case KVM_REG_ARM_PTIMER_CNT:
		val = kvm_arm_timer_read(vcpu, vcpu_ptimer(vcpu), TIMER_REG_CNT);
		break;
	case KVM_REG_ARM_PTIMER_CVAL:
		val = kvm_arm_timer_read(vcpu, vcpu_ptimer(vcpu), TIMER_REG_CVAL);
		break;
	default:
		/* Unknown register: keep the all-ones default. */
		break;
	}

	return val;
}

static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
Loading