Commit 5e21d0c5 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvmarm-fixes-6.14-1' of...

Merge tag 'kvmarm-fixes-6.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 fixes for 6.14, take #1

- Correctly clean the BSS to the PoC before allowing EL2 to access it
  on nVHE/hVHE/protected configurations

- Propagate ownership of debug registers in protected mode after
  the rework that landed in 6.14-rc1

- Stop pretending that we can run the protected mode without a GICv3
  being present on the host

- Fix a use-after-free situation that can occur if a vcpu fails to
  initialise the NV shadow S2 MMU contexts

- Always evaluate the need to arm a background timer for fully emulated
  guest timers

- Fix the emulation of EL1 timers in the absence of FEAT_ECV

- Correctly handle the EL2 virtual timer, especially when HCR_EL2.E2H==0
parents 35441cdd 0e459810
Loading
Loading
Loading
Loading
+11 −38
Original line number Diff line number Diff line
@@ -471,10 +471,8 @@ static void timer_emulate(struct arch_timer_context *ctx)

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level) {
	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
		return;
	}

	kvm_timer_update_status(ctx, should_fire);

@@ -761,21 +759,6 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);

		/*
		 * The virtual offset behaviour is "interesting", as it
		 * always applies when HCR_EL2.E2H==0, but only when
		 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
		 * track E2H when putting the HV timer in "direct" mode.
		 */
		if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
			struct arch_timer_offset *offs = &map->direct_vtimer->offset;

			if (vcpu_el2_e2h_is_set(vcpu))
				offs->vcpu_offset = NULL;
			else
				offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		}
	}
}

@@ -976,31 +959,21 @@ void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
	 * which allows trapping of the timer registers even with NV2.
	 * Still, this is still worse than FEAT_NV on its own. Meh.
	 */
	if (!vcpu_el2_e2h_is_set(vcpu)) {
		if (cpus_have_final_cap(ARM64_HAS_ECV))
			return;

		/*
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap (and the HW is
		 * fully emulated), while the EL0 registers access memory
		 * despite the access being notionally direct. Boo.
		 *
		 * We update the hardware timer registers with the
		 * latest value written by the guest to the VNCR page
		 * and let the hardware take care of the rest.
		 */
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0),  SYS_CNTV_CTL);
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0),  SYS_CNTP_CTL);
		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL);
	} else {
	if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
		/*
		 * For a VHE guest hypervisor, the EL2 state is directly
		 * stored in the host EL1 timers, while the emulated EL0
		 * stored in the host EL1 timers, while the emulated EL1
		 * state is stored in the VNCR page. The latter could have
		 * been updated behind our back, and we must reset the
		 * emulation of the timers.
		 *
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap despite being
		 * notionally direct (we use the EL1 HW, as for VHE), while
		 * the EL1 registers access memory.
		 *
		 * In both cases, process the emulated timers on each guest
		 * exit. Boo.
		 */
		struct timer_map map;
		get_timer_map(vcpu, &map);
+20 −0
Original line number Diff line number Diff line
@@ -2290,6 +2290,19 @@ static int __init init_subsystems(void)
		break;
	case -ENODEV:
	case -ENXIO:
		/*
		 * No VGIC? No pKVM for you.
		 *
		 * Protected mode assumes that VGICv3 is present, so no point
		 * in trying to hobble along if vgic initialization fails.
		 */
		if (is_protected_kvm_enabled())
			goto out;

		/*
		 * Otherwise, userspace could choose to implement a GIC for its
		 * guest on non-cooperative hardware.
		 */
		vgic_present = false;
		err = 0;
		break;
@@ -2400,6 +2413,13 @@ static void kvm_hyp_init_symbols(void)
	kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
	kvm_nvhe_sym(__icache_flags) = __icache_flags;
	kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;

	/*
	 * Flush entire BSS since part of its data containing init symbols is read
	 * while the MMU is off.
	 */
	kvm_flush_dcache_to_poc(kvm_ksym_ref(__hyp_bss_start),
				kvm_ksym_ref(__hyp_bss_end) - kvm_ksym_ref(__hyp_bss_start));
}

static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
+24 −0
Original line number Diff line number Diff line
@@ -91,11 +91,34 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}

/*
 * Propagate the debug state from the host's vCPU structure into the
 * hypervisor's copy.
 *
 * The debug owner is always copied across. After that, only the register
 * bank that matches the owner is transferred: the guest debug state when
 * the guest owns the debug registers, the external debug state when the
 * host owns them. Nothing else is copied for any other owner value.
 */
static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct kvm_vcpu *src = hyp_vcpu->host_vcpu;
	struct kvm_vcpu *dst = &hyp_vcpu->vcpu;

	/* Ownership must be known before deciding which bank to copy. */
	dst->arch.debug_owner = src->arch.debug_owner;

	if (kvm_guest_owns_debug_regs(dst)) {
		dst->arch.vcpu_debug_state = src->arch.vcpu_debug_state;
		return;
	}

	if (kvm_host_owns_debug_regs(dst))
		dst->arch.external_debug_state = src->arch.external_debug_state;
}

/*
 * Copy the debug state from the hypervisor's vCPU copy back into the
 * host's vCPU structure.
 *
 * Ownership is evaluated on the hypervisor's copy and is not written back.
 * Only the register bank matching the current owner is transferred: the
 * guest debug state when the guest owns the debug registers, the external
 * debug state when the host owns them.
 */
static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct kvm_vcpu *src = &hyp_vcpu->vcpu;
	struct kvm_vcpu *dst = hyp_vcpu->host_vcpu;

	if (kvm_guest_owns_debug_regs(src)) {
		dst->arch.vcpu_debug_state = src->arch.vcpu_debug_state;
		return;
	}

	if (kvm_host_owns_debug_regs(src))
		dst->arch.external_debug_state = src->arch.external_debug_state;
}

static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;

	fpsimd_sve_flush();
	flush_debug_state(hyp_vcpu);

	hyp_vcpu->vcpu.arch.ctxt	= host_vcpu->arch.ctxt;

@@ -123,6 +146,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
	unsigned int i;

	fpsimd_sve_sync(&hyp_vcpu->vcpu);
	sync_debug_state(hyp_vcpu);

	host_vcpu->arch.ctxt		= hyp_vcpu->vcpu.arch.ctxt;

+5 −4
Original line number Diff line number Diff line
@@ -67,26 +67,27 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
	if (!tmp)
		return -ENOMEM;

	swap(kvm->arch.nested_mmus, tmp);

	/*
	 * If we went through a reallocation, adjust the MMU back-pointers in
	 * the previously initialised kvm_pgtable structures.
	 */
	if (kvm->arch.nested_mmus != tmp)
		for (int i = 0; i < kvm->arch.nested_mmus_size; i++)
			tmp[i].pgt->mmu = &tmp[i];
			kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i];

	for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++)
		ret = init_nested_s2_mmu(kvm, &tmp[i]);
		ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]);

	if (ret) {
		for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++)
			kvm_free_stage2_pgd(&tmp[i]);
			kvm_free_stage2_pgd(&kvm->arch.nested_mmus[i]);

		return ret;
	}

	kvm->arch.nested_mmus_size = num_mmus;
	kvm->arch.nested_mmus = tmp;

	return 0;
}
+13 −3
Original line number Diff line number Diff line
@@ -1452,6 +1452,16 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
	return true;
}

/*
 * Access handler for the EL2 virtual timer (CNTHV_*_EL2) registers.
 *
 * When the vCPU has HCR_EL2.E2H set, the access is forwarded to the
 * generic access_arch_timer() handler. Otherwise it is handed to
 * undef_access().
 *
 * Returns whatever the selected handler returns.
 */
static bool access_hv_timer(struct kvm_vcpu *vcpu,
			    struct sys_reg_params *p,
			    const struct sys_reg_desc *r)
{
	if (vcpu_el2_e2h_is_set(vcpu))
		return access_arch_timer(vcpu, p, r);

	return undef_access(vcpu, p, r);
}

static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
				    s64 new, s64 cur)
{
@@ -3103,9 +3113,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
	EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0),
	EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0),

	{ SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer },
	EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0),
	EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0),
	{ SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer },
	EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0),
	EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0),

	{ SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },