Commit 24c12911 authored by Isaku Yamahata's avatar Isaku Yamahata Committed by Paolo Bonzini
Browse files

KVM: TDX: Implement non-NMI interrupt injection



Implement non-NMI interrupt injection for TDX via posted interrupt.

As CPU state is protected and APICv is enabled for the TDX guest, TDX
supports non-NMI interrupt injection only by posted interrupt. Posted
interrupt descriptors (PIDs) are allocated in shared memory, KVM can
update them directly.  If target vCPU is in non-root mode, send posted
interrupt notification to the vCPU and hardware will sync PIR to vIRR
atomically.  Otherwise, kick it to pick up the interrupt from PID. To
post pending interrupts in the PID, KVM can generate a self-IPI with
notification vector prior to TD entry.

Since the guest status of TD vCPU is protected, assume interrupt is
always allowed.  Ignore the code path for event injection mechanism or
LAPIC emulation for TDX.

Signed-off-by: default avatarIsaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: default avatarBinbin Wu <binbin.wu@linux.intel.com>
Signed-off-by: default avatarBinbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
Message-ID: <20250222014757.897978-5-binbin.wu@linux.intel.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 254e5dcd
Loading
Loading
Loading
Loading
+85 −9
Original line number Diff line number Diff line
@@ -191,6 +191,34 @@ static int vt_handle_exit(struct kvm_vcpu *vcpu,
	return vmx_handle_exit(vcpu, fastpath);
}

static void vt_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
{
	struct pi_desc *pi = vcpu_to_pi_desc(vcpu);

	pi_clear_on(pi);
	memset(pi->pir, 0, sizeof(pi->pir));
}

static int vt_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		return -1;

	return vmx_sync_pir_to_irr(vcpu);
}

static void vt_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
			   int trig_mode, int vector)
{
	if (is_td_vcpu(apic->vcpu)) {
		tdx_deliver_interrupt(apic, delivery_mode, trig_mode,
					     vector);
		return;
	}

	vmx_deliver_interrupt(apic, delivery_mode, trig_mode, vector);
}

static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu)) {
@@ -238,6 +266,54 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
}

static void vt_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
	if (is_td_vcpu(vcpu))
		return;

	vmx_set_interrupt_shadow(vcpu, mask);
}

static u32 vt_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		return 0;

	return vmx_get_interrupt_shadow(vcpu);
}

static void vt_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
	if (is_td_vcpu(vcpu))
		return;

	vmx_inject_irq(vcpu, reinjected);
}

static void vt_cancel_injection(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		return;

	vmx_cancel_injection(vcpu);
}

static int vt_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
	if (is_td_vcpu(vcpu))
		return true;

	return vmx_interrupt_allowed(vcpu, for_injection);
}

static void vt_enable_irq_window(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		return;

	vmx_enable_irq_window(vcpu);
}

static void vt_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code)
{
	*intr_info = 0;
@@ -359,19 +435,19 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
	.handle_exit = vt_handle_exit,
	.skip_emulated_instruction = vmx_skip_emulated_instruction,
	.update_emulated_instruction = vmx_update_emulated_instruction,
	.set_interrupt_shadow = vmx_set_interrupt_shadow,
	.get_interrupt_shadow = vmx_get_interrupt_shadow,
	.set_interrupt_shadow = vt_set_interrupt_shadow,
	.get_interrupt_shadow = vt_get_interrupt_shadow,
	.patch_hypercall = vmx_patch_hypercall,
	.inject_irq = vmx_inject_irq,
	.inject_irq = vt_inject_irq,
	.inject_nmi = vmx_inject_nmi,
	.inject_exception = vmx_inject_exception,
	.cancel_injection = vmx_cancel_injection,
	.interrupt_allowed = vmx_interrupt_allowed,
	.cancel_injection = vt_cancel_injection,
	.interrupt_allowed = vt_interrupt_allowed,
	.nmi_allowed = vmx_nmi_allowed,
	.get_nmi_mask = vmx_get_nmi_mask,
	.set_nmi_mask = vmx_set_nmi_mask,
	.enable_nmi_window = vmx_enable_nmi_window,
	.enable_irq_window = vmx_enable_irq_window,
	.enable_irq_window = vt_enable_irq_window,
	.update_cr8_intercept = vmx_update_cr8_intercept,

	.x2apic_icr_is_split = false,
@@ -379,11 +455,11 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
	.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
	.load_eoi_exitmap = vmx_load_eoi_exitmap,
	.apicv_pre_state_restore = vmx_apicv_pre_state_restore,
	.apicv_pre_state_restore = vt_apicv_pre_state_restore,
	.required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
	.hwapic_isr_update = vmx_hwapic_isr_update,
	.sync_pir_to_irr = vmx_sync_pir_to_irr,
	.deliver_interrupt = vmx_deliver_interrupt,
	.sync_pir_to_irr = vt_sync_pir_to_irr,
	.deliver_interrupt = vt_deliver_interrupt,
	.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,

	.set_tss_addr = vmx_set_tss_addr,
+1 −1
Original line number Diff line number Diff line
@@ -32,7 +32,7 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
 */
static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);

static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
	return &(to_vt(vcpu)->pi_desc);
}
+2 −0
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@
#include <linux/bitmap.h>
#include <asm/posted_intr.h>

struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu);

void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
void pi_wakeup_handler(void);
+23 −0
Original line number Diff line number Diff line
@@ -671,6 +671,9 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
	if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE)
		vcpu->arch.xfd_no_write_intercept = true;

	tdx->vt.pi_desc.nv = POSTED_INTR_VECTOR;
	__pi_set_sn(&tdx->vt.pi_desc);

	tdx->state = VCPU_TD_STATE_UNINITIALIZED;

	return 0;
@@ -680,6 +683,7 @@ void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct vcpu_tdx *tdx = to_tdx(vcpu);

	vmx_vcpu_pi_load(vcpu, cpu);
	if (vcpu->cpu == cpu || !is_hkid_assigned(to_kvm_tdx(vcpu->kvm)))
		return;

@@ -965,6 +969,9 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)

	trace_kvm_entry(vcpu, force_immediate_exit);

	if (pi_test_on(&vt->pi_desc))
		apic->send_IPI_self(POSTED_INTR_VECTOR);

	tdx_vcpu_enter_exit(vcpu);

	if (vt->host_debugctlmsr & ~TDX_DEBUGCTL_PRESERVED)
@@ -1628,6 +1635,18 @@ int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
	return tdx_sept_drop_private_spte(kvm, gfn, level, page);
}

void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
			   int trig_mode, int vector)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct vcpu_tdx *tdx = to_tdx(vcpu);

	/* TDX supports only posted interrupt.  No lapic emulation. */
	__vmx_deliver_posted_interrupt(vcpu, &tdx->vt.pi_desc, vector);

	trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector);
}

int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
{
	struct vcpu_tdx *tdx = to_tdx(vcpu);
@@ -2571,6 +2590,10 @@ static int tdx_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
	if (ret)
		return ret;

	td_vmcs_write16(tdx, POSTED_INTR_NV, POSTED_INTR_VECTOR);
	td_vmcs_write64(tdx, POSTED_INTR_DESC_ADDR, __pa(&tdx->vt.pi_desc));
	td_vmcs_setbit32(tdx, PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_POSTED_INTR);

	tdx->state = VCPU_TD_STATE_INITIALIZED;

	return 0;
+0 −8
Original line number Diff line number Diff line
@@ -6904,14 +6904,6 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
	vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
}

void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
{
	struct vcpu_vt *vt = to_vt(vcpu);

	pi_clear_on(&vt->pi_desc);
	memset(vt->pi_desc.pir, 0, sizeof(vt->pi_desc.pir));
}

void vmx_do_interrupt_irqoff(unsigned long entry);
void vmx_do_nmi_irqoff(void);

Loading