Commit cf04ec39 authored by Sean Christopherson
Browse files

KVM: x86: Dedup AVIC vs. PI code for identifying target vCPU



Hoist the logic for identifying the target vCPU for a posted interrupt
into common x86.  The code is functionally identical between Intel and
AMD.

Tested-by: Sairaj Kodilkar <sarunkod@amd.com>
Link: https://lore.kernel.org/r/20250611224604.313496-30-seanjc@google.com


Signed-off-by: Sean Christopherson <seanjc@google.com>
parent 9517aede
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1855,7 +1855,7 @@ struct kvm_x86_ops {

	int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
			      unsigned int host_irq, uint32_t guest_irq,
			      struct kvm_kernel_irq_routing_entry *new);
			      struct kvm_vcpu *vcpu, u32 vector);
	void (*pi_start_assignment)(struct kvm *kvm);
	void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
+39 −6
Original line number Diff line number Diff line
@@ -508,6 +508,42 @@ void kvm_arch_irq_routing_update(struct kvm *kvm)
		kvm_make_scan_ioapic_request(kvm);
}

static int kvm_pi_update_irte(struct kvm_kernel_irqfd *irqfd,
			      struct kvm_kernel_irq_routing_entry *entry)
{
	struct kvm *kvm = irqfd->kvm;
	struct kvm_vcpu *vcpu = NULL;
	struct kvm_lapic_irq irq;

	if (!irqchip_in_kernel(kvm) ||
	    !kvm_arch_has_irq_bypass() ||
	    !kvm_arch_has_assigned_device(kvm))
		return 0;

	if (entry && entry->type == KVM_IRQ_ROUTING_MSI) {
		kvm_set_msi_irq(kvm, entry, &irq);

		/*
		 * Force remapped mode if hardware doesn't support posting the
		 * virtual interrupt to a vCPU.  Only IRQs are postable (NMIs,
		 * SMIs, etc. are not), and neither AMD nor Intel IOMMUs support
		 * posting multicast/broadcast IRQs.  If the interrupt can't be
		 * posted, the device MSI needs to be routed to the host so that
		 * the guest's desired interrupt can be synthesized by KVM.
		 *
		 * This means that KVM can only post lowest-priority interrupts
		 * if they have a single CPU as the destination, e.g. only if
		 * the guest has affined the interrupt to a single vCPU.
		 */
		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
		    !kvm_irq_is_postable(&irq))
			vcpu = NULL;
	}

	return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
					    irqfd->gsi, vcpu, irq.vector);
}

int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
@@ -522,8 +558,7 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
	irqfd->producer = prod;

	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
		ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
						   irqfd->gsi, &irqfd->irq_entry);
		ret = kvm_pi_update_irte(irqfd, &irqfd->irq_entry);
		if (ret)
			kvm_arch_end_assignment(irqfd->kvm);
	}
@@ -551,8 +586,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
	spin_lock_irq(&kvm->irqfds.lock);

	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
		ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
						   irqfd->gsi, NULL);
		ret = kvm_pi_update_irte(irqfd, NULL);
		if (ret)
			pr_info("irq bypass consumer (eventfd %p) unregistration fails: %d\n",
				irqfd->consumer.eventfd, ret);
@@ -568,8 +602,7 @@ int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
				  struct kvm_kernel_irq_routing_entry *old,
				  struct kvm_kernel_irq_routing_entry *new)
{
	return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
					    irqfd->gsi, new);
	return kvm_pi_update_irte(irqfd, new);
}

bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
+19 −63
Original line number Diff line number Diff line
@@ -804,52 +804,12 @@ static int svm_ir_list_add(struct vcpu_svm *svm,
	return 0;
}

/*
 * Note:
 * The HW cannot support posting multicast/broadcast
 * interrupts to a vCPU. So, we still use legacy interrupt
 * remapping for these kind of interrupts.
 *
 * For lowest-priority interrupts, we only support
 * those with single CPU as the destination, e.g. user
 * configures the interrupts via /proc/irq or uses
 * irqbalance to make the interrupts single-CPU.
 */
static int
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
		 struct vcpu_data *vcpu_info, struct kvm_vcpu **vcpu)
{
	struct kvm_lapic_irq irq;
	*vcpu = NULL;

	kvm_set_msi_irq(kvm, e, &irq);

	if (!kvm_intr_is_single_vcpu(kvm, &irq, vcpu) ||
	    !kvm_irq_is_postable(&irq)) {
		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
			 __func__, irq.vector);
		return -1;
	}

	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
		 irq.vector);
	vcpu_info->vector = irq.vector;

	return 0;
}

int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
			unsigned int host_irq, uint32_t guest_irq,
			struct kvm_kernel_irq_routing_entry *new)
			struct kvm_vcpu *vcpu, u32 vector)
{
	bool enable_remapped_mode = true;
	struct vcpu_data vcpu_info;
	struct kvm_vcpu *vcpu = NULL;
	int ret = 0;

	if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass())
		return 0;

	/*
	 * If the IRQ was affined to a different vCPU, remove the IRTE metadata
	 * from the *previous* vCPU's list.
@@ -857,7 +817,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
	svm_ir_list_del(irqfd);

	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
		 __func__, host_irq, guest_irq, !!new);
		 __func__, host_irq, guest_irq, !!vcpu);

	/**
	 * Here, we setup with legacy mode in the following cases:
@@ -866,23 +826,23 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
	 * 3. APIC virtualization is disabled for the vcpu.
	 * 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
	 */
	if (new && new->type == KVM_IRQ_ROUTING_MSI &&
	    !get_pi_vcpu_info(kvm, new, &vcpu_info, &vcpu) &&
	    kvm_vcpu_apicv_active(vcpu)) {
		struct amd_iommu_pi_data pi;

		enable_remapped_mode = false;

		vcpu_info.pi_desc_addr = avic_get_backing_page_address(to_svm(vcpu));

	if (vcpu && kvm_vcpu_apicv_active(vcpu)) {
		/*
		 * Try to enable guest_mode in IRTE.  Note, the address
		 * of the vCPU's AVIC backing page is passed to the
		 * IOMMU via vcpu_info->pi_desc_addr.
		 */
		pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_id);
		pi.is_guest_mode = true;
		pi.vcpu_data = &vcpu_info;
		struct vcpu_data vcpu_info = {
			.pi_desc_addr = avic_get_backing_page_address(to_svm(vcpu)),
			.vector = vector,
		};

		struct amd_iommu_pi_data pi = {
			.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_id),
			.is_guest_mode = true,
			.vcpu_data = &vcpu_info,
		};

		ret = irq_set_vcpu_affinity(host_irq, &pi);

		/**
@@ -894,12 +854,11 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
		 */
		if (!ret)
			ret = svm_ir_list_add(to_svm(vcpu), irqfd, &pi);
	}

	if (!ret && vcpu) {
		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id,
					 guest_irq, vcpu_info.vector,
					 vcpu_info.pi_desc_addr, !!new);
		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
					 vector, vcpu_info.pi_desc_addr, true);
	} else {
		ret = irq_set_vcpu_affinity(host_irq, NULL);
	}

	if (ret < 0) {
@@ -907,9 +866,6 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
		goto out;
	}

	if (enable_remapped_mode)
		ret = irq_set_vcpu_affinity(host_irq, NULL);
	else
	ret = 0;
out:
	return ret;
+1 −1
Original line number Diff line number Diff line
@@ -747,7 +747,7 @@ void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
			unsigned int host_irq, uint32_t guest_irq,
			struct kvm_kernel_irq_routing_entry *new);
			struct kvm_vcpu *vcpu, u32 vector);
void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
void avic_ring_doorbell(struct kvm_vcpu *vcpu);
+14 −41
Original line number Diff line number Diff line
@@ -300,46 +300,19 @@ void vmx_pi_start_assignment(struct kvm *kvm)

int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
		       unsigned int host_irq, uint32_t guest_irq,
		       struct kvm_kernel_irq_routing_entry *new)
		       struct kvm_vcpu *vcpu, u32 vector)
{
	struct kvm_lapic_irq irq;
	struct kvm_vcpu *vcpu;
	struct vcpu_data vcpu_info;

	if (!vmx_can_use_vtd_pi(kvm))
		return 0;

	/*
	 * VT-d PI cannot support posting multicast/broadcast
	 * interrupts to a vCPU, we still use interrupt remapping
	 * for these kind of interrupts.
	 *
	 * For lowest-priority interrupts, we only support
	 * those with single CPU as the destination, e.g. user
	 * configures the interrupts via /proc/irq or uses
	 * irqbalance to make the interrupts single-CPU.
	 *
	 * We will support full lowest-priority interrupt later.
	 *
	 * In addition, we can only inject generic interrupts using
	 * the PI mechanism, refuse to route others through it.
	 */
	if (!new || new->type != KVM_IRQ_ROUTING_MSI)
		goto do_remapping;

	kvm_set_msi_irq(kvm, new, &irq);

	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
	    !kvm_irq_is_postable(&irq))
		goto do_remapping;

	vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
	vcpu_info.vector = irq.vector;
	if (vcpu) {
		struct vcpu_data vcpu_info = {
			.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)),
			.vector = vector,
		};

		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
					 vcpu_info.vector, vcpu_info.pi_desc_addr, true);

		return irq_set_vcpu_affinity(host_irq, &vcpu_info);
do_remapping:
	} else {
		return irq_set_vcpu_affinity(host_irq, NULL);
	}
}
Loading