Commit 05c5e236 authored by Sean Christopherson
Browse files

KVM: SVM: Track per-vCPU IRTEs using kvm_kernel_irqfd structure

Track the IRTEs that are posting to an SVM vCPU via the associated irqfd
structure and GSI routing instead of dynamically allocating a separate
data structure.  In addition to eliminating an atomic allocation, this
will allow hoisting much of the IRTE update logic to common x86.

Cc: Sairaj Kodilkar <sarunkod@amd.com>
Link: https://lore.kernel.org/r/20250611224604.313496-6-seanjc@google.com


Signed-off-by: Sean Christopherson <seanjc@google.com>
parent cb210737
Loading
Loading
Loading
Loading
+27 −44
Original line number Diff line number Diff line
@@ -76,14 +76,6 @@ static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
bool x2avic_enabled;

/*
 * Per-vCPU list entry that wraps the IOMMU driver's interrupt remapping
 * data for a single IRTE.
 *
 * Entries are linked into svm->ir_list, and the opaque 'data' pointer is
 * passed back to the amd_iommu_*() helpers (amd_iommu_update_ga(),
 * amd_iommu_{de}activate_guest_mode()) when the list is walked.
 *
 * NOTE(review): this header previously named the wrapped type as
 * "struct amd_iommu_ir_data", while the field comment names
 * "struct amd_ir_data" -- confirm which is correct against the IOMMU driver.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Opaque IR data taken from pi->ir_data; presumably a struct amd_ir_data * */
};

static void avic_activate_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -747,8 +739,8 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);
	struct kvm_kernel_irqfd *irqfd;

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;
@@ -762,11 +754,11 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
	list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) {
		if (activate)
			ret = amd_iommu_activate_guest_mode(ir->data);
			ret = amd_iommu_activate_guest_mode(irqfd->irq_bypass_data);
		else
			ret = amd_iommu_deactivate_guest_mode(ir->data);
			ret = amd_iommu_deactivate_guest_mode(irqfd->irq_bypass_data);
		if (ret)
			break;
	}
@@ -775,27 +767,30 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
	return ret;
}

static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
static void svm_ir_list_del(struct vcpu_svm *svm,
			    struct kvm_kernel_irqfd *irqfd,
			    struct amd_iommu_pi_data *pi)
{
	unsigned long flags;
	struct amd_svm_iommu_ir *cur;
	struct kvm_kernel_irqfd *cur;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_for_each_entry(cur, &svm->ir_list, node) {
		if (cur->data != pi->ir_data)
	list_for_each_entry(cur, &svm->ir_list, vcpu_list) {
		if (cur->irq_bypass_data != pi->ir_data)
			continue;
		if (WARN_ON_ONCE(cur != irqfd))
			continue;
		list_del(&cur->node);
		kfree(cur);
		list_del(&irqfd->vcpu_list);
		break;
	}
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}

static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
static int svm_ir_list_add(struct vcpu_svm *svm,
			   struct kvm_kernel_irqfd *irqfd,
			   struct amd_iommu_pi_data *pi)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	u64 entry;

	if (WARN_ON_ONCE(!pi->ir_data))
@@ -812,25 +807,14 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
		struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
		struct vcpu_svm *prev_svm;

		if (!prev_vcpu) {
			ret = -EINVAL;
			goto out;
		}
		if (!prev_vcpu)
			return -EINVAL;

		prev_svm = to_svm(prev_vcpu);
		svm_ir_list_del(prev_svm, pi);
		svm_ir_list_del(prev_svm, irqfd, pi);
	}

	/**
	 * Allocating new amd_iommu_pi_data, which will get
	 * add to the per-vcpu ir_list.
	 */
	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT);
	if (!ir) {
		ret = -ENOMEM;
		goto out;
	}
	ir->data = pi->ir_data;
	irqfd->irq_bypass_data = pi->ir_data;

	spin_lock_irqsave(&svm->ir_list_lock, flags);

@@ -845,10 +829,9 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
		amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
				    true, pi->ir_data);

	list_add(&ir->node, &svm->ir_list);
	list_add(&irqfd->vcpu_list, &svm->ir_list);
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
	return ret;
	return 0;
}

/*
@@ -952,7 +935,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
			 * scheduling information in IOMMU irte.
			 */
			if (!ret && pi.is_guest_mode)
				svm_ir_list_add(svm, &pi);
				svm_ir_list_add(svm, irqfd, &pi);
		}

		if (!ret && svm) {
@@ -993,7 +976,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,

			vcpu = kvm_get_vcpu_by_id(kvm, id);
			if (vcpu)
				svm_ir_list_del(to_svm(vcpu), &pi);
				svm_ir_list_del(to_svm(vcpu), irqfd, &pi);
		}
	}
out:
@@ -1005,8 +988,8 @@ static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
	int ret = 0;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);
	struct kvm_kernel_irqfd *irqfd;

	lockdep_assert_held(&svm->ir_list_lock);

@@ -1020,8 +1003,8 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
	if (list_empty(&svm->ir_list))
		return 0;

	list_for_each_entry(ir, &svm->ir_list, node) {
		ret = amd_iommu_update_ga(cpu, r, ir->data);
	list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) {
		ret = amd_iommu_update_ga(cpu, r, irqfd->irq_bypass_data);
		if (ret)
			return ret;
	}
+6 −4
Original line number Diff line number Diff line
@@ -310,10 +310,12 @@ struct vcpu_svm {
	u64 *avic_physical_id_cache;

	/*
	 * Per-vcpu list of struct amd_svm_iommu_ir:
	 * This is used mainly to store interrupt remapping information used
	 * when update the vcpu affinity. This avoids the need to scan for
	 * IRTE and try to match ga_tag in the IOMMU driver.
	 * Per-vCPU list of irqfds that are eligible to post IRQs directly to
	 * the vCPU (a.k.a. device posted IRQs, a.k.a. IRQ bypass).  The list
	 * is used to reconfigure IRTEs when the vCPU is loaded/put (to set the
	 * target pCPU), when AVIC is toggled on/off (to (de)activate bypass),
	 * and if the irqfd becomes ineligible for posting (to put the IRTE
	 * back into remapped mode).
	 */
	struct list_head ir_list;
	spinlock_t ir_list_lock;
+3 −0
Original line number Diff line number Diff line
@@ -59,6 +59,9 @@ struct kvm_kernel_irqfd {
	struct work_struct shutdown;
	struct irq_bypass_consumer consumer;
	struct irq_bypass_producer *producer;

	struct list_head vcpu_list;
	void *irq_bypass_data;
};

#endif /* __LINUX_KVM_IRQFD_H */