Commit def054b0 authored by Lu Baolu, committed by Joerg Roedel
Browse files

iommu/vt-d: Use device rbtree in iopf reporting path



The I/O page fault handler currently locates the PCI device by calling
pci_get_domain_bus_and_slot(), which walks the list of all PCI devices
until the desired device is found. To improve lookup efficiency, replace
it with device_rbtree_find(), which looks the device up in the rbtree of
probed devices.

The I/O page fault is initiated by the device, which does not have any
synchronization mechanism with the software to ensure that the device
stays in the probed device tree. Theoretically, a device could be released
by the IOMMU subsystem after device_rbtree_find() and before
iopf_get_dev_fault_param(), which would cause a use-after-free problem.

Add a mutex to synchronize the I/O page fault reporting path and the IOMMU
release device path. This lock doesn't introduce any performance overhead,
as the conflict between I/O page fault reporting and device releasing is
very rare.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20240220065939.121116-3-baolu.lu@linux.intel.com


Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent 1a75cc71
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1097,6 +1097,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
	iommu->segment = drhd->segment;
	iommu->device_rbtree = RB_ROOT;
	spin_lock_init(&iommu->device_rbtree_lock);
	mutex_init(&iommu->iopf_lock);
	iommu->node = NUMA_NO_NODE;

	ver = readl(iommu->reg + DMAR_VER_REG);
+3 −0
Original line number Diff line number Diff line
@@ -4362,8 +4362,11 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
static void intel_iommu_release_device(struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;

	mutex_lock(&iommu->iopf_lock);
	device_rbtree_remove(info);
	mutex_unlock(&iommu->iopf_lock);
	dmar_remove_one_dev_info(dev);
	intel_pasid_free_table(dev);
	intel_iommu_debugfs_remove_dev(info);
+2 −0
Original line number Diff line number Diff line
@@ -713,6 +713,8 @@ struct intel_iommu {
#endif
	struct iopf_queue *iopf_queue;
	unsigned char iopfq_name[16];
	/* Synchronization between fault report and iommu device release. */
	struct mutex iopf_lock;
	struct q_inval  *qi;            /* Queued invalidation info */
	u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/

+9 −8
Original line number Diff line number Diff line
@@ -643,7 +643,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
	struct intel_iommu *iommu = d;
	struct page_req_dsc *req;
	int head, tail, handled;
	struct pci_dev *pdev;
	struct device *dev;
	u64 address;

	/*
@@ -689,23 +689,24 @@ static irqreturn_t prq_event_thread(int irq, void *d)
		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
			goto prq_advance;

		pdev = pci_get_domain_bus_and_slot(iommu->segment,
						   PCI_BUS_NUM(req->rid),
						   req->rid & 0xff);
		/*
		 * If prq is to be handled outside iommu driver via receiver of
		 * the fault notifiers, we skip the page response here.
		 */
		if (!pdev)
		mutex_lock(&iommu->iopf_lock);
		dev = device_rbtree_find(iommu, req->rid);
		if (!dev) {
			mutex_unlock(&iommu->iopf_lock);
			goto bad_req;
		}

		if (intel_svm_prq_report(iommu, &pdev->dev, req))
		if (intel_svm_prq_report(iommu, dev, req))
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
		else
			trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
			trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
					 req->priv_data[0], req->priv_data[1],
					 iommu->prq_seq_number++);
		pci_dev_put(pdev);
		mutex_unlock(&iommu->iopf_lock);
prq_advance:
		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}