Commit 3bf83e47 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'vfio-v7.1-rc4' of https://github.com/awilliam/linux-vfio

Pull VFIO fixes from Alex Williamson:

 - Convert vfio-pci BAR resource requests and iomaps initialization
   from a lazy, on-demand model to an eager pre-allocation model to
   avoid races while preserving legacy error behavior.  Fix unchecked
   barmap access in dma-buf export path (Matt Evans)

 - Introduce an implicit unsigned cast in converting vfio-pci device
   offsets to region indexes, closing a potential out-of-bounds
   access through the vfio_pci_ioeventfd() interface (Matt Evans)

 - Fix a dma-buf kref underflow and stuck wait_for_completion() when
   closing a previously revoked dma-buf (Alex Williamson)

* tag 'vfio-v7.1-rc4' of https://github.com/awilliam/linux-vfio:
  vfio/pci: Check BAR resources before exporting a DMABUF
  vfio/pci: Set up BAR resources and maps in vfio_pci_core_enable()
  vfio/pci: Make VFIO_PCI_OFFSET_TO_INDEX() return unsigned
  vfio/pci: fix dma-buf kref underflow after revoke
parents b0662be9 702809da
Loading
Loading
Loading
Loading
+36 −1
Original line number Diff line number Diff line
@@ -482,6 +482,40 @@ static int vfio_pci_core_runtime_resume(struct device *dev)
}
#endif /* CONFIG_PM */

/*
 * Claim and iomap every standard BAR up front.  A BAR that cannot be
 * set up is not fatal: its barmap slot is left holding an error pointer
 * (-ENODEV for a zero-length BAR, -EBUSY if the region could not be
 * reserved, -ENOMEM if the iomap failed), so consumers must check the
 * slot before use.  This keeps the user-visible behaviour compatible
 * with the previous on-demand allocation method.
 */
static void vfio_pci_core_map_bars(struct vfio_pci_core_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	int n;

	for (n = 0; n < PCI_STD_NUM_BARS; n++) {
		const int bar = n + PCI_STD_RESOURCES;
		const int mask = 1 << bar;

		/* Start from "no such BAR"; overwritten below on progress. */
		vdev->barmap[bar] = IOMEM_ERR_PTR(-ENODEV);
		if (pci_resource_len(pdev, n) == 0)
			continue;

		if (pci_request_selected_regions(pdev, mask, "vfio") != 0) {
			pci_dbg(pdev, "Failed to reserve region %d\n", bar);
			vdev->barmap[bar] = IOMEM_ERR_PTR(-EBUSY);
			continue;
		}

		vdev->barmap[bar] = pci_iomap(pdev, bar, 0);
		if (vdev->barmap[bar])
			continue;

		/* Mapping failed: give the region back and record why. */
		pci_dbg(pdev, "Failed to iomap region %d\n", bar);
		pci_release_selected_regions(pdev, mask);
		vdev->barmap[bar] = IOMEM_ERR_PTR(-ENOMEM);
	}
}

/*
 * The pci-driver core runtime PM routines always save the device state
 * before going into suspended state. If the device is going into low power
@@ -568,6 +602,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
		vdev->has_vga = true;

	vfio_pci_core_map_bars(vdev);

	return 0;

@@ -648,7 +683,7 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		bar = i + PCI_STD_RESOURCES;
		if (!vdev->barmap[bar])
		if (IS_ERR_OR_NULL(vdev->barmap[bar]))
			continue;
		pci_iounmap(pdev, vdev->barmap[bar]);
		pci_release_selected_regions(pdev, 1 << bar);
+22 −20
Original line number Diff line number Diff line
@@ -244,9 +244,11 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
		return -EINVAL;

	/*
	 * For PCI the region_index is the BAR number like everything else.
	 * For PCI the region_index is the BAR number like everything
	 * else.  Check that PCI resources have been claimed for it.
	 */
	if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX)
	if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX ||
	    vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index))
		return -ENODEV;

	dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
@@ -354,19 +356,18 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
			if (revoked) {
				kref_put(&priv->kref, vfio_pci_dma_buf_done);
				wait_for_completion(&priv->comp);
			} else {
				/*
				 * Kref is initialize again, because when revoke
				 * was performed the reference counter was decreased
				 * to zero to trigger completion.
				 * Re-arm the registered kref reference and the
				 * completion so the post-revoke state matches the
				 * post-creation state.  An un-revoke followed by a
				 * new mapping needs the kref to be non-zero before
				 * kref_get(), and vfio_pci_dma_buf_cleanup()
				 * delegates its drain back through this revoke
				 * path on a possibly-already-revoked dma-buf.
				 */
				kref_init(&priv->kref);
				/*
				 * There is no need to wait as no mapping was
				 * performed when the previous status was
				 * priv->revoked == true.
				 */
				reinit_completion(&priv->comp);
			} else {
				dma_resv_lock(priv->dmabuf->resv, NULL);
				priv->revoked = false;
				dma_resv_unlock(priv->dmabuf->resv);
@@ -382,21 +383,22 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
	struct vfio_pci_dma_buf *tmp;

	down_write(&vdev->memory_lock);

	/*
	 * Drain any active mappings via the revoke path.  The move is
	 * idempotent for dma-bufs already in the revoked state and
	 * leaves every priv with the kref re-armed and the completion
	 * ready, so cleanup itself does not need to participate in kref
	 * bookkeeping.
	 */
	vfio_pci_dma_buf_move(vdev, true);

	list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
		if (!get_file_active(&priv->dmabuf->file))
			continue;

		dma_resv_lock(priv->dmabuf->resv, NULL);
		list_del_init(&priv->dmabufs_elm);
		priv->vdev = NULL;
		priv->revoked = true;
		dma_buf_invalidate_mappings(priv->dmabuf);
		dma_resv_wait_timeout(priv->dmabuf->resv,
				      DMA_RESV_USAGE_BOOKKEEP, false,
				      MAX_SCHEDULE_TIMEOUT);
		dma_resv_unlock(priv->dmabuf->resv);
		kref_put(&priv->kref, vfio_pci_dma_buf_done);
		wait_for_completion(&priv->comp);
		vfio_device_put_registration(&vdev->vdev);
		fput(priv->dmabuf->file);
	}
+7 −19
Original line number Diff line number Diff line
@@ -198,27 +198,15 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);

/*
 * The barmap is set up in vfio_pci_core_enable().  This function no
 * longer requests or maps anything itself; callers use it only to check
 * that the BAR resources were requested and that the pci_iomap() was
 * done.
 *
 * @vdev: device whose barmap is inspected
 * @bar:  index into vdev->barmap[]; not range-checked here, the caller
 *        must pass a valid BAR index
 *
 * Returns 0 when the barmap entry is not an error pointer, otherwise
 * the negative errno encoded in that entry.
 */
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
	/*
	 * A failed eager setup leaves an error pointer in the slot, so a
	 * plain non-NULL test would wrongly report success.
	 */
	if (IS_ERR(vdev->barmap[bar]))
		return PTR_ERR(vdev->barmap[bar]);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
+1 −1
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@
#define VFIO_PCI_CORE_H

#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)	(off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_TO_INDEX(off)	((u64)(off) >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK	(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)