Commit 8cfc5b60 authored by Jason Gunthorpe's avatar Jason Gunthorpe Committed by Alex Williamson
Browse files

vfio: Replace the iommu notifier with a device list



Instead of bouncing the function call to the driver op through a blocking
notifier just have the iommu layer call it directly.

Register each device that is being attached to the iommu with the lower
driver which then threads them on a linked list and calls the appropriate
driver op at the right time.

Currently the only use is if dma_unmap() is defined.

Also, fully lock all the debugging tests on the pinning path that a
dma_unmap is registered.

Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarKevin Tian <kevin.tian@intel.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/2-v4-681e038e30fd+78-vfio_unmap_notif_jgg@nvidia.com


Signed-off-by: default avatarAlex Williamson <alex.williamson@redhat.com>
parent ce4b4657
Loading
Loading
Loading
Loading
+10 −31
Original line number Diff line number Diff line
@@ -231,7 +231,7 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
	if (WARN_ON(!ops->register_device != !ops->unregister_device))
		return -EINVAL;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
@@ -1082,17 +1082,6 @@ static void vfio_device_unassign_container(struct vfio_device *device)
	up_write(&device->group->group_rwsem);
}

static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
			       void *data)
{
	struct vfio_device *vfio_device =
		container_of(nb, struct vfio_device, iommu_nb);
	struct vfio_iommu_type1_dma_unmap *unmap = data;

	vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
	return NOTIFY_OK;
}

static struct file *vfio_device_open(struct vfio_device *device)
{
	struct vfio_iommu_driver *iommu_driver;
@@ -1128,15 +1117,9 @@ static struct file *vfio_device_open(struct vfio_device *device)
		}

		iommu_driver = device->group->container->iommu_driver;
		if (device->ops->dma_unmap && iommu_driver &&
		    iommu_driver->ops->register_notifier) {
			unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;

			device->iommu_nb.notifier_call = vfio_iommu_notifier;
			iommu_driver->ops->register_notifier(
				device->group->container->iommu_data, &events,
				&device->iommu_nb);
		}
		if (iommu_driver && iommu_driver->ops->register_device)
			iommu_driver->ops->register_device(
				device->group->container->iommu_data, device);

		up_read(&device->group->group_rwsem);
	}
@@ -1176,11 +1159,9 @@ static struct file *vfio_device_open(struct vfio_device *device)
		device->ops->close_device(device);

		iommu_driver = device->group->container->iommu_driver;
		if (device->ops->dma_unmap && iommu_driver &&
		    iommu_driver->ops->unregister_notifier)
			iommu_driver->ops->unregister_notifier(
				device->group->container->iommu_data,
				&device->iommu_nb);
		if (iommu_driver && iommu_driver->ops->unregister_device)
			iommu_driver->ops->unregister_device(
				device->group->container->iommu_data, device);
	}
err_undo_count:
	up_read(&device->group->group_rwsem);
@@ -1385,11 +1366,9 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
		device->ops->close_device(device);

	iommu_driver = device->group->container->iommu_driver;
	if (device->ops->dma_unmap && iommu_driver &&
	    iommu_driver->ops->unregister_notifier)
		iommu_driver->ops->unregister_notifier(
			device->group->container->iommu_data,
			&device->iommu_nb);
	if (iommu_driver && iommu_driver->ops->unregister_device)
		iommu_driver->ops->unregister_device(
			device->group->container->iommu_data, device);
	up_read(&device->group->group_rwsem);
	device->open_count--;
	if (device->open_count == 0)
+4 −8
Original line number Diff line number Diff line
@@ -33,9 +33,6 @@ enum vfio_iommu_notify_type {
	VFIO_IOMMU_CONTAINER_CLOSE = 0,
};

/* events for register_notifier() */
#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)

/**
 * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
 */
@@ -58,11 +55,10 @@ struct vfio_iommu_driver_ops {
				     unsigned long *phys_pfn);
	int		(*unpin_pages)(void *iommu_data,
				       unsigned long *user_pfn, int npage);
	int		(*register_notifier)(void *iommu_data,
					     unsigned long *events,
					     struct notifier_block *nb);
	int		(*unregister_notifier)(void *iommu_data,
					       struct notifier_block *nb);
	void		(*register_device)(void *iommu_data,
					   struct vfio_device *vdev);
	void		(*unregister_device)(void *iommu_data,
					     struct vfio_device *vdev);
	int		(*dma_rw)(void *iommu_data, dma_addr_t user_iova,
				  void *data, size_t count, bool write);
	struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
+66 −37
Original line number Diff line number Diff line
@@ -67,7 +67,8 @@ struct vfio_iommu {
	struct list_head	iova_list;
	struct mutex		lock;
	struct rb_root		dma_list;
	struct blocking_notifier_head notifier;
	struct list_head	device_list;
	struct mutex		device_list_lock;
	unsigned int		dma_avail;
	unsigned int		vaddr_invalid_count;
	uint64_t		pgsize_bitmap;
@@ -865,8 +866,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
		}
	}

	/* Fail if notifier list is empty */
	if (!iommu->notifier.head) {
	/* Fail if no dma_umap notifier is registered */
	if (list_empty(&iommu->device_list)) {
		ret = -EINVAL;
		goto pin_done;
	}
@@ -1287,6 +1288,35 @@ static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
	return 0;
}

/*
 * Notify VFIO drivers using vfio_register_emulated_iommu_dev() to invalidate
 * and unmap iovas within the range we're about to unmap. Drivers MUST unpin
 * pages in response to an invalidation.
 */
static void vfio_notify_dma_unmap(struct vfio_iommu *iommu,
				  struct vfio_dma *dma)
{
	struct vfio_device *device;

	if (list_empty(&iommu->device_list))
		return;

	/*
	 * The device is expected to call vfio_unpin_pages() for any IOVA it has
	 * pinned within the range. Since vfio_unpin_pages() will eventually
	 * call back down to this code and try to obtain the iommu->lock we must
	 * drop it.
	 */
	mutex_lock(&iommu->device_list_lock);
	mutex_unlock(&iommu->lock);

	list_for_each_entry(device, &iommu->device_list, iommu_entry)
		device->ops->dma_unmap(device, dma->iova, dma->size);

	mutex_unlock(&iommu->device_list_lock);
	mutex_lock(&iommu->lock);
}

static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
			     struct vfio_iommu_type1_dma_unmap *unmap,
			     struct vfio_bitmap *bitmap)
@@ -1400,8 +1430,6 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
		}

		if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
			struct vfio_iommu_type1_dma_unmap nb_unmap;

			if (dma_last == dma) {
				BUG_ON(++retries > 10);
			} else {
@@ -1409,20 +1437,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
				retries = 0;
			}

			nb_unmap.iova = dma->iova;
			nb_unmap.size = dma->size;

			/*
			 * Notify anyone (mdev vendor drivers) to invalidate and
			 * unmap iovas within the range we're about to unmap.
			 * Vendor drivers MUST unpin pages in response to an
			 * invalidation.
			 */
			mutex_unlock(&iommu->lock);
			blocking_notifier_call_chain(&iommu->notifier,
						    VFIO_IOMMU_NOTIFY_DMA_UNMAP,
						    &nb_unmap);
			mutex_lock(&iommu->lock);
			vfio_notify_dma_unmap(iommu, dma);
			goto again;
		}

@@ -2475,7 +2490,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,

		if (list_empty(&iommu->emulated_iommu_groups) &&
		    list_empty(&iommu->domain_list)) {
			WARN_ON(iommu->notifier.head);
			WARN_ON(!list_empty(&iommu->device_list));
			vfio_iommu_unmap_unpin_all(iommu);
		}
		goto detach_group_done;
@@ -2507,7 +2522,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
		if (list_empty(&domain->group_list)) {
			if (list_is_singular(&iommu->domain_list)) {
				if (list_empty(&iommu->emulated_iommu_groups)) {
					WARN_ON(iommu->notifier.head);
					WARN_ON(!list_empty(
						&iommu->device_list));
					vfio_iommu_unmap_unpin_all(iommu);
				} else {
					vfio_iommu_unmap_unpin_reaccount(iommu);
@@ -2568,7 +2584,8 @@ static void *vfio_iommu_type1_open(unsigned long arg)
	iommu->dma_avail = dma_entry_limit;
	iommu->container_open = true;
	mutex_init(&iommu->lock);
	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
	mutex_init(&iommu->device_list_lock);
	INIT_LIST_HEAD(&iommu->device_list);
	init_waitqueue_head(&iommu->vaddr_wait);
	iommu->pgsize_bitmap = PAGE_MASK;
	INIT_LIST_HEAD(&iommu->emulated_iommu_groups);
@@ -3005,28 +3022,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
	}
}

static int vfio_iommu_type1_register_notifier(void *iommu_data,
					      unsigned long *events,
					      struct notifier_block *nb)
static void vfio_iommu_type1_register_device(void *iommu_data,
					     struct vfio_device *vdev)
{
	struct vfio_iommu *iommu = iommu_data;

	/* clear known events */
	*events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP;

	/* refuse to register if still events remaining */
	if (*events)
		return -EINVAL;
	if (!vdev->ops->dma_unmap)
		return;

	return blocking_notifier_chain_register(&iommu->notifier, nb);
	/*
	 * list_empty(&iommu->device_list) is tested under the iommu->lock while
	 * iteration for dma_unmap must be done under the device_list_lock.
	 * Holding both locks here allows avoiding the device_list_lock in
	 * several fast paths. See vfio_notify_dma_unmap()
	 */
	mutex_lock(&iommu->lock);
	mutex_lock(&iommu->device_list_lock);
	list_add(&vdev->iommu_entry, &iommu->device_list);
	mutex_unlock(&iommu->device_list_lock);
	mutex_unlock(&iommu->lock);
}

static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
						struct notifier_block *nb)
static void vfio_iommu_type1_unregister_device(void *iommu_data,
					       struct vfio_device *vdev)
{
	struct vfio_iommu *iommu = iommu_data;

	return blocking_notifier_chain_unregister(&iommu->notifier, nb);
	if (!vdev->ops->dma_unmap)
		return;

	mutex_lock(&iommu->lock);
	mutex_lock(&iommu->device_list_lock);
	list_del(&vdev->iommu_entry);
	mutex_unlock(&iommu->device_list_lock);
	mutex_unlock(&iommu->lock);
}

static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
@@ -3160,8 +3189,8 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
	.detach_group		= vfio_iommu_type1_detach_group,
	.pin_pages		= vfio_iommu_type1_pin_pages,
	.unpin_pages		= vfio_iommu_type1_unpin_pages,
	.register_notifier	= vfio_iommu_type1_register_notifier,
	.unregister_notifier	= vfio_iommu_type1_unregister_notifier,
	.register_device	= vfio_iommu_type1_register_device,
	.unregister_device	= vfio_iommu_type1_unregister_device,
	.dma_rw			= vfio_iommu_type1_dma_rw,
	.group_iommu_domain	= vfio_iommu_type1_group_iommu_domain,
	.notify			= vfio_iommu_type1_notify,
+1 −1
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ struct vfio_device {
	unsigned int open_count;
	struct completion comp;
	struct list_head group_next;
	struct notifier_block iommu_nb;
	struct list_head iommu_entry;
};

/**