Commit b7d88336, authored by Lu Baolu, committed by Jason Gunthorpe
Browse files

iommufd: Fault-capable hwpt attach/detach/replace

Add iopf-capable hw page table attach/detach/replace helpers. The pointer
to iommufd_device is stored in the domain attachment handle, so that it
can be echoed back in the iopf_group.

The iopf-capable hw page tables can only be attached to devices that
support the IOMMU_DEV_FEAT_IOPF feature. On the first attachment of an
iopf-capable hw_pagetable to the device, the IOPF feature is enabled on
the device. Similarly, after the last iopf-capable hwpt is detached from
the device, the IOPF feature is disabled on the device.

The current implementation allows a replacement between iopf-capable and
non-iopf-capable hw page tables. This matches the nested translation use
case, where a parent domain is attached by default and can then be
replaced with a nested user domain with iopf support.

Link: https://lore.kernel.org/r/20240702063444.105814-8-baolu.lu@linux.intel.com


Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 07838f7f
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -215,6 +215,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
	refcount_inc(&idev->obj.users);
	/* igroup refcount moves into iommufd_device */
	idev->igroup = igroup;
	mutex_init(&idev->iopf_lock);

	/*
	 * If the caller fails after this success it must call
@@ -376,7 +377,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
	 * attachment.
	 */
	if (list_empty(&idev->igroup->device_list)) {
		rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
		rc = iommufd_hwpt_attach_device(hwpt, idev);
		if (rc)
			goto err_unresv;
		idev->igroup->hwpt = hwpt;
@@ -402,7 +403,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
	mutex_lock(&idev->igroup->lock);
	list_del(&idev->group_item);
	if (list_empty(&idev->igroup->device_list)) {
		iommu_detach_group(hwpt->domain, idev->igroup->group);
		iommufd_hwpt_detach_device(hwpt, idev);
		idev->igroup->hwpt = NULL;
	}
	if (hwpt_is_paging(hwpt))
@@ -497,7 +498,7 @@ iommufd_device_do_replace(struct iommufd_device *idev,
			goto err_unlock;
	}

	rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
	rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
	if (rc)
		goto err_unresv;

+190 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/iommufd.h>
#include <linux/pci.h>
#include <linux/poll.h>
#include <linux/anon_inodes.h>
#include <uapi/linux/iommufd.h>
@@ -15,6 +16,195 @@
#include "../iommu-priv.h"
#include "iommufd_private.h"

/*
 * Take one reference on the device's IOPF feature. The feature is switched
 * on through iommu_dev_enable_feature() only when the counter leaves zero;
 * later callers just bump the counter.
 *
 * Returns 0 on success, -EINVAL for a PCI virtual function, or the error
 * from iommu_dev_enable_feature(), in which case no reference is kept.
 */
static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
{
	struct device *dev = idev->dev;
	int rc = 0;

	/*
	 * PRI is shared between a PF and its VFs and nothing coordinates
	 * that shared capability, so once PCI/PRI support is turned on for
	 * a VF the response failure code should not be forwarded to the
	 * hardware. VFs are rejected here; this waits for a vPRI reset to
	 * recover.
	 */
	if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn)
		return -EINVAL;

	mutex_lock(&idev->iopf_lock);
	idev->iopf_enabled++;
	/* Only the first user actually turns the feature on. */
	if (idev->iopf_enabled <= 1) {
		rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
		if (rc)
			idev->iopf_enabled--;
	}
	mutex_unlock(&idev->iopf_lock);

	return rc;
}

/*
 * Drop one reference on the device's IOPF feature and turn the feature off
 * when the counter reaches zero. An unbalanced call (counter already zero)
 * triggers a warning and changes nothing.
 */
static void iommufd_fault_iopf_disable(struct iommufd_device *idev)
{
	mutex_lock(&idev->iopf_lock);

	if (WARN_ON(idev->iopf_enabled == 0))
		goto out_unlock;

	idev->iopf_enabled--;
	if (idev->iopf_enabled == 0)
		iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);

out_unlock:
	mutex_unlock(&idev->iopf_lock);
}

/*
 * Allocate an attach handle that records @idev and attach @hwpt's domain to
 * the device's group with it.
 *
 * Returns 0 on success, -ENOMEM if the handle cannot be allocated, or the
 * error from iommu_attach_group_handle(); the handle is freed on failure.
 */
static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev)
{
	struct iommufd_attach_handle *new_handle;
	int rc;

	new_handle = kzalloc(sizeof(*new_handle), GFP_KERNEL);
	if (!new_handle)
		return -ENOMEM;

	/* Lets the fault path find the iommufd_device from the handle. */
	new_handle->idev = idev;

	rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
				       &new_handle->handle);
	if (!rc)
		return 0;

	kfree(new_handle);
	return rc;
}

/*
 * Attach a fault-capable hwpt to a device: take an IOPF reference on the
 * device, then attach the domain with an attach handle.
 *
 * Returns 0 on success, -EINVAL if @hwpt has no fault object, or the error
 * from enabling IOPF or from the attach. The IOPF reference is dropped again
 * when the attach fails.
 */
int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				    struct iommufd_device *idev)
{
	int rc;

	if (!hwpt->fault)
		return -EINVAL;

	rc = iommufd_fault_iopf_enable(idev);
	if (rc)
		return rc;

	rc = __fault_domain_attach_dev(hwpt, idev);
	if (!rc)
		return 0;

	/* Attach failed: give back the reference taken above. */
	iommufd_fault_iopf_disable(idev);
	return rc;
}

/*
 * Answer every fault group that belongs to @handle with
 * IOMMU_PAGE_RESP_INVALID and free it. Both the fault object's deliver list
 * and its response xarray are scanned, under fault->mutex. Does nothing when
 * @hwpt has no fault object.
 */
static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
					 struct iommufd_attach_handle *handle)
{
	struct iommufd_fault *fault = hwpt->fault;
	struct iopf_group *grp, *tmp;
	unsigned long idx;

	if (!fault)
		return;

	mutex_lock(&fault->mutex);

	/* Groups still queued on the deliver list. */
	list_for_each_entry_safe(grp, tmp, &fault->deliver, node) {
		if (grp->attach_handle == &handle->handle) {
			list_del(&grp->node);
			iopf_group_response(grp, IOMMU_PAGE_RESP_INVALID);
			iopf_free_group(grp);
		}
	}

	/* Groups parked in the response xarray. */
	xa_for_each(&fault->response, idx, grp) {
		if (grp->attach_handle == &handle->handle) {
			xa_erase(&fault->response, idx);
			iopf_group_response(grp, IOMMU_PAGE_RESP_INVALID);
			iopf_free_group(grp);
		}
	}

	mutex_unlock(&fault->mutex);
}

/*
 * Look up the attach handle installed on the device's group for
 * IOMMU_NO_PASID and convert it to the enclosing iommufd_attach_handle.
 *
 * Returns the iommufd handle, or NULL when the lookup yields no usable
 * handle. Callers may pass the result straight to kfree().
 */
static struct iommufd_attach_handle *
iommufd_device_get_attach_handle(struct iommufd_device *idev)
{
	struct iommu_attach_handle *handle;

	handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
	/*
	 * The lookup reports failure with an error pointer, which a plain
	 * NULL test lets through; callers would then dereference and kfree()
	 * it. Collapse both NULL and error pointers to NULL.
	 */
	if (IS_ERR_OR_NULL(handle))
		return NULL;

	return to_iommufd_handle(handle);
}

/*
 * Detach a fault-capable hwpt from the device's group. In order: look up the
 * current attach handle, detach the domain, answer the fault groups still
 * tied to that handle with an invalid response, drop the device's IOPF
 * reference and free the handle.
 */
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev)
{
	struct iommufd_attach_handle *handle;

	/* Fetch the handle before detaching; it is still needed afterwards. */
	handle = iommufd_device_get_attach_handle(idev);
	iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
	/* Flush faults that reference the handle before it is freed. */
	iommufd_auto_response_faults(hwpt, handle);
	iommufd_fault_iopf_disable(idev);
	kfree(handle);
}

/*
 * Replace the domain attached to the device's group with @hwpt's domain.
 *
 * When @hwpt is fault-capable a new attach handle recording @idev is
 * allocated and installed with the domain; otherwise the replacement is done
 * without a handle. When @old was fault-capable, its handle is looked up
 * beforehand and, once the replacement has succeeded, its outstanding fault
 * groups are answered with an invalid response and the handle is freed.
 *
 * Returns 0 on success, -ENOMEM if the new handle cannot be allocated, or
 * the error from iommu_replace_group_handle(). On failure the old handle is
 * left untouched and the new one is freed.
 */
static int __fault_domain_replace_dev(struct iommufd_device *idev,
				      struct iommufd_hw_pagetable *hwpt,
				      struct iommufd_hw_pagetable *old)
{
	struct iommufd_attach_handle *handle = NULL, *curr = NULL;
	int ret;

	if (old->fault)
		curr = iommufd_device_get_attach_handle(idev);

	if (hwpt->fault) {
		handle = kzalloc(sizeof(*handle), GFP_KERNEL);
		if (!handle)
			return -ENOMEM;

		handle->handle.domain = hwpt->domain;
		handle->idev = idev;
		ret = iommu_replace_group_handle(idev->igroup->group,
						 hwpt->domain, &handle->handle);
	} else {
		ret = iommu_replace_group_handle(idev->igroup->group,
						 hwpt->domain, NULL);
	}

	if (ret) {
		/*
		 * The replacement did not take effect, so the new handle was
		 * never installed; free it instead of leaking it. kfree(NULL)
		 * covers the non-fault case.
		 */
		kfree(handle);
		return ret;
	}

	if (curr) {
		iommufd_auto_response_faults(old, curr);
		kfree(curr);
	}

	return 0;
}

/*
 * Replace @old with @hwpt on the device while keeping the device's IOPF
 * reference in step with the fault capability of the attached hwpt:
 *  - non-fault -> fault: take an IOPF reference before replacing, and drop
 *    it again if the replacement fails;
 *  - fault -> non-fault: drop the IOPF reference after a successful
 *    replacement;
 *  - otherwise the reference is left alone.
 *
 * Returns 0 on success or the error from enabling IOPF or from the
 * replacement.
 */
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
				     struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_hw_pagetable *old)
{
	const bool had_fault = !!old->fault;
	const bool want_fault = !!hwpt->fault;
	const bool turning_on = want_fault && !had_fault;
	const bool turning_off = had_fault && !want_fault;
	int rc;

	if (turning_on) {
		rc = iommufd_fault_iopf_enable(idev);
		if (rc)
			return rc;
	}

	rc = __fault_domain_replace_dev(idev, hwpt, old);
	if (!rc) {
		if (turning_off)
			iommufd_fault_iopf_disable(idev);
		return 0;
	}

	/* Replacement failed: undo the reference taken for it, if any. */
	if (turning_on)
		iommufd_fault_iopf_disable(idev);

	return rc;
}

void iommufd_fault_destroy(struct iommufd_object *obj)
{
	struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
+41 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include <linux/iommu.h>
#include <linux/iova_bitmap.h>
#include <uapi/linux/iommufd.h>
#include "../iommu-priv.h"

struct iommu_domain;
struct iommu_group;
@@ -293,6 +294,7 @@ int iommufd_check_iova_range(struct io_pagetable *iopt,
/* Hardware page table object tracked by iommufd. */
struct iommufd_hw_pagetable {
	struct iommufd_object obj;
	/* IOMMU domain handed to the attach/detach/replace calls */
	struct iommu_domain *domain;
	/* Non-NULL routes attach/detach/replace through the fault helpers */
	struct iommufd_fault *fault;
};

struct iommufd_hwpt_paging {
@@ -396,6 +398,9 @@ struct iommufd_device {
	/* always the physical device */
	struct device *dev;
	bool enforce_cache_coherency;
	/* protect iopf_enabled counter */
	struct mutex iopf_lock;
	unsigned int iopf_enabled;
};

static inline struct iommufd_device *
@@ -456,6 +461,42 @@ struct iommufd_attach_handle {
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);

/*
 * Attach/detach/replace helpers for fault-capable hw page tables. They are
 * reached through the iommufd_hwpt_*_device() wrappers below whenever a
 * hwpt involved in the operation has a fault object.
 */
int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				    struct iommufd_device *idev);
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev);
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
				     struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_hw_pagetable *old);

/*
 * Attach @hwpt to the device's group, using the fault-capable path when the
 * hwpt has a fault object and a plain group attach otherwise.
 *
 * Returns 0 on success or the error from the selected attach routine.
 */
static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
					     struct iommufd_device *idev)
{
	if (!hwpt->fault)
		return iommu_attach_group(hwpt->domain, idev->igroup->group);

	return iommufd_fault_domain_attach_dev(hwpt, idev);
}

/*
 * Detach @hwpt from the device's group, using the fault-capable path when
 * the hwpt has a fault object and a plain group detach otherwise.
 */
static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_device *idev)
{
	if (hwpt->fault) {
		/*
		 * The fault helper already detaches the domain from the
		 * group, so stop here; falling through would detach the same
		 * group a second time.
		 */
		iommufd_fault_domain_detach_dev(hwpt, idev);
		return;
	}

	iommu_detach_group(hwpt->domain, idev->igroup->group);
}

/*
 * Replace @old with @hwpt on the device's group. The fault-aware path is
 * taken when either hwpt has a fault object; a plain domain replacement is
 * used only when neither does.
 *
 * Returns 0 on success or the error from the selected replace routine.
 */
static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
					      struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_hw_pagetable *old)
{
	if (!old->fault && !hwpt->fault)
		return iommu_group_replace_domain(idev->igroup->group,
						  hwpt->domain);

	return iommufd_fault_domain_replace_dev(idev, hwpt, old);
}

#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);