Commit c93529ad authored by Linus Torvalds
Browse files
Pull iommufd updates from Jason Gunthorpe:
 "This broadly brings the assigned HW command queue support to iommufd.
  This feature is used to improve SVA performance in VMs by avoiding
  paravirtualization traps during SVA invalidations.

  Along the way I think some of the core logic is in a much better state
  to support future driver backed features.

  Summary:

   - IOMMU HW now has features to directly assign HW command queues to a
     guest VM. In this mode the command queue operates on a limited set
     of invalidation commands that are suitable for improving guest
     invalidation performance and easy for the HW to virtualize.

     This brings the generic infrastructure to allow IOMMU drivers to
     expose such command queues through the iommufd uAPI, mmap the
     doorbell pages, and get the guest physical range for the command
     queue ring itself.

   - An implementation for the NVIDIA SMMUv3 extension "cmdqv" is built
     on the new iommufd command queue features. It works with the
     existing SMMU driver support for cmdqv in guest VMs.

   - Many precursor cleanups and improvements to support the above
     cleanly, changes to the general ioctl and object helpers, driver
     support for VDEVICE, and mmap pgoff cookie infrastructure.

   - Sequence VDEVICE destruction to always happen before VFIO device
     destruction. With features like the above, and with future
     confidential compute support, the internal virtual device
     representation becomes linked to HW or CC TSM configuration and
     objects. If a VFIO device is removed from iommufd, those HW objects
     should also be cleaned up to prevent a sort of UAF. This became
     important now that we have HW backing the VDEVICE.

   - Fix one syzkaller-found error related to math overflow during IOVA
     allocation"

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (57 commits)
  iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
  iommu/arm-smmu-v3: Do not bother impl_ops if IOMMU_VIOMMU_TYPE_ARM_SMMUV3
  iommufd: Rename some shortterm-related identifiers
  iommufd/selftest: Add coverage for vdevice tombstone
  iommufd/selftest: Explicitly skip tests for inapplicable variant
  iommufd/vdevice: Remove struct device reference from struct vdevice
  iommufd: Destroy vdevice on idevice destroy
  iommufd: Add a pre_destroy() op for objects
  iommufd: Add iommufd_object_tombstone_user() helper
  iommufd/viommu: Roll back to use iommufd_object_alloc() for vdevice
  iommufd/selftest: Test reserved regions near ULONG_MAX
  iommufd: Prevent ALIGN() overflow
  iommu/tegra241-cmdqv: import IOMMUFD module namespace
  iommufd: Do not allow _iommufd_object_alloc_ucmd if abort op is set
  iommu/tegra241-cmdqv: Add IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV support
  iommu/tegra241-cmdqv: Add user-space use support
  iommu/tegra241-cmdqv: Do not statically map LVCMDQs
  iommu/tegra241-cmdqv: Simplify deinit flow in tegra241_cmdqv_remove_vintf()
  iommu/tegra241-cmdqv: Use request_threaded_irq
  iommu/arm-smmu-v3-iommufd: Add hw_info to impl_ops
  ...
parents 7ce4de1c 2c78e744
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -124,6 +124,17 @@ Following IOMMUFD objects are exposed to userspace:
  used to allocate a vEVENTQ. Each vIOMMU can support multiple types of vEVENTS,
  but is confined to one vEVENTQ per vEVENTQ type.

- IOMMUFD_OBJ_HW_QUEUE, representing a hardware accelerated queue, as a subset
  of IOMMU's virtualization features, for the IOMMU HW to directly read or write
  the virtual queue memory owned by a guest OS. This HW-acceleration feature
  allows a VM to work with the IOMMU HW directly without a VM Exit, reducing the
  overhead from hypercalls. Along with the HW QUEUE object, iommufd provides
  user space an mmap interface for the VMM to mmap a physical MMIO region from
  the host physical address space to the guest physical address space, allowing
  the guest OS to directly control the allocated HW QUEUE. Thus, when allocating
  a HW QUEUE, the VMM must request a pair of mmap info values (offset/length)
  and pass them unchanged to the mmap syscall as its offset and length
  arguments.

All user-visible objects are destroyed via the IOMMU_DESTROY uAPI.

The diagrams below show relationships between user-visible objects and kernel
@@ -270,6 +281,7 @@ User visible objects are backed by following datastructures:
- iommufd_viommu for IOMMUFD_OBJ_VIOMMU.
- iommufd_vdevice for IOMMUFD_OBJ_VDEVICE.
- iommufd_veventq for IOMMUFD_OBJ_VEVENTQ.
- iommufd_hw_queue for IOMMUFD_OBJ_HW_QUEUE.

Several terminologies when looking at these datastructures:

+44 −26
Original line number Diff line number Diff line
@@ -7,13 +7,22 @@

#include "arm-smmu-v3.h"

void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
void *arm_smmu_hw_info(struct device *dev, u32 *length,
		       enum iommu_hw_info_type *type)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	const struct arm_smmu_impl_ops *impl_ops = master->smmu->impl_ops;
	struct iommu_hw_info_arm_smmuv3 *info;
	u32 __iomem *base_idr;
	unsigned int i;

	if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
	    *type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) {
		if (!impl_ops || !impl_ops->hw_info)
			return ERR_PTR(-EOPNOTSUPP);
		return impl_ops->hw_info(master->smmu, length, type);
	}

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);
@@ -216,7 +225,7 @@ static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
	return 0;
}

static struct iommu_domain *
struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
@@ -327,7 +336,7 @@ static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
	return 0;
}

static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
			       struct iommu_user_data_array *array)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
@@ -382,25 +391,14 @@ static const struct iommufd_viommu_ops arm_vsmmu_ops = {
	.cache_invalidate = arm_vsmmu_cache_invalidate,
};

struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
				       struct iommu_domain *parent,
				       struct iommufd_ctx *ictx,
				       unsigned int viommu_type)
size_t arm_smmu_get_viommu_size(struct device *dev,
				enum iommu_viommu_type viommu_type)
{
	struct arm_smmu_device *smmu =
		iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_domain *s2_parent = to_smmu_domain(parent);
	struct arm_vsmmu *vsmmu;

	if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
		return ERR_PTR(-EOPNOTSUPP);
	struct arm_smmu_device *smmu = master->smmu;

	if (!(smmu->features & ARM_SMMU_FEAT_NESTING))
		return ERR_PTR(-EOPNOTSUPP);

	if (s2_parent->smmu != master->smmu)
		return ERR_PTR(-EINVAL);
		return 0;

	/*
	 * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
@@ -408,7 +406,7 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
	 * any change to remove this.
	 */
	if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
		return ERR_PTR(-EOPNOTSUPP);
		return 0;

	/*
	 * Must support some way to prevent the VM from bypassing the cache
@@ -420,19 +418,39 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
	 */
	if (!arm_smmu_master_canwbs(master) &&
	    !(smmu->features & ARM_SMMU_FEAT_S2FWB))
		return ERR_PTR(-EOPNOTSUPP);
		return 0;

	vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
				     &arm_vsmmu_ops);
	if (IS_ERR(vsmmu))
		return ERR_CAST(vsmmu);
	if (viommu_type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
		return VIOMMU_STRUCT_SIZE(struct arm_vsmmu, core);

	if (!smmu->impl_ops || !smmu->impl_ops->get_viommu_size)
		return 0;
	return smmu->impl_ops->get_viommu_size(viommu_type);
}

int arm_vsmmu_init(struct iommufd_viommu *viommu,
		   struct iommu_domain *parent_domain,
		   const struct iommu_user_data *user_data)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_device *smmu =
		container_of(viommu->iommu_dev, struct arm_smmu_device, iommu);
	struct arm_smmu_domain *s2_parent = to_smmu_domain(parent_domain);

	if (s2_parent->smmu != smmu)
		return -EINVAL;

	vsmmu->smmu = smmu;
	vsmmu->s2_parent = s2_parent;
	/* FIXME Move VMID allocation from the S2 domain allocation to here */
	vsmmu->vmid = s2_parent->s2_cfg.vmid;

	return &vsmmu->core;
	if (viommu->type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3) {
		viommu->ops = &arm_vsmmu_ops;
		return 0;
	}

	return smmu->impl_ops->vsmmu_init(vsmmu, user_data);
}

int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt)
+16 −1
Original line number Diff line number Diff line
@@ -3689,7 +3689,8 @@ static const struct iommu_ops arm_smmu_ops = {
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.page_response		= arm_smmu_page_response,
	.def_domain_type	= arm_smmu_def_domain_type,
	.viommu_alloc		= arm_vsmmu_alloc,
	.get_viommu_size	= arm_smmu_get_viommu_size,
	.viommu_init		= arm_vsmmu_init,
	.user_pasid_table	= 1,
	.owner			= THIS_MODULE,
	.default_domain_ops = &(const struct iommu_domain_ops) {
@@ -4700,6 +4701,7 @@ static void arm_smmu_impl_remove(void *data)
static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
{
	struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
	const struct arm_smmu_impl_ops *ops;
	int ret;

	if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
@@ -4710,11 +4712,24 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
	if (IS_ERR(new_smmu))
		return new_smmu;

	ops = new_smmu->impl_ops;
	if (ops) {
		/* get_viommu_size and vsmmu_init ops must be paired */
		if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) {
			ret = -EINVAL;
			goto err_remove;
		}
	}

	ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
				       new_smmu);
	if (ret)
		return ERR_PTR(ret);
	return new_smmu;

err_remove:
	arm_smmu_impl_remove(new_smmu);
	return ERR_PTR(ret);
}

static int arm_smmu_device_probe(struct platform_device *pdev)
+27 −6
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
#include <linux/sizes.h>

struct arm_smmu_device;
struct arm_vsmmu;

/* MMIO registers */
#define ARM_SMMU_IDR0			0x0
@@ -721,6 +722,16 @@ struct arm_smmu_impl_ops {
	int (*init_structures)(struct arm_smmu_device *smmu);
	struct arm_smmu_cmdq *(*get_secondary_cmdq)(
		struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent);
	/*
	 * An implementation should define its own type other than the default
	 * IOMMU_HW_INFO_TYPE_ARM_SMMUV3. And it must validate the input @type
	 * to return its own structure.
	 */
	void *(*hw_info)(struct arm_smmu_device *smmu, u32 *length,
			 enum iommu_hw_info_type *type);
	size_t (*get_viommu_size)(enum iommu_viommu_type viommu_type);
	int (*vsmmu_init)(struct arm_vsmmu *vsmmu,
			  const struct iommu_user_data *user_data);
};

/* An SMMUv3 instance */
@@ -1035,19 +1046,29 @@ struct arm_vsmmu {
};

#if IS_ENABLED(CONFIG_ARM_SMMU_V3_IOMMUFD)
void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type);
struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
				       struct iommu_domain *parent,
				       struct iommufd_ctx *ictx,
				       unsigned int viommu_type);
void *arm_smmu_hw_info(struct device *dev, u32 *length,
		       enum iommu_hw_info_type *type);
size_t arm_smmu_get_viommu_size(struct device *dev,
				enum iommu_viommu_type viommu_type);
int arm_vsmmu_init(struct iommufd_viommu *viommu,
		   struct iommu_domain *parent_domain,
		   const struct iommu_user_data *user_data);
int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
				    struct arm_smmu_nested_domain *nested_domain);
void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state);
void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master);
int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt);
struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data);
int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
			       struct iommu_user_data_array *array);
#else
#define arm_smmu_get_viommu_size NULL
#define arm_smmu_hw_info NULL
#define arm_vsmmu_alloc NULL
#define arm_vsmmu_init NULL
#define arm_vsmmu_alloc_domain_nested NULL
#define arm_vsmmu_cache_invalidate NULL

static inline int
arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
+474 −19

File changed.

Preview size limit exceeded, changes collapsed.

Loading