Commit 056daec2 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull iommufd updates from Jason Gunthorpe:
 "This is a pretty consequential cycle for iommufd, though this pull is
  not too big. It is based on a shared branch with VFIO that introduces
  VFIO_DEVICE_FEATURE_DMA_BUF, a DMABUF exporter for a VFIO device's MMIO
  PCI BARs. This was a large, multi-series journey over the last year
  and a half.

  Based on that work, IOMMUFD gains support for VFIO DMABUFs in its
  existing IOMMU_IOAS_MAP_FILE, which closes the last major gap in
  supporting PCI peer-to-peer transfers within VMs.

  In Joerg's iommu tree we have the "generic page table" work which aims
  to consolidate all the duplicated page table code in every iommu
  driver into a single algorithm. This will be used by iommufd to
  implement unique page table operations to start adding new features
  and improve performance.

  In here:

   - Expand IOMMU_IOAS_MAP_FILE to accept a DMABUF exported from VFIO.
     This is the first step to broader DMABUF support in iommufd; right
     now it only works with VFIO. This closes the last functional gap
     with classic VFIO type 1 to safely support PCI peer to peer DMA by
     mapping the VFIO device's MMIO into the IOMMU.

   - Relax SMMUv3 restrictions on nesting domains to better support
     qemu's sequence to have an identity mapping before the vSID is
     established"

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd:
  iommu/arm-smmu-v3-iommufd: Allow attaching nested domain for GBPA cases
  iommufd/selftest: Add some tests for the dmabuf flow
  iommufd: Accept a DMABUF through IOMMU_IOAS_MAP_FILE
  iommufd: Have iopt_map_file_pages convert the fd to a file
  iommufd: Have pfn_reader process DMABUF iopt_pages
  iommufd: Allow MMIO pages in a batch
  iommufd: Allow a DMABUF to be revoked
  iommufd: Do not map/unmap revoked DMABUFs
  iommufd: Add DMABUF to iopt_pages
  vfio/pci: Add vfio_pci_dma_buf_iommufd_map()
parents a3ebb59e 5185c4d8
Loading
Loading
Loading
Loading
+12 −1
Original line number Diff line number Diff line
@@ -99,6 +99,8 @@ static void arm_smmu_make_nested_domain_ste(
int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
				    struct arm_smmu_nested_domain *nested_domain)
{
	unsigned int cfg =
		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));
	struct arm_smmu_vmaster *vmaster;
	unsigned long vsid;
	int ret;
@@ -107,8 +109,17 @@ int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,

	ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core,
					 state->master->dev, &vsid);
	if (ret)
	/*
	 * Attaching to a translate nested domain must allocate a vDEVICE prior,
	 * as CD/ATS invalidations and vevents require a vSID to work properly.
	 * An abort/bypass domain is allowed to attach w/o vmaster for GBPA case.
	 */
	if (ret) {
		if (cfg == STRTAB_STE_0_CFG_ABORT ||
		    cfg == STRTAB_STE_0_CFG_BYPASS)
			return 0;
		return ret;
	}

	vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL);
	if (!vmaster)
+65 −13
Original line number Diff line number Diff line
@@ -8,8 +8,10 @@
 * The datastructure uses the iopt_pages to optimize the storage of the PFNs
 * between the domains and xarray.
 */
#include <linux/dma-buf.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/lockdep.h>
@@ -284,6 +286,9 @@ static int iopt_alloc_area_pages(struct io_pagetable *iopt,
		case IOPT_ADDRESS_FILE:
			start = elm->start_byte + elm->pages->start;
			break;
		case IOPT_ADDRESS_DMABUF:
			start = elm->start_byte + elm->pages->dmabuf.start;
			break;
		}
		rc = iopt_alloc_iova(iopt, dst_iova, start, length);
		if (rc)
@@ -468,25 +473,53 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
 * @iopt: io_pagetable to act on
 * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
 *        the chosen iova on output. Otherwise is the iova to map to on input
 * @file: file to map
 * @fd: fdno of a file to map
 * @start: map file starting at this byte offset
 * @length: Number of bytes to map
 * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
 * @flags: IOPT_ALLOC_IOVA or zero
 */
int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, struct file *file,
			unsigned long start, unsigned long length,
			int iommu_prot, unsigned int flags)
			unsigned long *iova, int fd, unsigned long start,
			unsigned long length, int iommu_prot,
			unsigned int flags)
{
	struct iopt_pages *pages;
	struct dma_buf *dmabuf;
	unsigned long start_byte;
	unsigned long last;

	if (!length)
		return -EINVAL;
	if (check_add_overflow(start, length - 1, &last))
		return -EOVERFLOW;

	start_byte = start - ALIGN_DOWN(start, PAGE_SIZE);
	dmabuf = dma_buf_get(fd);
	if (!IS_ERR(dmabuf)) {
		pages = iopt_alloc_dmabuf_pages(ictx, dmabuf, start_byte, start,
						length,
						iommu_prot & IOMMU_WRITE);
		if (IS_ERR(pages)) {
			dma_buf_put(dmabuf);
			return PTR_ERR(pages);
		}
	} else {
		struct file *file;

		file = fget(fd);
		if (!file)
			return -EBADF;

	pages = iopt_alloc_file_pages(file, start, length,
		pages = iopt_alloc_file_pages(file, start_byte, start, length,
					      iommu_prot & IOMMU_WRITE);
		fput(file);
		if (IS_ERR(pages))
			return PTR_ERR(pages);
	}

	return iopt_map_common(ictx, iopt, pages, iova, length,
			       start - pages->start, iommu_prot, flags);
			       start_byte, iommu_prot, flags);
}

struct iova_bitmap_fn_arg {
@@ -961,8 +994,14 @@ static void iopt_unfill_domain(struct io_pagetable *iopt,
				WARN_ON(!area->storage_domain);
			if (area->storage_domain == domain)
				area->storage_domain = storage_domain;
			if (iopt_is_dmabuf(pages)) {
				if (!iopt_dmabuf_revoked(pages))
					iopt_area_unmap_domain(area, domain);
				iopt_dmabuf_untrack_domain(pages, area, domain);
			}
			mutex_unlock(&pages->mutex);

			if (!iopt_is_dmabuf(pages))
				iopt_area_unmap_domain(area, domain);
		}
		return;
@@ -980,6 +1019,8 @@ static void iopt_unfill_domain(struct io_pagetable *iopt,
		WARN_ON(area->storage_domain != domain);
		area->storage_domain = NULL;
		iopt_area_unfill_domain(area, pages, domain);
		if (iopt_is_dmabuf(pages))
			iopt_dmabuf_untrack_domain(pages, area, domain);
		mutex_unlock(&pages->mutex);
	}
}
@@ -1009,10 +1050,16 @@ static int iopt_fill_domain(struct io_pagetable *iopt,
		if (!pages)
			continue;

		mutex_lock(&pages->mutex);
		guard(mutex)(&pages->mutex);
		if (iopt_is_dmabuf(pages)) {
			rc = iopt_dmabuf_track_domain(pages, area, domain);
			if (rc)
				goto out_unfill;
		}
		rc = iopt_area_fill_domain(area, domain);
		if (rc) {
			mutex_unlock(&pages->mutex);
			if (iopt_is_dmabuf(pages))
				iopt_dmabuf_untrack_domain(pages, area, domain);
			goto out_unfill;
		}
		if (!area->storage_domain) {
@@ -1021,7 +1068,6 @@ static int iopt_fill_domain(struct io_pagetable *iopt,
			interval_tree_insert(&area->pages_node,
					     &pages->domains_itree);
		}
		mutex_unlock(&pages->mutex);
	}
	return 0;

@@ -1042,6 +1088,8 @@ static int iopt_fill_domain(struct io_pagetable *iopt,
			area->storage_domain = NULL;
		}
		iopt_area_unfill_domain(area, pages, domain);
		if (iopt_is_dmabuf(pages))
			iopt_dmabuf_untrack_domain(pages, area, domain);
		mutex_unlock(&pages->mutex);
	}
	return rc;
@@ -1252,6 +1300,10 @@ static int iopt_area_split(struct iopt_area *area, unsigned long iova)
	if (!pages || area->prevent_access)
		return -EBUSY;

	/* Maintaining the domains_itree below is a bit complicated */
	if (iopt_is_dmabuf(pages))
		return -EOPNOTSUPP;

	if (new_start & (alignment - 1) ||
	    iopt_area_start_byte(area, new_start) & (alignment - 1))
		return -EINVAL;
+52 −2
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
#ifndef __IO_PAGETABLE_H
#define __IO_PAGETABLE_H

#include <linux/dma-buf.h>
#include <linux/interval_tree.h>
#include <linux/kref.h>
#include <linux/mutex.h>
@@ -69,6 +70,16 @@ void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages,
void iopt_area_unmap_domain(struct iopt_area *area,
			    struct iommu_domain *domain);

/*
 * Per-domain tracking helpers for DMABUF backed iopt_pages. The visible
 * callers (iopt_fill_domain() and iopt_unfill_domain()) invoke the
 * track/untrack pair with pages->mutex held and only when
 * iopt_is_dmabuf(pages) is true; a non-zero return from
 * iopt_dmabuf_track_domain() aborts the domain fill.
 * NOTE(review): no caller of the *_all_domains() variants is visible in this
 * excerpt - confirm their locking rules against the definitions.
 */
int iopt_dmabuf_track_domain(struct iopt_pages *pages, struct iopt_area *area,
			     struct iommu_domain *domain);
void iopt_dmabuf_untrack_domain(struct iopt_pages *pages,
				struct iopt_area *area,
				struct iommu_domain *domain);
int iopt_dmabuf_track_all_domains(struct iopt_area *area,
				  struct iopt_pages *pages);
void iopt_dmabuf_untrack_all_domains(struct iopt_area *area,
				     struct iopt_pages *pages);

static inline unsigned long iopt_area_index(struct iopt_area *area)
{
	return area->pages_node.start;
@@ -179,7 +190,22 @@ enum {

/*
 * Kind of backing an iopt_pages has; compared against iopt_pages::type.
 * Values are sequential from IOPT_ADDRESS_USER = 0, so IOPT_ADDRESS_FILE
 * keeps the value 1 it was previously given explicitly.
 */
enum iopt_address_type {
	IOPT_ADDRESS_USER = 0,
	IOPT_ADDRESS_FILE,
	IOPT_ADDRESS_DMABUF,
};

/*
 * One (domain, area) pair that can be linked on a list through @elm.
 * NOTE(review): presumably these are the entries of
 * iopt_pages_dmabuf::tracker, managed by iopt_dmabuf_track_domain() and
 * iopt_dmabuf_untrack_domain() - confirm against their definitions.
 */
struct iopt_pages_dmabuf_track {
	struct iommu_domain *domain;
	struct iopt_area *area;
	struct list_head elm;
};

/* State embedded in struct iopt_pages for the IOPT_ADDRESS_DMABUF type */
struct iopt_pages_dmabuf {
	struct dma_buf_attachment *attach;
	/* phys.len == 0 is what iopt_dmabuf_revoked() reports as revoked */
	struct dma_buf_phys_vec phys;
	/* Always PAGE_SIZE aligned */
	unsigned long start;
	/* NOTE(review): presumably a list of iopt_pages_dmabuf_track - confirm */
	struct list_head tracker;
};

/*
@@ -209,6 +235,8 @@ struct iopt_pages {
			struct file *file;
			unsigned long start;
		};
		/* IOPT_ADDRESS_DMABUF */
		struct iopt_pages_dmabuf dmabuf;
	};
	bool writable:1;
	u8 account_mode;
@@ -220,9 +248,31 @@ struct iopt_pages {
	struct rb_root_cached domains_itree;
};

/*
 * True when @pages is DMABUF backed. Always false when
 * CONFIG_DMA_SHARED_BUFFER is not enabled, regardless of pages->type.
 */
static inline bool iopt_is_dmabuf(struct iopt_pages *pages)
{
	return IS_ENABLED(CONFIG_DMA_SHARED_BUFFER) &&
	       pages->type == IOPT_ADDRESS_DMABUF;
}

/*
 * True when @pages is DMABUF backed and its physical range has zero length.
 * Non-DMABUF pages are never reported as revoked. The caller must hold
 * pages->mutex.
 */
static inline bool iopt_dmabuf_revoked(struct iopt_pages *pages)
{
	lockdep_assert_held(&pages->mutex);
	return iopt_is_dmabuf(pages) && pages->dmabuf.phys.len == 0;
}

struct iopt_pages *iopt_alloc_user_pages(void __user *uptr,
					 unsigned long length, bool writable);
/*
 * @start_byte is passed separately from @start; iopt_map_file_pages()
 * computes it as the offset of @start within its page.
 */
struct iopt_pages *iopt_alloc_file_pages(struct file *file,
					 unsigned long start_byte,
					 unsigned long start,
					 unsigned long length, bool writable);
struct iopt_pages *iopt_alloc_dmabuf_pages(struct iommufd_ctx *ictx,
					   struct dma_buf *dmabuf,
					   unsigned long start_byte,
					   unsigned long start,
					   unsigned long length, bool writable);
void iopt_release_pages(struct kref *kref);
static inline void iopt_put_pages(struct iopt_pages *pages)
+1 −7
Original line number Diff line number Diff line
@@ -207,7 +207,6 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd)
	unsigned long iova = cmd->iova;
	struct iommufd_ioas *ioas;
	unsigned int flags = 0;
	struct file *file;
	int rc;

	if (cmd->flags &
@@ -229,11 +228,7 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd)
	if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
		flags = IOPT_ALLOC_IOVA;

	file = fget(cmd->fd);
	if (!file)
		return -EBADF;

	rc = iopt_map_file_pages(ucmd->ictx, &ioas->iopt, &iova, file,
	rc = iopt_map_file_pages(ucmd->ictx, &ioas->iopt, &iova, cmd->fd,
				 cmd->start, cmd->length,
				 conv_iommu_prot(cmd->flags), flags);
	if (rc)
@@ -243,7 +238,6 @@ int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd)
	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_put:
	iommufd_put_object(ucmd->ictx, &ioas->obj);
	fput(file);
	return rc;
}

+13 −1
Original line number Diff line number Diff line
@@ -19,6 +19,8 @@ struct iommu_domain;
struct iommu_group;
struct iommu_option;
struct iommufd_device;
struct dma_buf_attachment;
struct dma_buf_phys_vec;

struct iommufd_sw_msi_map {
	struct list_head sw_msi_item;
@@ -108,7 +110,7 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long length, int iommu_prot,
			unsigned int flags);
/* Takes the file descriptor number; the definition resolves it itself */
int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, int fd,
			unsigned long start, unsigned long length,
			int iommu_prot, unsigned int flags);
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
@@ -504,6 +506,8 @@ void iommufd_device_pre_destroy(struct iommufd_object *obj);
void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);

struct device *iommufd_global_device(void);

struct iommufd_access {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
@@ -713,6 +717,8 @@ bool iommufd_should_fail(void);
int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
int iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
				     struct dma_buf_phys_vec *phys);
#else
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
						 unsigned int ioas_id,
@@ -734,5 +740,11 @@ static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
{
	return false;
}
/*
 * Stub for the #else side of the surrounding conditional: always fails with
 * -EOPNOTSUPP and never touches @attachment or @phys.
 * NOTE(review): the controlling #if is outside this excerpt; presumably it is
 * the iommufd selftest option - confirm.
 */
static inline int
iommufd_test_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
				 struct dma_buf_phys_vec *phys)
{
	return -EOPNOTSUPP;
}
#endif
#endif
Loading