Commit b5c58b2f authored by Leon Romanovsky, committed by Christoph Hellwig
Browse files

dma-mapping: direct calls for dma-iommu



Directly call into dma-iommu just like we have been doing for dma-direct
for a while.  This avoids the indirect call overhead for IOMMU ops and
removes the need to have DMA ops entirely for many common configurations.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent f69e342e
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -11722,6 +11722,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
F:	drivers/iommu/dma-iommu.c
F:	drivers/iommu/dma-iommu.h
F:	drivers/iommu/iova.c
F:	include/linux/iommu-dma.h
F:	include/linux/iova.h
IOMMU SUBSYSTEM
+1 −1
Original line number Diff line number Diff line
@@ -151,7 +151,7 @@ config OF_IOMMU
# IOMMU-agnostic DMA-mapping layer
config IOMMU_DMA
	def_bool ARM64 || X86 || S390
	select DMA_OPS
	select DMA_OPS_HELPERS
	select IOMMU_API
	select IOMMU_IOVA
	select IRQ_MSI_IOMMU
+36 −68
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include <linux/gfp.h>
#include <linux/huge_mm.h>
#include <linux/iommu.h>
#include <linux/iommu-dma.h>
#include <linux/iova.h>
#include <linux/irq.h>
#include <linux/list_sort.h>
@@ -1037,9 +1038,8 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
	return NULL;
}

static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
		size_t size, enum dma_data_direction dir, gfp_t gfp,
		unsigned long attrs)
struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size,
	       enum dma_data_direction dir, gfp_t gfp, unsigned long attrs)
{
	struct dma_sgt_handle *sh;

@@ -1055,7 +1055,7 @@ static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
	return &sh->sgt;
}

static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
		struct sg_table *sgt, enum dma_data_direction dir)
{
	struct dma_sgt_handle *sh = sgt_handle(sgt);
@@ -1066,8 +1066,8 @@ static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
	kfree(sh);
}

static void iommu_dma_sync_single_for_cpu(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
		size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys;

@@ -1081,8 +1081,8 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
	swiotlb_sync_single_for_cpu(dev, phys, size, dir);
}

static void iommu_dma_sync_single_for_device(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
		size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys;

@@ -1096,9 +1096,8 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
		arch_sync_dma_for_device(phys, size, dir);
}

static void iommu_dma_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
		int nelems, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;
@@ -1112,9 +1111,8 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
}

static void iommu_dma_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
		int nelems, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;
@@ -1129,7 +1127,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
}

static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
	      unsigned long offset, size_t size, enum dma_data_direction dir,
	      unsigned long attrs)
{
@@ -1189,7 +1187,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
	return iova;
}

static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
@@ -1342,8 +1340,8 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
 * impedance-matching, to be able to hand off a suitably-aligned list,
 * but still preserve the original offsets and sizes for the caller.
 */
static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
@@ -1462,8 +1460,8 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
	return ret;
}

static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t end = 0, start;
	struct scatterlist *tmp;
@@ -1512,7 +1510,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
		__iommu_dma_unmap(dev, start, end - start);
}

static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	return __iommu_dma_map(dev, phys, size,
@@ -1520,7 +1518,7 @@ static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
			dma_get_mask(dev));
}

static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	__iommu_dma_unmap(dev, handle, size);
@@ -1557,7 +1555,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
		dma_free_contiguous(dev, page, alloc_size);
}

static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t handle, unsigned long attrs)
{
	__iommu_dma_unmap(dev, handle, size);
@@ -1601,8 +1599,8 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
	return NULL;
}

static void *iommu_dma_alloc(struct device *dev, size_t size,
		dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
		gfp_t gfp, unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
@@ -1635,7 +1633,7 @@ static void *iommu_dma_alloc(struct device *dev, size_t size,
	return cpu_addr;
}

static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
		void *cpu_addr, dma_addr_t dma_addr, size_t size,
		unsigned long attrs)
{
@@ -1666,7 +1664,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
			       vma->vm_page_prot);
}

static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
		void *cpu_addr, dma_addr_t dma_addr, size_t size,
		unsigned long attrs)
{
@@ -1693,19 +1691,19 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
	return ret;
}

static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
unsigned long iommu_dma_get_merge_boundary(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);

	return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
}

static size_t iommu_dma_opt_mapping_size(void)
size_t iommu_dma_opt_mapping_size(void)
{
	return iova_rcache_range();
}

static size_t iommu_dma_max_mapping_size(struct device *dev)
size_t iommu_dma_max_mapping_size(struct device *dev)
{
	if (dev_is_untrusted(dev))
		return swiotlb_max_mapping_size(dev);
@@ -1713,32 +1711,6 @@ static size_t iommu_dma_max_mapping_size(struct device *dev)
	return SIZE_MAX;
}

static const struct dma_map_ops iommu_dma_ops = {
	.flags			= DMA_F_PCI_P2PDMA_SUPPORTED |
				  DMA_F_CAN_SKIP_SYNC,
	.alloc			= iommu_dma_alloc,
	.free			= iommu_dma_free,
	.alloc_pages_op		= dma_common_alloc_pages,
	.free_pages		= dma_common_free_pages,
	.alloc_noncontiguous	= iommu_dma_alloc_noncontiguous,
	.free_noncontiguous	= iommu_dma_free_noncontiguous,
	.mmap			= iommu_dma_mmap,
	.get_sgtable		= iommu_dma_get_sgtable,
	.map_page		= iommu_dma_map_page,
	.unmap_page		= iommu_dma_unmap_page,
	.map_sg			= iommu_dma_map_sg,
	.unmap_sg		= iommu_dma_unmap_sg,
	.sync_single_for_cpu	= iommu_dma_sync_single_for_cpu,
	.sync_single_for_device	= iommu_dma_sync_single_for_device,
	.sync_sg_for_cpu	= iommu_dma_sync_sg_for_cpu,
	.sync_sg_for_device	= iommu_dma_sync_sg_for_device,
	.map_resource		= iommu_dma_map_resource,
	.unmap_resource		= iommu_dma_unmap_resource,
	.get_merge_boundary	= iommu_dma_get_merge_boundary,
	.opt_mapping_size	= iommu_dma_opt_mapping_size,
	.max_mapping_size       = iommu_dma_max_mapping_size,
};

void iommu_setup_dma_ops(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
@@ -1746,19 +1718,15 @@ void iommu_setup_dma_ops(struct device *dev)
	if (dev_is_pci(dev))
		dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;

	if (iommu_is_dma_domain(domain)) {
		if (iommu_dma_init_domain(domain, dev))
	dev->dma_iommu = iommu_is_dma_domain(domain);
	if (dev->dma_iommu && iommu_dma_init_domain(domain, dev))
		goto out_err;
		dev->dma_ops = &iommu_dma_ops;
	} else if (dev->dma_ops == &iommu_dma_ops) {
		/* Clean up if we've switched *from* a DMA domain */
		dev->dma_ops = NULL;
	}

	return;
out_err:
	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
		dev_name(dev));
	dev->dma_iommu = false;
}

static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
+0 −1
Original line number Diff line number Diff line
@@ -12,7 +12,6 @@ config DMAR_DEBUG
config INTEL_IOMMU
	bool "Support for Intel IOMMU using DMA Remapping Devices"
	depends on PCI_MSI && ACPI && X86
	select DMA_OPS
	select IOMMU_API
	select IOMMU_IOVA
	select IOMMUFD_DRIVER if IOMMUFD
+5 −0
Original line number Diff line number Diff line
@@ -707,6 +707,8 @@ struct device_physical_location {
 *		for dma allocations.  This flag is managed by the dma ops
 *		instance from ->dma_supported.
 * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers.
 * @dma_iommu: Device is using default IOMMU implementation for DMA and
 *		doesn't rely on dma_ops structure.
 *
 * At the lowest level, every device in a Linux system is represented by an
 * instance of struct device. The device structure contains the information
@@ -822,6 +824,9 @@ struct device {
#ifdef CONFIG_DMA_NEED_SYNC
	bool			dma_skip_sync:1;
#endif
#ifdef CONFIG_IOMMU_DMA
	bool			dma_iommu:1;
#endif
};

/**
Loading