Commit 954b7207 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'dma-mapping-5.13' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

 - add a new dma_alloc_noncontiguous API (me, Ricardo Ribalda)

 - fix a copyright notice (Hao Fang)

 - add an unlikely annotation to dma_mapping_error (Heiner Kallweit)

 - remove a pointless empty line (Wang Qing)

 - add support for multi-pages map/unmap benchmarking (Xiang Chen)

* tag 'dma-mapping-5.13' of git://git.infradead.org/users/hch/dma-mapping:
  dma-mapping: add unlikely hint to error path in dma_mapping_error
  dma-mapping: benchmark: Add support for multi-pages map/unmap
  dma-mapping: benchmark: use the correct HiSilicon copyright
  dma-mapping: remove a pointless empty line in dma_alloc_coherent
  media: uvcvideo: Use dma_alloc_noncontiguous API
  dma-iommu: implement ->alloc_noncontiguous
  dma-iommu: refactor iommu_dma_alloc_remap
  dma-mapping: add a dma_alloc_noncontiguous API
  dma-mapping: refactor dma_{alloc,free}_pages
  dma-mapping: add a dma_mmap_pages helper
parents 51e6f07c a7f3d3d3
Loading
Loading
Loading
Loading
+88 −0
Original line number Diff line number Diff line
@@ -563,6 +563,16 @@ Free a region of memory previously allocated using dma_alloc_pages().
dev, size, dma_handle and dir must all be the same as those passed into
dma_alloc_pages().  page must be the pointer returned by dma_alloc_pages().

::

	int
	dma_mmap_pages(struct device *dev, struct vm_area_struct *vma,
		       size_t size, struct page *page)

Map an allocation returned from dma_alloc_pages() into a user address space.
dev and size must be the same as those passed into dma_alloc_pages().
page must be the pointer returned by dma_alloc_pages().

::

	void *
@@ -584,6 +594,84 @@ dev, size, dma_handle and dir must all be the same as those passed into
dma_alloc_noncoherent().  cpu_addr must be the virtual address returned by
dma_alloc_noncoherent().

::

	struct sg_table *
	dma_alloc_noncontiguous(struct device *dev, size_t size,
				enum dma_data_direction dir, gfp_t gfp,
				unsigned long attrs);

This routine allocates <size> bytes of non-coherent and possibly non-contiguous
memory.  It returns a pointer to struct sg_table that describes the allocated
and DMA mapped memory, or NULL if the allocation failed.  The resulting memory
can be used for everything that struct pages mapped into a scatterlist are
suitable for.

The returned sg_table is guaranteed to have 1 single DMA mapped segment as
indicated by sgt->nents, but it might have multiple CPU side segments as
indicated by sgt->orig_nents.

The dir parameter specifies whether data is read and/or written by the device,
see dma_map_single() for details.

The gfp parameter allows the caller to specify the ``GFP_`` flags (see
kmalloc()) for the allocation, but rejects flags used to specify a memory
zone such as GFP_DMA or GFP_HIGHMEM.

The attrs argument must be either 0 or DMA_ATTR_ALLOC_SINGLE_PAGES.

Before giving the memory to the device, dma_sync_sgtable_for_device() needs
to be called, and before reading memory written by the device,
dma_sync_sgtable_for_cpu() needs to be called, just like for streaming DMA
mappings that are reused.

::

	void
	dma_free_noncontiguous(struct device *dev, size_t size,
			       struct sg_table *sgt,
			       enum dma_data_direction dir)

Free memory previously allocated using dma_alloc_noncontiguous().  dev, size,
and dir must all be the same as those passed into dma_alloc_noncontiguous().
sgt must be the pointer returned by dma_alloc_noncontiguous().

::

	void *
	dma_vmap_noncontiguous(struct device *dev, size_t size,
		struct sg_table *sgt)

Return a contiguous kernel mapping for an allocation returned from
dma_alloc_noncontiguous().  dev and size must be the same as those passed into
dma_alloc_noncontiguous().  sgt must be the pointer returned by
dma_alloc_noncontiguous().

Once a non-contiguous allocation is mapped using this function, the
flush_kernel_vmap_range() and invalidate_kernel_vmap_range() APIs must be used
to manage the coherency between the kernel mapping, the device and user space
mappings (if any).

::

	void
	dma_vunmap_noncontiguous(struct device *dev, void *vaddr)

Unmap a kernel mapping returned by dma_vmap_noncontiguous().  dev must be the
same as the one passed into dma_alloc_noncontiguous().  vaddr must be the
pointer returned by dma_vmap_noncontiguous().


::

	int
	dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma,
			       size_t size, struct sg_table *sgt)

Map an allocation returned from dma_alloc_noncontiguous() into a user address
space.  dev and size must be the same as those passed into
dma_alloc_noncontiguous().  sgt must be the pointer returned by
dma_alloc_noncontiguous().

::

	int
+71 −32
Original line number Diff line number Diff line
@@ -647,23 +647,12 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
	return pages;
}

/**
 * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space
 * @dev: Device to allocate memory for. Must be a real device
 *	 attached to an iommu_dma_domain
 * @size: Size of buffer in bytes
 * @dma_handle: Out argument for allocated DMA handle
 * @gfp: Allocation flags
 * @prot: pgprot_t to use for the remapped mapping
 * @attrs: DMA attributes for this allocation
 *
 * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
/*
 * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
 * but an IOMMU which supports smaller pages might not map the whole thing.
 *
 * Return: Mapped virtual address, or NULL on failure.
 */
static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
		size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot,
		unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
@@ -673,11 +662,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
	struct page **pages;
	struct sg_table sgt;
	dma_addr_t iova;
	void *vaddr;

	*dma_handle = DMA_MAPPING_ERROR;

	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
	    iommu_deferred_attach(dev, domain))
@@ -704,41 +689,91 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
	if (!iova)
		goto out_free_pages;

	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
	if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
		goto out_free_iova;

	if (!(ioprot & IOMMU_CACHE)) {
		struct scatterlist *sg;
		int i;

		for_each_sg(sgt.sgl, sg, sgt.orig_nents, i)
		for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
			arch_dma_prep_coherent(sg_page(sg), sg->length);
	}

	if (iommu_map_sg_atomic(domain, iova, sgt.sgl, sgt.orig_nents, ioprot)
	if (iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot)
			< size)
		goto out_free_sg;

	sgt->sgl->dma_address = iova;
	sgt->sgl->dma_length = size;
	return pages;

out_free_sg:
	sg_free_table(sgt);
out_free_iova:
	iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_pages:
	__iommu_dma_free_pages(pages, count);
	return NULL;
}

static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
		unsigned long attrs)
{
	struct page **pages;
	struct sg_table sgt;
	void *vaddr;

	pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot,
						attrs);
	if (!pages)
		return NULL;
	*dma_handle = sgt.sgl->dma_address;
	sg_free_table(&sgt);
	vaddr = dma_common_pages_remap(pages, size, prot,
			__builtin_return_address(0));
	if (!vaddr)
		goto out_unmap;

	*dma_handle = iova;
	sg_free_table(&sgt);
	return vaddr;

out_unmap:
	__iommu_dma_unmap(dev, iova, size);
out_free_sg:
	sg_free_table(&sgt);
out_free_iova:
	iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_pages:
	__iommu_dma_free_pages(pages, count);
	__iommu_dma_unmap(dev, *dma_handle, size);
	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
	return NULL;
}

#ifdef CONFIG_DMA_REMAP
/*
 * ->alloc_noncontiguous implementation for IOMMU-backed devices.
 *
 * A struct dma_sgt_handle is allocated to carry both the sg_table handed back
 * to the caller and the page array produced by
 * __iommu_dma_alloc_noncontiguous().  Only the embedded sg_table is exposed;
 * the handle is recovered from it again with sgt_handle().
 *
 * Returns a pointer to the embedded sg_table, or NULL if either the handle or
 * the underlying allocation could not be obtained.
 */
static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
		size_t size, enum dma_data_direction dir, gfp_t gfp,
		unsigned long attrs)
{
	struct dma_sgt_handle *handle = kmalloc(sizeof(*handle), gfp);

	if (handle == NULL)
		return NULL;

	handle->pages = __iommu_dma_alloc_noncontiguous(dev, size,
			&handle->sgt, gfp, PAGE_KERNEL, attrs);
	if (handle->pages != NULL)
		return &handle->sgt;

	/* Nothing was allocated or mapped; only the handle needs undoing. */
	kfree(handle);
	return NULL;
}

/*
 * ->free_noncontiguous implementation for IOMMU-backed devices.
 *
 * @dev:  device the buffer was allocated for
 * @size: size that was passed to iommu_dma_alloc_noncontiguous()
 * @sgt:  sg_table returned by iommu_dma_alloc_noncontiguous()
 * @dir:  DMA direction (unused here)
 *
 * Undoes everything iommu_dma_alloc_noncontiguous() set up: the IOMMU
 * mapping, the backing pages, the scatterlist table and finally the
 * struct dma_sgt_handle that embeds @sgt.
 */
static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
		struct sg_table *sgt, enum dma_data_direction dir)
{
	struct dma_sgt_handle *sh = sgt_handle(sgt);

	__iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
	__iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
	sg_free_table(&sh->sgt);
	/*
	 * The handle itself was kmalloc()ed by the allocator; free it last,
	 * once nothing refers to the embedded sg_table any more.
	 */
	kfree(sh);
}
#endif /* CONFIG_DMA_REMAP */

static void iommu_dma_sync_single_for_cpu(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
@@ -1255,6 +1290,10 @@ static const struct dma_map_ops iommu_dma_ops = {
	.free			= iommu_dma_free,
	.alloc_pages		= dma_common_alloc_pages,
	.free_pages		= dma_common_free_pages,
#ifdef CONFIG_DMA_REMAP
	.alloc_noncontiguous	= iommu_dma_alloc_noncontiguous,
	.free_noncontiguous	= iommu_dma_free_noncontiguous,
#endif
	.mmap			= iommu_dma_mmap,
	.get_sgtable		= iommu_dma_get_sgtable,
	.map_page		= iommu_dma_map_page,
+69 −25
Original line number Diff line number Diff line
@@ -6,11 +6,14 @@
 *          Laurent Pinchart (laurent.pinchart@ideasonboard.com)
 */

#include <linux/dma-mapping.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/usb.h>
#include <linux/usb/hcd.h>
#include <linux/videodev2.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
@@ -1096,6 +1099,29 @@ static int uvc_video_decode_start(struct uvc_streaming *stream,
	return data[0];
}

/*
 * Return the DMA direction for a stream's URB buffers: DMA_FROM_DEVICE for
 * V4L2_BUF_TYPE_VIDEO_CAPTURE streams, DMA_TO_DEVICE for every other type.
 */
static inline enum dma_data_direction uvc_stream_dir(
				struct uvc_streaming *stream)
{
	return stream->type == V4L2_BUF_TYPE_VIDEO_CAPTURE ?
		DMA_FROM_DEVICE : DMA_TO_DEVICE;
}

static inline struct device *uvc_stream_to_dmadev(struct uvc_streaming *stream)
{
	return bus_to_hcd(stream->dev->udev->bus)->self.sysdev;
}

/*
 * Submit a UVC URB, syncing its scatter-gather table for the device first.
 *
 * Returns the result of usb_submit_urb().
 */
static int uvc_submit_urb(struct uvc_urb *uvc_urb, gfp_t mem_flags)
{
	struct uvc_streaming *stream = uvc_urb->stream;
	struct device *dma_dev = uvc_stream_to_dmadev(stream);

	/* The buffer must be synced for the device before the URB is queued. */
	dma_sync_sgtable_for_device(dma_dev, uvc_urb->sgt,
				    uvc_stream_dir(stream));

	return usb_submit_urb(uvc_urb->urb, mem_flags);
}

/*
 * uvc_video_copy_data_work: Asynchronous memcpy processing
 *
@@ -1117,7 +1143,7 @@ static void uvc_video_copy_data_work(struct work_struct *work)
		uvc_queue_buffer_release(op->buf);
	}

	ret = usb_submit_urb(uvc_urb->urb, GFP_KERNEL);
	ret = uvc_submit_urb(uvc_urb, GFP_KERNEL);
	if (ret < 0)
		dev_err(&uvc_urb->stream->intf->dev,
			"Failed to resubmit video URB (%d).\n", ret);
@@ -1537,6 +1563,12 @@ static void uvc_video_complete(struct urb *urb)
	/* Re-initialise the URB async work. */
	uvc_urb->async_operations = 0;

	/* Sync DMA and invalidate vmap range. */
	dma_sync_sgtable_for_cpu(uvc_stream_to_dmadev(uvc_urb->stream),
				 uvc_urb->sgt, uvc_stream_dir(stream));
	invalidate_kernel_vmap_range(uvc_urb->buffer,
				     uvc_urb->stream->urb_size);

	/*
	 * Process the URB headers, and optionally queue expensive memcpy tasks
	 * to be deferred to a work queue.
@@ -1545,7 +1577,7 @@ static void uvc_video_complete(struct urb *urb)

	/* If no async work is needed, resubmit the URB immediately. */
	if (!uvc_urb->async_operations) {
		ret = usb_submit_urb(uvc_urb->urb, GFP_ATOMIC);
		ret = uvc_submit_urb(uvc_urb, GFP_ATOMIC);
		if (ret < 0)
			dev_err(&stream->intf->dev,
				"Failed to resubmit video URB (%d).\n", ret);
@@ -1560,24 +1592,49 @@ static void uvc_video_complete(struct urb *urb)
 */
static void uvc_free_urb_buffers(struct uvc_streaming *stream)
{
	struct device *dma_dev = uvc_stream_to_dmadev(stream);
	struct uvc_urb *uvc_urb;

	for_each_uvc_urb(uvc_urb, stream) {
		if (!uvc_urb->buffer)
			continue;

#ifndef CONFIG_DMA_NONCOHERENT
		usb_free_coherent(stream->dev->udev, stream->urb_size,
				  uvc_urb->buffer, uvc_urb->dma);
#else
		kfree(uvc_urb->buffer);
#endif
		dma_vunmap_noncontiguous(dma_dev, uvc_urb->buffer);
		dma_free_noncontiguous(dma_dev, stream->urb_size, uvc_urb->sgt,
				       uvc_stream_dir(stream));

		uvc_urb->buffer = NULL;
		uvc_urb->sgt = NULL;
	}

	stream->urb_size = 0;
}

/*
 * Allocate the transfer buffer for a single URB.
 *
 * The buffer is obtained with dma_alloc_noncontiguous() (stream->urb_size
 * bytes, direction from uvc_stream_dir()) and then mapped into a contiguous
 * kernel virtual range with dma_vmap_noncontiguous().  On success
 * uvc_urb->sgt, uvc_urb->dma and uvc_urb->buffer are all set.
 *
 * Returns true on success.  On failure nothing stays allocated and
 * uvc_urb->sgt is NULL.
 */
static bool uvc_alloc_urb_buffer(struct uvc_streaming *stream,
				 struct uvc_urb *uvc_urb, gfp_t gfp_flags)
{
	struct device *dma_dev = uvc_stream_to_dmadev(stream);
	enum dma_data_direction dir = uvc_stream_dir(stream);
	size_t size = stream->urb_size;

	uvc_urb->sgt = dma_alloc_noncontiguous(dma_dev, size, dir,
					       gfp_flags, 0);
	if (uvc_urb->sgt == NULL)
		return false;

	/* The first sg entry carries the DMA address of the allocation. */
	uvc_urb->dma = uvc_urb->sgt->sgl->dma_address;

	uvc_urb->buffer = dma_vmap_noncontiguous(dma_dev, size, uvc_urb->sgt);
	if (uvc_urb->buffer != NULL)
		return true;

	/* No kernel mapping: give the DMA allocation back. */
	dma_free_noncontiguous(dma_dev, size, uvc_urb->sgt, dir);
	uvc_urb->sgt = NULL;
	return false;
}

/*
 * Allocate transfer buffers. This function can be called with buffers
 * already allocated when resuming from suspend, in which case it will
@@ -1608,19 +1665,12 @@ static int uvc_alloc_urb_buffers(struct uvc_streaming *stream,

	/* Retry allocations until one succeeds. */
	for (; npackets > 1; npackets /= 2) {
		stream->urb_size = psize * npackets;

		for (i = 0; i < UVC_URBS; ++i) {
			struct uvc_urb *uvc_urb = &stream->uvc_urb[i];

			stream->urb_size = psize * npackets;
#ifndef CONFIG_DMA_NONCOHERENT
			uvc_urb->buffer = usb_alloc_coherent(
				stream->dev->udev, stream->urb_size,
				gfp_flags | __GFP_NOWARN, &uvc_urb->dma);
#else
			uvc_urb->buffer =
			    kmalloc(stream->urb_size, gfp_flags | __GFP_NOWARN);
#endif
			if (!uvc_urb->buffer) {
			if (!uvc_alloc_urb_buffer(stream, uvc_urb, gfp_flags)) {
				uvc_free_urb_buffers(stream);
				break;
			}
@@ -1730,12 +1780,8 @@ static int uvc_init_video_isoc(struct uvc_streaming *stream,
		urb->context = uvc_urb;
		urb->pipe = usb_rcvisocpipe(stream->dev->udev,
				ep->desc.bEndpointAddress);
#ifndef CONFIG_DMA_NONCOHERENT
		urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP;
		urb->transfer_dma = uvc_urb->dma;
#else
		urb->transfer_flags = URB_ISO_ASAP;
#endif
		urb->interval = ep->desc.bInterval;
		urb->transfer_buffer = uvc_urb->buffer;
		urb->complete = uvc_video_complete;
@@ -1795,10 +1841,8 @@ static int uvc_init_video_bulk(struct uvc_streaming *stream,

		usb_fill_bulk_urb(urb, stream->dev->udev, pipe,	uvc_urb->buffer,
				  size, uvc_video_complete, uvc_urb);
#ifndef CONFIG_DMA_NONCOHERENT
		urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP;
		urb->transfer_dma = uvc_urb->dma;
#endif

		uvc_urb->urb = urb;
	}
@@ -1895,7 +1939,7 @@ static int uvc_video_start_transfer(struct uvc_streaming *stream,

	/* Submit the URBs. */
	for_each_uvc_urb(uvc_urb, stream) {
		ret = usb_submit_urb(uvc_urb->urb, gfp_flags);
		ret = uvc_submit_urb(uvc_urb, gfp_flags);
		if (ret < 0) {
			dev_err(&stream->intf->dev,
				"Failed to submit URB %u (%d).\n",
+4 −1
Original line number Diff line number Diff line
@@ -219,6 +219,7 @@
 */

struct gpio_desc;
struct sg_table;
struct uvc_device;

/* TODO: Put the most frequently accessed fields at the beginning of
@@ -545,7 +546,8 @@ struct uvc_copy_op {
 * @urb: the URB described by this context structure
 * @stream: UVC streaming context
 * @buffer: memory storage for the URB
 * @dma: DMA coherent addressing for the urb_buffer
 * @dma: Allocated DMA handle
 * @sgt: sg_table with the urb locations in memory
 * @async_operations: counter to indicate the number of copy operations
 * @copy_operations: work descriptors for asynchronous copy operations
 * @work: work queue entry for asynchronous decode
@@ -556,6 +558,7 @@ struct uvc_urb {

	char *buffer;
	dma_addr_t dma;
	struct sg_table *sgt;

	unsigned int async_operations;
	struct uvc_copy_op copy_operations[UVC_MAX_PACKETS];
+19 −0
Original line number Diff line number Diff line
@@ -22,6 +22,11 @@ struct dma_map_ops {
			gfp_t gfp);
	void (*free_pages)(struct device *dev, size_t size, struct page *vaddr,
			dma_addr_t dma_handle, enum dma_data_direction dir);
	struct sg_table *(*alloc_noncontiguous)(struct device *dev, size_t size,
			enum dma_data_direction dir, gfp_t gfp,
			unsigned long attrs);
	void (*free_noncontiguous)(struct device *dev, size_t size,
			struct sg_table *sgt, enum dma_data_direction dir);
	int (*mmap)(struct device *, struct vm_area_struct *,
			void *, dma_addr_t, size_t, unsigned long attrs);

@@ -198,6 +203,20 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma,
}
#endif /* CONFIG_DMA_DECLARE_COHERENT */

/*
 * This is the actual return value from the ->alloc_noncontiguous method.
 * The users of the DMA API should only care about the sg_table, but to make
 * the DMA-API internal vmapping and freeing easier we stash away the page
 * array as well (except for the fallback case).  This can go away any time,
 * e.g. when a vmap-variant that takes a scatterlist comes along.
 */
struct dma_sgt_handle {
	/* Scatterlist table; a pointer to this member is what callers get. */
	struct sg_table sgt;
	/* Page array backing the allocation, kept for internal use. */
	struct page **pages;
};
/* Recover the enclosing dma_sgt_handle from a pointer to its sgt member. */
#define sgt_handle(sgt) \
	container_of((sgt), struct dma_sgt_handle, sgt)

int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
		void *cpu_addr, dma_addr_t dma_addr, size_t size,
		unsigned long attrs);
Loading