Commit dabb83ec authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'dma-mapping-7.0-2026-03-25' of...

Merge tag 'dma-mapping-7.0-2026-03-25' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux

Pull dma-mapping fixes from Marek Szyprowski:
 "A set of fixes for DMA-mapping subsystem, which resolve false-
  positive warnings from KMSAN and DMA-API debug (Shigeru Yoshida
  and Leon Romanovsky) as well as a simple build fix (Miguel Ojeda)"

* tag 'dma-mapping-7.0-2026-03-25' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux:
  dma-mapping: add missing `inline` for `dma_free_attrs`
  mm/hmm: Indicate that HMM requires DMA coherency
  RDMA/umem: Tell DMA mapping that UMEM requires coherency
  iommu/dma: add support for DMA_ATTR_REQUIRE_COHERENT attribute
  dma-direct: prevent SWIOTLB path when DMA_ATTR_REQUIRE_COHERENT is set
  dma-mapping: Introduce DMA require coherency attribute
  dma-mapping: Clarify valid conditions for CPU cache line overlap
  dma-mapping: handle DMA_ATTR_CPU_CACHE_CLEAN in trace output
  dma-debug: Allow multiple invocations of overlapping entries
  dma: swiotlb: add KMSAN annotations to swiotlb_bounce()
parents 0138af24 2cdaff22
Loading
Loading
Loading
Loading
+30 −8
Original line number Diff line number Diff line
@@ -149,11 +149,33 @@ For architectures that require cache flushing for DMA coherence
DMA_ATTR_MMIO will not perform any cache flushing. The address
provided must never be mapped cacheable into the CPU.

DMA_ATTR_CPU_CACHE_CLEAN
------------------------

This attribute indicates the CPU will not dirty any cacheline overlapping this
DMA_FROM_DEVICE/DMA_BIDIRECTIONAL buffer while it is mapped. This allows
multiple small buffers to safely share a cacheline without risk of data
corruption, suppressing DMA debug warnings about overlapping mappings.
All mappings sharing a cacheline should have this attribute.
DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
------------------------------------

This attribute indicates that CPU cache lines may overlap for buffers mapped
with DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.

Such overlap may occur when callers map multiple small buffers that reside
within the same cache line. In this case, callers must guarantee that the CPU
will not dirty these cache lines after the mappings are established. When this
condition is met, multiple buffers can safely share a cache line without risking
data corruption.

All mappings that share a cache line must set this attribute to suppress DMA
debug warnings about overlapping mappings.

DMA_ATTR_REQUIRE_COHERENT
-------------------------

DMA mapping requests with the DMA_ATTR_REQUIRE_COHERENT attribute fail on
any system where SWIOTLB or cache management is required. This should only
be used to support uAPI designs that require continuous HW DMA
coherence with userspace processes, for example RDMA and DRM. At a
minimum the memory being mapped must be userspace memory from
pin_user_pages() or similar.

Where possible, drivers building uAPIs should consider using
dma_mmap_pages() instead of this interface.

It must never be used in an in-kernel driver that only works with
kernel memory.
+3 −2
Original line number Diff line number Diff line
@@ -55,7 +55,8 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d

	if (dirty)
		ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
					   DMA_BIDIRECTIONAL, 0);
					   DMA_BIDIRECTIONAL,
					   DMA_ATTR_REQUIRE_COHERENT);

	for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) {
		unpin_user_page_range_dirty_lock(sg_page(sg),
@@ -169,7 +170,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
	unsigned long lock_limit;
	unsigned long new_pinned;
	unsigned long cur_base;
	unsigned long dma_attr = 0;
	unsigned long dma_attr = DMA_ATTR_REQUIRE_COHERENT;
	struct mm_struct *mm;
	unsigned long npages;
	int pinned, ret;
+17 −4
Original line number Diff line number Diff line
@@ -1211,7 +1211,7 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
	 */
	if (dev_use_swiotlb(dev, size, dir) &&
	    iova_unaligned(iovad, phys, size)) {
		if (attrs & DMA_ATTR_MMIO)
		if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
			return DMA_MAPPING_ERROR;

		phys = iommu_dma_map_swiotlb(dev, phys, size, dir, attrs);
@@ -1223,7 +1223,8 @@ dma_addr_t iommu_dma_map_phys(struct device *dev, phys_addr_t phys, size_t size,
		arch_sync_dma_for_device(phys, size, dir);

	iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
	if (iova == DMA_MAPPING_ERROR && !(attrs & DMA_ATTR_MMIO))
	if (iova == DMA_MAPPING_ERROR &&
	    !(attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)))
		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
	return iova;
}
@@ -1233,7 +1234,7 @@ void iommu_dma_unmap_phys(struct device *dev, dma_addr_t dma_handle,
{
	phys_addr_t phys;

	if (attrs & DMA_ATTR_MMIO) {
	if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT)) {
		__iommu_dma_unmap(dev, dma_handle, size);
		return;
	}
@@ -1945,9 +1946,21 @@ int dma_iova_link(struct device *dev, struct dma_iova_state *state,
	if (WARN_ON_ONCE(iova_start_pad && offset > 0))
		return -EIO;

	/*
	 * DMA_IOVA_USE_SWIOTLB is set on the state once some entry has
	 * taken the SWIOTLB path, which we were supposed to prevent for
	 * mappings with the DMA_ATTR_REQUIRE_COHERENT attribute.
	 */
	if (WARN_ON_ONCE((state->__size & DMA_IOVA_USE_SWIOTLB) &&
			 (attrs & DMA_ATTR_REQUIRE_COHERENT)))
		return -EOPNOTSUPP;

	if (!dev_is_dma_coherent(dev) && (attrs & DMA_ATTR_REQUIRE_COHERENT))
		return -EOPNOTSUPP;

	if (dev_use_swiotlb(dev, size, dir) &&
	    iova_unaligned(iovad, phys, size)) {
		if (attrs & DMA_ATTR_MMIO)
		if (attrs & (DMA_ATTR_MMIO | DMA_ATTR_REQUIRE_COHERENT))
			return -EPERM;

		return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,
+5 −5
Original line number Diff line number Diff line
@@ -2912,10 +2912,10 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Same as virtqueue_add_inbuf but passes DMA_ATTR_CPU_CACHE_CLEAN to indicate
 * that the CPU will not dirty any cacheline overlapping this buffer while it
 * is available, and to suppress overlapping cacheline warnings in DMA debug
 * builds.
 * Same as virtqueue_add_inbuf but passes DMA_ATTR_DEBUGGING_IGNORE_CACHELINES
 * to indicate that the CPU will not dirty any cacheline overlapping this buffer
 * while it is available, and to suppress overlapping cacheline warnings in DMA
 * debug builds.
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
@@ -2928,7 +2928,7 @@ int virtqueue_add_inbuf_cache_clean(struct virtqueue *vq,
				    gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, false, gfp,
			     DMA_ATTR_CPU_CACHE_CLEAN);
			     DMA_ATTR_DEBUGGING_IGNORE_CACHELINES);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_cache_clean);

+13 −6
Original line number Diff line number Diff line
@@ -80,11 +80,18 @@
#define DMA_ATTR_MMIO		(1UL << 10)

/*
 * DMA_ATTR_CPU_CACHE_CLEAN: Indicates the CPU will not dirty any cacheline
 * overlapping this buffer while it is mapped for DMA. All mappings sharing
 * a cacheline must have this attribute for this to be considered safe.
 * DMA_ATTR_DEBUGGING_IGNORE_CACHELINES: Indicates the CPU cache line can be
 * overlapped. All mappings sharing a cacheline must have this attribute for
 * this to be considered safe.
 */
#define DMA_ATTR_CPU_CACHE_CLEAN	(1UL << 11)
#define DMA_ATTR_DEBUGGING_IGNORE_CACHELINES	(1UL << 11)

/*
 * DMA_ATTR_REQUIRE_COHERENT: Indicates that DMA coherency is required.
 * Mapping requests that carry this attribute fail when SWIOTLB or cache
 * flushing would be required.
 */
#define DMA_ATTR_REQUIRE_COHERENT	(1UL << 12)

/*
 * A dma_addr_t can hold any valid DMA or bus address for the platform.  It can
@@ -248,8 +255,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
{
	return NULL;
}
static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t dma_handle, unsigned long attrs)
static inline void dma_free_attrs(struct device *dev, size_t size,
		void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
{
}
static inline void *dmam_alloc_attrs(struct device *dev, size_t size,
Loading