Commit d8c2c6c3, authored by Srinivasan Shanmugam, committed by Alex Deucher
Browse files

drm/amdgpu: Map/Unmap MMIO_REMAP as BAR register window; add TTM sg helpers; wire dma-buf



MMIO_REMAP (HDP flush page) exposes a hardware MMIO register window via
a PCI BAR; there are no struct pages backing it (it is not normal RAM).
But when one device shares memory with another through dma-buf, the
receiver still expects a delivery route — a list of DMA-able chunks —
called an sg_table. For the BAR window we cannot build that list from
pages (there are none), so we instead create a one-entry list that
points directly to the BAR's physical bus address and tell DMA: "use
this I/O span", i.e. a single, contiguous byte range on the PCI bus
(start DMA address + length). That is why the entry is set up with
sg_set_page(..., NULL, ...) and the range is mapped with
dma_map_resource(): the importer performs DMA reads/writes directly to
that range, so the sg_table is built from a BAR physical span rather
than from pages.

This patch centralizes the BAR-I/O mapping in TTM and wires dma-buf to
it:

Add amdgpu_ttm_mmio_remap_alloc_sgt() /
amdgpu_ttm_mmio_remap_free_sgt(). They walk the TTM resource via
amdgpu_res_cursor, add the byte offset to adev->rmmio_remap.bus_addr,
build a one-entry sg_table with sg_set_page(NULL, …), and map/unmap it
with dma_map_resource().

In dma-buf map/unmap, if the BO is in AMDGPU_PL_MMIO_REMAP, call the new
helpers.

Single place for BAR-I/O handling: amdgpu_ttm.c in
amdgpu_ttm_mmio_remap_alloc_sgt() and ..._free_sgt().
No struct pages: sg_set_page(sg, NULL, cur.size, 0); inside
amdgpu_ttm_mmio_remap_alloc_sgt().
Minimal sg_table: sg_alloc_table(*sgt, 1, GFP_KERNEL); inside
amdgpu_ttm_mmio_remap_alloc_sgt().
Hooked into dma-buf: amdgpu_dma_buf_map()/unmap() in amdgpu_dma_buf.c
call these helpers for AMDGPU_PL_MMIO_REMAP.

v2: squash in fix for set/get tiling

Suggested-by: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent de895550
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@
#include "amdgpu_dma_buf.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_vm.h"
#include "amdgpu_ttm.h"
#include <drm/amdgpu_drm.h>
#include <drm/ttm/ttm_tt.h>
#include <linux/dma-buf.h>
@@ -241,6 +242,14 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
		if (r)
			return ERR_PTR(r);
		break;

	case AMDGPU_PL_MMIO_REMAP:
		r = amdgpu_ttm_mmio_remap_alloc_sgt(adev, bo->tbo.resource,
						    attach->dev, dir, &sgt);
		if (r)
			return ERR_PTR(r);
		break;

	default:
		return ERR_PTR(-EINVAL);
	}
@@ -266,6 +275,15 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
				 struct sg_table *sgt,
				 enum dma_data_direction dir)
{
	struct drm_gem_object *obj = attach->dmabuf->priv;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

	if (bo->tbo.resource &&
	    bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP) {
		amdgpu_ttm_mmio_remap_free_sgt(attach->dev, dir, sgt);
		return;
	}

	if (sg_page(sgt->sgl)) {
		dma_unmap_sgtable(attach->dev, sgt, dir, 0);
		sg_free_table(sgt);
+9 −0
Original line number Diff line number Diff line
@@ -719,6 +719,15 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
	if (unlikely(r != 0))
		goto out;

	/* Reject MMIO_REMAP BOs at IOCTL level: metadata/tiling does not apply. */
	if (robj->tbo.resource &&
	    robj->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP) {
		DRM_WARN("metadata ioctl on MMIO_REMAP BO (handle %d)\n",
			 args->handle);
		r = -EINVAL;
		goto unreserve;
	}

	if (args->op == AMDGPU_GEM_METADATA_OP_GET_METADATA) {
		amdgpu_bo_get_tiling_flags(robj, &args->data.tiling_info);
		r = amdgpu_bo_get_metadata(robj, args->data.data,
+11 −0
Original line number Diff line number Diff line
@@ -1126,6 +1126,10 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct amdgpu_bo_user *ubo;

	/* MMIO_REMAP is BAR I/O space; tiling should never be used here. */
	WARN_ON_ONCE(bo->tbo.resource &&
		     bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP);

	BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
	if (adev->family <= AMDGPU_FAMILY_CZ &&
	    AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
@@ -1148,6 +1152,13 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
	struct amdgpu_bo_user *ubo;

	/*
	 * MMIO_REMAP BOs are not real VRAM/GTT memory but a fixed BAR I/O window.
	 * They should never go through GEM tiling helpers.
	 */
	WARN_ON_ONCE(bo->tbo.resource &&
		     bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP);

	BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
	dma_resv_assert_held(bo->tbo.base.resv);
	ubo = to_amdgpu_bo_user(bo);
+80 −0
Original line number Diff line number Diff line
@@ -1062,6 +1062,86 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
	kfree(gtt);
}

/**
 * amdgpu_ttm_mmio_remap_alloc_sgt - build an sg_table for MMIO_REMAP I/O aperture
 * @adev: amdgpu device providing the remap BAR base (adev->rmmio_remap.bus_addr)
 * @res:  TTM resource of the BO to export; expected to live in AMDGPU_PL_MMIO_REMAP
 * @dev:  importing device to map for (typically @attach->dev in dma-buf paths)
 * @dir:  DMA data direction for the importer (passed to dma_map_resource())
 * @sgt:  output; on success, set to a newly allocated sg_table describing the
 *        I/O span; on failure, set to NULL
 *
 * The HDP flush page (AMDGPU_PL_MMIO_REMAP) is a fixed hardware I/O window in a PCI
 * BAR; there are no struct pages to back it. Importers still need a DMA address
 * list, so we synthesize a minimal sg_table and populate it from
 * dma_map_resource(), not from pages. Using the common amdgpu_res_cursor walker
 * keeps the offset/size math consistent with other TTM/manager users.
 *
 * - @res is assumed to be a small, contiguous I/O region (typically a single 4 KiB
 *   page) in AMDGPU_PL_MMIO_REMAP. Callers should validate placement before calling.
 *   Only the first span reported by the cursor is mapped.
 * - The sg entry is created with sg_set_page(sg, NULL, ...) to reflect I/O space.
 * - The mapping uses DMA_ATTR_SKIP_CPU_SYNC because this is MMIO, not cacheable RAM.
 * - Peer reachability / p2pdma policy checks must be done by the caller.
 *
 * Return:
 * * 0 on success, with *@sgt set to a valid table that must be freed via
 *   amdgpu_ttm_mmio_remap_free_sgt().
 * * -ENOMEM if allocation of the sg_table container fails.
 * * the error code from sg_alloc_table() if the scatterlist cannot be allocated.
 * * -EIO if dma_map_resource() fails.
 *
 * On every failure path *@sgt is NULL, so the caller never observes a pointer
 * to memory that has already been freed.
 */
int amdgpu_ttm_mmio_remap_alloc_sgt(struct amdgpu_device *adev,
				    struct ttm_resource *res,
				    struct device *dev,
				    enum dma_data_direction dir,
				    struct sg_table **sgt)
{
	struct amdgpu_res_cursor cur;
	struct sg_table *table;
	struct scatterlist *sg;
	resource_size_t phys;
	dma_addr_t dma;
	int r;

	/* Publish the table only once it is fully built and mapped. */
	*sgt = NULL;

	/* Walk the resource once; MMIO_REMAP is expected to be contiguous+small. */
	amdgpu_res_first(res, 0, res->size, &cur);

	/* Translate byte offset in the remap window into a host physical BAR address. */
	phys = adev->rmmio_remap.bus_addr + cur.start;

	/* Build a single-entry sg_table mapped as I/O (no struct page backing). */
	table = kzalloc(sizeof(*table), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	r = sg_alloc_table(table, 1, GFP_KERNEL);
	if (r)
		goto err_free_table;

	sg = table->sgl;
	sg_set_page(sg, NULL, cur.size, 0);  /* I/O space: no pages to reference */

	dma = dma_map_resource(dev, phys, cur.size, dir, DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, dma)) {
		r = -EIO;
		goto err_free_sg;
	}
	sg_dma_address(sg) = dma;
	sg_dma_len(sg) = cur.size;

	*sgt = table;
	return 0;

err_free_sg:
	sg_free_table(table);
err_free_table:
	kfree(table);
	return r;
}

void amdgpu_ttm_mmio_remap_free_sgt(struct device *dev,
				    enum dma_data_direction dir,
				    struct sg_table *sgt)
{
	struct scatterlist *sg = sgt->sgl;

	dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg),
			   dir, DMA_ATTR_SKIP_CPU_SYNC);
	sg_free_table(sgt);
	kfree(sgt);
}

/**
 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
 *
+9 −0
Original line number Diff line number Diff line
@@ -213,4 +213,13 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);

void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);

/*
 * MMIO_REMAP export helpers: build a single-entry, DMA-mapped sg_table for a
 * resource in the MMIO_REMAP window. Returns 0 on success or a negative
 * error code.
 */
int amdgpu_ttm_mmio_remap_alloc_sgt(struct amdgpu_device *adev,
				    struct ttm_resource *res,
				    struct device *dev,
				    enum dma_data_direction dir,
				    struct sg_table **sgt);
/* Unmap and free an sg_table created by amdgpu_ttm_mmio_remap_alloc_sgt(). */
void amdgpu_ttm_mmio_remap_free_sgt(struct device *dev,
				    enum dma_data_direction dir,
				    struct sg_table *sgt);

#endif