Commit cd2c9c30 authored by Loïc Molinari's avatar Loïc Molinari Committed by Boris Brezillon
Browse files

drm/panthor: Add flag to map GEM object Write-Back Cacheable



Will be used by the UMD to optimize CPU accesses to buffers
that are frequently read by the CPU, or on which the access
pattern makes non-cacheable mappings inefficient.

Mapping buffers CPU-cached implies taking care of the CPU
cache maintenance in the UMD, unless the GPU is IO coherent.

v2:
- Add more to the commit message
- Tweak the doc
- Make sure we sync the section of the BO pointing to the CS
  syncobj before we read its seqno

v3:
- Fix formatting/spelling issues

v4:
- Add Steve's R-b

v5:
- Drop Steve's R-b (changes in the ioctl semantics requiring
  new review)

v6:
- Fix the uAPI doc
- Fix inverted logic in some comment

v7:
- No changes

v8:
- Collect R-b

Signed-off-by: default avatarLoïc Molinari <loic.molinari@collabora.com>
Reviewed-by: default avatarSteven Price <steven.price@arm.com>
Link: https://patch.msgid.link/20251208100841.730527-7-boris.brezillon@collabora.com


Signed-off-by: default avatarBoris Brezillon <boris.brezillon@collabora.com>
parent c146c82f
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -902,7 +902,8 @@ static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data,
	return panthor_vm_pool_destroy_vm(pfile->vms, args->id);
}

#define PANTHOR_BO_FLAGS		DRM_PANTHOR_BO_NO_MMAP
#define PANTHOR_BO_FLAGS		(DRM_PANTHOR_BO_NO_MMAP | \
					 DRM_PANTHOR_BO_WB_MMAP)

static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data,
				   struct drm_file *file)
@@ -921,6 +922,10 @@ static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data,
		goto out_dev_exit;
	}

	if ((args->flags & DRM_PANTHOR_BO_NO_MMAP) &&
	    (args->flags & DRM_PANTHOR_BO_WB_MMAP))
		return -EINVAL;

	if (args->exclusive_vm_id) {
		vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id);
		if (!vm) {
+35 −2
Original line number Diff line number Diff line
@@ -77,6 +77,39 @@ static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u
static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo) {}
#endif

static bool
should_map_wc(struct panthor_gem_object *bo, struct panthor_vm *exclusive_vm)
{
	struct panthor_device *ptdev = container_of(bo->base.base.dev, struct panthor_device, base);

	/* We can't do uncached mappings if the device is coherent,
	 * because the zeroing done by the shmem layer at page allocation
	 * time happens on a cached mapping which isn't CPU-flushed (at least
	 * not on Arm64 where the flush is deferred to PTE setup time, and
	 * only done conditionally based on the mapping permissions). We can't
	 * rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush
	 * those, because they are NOPed if dma_dev_coherent() returns true.
	 *
	 * FIXME: Note that this problem is going to pop up again when we
	 * decide to support mapping buffers with the NO_MMAP flag as
	 * non-shareable (AKA buffers accessed only by the GPU), because we
	 * need the same CPU flush to happen after page allocation, otherwise
	 * there's a risk of data leak or late corruption caused by a dirty
	 * cacheline being evicted. At this point we'll need a way to force
	 * CPU cache maintenance regardless of whether the device is coherent
	 * or not.
	 */
	if (ptdev->coherent)
		return false;

	/* Cached mappings are explicitly requested, so no write-combine. */
	if (bo->flags & DRM_PANTHOR_BO_WB_MMAP)
		return false;

	/* The default is write-combine. */
	return true;
}

static void panthor_gem_free_object(struct drm_gem_object *obj)
{
	struct panthor_gem_object *bo = to_panthor_bo(obj);
@@ -163,6 +196,7 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
	bo = to_panthor_bo(&obj->base);
	kbo->obj = &obj->base;
	bo->flags = bo_flags;
	bo->base.map_wc = should_map_wc(bo, vm);
	bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm);
	drm_gem_object_get(bo->exclusive_vm_root_gem);
	bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
@@ -363,7 +397,6 @@ static const struct drm_gem_object_funcs panthor_gem_funcs = {
 */
struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size)
{
	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
	struct panthor_gem_object *obj;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
@@ -371,7 +404,6 @@ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t
		return ERR_PTR(-ENOMEM);

	obj->base.base.funcs = &panthor_gem_funcs;
	obj->base.map_wc = !ptdev->coherent;
	mutex_init(&obj->label.lock);

	panthor_gem_debugfs_bo_init(obj);
@@ -406,6 +438,7 @@ panthor_gem_create_with_handle(struct drm_file *file,

	bo = to_panthor_bo(&shmem->base);
	bo->flags = flags;
	bo->base.map_wc = should_map_wc(bo, exclusive_vm);

	if (exclusive_vm) {
		bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm);
+16 −2
Original line number Diff line number Diff line
@@ -863,8 +863,11 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
	struct iosys_map map;
	int ret;

	if (queue->syncwait.kmap)
		return queue->syncwait.kmap + queue->syncwait.offset;
	if (queue->syncwait.kmap) {
		bo = container_of(queue->syncwait.obj,
				  struct panthor_gem_object, base.base);
		goto out_sync;
	}

	bo = panthor_vm_get_bo_for_va(group->vm,
				      queue->syncwait.gpu_va,
@@ -881,6 +884,17 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
	if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap))
		goto err_put_syncwait_obj;

out_sync:
	/* Make sure the CPU caches are invalidated before the seqno is read.
	 * drm_gem_shmem_sync() is a NOP if map_wc=true, so no need to check
	 * it here.
	 */
	panthor_gem_sync(&bo->base.base, queue->syncwait.offset,
			 queue->syncwait.sync64 ?
			 sizeof(struct panthor_syncobj_64b) :
			 sizeof(struct panthor_syncobj_32b),
			 DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE);

	return queue->syncwait.kmap + queue->syncwait.offset;

err_put_syncwait_obj:
+9 −0
Original line number Diff line number Diff line
@@ -681,6 +681,15 @@ struct drm_panthor_vm_get_state {
enum drm_panthor_bo_flags {
	/** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */
	DRM_PANTHOR_BO_NO_MMAP = (1 << 0),

	/**
	 * @DRM_PANTHOR_BO_WB_MMAP: Force "Write-Back Cacheable" CPU mapping.
	 *
	 * CPU map the buffer object in userspace by forcing the "Write-Back
	 * Cacheable" cacheability attribute. The mapping otherwise uses the
	 * "Non-Cacheable" attribute if the GPU is not IO coherent.
	 */
	DRM_PANTHOR_BO_WB_MMAP = (1 << 1),
};

/**