Commit 834368ea, authored by Philip Yang, committed by Alex Deucher
Browse files

drm/amdkfd: Ensure user queue buffers residency



Add an atomic queue_refcount to struct bo_va, and return -EBUSY to fail
unmapping a BO from the GPU if the bo_va queue_refcount is not zero.

Creating a queue increases the bo_va queue_refcount and destroying a queue
decreases it, to ensure the queue buffers stay mapped on the GPU while the
queue is active.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 22a9d5cb
Loading
Loading
Loading
Loading
+12 −2
Original line number Diff line number Diff line
@@ -1252,7 +1252,7 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
	return ret;
}

static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
				struct kfd_mem_attachment *entry,
				struct amdgpu_sync *sync)
{
@@ -1260,11 +1260,18 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
	struct amdgpu_device *adev = entry->adev;
	struct amdgpu_vm *vm = bo_va->base.vm;

	if (bo_va->queue_refcount) {
		pr_debug("bo_va->queue_refcount %d\n", bo_va->queue_refcount);
		return -EBUSY;
	}

	amdgpu_vm_bo_unmap(adev, bo_va, entry->va);

	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);

	amdgpu_sync_fence(sync, bo_va->last_pt_update);

	return 0;
}

static int update_gpuvm_pte(struct kgd_mem *mem,
@@ -2191,7 +2198,10 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
		pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
			 entry->va, entry->va + bo_size, entry);

		unmap_bo_from_gpuvm(mem, entry, ctx.sync);
		ret = unmap_bo_from_gpuvm(mem, entry, ctx.sync);
		if (ret)
			goto unreserve_out;

		entry->is_mapped = false;

		mem->mapped_to_gpu_memory--;
+6 −0
Original line number Diff line number Diff line
@@ -90,6 +90,12 @@ struct amdgpu_bo_va {
	bool				cleared;

	bool				is_xgmi;

	/*
	 * protected by vm reservation lock
	 * if non-zero, cannot unmap from GPU because user queues may still access it
	 */
	unsigned int			queue_refcount;
};

struct amdgpu_bo {
+1 −2
Original line number Diff line number Diff line
@@ -1384,8 +1384,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
			peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);
			pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
+1 −0
Original line number Diff line number Diff line
@@ -1292,6 +1292,7 @@ void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
/*
 * Look up a queue buffer in @vm. On success a referenced BO is stored in
 * *pbo, the queue_refcount of the bo_va it was found through is incremented,
 * and 0 is returned; the error path returns -EINVAL.
 * NOTE(review): the role of @addr and @expected_size in the lookup is not
 * visible here - confirm against the definition.
 */
int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo,
			 u64 expected_size);
/*
 * Counterpart of kfd_queue_buffer_get(): if *bo is set and has a bo_va in
 * @vm, decrement that bo_va's queue_refcount, then unref the BO through
 * amdgpu_bo_unref(). queue_refcount is documented as protected by the VM
 * reservation lock, so the caller is expected to hold it.
 */
void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo);
/*
 * Acquire/release the buffers referenced by a queue's properties.
 * kfd_queue_release_buffers() reserves the VM root BO, puts the wptr, rptr,
 * ring, EOP and CWSR BOs, and returns 0 or the reservation error.
 * NOTE(review): kfd_queue_acquire_buffers() presumably takes the same set of
 * buffers via kfd_queue_buffer_get() - its body is not visible here; confirm.
 */
int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);

+29 −5
Original line number Diff line number Diff line
@@ -106,6 +106,7 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_
	}

	*pbo = amdgpu_bo_ref(mapping->bo_va->base.bo);
	mapping->bo_va->queue_refcount++;
	return 0;

out_err:
@@ -113,6 +114,19 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_
	return -EINVAL;
}

/*
 * kfd_queue_buffer_put - release one queue buffer
 * @vm: VM in which the buffer's bo_va is looked up
 * @bo: address of the BO pointer to release; *bo may be NULL
 *
 * When *bo is set and amdgpu_vm_bo_find() returns a bo_va for it in @vm,
 * that bo_va's queue_refcount is decremented. In every case the pointer is
 * then handed to amdgpu_bo_unref().
 *
 * The visible caller, kfd_queue_release_buffers(), holds the reservation of
 * vm->root.bo around this call.
 */
void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo)
{
	struct amdgpu_bo_va *mapping_va;

	/* Nothing to account for when no BO was ever attached. */
	if (!*bo)
		goto drop_ref;

	mapping_va = amdgpu_vm_bo_find(vm, *bo);
	if (mapping_va)
		mapping_va->queue_refcount--;

drop_ref:
	amdgpu_bo_unref(bo);
}

int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
{
	struct amdgpu_vm *vm;
@@ -166,10 +180,20 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope

/*
 * kfd_queue_release_buffers - drop a queue's hold on its buffers
 * @pdd: process device whose drm_priv identifies the VM
 * @properties: queue properties holding the wptr, rptr, ring, EOP and CWSR BOs
 *
 * Each buffer is released through kfd_queue_buffer_put(), which decrements
 * the queue_refcount of the matching bo_va before unreferencing the BO.
 * queue_refcount is protected by the VM reservation lock, so the root BO of
 * the VM stays reserved for the duration of the puts.
 *
 * Return: 0 on success, or the error from amdgpu_bo_reserve(). On that error
 * path no buffer has been released.
 */
int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
{
	struct amdgpu_vm *vm;
	int err;

	vm = drm_priv_to_vm(pdd->drm_priv);
	err = amdgpu_bo_reserve(vm->root.bo, false);
	if (err)
		return err;

	/* The puts must see the BO pointers intact to find their bo_va. */
	kfd_queue_buffer_put(vm, &properties->wptr_bo);
	kfd_queue_buffer_put(vm, &properties->rptr_bo);
	kfd_queue_buffer_put(vm, &properties->ring_bo);
	kfd_queue_buffer_put(vm, &properties->eop_buf_bo);
	kfd_queue_buffer_put(vm, &properties->cwsr_bo);

	amdgpu_bo_unreserve(vm->root.bo);
	return 0;
}