Commit 69c5fbd2 authored by Alex Deucher
Browse files

drm/amdgpu: rework how we handle TLB fences

Add a new VM flag to indicate whether or not we need
a TLB fence.  User queues (KFD or KGD) require a TLB fence.
A TLB fence is not strictly required for kernel queues,
but it shouldn't hurt.  Enabling it unconditionally
should therefore be fine, but it seems to trigger
some issues in KIQ/MES.  Only enable it for KFD,
or when KGD user queues are enabled (currently via module
parameter).

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4798
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4749


Fixes: f3854e04 ("drm/amdgpu: attach tlb fence to the PTs update")
Cc: Christian König <christian.koenig@amd.com>
Cc: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Prike Liang <Prike.Liang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 855e3e19
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -1073,7 +1073,10 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
	}

	/* Prepare a TLB flush fence to be attached to PTs */
	if (!params->unlocked) {
	/* The check for need_tlb_fence should be dropped once we
	 * sort out the issues with KIQ/MES TLB invalidation timeouts.
	 */
	if (!params->unlocked && vm->need_tlb_fence) {
		amdgpu_vm_tlb_fence_create(params->adev, vm, fence);

		/* Makes sure no PD/PT is freed before the flush */
@@ -2606,6 +2609,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	ttm_lru_bulk_move_init(&vm->lru_bulk_move);

	vm->is_compute_context = false;
	vm->need_tlb_fence = amdgpu_userq_enabled(&adev->ddev);

	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
				    AMDGPU_VM_USE_CPU_FOR_GFX);
@@ -2743,6 +2747,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
	dma_fence_put(vm->last_update);
	vm->last_update = dma_fence_get_stub();
	vm->is_compute_context = true;
	vm->need_tlb_fence = true;

unreserve_bo:
	amdgpu_bo_unreserve(vm->root.bo);
+2 −0
Original line number Diff line number Diff line
@@ -441,6 +441,8 @@ struct amdgpu_vm {
	struct ttm_lru_bulk_move lru_bulk_move;
	/* Flag to indicate if VM is used for compute */
	bool			is_compute_context;
	/* Flag to indicate if VM needs a TLB fence (KFD or KGD) */
	bool			need_tlb_fence;

	/* Memory partition number, -1 means any partition */
	int8_t			mem_id;