Commit cb0de06d authored by Christian König, committed by Alex Deucher
Browse files

drm/amdgpu: remove all KFD fences from the BO on release



Remove all KFD fences from the private dma_resv object.

This prevents the KFD from being evicted unnecessarily when an exported BO
is released.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-and-tested-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 3521276a
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -193,7 +193,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
#if IS_ENABLED(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
				unsigned long cur_seq, struct kgd_mem *mem);
int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
@@ -213,9 +213,8 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
}

static inline
int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
	return 0;
}

static inline
+22 −30
Original line number Diff line number Diff line
@@ -370,40 +370,32 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
	return 0;
}

int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
/**
 * amdgpu_amdkfd_remove_all_eviction_fences - Remove all eviction fences
 * @bo: the BO to remove the eviction fences from.
 *
 * This function should only be used on release when all references to the BO
 * are already dropped. We remove the eviction fences from the private copy of
 * the dma_resv object here since that is what is used during release to
 * determine if the BO is idle or not.
 */
void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
	struct amdgpu_bo *root = bo;
	struct amdgpu_vm_bo_base *vm_bo;
	struct amdgpu_vm *vm;
	struct amdkfd_process_info *info;
	struct amdgpu_amdkfd_fence *ef;
	int ret;

	/* we can always get vm_bo from root PD bo.*/
	while (root->parent)
		root = root->parent;

	vm_bo = root->vm_bo;
	if (!vm_bo)
		return 0;

	vm = vm_bo->vm;
	if (!vm)
		return 0;

	info = vm->process_info;
	if (!info || !info->eviction_fence)
		return 0;
	struct dma_resv *resv = &bo->tbo.base._resv;
	struct dma_fence *fence, *stub;
	struct dma_resv_iter cursor;

	ef = container_of(dma_fence_get(&info->eviction_fence->base),
			struct amdgpu_amdkfd_fence, base);
	dma_resv_assert_held(resv);

	BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
	ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
	dma_resv_unlock(bo->tbo.base.resv);
	stub = dma_fence_get_stub();
	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
		if (!to_amdgpu_amdkfd_fence(fence))
			continue;

	dma_fence_put(&ef->base);
	return ret;
		dma_resv_replace_fences(resv, fence->context, stub,
					DMA_RESV_USAGE_BOOKKEEP);
	}
	dma_fence_put(stub);
}

static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
+23 −15
Original line number Diff line number Diff line
@@ -1295,28 +1295,36 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
	if (abo->kfd_bo)
		amdgpu_amdkfd_release_notify(abo);

	/* We only remove the fence if the resv has individualized. */
	WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
			&& bo->base.resv != &bo->base._resv);
	if (bo->base.resv == &bo->base._resv)
		amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
	/*
	 * We lock the private dma_resv object here and since the BO is about to
	 * be released nobody else should have a pointer to it.
	 * So when this locking here fails something is wrong with the reference
	 * counting.
	 */
	if (WARN_ON_ONCE(!dma_resv_trylock(&bo->base._resv)))
		return;

	amdgpu_amdkfd_remove_all_eviction_fences(abo);

	if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM ||
	    !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) ||
	    adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev)))
		return;
		goto out;

	if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
		return;
	r = dma_resv_reserve_fences(&bo->base._resv, 1);
	if (r)
		goto out;

	r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true);
	if (WARN_ON(r))
		goto out;

	r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
	if (!WARN_ON(r)) {
	amdgpu_vram_mgr_set_cleared(bo->resource);
		amdgpu_bo_fence(abo, fence, false);
	dma_resv_add_fence(&bo->base._resv, fence, DMA_RESV_USAGE_KERNEL);
	dma_fence_put(fence);
	}

	dma_resv_unlock(bo->base.resv);
out:
	dma_resv_unlock(&bo->base._resv);
}

/**