Commit fd1fa48b authored by Christian König's avatar Christian König Committed by Alex Deucher
Browse files

drm/amdgpu: lock both VM and BO in amdgpu_gem_object_open



The VM was not locked in the past since we initially only cleared the
linked list element and not added it to any VM state.

But this has changed quite some time ago, we just never realized this
problem because the VM state lock was masking it.

Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent abde4911
Loading
Loading
Loading
Loading
+13 −6
Original line number Diff line number Diff line
@@ -878,6 +878,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
	struct amdgpu_bo *bo[2] = {NULL, NULL};
	struct amdgpu_bo_va *bo_va;
	bool same_hive = false;
	struct drm_exec exec;
	int i, ret;

	if (!va) {
@@ -958,19 +959,25 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
			goto unwind;
		}

		/* Add BO to VM internal data structures */
		ret = amdgpu_bo_reserve(bo[i], false);
		if (ret) {
			pr_debug("Unable to reserve BO during memory attach");
		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(&exec) {
			ret = amdgpu_vm_lock_pd(vm, &exec, 0);
			drm_exec_retry_on_contention(&exec);
			if (unlikely(ret))
				goto unwind;
			ret = drm_exec_lock_obj(&exec, &bo[i]->tbo.base);
			drm_exec_retry_on_contention(&exec);
			if (unlikely(ret))
				goto unwind;
		}

		bo_va = amdgpu_vm_bo_find(vm, bo[i]);
		if (!bo_va)
			bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
		else
			++bo_va->ref_count;
		attachment[i]->bo_va = bo_va;
		amdgpu_bo_unreserve(bo[i]);
		drm_exec_fini(&exec);
		if (unlikely(!attachment[i]->bo_va)) {
			ret = -ENOMEM;
			pr_err("Failed to add BO object to VM. ret == %d\n",
+17 −5
Original line number Diff line number Diff line
@@ -232,6 +232,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va *bo_va;
	struct mm_struct *mm;
	struct drm_exec exec;
	int r;

	mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm);
@@ -242,9 +243,18 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
	    !amdgpu_vm_is_bo_always_valid(vm, abo))
		return -EPERM;

	r = amdgpu_bo_reserve(abo, false);
	if (r)
		return r;
	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_obj(&exec, &abo->tbo.base, 1);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(r))
			goto out_unlock;

		r = amdgpu_vm_lock_pd(vm, &exec, 0);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(r))
			goto out_unlock;
	}

	amdgpu_vm_bo_update_shared(abo);
	bo_va = amdgpu_vm_bo_find(vm, abo);
@@ -260,8 +270,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
		amdgpu_bo_unreserve(abo);
		return r;
	}

	amdgpu_bo_unreserve(abo);
	drm_exec_fini(&exec);

	/* Validate and add eviction fence to DMABuf imports with dynamic
	 * attachment in compute VMs. Re-validation will be done by
@@ -294,7 +303,10 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
		}
	}
	mutex_unlock(&vm->process_info->lock);
	return r;

out_unlock:
	drm_exec_fini(&exec);
	return r;
}

+10 −0
Original line number Diff line number Diff line
@@ -1445,6 +1445,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_fpriv *fpriv;
	struct drm_exec exec;
	int r, pasid;

	/* Ensure IB tests are run on ring */
@@ -1484,7 +1485,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
	if (r)
		goto error_pasid;

	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
	drm_exec_until_all_locked(&exec) {
		r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(r))
			goto error_vm;
	}

	fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
	drm_exec_fini(&exec);
	if (!fpriv->prt_va) {
		r = -ENOMEM;
		goto error_vm;
+2 −0
Original line number Diff line number Diff line
@@ -1735,6 +1735,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
{
	struct amdgpu_bo_va *bo_va;

	amdgpu_vm_assert_locked(vm);

	bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;