Commit 77f2ef3f authored by Matthew Brost's avatar Matthew Brost
Browse files

drm/xe: Lock all gpuva ops during VM bind IOCTL



Lock all BOs used in gpuva ops and validate all BOs in a single step
during the VM bind IOCTL.

This help with the transition to making all gpuva ops in a VM bind IOCTL
a single atomic job which is required for proper error handling.

v2:
 - Better commit message (Oak)
 - s/op_lock/op_lock_and_prep, few other renames too (Oak)
 - Use DRM_EXEC_IGNORE_DUPLICATES flag in drm_exec_init (local testing)
 - Do not reserve slots in locking step (direction based on series from Thomas)
v3:
 - Validate BO if is immediate set (Oak)

Cc: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: default avatarMatthew Brost <matthew.brost@intel.com>
Reviewed-by: default avatarOak Zeng <oak.zeng@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-2-matthew.brost@intel.com
parent 6a2a90cb
Loading
Loading
Loading
Loading
+108 −40
Original line number Diff line number Diff line
@@ -315,18 +315,22 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)

#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000

static void xe_vm_kill(struct xe_vm *vm)
static void xe_vm_kill(struct xe_vm *vm, bool unlocked)
{
	struct xe_exec_queue *q;

	lockdep_assert_held(&vm->lock);

	if (unlocked)
		xe_vm_lock(vm, false);

	vm->flags |= XE_VM_FLAG_BANNED;
	trace_xe_vm_kill(vm);

	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
		q->ops->kill(q);

	if (unlocked)
		xe_vm_unlock(vm);

	/* TODO: Inform user the VM is banned */
@@ -557,7 +561,7 @@ static void preempt_rebind_work_func(struct work_struct *w)

	if (err) {
		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
		xe_vm_kill(vm);
		xe_vm_kill(vm, true);
	}
	up_write(&vm->lock);

@@ -1774,17 +1778,9 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue
		      u32 num_syncs, bool immediate, bool first_op,
		      bool last_op)
{
	int err;

	xe_vm_assert_held(vm);
	xe_bo_assert_held(bo);

	if (bo && immediate) {
		err = xe_bo_validate(bo, vm, true);
		if (err)
			return err;
	}

	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
			    last_op);
}
@@ -2437,17 +2433,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
	return 0;
}

static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
		      struct xe_vma *vma, struct xe_vma_op *op)
static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
		      struct xe_vma_op *op)
{
	int err;

	lockdep_assert_held_write(&vm->lock);

	err = xe_vm_lock_vma(exec, vma);
	if (err)
		return err;

	xe_vm_assert_held(vm);
	xe_bo_assert_held(xe_vma_bo(vma));

@@ -2528,19 +2520,10 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
			       struct xe_vma_op *op)
{
	struct drm_exec exec;
	int err;

retry_userptr:
	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
	drm_exec_until_all_locked(&exec) {
		err = op_execute(&exec, vm, vma, op);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;
	}
	drm_exec_fini(&exec);

	err = op_execute(vm, vma, op);
	if (err == -EAGAIN) {
		lockdep_assert_held_write(&vm->lock);

@@ -2705,14 +2688,95 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
	}
}

static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
				 bool validate)
{
	struct xe_bo *bo = xe_vma_bo(vma);
	int err = 0;

	if (bo) {
		if (!bo->vm)
			err = drm_exec_prepare_obj(exec, &bo->ttm.base, 0);
		if (!err && validate)
			err = xe_bo_validate(bo, xe_vma_vm(vma), true);
	}

	return err;
}

static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
			    struct xe_vma_op *op)
{
	int err = 0;

	switch (op->base.op) {
	case DRM_GPUVA_OP_MAP:
		err = vma_lock_and_validate(exec, op->map.vma,
					    !xe_vm_in_fault_mode(vm) ||
					    op->map.immediate);
		break;
	case DRM_GPUVA_OP_REMAP:
		err = vma_lock_and_validate(exec,
					    gpuva_to_vma(op->base.remap.unmap->va),
					    false);
		if (!err && op->remap.prev)
			err = vma_lock_and_validate(exec, op->remap.prev, true);
		if (!err && op->remap.next)
			err = vma_lock_and_validate(exec, op->remap.next, true);
		break;
	case DRM_GPUVA_OP_UNMAP:
		err = vma_lock_and_validate(exec,
					    gpuva_to_vma(op->base.unmap.va),
					    false);
		break;
	case DRM_GPUVA_OP_PREFETCH:
		err = vma_lock_and_validate(exec,
					    gpuva_to_vma(op->base.prefetch.va), true);
		break;
	default:
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
	}

	return err;
}

static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
					   struct xe_vm *vm,
					   struct list_head *ops_list)
{
	struct xe_vma_op *op;
	int err;

	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), 0);
	if (err)
		return err;

	list_for_each_entry(op, ops_list, link) {
		err = op_lock_and_prep(exec, vm, op);
		if (err)
			return err;
	}

	return 0;
}

static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
				     struct list_head *ops_list)
{
	struct drm_exec exec;
	struct xe_vma_op *op, *next;
	int err;

	lockdep_assert_held_write(&vm->lock);

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
		      DRM_EXEC_IGNORE_DUPLICATES, 0);
	drm_exec_until_all_locked(&exec) {
		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, ops_list);
		drm_exec_retry_on_contention(&exec);
		if (err)
			goto unlock;

		list_for_each_entry_safe(op, next, ops_list, link) {
			err = xe_vma_op_execute(vm, op);
			if (err) {
@@ -2721,13 +2785,17 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
				/*
				 * FIXME: Killing VM rather than proper error handling
				 */
			xe_vm_kill(vm);
			return -ENOSPC;
				xe_vm_kill(vm, false);
				err = -ENOSPC;
				goto unlock;
			}
			xe_vma_op_cleanup(vm, op);
		}
	}

	return 0;
unlock:
	drm_exec_fini(&exec);
	return err;
}

#define SUPPORTED_FLAGS	\