Commit 9924db4a authored by Dave Airlie
Browse files

Merge tag 'drm-xe-next-2025-04-28-1' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next



Core Changes:
- Add drm_coredump_printer_is_full() (Matt Brost)

Driver Changes:
- Do not queue unneeded terminations from debugfs (Daniele)
- Fix out-of-bound while enabling engine activity stats (Michal)
- Use GT oriented message to report engine activity error (Michal)
- Some fault-injection additions (Satyanarayana)
- Fix an error pointer dereference (Harshit)
- Fix capture of steering registers (John)
- Use the steering flag when printing registers (John)
- Cache DSS info when creating capture register list (John)
- Backup VRAM in PM notifier instead of in the suspend / freeze
  callbacks (Matt Auld)
- Fix CFI violation when accessing sysfs files (Jeevaka)
- Fix kernel version docs for temperature and fan speed (Lucas)
- Add devcoredump chunking (Matt Brost)
- Update xe_ttm_access_memory to use GPU for non-visible access
  (Matt Brost)
- Abort printing coredump in VM printer output if full (Matt Brost)
- Resolve a possible circular locking dependency (Harish)
- Don't support EU stall on SRIOV VF (Harish)
- Drop force_alloc from xe_bo_evict in selftests (Matt Brost)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://lore.kernel.org/r/aA-mvTb6s909V8hu@fedora
parents d2b9e2f8 1bb53d05
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -111,7 +111,7 @@ Description: RO. Package current voltage in millivolt.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp2_input
Date:		March 2025
KernelVersion:	6.14
KernelVersion:	6.15
Contact:	intel-xe@lists.freedesktop.org
Description:	RO. Package temperature in millidegree Celsius.

@@ -119,7 +119,7 @@ Description: RO. Package temperature in millidegree Celsius.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp3_input
Date:		March 2025
KernelVersion:	6.14
KernelVersion:	6.15
Contact:	intel-xe@lists.freedesktop.org
Description:	RO. VRAM temperature in millidegree Celsius.

@@ -127,7 +127,7 @@ Description: RO. VRAM temperature in millidegree Celsius.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan1_input
Date:		March 2025
KernelVersion:	6.14
KernelVersion:	6.16
Contact:	intel-xe@lists.freedesktop.org
Description:	RO. Fan 1 speed in RPM.

@@ -135,7 +135,7 @@ Description: RO. Fan 1 speed in RPM.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan2_input
Date:		March 2025
KernelVersion:	6.14
KernelVersion:	6.16
Contact:	intel-xe@lists.freedesktop.org
Description:	RO. Fan 2 speed in RPM.

@@ -143,7 +143,7 @@ Description: RO. Fan 2 speed in RPM.

What:		/sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan3_input
Date:		March 2025
KernelVersion:	6.14
KernelVersion:	6.16
Contact:	intel-xe@lists.freedesktop.org
Description:	RO. Fan 3 speed in RPM.

+1 −1
Original line number Diff line number Diff line
@@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
	}

	/* Evict to system. CCS data should be copied. */
	ret = xe_bo_evict(bo, true);
	ret = xe_bo_evict(bo);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return ret;
+1 −1
Original line number Diff line number Diff line
@@ -65,7 +65,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
	 * the exporter and the importer should be the same bo.
	 */
	swap(exported->ttm.base.dma_buf, dmabuf);
	ret = xe_bo_evict(exported, true);
	ret = xe_bo_evict(exported);
	swap(exported->ttm.base.dma_buf, dmabuf);
	if (ret) {
		if (ret != -EINTR && ret != -ERESTARTSYS)
+1 −1
Original line number Diff line number Diff line
@@ -509,7 +509,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
	dma_fence_put(fence);

	kunit_info(test, "Evict vram buffer object\n");
	ret = xe_bo_evict(vram_bo, true);
	ret = xe_bo_evict(vram_bo);
	if (ret) {
		KUNIT_FAIL(test, "Failed to evict bo.\n");
		return;
+124 −28
Original line number Diff line number Diff line
@@ -1084,6 +1084,80 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
	return lret;
}

/**
 * xe_bo_notifier_prepare_pinned() - Create the backup object for a pinned
 * VRAM buffer object ahead of suspend/hibernation.
 * @bo: The buffer object to prepare.
 *
 * Takes the bo lock and, if @bo is still pinned, resides in VRAM and is not
 * flagged XE_BO_FLAG_PINNED_NORESTORE, creates a pinned backup object with
 * XE_BO_FLAG_SYSTEM that shares @bo's reservation object, and stores it in
 * @bo->backup_obj. Objects not matching those conditions are left untouched
 * and reported as success. Intended to be called from the PM notifier, before
 * suspend/hibernation.
 *
 * Return: 0 on success (including when nothing had to be done), or the
 * negative error code from the backup object creation on failure.
 */
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
{
	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
	struct xe_bo *shadow;
	int err = 0;

	xe_bo_lock(bo, false);

	/* A backup object left over from an earlier prepare is unexpected. */
	xe_assert(xe, !bo->backup_obj);

	/*
	 * Nothing to do unless the object is pinned, placed in VRAM and
	 * expected to be restored. The pinned check has to be repeated here:
	 * being called from the PM notifier, an unpin may have slipped in
	 * between dropping the pinned list lock and taking the bo lock above.
	 */
	if (!xe_bo_is_pinned(bo) || !xe_bo_is_vram(bo) ||
	    (bo->flags & XE_BO_FLAG_PINNED_NORESTORE))
		goto unlock;

	shadow = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
					DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
					XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
					XE_BO_FLAG_PINNED);
	if (IS_ERR(shadow)) {
		err = PTR_ERR(shadow);
		goto unlock;
	}

	/* The backup holds a reference on its parent; bo_destroy drops it. */
	shadow->parent_obj = xe_bo_get(bo);
	ttm_bo_pin(&shadow->ttm);
	bo->backup_obj = shadow;

unlock:
	xe_bo_unlock(bo);
	return err;
}

/**
 * xe_bo_notifier_unprepare_pinned() - Undo a previous prepare operation.
 * @bo: The buffer object to undo the prepare for.
 *
 * Under the bo lock, unpins and releases the backup object if one is still
 * attached to @bo, then clears @bo->backup_obj. The expectation is that this
 * is called from the PM notifier when undoing the prepare step.
 *
 * Return: Always returns 0.
 */
int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
{
	struct xe_bo *backup;

	xe_bo_lock(bo, false);

	/* Only sample the backup pointer once the bo lock is held. */
	backup = bo->backup_obj;
	if (backup) {
		ttm_bo_unpin(&backup->ttm);
		xe_bo_put(backup);
		bo->backup_obj = NULL;
	}

	xe_bo_unlock(bo);

	return 0;
}

/**
 * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
 * @bo: The buffer object to move.
@@ -1098,7 +1172,8 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
int xe_bo_evict_pinned(struct xe_bo *bo)
{
	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
	struct xe_bo *backup;
	struct xe_bo *backup = bo->backup_obj;
	bool backup_created = false;
	bool unmap = false;
	int ret = 0;

@@ -1120,13 +1195,18 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
	if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
		goto out_unlock_bo;

	backup = xe_bo_create_locked(xe, NULL, NULL, bo->size, ttm_bo_type_kernel,
	if (!backup) {
		backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
						DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
						XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
						XE_BO_FLAG_PINNED);
		if (IS_ERR(backup)) {
			ret = PTR_ERR(backup);
			goto out_unlock_bo;
		}
		backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
		backup_created = true;
	}

	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
		struct xe_migrate *migrate;
@@ -1173,12 +1253,12 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
				   bo->size);
	}

	if (!bo->backup_obj)
		bo->backup_obj = backup;

out_backup:
	xe_bo_vunmap(backup);
	xe_bo_unlock(backup);
	if (ret)
	if (ret && backup_created)
		xe_bo_put(backup);
out_unlock_bo:
	if (unmap)
@@ -1212,15 +1292,12 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
	if (!backup)
		return 0;

	xe_bo_lock(backup, false);
	xe_bo_lock(bo, false);

	if (!xe_bo_is_pinned(backup)) {
		ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
		if (ret)
		goto out_backup;

	if (WARN_ON(!dma_resv_trylock(bo->ttm.base.resv))) {
		ret = -EBUSY;
		goto out_backup;
			goto out_unlock_bo;
	}

	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
@@ -1261,7 +1338,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
		if (iosys_map_is_null(&bo->vmap)) {
			ret = xe_bo_vmap(bo);
			if (ret)
				goto out_unlock_bo;
				goto out_backup;
			unmap = true;
		}

@@ -1271,15 +1348,17 @@ int xe_bo_restore_pinned(struct xe_bo *bo)

	bo->backup_obj = NULL;

out_backup:
	xe_bo_vunmap(backup);
	if (!bo->backup_obj) {
		if (xe_bo_is_pinned(backup))
			ttm_bo_unpin(&backup->ttm);
		xe_bo_put(backup);
	}
out_unlock_bo:
	if (unmap)
		xe_bo_vunmap(bo);
	xe_bo_unlock(bo);
out_backup:
	xe_bo_vunmap(backup);
	xe_bo_unlock(backup);
	if (!bo->backup_obj)
		xe_bo_put(backup);
	return ret;
}

@@ -1455,6 +1534,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
	struct xe_res_cursor cursor;
	struct xe_vram_region *vram;
	int bytes_left = len;
	int err = 0;

	xe_bo_assert_held(bo);
	xe_device_assert_mem_access(xe);
@@ -1462,9 +1542,14 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
		return -EIO;

	/* FIXME: Use GPU for non-visible VRAM */
	if (!xe_ttm_resource_visible(ttm_bo->resource))
		return -EIO;
	if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) {
		struct xe_migrate *migrate =
			mem_type_to_migrate(xe, ttm_bo->resource->mem_type);

		err = xe_migrate_access_memory(migrate, bo, offset, buf, len,
					       write);
		goto out;
	}

	vram = res_to_mem_region(ttm_bo->resource);
	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
@@ -1488,7 +1573,8 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
			xe_res_next(&cursor, PAGE_SIZE);
	} while (bytes_left);

	return len;
out:
	return err ?: len;
}

const struct ttm_device_funcs xe_ttm_funcs = {
@@ -1532,6 +1618,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
	if (bo->vm && xe_bo_is_user(bo))
		xe_vm_put(bo->vm);

	if (bo->parent_obj)
		xe_bo_put(bo->parent_obj);

	mutex_lock(&xe->mem_access.vram_userfault.lock);
	if (!list_empty(&bo->vram_userfault_link))
		list_del(&bo->vram_userfault_link);
@@ -2306,6 +2395,13 @@ void xe_bo_unpin(struct xe_bo *bo)
		xe_assert(xe, !list_empty(&bo->pinned_link));
		list_del_init(&bo->pinned_link);
		spin_unlock(&xe->pinned.lock);

		if (bo->backup_obj) {
			if (xe_bo_is_pinned(bo->backup_obj))
				ttm_bo_unpin(&bo->backup_obj->ttm);
			xe_bo_put(bo->backup_obj);
			bo->backup_obj = NULL;
		}
	}
	ttm_bo_unpin(&bo->ttm);
	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
Loading