Driver Changes:

  - Increase global invalidation timeout to handle some workloads
    (Kenneth Graunke)
  - Fix NPD while evicting BOs in an array of VM binds (Matthew Brost)
  - Fix resizable BAR to account for possibly needing to move BARs other
    than the LMEMBAR (Lucas De Marchi)
  - Fix error handling in xe_migrate_init() (Thomas Hellström)
  - Fix atomic fault handling with mixed mappings or if the page is
    already in VRAM (Matthew Brost)
  - Enable media samplers power gating for platforms before Xe2 (Vinay
    Belgaumkar)
  - Fix de-registering exec queue from GuC when unbinding (Matthew Brost)
  - Ensure data migration to system if indicated by madvise with SVM
    (Thomas Hellström)
  - Fix kerneldoc for kunit change (Matt Roper)
  - Always account for cacheline alignment on migration (Matthew Auld)
  - Drop bogus assertion on eviction (Matthew Auld)
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE6rM8lpABPHM5FqyDm6KlpjDL6lMFAmjxG4kACgkQm6KlpjDL
 6lMVSxAAi89fShCW8/H7TJUDfaQdX1qTg574x+4kmsxAe5IdZLKR17iKagGwhXYt
 pGPOHLL6s13trhSDT9RHxrQ/iUhlUMAf3HGZyeC0/X86QuKA0qGbrXoJTdexaA/V
 AyaXmyPCh4CsDP7o/QNfkmaH9Ze3tYniYPxKmQXIsbJbG6hK8jgREpE3UC0ilveX
 9rgA8t66W08CbPsHX8bLEgpQ6dchSZHOvHSaXvW3X1xDIi9P5kd2A3JPW9q+T15M
 84xtbxan6JDZx+xguIKimlUti6ihTSksxkAV6nKyg0I3n56iLarf0HN5MDM6ZExU
 8uS1ZmocaKqLji51LroIL+0X31H4VnQZlT/eehheBukW8SF6/jXEnq2PtxNy01Yi
 NJTCcwvvA0jMhK02tc9gcpHgJcmjp08lbymlZ0QdEp4gIQn5dpXubhcvdNeOmUK9
 NJMD8aE+9JnQ6iD8GFVjvdTSHKMpKtsNl2kUShOU3oK1KNHAqn/v3r4iM8VbGBff
 TaCxusNeVqFCcWkh4R58ppKdKiwLzitjc0xP9kjNFtGDVtPS11fluxQ+BhhrzFKk
 84wnhG8Lry7Ss5TpCAWjirxQOANx/q4Nef7uby6QAF9SLuon7Q2XU7ShLOlWIeTH
 AmtX57A8TxTrXa0Smn0rIP7/sYAdGfWDAdTDdJjAoJ36w8T2rgo=
 =BqxC
 -----END PGP SIGNATURE-----

Merge tag 'drm-xe-fixes-2025-10-16' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

Driver Changes:
 - Increase global invalidation timeout to handle some workloads
   (Kenneth Graunke)
 - Fix NPD while evicting BOs in an array of VM binds (Matthew Brost)
 - Fix resizable BAR to account for possibly needing to move BARs other
   than the LMEMBAR (Lucas De Marchi)
 - Fix error handling in xe_migrate_init() (Thomas Hellström)
 - Fix atomic fault handling with mixed mappings or if the page is
   already in VRAM (Matthew Brost)
 - Enable media samplers power gating for platforms before Xe2 (Vinay
   Belgaumkar)
 - Fix de-registering exec queue from GuC when unbinding (Matthew Brost)
 - Ensure data migration to system if indicated by madvise with SVM
   (Thomas Hellström)
 - Fix kerneldoc for kunit change (Matt Roper)
 - Always account for cacheline alignment on migration (Matthew Auld)
 - Drop bogus assertion on eviction (Matthew Auld)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/rch735eqkmprfyutk3ux2fsqa3e5ve4p77w7a5j66qdpgyquxr@ao3wzcqtpn6s
This commit is contained in:
Dave Airlie 2025-10-17 09:39:34 +10:00
commit 62cab426d0
13 changed files with 99 additions and 31 deletions

View File

@ -342,6 +342,7 @@
#define POWERGATE_ENABLE XE_REG(0xa210)
#define RENDER_POWERGATE_ENABLE REG_BIT(0)
#define MEDIA_POWERGATE_ENABLE REG_BIT(1)
#define MEDIA_SAMPLERS_POWERGATE_ENABLE REG_BIT(2)
#define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n))
#define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n))

View File

@ -66,6 +66,7 @@ KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc);
/**
* xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters
* @test: test context object
* @prev: the pointer to the previous parameter to iterate from or NULL
* @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
*
@ -242,6 +243,7 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc);
/**
* xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters
* @test: test context object
* @prev: the pointer to the previous parameter to iterate from or NULL
* @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
*
@ -266,6 +268,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param);
/**
* xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters
* @test: test context object
* @prev: the pointer to the previous parameter to iterate from or NULL
* @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
*
@ -290,6 +293,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param);
/**
* xe_pci_id_gen_param - Generate struct pci_device_id parameters
* @test: test context object
* @prev: the pointer to the previous parameter to iterate from or NULL
* @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
*
@ -376,6 +380,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
/**
* xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters
* @test: test context object
* @prev: the previously returned value, or NULL for the first iteration
* @desc: the buffer for a parameter name
*

View File

@ -182,7 +182,6 @@ int xe_bo_evict_all(struct xe_device *xe)
static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
{
struct xe_device *xe = xe_bo_device(bo);
int ret;
ret = xe_bo_restore_pinned(bo);
@ -201,13 +200,6 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
}
}
/*
* We expect validate to trigger a move VRAM and our move code
* should setup the iosys map.
*/
xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) ||
!iosys_map_is_null(&bo->vmap));
return 0;
}

View File

@ -1070,7 +1070,7 @@ void xe_device_l2_flush(struct xe_device *xe)
spin_lock(&gt->global_invl_lock);
xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);

View File

@ -124,6 +124,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
if (xe_gt_is_main_type(gt))
gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255)
gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE;
if (xe->info.platform != XE_DG1) {
for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
if ((gt->info.engine_mask & BIT(i)))
@ -246,6 +249,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n,
str_up_down(pg_status & media_slices[n].status_bit));
}
if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255)
drm_printf(p, "Media Samplers Power Gating Enabled: %s\n",
str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE));
return 0;
}

View File

@ -44,6 +44,7 @@
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"
static struct xe_guc *
@ -1489,7 +1490,17 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
trace_xe_exec_queue_cleanup_entity(q);
if (exec_queue_registered(q))
/*
* Expected state transitions for cleanup:
* - If the exec queue is registered and GuC firmware is running, we must first
* disable scheduling and deregister the queue to ensure proper teardown and
* resource release in the GuC, then destroy the exec queue on driver side.
* - If the GuC is already stopped (e.g., during driver unload or GPU reset),
* we cannot expect a response for the deregister request. In this case,
* it is safe to directly destroy the exec queue on driver side, as the GuC
* will not process further requests and all resources must be cleaned up locally.
*/
if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
disable_scheduling_deregister(guc, q);
else
__guc_exec_queue_destroy(guc, q);

View File

@ -434,7 +434,7 @@ int xe_migrate_init(struct xe_migrate *m)
err = xe_migrate_lock_prepare_vm(tile, m, vm);
if (err)
return err;
goto err_out;
if (xe->info.has_usm) {
struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
@ -2113,7 +2113,9 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
if (current_bytes & ~PAGE_MASK) {
int pitch = 4;
current_bytes = min_t(int, current_bytes, S16_MAX * pitch);
current_bytes = min_t(int, current_bytes,
round_down(S16_MAX * pitch,
XE_CACHELINE_BYTES));
}
__fence = xe_migrate_vram(m, current_bytes,

View File

@ -867,6 +867,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
xe_vram_resize_bar(xe);
err = xe_device_probe_early(xe);
/*
* In Boot Survivability mode, no drm card is exposed and driver

View File

@ -1034,6 +1034,9 @@ retry:
if (err)
return err;
dpagemap = xe_vma_resolve_pagemap(vma, tile);
if (!dpagemap && !ctx.devmem_only)
ctx.device_private_page_owner = NULL;
range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
if (IS_ERR(range))
@ -1054,7 +1057,6 @@ retry:
range_debug(range, "PAGE FAULT");
dpagemap = xe_vma_resolve_pagemap(vma, tile);
if (--migrate_try_count >= 0 &&
xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
ktime_t migrate_start = xe_svm_stats_ktime_get();
@ -1073,7 +1075,17 @@ retry:
drm_dbg(&vm->xe->drm,
"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
vm->usm.asid, ERR_PTR(err));
goto retry;
/*
* In the devmem-only case, mixed mappings may
* be found. The get_pages function will fix
* these up to a single location, allowing the
* page fault handler to make forward progress.
*/
if (ctx.devmem_only)
goto get_pages;
else
goto retry;
} else {
drm_err(&vm->xe->drm,
"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
@ -1083,6 +1095,7 @@ retry:
}
}
get_pages:
get_pages_start = xe_svm_stats_ktime_get();
range_debug(range, "GET PAGES");

View File

@ -2832,7 +2832,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
}
static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
bool validate)
bool res_evict, bool validate)
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
@ -2843,7 +2843,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
err = drm_exec_lock_obj(exec, &bo->ttm.base);
if (!err && validate)
err = xe_bo_validate(bo, vm,
!xe_vm_in_preempt_fence_mode(vm), exec);
!xe_vm_in_preempt_fence_mode(vm) &&
res_evict, exec);
}
return err;
@ -2913,14 +2914,23 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
}
static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
struct xe_vma_op *op)
struct xe_vma_ops *vops, struct xe_vma_op *op)
{
int err = 0;
bool res_evict;
/*
* We only allow evicting a BO within the VM if it is not part of an
* array of binds, as an array of binds can evict another BO within the
* bind.
*/
res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
if (!op->map.invalidate_on_bind)
err = vma_lock_and_validate(exec, op->map.vma,
res_evict,
!xe_vm_in_fault_mode(vm) ||
op->map.immediate);
break;
@ -2931,11 +2941,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.remap.unmap->va),
false);
res_evict, false);
if (!err && op->remap.prev)
err = vma_lock_and_validate(exec, op->remap.prev, true);
err = vma_lock_and_validate(exec, op->remap.prev,
res_evict, true);
if (!err && op->remap.next)
err = vma_lock_and_validate(exec, op->remap.next, true);
err = vma_lock_and_validate(exec, op->remap.next,
res_evict, true);
break;
case DRM_GPUVA_OP_UNMAP:
err = check_ufence(gpuva_to_vma(op->base.unmap.va));
@ -2944,7 +2956,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.unmap.va),
false);
res_evict, false);
break;
case DRM_GPUVA_OP_PREFETCH:
{
@ -2959,7 +2971,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.prefetch.va),
false);
res_evict, false);
if (!err && !xe_vma_has_no_bo(vma))
err = xe_bo_migrate(xe_vma_bo(vma),
region_to_mem_type[region],
@ -3005,7 +3017,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
return err;
list_for_each_entry(op, &vops->list, link) {
err = op_lock_and_prep(exec, vm, op);
err = op_lock_and_prep(exec, vm, vops, op);
if (err)
return err;
}
@ -3638,6 +3650,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
if (args->num_binds > 1)
vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
for (i = 0; i < args->num_binds; ++i) {
u64 range = bind_ops[i].range;
u64 addr = bind_ops[i].addr;

View File

@ -476,6 +476,7 @@ struct xe_vma_ops {
/** @flag: signify the properties within xe_vma_ops*/
#define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0)
#define XE_VMA_OPS_FLAG_MADVISE BIT(1)
#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2)
u32 flags;
#ifdef TEST_VM_OPS_ERROR
/** @inject_error: inject error to test error handling */

View File

@ -26,15 +26,35 @@
#define BAR_SIZE_SHIFT 20
static void
_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
/*
* Release all the BARs that could influence/block LMEMBAR resizing, i.e.
* assigned IORESOURCE_MEM_64 BARs
*/
static void release_bars(struct pci_dev *pdev)
{
struct resource *res;
int i;
pci_dev_for_each_resource(pdev, res, i) {
/* Resource already un-assigned, do not reset it */
if (!res->parent)
continue;
/* No need to release unrelated BARs */
if (!(res->flags & IORESOURCE_MEM_64))
continue;
pci_release_resource(pdev, i);
}
}
static void resize_bar(struct xe_device *xe, int resno, resource_size_t size)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
int bar_size = pci_rebar_bytes_to_size(size);
int ret;
if (pci_resource_len(pdev, resno))
pci_release_resource(pdev, resno);
release_bars(pdev);
ret = pci_resize_resource(pdev, resno, bar_size);
if (ret) {
@ -50,7 +70,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
* if force_vram_bar_size is set, attempt to set to the requested size
* else set to maximum possible size
*/
static void resize_vram_bar(struct xe_device *xe)
void xe_vram_resize_bar(struct xe_device *xe)
{
int force_vram_bar_size = xe_modparam.force_vram_bar_size;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@ -119,7 +139,7 @@ static void resize_vram_bar(struct xe_device *xe)
pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
_resize_bar(xe, LMEM_BAR, rebar_size);
resize_bar(xe, LMEM_BAR, rebar_size);
pci_assign_unassigned_bus_resources(pdev->bus);
pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
@ -148,8 +168,6 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region *
return -ENXIO;
}
resize_vram_bar(xe);
lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR);
lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR);
if (!lmem_bar->io_size)

View File

@ -11,6 +11,7 @@
struct xe_device;
struct xe_vram_region;
void xe_vram_resize_bar(struct xe_device *xe);
int xe_vram_probe(struct xe_device *xe);
struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement);