linux-net/drivers/gpu/drm/xe/xe_guc_submit.c
Matthew Brost 8ae8a2e8dd drm/xe: Long running job update
For long running (LR) jobs with the DRM scheduler we must return NULL in
run_job, which results in signaling the job's finished fence immediately.
This prevents LR jobs from creating infinite dma-fences.

Signaling the job's finished fence immediately breaks flow control of the
ring with the DRM scheduler. To work around this, the ring is flow controlled
and written in the exec IOCTL. Signaling the job's finished fence immediately
also breaks the TDR, which is used in the reset / cleanup entity paths, so
write a new path for LR entities.

v2: Better commit, white space, remove rmb(), better comment next to
emit_job()
v3 (Thomas): Change LR reference counting, fix wording in commit

Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2023-12-21 11:34:44 -05:00


// SPDX-License-Identifier: MIT
/*
* Copyright © 2022 Intel Corporation
*/
#include "xe_guc_submit.h"
#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>
#include <drm/drm_managed.h>
#include "regs/xe_lrc_layout.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_engine.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_engine_types.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"
static struct xe_gt *
guc_to_gt(struct xe_guc *guc)
{
return container_of(guc, struct xe_gt, uc.guc);
}
static struct xe_device *
guc_to_xe(struct xe_guc *guc)
{
return gt_to_xe(guc_to_gt(guc));
}
static struct xe_guc *
engine_to_guc(struct xe_engine *e)
{
return &e->gt->uc.guc;
}
/*
* Helpers for engine state, using an atomic as some of the bits can transition
* at the same time (e.g. a suspend can be happening at the same time as a
* schedule engine done being processed).
*/
#define ENGINE_STATE_REGISTERED (1 << 0)
#define ENGINE_STATE_ENABLED (1 << 1)
#define ENGINE_STATE_PENDING_ENABLE (1 << 2)
#define ENGINE_STATE_PENDING_DISABLE (1 << 3)
#define ENGINE_STATE_DESTROYED (1 << 4)
#define ENGINE_STATE_SUSPENDED (1 << 5)
#define ENGINE_STATE_RESET (1 << 6)
#define ENGINE_STATE_KILLED (1 << 7)
static bool engine_registered(struct xe_engine *e)
{
return atomic_read(&e->guc->state) & ENGINE_STATE_REGISTERED;
}
static void set_engine_registered(struct xe_engine *e)
{
atomic_or(ENGINE_STATE_REGISTERED, &e->guc->state);
}
static void clear_engine_registered(struct xe_engine *e)
{
atomic_and(~ENGINE_STATE_REGISTERED, &e->guc->state);
}
static bool engine_enabled(struct xe_engine *e)
{
return atomic_read(&e->guc->state) & ENGINE_STATE_ENABLED;
}
static void set_engine_enabled(struct xe_engine *e)
{
atomic_or(ENGINE_STATE_ENABLED, &e->guc->state);
}
static void clear_engine_enabled(struct xe_engine *e)
{
atomic_and(~ENGINE_STATE_ENABLED, &e->guc->state);
}
static bool engine_pending_enable(struct xe_engine *e)
{
return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_ENABLE;
}
static void set_engine_pending_enable(struct xe_engine *e)
{
atomic_or(ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
}
static void clear_engine_pending_enable(struct xe_engine *e)
{
atomic_and(~ENGINE_STATE_PENDING_ENABLE, &e->guc->state);
}
static bool engine_pending_disable(struct xe_engine *e)
{
return atomic_read(&e->guc->state) & ENGINE_STATE_PENDING_DISABLE;
}
static void set_engine_pending_disable(struct xe_engine *e)
{
atomic_or(ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
}
static void clear_engine_pending_disable(struct xe_engine *e)
{
atomic_and(~ENGINE_STATE_PENDING_DISABLE, &e->guc->state);
}
static bool engine_destroyed(struct xe_engine *e)
{
return atomic_read(&e->guc->state) & ENGINE_STATE_DESTROYED;
}
static void set_engine_destroyed(struct xe_engine *e)
{
atomic_or(ENGINE_STATE_DESTROYED, &e->guc->state);
}
static bool engine_banned(struct xe_engine *e)
{
return (e->flags & ENGINE_FLAG_BANNED);
}
static void set_engine_banned(struct xe_engine *e)
{
e->flags |= ENGINE_FLAG_BANNED;
}
static bool engine_suspended(struct xe_engine *e)
{
return atomic_read(&e->guc->state) & ENGINE_STATE_SUSPENDED;
}
static void set_engine_suspended(struct xe_engine *e)
{
atomic_or(ENGINE_STATE_SUSPENDED, &e->guc->state);
}
static void clear_engine_suspended(struct xe_engine *e)
{
atomic_and(~ENGINE_STATE_SUSPENDED, &e->guc->state);
}
static bool engine_reset(struct xe_engine *e)
{
return atomic_read(&e->guc->state) & ENGINE_STATE_RESET;
}
static void set_engine_reset(struct xe_engine *e)
{
atomic_or(ENGINE_STATE_RESET, &e->guc->state);
}
static bool engine_killed(struct xe_engine *e)
{
return atomic_read(&e->guc->state) & ENGINE_STATE_KILLED;
}
static void set_engine_killed(struct xe_engine *e)
{
atomic_or(ENGINE_STATE_KILLED, &e->guc->state);
}
static bool engine_killed_or_banned(struct xe_engine *e)
{
return engine_killed(e) || engine_banned(e);
}
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
xa_destroy(&guc->submission_state.engine_lookup);
ida_destroy(&guc->submission_state.guc_ids);
bitmap_free(guc->submission_state.guc_ids_bitmap);
}
#define GUC_ID_MAX 65535
#define GUC_ID_NUMBER_MLRC 4096
#define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
#define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC
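/*
* The GuC ID space is split in two: single-LRC engines get an ID from an IDA
* in the range [0, GUC_ID_NUMBER_SLRC), while parallel (multi-LRC) engines
* allocate a contiguous region from a bitmap, offset by GUC_ID_START_MLRC.
*/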
static const struct xe_engine_ops guc_engine_ops;
static void primelockdep(struct xe_guc *guc)
{
if (!IS_ENABLED(CONFIG_LOCKDEP))
return;
fs_reclaim_acquire(GFP_KERNEL);
mutex_lock(&guc->submission_state.lock);
might_lock(&guc->submission_state.suspend.lock);
mutex_unlock(&guc->submission_state.lock);
fs_reclaim_release(GFP_KERNEL);
}
int xe_guc_submit_init(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
int err;
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
if (!guc->submission_state.guc_ids_bitmap)
return -ENOMEM;
gt->engine_ops = &guc_engine_ops;
mutex_init(&guc->submission_state.lock);
xa_init(&guc->submission_state.engine_lookup);
ida_init(&guc->submission_state.guc_ids);
spin_lock_init(&guc->submission_state.suspend.lock);
guc->submission_state.suspend.context = dma_fence_context_alloc(1);
primelockdep(guc);
err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
if (err)
return err;
return 0;
}
static int alloc_guc_id(struct xe_guc *guc, struct xe_engine *e)
{
int ret;
void *ptr;
/*
* Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
* worst case the user gets -ENOMEM on engine create and has to try again.
*
* FIXME: Have caller pre-alloc or post-alloc w/ GFP_KERNEL to prevent
* failure.
*/
lockdep_assert_held(&guc->submission_state.lock);
if (xe_engine_is_parallel(e)) {
void *bitmap = guc->submission_state.guc_ids_bitmap;
ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
order_base_2(e->width));
} else {
ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
}
if (ret < 0)
return ret;
e->guc->id = ret;
if (xe_engine_is_parallel(e))
e->guc->id += GUC_ID_START_MLRC;
ptr = xa_store(&guc->submission_state.engine_lookup,
e->guc->id, e, GFP_NOWAIT);
if (IS_ERR(ptr)) {
ret = PTR_ERR(ptr);
goto err_release;
}
return 0;
err_release:
ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
return ret;
}
static void release_guc_id(struct xe_guc *guc, struct xe_engine *e)
{
mutex_lock(&guc->submission_state.lock);
xa_erase(&guc->submission_state.engine_lookup, e->guc->id);
if (xe_engine_is_parallel(e))
bitmap_release_region(guc->submission_state.guc_ids_bitmap,
e->guc->id - GUC_ID_START_MLRC,
order_base_2(e->width));
else
ida_simple_remove(&guc->submission_state.guc_ids, e->guc->id);
mutex_unlock(&guc->submission_state.lock);
}
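/*
* Engine scheduling policies (priority, execution quantum / timeslice and
* preemption timeout) are sent to the GuC as a single H2G message built from
* KLV (key-length-value) entries, see init_policies().
*/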
struct engine_policy {
u32 count;
struct guc_update_engine_policy h2g;
};
static u32 __guc_engine_policy_action_size(struct engine_policy *policy)
{
size_t bytes = sizeof(policy->h2g.header) +
(sizeof(policy->h2g.klv[0]) * policy->count);
return bytes / sizeof(u32);
}
static void __guc_engine_policy_start_klv(struct engine_policy *policy,
u16 guc_id)
{
policy->h2g.header.action =
XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
policy->h2g.header.guc_id = guc_id;
policy->count = 0;
}
#define MAKE_ENGINE_POLICY_ADD(func, id) \
static void __guc_engine_policy_add_##func(struct engine_policy *policy, \
u32 data) \
{ \
XE_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
policy->h2g.klv[policy->count].kl = \
FIELD_PREP(GUC_KLV_0_KEY, \
GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
FIELD_PREP(GUC_KLV_0_LEN, 1); \
policy->h2g.klv[policy->count].value = data; \
policy->count++; \
}
MAKE_ENGINE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_ENGINE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_ENGINE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
#undef MAKE_ENGINE_POLICY_ADD
static const int xe_engine_prio_to_guc[] = {
[XE_ENGINE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
[XE_ENGINE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
[XE_ENGINE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
[XE_ENGINE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};
static void init_policies(struct xe_guc *guc, struct xe_engine *e)
{
struct engine_policy policy;
enum xe_engine_priority prio = e->priority;
u32 timeslice_us = e->sched_props.timeslice_us;
u32 preempt_timeout_us = e->sched_props.preempt_timeout_us;
XE_BUG_ON(!engine_registered(e));
__guc_engine_policy_start_klv(&policy, e->guc->id);
__guc_engine_policy_add_priority(&policy, xe_engine_prio_to_guc[prio]);
__guc_engine_policy_add_execution_quantum(&policy, timeslice_us);
__guc_engine_policy_add_preemption_timeout(&policy, preempt_timeout_us);
xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
__guc_engine_policy_action_size(&policy), 0, 0);
}
static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
{
struct engine_policy policy;
__guc_engine_policy_start_klv(&policy, e->guc->id);
__guc_engine_policy_add_preemption_timeout(&policy, 1);
xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
__guc_engine_policy_action_size(&policy), 0, 0);
}
#define parallel_read(xe_, map_, field_) \
xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
field_)
#define parallel_write(xe_, map_, field_, val_) \
xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
field_, val_)
static void __register_mlrc_engine(struct xe_guc *guc,
struct xe_engine *e,
struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
u32 action[MAX_MLRC_REG_SIZE];
int len = 0;
int i;
XE_BUG_ON(!xe_engine_is_parallel(e));
action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
action[len++] = info->flags;
action[len++] = info->context_idx;
action[len++] = info->engine_class;
action[len++] = info->engine_submit_mask;
action[len++] = info->wq_desc_lo;
action[len++] = info->wq_desc_hi;
action[len++] = info->wq_base_lo;
action[len++] = info->wq_base_hi;
action[len++] = info->wq_size;
action[len++] = e->width;
action[len++] = info->hwlrca_lo;
action[len++] = info->hwlrca_hi;
for (i = 1; i < e->width; ++i) {
struct xe_lrc *lrc = e->lrc + i;
action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
}
XE_BUG_ON(len > MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE
xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}
static void __register_engine(struct xe_guc *guc,
struct guc_ctxt_registration_info *info)
{
u32 action[] = {
XE_GUC_ACTION_REGISTER_CONTEXT,
info->flags,
info->context_idx,
info->engine_class,
info->engine_submit_mask,
info->wq_desc_lo,
info->wq_desc_hi,
info->wq_base_lo,
info->wq_base_hi,
info->wq_size,
info->hwlrca_lo,
info->hwlrca_hi,
};
xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}
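/*
* Register an engine (context) with the GuC. Parallel engines use the
* multi-LRC registration action and additionally describe their work queue and
* the extra LRC descriptors. Scheduling policies are initialized right after
* registration.
*/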
static void register_engine(struct xe_engine *e)
{
struct xe_guc *guc = engine_to_guc(e);
struct xe_device *xe = guc_to_xe(guc);
struct xe_lrc *lrc = e->lrc;
struct guc_ctxt_registration_info info;
XE_BUG_ON(engine_registered(e));
memset(&info, 0, sizeof(info));
info.context_idx = e->guc->id;
info.engine_class = xe_engine_class_to_guc_class(e->class);
info.engine_submit_mask = e->logical_mask;
info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
if (xe_engine_is_parallel(e)) {
u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
struct iosys_map map = xe_lrc_parallel_map(lrc);
info.wq_desc_lo = lower_32_bits(ggtt_addr +
offsetof(struct guc_submit_parallel_scratch, wq_desc));
info.wq_desc_hi = upper_32_bits(ggtt_addr +
offsetof(struct guc_submit_parallel_scratch, wq_desc));
info.wq_base_lo = lower_32_bits(ggtt_addr +
offsetof(struct guc_submit_parallel_scratch, wq[0]));
info.wq_base_hi = upper_32_bits(ggtt_addr +
offsetof(struct guc_submit_parallel_scratch, wq[0]));
info.wq_size = WQ_SIZE;
e->guc->wqi_head = 0;
e->guc->wqi_tail = 0;
xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
}
/*
* We must keep a reference for LR engines while they are registered with
* the GuC: their job fences signal immediately, and an engine can't be
* destroyed while the GuC still holds a reference to it.
*/
if (xe_engine_is_lr(e))
xe_engine_get(e);
set_engine_registered(e);
trace_xe_engine_register(e);
if (xe_engine_is_parallel(e))
__register_mlrc_engine(guc, e, &info);
else
__register_engine(guc, &info);
init_policies(guc, e);
}
static u32 wq_space_until_wrap(struct xe_engine *e)
{
return (WQ_SIZE - e->guc->wqi_tail);
}
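/*
* The parallel work queue (WQ) is a circular buffer shared with the GuC. The
* GuC updates the head in the WQ descriptor; if there is not enough space for
* a new item, re-read the head with an exponential backoff and trigger a GT
* reset if no space frees up after roughly a second.
*/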
static int wq_wait_for_space(struct xe_engine *e, u32 wqi_size)
{
struct xe_guc *guc = engine_to_guc(e);
struct xe_device *xe = guc_to_xe(guc);
struct iosys_map map = xe_lrc_parallel_map(e->lrc);
unsigned int sleep_period_ms = 1;
#define AVAILABLE_SPACE \
CIRC_SPACE(e->guc->wqi_tail, e->guc->wqi_head, WQ_SIZE)
if (wqi_size > AVAILABLE_SPACE) {
try_again:
e->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
if (wqi_size > AVAILABLE_SPACE) {
if (sleep_period_ms == 1024) {
xe_gt_reset_async(e->gt);
return -ENODEV;
}
msleep(sleep_period_ms);
sleep_period_ms <<= 1;
goto try_again;
}
}
#undef AVAILABLE_SPACE
return 0;
}
static int wq_noop_append(struct xe_engine *e)
{
struct xe_guc *guc = engine_to_guc(e);
struct xe_device *xe = guc_to_xe(guc);
struct iosys_map map = xe_lrc_parallel_map(e->lrc);
u32 len_dw = wq_space_until_wrap(e) / sizeof(u32) - 1;
if (wq_wait_for_space(e, wq_space_until_wrap(e)))
return -ENODEV;
XE_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw));
parallel_write(xe, map, wq[e->guc->wqi_tail / sizeof(u32)],
FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
FIELD_PREP(WQ_LEN_MASK, len_dw));
e->guc->wqi_tail = 0;
return 0;
}
static void wq_item_append(struct xe_engine *e)
{
struct xe_guc *guc = engine_to_guc(e);
struct xe_device *xe = guc_to_xe(guc);
struct iosys_map map = xe_lrc_parallel_map(e->lrc);
u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + 3];
u32 wqi_size = (e->width + 3) * sizeof(u32);
u32 len_dw = (wqi_size / sizeof(u32)) - 1;
int i = 0, j;
if (wqi_size > wq_space_until_wrap(e)) {
if (wq_noop_append(e))
return;
}
if (wq_wait_for_space(e, wqi_size))
return;
wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
FIELD_PREP(WQ_LEN_MASK, len_dw);
wqi[i++] = xe_lrc_descriptor(e->lrc);
wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, e->guc->id) |
FIELD_PREP(WQ_RING_TAIL_MASK, e->lrc->ring.tail / sizeof(u64));
wqi[i++] = 0;
for (j = 1; j < e->width; ++j) {
struct xe_lrc *lrc = e->lrc + j;
wqi[i++] = lrc->ring.tail / sizeof(u64);
}
XE_BUG_ON(i != wqi_size / sizeof(u32));
iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
wq[e->guc->wqi_tail / sizeof(u32)]));
xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
e->guc->wqi_tail += wqi_size;
XE_BUG_ON(e->guc->wqi_tail > WQ_SIZE);
xe_device_wmb(xe);
map = xe_lrc_parallel_map(e->lrc);
parallel_write(xe, map, wq_desc.tail, e->guc->wqi_tail);
}
#define RESUME_PENDING ~0x0ull
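/*
* Submit an engine's new ring tail to the GuC. Parallel engines append a work
* queue item, others write CTX_RING_TAIL directly. If scheduling is not yet
* enabled, it is enabled via SCHED_CONTEXT_MODE_SET (with an extra
* SCHED_CONTEXT for parallel engines); otherwise a SCHED_CONTEXT doorbell is
* enough. Suspended non-parallel engines only get their ring tail updated.
*/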
static void submit_engine(struct xe_engine *e)
{
struct xe_guc *guc = engine_to_guc(e);
struct xe_lrc *lrc = e->lrc;
u32 action[3];
u32 g2h_len = 0;
u32 num_g2h = 0;
int len = 0;
bool extra_submit = false;
XE_BUG_ON(!engine_registered(e));
if (xe_engine_is_parallel(e))
wq_item_append(e);
else
xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
if (engine_suspended(e) && !xe_engine_is_parallel(e))
return;
if (!engine_enabled(e) && !engine_suspended(e)) {
action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
action[len++] = e->guc->id;
action[len++] = GUC_CONTEXT_ENABLE;
g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
num_g2h = 1;
if (xe_engine_is_parallel(e))
extra_submit = true;
e->guc->resume_time = RESUME_PENDING;
set_engine_pending_enable(e);
set_engine_enabled(e);
trace_xe_engine_scheduling_enable(e);
} else {
action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
action[len++] = e->guc->id;
trace_xe_engine_submit(e);
}
xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
if (extra_submit) {
len = 0;
action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
action[len++] = e->guc->id;
trace_xe_engine_submit(e);
xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}
}
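/*
* DRM scheduler run_job callback. Jobs are only emitted and submitted if the
* engine is neither killed nor banned and the job carries no error. LR jobs
* are written to the ring in the exec IOCTL, so here they are only submitted;
* their fence is marked -EOPNOTSUPP and NULL is returned, which makes the
* scheduler signal the job's finished fence immediately (no long running
* dma-fences). For all other jobs the hardware fence is returned.
*/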
static struct dma_fence *
guc_engine_run_job(struct drm_sched_job *drm_job)
{
struct xe_sched_job *job = to_xe_sched_job(drm_job);
struct xe_engine *e = job->engine;
bool lr = xe_engine_is_lr(e);
XE_BUG_ON((engine_destroyed(e) || engine_pending_disable(e)) &&
!engine_banned(e) && !engine_suspended(e));
trace_xe_sched_job_run(job);
if (!engine_killed_or_banned(e) && !xe_sched_job_is_error(job)) {
if (!engine_registered(e))
register_engine(e);
if (!lr) /* LR jobs are emitted in the exec IOCTL */
e->ring_ops->emit_job(job);
submit_engine(e);
}
if (lr) {
xe_sched_job_set_error(job, -EOPNOTSUPP);
return NULL;
} else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
return job->fence;
} else {
return dma_fence_get(job->fence);
}
}
static void guc_engine_free_job(struct drm_sched_job *drm_job)
{
struct xe_sched_job *job = to_xe_sched_job(drm_job);
trace_xe_sched_job_free(job);
xe_sched_job_put(job);
}
static int guc_read_stopped(struct xe_guc *guc)
{
return atomic_read(&guc->submission_state.stopped);
}
#define MAKE_SCHED_CONTEXT_ACTION(e, enable_disable) \
u32 action[] = { \
XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
e->guc->id, \
GUC_CONTEXT_##enable_disable, \
}
static void disable_scheduling_deregister(struct xe_guc *guc,
struct xe_engine *e)
{
MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
int ret;
set_min_preemption_timeout(guc, e);
smp_rmb();
ret = wait_event_timeout(guc->ct.wq, !engine_pending_enable(e) ||
guc_read_stopped(guc), HZ * 5);
if (!ret) {
struct xe_gpu_scheduler *sched = &e->guc->sched;
XE_WARN_ON("Pending enable failed to respond");
xe_sched_submission_start(sched);
xe_gt_reset_async(e->gt);
xe_sched_tdr_queue_imm(sched);
return;
}
clear_engine_enabled(e);
set_engine_pending_disable(e);
set_engine_destroyed(e);
trace_xe_engine_scheduling_disable(e);
/*
* Reserve space for both G2Hs here as the 2nd G2H is sent from a G2H
* handler and we are not allowed to reserve G2H space in handlers.
*/
xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}
static void guc_engine_print(struct xe_engine *e, struct drm_printer *p);
#if IS_ENABLED(CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE)
static void simple_error_capture(struct xe_engine *e)
{
struct xe_guc *guc = engine_to_guc(e);
struct drm_printer p = drm_err_printer("");
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
u32 adj_logical_mask = e->logical_mask;
u32 width_mask = (0x1 << e->width) - 1;
int i;
bool cookie;
if (e->vm && !e->vm->error_capture.capture_once) {
e->vm->error_capture.capture_once = true;
cookie = dma_fence_begin_signalling();
for (i = 0; e->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
if (adj_logical_mask & BIT(i)) {
adj_logical_mask |= width_mask << i;
i += e->width;
} else {
++i;
}
}
xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
xe_guc_ct_print(&guc->ct, &p, true);
guc_engine_print(e, &p);
for_each_hw_engine(hwe, guc_to_gt(guc), id) {
if (hwe->class != e->hwe->class ||
!(BIT(hwe->logical_instance) & adj_logical_mask))
continue;
xe_hw_engine_print(hwe, &p);
}
xe_analyze_vm(&p, e->vm, e->gt->info.id);
xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
dma_fence_end_signalling(cookie);
}
}
#else
static void simple_error_capture(struct xe_engine *e)
{
}
#endif
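/*
* LR engines have no TDR (their finished fences signal immediately), so their
* cleanup is routed to the lr_tdr worker; all other engines queue the DRM
* scheduler TDR immediately.
*/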
static void xe_guc_engine_trigger_cleanup(struct xe_engine *e)
{
struct xe_guc *guc = engine_to_guc(e);
if (xe_engine_is_lr(e))
queue_work(guc_to_gt(guc)->ordered_wq, &e->guc->lr_tdr);
else
xe_sched_tdr_queue_imm(&e->guc->sched);
}
static void xe_guc_engine_lr_cleanup(struct work_struct *w)
{
struct xe_guc_engine *ge =
container_of(w, struct xe_guc_engine, lr_tdr);
struct xe_engine *e = ge->engine;
struct xe_gpu_scheduler *sched = &ge->sched;
XE_WARN_ON(!xe_engine_is_lr(e));
trace_xe_engine_lr_cleanup(e);
/* Kill the run_job / process_msg entry points */
xe_sched_submission_stop(sched);
/* Engine state now stable, disable scheduling / deregister if needed */
if (engine_registered(e)) {
struct xe_guc *guc = engine_to_guc(e);
int ret;
set_engine_banned(e);
disable_scheduling_deregister(guc, e);
/*
* Must wait for scheduling to be disabled before signalling
* any fences; if the GT is broken, the GT reset code should
* signal us.
*/
ret = wait_event_timeout(guc->ct.wq,
!engine_pending_disable(e) ||
guc_read_stopped(guc), HZ * 5);
if (!ret) {
XE_WARN_ON("Schedule disable failed to respond");
xe_sched_submission_start(sched);
xe_gt_reset_async(e->gt);
return;
}
}
xe_sched_submission_start(sched);
}
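/*
* DRM scheduler TDR (timeout) callback: capture error state, ban the engine
* (kernel / VM engines trigger a GT reset instead), disable scheduling in the
* GuC and mark all outstanding jobs with an error so their fences complete.
*/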
static enum drm_gpu_sched_stat
guc_engine_timedout_job(struct drm_sched_job *drm_job)
{
struct xe_sched_job *job = to_xe_sched_job(drm_job);
struct xe_sched_job *tmp_job;
struct xe_engine *e = job->engine;
struct xe_gpu_scheduler *sched = &e->guc->sched;
struct xe_device *xe = guc_to_xe(engine_to_guc(e));
int err = -ETIME;
int i = 0;
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
XE_WARN_ON(e->flags & ENGINE_FLAG_KERNEL);
XE_WARN_ON(e->flags & ENGINE_FLAG_VM && !engine_killed(e));
drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
xe_sched_job_seqno(job), e->guc->id, e->flags);
simple_error_capture(e);
xe_devcoredump(e);
} else {
drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
xe_sched_job_seqno(job), e->guc->id, e->flags);
}
trace_xe_sched_job_timedout(job);
/* Kill the run_job entry point */
xe_sched_submission_stop(sched);
/*
* Kernel jobs should never fail, nor should VM jobs; if they do,
* something has gone wrong and the GT needs a reset.
*/
if (e->flags & ENGINE_FLAG_KERNEL ||
(e->flags & ENGINE_FLAG_VM && !engine_killed(e))) {
if (!xe_sched_invalidate_job(job, 2)) {
xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
xe_gt_reset_async(e->gt);
goto out;
}
}
/* Engine state now stable, disable scheduling if needed */
if (engine_enabled(e)) {
struct xe_guc *guc = engine_to_guc(e);
int ret;
if (engine_reset(e))
err = -EIO;
set_engine_banned(e);
xe_engine_get(e);
disable_scheduling_deregister(guc, e);
/*
* Must wait for scheduling to be disabled before signalling
* any fences; if the GT is broken, the GT reset code should
* signal us.
*
* FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
* error) messages which can cause the schedule disable to get
* lost. If this occurs, trigger a GT reset to recover.
*/
smp_rmb();
ret = wait_event_timeout(guc->ct.wq,
!engine_pending_disable(e) ||
guc_read_stopped(guc), HZ * 5);
if (!ret) {
XE_WARN_ON("Schedule disable failed to respond");
xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
xe_gt_reset_async(e->gt);
xe_sched_tdr_queue_imm(sched);
goto out;
}
}
/* Stop fence signaling */
xe_hw_fence_irq_stop(e->fence_irq);
/*
* Fence state now stable, stop / start scheduler which cleans up any
* fences that are complete
*/
xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
xe_guc_engine_trigger_cleanup(e);
/* Mark all outstanding jobs as bad, thus completing them */
spin_lock(&sched->base.job_list_lock);
list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
spin_unlock(&sched->base.job_list_lock);
/* Start fence signaling */
xe_hw_fence_irq_start(e->fence_irq);
out:
return DRM_GPU_SCHED_STAT_NOMINAL;
}
static void __guc_engine_fini_async(struct work_struct *w)
{
struct xe_guc_engine *ge =
container_of(w, struct xe_guc_engine, fini_async);
struct xe_engine *e = ge->engine;
struct xe_guc *guc = engine_to_guc(e);
trace_xe_engine_destroy(e);
if (xe_engine_is_lr(e))
cancel_work_sync(&ge->lr_tdr);
if (e->flags & ENGINE_FLAG_PERSISTENT)
xe_device_remove_persistent_engines(gt_to_xe(e->gt), e);
release_guc_id(guc, e);
xe_sched_entity_fini(&ge->entity);
xe_sched_fini(&ge->sched);
if (!(e->flags & ENGINE_FLAG_KERNEL)) {
kfree(ge);
xe_engine_fini(e);
}
}
static void guc_engine_fini_async(struct xe_engine *e)
{
bool kernel = e->flags & ENGINE_FLAG_KERNEL;
INIT_WORK(&e->guc->fini_async, __guc_engine_fini_async);
queue_work(system_wq, &e->guc->fini_async);
/* We must block on kernel engines so slabs are empty on driver unload */
if (kernel) {
struct xe_guc_engine *ge = e->guc;
flush_work(&ge->fini_async);
kfree(ge);
xe_engine_fini(e);
}
}
static void __guc_engine_fini(struct xe_guc *guc, struct xe_engine *e)
{
/*
* Might be done from within the GPU scheduler, need to do async as we
* fini the scheduler when the engine is fini'd, and the scheduler can't
* complete fini within itself (circular dependency). Async resolves
* this; we don't really care when everything is fini'd, just that it
* is.
*/
guc_engine_fini_async(e);
}
static void __guc_engine_process_msg_cleanup(struct xe_sched_msg *msg)
{
struct xe_engine *e = msg->private_data;
struct xe_guc *guc = engine_to_guc(e);
XE_BUG_ON(e->flags & ENGINE_FLAG_KERNEL);
trace_xe_engine_cleanup_entity(e);
if (engine_registered(e))
disable_scheduling_deregister(guc, e);
else
__guc_engine_fini(guc, e);
}
static bool guc_engine_allowed_to_change_state(struct xe_engine *e)
{
return !engine_killed_or_banned(e) && engine_registered(e);
}
static void __guc_engine_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
struct xe_engine *e = msg->private_data;
struct xe_guc *guc = engine_to_guc(e);
if (guc_engine_allowed_to_change_state(e))
init_policies(guc, e);
kfree(msg);
}
static void suspend_fence_signal(struct xe_engine *e)
{
struct xe_guc *guc = engine_to_guc(e);
XE_BUG_ON(!engine_suspended(e) && !engine_killed(e) &&
!guc_read_stopped(guc));
XE_BUG_ON(!e->guc->suspend_pending);
e->guc->suspend_pending = false;
smp_wmb();
wake_up(&e->guc->suspend_wait);
}
static void __guc_engine_process_msg_suspend(struct xe_sched_msg *msg)
{
struct xe_engine *e = msg->private_data;
struct xe_guc *guc = engine_to_guc(e);
if (guc_engine_allowed_to_change_state(e) && !engine_suspended(e) &&
engine_enabled(e)) {
wait_event(guc->ct.wq, e->guc->resume_time != RESUME_PENDING ||
guc_read_stopped(guc));
if (!guc_read_stopped(guc)) {
MAKE_SCHED_CONTEXT_ACTION(e, DISABLE);
s64 since_resume_ms =
ktime_ms_delta(ktime_get(),
e->guc->resume_time);
s64 wait_ms = e->vm->preempt.min_run_period_ms -
since_resume_ms;
if (wait_ms > 0 && e->guc->resume_time)
msleep(wait_ms);
set_engine_suspended(e);
clear_engine_enabled(e);
set_engine_pending_disable(e);
trace_xe_engine_scheduling_disable(e);
xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}
} else if (e->guc->suspend_pending) {
set_engine_suspended(e);
suspend_fence_signal(e);
}
}
static void __guc_engine_process_msg_resume(struct xe_sched_msg *msg)
{
struct xe_engine *e = msg->private_data;
struct xe_guc *guc = engine_to_guc(e);
if (guc_engine_allowed_to_change_state(e)) {
MAKE_SCHED_CONTEXT_ACTION(e, ENABLE);
e->guc->resume_time = RESUME_PENDING;
clear_engine_suspended(e);
set_engine_pending_enable(e);
set_engine_enabled(e);
trace_xe_engine_scheduling_enable(e);
xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
} else {
clear_engine_suspended(e);
}
}
#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS 2
#define SUSPEND 3
#define RESUME 4
static void guc_engine_process_msg(struct xe_sched_msg *msg)
{
trace_xe_sched_msg_recv(msg);
switch (msg->opcode) {
case CLEANUP:
__guc_engine_process_msg_cleanup(msg);
break;
case SET_SCHED_PROPS:
__guc_engine_process_msg_set_sched_props(msg);
break;
case SUSPEND:
__guc_engine_process_msg_suspend(msg);
break;
case RESUME:
__guc_engine_process_msg_resume(msg);
break;
default:
XE_BUG_ON("Unknown message type");
}
}
static const struct drm_sched_backend_ops drm_sched_ops = {
.run_job = guc_engine_run_job,
.free_job = guc_engine_free_job,
.timedout_job = guc_engine_timedout_job,
};
static const struct xe_sched_backend_ops xe_sched_ops = {
.process_msg = guc_engine_process_msg,
};
static int guc_engine_init(struct xe_engine *e)
{
struct xe_gpu_scheduler *sched;
struct xe_guc *guc = engine_to_guc(e);
struct xe_guc_engine *ge;
long timeout;
int err;
XE_BUG_ON(!xe_device_guc_submission_enabled(guc_to_xe(guc)));
ge = kzalloc(sizeof(*ge), GFP_KERNEL);
if (!ge)
return -ENOMEM;
e->guc = ge;
ge->engine = e;
init_waitqueue_head(&ge->suspend_wait);
timeout = xe_vm_no_dma_fences(e->vm) ? MAX_SCHEDULE_TIMEOUT : HZ * 5;
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, NULL,
e->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
64, timeout, guc_to_gt(guc)->ordered_wq, NULL,
e->name, gt_to_xe(e->gt)->drm.dev);
if (err)
goto err_free;
sched = &ge->sched;
err = xe_sched_entity_init(&ge->entity, sched);
if (err)
goto err_sched;
e->priority = XE_ENGINE_PRIORITY_NORMAL;
if (xe_engine_is_lr(e))
INIT_WORK(&e->guc->lr_tdr, xe_guc_engine_lr_cleanup);
mutex_lock(&guc->submission_state.lock);
err = alloc_guc_id(guc, e);
if (err)
goto err_entity;
e->entity = &ge->entity;
if (guc_read_stopped(guc))
xe_sched_stop(sched);
mutex_unlock(&guc->submission_state.lock);
switch (e->class) {
case XE_ENGINE_CLASS_RENDER:
sprintf(e->name, "rcs%d", e->guc->id);
break;
case XE_ENGINE_CLASS_VIDEO_DECODE:
sprintf(e->name, "vcs%d", e->guc->id);
break;
case XE_ENGINE_CLASS_VIDEO_ENHANCE:
sprintf(e->name, "vecs%d", e->guc->id);
break;
case XE_ENGINE_CLASS_COPY:
sprintf(e->name, "bcs%d", e->guc->id);
break;
case XE_ENGINE_CLASS_COMPUTE:
sprintf(e->name, "ccs%d", e->guc->id);
break;
default:
XE_WARN_ON(e->class);
}
trace_xe_engine_create(e);
return 0;
err_entity:
xe_sched_entity_fini(&ge->entity);
err_sched:
xe_sched_fini(&ge->sched);
err_free:
kfree(ge);
return err;
}
static void guc_engine_kill(struct xe_engine *e)
{
trace_xe_engine_kill(e);
set_engine_killed(e);
xe_guc_engine_trigger_cleanup(e);
}
static void guc_engine_add_msg(struct xe_engine *e, struct xe_sched_msg *msg,
u32 opcode)
{
INIT_LIST_HEAD(&msg->link);
msg->opcode = opcode;
msg->private_data = e;
trace_xe_sched_msg_add(msg);
xe_sched_add_msg(&e->guc->sched, msg);
}
#define STATIC_MSG_CLEANUP 0
#define STATIC_MSG_SUSPEND 1
#define STATIC_MSG_RESUME 2
static void guc_engine_fini(struct xe_engine *e)
{
struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_CLEANUP;
if (!(e->flags & ENGINE_FLAG_KERNEL))
guc_engine_add_msg(e, msg, CLEANUP);
else
__guc_engine_fini(engine_to_guc(e), e);
}
static int guc_engine_set_priority(struct xe_engine *e,
enum xe_engine_priority priority)
{
struct xe_sched_msg *msg;
if (e->priority == priority || engine_killed_or_banned(e))
return 0;
msg = kmalloc(sizeof(*msg), GFP_KERNEL);
if (!msg)
return -ENOMEM;
guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
e->priority = priority;
return 0;
}
static int guc_engine_set_timeslice(struct xe_engine *e, u32 timeslice_us)
{
struct xe_sched_msg *msg;
if (e->sched_props.timeslice_us == timeslice_us ||
engine_killed_or_banned(e))
return 0;
msg = kmalloc(sizeof(*msg), GFP_KERNEL);
if (!msg)
return -ENOMEM;
e->sched_props.timeslice_us = timeslice_us;
guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
return 0;
}
static int guc_engine_set_preempt_timeout(struct xe_engine *e,
u32 preempt_timeout_us)
{
struct xe_sched_msg *msg;
if (e->sched_props.preempt_timeout_us == preempt_timeout_us ||
engine_killed_or_banned(e))
return 0;
msg = kmalloc(sizeof(*msg), GFP_KERNEL);
if (!msg)
return -ENOMEM;
e->sched_props.preempt_timeout_us = preempt_timeout_us;
guc_engine_add_msg(e, msg, SET_SCHED_PROPS);
return 0;
}
static int guc_engine_set_job_timeout(struct xe_engine *e, u32 job_timeout_ms)
{
struct xe_gpu_scheduler *sched = &e->guc->sched;
XE_BUG_ON(engine_registered(e));
XE_BUG_ON(engine_banned(e));
XE_BUG_ON(engine_killed(e));
sched->base.timeout = job_timeout_ms;
return 0;
}
static int guc_engine_suspend(struct xe_engine *e)
{
struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_SUSPEND;
if (engine_killed_or_banned(e) || e->guc->suspend_pending)
return -EINVAL;
e->guc->suspend_pending = true;
guc_engine_add_msg(e, msg, SUSPEND);
return 0;
}
static void guc_engine_suspend_wait(struct xe_engine *e)
{
struct xe_guc *guc = engine_to_guc(e);
wait_event(e->guc->suspend_wait, !e->guc->suspend_pending ||
guc_read_stopped(guc));
}
static void guc_engine_resume(struct xe_engine *e)
{
struct xe_sched_msg *msg = e->guc->static_msgs + STATIC_MSG_RESUME;
XE_BUG_ON(e->guc->suspend_pending);
guc_engine_add_msg(e, msg, RESUME);
}
/*
* These functions are an abstraction layer which other parts of XE can use to
* trap into the GuC backend. Aside from init, they really shouldn't do much
* other than trap into the DRM scheduler, which synchronizes these operations.
*/
static const struct xe_engine_ops guc_engine_ops = {
.init = guc_engine_init,
.kill = guc_engine_kill,
.fini = guc_engine_fini,
.set_priority = guc_engine_set_priority,
.set_timeslice = guc_engine_set_timeslice,
.set_preempt_timeout = guc_engine_set_preempt_timeout,
.set_job_timeout = guc_engine_set_job_timeout,
.suspend = guc_engine_suspend,
.suspend_wait = guc_engine_suspend_wait,
.resume = guc_engine_resume,
};
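/*
* Called for every engine when the GuC is stopped for reset: clean up any G2H
* credits lost in the reset, clear the engine state (only DESTROYED and
* SUSPENDED survive) and ban user engines with a started but unfinished job or
* one that has been through repeated GT resets.
*/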
static void guc_engine_stop(struct xe_guc *guc, struct xe_engine *e)
{
struct xe_gpu_scheduler *sched = &e->guc->sched;
/* Stop scheduling + flush any DRM scheduler operations */
xe_sched_submission_stop(sched);
/* Clean up lost G2H + reset engine state */
if (engine_registered(e)) {
if ((engine_banned(e) && engine_destroyed(e)) ||
xe_engine_is_lr(e))
xe_engine_put(e);
else if (engine_destroyed(e))
__guc_engine_fini(guc, e);
}
if (e->guc->suspend_pending) {
set_engine_suspended(e);
suspend_fence_signal(e);
}
atomic_and(ENGINE_STATE_DESTROYED | ENGINE_STATE_SUSPENDED,
&e->guc->state);
e->guc->resume_time = 0;
trace_xe_engine_stop(e);
/*
* Ban any engine (aside from kernel engines and engines used for VM ops)
* with a job that has started but not completed, or whose job has gone
* through a GT reset more than twice.
*/
if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) {
struct xe_sched_job *job = xe_sched_first_pending_job(sched);
if (job) {
if ((xe_sched_job_started(job) &&
!xe_sched_job_completed(job)) ||
xe_sched_invalidate_job(job, 2)) {
trace_xe_sched_job_ban(job);
xe_sched_tdr_queue_imm(&e->guc->sched);
set_engine_banned(e);
}
}
}
}
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
int ret;
/*
* Using an atomic here rather than submission_state.lock as this
* function can be called while holding the CT lock (engine reset
* failure). submission_state.lock needs the CT lock to resubmit jobs.
* Atomic is not ideal, but it works to prevent concurrent resets and
* releases any TDRs waiting on guc->submission_state.stopped.
*/
ret = atomic_fetch_or(1, &guc->submission_state.stopped);
smp_wmb();
wake_up_all(&guc->ct.wq);
return ret;
}
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
wait_event(guc->ct.wq, !guc_read_stopped(guc));
}
int xe_guc_submit_stop(struct xe_guc *guc)
{
struct xe_engine *e;
unsigned long index;
XE_BUG_ON(guc_read_stopped(guc) != 1);
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.engine_lookup, index, e)
guc_engine_stop(guc, e);
mutex_unlock(&guc->submission_state.lock);
/*
* No one can enter the backend at this point, aside from new engine
* creation which is protected by guc->submission_state.lock.
*/
return 0;
}
static void guc_engine_start(struct xe_engine *e)
{
struct xe_gpu_scheduler *sched = &e->guc->sched;
if (!engine_killed_or_banned(e)) {
int i;
trace_xe_engine_resubmit(e);
for (i = 0; i < e->width; ++i)
xe_lrc_set_ring_head(e->lrc + i, e->lrc[i].ring.tail);
xe_sched_resubmit_jobs(sched);
}
xe_sched_submission_start(sched);
}
int xe_guc_submit_start(struct xe_guc *guc)
{
struct xe_engine *e;
unsigned long index;
XE_BUG_ON(guc_read_stopped(guc) != 1);
mutex_lock(&guc->submission_state.lock);
atomic_dec(&guc->submission_state.stopped);
xa_for_each(&guc->submission_state.engine_lookup, index, e)
guc_engine_start(e);
mutex_unlock(&guc->submission_state.lock);
wake_up_all(&guc->ct.wq);
return 0;
}
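/*
* The G2H handlers below are called from the CT (command transport) receive
* path; messages carry a guc_id which is resolved to an engine via
* submission_state.engine_lookup.
*/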
static struct xe_engine *
g2h_engine_lookup(struct xe_guc *guc, u32 guc_id)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_engine *e;
if (unlikely(guc_id >= GUC_ID_MAX)) {
drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
return NULL;
}
e = xa_load(&guc->submission_state.engine_lookup, guc_id);
if (unlikely(!e)) {
drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id);
return NULL;
}
XE_BUG_ON(e->guc->id != guc_id);
return e;
}
static void deregister_engine(struct xe_guc *guc, struct xe_engine *e)
{
u32 action[] = {
XE_GUC_ACTION_DEREGISTER_CONTEXT,
e->guc->id,
};
trace_xe_engine_deregister(e);
xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
}
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_engine *e;
u32 guc_id = msg[0];
if (unlikely(len < 2)) {
drm_err(&xe->drm, "Invalid length %u", len);
return -EPROTO;
}
e = g2h_engine_lookup(guc, guc_id);
if (unlikely(!e))
return -EPROTO;
if (unlikely(!engine_pending_enable(e) &&
!engine_pending_disable(e))) {
drm_err(&xe->drm, "Unexpected engine state 0x%04x",
atomic_read(&e->guc->state));
return -EPROTO;
}
trace_xe_engine_scheduling_done(e);
if (engine_pending_enable(e)) {
e->guc->resume_time = ktime_get();
clear_engine_pending_enable(e);
smp_wmb();
wake_up_all(&guc->ct.wq);
} else {
clear_engine_pending_disable(e);
if (e->guc->suspend_pending) {
suspend_fence_signal(e);
} else {
if (engine_banned(e)) {
smp_wmb();
wake_up_all(&guc->ct.wq);
}
deregister_engine(guc, e);
}
}
return 0;
}
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_engine *e;
u32 guc_id = msg[0];
if (unlikely(len < 1)) {
drm_err(&xe->drm, "Invalid length %u", len);
return -EPROTO;
}
e = g2h_engine_lookup(guc, guc_id);
if (unlikely(!e))
return -EPROTO;
if (!engine_destroyed(e) || engine_pending_disable(e) ||
engine_pending_enable(e) || engine_enabled(e)) {
drm_err(&xe->drm, "Unexpected engine state 0x%04x",
atomic_read(&e->guc->state));
return -EPROTO;
}
trace_xe_engine_deregister_done(e);
clear_engine_registered(e);
if (engine_banned(e) || xe_engine_is_lr(e))
xe_engine_put(e);
else
__guc_engine_fini(guc, e);
return 0;
}
int xe_guc_engine_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_engine *e;
u32 guc_id = msg[0];
if (unlikely(len < 1)) {
drm_err(&xe->drm, "Invalid length %u", len);
return -EPROTO;
}
e = g2h_engine_lookup(guc, guc_id);
if (unlikely(!e))
return -EPROTO;
drm_info(&xe->drm, "Engine reset: guc_id=%d", guc_id);
/* FIXME: Do error capture, most likely async */
trace_xe_engine_reset(e);
/*
* A banned engine is a NOP at this point (it already went through
* guc_engine_timedout_job). Otherwise, kick the DRM scheduler to cancel
* the jobs by queueing the TDR immediately, which lands in
* guc_engine_timedout_job.
*/
set_engine_reset(e);
if (!engine_banned(e))
xe_guc_engine_trigger_cleanup(e);
return 0;
}
int xe_guc_engine_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
u32 len)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_engine *e;
u32 guc_id = msg[0];
if (unlikely(len < 1)) {
drm_err(&xe->drm, "Invalid length %u", len);
return -EPROTO;
}
e = g2h_engine_lookup(guc, guc_id);
if (unlikely(!e))
return -EPROTO;
drm_warn(&xe->drm, "Engine memory cat error: guc_id=%d", guc_id);
trace_xe_engine_memory_cat_error(e);
/* Treat the same as engine reset */
set_engine_reset(e);
if (!engine_banned(e))
xe_guc_engine_trigger_cleanup(e);
return 0;
}
int xe_guc_engine_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
struct xe_device *xe = guc_to_xe(guc);
u8 guc_class, instance;
u32 reason;
if (unlikely(len != 3)) {
drm_err(&xe->drm, "Invalid length %u", len);
return -EPROTO;
}
guc_class = msg[0];
instance = msg[1];
reason = msg[2];
/* Unexpected failure of a hardware feature, log an actual error */
drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
guc_class, instance, reason);
xe_gt_reset_async(guc_to_gt(guc));
return 0;
}
static void
guc_engine_wq_snapshot_capture(struct xe_engine *e,
struct xe_guc_submit_engine_snapshot *snapshot)
{
struct xe_guc *guc = engine_to_guc(e);
struct xe_device *xe = guc_to_xe(guc);
struct iosys_map map = xe_lrc_parallel_map(e->lrc);
int i;
snapshot->guc.wqi_head = e->guc->wqi_head;
snapshot->guc.wqi_tail = e->guc->wqi_tail;
snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
snapshot->parallel.wq_desc.status = parallel_read(xe, map,
wq_desc.wq_status);
if (snapshot->parallel.wq_desc.head !=
snapshot->parallel.wq_desc.tail) {
for (i = snapshot->parallel.wq_desc.head;
i != snapshot->parallel.wq_desc.tail;
i = (i + sizeof(u32)) % WQ_SIZE)
snapshot->parallel.wq[i / sizeof(u32)] =
parallel_read(xe, map, wq[i / sizeof(u32)]);
}
}
static void
guc_engine_wq_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
struct drm_printer *p)
{
int i;
drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
if (snapshot->parallel.wq_desc.head !=
snapshot->parallel.wq_desc.tail) {
for (i = snapshot->parallel.wq_desc.head;
i != snapshot->parallel.wq_desc.tail;
i = (i + sizeof(u32)) % WQ_SIZE)
drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
snapshot->parallel.wq[i / sizeof(u32)]);
}
}
/**
* xe_guc_engine_snapshot_capture - Take a quick snapshot of the GuC Engine.
* @e: Xe Engine.
*
* This can be printed out at a later stage, e.g. during dev_coredump
* analysis.
*
* Returns: a GuC Submit Engine snapshot object that must be freed by the
* caller, using `xe_guc_engine_snapshot_free`.
*/
struct xe_guc_submit_engine_snapshot *
xe_guc_engine_snapshot_capture(struct xe_engine *e)
{
struct xe_guc *guc = engine_to_guc(e);
struct xe_device *xe = guc_to_xe(guc);
struct xe_gpu_scheduler *sched = &e->guc->sched;
struct xe_sched_job *job;
struct xe_guc_submit_engine_snapshot *snapshot;
int i;
snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
if (!snapshot) {
drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n");
return NULL;
}
snapshot->guc.id = e->guc->id;
memcpy(&snapshot->name, &e->name, sizeof(snapshot->name));
snapshot->class = e->class;
snapshot->logical_mask = e->logical_mask;
snapshot->width = e->width;
snapshot->refcount = kref_read(&e->refcount);
snapshot->sched_timeout = sched->base.timeout;
snapshot->sched_props.timeslice_us = e->sched_props.timeslice_us;
snapshot->sched_props.preempt_timeout_us =
e->sched_props.preempt_timeout_us;
snapshot->lrc = kmalloc_array(e->width, sizeof(struct lrc_snapshot),
GFP_ATOMIC);
if (!snapshot->lrc) {
drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
} else {
for (i = 0; i < e->width; ++i) {
struct xe_lrc *lrc = e->lrc + i;
snapshot->lrc[i].context_desc =
lower_32_bits(xe_lrc_ggtt_addr(lrc));
snapshot->lrc[i].head = xe_lrc_ring_head(lrc);
snapshot->lrc[i].tail.internal = lrc->ring.tail;
snapshot->lrc[i].tail.memory =
xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc);
snapshot->lrc[i].seqno = xe_lrc_seqno(lrc);
}
}
snapshot->schedule_state = atomic_read(&e->guc->state);
snapshot->engine_flags = e->flags;
snapshot->parallel_execution = xe_engine_is_parallel(e);
if (snapshot->parallel_execution)
guc_engine_wq_snapshot_capture(e, snapshot);
spin_lock(&sched->base.job_list_lock);
snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
sizeof(struct pending_list_snapshot),
GFP_ATOMIC);
if (!snapshot->pending_list) {
drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
} else {
i = 0;
list_for_each_entry(job, &sched->base.pending_list, drm.list) {
snapshot->pending_list[i].seqno =
xe_sched_job_seqno(job);
snapshot->pending_list[i].fence =
dma_fence_is_signaled(job->fence) ? 1 : 0;
snapshot->pending_list[i].finished =
dma_fence_is_signaled(&job->drm.s_fence->finished)
? 1 : 0;
i++;
}
}
spin_unlock(&sched->base.job_list_lock);
return snapshot;
}
/**
* xe_guc_engine_snapshot_print - Print out a given GuC Engine snapshot.
* @snapshot: GuC Submit Engine snapshot object.
* @p: drm_printer where it will be printed out.
*
* This function prints out a given GuC Submit Engine snapshot object.
*/
void
xe_guc_engine_snapshot_print(struct xe_guc_submit_engine_snapshot *snapshot,
struct drm_printer *p)
{
int i;
if (!snapshot)
return;
drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
drm_printf(p, "\tName: %s\n", snapshot->name);
drm_printf(p, "\tClass: %d\n", snapshot->class);
drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
drm_printf(p, "\tWidth: %d\n", snapshot->width);
drm_printf(p, "\tRef: %d\n", snapshot->refcount);
drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
drm_printf(p, "\tTimeslice: %u (us)\n",
snapshot->sched_props.timeslice_us);
drm_printf(p, "\tPreempt timeout: %u (us)\n",
snapshot->sched_props.preempt_timeout_us);
for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
drm_printf(p, "\tHW Context Desc: 0x%08x\n",
snapshot->lrc[i].context_desc);
drm_printf(p, "\tLRC Head: (memory) %u\n",
snapshot->lrc[i].head);
drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
snapshot->lrc[i].tail.internal,
snapshot->lrc[i].tail.memory);
drm_printf(p, "\tStart seqno: (memory) %d\n",
snapshot->lrc[i].start_seqno);
drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
}
drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
drm_printf(p, "\tFlags: 0x%lx\n", snapshot->engine_flags);
if (snapshot->parallel_execution)
guc_engine_wq_snapshot_print(snapshot, p);
for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
i++)
drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
snapshot->pending_list[i].seqno,
snapshot->pending_list[i].fence,
snapshot->pending_list[i].finished);
}
/**
* xe_guc_engine_snapshot_free - Free all allocated objects for a given
* snapshot.
* @snapshot: GuC Submit Engine snapshot object.
*
* This function frees all the memory that was allocated at capture time.
*/
void xe_guc_engine_snapshot_free(struct xe_guc_submit_engine_snapshot *snapshot)
{
if (!snapshot)
return;
kfree(snapshot->lrc);
kfree(snapshot->pending_list);
kfree(snapshot);
}
static void guc_engine_print(struct xe_engine *e, struct drm_printer *p)
{
struct xe_guc_submit_engine_snapshot *snapshot;
snapshot = xe_guc_engine_snapshot_capture(e);
xe_guc_engine_snapshot_print(snapshot, p);
xe_guc_engine_snapshot_free(snapshot);
}
/**
* xe_guc_submit_print - GuC Submit Print.
* @guc: GuC.
* @p: drm_printer where it will be printed out.
*
* This function captures and prints snapshots of **all** GuC Engines.
*/
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
{
struct xe_engine *e;
unsigned long index;
if (!xe_device_guc_submission_enabled(guc_to_xe(guc)))
return;
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.engine_lookup, index, e)
guc_engine_print(e, p);
mutex_unlock(&guc->submission_state.lock);
}