Commit dcdd6b84 authored by Daniele Ceraolo Spurio's avatar Daniele Ceraolo Spurio
Browse files

drm/xe/pxp: Allocate PXP execution resources



PXP requires submissions to the HW for the following operations

1) Key invalidation, done via the VCS engine
2) Communication with the GSC FW for session management, done via the
   GSCCS.

Key invalidation submissions are serialized (only 1 termination can be
serviced at a given time) and done via GGTT, so we can allocate a simple
BO and a kernel queue for it.

Submissions for session management are tied to a PXP client (identified
by a unique host_session_id); from the GSC POV this is a user-accessible
construct, so all related submission must be done via PPGTT. The driver
does not currently support PPGTT submission from within the kernel, so
to add this support, the following changes have been included:

- a new type of kernel-owned VM (marked as GSC), required to ensure we
  don't use fault mode on the engine and to mark the different lock
  usage with lockdep.
- a new function to map a BO into a VM from within the kernel.

v2: improve comments and function name, remove unneeded include (John)
v3: fix variable/function names in documentation

Signed-off-by: default avatarDaniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: default avatarJohn Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250129174140.948829-3-daniele.ceraolospurio@intel.com
parent ff48e05d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -88,6 +88,7 @@ xe-y += xe_bb.o \
	xe_pt.o \
	xe_pt_walk.o \
	xe_pxp.o \
	xe_pxp_submit.o \
	xe_query.o \
	xe_range_fence.o \
	xe_reg_sr.o \
+7 −0
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#ifndef _ABI_GSC_PXP_COMMANDS_ABI_H
#define _ABI_GSC_PXP_COMMANDS_ABI_H

#include <linux/sizes.h>
#include <linux/types.h>

/* Heci client ID for PXP commands */
@@ -13,6 +14,12 @@

#define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF))

/*
 * A PXP sub-section in an HECI packet can be up to 64K big in each direction.
 * This does not include the top-level GSC header.
 */
#define PXP_MAX_PACKET_SIZE SZ_64K

/*
 * there are a lot of status codes for PXP, but we only define the cross-API
 * common ones that we actually can handle in the kernel driver. Other failure
+3 −0
Original line number Diff line number Diff line
@@ -153,6 +153,9 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
	struct xe_exec_queue *q;
	int err;

	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));

	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
				  extensions);
	if (IS_ERR(q))
+22 −1
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include "xe_gt.h"
#include "xe_gt_types.h"
#include "xe_mmio.h"
#include "xe_pxp_submit.h"
#include "xe_pxp_types.h"
#include "xe_uc_fw.h"
#include "regs/xe_pxp_regs.h"
@@ -50,6 +51,20 @@ static int kcr_pxp_enable(const struct xe_pxp *pxp)
	return kcr_pxp_set_status(pxp, true);
}

static int kcr_pxp_disable(const struct xe_pxp *pxp)
{
	return kcr_pxp_set_status(pxp, false);
}

static void pxp_fini(void *arg)
{
	struct xe_pxp *pxp = arg;

	xe_pxp_destroy_execution_resources(pxp);

	/* no need to explicitly disable KCR since we're going to do an FLR */
}

/**
 * xe_pxp_init - initialize PXP support
 * @xe: the xe_device structure
@@ -97,10 +112,16 @@ int xe_pxp_init(struct xe_device *xe)
	if (err)
		goto out_free;

	err = xe_pxp_allocate_execution_resources(pxp);
	if (err)
		goto out_kcr_disable;

	xe->pxp = pxp;

	return 0;
	return devm_add_action_or_reset(xe->drm.dev, pxp_fini, pxp);

out_kcr_disable:
	kcr_pxp_disable(pxp);
out_free:
	drmm_kfree(&xe->drm, pxp);
	return err;
+199 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: MIT
/*
 * Copyright(c) 2024 Intel Corporation.
 */

#include "xe_pxp_submit.h"

#include <uapi/drm/xe_drm.h>

#include "xe_device_types.h"
#include "xe_bo.h"
#include "xe_exec_queue.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_pxp_types.h"
#include "xe_vm.h"

/*
 * The VCS is used for kernel-owned GGTT submissions to issue key termination.
 * Terminations are serialized, so we only need a single queue and a single
 * batch.
 */
static int allocate_vcs_execution_resources(struct xe_pxp *pxp)
{
	struct xe_gt *gt = pxp->gt;
	struct xe_device *xe = pxp->xe;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_hw_engine *hwe;
	struct xe_exec_queue *q;
	struct xe_bo *bo;
	int err;

	hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_VIDEO_DECODE, 0, true);
	if (!hwe)
		return -ENODEV;

	q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q))
		return PTR_ERR(q);

	/*
	 * Each termination is 16 DWORDS, so 4K is enough to contain a
	 * termination for each sessions.
	 */
	bo = xe_bo_create_pin_map(xe, tile, 0, SZ_4K, ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		goto out_queue;
	}

	pxp->vcs_exec.q = q;
	pxp->vcs_exec.bo = bo;

	return 0;

out_queue:
	xe_exec_queue_put(q);
	return err;
}

static void destroy_vcs_execution_resources(struct xe_pxp *pxp)
{
	if (pxp->vcs_exec.bo)
		xe_bo_unpin_map_no_vm(pxp->vcs_exec.bo);

	if (pxp->vcs_exec.q)
		xe_exec_queue_put(pxp->vcs_exec.q);
}

#define PXP_BB_SIZE		XE_PAGE_SIZE
static int allocate_gsc_client_resources(struct xe_gt *gt,
					 struct xe_pxp_gsc_client_resources *gsc_res,
					 size_t inout_size)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm;
	struct xe_bo *bo;
	struct xe_exec_queue *q;
	struct dma_fence *fence;
	long timeout;
	int err = 0;

	hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, 0, true);

	/* we shouldn't reach here if the GSC engine is not available */
	xe_assert(xe, hwe);

	/* PXP instructions must be issued from PPGTT */
	vm = xe_vm_create(xe, XE_VM_FLAG_GSC);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

	/* We allocate a single object for the batch and the in/out memory */
	xe_vm_lock(vm, false);
	bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2,
				  ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC);
	xe_vm_unlock(vm);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		goto vm_out;
	}

	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		goto bo_out;
	}

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	if (timeout <= 0) {
		err = timeout ?: -ETIME;
		goto bo_out;
	}

	q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, hwe,
				 EXEC_QUEUE_FLAG_KERNEL |
				 EXEC_QUEUE_FLAG_PERMANENT, 0);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto bo_out;
	}

	gsc_res->vm = vm;
	gsc_res->bo = bo;
	gsc_res->inout_size = inout_size;
	gsc_res->batch = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0);
	gsc_res->msg_in = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_SIZE);
	gsc_res->msg_out = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_SIZE + inout_size);
	gsc_res->q = q;

	/* initialize host-session-handle (for all Xe-to-gsc-firmware PXP cmds) */
	gsc_res->host_session_handle = xe_gsc_create_host_session_id();

	return 0;

bo_out:
	xe_bo_unpin_map_no_vm(bo);
vm_out:
	xe_vm_close_and_put(vm);

	return err;
}

static void destroy_gsc_client_resources(struct xe_pxp_gsc_client_resources *gsc_res)
{
	if (!gsc_res->q)
		return;

	xe_exec_queue_put(gsc_res->q);
	xe_bo_unpin_map_no_vm(gsc_res->bo);
	xe_vm_close_and_put(gsc_res->vm);
}

/**
 * xe_pxp_allocate_execution_resources - Allocate PXP submission objects
 * @pxp: the xe_pxp structure
 *
 * Allocates exec_queues objects for VCS and GSCCS submission. The GSCCS
 * submissions are done via PPGTT, so this function allocates a VM for it and
 * maps the object into it.
 *
 * Returns 0 if the allocation and mapping is successful, an errno value
 * otherwise.
 */
int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp)
{
	int err;

	err = allocate_vcs_execution_resources(pxp);
	if (err)
		return err;

	/*
	 * PXP commands can require a lot of BO space (see PXP_MAX_PACKET_SIZE),
	 * but we currently only support a subset of commands that are small
	 * (< 20 dwords), so a single page is enough for now.
	 */
	err = allocate_gsc_client_resources(pxp->gt, &pxp->gsc_res, XE_PAGE_SIZE);
	if (err)
		goto destroy_vcs_context;

	return 0;

destroy_vcs_context:
	destroy_vcs_execution_resources(pxp);
	return err;
}

void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp)
{
	destroy_gsc_client_resources(&pxp->gsc_res);
	destroy_vcs_execution_resources(pxp);
}
Loading