linux-cryptodev-2.6/drivers/gpu/drm/xe/xe_tile_sriov_vf.c
Matthew Brost 7dd11d8804 drm/xe/vf: Close multi-GT GGTT shift race
Multi-GT VF post-migration recovery can run in parallel on different
workqueues, but both GTs point to the same GGTT, so only one GT needs to
shift the GGTT. However, both GTs need to know when this step has
completed. To coordinate this, perform the GGTT shift under the GGTT
lock. With the shift done under the lock, storing the shift value
becomes unnecessary.
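
A minimal sketch of the resulting coordination, assuming a hypothetical
per-GT recovery helper (vf_get_ggtt_shift() is a placeholder for however
the caller recomputes the shift from its refreshed config; it is not
part of this patch):

	static void vf_recover_ggtt(struct xe_gt *gt)
	{
		struct xe_tile *tile = gt_to_tile(gt);
		struct xe_ggtt *ggtt = tile->mem.ggtt;
		s64 shift;

		mutex_lock(&ggtt->lock);
		/* The first GT in recomputes a non-zero shift and applies
		 * it; the second GT then computes zero and skips the fixup.
		 */
		shift = vf_get_ggtt_shift(gt);
		if (shift)
			xe_tile_sriov_vf_fixup_ggtt_nodes_locked(tile, shift);
		mutex_unlock(&ggtt->lock);
	}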

In addition to the above, move the GGTT VF config from the GT to the tile.

v3:
 - Update commit message (Tomasz)
v4:
 - Move GGTT values to tile state (Michal)
 - Use GGTT lock (Michal)
v5:
 - Only take GGTT lock during recovery (CI)
 - Drop goto in vf_get_submission_cfg (Michal)
 - Add kernel doc around recovery in xe_gt_sriov_vf_query_config (Michal)
v7:
 - Drop recovery variable (Michal)
 - Use _locked naming (Michal)
 - Use guard (Michal)
v9:
 - Break LMEM changes into different patch (Michal)
 - Fix layering (Michal)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://lore.kernel.org/r/20251008214532.3442967-15-matthew.brost@intel.com
2025-10-09 03:22:34 -07:00

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "regs/xe_gtt_defs.h"

#include "xe_assert.h"
#include "xe_ggtt.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_wopcm.h"

static int vf_init_ggtt_balloons(struct xe_tile *tile)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(tile->sriov.vf.ggtt_balloon[0]))
		return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]);

	tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) {
		xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]);
		return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]);
	}

	return 0;
}

/**
 * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range.
 * @tile: the &xe_tile struct instance
 *
 * Return: 0 on success or a negative error code on failure.
 */
static int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile)
{
	u64 ggtt_base = tile->sriov.vf.self_config.ggtt_base;
	u64 ggtt_size = tile->sriov.vf.self_config.ggtt_size;
	struct xe_device *xe = tile_to_xe(tile);
	u64 wopcm = xe_wopcm_size(xe);
	u64 start, end;
	int err;

	xe_tile_assert(tile, IS_SRIOV_VF(xe));
	xe_tile_assert(tile, ggtt_size);
	lockdep_assert_held(&tile->mem.ggtt->lock);

	/*
	 * VF can only use part of the GGTT as allocated by the PF:
	 *
	 *      WOPCM                                  GUC_GGTT_TOP
	 *      |<------------ Total GGTT size ------------------>|
	 *
	 *           VF GGTT base -->|<- size ->|
	 *
	 *      +--------------------+----------+-----------------+
	 *      |////////////////////|   block  |\\\\\\\\\\\\\\\\\|
	 *      +--------------------+----------+-----------------+
	 *
	 *      |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->|
	 */
	if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP ||
	    ggtt_size > GUC_GGTT_TOP - ggtt_base) {
		xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n",
			     tile->id, ggtt_base, ggtt_base + ggtt_size - 1);
		return -ERANGE;
	}

	start = wopcm;
	end = ggtt_base;
	if (end != start) {
		err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0],
							 start, end);
		if (err)
			return err;
	}

	start = ggtt_base + ggtt_size;
	end = GUC_GGTT_TOP;
	if (end != start) {
		err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1],
							 start, end);
		if (err) {
			xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
			return err;
		}
	}

	return 0;
}

static int vf_balloon_ggtt(struct xe_tile *tile)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;
	int err;

	mutex_lock(&ggtt->lock);
	err = xe_tile_sriov_vf_balloon_ggtt_locked(tile);
	mutex_unlock(&ggtt->lock);

	return err;
}
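
/*
 * Illustrative alternative (a sketch, not part of the original file): the
 * same wrapper written with the scope-based lock guard from
 * <linux/cleanup.h>, which drops the mutex automatically when the scope
 * ends:
 *
 *	static int vf_balloon_ggtt(struct xe_tile *tile)
 *	{
 *		guard(mutex)(&tile->mem.ggtt->lock);
 *
 *		return xe_tile_sriov_vf_balloon_ggtt_locked(tile);
 *	}
 */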

/**
 * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes.
 * @tile: the &xe_tile struct instance
 */
void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]);
	xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
}

static void vf_deballoon_ggtt(struct xe_tile *tile)
{
	mutex_lock(&tile->mem.ggtt->lock);
	xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
	mutex_unlock(&tile->mem.ggtt->lock);
}

static void vf_fini_ggtt_balloons(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]);
	xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]);
}

static void cleanup_ggtt(struct drm_device *drm, void *arg)
{
	struct xe_tile *tile = arg;

	vf_deballoon_ggtt(tile);
	vf_fini_ggtt_balloons(tile);
}

/**
 * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);
	int err;

	err = vf_init_ggtt_balloons(tile);
	if (err)
		return err;

	err = vf_balloon_ggtt(tile);
	if (err) {
		vf_fini_ggtt_balloons(tile);
		return err;
	}

	return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile);
}

/**
 * DOC: GGTT nodes shifting during VF post-migration recovery
 *
 * The first fixup applied to the VF KMD structures as part of post-migration
 * recovery is shifting nodes within the &xe_ggtt instance. The nodes are moved
 * from the range previously assigned to this VF into the newly provisioned
 * area. The changes include balloons, which are resized accordingly.
 *
 * The balloon nodes are there to eliminate unavailable ranges from use: one
 * reserves the GGTT area below the range for the current VF, and another one
 * reserves the area above.
 *
 * Below is a GGTT layout of an example VF, with a certain address range
 * assigned to said VF, and inaccessible areas above and below:
 *
 *  0                                                                        4GiB
 *  |<--------------------------- Total GGTT size ----------------------------->|
 *      WOPCM                                                         GUC_TOP
 *      |<-------------- Area mappable by xe_ggtt instance ---------------->|
 *
 *  +---+---------------------------------+----------+----------------------+---+
 *  |\\\|/////////////////////////////////|  VF mem  |//////////////////////|\\\|
 *  +---+---------------------------------+----------+----------------------+---+
 *
 * Hardware enforced access rules before migration:
 *
 *  |<------- inaccessible for VF ------->|<VF owned>|<-- inaccessible for VF ->|
 *
 * GGTT nodes used for tracking allocations:
 *
 *      |<---------- balloon ------------>|<- nodes->|<----- balloon ------>|
 *
 * After the migration, the GGTT area assigned to the VF might have shifted,
 * either to a lower or to a higher address. But we expect the total size and
 * the extra areas to be identical, as migration can only happen between
 * matching platforms. Below is an example GGTT layout of the VF after
 * migration. The content of the GGTT for the VF has been moved to a new area,
 * and we receive its address from GuC:
 *
 *  +---+----------------------+----------+---------------------------------+---+
 *  |\\\|//////////////////////|  VF mem  |/////////////////////////////////|\\\|
 *  +---+----------------------+----------+---------------------------------+---+
 *
 * Hardware enforced access rules after migration:
 *
 *  |<- inaccessible for VF -->|<VF owned>|<------- inaccessible for VF ------->|
 *
 * So the VF has a new slice of GGTT assigned, and during the migration process
 * the memory content was copied to that new area. But the &xe_ggtt nodes are
 * still tracking allocations using the old addresses. The nodes within the VF
 * owned area have to be shifted, and the balloon nodes need to be resized to
 * properly mask out areas not owned by the VF.
 *
 * Fixed &xe_ggtt nodes used for tracking allocations:
 *
 *      |<------ balloon ------>|<- nodes->|<----------- balloon ----------->|
 *
 * Due to the use of GPU profiles, we do not expect the old and new GGTT areas
 * to overlap; but our node shifting will fix the addresses properly regardless.
 */
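
/*
 * Worked example (illustrative numbers, not from the original file): if the
 * PF moves this VF's GGTT range from base 0x01000000 to base 0x03000000
 * with the size unchanged, then shift = 0x03000000 - 0x01000000 =
 * +0x02000000, a node tracking an allocation at 0x01400000 is fixed up to
 * 0x03400000, and the balloons are re-inserted around the new range.
 */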

/**
 * xe_tile_sriov_vf_fixup_ggtt_nodes_locked - Shift GGTT allocations to match assigned range.
 * @tile: the &xe_tile struct instance
 * @shift: the shift value
 *
 * Since Global GTT is not virtualized, each VF has an assigned range
 * within the global space. This range might have changed during migration,
 * which requires all memory addresses pointing to GGTT to be shifted.
 */
void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;

	lockdep_assert_held(&ggtt->lock);

	xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
	xe_ggtt_shift_nodes_locked(ggtt, shift);
	xe_tile_sriov_vf_balloon_ggtt_locked(tile);
}

/**
 * xe_tile_sriov_vf_lmem - VF LMEM configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: size of the LMEM assigned to VF.
 */
u64 xe_tile_sriov_vf_lmem(struct xe_tile *tile)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	return config->lmem_size;
}

/**
 * xe_tile_sriov_vf_lmem_store - Store VF LMEM configuration
 * @tile: the &xe_tile
 * @lmem_size: VF LMEM size to store
 *
 * This function is for VF use only.
 */
void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	config->lmem_size = lmem_size;
}

/**
 * xe_tile_sriov_vf_ggtt - VF GGTT configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: size of the GGTT assigned to VF.
 */
u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	return config->ggtt_size;
}

/**
 * xe_tile_sriov_vf_ggtt_store - Store VF GGTT configuration
 * @tile: the &xe_tile
 * @ggtt_size: VF GGTT size to store
 *
 * This function is for VF use only.
 */
void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	config->ggtt_size = ggtt_size;
}

/**
 * xe_tile_sriov_vf_ggtt_base - VF GGTT base configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: base of the GGTT assigned to VF.
 */
u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	return config->ggtt_base;
}

/**
 * xe_tile_sriov_vf_ggtt_base_store - Store VF GGTT base configuration
 * @tile: the &xe_tile
 * @ggtt_base: VF GGTT base to store
 *
 * This function is for VF use only.
 */
void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_base)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	config->ggtt_base = ggtt_base;
}