Commit 84c0b4a0 authored by Vinay Belgaumkar's avatar Vinay Belgaumkar Committed by Lucas De Marchi
Browse files

drm/xe/bmg: Update Wa_22019338487



Limit GT max frequency to 2600MHz and wait for frequency to reduce
before proceeding with a transient flush. This is really only needed for
the transient flush: if L2 flush is needed due to 16023588340 then
there's no need to do this additional wait since we are already using
the bigger hammer.

v2: Use generic names, ensure user set max frequency requests wait
for flush to complete (Rodrigo)
v3:
 - User requests wait via wait_var_event_timeout (Lucas)
 - Close races on flush + user requests (Lucas)
 - Fix xe_guc_pc_remove_flush_freq_limit() being called on last gt
   rather than root gt (Lucas)
v4:
 - Only apply the freq reducing part if a TDF is needed: L2 flush trumps
   the need for waiting a lower frequency

Fixes: aaa08078 ("drm/xe/bmg: Apply Wa_22019338487")
Reviewed-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: default avatarVinay Belgaumkar <vinay.belgaumkar@intel.com>
Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-4-b888388477f2@intel.com


Signed-off-by: default avatarLucas De Marchi <lucas.demarchi@intel.com>
(cherry picked from commit deea6a7d)
Signed-off-by: default avatarLucas De Marchi <lucas.demarchi@intel.com>
parent a5c7dcdd
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_guc_pc.h"
#include "xe_hw_engine_group.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
@@ -1071,11 +1072,14 @@ void xe_device_td_flush(struct xe_device *xe)
		return;

	root_gt = xe_root_mmio_gt(xe);
	if (XE_WA(root_gt, 16023588340))
	if (XE_WA(root_gt, 16023588340)) {
		/* A transient flush is not sufficient: flush the L2 */
		xe_device_l2_flush(xe);
	else
	} else {
		xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
		tdf_request_sync(xe);
		xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
	}
}

u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+125 −0
Original line number Diff line number Diff line
@@ -7,7 +7,9 @@

#include <linux/cleanup.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/ktime.h>
#include <linux/wait_bit.h>

#include <drm/drm_managed.h>
#include <drm/drm_print.h>
@@ -53,9 +55,11 @@
#define LNL_MERT_FREQ_CAP	800
#define BMG_MERT_FREQ_CAP	2133
#define BMG_MIN_FREQ		1200
#define BMG_MERT_FLUSH_FREQ_CAP	2600

#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
#define SLPC_ACT_FREQ_TIMEOUT_MS 100

/**
 * DOC: GuC Power Conservation (PC)
@@ -143,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc,
	return -ETIMEDOUT;
}

static int wait_for_flush_complete(struct xe_guc_pc *pc)
{
	const unsigned long timeout = msecs_to_jiffies(30);

	if (!wait_var_event_timeout(&pc->flush_freq_limit,
				    !atomic_read(&pc->flush_freq_limit),
				    timeout))
		return -ETIMEDOUT;

	return 0;
}

static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
{
	int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
	int slept, wait = 10;

	for (slept = 0; slept < timeout_us;) {
		if (xe_guc_pc_get_act_freq(pc) <= freq)
			return 0;

		usleep_range(wait, wait << 1);
		slept += wait;
		wait <<= 1;
		if (slept + wait > timeout_us)
			wait = timeout_us - slept;
	}

	return -ETIMEDOUT;
}
static int pc_action_reset(struct xe_guc_pc *pc)
{
	struct xe_guc_ct *ct = pc_to_ct(pc);
@@ -688,6 +722,11 @@ static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq)
 */
int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
{
	if (XE_WA(pc_to_gt(pc), 22019338487)) {
		if (wait_for_flush_complete(pc) != 0)
			return -EAGAIN;
	}

	guard(mutex)(&pc->freq_lock);

	return xe_guc_pc_set_max_freq_locked(pc, freq);
@@ -888,6 +927,92 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
	return ret;
}

static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
{
	struct xe_gt *gt = pc_to_gt(pc);

	return  XE_WA(gt, 22019338487) &&
		pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
}

/**
 * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
 * @pc: the xe_guc_pc object
 *
 * As per the WA, reduce max GT frequency during L2 cache flush
 */
void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
{
	struct xe_gt *gt = pc_to_gt(pc);
	u32 max_freq;
	int ret;

	if (!needs_flush_freq_limit(pc))
		return;

	guard(mutex)(&pc->freq_lock);

	ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq);
	if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
		ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
		if (ret) {
			xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
				       BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
			return;
		}

		atomic_set(&pc->flush_freq_limit, 1);

		/*
		 * If user has previously changed max freq, stash that value to
		 * restore later, otherwise use the current max. New user
		 * requests wait on flush.
		 */
		if (pc->user_requested_max != 0)
			pc->stashed_max_freq = pc->user_requested_max;
		else
			pc->stashed_max_freq = max_freq;
	}

	/*
	 * Wait for actual freq to go below the flush cap: even if the previous
	 * max was below cap, the current one might still be above it
	 */
	ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
	if (ret)
		xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
			       BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
}

/**
 * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
 * @pc: the xe_guc_pc object
 *
 * Retrieve the previous GT max frequency value.
 */
void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
{
	struct xe_gt *gt = pc_to_gt(pc);
	int ret = 0;

	if (!needs_flush_freq_limit(pc))
		return;

	if (!atomic_read(&pc->flush_freq_limit))
		return;

	mutex_lock(&pc->freq_lock);

	ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq);
	if (ret)
		xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
			       pc->stashed_max_freq, ret);

	atomic_set(&pc->flush_freq_limit, 0);
	mutex_unlock(&pc->freq_lock);
	wake_up_var(&pc->flush_freq_limit);
}

static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
{
	int ret;
+2 −0
Original line number Diff line number Diff line
@@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
void xe_guc_pc_init_early(struct xe_guc_pc *pc);
int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);

#endif /* _XE_GUC_PC_H_ */
+2 −0
Original line number Diff line number Diff line
@@ -15,6 +15,8 @@
struct xe_guc_pc {
	/** @bo: GGTT buffer object that is shared with GuC PC */
	struct xe_bo *bo;
	/** @flush_freq_limit: 1 when max freq changes are limited by driver */
	atomic_t flush_freq_limit;
	/** @rp0_freq: HW RP0 frequency - The Maximum one */
	u32 rp0_freq;
	/** @rpa_freq: HW RPa frequency - The Achievable one */