Commit 00ffe45e authored by Lizhi Hou's avatar Lizhi Hou
Browse files

accel/amdxdna: Fix race condition when checking rpm_on



When autosuspend is triggered, driver rpm_on flag is set to indicate that
a suspend/resume is already in progress. However, when a userspace
application submits a command during this narrow window,
amdxdna_pm_resume_get() may incorrectly skip the resume operation because
the rpm_on flag is still set. This results in commands being submitted
while the device has not actually resumed, causing unexpected behavior.

The set_dpm() is called by suspend/resume, it relied on rpm_on flag to
avoid calling into rpm suspend/resume recursivly. So to fix this, remove
the use of the rpm_on flag entirely. Instead, introduce aie2_pm_set_dpm()
which explicitly resumes the device before invoking set_dpm(). With this
change, set_dpm() is called directly inside the suspend or resume execution
path. Otherwise, aie2_pm_set_dpm() is called.

Fixes: 063db451 ("accel/amdxdna: Enhance runtime power management")
Reviewed-by: default avatarMario Limonciello (AMD) <superm1@kernel.org>
Reviewed-by: default avatarMaciej Falkowski <maciej.falkowski@linux.intel.com>
Signed-off-by: default avatarLizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20251208165356.1549237-1-lizhi.hou@amd.com
parent 0823bd89
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -39,7 +39,6 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
	if (!ndev->mgmt_chann)
		return -ENODEV;

	drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock));
	ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
	if (ret == -ETIME) {
		xdna_mailbox_stop_channel(ndev->mgmt_chann);
+1 −1
Original line number Diff line number Diff line
@@ -321,7 +321,7 @@ static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
	if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level)
		return 0;

	return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
	return aie2_pm_set_dpm(ndev, dpm_level);
}

static struct xrs_action_ops aie2_xrs_actions = {
+1 −0
Original line number Diff line number Diff line
@@ -286,6 +286,7 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
/* aie2_pm.c */
int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);

/* aie2_psp.c */
struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
+16 −1
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@

#include "aie2_pci.h"
#include "amdxdna_pci_drv.h"
#include "amdxdna_pm.h"

#define AIE2_CLK_GATING_ENABLE	1
#define AIE2_CLK_GATING_DISABLE	0
@@ -26,6 +27,20 @@ static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
	return 0;
}

int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
	int ret;

	ret = amdxdna_pm_resume_get(ndev->xdna);
	if (ret)
		return ret;

	ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
	amdxdna_pm_suspend_put(ndev->xdna);

	return ret;
}

int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
{
	int ret;
@@ -94,7 +109,7 @@ int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type
		return -EOPNOTSUPP;
	}

	ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
	ret = aie2_pm_set_dpm(ndev, dpm_level);
	if (ret)
		return ret;

+4 −23
Original line number Diff line number Diff line
@@ -11,7 +11,6 @@

#include "aie2_pci.h"
#include "amdxdna_pci_drv.h"
#include "amdxdna_pm.h"

#define SMU_RESULT_OK		1

@@ -67,16 +66,12 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
	u32 freq;
	int ret;

	ret = amdxdna_pm_resume_get(ndev->xdna);
	if (ret)
		return ret;

	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
			    ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
			 ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
		goto suspend_put;
		return ret;
	}
	ndev->npuclk_freq = freq;

@@ -85,10 +80,9 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
	if (ret) {
		XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
			 ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
		goto suspend_put;
		return ret;
	}

	amdxdna_pm_suspend_put(ndev->xdna);
	ndev->hclk_freq = freq;
	ndev->dpm_level = dpm_level;
	ndev->max_tops = 2 * ndev->total_col;
@@ -98,35 +92,26 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
		 ndev->npuclk_freq, ndev->hclk_freq);

	return 0;

suspend_put:
	amdxdna_pm_suspend_put(ndev->xdna);
	return ret;
}

int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
	int ret;

	ret = amdxdna_pm_resume_get(ndev->xdna);
	if (ret)
		return ret;

	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
			 dpm_level, ret);
		goto suspend_put;
		return ret;
	}

	ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
	if (ret) {
		XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
			 dpm_level, ret);
		goto suspend_put;
		return ret;
	}

	amdxdna_pm_suspend_put(ndev->xdna);
	ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
	ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
	ndev->dpm_level = dpm_level;
@@ -137,10 +122,6 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
		 ndev->npuclk_freq, ndev->hclk_freq);

	return 0;

suspend_put:
	amdxdna_pm_suspend_put(ndev->xdna);
	return ret;
}

int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
Loading