Commit dfb31428 authored by Linus Torvalds
Browse files

Merge tag 'drm-fixes-2026-03-07' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
 "Weekly fixes pull.

  There is one mm fix in here for a HMM livelock triggered by the xe
  driver tests. Otherwise it's a pretty wide range of fixes across the
  board, ttm UAF regression fix, amdgpu fixes, nouveau doesn't crash my
  laptop anymore fix, and a fair bit of misc.

  Seems about right for rc3.

  mm:
   - mm: Fix a hmm_range_fault() livelock / starvation problem

  pagemap:
   - Revert "drm/pagemap: Disable device-to-device migration"

  ttm:
   - fix function return breaking reclaim
   - fix build failure on PREEMPT_RT
   - fix bo->resource UAF

  dma-buf:
   - include ioctl.h in uapi header

  sched:
   - fix kernel doc warning

  amdgpu:
   - LUT fixes
   - VCN5 fix
   - Dispclk fix
   - SMU 13.x fix
   - Fix race in VM acquire
   - PSP 15.x fix
   - UserQ fix

  amdxdna:
   - fix invalid payload for failed command
   - fix NULL ptr dereference
   - fix major fw version check
   - avoid inconsistent fw state on error

  i915/display:
   - Fix for Lenovo T14 G7 display not refreshing

  xe:
   - Do not preempt fence signaling CS instructions
   - Some leak and finalization fixes
   - Workaround fix

  nouveau:
   - avoid runtime suspend oops when using dp aux

  panthor:
   - fix gem_sync argument ordering

  solomon:
   - fix incorrect display output

  renesas:
   - fix DSI divider programming

  ethosu:
   - fix job submit error clean-up refcount
   - fix NPU_OP_ELEMENTWISE validation
   - handle possible underflows in IFM size calcs"

* tag 'drm-fixes-2026-03-07' of https://gitlab.freedesktop.org/drm/kernel: (38 commits)
  accel: ethosu: Handle possible underflow in IFM size calculations
  accel: ethosu: Fix NPU_OP_ELEMENTWISE validation with scalar
  accel: ethosu: Fix job submit error clean-up refcount underflows
  accel/amdxdna: Split mailbox channel create function
  drm/panthor: Correct the order of arguments passed to gem_sync
  Revert "drm/syncobj: Fix handle <-> fd ioctls with dirty stack"
  drm/ttm: Fix bo resource use-after-free
  nouveau/dpcd: return EBUSY for aux xfer if the device is asleep
  accel/amdxdna: Fix major version check on NPU1 platform
  drm/amdgpu/userq: refcount userqueues to avoid any race conditions
  drm/amdgpu/userq: Consolidate wait ioctl exit path
  drm/amdgpu/psp: Use Indirect access address for GFX to PSP mailbox
  drm/amdgpu: Fix use-after-free race in VM acquire
  drm/amd/pm: remove invalid gpu_metrics.energy_accumulator on smu v13.0.x
  drm/xe: Fix memory leak in xe_vm_madvise_ioctl
  drm/xe/reg_sr: Fix leak on xa_store failure
  drm/xe/xe2_hpg: Correct implementation of Wa_16025250150
  drm/xe/gsc: Fix GSC proxy cleanup on early initialization failure
  Revert "drm/pagemap: Disable device-to-device migration"
  drm/i915/psr: Fix for Panel Replay X granularity DPCD register handling
  ...
parents 3593e678 96bfe9ff
Loading
Loading
Loading
Loading
+8 −15
Original line number Diff line number Diff line
@@ -186,13 +186,13 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
	cmd_abo = job->cmd_bo;

	if (unlikely(job->job_timeout)) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
		ret = -EINVAL;
		goto out;
	}

	if (unlikely(!data) || unlikely(size != sizeof(u32))) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}
@@ -202,7 +202,7 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
	if (status == AIE2_STATUS_SUCCESS)
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
	else
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR);

out:
	aie2_sched_notify(job);
@@ -244,13 +244,13 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
	cmd_abo = job->cmd_bo;

	if (unlikely(job->job_timeout)) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
		ret = -EINVAL;
		goto out;
	}

	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	}
@@ -270,19 +270,12 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
		 fail_cmd_idx, fail_cmd_status);

	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
		amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT);
		ret = -EINVAL;
		goto out;
	} else {
		amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR);
	}
	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);

	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);

		cc->error_index = fail_cmd_idx;
		if (cc->error_index >= cc->command_count)
			cc->error_index = 0;
	}
out:
	aie2_sched_notify(job);
	return ret;
+28 −8
Original line number Diff line number Diff line
@@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
		return -ENODEV;

	ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
	if (ret == -ETIME) {
		xdna_mailbox_stop_channel(ndev->mgmt_chann);
		xdna_mailbox_destroy_channel(ndev->mgmt_chann);
		ndev->mgmt_chann = NULL;
	}
	if (ret == -ETIME)
		aie2_destroy_mgmt_chann(ndev);

	if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
		XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
@@ -296,13 +293,20 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
	}

	intr_reg = i2x.mb_head_ptr_reg + 4;
	hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
							      intr_reg, ret);
	hwctx->priv->mbox_chann = xdna_mailbox_alloc_channel(ndev->mbox);
	if (!hwctx->priv->mbox_chann) {
		XDNA_ERR(xdna, "Not able to create channel");
		ret = -EINVAL;
		goto del_ctx_req;
	}

	ret = xdna_mailbox_start_channel(hwctx->priv->mbox_chann, &x2i, &i2x,
					 intr_reg, ret);
	if (ret) {
		XDNA_ERR(xdna, "Not able to create channel");
		ret = -EINVAL;
		goto free_channel;
	}
	ndev->hwctx_num++;

	XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", ret, resp.msix_id);
@@ -310,6 +314,8 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct

	return 0;

free_channel:
	xdna_mailbox_free_channel(hwctx->priv->mbox_chann);
del_ctx_req:
	aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
	return ret;
@@ -325,7 +331,7 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc

	xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
	ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
	xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
	xdna_mailbox_free_channel(hwctx->priv->mbox_chann);
	XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id);
	hwctx->priv->mbox_chann = NULL;
	hwctx->fw_ctx_id = -1;
@@ -914,6 +920,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
		ndev->exec_msg_ops = &legacy_exec_message_ops;
}

/**
 * aie2_destroy_mgmt_chann() - Stop and free the management mailbox channel.
 * @ndev: device handle whose mgmt_chann is torn down
 *
 * Emits a DRM warning when the device's dev_lock is not locked at the time
 * of the call. If ndev->mgmt_chann is NULL the function does nothing further,
 * so calling it more than once is harmless. Otherwise the channel is stopped,
 * then freed, and the pointer is reset to NULL.
 */
void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
{
	drm_WARN_ON(&ndev->xdna->ddev, !mutex_is_locked(&ndev->xdna->dev_lock));

	if (ndev->mgmt_chann) {
		/* Stop before free; clear the pointer last so a repeat call is a no-op. */
		xdna_mailbox_stop_channel(ndev->mgmt_chann);
		xdna_mailbox_free_channel(ndev->mgmt_chann);
		ndev->mgmt_chann = NULL;
	}
}

static inline struct amdxdna_gem_obj *
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
{
+37 −29
Original line number Diff line number Diff line
@@ -330,9 +330,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)

	aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
	aie2_mgmt_fw_fini(ndev);
	xdna_mailbox_stop_channel(ndev->mgmt_chann);
	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
	ndev->mgmt_chann = NULL;
	aie2_destroy_mgmt_chann(ndev);
	drmm_kfree(&xdna->ddev, ndev->mbox);
	ndev->mbox = NULL;
	aie2_psp_stop(ndev->psp_hdl);
@@ -363,10 +361,29 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
	}
	pci_set_master(pdev);

	mbox_res.ringbuf_base = ndev->sram_base;
	mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
	mbox_res.mbox_base = ndev->mbox_base;
	mbox_res.mbox_size = MBOX_SIZE(ndev);
	mbox_res.name = "xdna_mailbox";
	ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
	if (!ndev->mbox) {
		XDNA_ERR(xdna, "failed to create mailbox device");
		ret = -ENODEV;
		goto disable_dev;
	}

	ndev->mgmt_chann = xdna_mailbox_alloc_channel(ndev->mbox);
	if (!ndev->mgmt_chann) {
		XDNA_ERR(xdna, "failed to alloc channel");
		ret = -ENODEV;
		goto disable_dev;
	}

	ret = aie2_smu_init(ndev);
	if (ret) {
		XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
		goto disable_dev;
		goto free_channel;
	}

	ret = aie2_psp_start(ndev->psp_hdl);
@@ -381,18 +398,6 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
		goto stop_psp;
	}

	mbox_res.ringbuf_base = ndev->sram_base;
	mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
	mbox_res.mbox_base = ndev->mbox_base;
	mbox_res.mbox_size = MBOX_SIZE(ndev);
	mbox_res.name = "xdna_mailbox";
	ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
	if (!ndev->mbox) {
		XDNA_ERR(xdna, "failed to create mailbox device");
		ret = -ENODEV;
		goto stop_psp;
	}

	mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx);
	if (mgmt_mb_irq < 0) {
		ret = mgmt_mb_irq;
@@ -401,13 +406,13 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
	}

	xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4;
	ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox,
	ret = xdna_mailbox_start_channel(ndev->mgmt_chann,
					 &ndev->mgmt_x2i,
					 &ndev->mgmt_i2x,
					 xdna_mailbox_intr_reg,
					 mgmt_mb_irq);
	if (!ndev->mgmt_chann) {
		XDNA_ERR(xdna, "failed to create management mailbox channel");
	if (ret) {
		XDNA_ERR(xdna, "failed to start management mailbox channel");
		ret = -EINVAL;
		goto stop_psp;
	}
@@ -415,38 +420,41 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
	ret = aie2_mgmt_fw_init(ndev);
	if (ret) {
		XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
		goto destroy_mgmt_chann;
		goto stop_fw;
	}

	ret = aie2_pm_init(ndev);
	if (ret) {
		XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
		goto destroy_mgmt_chann;
		goto stop_fw;
	}

	ret = aie2_mgmt_fw_query(ndev);
	if (ret) {
		XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
		goto destroy_mgmt_chann;
		goto stop_fw;
	}

	ret = aie2_error_async_events_alloc(ndev);
	if (ret) {
		XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
		goto destroy_mgmt_chann;
		goto stop_fw;
	}

	ndev->dev_status = AIE2_DEV_START;

	return 0;

destroy_mgmt_chann:
stop_fw:
	aie2_suspend_fw(ndev);
	xdna_mailbox_stop_channel(ndev->mgmt_chann);
	xdna_mailbox_destroy_channel(ndev->mgmt_chann);
stop_psp:
	aie2_psp_stop(ndev->psp_hdl);
fini_smu:
	aie2_smu_fini(ndev);
free_channel:
	xdna_mailbox_free_channel(ndev->mgmt_chann);
	ndev->mgmt_chann = NULL;
disable_dev:
	pci_disable_device(pdev);

+1 −0
Original line number Diff line number Diff line
@@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,

/* aie2_message.c */
void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
+27 −0
Original line number Diff line number Diff line
@@ -135,6 +135,33 @@ u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
	return INVALID_CU_IDX;
}

/**
 * amdxdna_cmd_set_error() - Mark a command BO as failed and overwrite its data.
 * @abo: command buffer object whose header state is updated
 * @job: scheduler job; job->hwctx->client is used to look up a BO handle
 * @cmd_idx: index of the failing command inside a command chain
 * @error_state: value stored in the AMDXDNA_CMD_STATE field of the header
 *
 * The state field of @abo's header is always replaced with @error_state.
 *
 * When @abo is not an ERT_CMD_CHAIN command, the bytes of @abo that follow
 * the command header are filled with 0xff.
 *
 * When @abo is an ERT_CMD_CHAIN command, the chain's error_index is set to
 * @cmd_idx if that is below command_count, otherwise to 0. The BO referenced
 * by the handle in the chain's data[0] is then looked up, the bytes following
 * its command header are filled with 0xff, and the reference is dropped.
 *
 * Return: 0 on success, -EINVAL if the chain's data[0] handle cannot be
 * resolved (the header state and error_index have already been written by
 * then).
 */
int amdxdna_cmd_set_error(struct amdxdna_gem_obj *abo,
			  struct amdxdna_sched_job *job, u32 cmd_idx,
			  enum ert_cmd_state error_state)
{
	struct amdxdna_client *client = job->hwctx->client;
	struct amdxdna_cmd *cmd = abo->mem.kva;
	struct amdxdna_cmd_chain *chain;
	struct amdxdna_gem_obj *sub_abo;
	struct amdxdna_cmd *sub_cmd;

	/* Clear the old state bits, then install the requested error state. */
	cmd->header &= ~AMDXDNA_CMD_STATE;
	cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, error_state);

	/* Single command: overwrite everything after the header in place. */
	if (amdxdna_cmd_get_op(abo) != ERT_CMD_CHAIN) {
		memset(cmd->data, 0xff, abo->mem.size - sizeof(*cmd));
		return 0;
	}

	chain = amdxdna_cmd_get_payload(abo, NULL);
	/* An out-of-range index is reported as 0. */
	if (cmd_idx < chain->command_count)
		chain->error_index = cmd_idx;
	else
		chain->error_index = 0;

	/* The lookup takes a reference that is released below. */
	sub_abo = amdxdna_gem_get_obj(client, chain->data[0], AMDXDNA_BO_CMD);
	if (!sub_abo)
		return -EINVAL;

	sub_cmd = sub_abo->mem.kva;
	memset(sub_cmd->data, 0xff, sub_abo->mem.size - sizeof(*sub_cmd));
	amdxdna_gem_put_obj(sub_abo);

	return 0;
}

/*
 * This should be called in close() and remove(). DO NOT call in other syscalls.
 * This guarantee that when hwctx and resources will be released, if user
Loading