Commit 103d53eb authored by Dave Airlie
Browse files

Merge tag 'amd-drm-fixes-7.0-2026-02-26' of...

Merge tag 'amd-drm-fixes-7.0-2026-02-26' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-7.0-2026-02-26:

amdgpu:
- UserQ fixes
- DC fix
- RAS fixes
- VCN 5 fix
- Slot reset fix
- Remove MES workaround that's no longer needed

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260226161330.3549393-1-alexander.deucher@amd.com
parents 82a499d2 6b0d8129
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -641,6 +641,7 @@ static void aca_error_fini(struct aca_error *aerr)
		aca_bank_error_remove(aerr, bank_error);

out_unlock:
	mutex_unlock(&aerr->lock);
	mutex_destroy(&aerr->lock);
}

+10 −7
Original line number Diff line number Diff line
@@ -7059,6 +7059,15 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
	dev_info(adev->dev, "PCI error: slot reset callback!!\n");

	memset(&reset_context, 0, sizeof(reset_context));
	INIT_LIST_HEAD(&device_list);
	hive = amdgpu_get_xgmi_hive(adev);
	if (hive) {
		mutex_lock(&hive->hive_lock);
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
			list_add_tail(&tmp_adev->reset_list, &device_list);
	} else {
		list_add_tail(&adev->reset_list, &device_list);
	}

	if (adev->pcie_reset_ctx.swus)
		link_dev = adev->pcie_reset_ctx.swus;
@@ -7099,19 +7108,13 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
	set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
	INIT_LIST_HEAD(&device_list);

	hive = amdgpu_get_xgmi_hive(adev);
	if (hive) {
		mutex_lock(&hive->hive_lock);
		reset_context.hive = hive;
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
			tmp_adev->pcie_reset_ctx.in_link_reset = true;
			list_add_tail(&tmp_adev->reset_list, &device_list);
		}
	} else {
		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
		list_add_tail(&adev->reset_list, &device_list);
	}

	r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
+7 −5
Original line number Diff line number Diff line
@@ -332,13 +332,13 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
	if (!context || !context->initialized) {
		dev_err(adev->dev, "TA is not initialized\n");
		ret = -EINVAL;
		goto err_free_shared_buf;
		goto free_shared_buf;
	}

	if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) {
		dev_err(adev->dev, "Unsupported function to invoke TA\n");
		ret = -EOPNOTSUPP;
		goto err_free_shared_buf;
		goto free_shared_buf;
	}

	context->session_id = ta_id;
@@ -346,7 +346,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
	mutex_lock(&psp->ras_context.mutex);
	ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len);
	if (ret)
		goto err_free_shared_buf;
		goto unlock;

	ret = psp_fn_ta_invoke(psp, cmd_id);
	if (ret || context->resp_status) {
@@ -354,15 +354,17 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
			ret, context->resp_status);
		if (!ret) {
			ret = -EINVAL;
			goto err_free_shared_buf;
			goto unlock;
		}
	}

	if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
		ret = -EFAULT;

err_free_shared_buf:
unlock:
	mutex_unlock(&psp->ras_context.mutex);

free_shared_buf:
	kfree(shared_buf);

	return ret;
+18 −4
Original line number Diff line number Diff line
@@ -35,6 +35,8 @@
static const struct dma_fence_ops amdgpu_userq_fence_ops;
static struct kmem_cache *amdgpu_userq_fence_slab;

/*
 * Upper bound (65536) on each handle count supplied by userspace to the
 * userq signal and wait ioctls. Both ioctls compare num_syncobj_handles,
 * num_bo_write_handles and num_bo_read_handles against this limit and
 * return -EINVAL when any of them exceeds it, before the handle arrays
 * are copied in with memdup_user().
 */
#define AMDGPU_USERQ_MAX_HANDLES	(1U << 16)

int amdgpu_userq_fence_slab_init(void)
{
	amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
@@ -478,6 +480,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
	if (!amdgpu_userq_enabled(dev))
		return -ENOTSUPP;

	if (args->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES ||
	    args->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES ||
	    args->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES)
		return -EINVAL;

	num_syncobj_handles = args->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
				      size_mul(sizeof(u32), num_syncobj_handles));
@@ -664,6 +671,11 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
	if (!amdgpu_userq_enabled(dev))
		return -ENOTSUPP;

	if (wait_info->num_syncobj_handles > AMDGPU_USERQ_MAX_HANDLES ||
	    wait_info->num_bo_write_handles > AMDGPU_USERQ_MAX_HANDLES ||
	    wait_info->num_bo_read_handles > AMDGPU_USERQ_MAX_HANDLES)
		return -EINVAL;

	num_read_bo_handles = wait_info->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
				      size_mul(sizeof(u32), num_read_bo_handles));
@@ -833,7 +845,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
				if (num_fences >= wait_info->num_fences) {
					r = -EINVAL;
					goto free_fences;
				}
@@ -850,7 +862,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
				if (num_fences >= wait_info->num_fences) {
					r = -EINVAL;
					goto free_fences;
				}
@@ -874,8 +886,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
					goto free_fences;

				dma_fence_unwrap_for_each(f, &iter, fence) {
					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					if (num_fences >= wait_info->num_fences) {
						r = -EINVAL;
						dma_fence_put(fence);
						goto free_fences;
					}

@@ -898,8 +911,9 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
			if (r)
				goto free_fences;

			if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
			if (num_fences >= wait_info->num_fences) {
				r = -EINVAL;
				dma_fence_put(fence);
				goto free_fences;
			}

+0 −5
Original line number Diff line number Diff line
@@ -720,11 +720,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
	mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
	mes_set_hw_res_pkt.oversubscription_timer = 50;
	if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f)
		mes_set_hw_res_pkt.enable_lr_compute_wa = 1;
	else
		dev_info_once(mes->adev->dev,
			      "MES FW version must be >= 0x7f to enable LR compute workaround.\n");

	if (amdgpu_mes_log_enable) {
		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
Loading