Commit c4b6ddcf authored by Dave Airlie
Browse files

Merge tag 'amd-drm-next-6.18-2025-10-09' of...

Merge tag 'amd-drm-next-6.18-2025-10-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.18-2025-10-09:

amdgpu:
- DC DCE6 fixes
- GPU reset fixes
- Secure display messaging cleanup
- MES fix
- GPUVM locking fixes
- PMFW messaging cleanup
- PCI US/DS switch handling fix
- VCN queue reset fix
- DC FPU handling fix
- DCN 3.5 fix
- DC mirroring fix

amdkfd:
- Fix kfd process ref leak
- mmap write lock handling fix
- Fix comments in IOCTL

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://lore.kernel.org/r/20251009162915.981503-1-alexander.deucher@amd.com
parents 73bc073d d07e1426
Loading
Loading
Loading
Loading
+7 −2
Original line number Diff line number Diff line
@@ -2586,12 +2586,17 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
			 * from the KFD, trigger a segmentation fault in VM debug mode.
			 */
			if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
				struct kfd_process *p;

				pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
								pid_nr(process_info->pid), mem->va);

				// Send GPU VM fault to user space
				kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
								mem->va);
				p = kfd_lookup_process_by_pid(process_info->pid);
				if (p) {
					kfd_signal_vm_fault_event_with_userptr(p, mem->va);
					kfd_unref_process(p);
				}
			}

			ret = 0;
+30 −18
Original line number Diff line number Diff line
@@ -6389,23 +6389,28 @@ static int amdgpu_device_sched_resume(struct list_head *device_list,
		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));

		if (tmp_adev->asic_reset_res)
			r = tmp_adev->asic_reset_res;

		tmp_adev->asic_reset_res = 0;

		if (r) {
		if (tmp_adev->asic_reset_res) {
			/* bad news, how to tell it to userspace ?
			 * for ras error, we should report GPU bad status instead of
			 * reset failure
			 */
			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
				dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
					atomic_read(&tmp_adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
				dev_info(
					tmp_adev->dev,
					"GPU reset(%d) failed with error %d \n",
					atomic_read(
						&tmp_adev->gpu_reset_counter),
					tmp_adev->asic_reset_res);
			amdgpu_vf_error_put(tmp_adev,
					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
					    tmp_adev->asic_reset_res);
			if (!r)
				r = tmp_adev->asic_reset_res;
			tmp_adev->asic_reset_res = 0;
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
				 atomic_read(&tmp_adev->gpu_reset_counter));
			if (amdgpu_acpi_smart_shift_update(tmp_adev,
							   AMDGPU_SS_DEV_D0))
				dev_warn(tmp_adev->dev,
@@ -7157,28 +7162,35 @@ void amdgpu_pci_resume(struct pci_dev *pdev)

static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
{
	struct pci_dev *parent = pci_upstream_bridge(adev->pdev);
	struct pci_dev *swus, *swds;
	int r;

	if (!parent || parent->vendor != PCI_VENDOR_ID_ATI)
	swds = pci_upstream_bridge(adev->pdev);
	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
		return;
	swus = pci_upstream_bridge(swds);
	if (!swus ||
	    (swus->vendor != PCI_VENDOR_ID_ATI &&
	     swus->vendor != PCI_VENDOR_ID_AMD) ||
	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
		return;

	/* If already saved, return */
	if (adev->pcie_reset_ctx.swus)
		return;
	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
	r = pci_save_state(parent);
	r = pci_save_state(swds);
	if (r)
		return;
	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(parent);
	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);

	parent = pci_upstream_bridge(parent);
	r = pci_save_state(parent);
	r = pci_save_state(swus);
	if (r)
		return;
	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(parent);
	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);

	adev->pcie_reset_ctx.swus = parent;
	adev->pcie_reset_ctx.swus = swus;
}

static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
+5 −0
Original line number Diff line number Diff line
@@ -1102,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		if (amdgpu_in_reset(adev))
			goto failed_kiq_read;

		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}
@@ -1171,6 +1174,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		if (amdgpu_in_reset(adev))
			goto failed_kiq_write;

		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+2 −8
Original line number Diff line number Diff line
@@ -1421,14 +1421,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)

	amdgpu_debugfs_vm_init(file_priv);

	r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
	r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid);
	if (r)
		goto error_pasid;

	r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
	if (r)
		goto error_vm;

	fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
	if (!fpriv->prt_va) {
		r = -ENOMEM;
@@ -1468,10 +1464,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
	amdgpu_vm_fini(adev, &fpriv->vm);

error_pasid:
	if (pasid) {
	if (pasid)
		amdgpu_pasid_free(pasid);
		amdgpu_vm_set_pasid(adev, &fpriv->vm, 0);
	}

	kfree(fpriv);

+1 −1
Original line number Diff line number Diff line
@@ -2352,7 +2352,7 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
	}

	ret = psp_ta_load(psp, &psp->securedisplay_context.context);
	if (!ret) {
	if (!ret && !psp->securedisplay_context.context.resp_status) {
		psp->securedisplay_context.context.initialized = true;
		mutex_init(&psp->securedisplay_context.mutex);
	} else
Loading