Commit 284fc30e authored by Linus Torvalds
Browse files

Merge tag 'drm-next-2025-10-11-1' of https://gitlab.freedesktop.org/drm/kernel

Pull more drm fixes from Dave Airlie:
 "Just the follow up fixes for rc1 from the next branch, amdgpu and xe
  mostly with a single v3d fix in there.

  amdgpu:
   - DC DCE6 fixes
   - GPU reset fixes
   - Secure display messaging cleanup
   - MES fix
   - GPUVM locking fixes
   - PMFW messaging cleanup
   - PCI US/DS switch handling fix
   - VCN queue reset fix
   - DC FPU handling fix
   - DCN 3.5 fix
   - DC mirroring fix

  amdkfd:
   - Fix kfd process ref leak
   - mmap write lock handling fix
   - Fix comments in IOCTL

  xe:
   - Fix build with clang 16
   - Fix handling of invalid configfs syntax usage and spell out the
     expected syntax in the documentation
   - Do not try to late bind firmware when running as a VF, since a VF
     shouldn't handle firmware loading
   - Fix idle assertion for local BOs
   - Fix uninitialized variable for late binding
   - Do not require perfmon_capable to expose free memory at page
     granularity. Handle it like other drm drivers do
   - Fix lock handling on suspend error path
   - Fix I2C controller resume after S3

  v3d:
   - fix fence locking"

* tag 'drm-next-2025-10-11-1' of https://gitlab.freedesktop.org/drm/kernel: (34 commits)
  drm/amd/display: Incorrect Mirror Cositing
  drm/amd/display: Enable Dynamic DTBCLK Switch
  drm/amdgpu: Report individual reset error
  drm/amdgpu: partially revert "revert to old status lock handling v3"
  drm/amd/display: Fix unsafe uses of kernel mode FPU
  drm/amd/pm: Disable VCN queue reset on SMU v13.0.6 due to regression
  drm/amdgpu: Fix general protection fault in amdgpu_vm_bo_reset_state_machine
  drm/amdgpu: Check swus/ds for switch state save
  drm/amdkfd: Fix two comments in kfd_ioctl.h
  drm/amd/pm: Avoid interface mismatch messaging
  drm/amdgpu: Merge amdgpu_vm_set_pasid into amdgpu_vm_init
  drm/amd/amdgpu: Fix the mes version that support inv_tlbs
  drm/amd: Check whether secure display TA loaded successfully
  drm/amdkfd: Fix mmap write lock not release
  drm/amdkfd: Fix kfd process ref leaking when userptr unmapping
  drm/amdgpu: Fix for GPU reset being blocked by KIQ I/O.
  drm/amd/display: Disable scaling on DCE6 for now
  drm/amd/display: Properly disable scaling on DCE6
  drm/amd/display: Properly clear SCL_*_FILTER_CONTROL on DCE6
  drm/amd/display: Add missing DCE6 SCL_HORZ_FILTER_INIT* SRIs
  ...
parents 1e5d41b9 c4b6ddcf
Loading
Loading
Loading
Loading
+7 −2
Original line number Diff line number Diff line
@@ -2586,12 +2586,17 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
			 * from the KFD, trigger a segmentation fault in VM debug mode.
			 */
			if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
				struct kfd_process *p;

				pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
								pid_nr(process_info->pid), mem->va);

				// Send GPU VM fault to user space
				kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
								mem->va);
				p = kfd_lookup_process_by_pid(process_info->pid);
				if (p) {
					kfd_signal_vm_fault_event_with_userptr(p, mem->va);
					kfd_unref_process(p);
				}
			}

			ret = 0;
+30 −18
Original line number Diff line number Diff line
@@ -6389,23 +6389,28 @@ static int amdgpu_device_sched_resume(struct list_head *device_list,
		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));

		if (tmp_adev->asic_reset_res)
			r = tmp_adev->asic_reset_res;

		tmp_adev->asic_reset_res = 0;

		if (r) {
		if (tmp_adev->asic_reset_res) {
			/* bad news, how to tell it to userspace ?
			 * for ras error, we should report GPU bad status instead of
			 * reset failure
			 */
			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
				dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
					atomic_read(&tmp_adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
				dev_info(
					tmp_adev->dev,
					"GPU reset(%d) failed with error %d \n",
					atomic_read(
						&tmp_adev->gpu_reset_counter),
					tmp_adev->asic_reset_res);
			amdgpu_vf_error_put(tmp_adev,
					    AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
					    tmp_adev->asic_reset_res);
			if (!r)
				r = tmp_adev->asic_reset_res;
			tmp_adev->asic_reset_res = 0;
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
				 atomic_read(&tmp_adev->gpu_reset_counter));
			if (amdgpu_acpi_smart_shift_update(tmp_adev,
							   AMDGPU_SS_DEV_D0))
				dev_warn(tmp_adev->dev,
@@ -7157,28 +7162,35 @@ void amdgpu_pci_resume(struct pci_dev *pdev)

static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
{
	struct pci_dev *parent = pci_upstream_bridge(adev->pdev);
	struct pci_dev *swus, *swds;
	int r;

	if (!parent || parent->vendor != PCI_VENDOR_ID_ATI)
	swds = pci_upstream_bridge(adev->pdev);
	if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
	    pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
		return;
	swus = pci_upstream_bridge(swds);
	if (!swus ||
	    (swus->vendor != PCI_VENDOR_ID_ATI &&
	     swus->vendor != PCI_VENDOR_ID_AMD) ||
	    pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
		return;

	/* If already saved, return */
	if (adev->pcie_reset_ctx.swus)
		return;
	/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
	r = pci_save_state(parent);
	r = pci_save_state(swds);
	if (r)
		return;
	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(parent);
	adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);

	parent = pci_upstream_bridge(parent);
	r = pci_save_state(parent);
	r = pci_save_state(swus);
	if (r)
		return;
	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(parent);
	adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);

	adev->pcie_reset_ctx.swus = parent;
	adev->pcie_reset_ctx.swus = swus;
}

static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
+5 −0
Original line number Diff line number Diff line
@@ -1102,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		if (amdgpu_in_reset(adev))
			goto failed_kiq_read;

		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}
@@ -1171,6 +1174,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		if (amdgpu_in_reset(adev))
			goto failed_kiq_write;

		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+2 −8
Original line number Diff line number Diff line
@@ -1421,14 +1421,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)

	amdgpu_debugfs_vm_init(file_priv);

	r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
	r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid);
	if (r)
		goto error_pasid;

	r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
	if (r)
		goto error_vm;

	fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
	if (!fpriv->prt_va) {
		r = -ENOMEM;
@@ -1468,10 +1464,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
	amdgpu_vm_fini(adev, &fpriv->vm);

error_pasid:
	if (pasid) {
	if (pasid)
		amdgpu_pasid_free(pasid);
		amdgpu_vm_set_pasid(adev, &fpriv->vm, 0);
	}

	kfree(fpriv);

+1 −1
Original line number Diff line number Diff line
@@ -2352,7 +2352,7 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
	}

	ret = psp_ta_load(psp, &psp->securedisplay_context.context);
	if (!ret) {
	if (!ret && !psp->securedisplay_context.context.resp_status) {
		psp->securedisplay_context.context.initialized = true;
		mutex_init(&psp->securedisplay_context.mutex);
	} else
Loading