Commit ffe8ac92 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'drm-next-2025-08-08' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
 "These are the fixes that built up in the merge window, mostly amdgpu
  and xe with one i915 display fix. Things seem to be in pretty good
  shape for rc1.

  i915:
   - DP LFPS fixes

  xe:
   - SRIOV: PF fixes and removal of the need for a module param
   - Fix driver unbind around Devcoredump
   - Mark xe driver as BROKEN if kernel page size is not 4kB

  amdgpu:
   - GC 9.5.0 fixes
   - SMU fix
   - DCE 6 DC fixes
   - mmhub client ID fixes
   - VRR fix
   - Backlight fix
   - UserQ fix
   - Legacy reset fix
   - Misc fixes

  amdkfd:
   - CRIU fix
   - Debugfs fix"

* tag 'drm-next-2025-08-08' of https://gitlab.freedesktop.org/drm/kernel: (28 commits)
  drm/amdgpu: add missing vram lost check for LEGACY RESET
  drm/amdgpu/discovery: fix fw based ip discovery
  drm/amdkfd: Destroy KFD debugfs after destroy KFD wq
  amdgpu/amdgpu_discovery: increase timeout limit for IFWI init
  drm/amdgpu: Update SDMA firmware version check for user queue support
  drm/amdgpu: Add NULL check for asic_funcs
  drm/amd/display: Revert "drm/amd/display: Fix AMDGPU_MAX_BL_LEVEL value"
  drm/amd/display: fix a Null pointer dereference vulnerability
  drm/amd/display: Add primary plane to commits for correct VRR handling
  drm/amdgpu: update mmhub 3.3 client id mappings
  drm/amdgpu: update mmhub 3.0.1 client id mappings
  drm/amdgpu: Retain job->vm in amdgpu_job_prepare_job
  drm/amd/display: Fix DCE 6.0 and 6.4 PLL programming.
  drm/amd/display: Don't overwrite dce60_clk_mgr
  drm/amdkfd: Fix checkpoint-restore on multi-xcc
  drm/amd: Restore cached manual clock settings during resume
  drm/amd: Restore cached power limit during resume
  drm/amdgpu: Update external revid for GC v9.5.0
  drm/amdgpu: Update supported modes for GC v9.5.0
  Mark xe driver as BROKEN if kernel page size is not 4kB
  ...
parents 2939a792 64c62751
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -2570,9 +2570,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)

	adev->firmware.gpu_info_fw = NULL;

	if (adev->mman.discovery_bin)
		return 0;

	switch (adev->asic_type) {
	default:
		return 0;
@@ -2594,6 +2591,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
		chip_name = "arcturus";
		break;
	case CHIP_NAVI12:
		if (adev->mman.discovery_bin)
			return 0;
		chip_name = "navi12";
		break;
	}
@@ -3271,6 +3270,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
	 * always assumed to be lost.
	 */
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_LEGACY:
	case AMD_RESET_METHOD_LINK:
	case AMD_RESET_METHOD_BACO:
	case AMD_RESET_METHOD_MODE1:
+41 −35
Original line number Diff line number Diff line
@@ -276,7 +276,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
	u32 msg;

	if (!amdgpu_sriov_vf(adev)) {
		/* It can take up to a second for IFWI init to complete on some dGPUs,
		/* It can take up to two seconds for IFWI init to complete on some dGPUs,
		 * but generally it should be in the 60-100ms range.  Normally this starts
		 * as soon as the device gets power so by the time the OS loads this has long
		 * completed.  However, when a card is hotplugged via e.g., USB4, we need to
@@ -284,7 +284,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
		 * continue.
		 */

		for (i = 0; i < 1000; i++) {
		for (i = 0; i < 2000; i++) {
			msg = RREG32(mmMP0_SMN_C2PMSG_33);
			if (msg & 0x80000000)
				break;
@@ -2555,40 +2555,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_RAVEN:
	case CHIP_VEGA20:
	case CHIP_ARCTURUS:
	case CHIP_ALDEBARAN:
		/* this is not fatal.  We have a fallback below
		 * if the new firmwares are not present. some of
		 * this will be overridden below to keep things
		 * consistent with the current behavior.
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		r = amdgpu_discovery_reg_base_init(adev);
		if (!r) {
			amdgpu_discovery_harvest_ip(adev);
			amdgpu_discovery_get_gfx_info(adev);
			amdgpu_discovery_get_mall_info(adev);
			amdgpu_discovery_get_vcn_info(adev);
		}
		break;
	default:
		r = amdgpu_discovery_reg_base_init(adev);
		if (r) {
			drm_err(&adev->ddev, "discovery failed: %d\n", r);
			return r;
		}

		amdgpu_discovery_harvest_ip(adev);
		amdgpu_discovery_get_gfx_info(adev);
		amdgpu_discovery_get_mall_info(adev);
		amdgpu_discovery_get_vcn_info(adev);
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		amdgpu_discovery_init(adev);
		vega10_reg_base_init(adev);
		adev->sdma.num_instances = 2;
		adev->gmc.num_umc = 4;
@@ -2611,6 +2582,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
		break;
	case CHIP_VEGA12:
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		amdgpu_discovery_init(adev);
		vega10_reg_base_init(adev);
		adev->sdma.num_instances = 2;
		adev->gmc.num_umc = 4;
@@ -2633,6 +2609,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
		break;
	case CHIP_RAVEN:
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		amdgpu_discovery_init(adev);
		vega10_reg_base_init(adev);
		adev->sdma.num_instances = 1;
		adev->vcn.num_vcn_inst = 1;
@@ -2674,6 +2655,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		}
		break;
	case CHIP_VEGA20:
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		amdgpu_discovery_init(adev);
		vega20_reg_base_init(adev);
		adev->sdma.num_instances = 2;
		adev->gmc.num_umc = 8;
@@ -2697,6 +2683,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
		break;
	case CHIP_ARCTURUS:
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		amdgpu_discovery_init(adev);
		arct_reg_base_init(adev);
		adev->sdma.num_instances = 8;
		adev->vcn.num_vcn_inst = 2;
@@ -2725,6 +2716,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
		break;
	case CHIP_ALDEBARAN:
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		amdgpu_discovery_init(adev);
		aldebaran_reg_base_init(adev);
		adev->sdma.num_instances = 5;
		adev->vcn.num_vcn_inst = 2;
@@ -2751,6 +2747,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
		break;
	default:
		r = amdgpu_discovery_reg_base_init(adev);
		if (r) {
			drm_err(&adev->ddev, "discovery failed: %d\n", r);
			return r;
		}

		amdgpu_discovery_harvest_ip(adev);
		amdgpu_discovery_get_gfx_info(adev);
		amdgpu_discovery_get_mall_info(adev);
		amdgpu_discovery_get_vcn_info(adev);
		break;
	}

+0 −7
Original line number Diff line number Diff line
@@ -365,13 +365,6 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
			dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
			goto error;
		}
		/*
		 * The VM structure might be released after the VMID is
		 * assigned, we had multiple problems with people trying to use
		 * the VM pointer so better set it to NULL.
		 */
		if (!fence)
			job->vm = NULL;
		return fence;
	}

+2 −1
Original line number Diff line number Diff line
@@ -55,7 +55,8 @@ u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev)

bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev) || !adev->asic_funcs->get_pcie_replay_count ||
	if (amdgpu_sriov_vf(adev) || !adev->asic_funcs ||
	    !adev->asic_funcs->get_pcie_replay_count ||
	    (!adev->nbio.funcs || !adev->nbio.funcs->get_pcie_replay_count))
		return false;

+4 −1
Original line number Diff line number Diff line
@@ -227,6 +227,7 @@ static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr,
					    uint16_t *nps_modes)
{
	struct amdgpu_device *adev = xcp_mgr->adev;
	uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);

	if (!num_xcp || !nps_modes || !(xcp_mgr->supp_xcp_modes & BIT(px_mode)))
		return -EINVAL;
@@ -250,12 +251,14 @@ static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr,
		*num_xcp = 4;
		*nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
			     BIT(AMDGPU_NPS4_PARTITION_MODE);
		if (gc_ver == IP_VERSION(9, 5, 0))
			*nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
		break;
	case AMDGPU_CPX_PARTITION_MODE:
		*num_xcp = NUM_XCC(adev->gfx.xcc_mask);
		*nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
			     BIT(AMDGPU_NPS4_PARTITION_MODE);
		if (amdgpu_sriov_vf(adev))
		if (gc_ver == IP_VERSION(9, 5, 0))
			*nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
		break;
	default:
Loading