Commit 64c62751 authored by Dave Airlie
Browse files

Merge tag 'amd-drm-fixes-6.17-2025-08-07' of...

Merge tag 'amd-drm-fixes-6.17-2025-08-07' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-fixes-6.17-2025-08-07:

amdgpu:
- GC 9.5.0 fixes
- SMU fix
- DCE 6 DC fixes
- mmhub client ID fixes
- VRR fix
- Backlight fix
- UserQ fix
- Legacy reset fix
- Misc fixes

amdkfd:
- CRIU fix
- Debugfs fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://lore.kernel.org/r/20250807132030.1168068-1-alexander.deucher@amd.com
parents 10acca92 81699fe8
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -2570,9 +2570,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)

	adev->firmware.gpu_info_fw = NULL;

	if (adev->mman.discovery_bin)
		return 0;

	switch (adev->asic_type) {
	default:
		return 0;
@@ -2594,6 +2591,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
		chip_name = "arcturus";
		break;
	case CHIP_NAVI12:
		if (adev->mman.discovery_bin)
			return 0;
		chip_name = "navi12";
		break;
	}
@@ -3271,6 +3270,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
	 * always assumed to be lost.
	 */
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_LEGACY:
	case AMD_RESET_METHOD_LINK:
	case AMD_RESET_METHOD_BACO:
	case AMD_RESET_METHOD_MODE1:
+41 −35
Original line number Diff line number Diff line
@@ -276,7 +276,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
	u32 msg;

	if (!amdgpu_sriov_vf(adev)) {
		/* It can take up to a second for IFWI init to complete on some dGPUs,
		/* It can take up to two seconds for IFWI init to complete on some dGPUs,
		 * but generally it should be in the 60-100ms range.  Normally this starts
		 * as soon as the device gets power so by the time the OS loads this has long
		 * completed.  However, when a card is hotplugged via e.g., USB4, we need to
@@ -284,7 +284,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
		 * continue.
		 */

		for (i = 0; i < 1000; i++) {
		for (i = 0; i < 2000; i++) {
			msg = RREG32(mmMP0_SMN_C2PMSG_33);
			if (msg & 0x80000000)
				break;
@@ -2555,40 +2555,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_RAVEN:
	case CHIP_VEGA20:
	case CHIP_ARCTURUS:
	case CHIP_ALDEBARAN:
		/* this is not fatal.  We have a fallback below
		 * if the new firmwares are not present. some of
		 * this will be overridden below to keep things
		 * consistent with the current behavior.
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		r = amdgpu_discovery_reg_base_init(adev);
		if (!r) {
			amdgpu_discovery_harvest_ip(adev);
			amdgpu_discovery_get_gfx_info(adev);
			amdgpu_discovery_get_mall_info(adev);
			amdgpu_discovery_get_vcn_info(adev);
		}
		break;
	default:
		r = amdgpu_discovery_reg_base_init(adev);
		if (r) {
			drm_err(&adev->ddev, "discovery failed: %d\n", r);
			return r;
		}

		amdgpu_discovery_harvest_ip(adev);
		amdgpu_discovery_get_gfx_info(adev);
		amdgpu_discovery_get_mall_info(adev);
		amdgpu_discovery_get_vcn_info(adev);
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		amdgpu_discovery_init(adev);
		vega10_reg_base_init(adev);
		adev->sdma.num_instances = 2;
		adev->gmc.num_umc = 4;
@@ -2611,6 +2582,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
		break;
	case CHIP_VEGA12:
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		amdgpu_discovery_init(adev);
		vega10_reg_base_init(adev);
		adev->sdma.num_instances = 2;
		adev->gmc.num_umc = 4;
@@ -2633,6 +2609,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
		break;
	case CHIP_RAVEN:
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		amdgpu_discovery_init(adev);
		vega10_reg_base_init(adev);
		adev->sdma.num_instances = 1;
		adev->vcn.num_vcn_inst = 1;
@@ -2674,6 +2655,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		}
		break;
	case CHIP_VEGA20:
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		amdgpu_discovery_init(adev);
		vega20_reg_base_init(adev);
		adev->sdma.num_instances = 2;
		adev->gmc.num_umc = 8;
@@ -2697,6 +2683,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
		break;
	case CHIP_ARCTURUS:
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		amdgpu_discovery_init(adev);
		arct_reg_base_init(adev);
		adev->sdma.num_instances = 8;
		adev->vcn.num_vcn_inst = 2;
@@ -2725,6 +2716,11 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
		break;
	case CHIP_ALDEBARAN:
		/* This is not fatal.  We only need the discovery
		 * binary for sysfs.  We don't need it for a
		 * functional system.
		 */
		amdgpu_discovery_init(adev);
		aldebaran_reg_base_init(adev);
		adev->sdma.num_instances = 5;
		adev->vcn.num_vcn_inst = 2;
@@ -2751,6 +2747,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
		adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
		break;
	default:
		r = amdgpu_discovery_reg_base_init(adev);
		if (r) {
			drm_err(&adev->ddev, "discovery failed: %d\n", r);
			return r;
		}

		amdgpu_discovery_harvest_ip(adev);
		amdgpu_discovery_get_gfx_info(adev);
		amdgpu_discovery_get_mall_info(adev);
		amdgpu_discovery_get_vcn_info(adev);
		break;
	}

+0 −7
Original line number Diff line number Diff line
@@ -365,13 +365,6 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
			dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
			goto error;
		}
		/*
		 * The VM structure might be released after the VMID is
		 * assigned, we had multiple problems with people trying to use
		 * the VM pointer so better set it to NULL.
		 */
		if (!fence)
			job->vm = NULL;
		return fence;
	}

+2 −1
Original line number Diff line number Diff line
@@ -55,7 +55,8 @@ u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev)

bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev) || !adev->asic_funcs->get_pcie_replay_count ||
	if (amdgpu_sriov_vf(adev) || !adev->asic_funcs ||
	    !adev->asic_funcs->get_pcie_replay_count ||
	    (!adev->nbio.funcs || !adev->nbio.funcs->get_pcie_replay_count))
		return false;

+4 −1
Original line number Diff line number Diff line
@@ -227,6 +227,7 @@ static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr,
					    uint16_t *nps_modes)
{
	struct amdgpu_device *adev = xcp_mgr->adev;
	uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);

	if (!num_xcp || !nps_modes || !(xcp_mgr->supp_xcp_modes & BIT(px_mode)))
		return -EINVAL;
@@ -250,12 +251,14 @@ static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr,
		*num_xcp = 4;
		*nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
			     BIT(AMDGPU_NPS4_PARTITION_MODE);
		if (gc_ver == IP_VERSION(9, 5, 0))
			*nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
		break;
	case AMDGPU_CPX_PARTITION_MODE:
		*num_xcp = NUM_XCC(adev->gfx.xcc_mask);
		*nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
			     BIT(AMDGPU_NPS4_PARTITION_MODE);
		if (amdgpu_sriov_vf(adev))
		if (gc_ver == IP_VERSION(9, 5, 0))
			*nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
		break;
	default:
Loading