Commit 5839d27d authored by Lijo Lazar's avatar Lijo Lazar Committed by Alex Deucher
Browse files

drm/amdgpu: Use init level for pending_reset flag



Drop pending_reset flag in gmc block. Instead use init level to
determine which type of init is preferred - in this case MINIMAL.

Signed-off-by: default avatarLijo Lazar <lijo.lazar@amd.com>
Acked-by: default avatarRajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Tested-by: default avatarRajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 9e0feb79
Loading
Loading
Loading
Loading
+8 −26
Original line number Diff line number Diff line
@@ -1699,7 +1699,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
	}

	/* Don't post if we need to reset whole hive on init */
	if (adev->gmc.xgmi.pending_reset)
	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
		return false;

	if (adev->has_hw_reset) {
@@ -3015,7 +3015,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
		amdgpu_ttm_set_buffer_funcs_status(adev, true);

	/* Don't init kfd if whole hive need to be reset during init */
	if (!adev->gmc.xgmi.pending_reset) {
	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
		kgd2kfd_init_zone_device(adev);
		amdgpu_amdkfd_device_init(adev);
	}
@@ -3529,14 +3529,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
		}

		/* skip unnecessary suspend if we do not initialize them yet */
		if (adev->gmc.xgmi.pending_reset &&
		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
			adev->ip_blocks[i].status.hw = false;
		if (!amdgpu_ip_member_of_hwini(
			    adev, adev->ip_blocks[i].version->type))
			continue;
		}

		/* skip suspend of gfx/mes and psp for S0ix
		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
@@ -4351,20 +4346,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
		if (adev->gmc.xgmi.num_physical_nodes) {
			dev_info(adev->dev, "Pending hive reset.\n");
			adev->gmc.xgmi.pending_reset = true;
			/* Only need to init necessary block for SMU to handle the reset */
			for (i = 0; i < adev->num_ip_blocks; i++) {
				if (!adev->ip_blocks[i].status.valid)
					continue;
				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
					DRM_DEBUG("IP %s disabled for hw_init.\n",
						adev->ip_blocks[i].version->funcs->name);
					adev->ip_blocks[i].status.hw = true;
				}
			}
			amdgpu_set_init_level(adev,
					      AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
		} else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
				   !amdgpu_device_has_display_hardware(adev)) {
					r = psp_gpu_reset(adev);
@@ -4472,7 +4455,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	/* enable clockgating, etc. after ib tests, etc. since some blocks require
	 * explicit gating rather than handling it automatically.
	 */
	if (!adev->gmc.xgmi.pending_reset) {
	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
		r = amdgpu_device_ip_late_init(adev);
		if (r) {
			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
@@ -4549,7 +4532,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	if (px)
		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);

	if (adev->gmc.xgmi.pending_reset)
	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
				   msecs_to_jiffies(AMDGPU_RESUME_MS));

@@ -5443,7 +5426,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
			/* For XGMI run all resets in parallel to speed up the process */
			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
				tmp_adev->gmc.xgmi.pending_reset = false;
				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
					r = -EALREADY;
			} else
+0 −1
Original line number Diff line number Diff line
@@ -2512,7 +2512,6 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
	for (i = 0; i < mgpu_info.num_dgpu; i++) {
		adev = mgpu_info.gpu_ins[i].adev;
		flush_work(&adev->xgmi_reset_work);
		adev->gmc.xgmi.pending_reset = false;
	}

	/* reset function will rebuild the xgmi hive info , clear it now */
+0 −1
Original line number Diff line number Diff line
@@ -182,7 +182,6 @@ struct amdgpu_xgmi {
	bool supported;
	struct ras_common_if *ras_if;
	bool connected_to_cpu;
	bool pending_reset;
	struct amdgpu_xgmi_ras *ras;
};

+1 −1
Original line number Diff line number Diff line
@@ -3185,7 +3185,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
	 * when the GPU is pending on XGMI reset during probe time
	 * (Mostly after second bus reset), skip it now
	 */
	if (adev->gmc.xgmi.pending_reset)
	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
		return 0;
	ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
	/*
+3 −3
Original line number Diff line number Diff line
@@ -860,7 +860,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
	if (!adev->gmc.xgmi.supported)
		return 0;

	if (!adev->gmc.xgmi.pending_reset &&
	if ((adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
	    amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
		ret = psp_xgmi_initialize(&adev->psp, false, true);
		if (ret) {
@@ -907,7 +907,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)

	task_barrier_add_task(&hive->tb);

	if (!adev->gmc.xgmi.pending_reset &&
	if ((adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
	    amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
			/* update node list for other device in the hive */
@@ -985,7 +985,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
		}
	}

	if (!ret && !adev->gmc.xgmi.pending_reset)
	if (!ret && (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI))
		ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);

exit_unlock:
Loading