Commit 04141c05 authored by Lijo Lazar, committed by Alex Deucher
Browse files

drm/amdgpu: Extend bus status check to more cases



In case of unexpected errors, check if device is alive on the bus.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Asad Kamal <asad.kamal@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 16704901
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -1762,4 +1762,19 @@ extern const struct attribute_group amdgpu_flash_attr_group;

void amdgpu_set_init_level(struct amdgpu_device *adev,
			   enum amdgpu_init_lvl_id lvl);

/**
 * amdgpu_device_bus_status_check - probe whether the device still answers on the bus
 * @adev: amdgpu device whose PCI function (adev->pdev) is probed
 *
 * Reads the PCI_COMMAND dword from the device's config space. The device is
 * reported as lost when either the config read itself returns non-zero or
 * the value read back is flagged by PCI_POSSIBLE_ERROR(). In that case an
 * error is logged against adev->dev.
 *
 * Return: 0 when the read succeeds with a plausible value, -ENODEV otherwise.
 */
static inline int amdgpu_device_bus_status_check(struct amdgpu_device *adev)
{
	u32 cmd;

	/* Healthy path first: the read worked and the value is not an error pattern. */
	if (!pci_read_config_dword(adev->pdev, PCI_COMMAND, &cmd) &&
	    !PCI_POSSIBLE_ERROR(cmd))
		return 0;

	dev_err(adev->dev, "device lost from bus!");
	return -ENODEV;
}

#endif
+1 −6
Original line number Diff line number Diff line
@@ -6071,14 +6071,9 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle)
{
	struct amdgpu_device *tmp_adev;
	int ret = 0;
	u32 status;

	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
		if (PCI_POSSIBLE_ERROR(status)) {
			dev_err(tmp_adev->dev, "device lost from bus!");
			ret = -ENODEV;
		}
		ret |= amdgpu_device_bus_status_check(tmp_adev);
	}

	return ret;
+4 −1
Original line number Diff line number Diff line
@@ -353,11 +353,14 @@ static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)

	if (adev->nbio.funcs->get_compute_partition_mode) {
		mode = adev->nbio.funcs->get_compute_partition_mode(adev);
		if (mode != derv_mode)
		if (mode != derv_mode) {
			dev_warn(
				adev->dev,
				"Mismatch in compute partition mode - reported : %d derived : %d",
				mode, derv_mode);
			if (derv_mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
				amdgpu_device_bus_status_check(adev);
		}
	}

	return mode;
+10 −0
Original line number Diff line number Diff line
@@ -86,6 +86,7 @@ static void smu_cmn_read_arg(struct smu_context *smu,
#define SMU_RESP_BUSY_OTHER     0xFC
#define SMU_RESP_DEBUG_END      0xFB

/*
 * All-bits-set response value, treated as unexpected. When the error printer
 * sees it, it checks whether the device is still present on the bus.
 */
#define SMU_RESP_UNEXP (~0U)
/**
 * __smu_cmn_poll_stat -- poll for a status from the SMU
 * @smu: a pointer to SMU context
@@ -171,6 +172,15 @@ static void __smu_cmn_reg_print_error(struct smu_context *smu,
		dev_err_ratelimited(adev->dev,
				    "SMU: I'm debugging!");
		break;
	case SMU_RESP_UNEXP:
		if (amdgpu_device_bus_status_check(smu->adev)) {
			/* print error immediately if device is off the bus */
			dev_err(adev->dev,
				"SMU: response:0x%08X for index:%d param:0x%08X message:%s?",
				reg_c2pmsg_90, msg_index, param, message);
			break;
		}
		fallthrough;
	default:
		dev_err_ratelimited(adev->dev,
				    "SMU: response:0x%08X for index:%d param:0x%08X message:%s?",