Commit f4341197 authored by Xiang Liu, committed by Alex Deucher
Browse files

drm/amdgpu: Add debug mask to disable CE logs



Add debug mask to disable kernel logs of RAS correctable errors,
including both ACA and CE error counter kernel messages.

Signed-off-by: Xiang Liu <xiang.liu@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent fb5ec217
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1282,6 +1282,7 @@ struct amdgpu_device {
	bool                            debug_exp_resets;
	bool                            debug_disable_gpu_ring_reset;
	bool                            debug_vm_userptr;
	bool                            debug_disable_ce_logs;

	/* Protection for the following isolation structure */
	struct mutex                    enforce_isolation_mutex;
+5 −0
Original line number Diff line number Diff line
@@ -115,6 +115,11 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st
	u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
	int i;

	if (adev->debug_disable_ce_logs &&
	    bank->smu_err_type == ACA_SMU_TYPE_CE &&
	    !ACA_BANK_ERR_IS_DEFFERED(bank))
		return;

	RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
	/* plus 1 for output format, e.g: ACA[08/08]: xxxx */
	for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
+6 −0
Original line number Diff line number Diff line
@@ -144,6 +144,7 @@ enum AMDGPU_DEBUG_MASK {
	AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
	AMDGPU_DEBUG_SMU_POOL = BIT(7),
	AMDGPU_DEBUG_VM_USERPTR = BIT(8),
	AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
};

unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2278,6 +2279,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
		pr_info("debug: VM mode debug for userptr is enabled\n");
		adev->debug_vm_userptr = true;
	}

	if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_RAS_CE_LOG) {
		pr_info("debug: disable kernel logs of correctalbe errors\n");
		adev->debug_disable_ce_logs = true;
	}
}

static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
+3 −0
Original line number Diff line number Diff line
@@ -1107,6 +1107,9 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
					      err_info->de_count, blk_name);
			}
		} else {
			if (adev->debug_disable_ce_logs)
				return;

			for_each_ras_error(err_node, err_data) {
				err_info = &err_node->err_info;
				mcm_info = &err_info->mcm_info;