Commit 6122f5c7 authored by Trigger Huang, committed by Alex Deucher
Browse files

drm/amdgpu: skip printing vram_lost if needed



The VRAM lost status can only be obtained after a GPU reset occurs, but
sometimes a dev core dump can happen before the GPU reset. So a new
argument is added to tell the dev core dump implementation whether to
skip printing the vram_lost status in the dump.
This patch also decouples the core dump function from the GPU reset
function by replacing the amdgpu_reset_context argument with an
amdgpu_job argument that specifies the context for the core dump.

V2: Inform user if VRAM lost check is skipped so users don't assume
VRAM wasn't lost (Alex)

Signed-off-by: Trigger Huang <Trigger.Huang@amd.com>
Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 7c1a2d8a
Loading
Loading
Loading
Loading
+11 −9
Original line number Diff line number Diff line
@@ -28,8 +28,8 @@
#include "atom.h"

#ifndef CONFIG_DEV_COREDUMP
void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
		     struct amdgpu_reset_context *reset_context)
void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
		     bool vram_lost, struct amdgpu_job *job)
{
}
#else
@@ -315,7 +315,9 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
		}
	}

	if (coredump->reset_vram_lost)
	if (coredump->skip_vram_check)
		drm_printf(&p, "VRAM lost check is skipped!\n");
	else if (coredump->reset_vram_lost)
		drm_printf(&p, "VRAM is lost due to GPU reset!\n");

	return count - iter.remain;
@@ -326,12 +328,11 @@ static void amdgpu_devcoredump_free(void *data)
	kfree(data);
}

void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
		     struct amdgpu_reset_context *reset_context)
void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
		     bool vram_lost, struct amdgpu_job *job)
{
	struct amdgpu_coredump_info *coredump;
	struct drm_device *dev = adev_to_drm(adev);
	struct amdgpu_job *job = reset_context->job;
	struct amdgpu_coredump_info *coredump;
	struct drm_sched_job *s_job;

	coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
@@ -341,11 +342,12 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
		return;
	}

	coredump->skip_vram_check = skip_vram_check;
	coredump->reset_vram_lost = vram_lost;

	if (reset_context->job && reset_context->job->vm) {
	if (job && job->vm) {
		struct amdgpu_vm *vm = job->vm;
		struct amdgpu_task_info *ti;
		struct amdgpu_vm *vm = reset_context->job->vm;

		ti = amdgpu_vm_get_task_info_vm(vm);
		if (ti) {
+3 −4
Original line number Diff line number Diff line
@@ -26,7 +26,6 @@
#define __AMDGPU_DEV_COREDUMP_H__

#include "amdgpu.h"
#include "amdgpu_reset.h"

#ifdef CONFIG_DEV_COREDUMP

@@ -36,12 +35,12 @@ struct amdgpu_coredump_info {
	struct amdgpu_device            *adev;
	struct amdgpu_task_info         reset_task_info;
	struct timespec64               reset_time;
	bool                            skip_vram_check;
	bool                            reset_vram_lost;
	struct amdgpu_ring              *ring;
};
#endif

void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
		     struct amdgpu_reset_context *reset_context);

void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
		     bool vram_lost, struct amdgpu_job *job);
#endif
+1 −1
Original line number Diff line number Diff line
@@ -5489,7 +5489,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);

				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
					amdgpu_coredump(tmp_adev, vram_lost, reset_context);
					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);

				if (vram_lost) {
					DRM_INFO("VRAM is lost due to GPU reset!\n");