Commit 4bbba79a authored by Pierre-Eric Pelloux-Prayer's avatar Pierre-Eric Pelloux-Prayer Committed by Alex Deucher
Browse files

drm/amdgpu: move devcoredump generation to a worker



Update the way drm_coredump_printer is used based on its documentation
and Xe's code: the main idea is to generate the final version in one go
and then use memcpy to return the chunks requested by the caller of
amdgpu_devcoredump_read.

The generation is moved to a separate worker thread.

This cuts the time to copy the dump from 40s to ~0s on my machine.

---
v3:
- removed adev->coredump_in_progress and instead use work as
  the synchronisation mechanism
- use kvfree instead of kfree
---

Signed-off-by: default avatarPierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Acked-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Acked-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 15e19d83
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -327,6 +327,7 @@ struct kfd_vm_fault_info;
struct amdgpu_hive_info;
struct amdgpu_reset_context;
struct amdgpu_reset_control;
struct amdgpu_coredump_info;

enum amdgpu_cp_irq {
	AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
@@ -1147,6 +1148,11 @@ struct amdgpu_device {

	struct amdgpu_reset_domain	*reset_domain;

#ifdef CONFIG_DEV_COREDUMP
	struct amdgpu_coredump_info	*coredump;
	struct work_struct		coredump_work;
#endif

	struct mutex			benchmark_mutex;

	bool                            scpm_enabled;
+76 −7
Original line number Diff line number Diff line
@@ -32,8 +32,13 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
		     bool vram_lost, struct amdgpu_job *job)
{
}
void amdgpu_coredump_init(struct amdgpu_device *adev)
{
}
#else

#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024)

const char *hw_ip_names[MAX_HWIP] = {
	[GC_HWIP]		= "GC",
	[HDP_HWIP]		= "HDP",
@@ -196,11 +201,9 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
}

static ssize_t
amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
			void *data, size_t datalen)
amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_info *coredump)
{
	struct drm_printer p;
	struct amdgpu_coredump_info *coredump = data;
	struct drm_print_iterator iter;
	struct amdgpu_vm_fault_info *fault_info;
	struct amdgpu_ip_block *ip_block;
@@ -208,7 +211,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,

	iter.data = buffer;
	iter.offset = 0;
	iter.start = offset;
	iter.remain = count;

	p = drm_coredump_printer(&iter);
@@ -322,9 +324,63 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
	return count - iter.remain;
}

static ssize_t
amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
			void *data, size_t datalen)
{
	struct amdgpu_coredump_info *coredump = data;
	ssize_t byte_copied;

	if (!coredump)
		return -ENODEV;

	if (!coredump->formatted)
		return -ENODEV;

	if (offset >= coredump->formatted_size)
		return 0;

	byte_copied = count < coredump->formatted_size - offset ? count :
		coredump->formatted_size - offset;
	memcpy(buffer, coredump->formatted + offset, byte_copied);

	return byte_copied;
}

static void amdgpu_devcoredump_free(void *data)
{
	kfree(data);
	struct amdgpu_coredump_info *coredump = data;

	kvfree(coredump->formatted);
	kvfree(data);
}

static void amdgpu_devcoredump_deferred_work(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, typeof(*adev), coredump_work);
	struct amdgpu_coredump_info *coredump = adev->coredump;

	/* Do a one-time preparation of the coredump output because
	 * repeatingly calling drm_coredump_printer is very slow.
	 */
	coredump->formatted_size = amdgpu_devcoredump_format(
		NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump);
	coredump->formatted = kvzalloc(coredump->formatted_size, GFP_KERNEL);
	if (!coredump->formatted) {
		amdgpu_devcoredump_free(coredump);
		goto end;
	}

	amdgpu_devcoredump_format(coredump->formatted, coredump->formatted_size, coredump);

	/* If there's an existing coredump for this device, the free function will be
	 * called immediately so coredump might be invalid after the call to dev_coredumpm.
	 */
	dev_coredumpm(coredump->adev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);

end:
	adev->coredump = NULL;
}

void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
@@ -334,6 +390,10 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
	struct amdgpu_coredump_info *coredump;
	struct drm_sched_job *s_job;

	/* No need to generate a new coredump if there's one in progress already. */
	if (work_pending(&adev->coredump_work))
		return;

	coredump = kzalloc_obj(*coredump, GFP_NOWAIT);
	if (!coredump)
		return;
@@ -360,11 +420,20 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,

	ktime_get_ts64(&coredump->reset_time);

	dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
	/* Update the current coredump pointer (no lock needed, this function can only be called
	 * from a single thread)
	 */
	adev->coredump = coredump;
	/* Kick off coredump formatting to a worker thread. */
	queue_work(system_unbound_wq, &adev->coredump_work);

	drm_info(dev, "AMDGPU device coredump file has been created\n");
	drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
		 dev->primary->index);
}

void amdgpu_coredump_init(struct amdgpu_device *adev)
{
	INIT_WORK(&adev->coredump_work, amdgpu_devcoredump_deferred_work);
}
#endif
+7 −0
Original line number Diff line number Diff line
@@ -35,12 +35,19 @@ struct amdgpu_coredump_info {
	struct amdgpu_device            *adev;
	struct amdgpu_task_info         reset_task_info;
	struct timespec64               reset_time;

	bool                            skip_vram_check;
	bool                            reset_vram_lost;
	struct amdgpu_ring              *ring;
	/* Readable form of coredevdump, generate once to speed up
	 * reading it (see drm_coredump_printer's documentation).
	 */
	ssize_t				formatted_size;
	char				*formatted;
};
#endif

void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
		     bool vram_lost, struct amdgpu_job *job);
void amdgpu_coredump_init(struct amdgpu_device *adev);
#endif
+2 −0
Original line number Diff line number Diff line
@@ -3781,6 +3781,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
	INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);

	amdgpu_coredump_init(adev);

	adev->gfx.gfx_off_req_count = 1;
	adev->gfx.gfx_off_residency = 0;
	adev->gfx.gfx_off_entrycount = 0;