Commit d2fa0ec6 authored by Chenglei Xie, committed by Alex Deucher
Browse files

drm/amdgpu: refactor bad_page_work for corner case handling



When poison is consumed on the guest before the guest receives the host's poison creation message, a corner case can occur in which poison_handler completes processing earlier than it should. This causes the guest to hang waiting for the req_bad_pages reply during a VF FLR, leaving the VM inaccessible in stress tests.

To fix this issue, this patch refactors the mailbox sequence by separating bad_pages_work into two parts: req_bad_pages_work and handle_bad_pages_work.
Old sequence:
  1. Stop data exchange work
  2. Guest sends MB_REQ_RAS_BAD_PAGES to host and keeps polling for IDH_RAS_BAD_PAGES_READY
  3. If IDH_RAS_BAD_PAGES_READY arrives within the timeout limit, re-init the data exchange region for updated bad page info;
     otherwise, time out with an error message
New sequence:
req_bad_pages_work:
  1. Stop data exchange work
  2. Guest sends MB_REQ_RAS_BAD_PAGES to host
Once the guest receives the IDH_RAS_BAD_PAGES_READY event:
handle_bad_pages_work:
  3. Re-init the data exchange region for updated bad page info

Signed-off-by: Chenglei Xie <Chenglei.Xie@amd.com>
Reviewed-by: Shravan Kumar Gande <Shravankumar.Gande@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent fc4e990a
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -267,7 +267,8 @@ struct amdgpu_virt {
	struct amdgpu_irq_src		rcv_irq;

	struct work_struct		flr_work;
	struct work_struct		bad_pages_work;
	struct work_struct		req_bad_pages_work;
	struct work_struct		handle_bad_pages_work;

	struct amdgpu_mm_table		mm_table;
	const struct amdgpu_virt_ops	*ops;
+28 −4
Original line number Diff line number Diff line
@@ -292,14 +292,32 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
	}
}

static void xgpu_ai_mailbox_bad_pages_work(struct work_struct *work)
static void xgpu_ai_mailbox_req_bad_pages_work(struct work_struct *work)
{
	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work);
	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work);
	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);

	if (down_read_trylock(&adev->reset_domain->sem)) {
		amdgpu_virt_fini_data_exchange(adev);
		amdgpu_virt_request_bad_pages(adev);
		up_read(&adev->reset_domain->sem);
	}
}

/**
 * xgpu_ai_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information
 * @work: pointer to the work_struct
 *
 * This work handler is triggered when bad pages are ready, and it reinitializes
 * the data exchange region to retrieve updated bad page information from the host.
 */
static void xgpu_ai_mailbox_handle_bad_pages_work(struct work_struct *work)
{
	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work);
	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);

	if (down_read_trylock(&adev->reset_domain->sem)) {
		amdgpu_virt_fini_data_exchange(adev);
		amdgpu_virt_init_data_exchange(adev);
		up_read(&adev->reset_domain->sem);
	}
@@ -327,10 +345,15 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	switch (event) {
	case IDH_RAS_BAD_PAGES_READY:
		xgpu_ai_mailbox_send_ack(adev);
		if (amdgpu_sriov_runtime(adev))
			schedule_work(&adev->virt.handle_bad_pages_work);
		break;
	case IDH_RAS_BAD_PAGES_NOTIFICATION:
		xgpu_ai_mailbox_send_ack(adev);
		if (amdgpu_sriov_runtime(adev))
			schedule_work(&adev->virt.bad_pages_work);
			schedule_work(&adev->virt.req_bad_pages_work);
		break;
	case IDH_UNRECOV_ERR_NOTIFICATION:
		xgpu_ai_mailbox_send_ack(adev);
@@ -415,7 +438,8 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
	}

	INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
	INIT_WORK(&adev->virt.bad_pages_work, xgpu_ai_mailbox_bad_pages_work);
	INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_ai_mailbox_req_bad_pages_work);
	INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_ai_mailbox_handle_bad_pages_work);

	return 0;
}
+28 −7
Original line number Diff line number Diff line
@@ -202,9 +202,6 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
	case IDH_REQ_RAS_CPER_DUMP:
		event = IDH_RAS_CPER_DUMP_READY;
		break;
	case IDH_REQ_RAS_BAD_PAGES:
		event = IDH_RAS_BAD_PAGES_READY;
		break;
	default:
		break;
	}
@@ -359,14 +356,32 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
	}
}

static void xgpu_nv_mailbox_bad_pages_work(struct work_struct *work)
static void xgpu_nv_mailbox_req_bad_pages_work(struct work_struct *work)
{
	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work);
	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work);
	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);

	if (down_read_trylock(&adev->reset_domain->sem)) {
		amdgpu_virt_fini_data_exchange(adev);
		amdgpu_virt_request_bad_pages(adev);
		up_read(&adev->reset_domain->sem);
	}
}

/**
 * xgpu_nv_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information
 * @work: pointer to the work_struct
 *
 * This work handler is triggered when bad pages are ready, and it reinitializes
 * the data exchange region to retrieve updated bad page information from the host.
 */
static void xgpu_nv_mailbox_handle_bad_pages_work(struct work_struct *work)
{
	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work);
	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);

	if (down_read_trylock(&adev->reset_domain->sem)) {
		amdgpu_virt_fini_data_exchange(adev);
		amdgpu_virt_init_data_exchange(adev);
		up_read(&adev->reset_domain->sem);
	}
@@ -397,10 +412,15 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	switch (event) {
	case IDH_RAS_BAD_PAGES_READY:
		xgpu_nv_mailbox_send_ack(adev);
		if (amdgpu_sriov_runtime(adev))
			schedule_work(&adev->virt.handle_bad_pages_work);
		break;
	case IDH_RAS_BAD_PAGES_NOTIFICATION:
		xgpu_nv_mailbox_send_ack(adev);
		if (amdgpu_sriov_runtime(adev))
			schedule_work(&adev->virt.bad_pages_work);
			schedule_work(&adev->virt.req_bad_pages_work);
		break;
	case IDH_UNRECOV_ERR_NOTIFICATION:
		xgpu_nv_mailbox_send_ack(adev);
@@ -485,7 +505,8 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
	}

	INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
	INIT_WORK(&adev->virt.bad_pages_work, xgpu_nv_mailbox_bad_pages_work);
	INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_nv_mailbox_req_bad_pages_work);
	INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_nv_mailbox_handle_bad_pages_work);

	return 0;
}
+0 −1
Original line number Diff line number Diff line
@@ -741,7 +741,6 @@ static void soc15_reg_base_init(struct amdgpu_device *adev)
void soc15_set_virt_ops(struct amdgpu_device *adev)
{
	adev->virt.ops = &xgpu_ai_virt_ops;

	/* init soc15 reg base early enough so we can
	 * request request full access for sriov before
	 * set_ip_blocks. */