Commit bfa579b3 authored by YiPeng Chai, committed by Alex Deucher
Browse files

drm/amdgpu: prepare to handle pasid poison consumption



Prepare to handle pasid poison consumption.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 314c38cd
Loading
Loading
Loading
Loading
+8 −1
Original line number Diff line number Diff line
@@ -747,10 +747,17 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
	return amdgpu_ras_get_fed_status(adev);
}

/*
 * PASID-aware poison consumption entry point.
 *
 * Passes every argument through unchanged to
 * amdgpu_umc_pasid_poison_handler().  That function's return value is not
 * propagated, because this wrapper returns void.
 */
void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
			enum amdgpu_ras_block block, uint16_t pasid,
			pasid_notify pasid_fn, void *data, uint32_t reset)
{
	amdgpu_umc_pasid_poison_handler(adev, block, pasid,
					pasid_fn, data, reset);
}

/*
 * Poison consumption handler for callers that have no PASID to report.
 *
 * Issues a single amdgpu_umc_pasid_poison_handler() call with pasid 0 and
 * NULL for both pasid_fn and data.  amdgpu_umc_poison_handler() is not
 * called in addition: it forwards to the same function with the same
 * arguments, so invoking both would process one poison event twice.
 */
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
	enum amdgpu_ras_block block, uint32_t reset)
{
	amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
}

int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+5 −0
Original line number Diff line number Diff line
@@ -337,6 +337,11 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
				struct tile_config *config);
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
			enum amdgpu_ras_block block, uint32_t reset);

void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
			enum amdgpu_ras_block block, uint16_t pasid,
			pasid_notify pasid_fn, void *data, uint32_t reset);

bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
void amdgpu_amdkfd_block_mmu_notifications(void *p);
+13 −7
Original line number Diff line number Diff line
@@ -252,8 +252,9 @@ int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev,
	return 0;
}

int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
			enum amdgpu_ras_block block, uint32_t reset)
int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
			enum amdgpu_ras_block block, uint16_t pasid,
			pasid_notify pasid_fn, void *data, uint32_t reset)
{
	int ret = AMDGPU_RAS_SUCCESS;

@@ -290,18 +291,16 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
			}

			amdgpu_ras_error_data_fini(&err_data);
		} else {
			if (reset) {
				amdgpu_umc_bad_page_polling_timeout(adev,
							reset, MAX_UMC_POISON_POLLING_TIME_SYNC);
		} else {
				struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

				amdgpu_ras_put_poison_req(adev,
					block, pasid, pasid_fn, data, reset);

				atomic_inc(&con->page_retirement_req_cnt);

				wake_up(&con->page_retirement_wq);
		}
		}
	} else {
		if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
			adev->virt.ops->ras_poison_handler(adev, block);
@@ -313,6 +312,13 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
	return ret;
}

/*
 * PASID-less variant of the UMC poison handler.
 *
 * Delegates to amdgpu_umc_pasid_poison_handler() with pasid 0 and NULL for
 * both pasid_fn and data, and returns that call's result unchanged.
 */
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
			enum amdgpu_ras_block block, uint32_t reset)
{
	int ret;

	ret = amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);

	return ret;
}

int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
		void *ras_error_status,
		struct amdgpu_iv_entry *entry)
+3 −0
Original line number Diff line number Diff line
@@ -106,6 +106,9 @@ int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
			enum amdgpu_ras_block block, uint32_t reset);
int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
			enum amdgpu_ras_block block, uint16_t pasid,
			pasid_notify pasid_fn, void *data, uint32_t reset);
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
		struct amdgpu_irq_src *source,
		struct amdgpu_iv_entry *entry);
+2 −1
Original line number Diff line number Diff line
@@ -190,7 +190,8 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
	dev_warn(dev->adev->dev,
		 "poison is consumed by client %d, kick off gpu reset flow\n", client_id);

	amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset);
	amdgpu_amdkfd_ras_pasid_poison_consumption_handler(dev->adev,
		block, pasid, NULL, NULL, reset);
}

static bool context_id_expected(struct kfd_dev *dev)