Commit d3ff6524 authored by Alex Deucher
Browse files

drm/amdgpu: add a helper for processing recoverable GPUVM faults



Add a common helper to remove the repeated logic from each
gmc module.

Suggested-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent a50d32c4
Loading
Loading
Loading
Loading
+48 −0
Original line number Diff line number Diff line
@@ -524,6 +524,54 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
	} while (fault->timestamp < tmp);
}

/**
 * amdgpu_gmc_handle_retry_fault - common processing of a recoverable GPUVM fault
 * @adev: amdgpu device
 * @entry: interrupt vector entry describing the fault
 * @addr: faulting address, passed to the fault filter and the VM fault handler
 * @cam_index: value written to the retry CAM doorbell; only used when
 *             adev->irq.retry_cam_enabled is set
 * @node_id: passed through to amdgpu_vm_handle_fault()
 * @write_fault: passed through to amdgpu_vm_handle_fault()
 *
 * Shared by the gmc modules so that each one does not have to repeat the
 * filter / delegate / handle sequence.
 *
 * Returns:
 * 1 if the fault was filtered out, delegated to another ring, or
 * amdgpu_vm_handle_fault() returned non-zero; 0 otherwise.
 */
int amdgpu_gmc_handle_retry_fault(struct amdgpu_device *adev,
				  struct amdgpu_iv_entry *entry,
				  u64 addr,
				  u32 cam_index,
				  u32 node_id,
				  bool write_fault)
{
	const bool use_cam = adev->irq.retry_cam_enabled;
	int handled;

	/* Without the retry CAM, only the first fault for an address is
	 * processed. Entries arriving on the soft ring skip the filter.
	 */
	if (!use_cam && entry->ih != &adev->irq.ih_soft &&
	    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
				     entry->timestamp))
		return 1;

	/* An entry still sitting on the primary ring has not been delegated
	 * by the hardware, so hand it over to a different ring here.
	 */
	if (entry->ih == &adev->irq.ih) {
		amdgpu_irq_delegate(adev, entry, 8);
		return 1;
	}

	/* Try to resolve the recoverable page fault by filling page tables */
	handled = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid,
					 node_id, addr, entry->timestamp,
					 write_fault);

	/* With the retry CAM the doorbell is written after the handler has
	 * run, regardless of its result.
	 */
	if (use_cam)
		WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);

	return handled ? 1 : 0;
}

int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
{
	int r;
+6 −0
Original line number Diff line number Diff line
@@ -425,6 +425,12 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
			      uint16_t pasid, uint64_t timestamp);
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
				     uint16_t pasid);
/* Common helper for processing a recoverable (retry) GPUVM fault.
 * Returns 1 when the fault was filtered, delegated to another ring, or
 * amdgpu_vm_handle_fault() returned non-zero; 0 otherwise.
 */
int amdgpu_gmc_handle_retry_fault(struct amdgpu_device *adev,
				  struct amdgpu_iv_entry *entry,
				  u64 addr,
				  u32 cam_index,
				  u32 node_id,
				  bool write_fault);
int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
+3 −20
Original line number Diff line number Diff line
@@ -115,27 +115,10 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (retry_fault) {
		int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0,
							write_fault);
		/* Returning 1 here also prevents sending the IV to the KFD */

		/* Process it only if it's the first fault for this address */
		if (entry->ih != &adev->irq.ih_soft &&
		    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
					     entry->timestamp))
			return 1;

		/* Delegate it to a different ring if the hardware hasn't
		 * already done it.
		 */
		if (entry->ih == &adev->irq.ih) {
			amdgpu_irq_delegate(adev, entry, 8);
			return 1;
		}

		/* Try to handle the recoverable page faults by filling page
		 * tables
		 */
		if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
					   entry->timestamp, write_fault))
		if (ret == 1)
			return 1;
	}

+3 −20
Original line number Diff line number Diff line
@@ -114,27 +114,10 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (retry_fault) {
		int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0,
							write_fault);
		/* Returning 1 here also prevents sending the IV to the KFD */

		/* Process it only if it's the first fault for this address */
		if (entry->ih != &adev->irq.ih_soft &&
		    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
					     entry->timestamp))
			return 1;

		/* Delegate it to a different ring if the hardware hasn't
		 * already done it.
		 */
		if (entry->ih == &adev->irq.ih) {
			amdgpu_irq_delegate(adev, entry, 8);
			return 1;
		}

		/* Try to handle the recoverable page faults by filling page
		 * tables
		 */
		if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
					   entry->timestamp, write_fault))
		if (ret == 1)
			return 1;
	}

+3 −20
Original line number Diff line number Diff line
@@ -110,27 +110,10 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
		hub = &adev->vmhub[AMDGPU_GFXHUB(0)];

	if (retry_fault) {
		int ret = amdgpu_gmc_handle_retry_fault(adev, entry, addr, 0, 0,
							write_fault);
		/* Returning 1 here also prevents sending the IV to the KFD */

		/* Process it only if it's the first fault for this address */
		if (entry->ih != &adev->irq.ih_soft &&
		    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
					     entry->timestamp))
			return 1;

		/* Delegate it to a different ring if the hardware hasn't
		 * already done it.
		 */
		if (entry->ih == &adev->irq.ih) {
			amdgpu_irq_delegate(adev, entry, 8);
			return 1;
		}

		/* Try to handle the recoverable page faults by filling page
		 * tables
		 */
		if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
					   entry->timestamp, write_fault))
		if (ret == 1)
			return 1;
	}

Loading