drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb v2 (a70cb217) · Commits · git / linux-net

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

+48 −0

Original line number	Diff line number	Diff line
		@@ -582,6 +582,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
		return 0;
		}

		/**
		* amdgpu_gmc_flush_gpu_tlb - flush the GPU TLB, through the SDMA where required
		*
		* @adev: amdgpu_device pointer
		* @vmid: vm instance to flush
		* @vmhub: index into adev->vmhub[] selecting the hub to flush
		* @flush_type: the flush type; only forwarded on the direct callback path
		*
		* Calls the gmc_funcs->flush_gpu_tlb callback directly unless all of the
		* following hold: the hub has sdma_invalidation_workaround set, @vmid is 0,
		* buffer funcs are enabled, the IB pool is ready, no reset is in progress
		* and the buffer funcs ring scheduler is ready. In that case a job is
		* submitted on the buffer funcs ring instead and its fence is waited on
		* before returning. @flush_type is not used on that path.
		*
		* If allocating the job fails, an error is logged and the function returns;
		* the direct callback is not invoked as a fallback.
		*/
		void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
		uint32_t vmhub, uint32_t flush_type)
		{
		struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
		struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
		struct dma_fence *fence;
		struct amdgpu_job *job;
		int r;

		/* Take the direct path whenever the workaround is not needed for this
		* hub, the flush targets a non-zero VMID, or the SDMA submission path
		* is not usable right now.
		*/
		if (!hub->sdma_invalidation_workaround \|\| vmid \|\|
		!adev->mman.buffer_funcs_enabled \|\|
		!adev->ib_pool_ready \|\| amdgpu_in_reset(adev) \|\|
		!ring->sched.ready) {
		adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
		flush_type);
		return;
		}

		/* The SDMA on Navi 1x has a bug which can theoretically result in memory
		* corruption if an invalidation happens at the same time as a VA
		* translation. Avoid this by doing the invalidation from the SDMA
		* itself at least for GART.
		*/
		/* gtt_window_lock is held across job allocation and submission only. */
		mutex_lock(&adev->mman.gtt_window_lock);
		/* NOTE(review): 16 * 4 is presumably 16 dwords expressed in bytes - confirm */
		r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
		AMDGPU_FENCE_OWNER_UNDEFINED,
		16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
		&job);
		if (r)
		goto error_alloc;

		/* The IB carries only a NOP plus padding; the job is pointed at the GART
		* BO's page directory with vm_needs_flush set. Presumably the flush
		* itself is emitted as part of submitting the job - confirm against
		* the job submission code.
		*/
		job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
		job->vm_needs_flush = true;
		job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
		amdgpu_ring_pad_ib(ring, &job->ibs[0]);
		fence = amdgpu_job_submit(job);
		mutex_unlock(&adev->mman.gtt_window_lock);

		/* Wait for the job outside the lock, then drop the fence reference.
		* The return value of the wait is not checked.
		* NOTE(review): the false argument presumably selects a
		* non-interruptible wait - confirm against the dma_fence API.
		*/
		dma_fence_wait(fence, false);
		dma_fence_put(fence);

		return;

		/* Allocation failed: release the lock and report; no flush was issued. */
		error_alloc:
		mutex_unlock(&adev->mman.gtt_window_lock);
		dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
		}

		/**
		* amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
		* @adev: amdgpu_device pointer

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

+4 −1

Original line number	Diff line number	Diff line
		@@ -117,6 +117,8 @@ struct amdgpu_vmhub {

		uint32_t vm_contexts_disable;

		bool sdma_invalidation_workaround;

		const struct amdgpu_vmhub_funcs *vmhub_funcs;
		};

		@@ -335,7 +337,6 @@ struct amdgpu_gmc {
		u64 noretry_flags;
		};

		#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
		#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
		((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
		((adev), (pasid), (type), (allhub), (inst)))
		@@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
		int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
		void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
		int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
		void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
		uint32_t vmhub, uint32_t flush_type);

		extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
		extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);

drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c

+3 −0

Original line number	Diff line number	Diff line
		@@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev)
		GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK \|
		GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

		/* TODO: This is only needed on some Navi 1x revisions */
		hub->sdma_invalidation_workaround = true;

		hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
		}

drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c

+43 −117

Original line number	Diff line number	Diff line
		@@ -231,20 +231,49 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
		* by the amdgpu vm/hsa code.
		*/

		static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
		unsigned int vmhub, uint32_t flush_type)
		/**
		* gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
		*
		* @adev: amdgpu_device pointer
		* @vmid: vm instance to flush
		* @vmhub: vmhub type
		* @flush_type: the flush type
		*
		* Flush the TLB for the requested page table.
		*/
		static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
		uint32_t vmhub, uint32_t flush_type)
		{
		bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
		struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
		u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
		u32 tmp;
		/* Use register 17 for GART */
		const unsigned int eng = 17;
		unsigned int i;
		unsigned char hub_ip = 0;
		u32 sem, req, ack;
		unsigned int i;
		u32 tmp;

		sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
		req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
		ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

		/* flush hdp cache */
		adev->hdp.funcs->flush_hdp(adev, NULL);

		/* For SRIOV run time, driver shouldn't access the register through MMIO
		* Directly use kiq to do the vm invalidation instead
		*/
		if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
		(amdgpu_sriov_runtime(adev) \|\| !amdgpu_sriov_vf(adev)) &&
		down_read_trylock(&adev->reset_domain->sem)) {
		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
		1 << vmid);
		up_read(&adev->reset_domain->sem);
		return;
		}

		hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
		GC_HWIP : MMHUB_HWIP;
		hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;

		spin_lock(&adev->gmc.invalidate_lock);
		/*
		@@ -258,9 +287,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
		if (use_semaphore) {
		for (i = 0; i < adev->usec_timeout; i++) {
		/* a read return value of 1 means semaphore acquire */
		tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
		hub->eng_distance * eng, hub_ip);

		tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
		if (tmp & 0x1)
		break;
		udelay(1);
		@@ -270,9 +297,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
		DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
		}

		WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
		hub->eng_distance * eng,
		inv_req, hub_ip);
		WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);

		/*
		* Issue a dummy read to wait for the ACK register to be cleared
		@@ -280,14 +305,11 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
		*/
		if ((vmhub == AMDGPU_GFXHUB(0)) &&
		(amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0)))
		RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
		hub->eng_distance * eng, hub_ip);
		RREG32_RLC_NO_KIQ(req, hub_ip);

		/* Wait for ACK with a delay.*/
		for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
		hub->eng_distance * eng, hub_ip);

		tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
		tmp &= 1 << vmid;
		if (tmp)
		break;
		@@ -297,109 +319,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,

		/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
		if (use_semaphore)
		/*
		* add semaphore release after invalidation,
		* write with 0 means semaphore release
		*/
		WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
		hub->eng_distance * eng, 0, hub_ip);
		WREG32_RLC_NO_KIQ(sem, 0, hub_ip);

		spin_unlock(&adev->gmc.invalidate_lock);

		if (i < adev->usec_timeout)
		return;

		DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
		}

		/**
		* gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
		*
		* @adev: amdgpu_device pointer
		* @vmid: vm instance to flush
		* @vmhub: vmhub type
		* @flush_type: the flush type
		*
		* Flush the TLB for the requested page table.
		*/
		static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
		uint32_t vmhub, uint32_t flush_type)
		{
		struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
		struct dma_fence *fence;
		struct amdgpu_job *job;

		int r;

		/* flush hdp cache */
		adev->hdp.funcs->flush_hdp(adev, NULL);

		/* For SRIOV run time, driver shouldn't access the register through MMIO
		* Directly use kiq to do the vm invalidation instead
		*/
		if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
		(amdgpu_sriov_runtime(adev) \|\| !amdgpu_sriov_vf(adev)) &&
		down_read_trylock(&adev->reset_domain->sem)) {
		struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
		const unsigned int eng = 17;
		u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
		u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
		u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
		1 << vmid);

		up_read(&adev->reset_domain->sem);
		return;
		}

		mutex_lock(&adev->mman.gtt_window_lock);

		if (vmhub == AMDGPU_MMHUB0(0)) {
		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
		mutex_unlock(&adev->mman.gtt_window_lock);
		return;
		}

		BUG_ON(vmhub != AMDGPU_GFXHUB(0));

		if (!adev->mman.buffer_funcs_enabled \|\|
		!adev->ib_pool_ready \|\|
		amdgpu_in_reset(adev) \|\|
		ring->sched.ready == false) {
		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
		mutex_unlock(&adev->mman.gtt_window_lock);
		return;
		}

		/* The SDMA on Navi has a bug which can theoretically result in memory
		* corruption if an invalidation happens at the same time as an VA
		* translation. Avoid this by doing the invalidation from the SDMA
		* itself.
		*/
		r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
		AMDGPU_FENCE_OWNER_UNDEFINED,
		16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
		&job);
		if (r)
		goto error_alloc;

		job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
		job->vm_needs_flush = true;
		job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
		amdgpu_ring_pad_ib(ring, &job->ibs[0]);
		fence = amdgpu_job_submit(job);

		mutex_unlock(&adev->mman.gtt_window_lock);

		dma_fence_wait(fence, false);
		dma_fence_put(fence);

		return;

		error_alloc:
		mutex_unlock(&adev->mman.gtt_window_lock);
		DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
		if (i >= adev->usec_timeout)
		dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n",
		vmhub);
		}

		/**

drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -271,7 +271,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
		if (i < adev->usec_timeout)
		return;

		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
		dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
		}

		/**