drm/amdgpu: implement TLB flush fence (d8a3f0a0) · Commits · git / linux-nf

drivers/gpu/drm/amd/amdgpu/Makefile

+2 −1

Original line number	Diff line number	Diff line
		@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
		amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
		atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
		atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
		amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
		amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
		amdgpu_ib.o amdgpu_pll.o \
		amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
		amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
		amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

+47 −15

Original line number	Diff line number	Diff line
		@@ -885,6 +885,44 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
		kfree(tlb_cb);
		}

		/**
		 * amdgpu_vm_tlb_flush - prepare TLB flush
		 *
		 * @params: parameters for update
		 * @fence: input fence to sync TLB flush with
		 * @tlb_cb: the callback structure
		 *
		 * Increments the tlb sequence to make sure that future CS execute a VM flush.
		 *
		 * Ownership of @tlb_cb passes to this function: it is either registered as
		 * a callback on *@fence or handed directly to amdgpu_vm_tlb_seq_cb(), which
		 * releases it. The caller must not touch or free @tlb_cb afterwards.
		 *
		 * For locked updates on a compute VM, amdgpu_vm_tlb_fence_create() is called
		 * on @fence and the fence then found in *@fence is added to the root page
		 * directory's reservation object.
		 */
		static void
		amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
				    struct dma_fence **fence,
				    struct amdgpu_vm_tlb_seq_struct *tlb_cb)
		{
			struct amdgpu_vm *vm = params->vm;

			tlb_cb->vm = vm;

			/*
			 * Without a fence there is nothing to wait for. Run the callback
			 * right away instead of just returning: the callback is what
			 * releases tlb_cb, and the caller has already given up its
			 * reference, so a bare return here would leak the allocation.
			 */
			if (!fence || !*fence) {
				amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
				return;
			}

			if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
						    amdgpu_vm_tlb_seq_cb)) {
				/* Callback armed: remember this fence as the last TLB flush */
				dma_fence_put(vm->last_tlb_flush);
				vm->last_tlb_flush = dma_fence_get(*fence);
			} else {
				/* Callback could not be installed, invoke it directly */
				amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
			}

			/* Prepare a TLB flush fence to be attached to PTs */
			if (!params->unlocked && vm->is_compute_context) {
				amdgpu_vm_tlb_fence_create(params->adev, vm, fence);

				/* Makes sure no PD/PT is freed before the flush */
				dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
						   DMA_RESV_USAGE_BOOKKEEP);
			}
		}

		/**
		* amdgpu_vm_update_range - update a range in the vm page table
		*
		@@ -916,8 +954,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		struct ttm_resource *res, dma_addr_t *pages_addr,
		struct dma_fence **fence)
		{
		struct amdgpu_vm_update_params params;
		struct amdgpu_vm_tlb_seq_struct *tlb_cb;
		struct amdgpu_vm_update_params params;
		struct amdgpu_res_cursor cursor;
		enum amdgpu_sync_mode sync_mode;
		int r, idx;
		@@ -927,8 +965,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,

		tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
		if (!tlb_cb) {
		r = -ENOMEM;
		goto error_unlock;
		drm_dev_exit(idx);
		return -ENOMEM;
		}

		/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
		@@ -948,6 +986,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		params.immediate = immediate;
		params.pages_addr = pages_addr;
		params.unlocked = unlocked;
		params.needs_flush = flush_tlb;
		params.allow_override = allow_override;

		/* Implicitly sync to command submissions in the same VM before
		@@ -1031,24 +1070,16 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		}

		r = vm->update_funcs->commit(&params, fence);
		if (r)
		goto error_free;

		if (flush_tlb || params.table_freed) {
		tlb_cb->vm = vm;
		if (fence && *fence &&
		!dma_fence_add_callback(*fence, &tlb_cb->cb,
		amdgpu_vm_tlb_seq_cb)) {
		dma_fence_put(vm->last_tlb_flush);
		vm->last_tlb_flush = dma_fence_get(*fence);
		} else {
		amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
		}
		if (params.needs_flush) {
		amdgpu_vm_tlb_flush(&params, fence, tlb_cb);
		tlb_cb = NULL;
		}

		error_free:
		kfree(tlb_cb);

		error_unlock:
		amdgpu_vm_eviction_unlock(vm);
		drm_dev_exit(idx);
		return r;
		@@ -2391,6 +2422,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,

		mutex_init(&vm->eviction_lock);
		vm->evicting = false;
		vm->tlb_fence_context = dma_fence_context_alloc(1);

		r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
		false, &root, xcp_id);

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

+6 −2

Original line number	Diff line number	Diff line
		@@ -257,9 +257,9 @@ struct amdgpu_vm_update_params {
		unsigned int num_dw_left;

		/**
		* @table_freed: return true if page table is freed when updating
		* @needs_flush: true whenever we need to invalidate the TLB
		*/
		bool table_freed;
		bool needs_flush;

		/**
		* @allow_override: true for memory that is not uncached: allows MTYPE
		@@ -342,6 +342,7 @@ struct amdgpu_vm {
		atomic64_t tlb_seq;
		struct dma_fence *last_tlb_flush;
		atomic64_t kfd_last_flushed_seq;
		uint64_t tlb_fence_context;

		/* How many times we had to re-generate the page tables */
		uint64_t generation;
		@@ -611,5 +612,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
		uint64_t addr,
		uint32_t status,
		unsigned int vmhub);
		void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
		struct amdgpu_vm *vm,
		struct dma_fence **fence);

		#endif

drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c

+3 −1

Original line number	Diff line number	Diff line
		@@ -108,7 +108,9 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
		static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
		struct dma_fence **fence)
		{
		/* Flush HDP */
		if (p->needs_flush)
		atomic64_inc(&p->vm->tlb_seq);

		mb();
		amdgpu_device_flush_hdp(p->adev, NULL);
		return 0;

drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -972,7 +972,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
		while (cursor.pfn < frag_start) {
		/* Make sure previous mapping is freed */
		if (cursor.entry->bo) {
		params->table_freed = true;
		params->needs_flush = true;
		amdgpu_vm_pt_free_dfs(adev, params->vm,
		&cursor,
		params->unlocked);