drm/amdgpu: rework how isolation is enforced v2 (bd22e44a) · Commits · git / linux-net

drivers/gpu/drm/amd/amdgpu/amdgpu.h

+11 −2

Original line number	Diff line number	Diff line
		@@ -1194,9 +1194,15 @@ struct amdgpu_device {
		bool debug_exp_resets;
		bool debug_disable_gpu_ring_reset;

		bool enforce_isolation[MAX_XCP];
		/* Added this mutex for cleaner shader isolation between GFX and compute processes */
		/* Protection for the following isolation structure */
		struct mutex enforce_isolation_mutex;
		bool enforce_isolation[MAX_XCP];
		struct amdgpu_isolation {
		void *owner;
		struct dma_fence *spearhead;
		struct amdgpu_sync active;
		struct amdgpu_sync prev;
		} isolation[MAX_XCP];

		struct amdgpu_init_level *init_lvl;

		@@ -1482,6 +1488,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
		struct dma_fence amdgpu_device_get_gang(struct amdgpu_device adev);
		struct dma_fence amdgpu_device_switch_gang(struct amdgpu_device adev,
		struct dma_fence *gang);
		struct dma_fence amdgpu_device_enforce_isolation(struct amdgpu_device adev,
		struct amdgpu_ring *ring,
		struct amdgpu_job *job);
		bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
		ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
		ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

+97 −1

Original line number	Diff line number	Diff line
		@@ -4294,6 +4294,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
		mutex_init(&adev->gfx.reset_sem_mutex);
		/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
		mutex_init(&adev->enforce_isolation_mutex);
		for (i = 0; i < MAX_XCP; ++i) {
		adev->isolation[i].spearhead = dma_fence_get_stub();
		amdgpu_sync_create(&adev->isolation[i].active);
		amdgpu_sync_create(&adev->isolation[i].prev);
		}
		mutex_init(&adev->gfx.kfd_sch_mutex);
		mutex_init(&adev->gfx.workload_profile_mutex);
		mutex_init(&adev->vcn.workload_profile_mutex);
		@@ -4799,7 +4804,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)

		void amdgpu_device_fini_sw(struct amdgpu_device *adev)
		{
		int idx;
		int i, idx;
		bool px;

		amdgpu_device_ip_fini(adev);
		@@ -4807,6 +4812,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
		amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
		adev->accel_working = false;
		dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
		for (i = 0; i < MAX_XCP; ++i) {
		dma_fence_put(adev->isolation[i].spearhead);
		amdgpu_sync_free(&adev->isolation[i].active);
		amdgpu_sync_free(&adev->isolation[i].prev);
		}

		amdgpu_reset_fini(adev);

		@@ -6953,6 +6963,92 @@ struct dma_fence amdgpu_device_switch_gang(struct amdgpu_device adev,
		return NULL;
		}

		/**
		* amdgpu_device_enforce_isolation - enforce HW isolation
		* @adev: the amdgpu device pointer
		* @ring: the HW ring the job is supposed to run on
		* @job: the job which is about to be pushed to the HW ring
		*
		* Makes sure that only one client at a time can use the GFX block.
		* Returns: The dependency to wait on before the job can be pushed to the HW.
		* The function is called multiple times until NULL is returned.
		*/
		struct dma_fence amdgpu_device_enforce_isolation(struct amdgpu_device adev,
		struct amdgpu_ring *ring,
		struct amdgpu_job *job)
		{
		struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
		struct drm_sched_fence *f = job->base.s_fence;
		struct dma_fence *dep;
		void *owner;
		int r;

		/*
		* For now enforce isolation only for the GFX block since we only need
		* the cleaner shader on those rings.
		*/
		if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
		ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return NULL;

		/*
		* All submissions where enforce isolation is false are handled as if
		* they come from a single client. Use ~0l as the owner to distinct it
		* from kernel submissions where the owner is NULL.
		*/
		owner = job->enforce_isolation ? f->owner : (void *)~0l;

		mutex_lock(&adev->enforce_isolation_mutex);

		/*
		* The "spearhead" submission is the first one which changes the
		* ownership to its client. We always need to wait for it to be
		* pushed to the HW before proceeding with anything.
		*/
		if (&f->scheduled != isolation->spearhead &&
		!dma_fence_is_signaled(isolation->spearhead)) {
		dep = isolation->spearhead;
		goto out_grab_ref;
		}

		if (isolation->owner != owner) {

		/*
		* Wait for any gang to be assembled before switching to a
		* different owner or otherwise we could deadlock the
		* submissions.
		*/
		if (!job->gang_submit) {
		dep = amdgpu_device_get_gang(adev);
		if (!dma_fence_is_signaled(dep))
		goto out_return_dep;
		dma_fence_put(dep);
		}

		dma_fence_put(isolation->spearhead);
		isolation->spearhead = dma_fence_get(&f->scheduled);
		amdgpu_sync_move(&isolation->active, &isolation->prev);
		isolation->owner = owner;
		}

		/*
		* Specifying the ring here helps to pipeline submissions even when
		* isolation is enabled. If that is not desired for testing NULL can be
		* used instead of the ring to enforce a CPU round trip while switching
		* between clients.
		*/
		dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
		r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
		if (r)
		DRM_WARN("OOM tracking isolation\n");

		out_grab_ref:
		dma_fence_get(dep);
		out_return_dep:
		mutex_unlock(&adev->enforce_isolation_mutex);
		return dep;
		}

		bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
		{
		switch (adev->asic_type) {

drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c

+15 −28

Original line number	Diff line number	Diff line
		@@ -287,23 +287,12 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
		(*id)->flushed_updates < updates \|\|
		!(*id)->last_flush \|\|
		((*id)->last_flush->context != fence_context &&
		!dma_fence_is_signaled((*id)->last_flush))) {
		struct dma_fence *tmp;

		/* Wait for the gang to be assembled before using a
		* reserved VMID or otherwise the gang could deadlock.
		*/
		tmp = amdgpu_device_get_gang(adev);
		if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) {
		*id = NULL;
		*fence = tmp;
		return 0;
		}
		dma_fence_put(tmp);
		!dma_fence_is_signaled((*id)->last_flush)))
		needs_flush = true;

		/* Make sure the id is owned by the gang before proceeding */
		if (!job->gang_submit \|\|
		(*id)->owner != vm->immediate.fence_context) {
		if ((*id)->owner != vm->immediate.fence_context \|\|
		(!adev->vm_manager.concurrent_flush && needs_flush)) {
		struct dma_fence *tmp;

		/* Don't use per engine and per process VMID at the
		* same time
		@@ -320,8 +309,6 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
		return 0;
		}
		}
		needs_flush = true;
		}

		/* Good we can use this VMID. Remember this submission as
		* user of the VMID.

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

+12 −4

Original line number	Diff line number	Diff line
		@@ -361,17 +361,24 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
		{
		struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
		struct amdgpu_job *job = to_amdgpu_job(sched_job);
		struct dma_fence *fence = NULL;
		struct dma_fence *fence;
		int r;

		r = drm_sched_entity_error(s_entity);
		if (r)
		goto error;

		if (job->gang_submit)
		if (job->gang_submit) {
		fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
		if (fence)
		return fence;
		}

		fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
		if (fence)
		return fence;

		if (!fence && job->vm && !job->vmid) {
		if (job->vm && !job->vmid) {
		r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
		if (r) {
		dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
		@@ -384,9 +391,10 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
		*/
		if (!fence)
		job->vm = NULL;
		return fence;
		}

		return fence;
		return NULL;

		error:
		dma_fence_set_error(&job->base.s_fence->finished, r);

drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c

+19 −0

Original line number	Diff line number	Diff line
		@@ -405,6 +405,25 @@ int amdgpu_sync_clone(struct amdgpu_sync source, struct amdgpu_sync clone)
		return 0;
		}

		/**
		* amdgpu_sync_move - move all fences from src to dst
		*
		* @src: source of the fences, empty after function
		* @dst: destination for the fences
		*
		* Moves all fences from source to destination. All fences in destination are
		* freed and source is empty after the function call.
		*/
		void amdgpu_sync_move(struct amdgpu_sync src, struct amdgpu_sync dst)
		{
		unsigned int i;

		amdgpu_sync_free(dst);

		for (i = 0; i < HASH_SIZE(src->fences); ++i)
		hlist_move_list(&src->fences[i], &dst->fences[i]);
		}

		/**
		* amdgpu_sync_push_to_job - push fences into job
		* @sync: sync object to get the fences from