Commit 9fc27cba authored by Alex Deucher
Browse files

drm/amdgpu: don't reemit ring contents more than once



If we cancel a bad job and reemit the ring contents, and
we get another timeout, cancel everything rather than reemitting.
The wptr markers are only relevant for the original emit.  If
we reemit, the wptr markers are no longer correct.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit fb62a206)
parent dc8a887d
Loading
Loading
Loading
Loading
+17 −5
Original line number Diff line number Diff line
@@ -709,6 +709,7 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
	struct amdgpu_ring *ring = af->ring;
	unsigned long flags;
	u32 seq, last_seq;
	bool reemitted = false;

	last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
	seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
@@ -726,7 +727,9 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
		if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
			fence = container_of(unprocessed, struct amdgpu_fence, base);

			if (fence == af)
			if (fence->reemitted > 1)
				reemitted = true;
			else if (fence == af)
				dma_fence_set_error(&fence->base, -ETIME);
			else if (fence->context == af->context)
				dma_fence_set_error(&fence->base, -ECANCELED);
@@ -734,10 +737,17 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
		rcu_read_unlock();
	} while (last_seq != seq);
	spin_unlock_irqrestore(&ring->fence_drv.lock, flags);

	if (reemitted) {
		/* if we've already reemitted once then just cancel everything */
		amdgpu_fence_driver_force_completion(af->ring);
		af->ring->ring_backup_entries_to_copy = 0;
	} else {
		/* signal the guilty fence */
		amdgpu_fence_write(ring, (u32)af->base.seqno);
		amdgpu_fence_process(ring);
	}
}

void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
{
@@ -784,10 +794,12 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
			/* save everything if the ring is not guilty, otherwise
			 * just save the content from other contexts.
			 */
			if (!guilty_fence || (fence->context != guilty_fence->context))
			if (!fence->reemitted &&
			    (!guilty_fence || (fence->context != guilty_fence->context)))
				amdgpu_ring_backup_unprocessed_command(ring, wptr,
								       fence->wptr);
			wptr = fence->wptr;
			fence->reemitted++;
		}
		rcu_read_unlock();
	} while (last_seq != seq);
+2 −0
Original line number Diff line number Diff line
@@ -148,6 +148,8 @@ struct amdgpu_fence {
	u64				wptr;
	/* fence context for resets */
	u64				context;
	/* has this fence been reemitted */
	unsigned int			reemitted;
};

extern const struct drm_sched_backend_ops amdgpu_sched_ops;