Commit c68cbbfd authored by Christian König, committed by Alex Deucher
Browse files

drm/amdgpu: cleanup conditional execution



First of all, calculating the number of dw to patch into a
conditional execution is not something HW generation specific.
These are just standard ring buffer calculations. While at it, also
reduce the BUG_ON() into WARN_ON().

Then, instead of a random bit pattern, use 0 as the default value for
the number of dw skipped; this way it is no longer mandatory
to patch the conditional execution.

And lastly, make the address to check a parameter of the
conditional execution instead of getting it from the ring.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 86e14a73
Loading
Loading
Loading
Loading
+7 −14
Original line number Diff line number Diff line
@@ -131,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
	struct amdgpu_ib *ib = &ibs[0];
	struct dma_fence *tmp = NULL;
	bool need_ctx_switch;
	unsigned int patch_offset = ~0;
	struct amdgpu_vm *vm;
	uint64_t fence_ctx;
	uint32_t status = 0, alloc_size;
@@ -139,10 +138,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
	bool secure, init_shadow;
	u64 shadow_va, csa_va, gds_va;
	int vmid = AMDGPU_JOB_GET_VMID(job);
	bool need_pipe_sync = false;
	unsigned int cond_exec;

	unsigned int i;
	int r = 0;
	bool need_pipe_sync = false;

	if (num_ibs == 0)
		return -EINVAL;
@@ -228,7 +228,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
					    init_shadow, vmid);

	if (ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);
		cond_exec = amdgpu_ring_init_cond_exec(ring,
						       ring->cond_exe_gpu_addr);

	amdgpu_device_flush_hdp(adev, ring);

@@ -278,16 +279,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
	}

	if (ring->funcs->emit_gfx_shadow) {
	if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
		amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);

		if (ring->funcs->init_cond_exec) {
			unsigned int ce_offset = ~0;

			ce_offset = amdgpu_ring_init_cond_exec(ring);
			if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
				amdgpu_ring_patch_cond_exec(ring, ce_offset);
		}
		amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
	}

	r = amdgpu_fence_emit(ring, f, job, fence_flags);
@@ -302,8 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
	if (ring->funcs->insert_end)
		ring->funcs->insert_end(ring);

	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);
	amdgpu_ring_patch_cond_exec(ring, cond_exec);

	ring->current_ctx = fence_ctx;
	if (vm && ring->funcs->emit_switch_buffer)
+26 −4
Original line number Diff line number Diff line
@@ -209,8 +209,7 @@ struct amdgpu_ring_funcs {
	void (*insert_end)(struct amdgpu_ring *ring);
	/* pad the indirect buffer to the necessary number of dw */
	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
	unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
	void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
	unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr);
	/* note usage for clock and power gating */
	void (*begin_use)(struct amdgpu_ring *ring);
	void (*end_use)(struct amdgpu_ring *ring);
@@ -327,8 +326,7 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a))
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
@@ -411,6 +409,30 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
	ring->count_dw -= count_dw;
}

/**
 * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute
 * @ring: amdgpu_ring structure
 * @offset: offset returned by amdgpu_ring_init_cond_exec
 *
 * Calculate the dw count and patch it into a cond_exec command.
 *
 * Nothing is patched when the ring has no init_cond_exec callback, or
 * when @offset does not fit into the ring buffer mask.
 */
static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
					       unsigned int offset)
{
	unsigned cur;

	if (!ring->funcs->init_cond_exec)
		return;

	/*
	 * WARN_ON() does not stop execution, so bail out explicitly
	 * instead of indexing the ring with an out of range offset.
	 */
	if (WARN_ON(offset > ring->buf_mask))
		return;

	/* The slot to patch is expected to still hold its initial 0. */
	WARN_ON(ring->ring[offset] != 0);

	/* Position just before the current write pointer. */
	cur = (ring->wptr - 1) & ring->buf_mask;
	/*
	 * The write pointer wrapped around since @offset was taken.
	 * NOTE(review): assumes ring_size is in bytes, so ">> 2" yields
	 * the ring length in dw - confirm against the ring init code.
	 */
	if (cur < offset)
		cur += ring->ring_size >> 2;
	ring->ring[offset] = cur - offset;
}

#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset)			\
	(ring->is_mes_queue && ring->mes_ctx ?				\
	 (ring->mes_ctx->meta_data_gpu_addr + offset) : 0)
+4 −4
Original line number Diff line number Diff line
@@ -658,7 +658,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
	bool vm_flush_needed = job->vm_needs_flush;
	struct dma_fence *fence = NULL;
	bool pasid_mapping_needed = false;
	unsigned patch_offset = 0;
	unsigned int patch;
	int r;

	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
@@ -685,7 +685,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,

	amdgpu_ring_ib_begin(ring);
	if (ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);
		patch = amdgpu_ring_init_cond_exec(ring,
						   ring->cond_exe_gpu_addr);

	if (need_pipe_sync)
		amdgpu_ring_emit_pipeline_sync(ring);
@@ -733,8 +734,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
	}
	dma_fence_put(fence);

	if (ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);
	amdgpu_ring_patch_cond_exec(ring, patch);

	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
	if (ring->funcs->emit_switch_buffer) {
+6 −20
Original line number Diff line number Diff line
@@ -546,34 +546,21 @@ static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid,
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}

static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring)
static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring,
					    uint64_t addr)
{
	unsigned int ret;

	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, 1);
	ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
	amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0);

	return ret;
}

/*
 * Patch the dw count into the COND_EXE placeholder located at @offset.
 *
 * The slot at @offset is expected to still contain the 0x55aa55aa dummy
 * value; both sanity checks only warn (once) and do not abort the patch.
 */
static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
{
	unsigned int last;
	unsigned int count;

	WARN_ON_ONCE(offset > ring->buf_mask);
	WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa);

	/* Position just before the current write pointer. */
	last = (ring->wptr - 1) & ring->buf_mask;

	if (last > offset) {
		count = last - offset;
	} else {
		/* Write pointer is not past @offset: measure across the wrap. */
		count = (ring->buf_mask + 1) - offset + last;
	}

	ring->ring[offset] = count;
}

static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
@@ -864,7 +851,6 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = {
	.test_ring = vpe_ring_test_ring,
	.test_ib = vpe_ring_test_ib,
	.init_cond_exec = vpe_ring_init_cond_exec,
	.patch_cond_exec = vpe_ring_patch_cond_exec,
	.preempt_ib = vpe_ring_preempt_ib,
	.begin_use = vpe_ring_begin_use,
	.end_use = vpe_ring_end_use,
+8 −20
Original line number Diff line number Diff line
@@ -8542,34 +8542,23 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
	amdgpu_ring_write(ring, 0);
}

static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
						       uint64_t addr)
{
	unsigned int ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	/* discard following DWs if *cond_exec_gpu_addr==0 */
	amdgpu_ring_write(ring, 0);
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);

	return ret;
}

/*
 * Patch the dw count into the COND_EXEC placeholder located at @offset.
 *
 * The slot at @offset must still contain the 0x55aa55aa dummy value and
 * @offset must fit into the ring buffer mask; either violation is fatal
 * (BUG_ON).
 */
static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
{
	unsigned int last;
	unsigned int count;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	/* Position just before the current write pointer. */
	last = (ring->wptr - 1) & ring->buf_mask;

	if (likely(last > offset)) {
		count = last - offset;
	} else {
		/* Write pointer is not past @offset: measure across the wrap. */
		count = (ring->buf_mask + 1) - offset + last;
	}

	ring->ring[offset] = count;
}

static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
@@ -9224,7 +9213,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
	.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
	.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
	.preempt_ib = gfx_v10_0_ring_preempt_ib,
	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v10_0_ring_emit_wreg,
Loading