drm/amdgpu/gfx9: rework pipeline sync packet sequence (9596097b) · Commits · git / linux-net

drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

+37 −30

Original line number	Diff line number	Diff line
		@@ -5572,15 +5572,42 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
		amdgpu_ring_write(ring, 0);
		}

		static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
		static void gfx_v9_0_ring_emit_event_write(struct amdgpu_ring *ring,
		uint32_t event_type,
		uint32_t event_index)
		{
		int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
		uint32_t seq = ring->fence_drv.sync_seq;
		uint64_t addr = ring->fence_drv.gpu_addr;
		amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
		amdgpu_ring_write(ring, EVENT_TYPE(event_type) \|
		EVENT_INDEX(event_index));
		}

		static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
		{
		const unsigned int cp_coher_cntl =
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) \|
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) \|
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) \|
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) \|
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

		gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
		lower_32_bits(addr), upper_32_bits(addr),
		seq, 0xffffffff, 4);
		/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
		amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
		amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
		amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
		amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
		amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
		amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
		amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
		}

		static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
		{
		if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
		gfx_v9_0_ring_emit_event_write(ring, VS_PARTIAL_FLUSH, 4);
		gfx_v9_0_ring_emit_event_write(ring, PS_PARTIAL_FLUSH, 4);
		}
		gfx_v9_0_ring_emit_event_write(ring, CS_PARTIAL_FLUSH, 4);
		gfx_v9_0_emit_mem_sync(ring);
		}

		static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
		@@ -7071,25 +7098,6 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
		gfx_v9_0_query_utc_edc_status(adev, err_data);
		}

		static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
		{
		const unsigned int cp_coher_cntl =
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) \|
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) \|
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) \|
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) \|
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

		/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
		amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
		amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
		amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
		amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
		amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
		amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
		amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
		}

		static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
		uint32_t pipe, bool enable)
		{
		@@ -7404,7 +7412,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
		.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
		.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 + /* COND_EXEC */
		7 + /* PIPELINE_SYNC */
		13 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		@@ -7460,7 +7468,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
		.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
		.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 + /* COND_EXEC */
		7 + /* PIPELINE_SYNC */
		13 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		@@ -7521,7 +7529,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		9 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
		@@ -7564,7 +7572,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */