Commit 8e8691ec authored by Nicholas Kazlauskas, committed by Alex Deucher
Browse files

drm/amd/display: Driver implementation for cursor offloading to DMU



[Why]
We require an interlock between driver and firmware for upcoming
features and given that this could possibly happen on any single
cursor programming call (and that we can't asynchronously wait for
firmware to respond because of it) we'd be regressing cursor performance
by at least an extra 40us per call.

When we could possibly have a cursor update every 20us - 100us from high
frequency gaming mice, this means that we'd be stuttering or dropping
updates and impacting overall cursor performance.

We want a solution that can:

1. Interlock between other firmware features
2. Not stall out or require the DMCUB lock for every single update

[How]
When cursor offloading is enabled and supported by an ASIC, the driver
will route the cursor programming through to DMU as part of the regular
DC stream cursor programming interfaces for attributes and position.

The atomic pipe programming version will not be updated: this will still
follow the existing programming path by keeping track of a field that
specifies when the register writes should be deferred to DMU.

Cursor locking is not required when cursor offload is in progress since
the updates are consolidated and processed by DMU once at the end
of the frame in a periodic manner.

The shared buffer the firmware queries from is allocated along with the
rest of the scratch state region in an area that's accessible by
both firmware and driver.

The size of the cursor offload (v1) state will not change, but it does
have a unique union per ASIC version with room for expansion if needed.

When firmware features notifying DMU of DRR updates are not enabled we
now send an explicit vtotal min/max update via driver to DMU firmware
whenever the vtotal max changes. This is to allow the cursor programming
to determine the appropriate latch update point offset from vupdate.

Reviewed-by: Dillon Varone <dillon.varone@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent c58d6b1d
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -496,6 +496,10 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
			return true;
		}
	}

	if (dc->hwss.notify_cursor_offload_drr_update)
		dc->hwss.notify_cursor_offload_drr_update(dc, dc->current_state, stream);

	return false;
}

@@ -2188,8 +2192,14 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
		dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe);
	}

	for (i = 0; i < dc->current_state->stream_count; i++)
		dc_dmub_srv_control_cursor_offload(dc, dc->current_state, dc->current_state->streams[i], false);

	result = dc->hwss.apply_ctx_to_hw(dc, context);

	for (i = 0; i < context->stream_count; i++)
		dc_dmub_srv_control_cursor_offload(dc, context, context->streams[i], true);

	if (result != DC_OK) {
		/* Application of dc_state to hardware stopped. */
		dc->current_state->res_ctx.link_enc_cfg_ctx.mode = LINK_ENC_CFG_STEADY;
@@ -4488,6 +4498,8 @@ static void commit_planes_for_stream(struct dc *dc,
				pipe_ctx->plane_state->skip_manual_trigger)
			continue;

		if (dc->hwss.program_cursor_offload_now)
			dc->hwss.program_cursor_offload_now(dc, pipe_ctx);
		if (pipe_ctx->stream_res.tg->funcs->program_manual_trigger)
			pipe_ctx->stream_res.tg->funcs->program_manual_trigger(pipe_ctx->stream_res.tg);
	}
+12 −0
Original line number Diff line number Diff line
@@ -911,6 +911,13 @@ void hwss_build_fast_sequence(struct dc *dc,
					current_mpc_pipe->stream && current_mpc_pipe->plane_state &&
					current_mpc_pipe->plane_state->update_flags.bits.addr_update &&
					!current_mpc_pipe->plane_state->skip_manual_trigger) {
				if (dc->hwss.program_cursor_offload_now) {
					block_sequence[*num_steps].params.program_cursor_update_now_params.dc = dc;
					block_sequence[*num_steps].params.program_cursor_update_now_params.pipe_ctx = current_mpc_pipe;
					block_sequence[*num_steps].func = PROGRAM_CURSOR_UPDATE_NOW;
					(*num_steps)++;
				}

				block_sequence[*num_steps].params.program_manual_trigger_params.pipe_ctx = current_mpc_pipe;
				block_sequence[*num_steps].func = OPTC_PROGRAM_MANUAL_TRIGGER;
				(*num_steps)++;
@@ -1004,6 +1011,11 @@ void hwss_execute_sequence(struct dc *dc,
		case DMUB_HW_CONTROL_LOCK_FAST:
			dc->hwss.dmub_hw_control_lock_fast(params);
			break;
		case PROGRAM_CURSOR_UPDATE_NOW:
			dc->hwss.program_cursor_offload_now(
				params->program_cursor_update_now_params.dc,
				params->program_cursor_update_now_params.pipe_ctx);
			break;
		default:
			ASSERT(false);
			break;
+33 −9
Original line number Diff line number Diff line
@@ -231,6 +231,7 @@ void program_cursor_attributes(
	int i;
	struct resource_context *res_ctx;
	struct pipe_ctx *pipe_to_program = NULL;
	bool enable_cursor_offload = dc_dmub_srv_is_cursor_offload_enabled(dc);

	if (!stream)
		return;
@@ -245,24 +246,35 @@ void program_cursor_attributes(

		if (!pipe_to_program) {
			pipe_to_program = pipe_ctx;

			if (enable_cursor_offload && dc->hwss.begin_cursor_offload_update) {
				dc->hwss.begin_cursor_offload_update(dc, pipe_ctx);
			} else {
				dc->hwss.cursor_lock(dc, pipe_to_program, true);
				if (pipe_to_program->next_odm_pipe)
					dc->hwss.cursor_lock(dc, pipe_to_program->next_odm_pipe, true);
			}
		}

		dc->hwss.set_cursor_attribute(pipe_ctx);
		if (dc->ctx->dmub_srv)
			dc_send_update_cursor_info_to_dmu(pipe_ctx, i);
		if (dc->hwss.set_cursor_sdr_white_level)
			dc->hwss.set_cursor_sdr_white_level(pipe_ctx);
		if (enable_cursor_offload && dc->hwss.update_cursor_offload_pipe)
			dc->hwss.update_cursor_offload_pipe(dc, pipe_ctx);
	}

	if (pipe_to_program) {
		if (enable_cursor_offload && dc->hwss.commit_cursor_offload_update) {
			dc->hwss.commit_cursor_offload_update(dc, pipe_to_program);
		} else {
			dc->hwss.cursor_lock(dc, pipe_to_program, false);
			if (pipe_to_program->next_odm_pipe)
				dc->hwss.cursor_lock(dc, pipe_to_program->next_odm_pipe, false);
		}
	}
}

/*
 * dc_stream_check_cursor_attributes() - Check validitity of cursor attributes and surface address
@@ -366,6 +378,7 @@ void program_cursor_position(
	int i;
	struct resource_context *res_ctx;
	struct pipe_ctx *pipe_to_program = NULL;
	bool enable_cursor_offload = dc_dmub_srv_is_cursor_offload_enabled(dc);

	if (!stream)
		return;
@@ -384,17 +397,28 @@ void program_cursor_position(

		if (!pipe_to_program) {
			pipe_to_program = pipe_ctx;

			if (enable_cursor_offload && dc->hwss.begin_cursor_offload_update)
				dc->hwss.begin_cursor_offload_update(dc, pipe_ctx);
			else
				dc->hwss.cursor_lock(dc, pipe_to_program, true);
		}

		dc->hwss.set_cursor_position(pipe_ctx);
		if (enable_cursor_offload && dc->hwss.update_cursor_offload_pipe)
			dc->hwss.update_cursor_offload_pipe(dc, pipe_ctx);

		if (dc->ctx->dmub_srv)
			dc_send_update_cursor_info_to_dmu(pipe_ctx, i);
	}

	if (pipe_to_program)
	if (pipe_to_program) {
		if (enable_cursor_offload && dc->hwss.commit_cursor_offload_update)
			dc->hwss.commit_cursor_offload_update(dc, pipe_to_program);
		else
			dc->hwss.cursor_lock(dc, pipe_to_program, false);
	}
}

bool dc_stream_set_cursor_position(
	struct dc_stream_state *stream,
+1 −0
Original line number Diff line number Diff line
@@ -530,6 +530,7 @@ struct dc_config {
	bool set_pipe_unlock_order;
	bool enable_dpia_pre_training;
	bool unify_link_enc_assignment;
	bool enable_cursor_offload;
	struct spl_sharpness_range dcn_sharpness_range;
	struct spl_sharpness_range dcn_override_sharpness_range;
};
+99 −0
Original line number Diff line number Diff line
@@ -1174,6 +1174,100 @@ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, con
	dmub_srv_subvp_save_surf_addr(dc_dmub_srv->dmub, addr, subvp_index);
}

/**
 * dc_dmub_srv_cursor_offload_init() - Send the cursor offload init command to DMUB
 * @dc: display core instance
 *
 * Builds a DMUB_CMD__CURSOR_OFFLOAD / DMUB_CMD__CURSOR_OFFLOAD_INIT command
 * carrying the GPU address and size of the cursor offload framebuffer region
 * and submits it, waiting for completion.
 *
 * Nothing is sent (and the enabled flag is left untouched) when any of the
 * following holds:
 *  - dc->config.enable_cursor_offload is not set,
 *  - there is no DMUB service instance,
 *  - firmware does not advertise cursor_offload_v1_support,
 *  - the cursor offload framebuffer, the cursor_offload_v1 state or the
 *    shared state pointer is missing.
 *
 * After submission, cursor_offload_enabled mirrors whether the command was
 * executed successfully, so a failed init never routes cursor programming
 * through firmware.
 */
void dc_dmub_srv_cursor_offload_init(struct dc *dc)
{
	struct dmub_rb_cmd_cursor_offload_init *init;
	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
	union dmub_rb_cmd cmd;

	if (!dc->config.enable_cursor_offload)
		return;

	/*
	 * dmub_srv may legitimately be absent (see
	 * dc_dmub_srv_is_cursor_offload_enabled()), so guard before touching it.
	 */
	if (!dc_dmub_srv || !dc_dmub_srv->dmub)
		return;

	if (!dc_dmub_srv->dmub->meta_info.feature_bits.bits.cursor_offload_v1_support)
		return;

	/* The firmware needs the GPU address; the driver needs the CPU mapping. */
	if (!dc_dmub_srv->dmub->cursor_offload_fb.gpu_addr || !dc_dmub_srv->dmub->cursor_offload_fb.cpu_addr)
		return;

	if (!dc_dmub_srv->dmub->cursor_offload_v1)
		return;

	if (!dc_dmub_srv->dmub->shared_state)
		return;

	memset(&cmd, 0, sizeof(cmd));

	init = &cmd.cursor_offload_init;
	init->header.type = DMUB_CMD__CURSOR_OFFLOAD;
	init->header.sub_type = DMUB_CMD__CURSOR_OFFLOAD_INIT;
	init->header.payload_bytes = sizeof(init->init_data);
	init->init_data.state_addr.quad_part = dc_dmub_srv->dmub->cursor_offload_fb.gpu_addr;
	init->init_data.state_size = dc_dmub_srv->dmub->cursor_offload_fb.size;

	/*
	 * Only report offload as enabled when firmware actually processed the
	 * init command; otherwise keep using the register programming path.
	 */
	dc_dmub_srv->cursor_offload_enabled =
		dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}

/**
 * dc_dmub_srv_control_cursor_offload() - Enable or disable cursor offload for a stream
 * @dc: display core instance
 * @context: state used to look up the OTG master pipe for @stream
 * @stream: stream whose cursor offload is being controlled; NULL is ignored
 * @enable: true sends DMUB_CMD__CURSOR_OFFLOAD_STREAM_ENABLE, false sends
 *          DMUB_CMD__CURSOR_OFFLOAD_STREAM_DISABLE
 *
 * The command carries the OTG instance, the line time in nanoseconds and the
 * larger of the adjusted v_total_max and the nominal v_total.
 *
 * No command is sent when cursor offload is not enabled, when no OTG master
 * pipe with a timing generator is found for @stream, or when the stream's
 * pixel clock is too small to compute a line time (which would otherwise be
 * a division by zero).
 *
 * Enable is submitted without waiting; disable waits for completion.
 * NOTE(review): presumably so firmware has stopped touching the cursor before
 * the driver reprograms hardware - confirm against firmware behaviour.
 */
void dc_dmub_srv_control_cursor_offload(struct dc *dc, struct dc_state *context,
					const struct dc_stream_state *stream, bool enable)
{
	struct pipe_ctx const *pipe_ctx;
	struct dmub_rb_cmd_cursor_offload_stream_cntl *cntl;
	union dmub_rb_cmd cmd;
	uint64_t pix_clk_khz;

	if (!dc_dmub_srv_is_cursor_offload_enabled(dc))
		return;

	if (!stream)
		return;

	pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream);
	if (!pipe_ctx || !pipe_ctx->stream_res.tg || pipe_ctx->stream != stream)
		return;

	/* pix_clk_100hz / 10 is the pixel clock in kHz; it is the divisor below. */
	pix_clk_khz = stream->timing.pix_clk_100hz / 10;
	if (!pix_clk_khz)
		return;

	memset(&cmd, 0, sizeof(cmd));

	cntl = &cmd.cursor_offload_stream_ctnl;
	cntl->header.type = DMUB_CMD__CURSOR_OFFLOAD;
	cntl->header.sub_type =
		enable ? DMUB_CMD__CURSOR_OFFLOAD_STREAM_ENABLE : DMUB_CMD__CURSOR_OFFLOAD_STREAM_DISABLE;
	cntl->header.payload_bytes = sizeof(cntl->data);

	cntl->data.otg_inst = pipe_ctx->stream_res.tg->inst;

	/* h_total [pixels] * 1e6 / pix_clk [kHz] = line time in ns; +1 rounds up. */
	cntl->data.line_time_in_ns = 1u + (uint32_t)(div64_u64(stream->timing.h_total * 1000000ull,
							       pix_clk_khz));

	cntl->data.v_total_max = stream->adjust.v_total_max > stream->timing.v_total ?
					 stream->adjust.v_total_max :
					 stream->timing.v_total;

	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd,
				     enable ? DM_DMUB_WAIT_TYPE_NO_WAIT : DM_DMUB_WAIT_TYPE_WAIT);
}

void dc_dmub_srv_program_cursor_now(struct dc *dc, const struct pipe_ctx *pipe)
{
	struct dmub_rb_cmd_cursor_offload_stream_cntl *cntl;
	union dmub_rb_cmd cmd;

	if (!dc_dmub_srv_is_cursor_offload_enabled(dc))
		return;

	if (!pipe || !pipe->stream || !pipe->stream_res.tg)
		return;

	memset(&cmd, 0, sizeof(cmd));

	cntl = &cmd.cursor_offload_stream_ctnl;
	cntl->header.type = DMUB_CMD__CURSOR_OFFLOAD;
	cntl->header.sub_type = DMUB_CMD__CURSOR_OFFLOAD_STREAM_PROGRAM;
	cntl->header.payload_bytes = sizeof(cntl->data);
	cntl->data.otg_inst = pipe->stream_res.tg->inst;

	dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
}

bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait)
{
	struct dc_context *dc_ctx;
@@ -2231,6 +2325,11 @@ bool dmub_lsdma_send_poll_reg_write_command(struct dc_dmub_srv *dc_dmub_srv, uin
	return result;
}

bool dc_dmub_srv_is_cursor_offload_enabled(const struct dc *dc)
{
	return dc->ctx->dmub_srv && dc->ctx->dmub_srv->cursor_offload_enabled;
}

void dc_dmub_srv_release_hw(const struct dc *dc)
{
	struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv;
Loading