Commit 75400348 authored by Ray Wu, committed by Alex Deucher
Browse files

drm/amd/display: Add Idle state manager(ISM)

[Why]

Rapid allow/disallow of idle optimization calls, whether it be IPS or
self-refresh features, can end up using more power if actual
time-in-idle is low. It can also spam DMUB command submission in a way
that prevents it from servicing other requestors.

[How]

Introduce the Idle State Manager (ISM) to amdgpu. It maintains a finite
state machine that uses a hysteresis to determine if a delay should be
inserted between a caller allowing idle, and when the actual idle
optimizations are programmed.

A second timer is also introduced to enable static screen optimizations
(SSO) such as PSR1 and Replay low HZ idle mode. Rapid SSO enable/disable
can have a negative power impact on some low hz video playback, and can
introduce user lag for PSR1 (due to up to 3 frames of sync latency).

This effectively rate-limits idle optimizations, based on hysteresis.

This also replaces the existing delay logic used for PSR1, allowing
drm_vblank_crtc_config.disable_immediate = true, and thus allowing
drm_crtc_vblank_restore().

v2:
* Loosen criteria for ISM to exit idle optimizations; it failed to exit
  idle correctly on cursor updates when there are no drm_vblank
  requestors
* Document default_ism_config
* Convert pr_debug to trace events to reduce overhead on frequent
  codepaths
* checkpatch.pl fixes

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/4527
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3709


Fixes: 58a261bf ("drm/amd/display: use a more lax vblank enable policy for older ASICs")
Signed-off-by: Ray Wu <ray.wu@amd.com>
Signed-off-by: Leo Li <sunpeng.li@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent e4b1715a
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@
#include <drm/display/drm_dp_mst_helper.h>
#include "modules/inc/mod_freesync.h"
#include "amdgpu_dm_irq_params.h"
#include "amdgpu_dm_ism.h"

struct amdgpu_bo;
struct amdgpu_device;
@@ -486,6 +487,10 @@ struct amdgpu_crtc {
	int deferred_flip_completion;
	/* parameters access from DM IRQ handler */
	struct dm_irq_params dm_irq_params;

	/* DM idle state manager */
	struct amdgpu_dm_ism ism;

	/* pll sharing */
	struct amdgpu_atom_ss ss;
	bool ss_enabled;
+2 −1
Original line number Diff line number Diff line
@@ -40,7 +40,8 @@ AMDGPUDM = \
	amdgpu_dm_replay.o \
	amdgpu_dm_quirks.o \
	amdgpu_dm_wb.o \
	amdgpu_dm_colorop.o
	amdgpu_dm_colorop.o \
	amdgpu_dm_ism.o

ifdef CONFIG_DRM_AMD_DC_FP
AMDGPUDM += dc_fpu.o
+9 −25
Original line number Diff line number Diff line
@@ -3283,6 +3283,7 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block)

		mutex_lock(&dm->dc_lock);

		amdgpu_dm_ism_disable(dm);
		dc_allow_idle_optimizations(adev->dm.dc, false);

		dm->cached_dc_state = dc_state_create_copy(dm->dc->current_state);
@@ -3316,6 +3317,9 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block)

	amdgpu_dm_irq_suspend(adev);

	scoped_guard(mutex, &dm->dc_lock)
		amdgpu_dm_ism_disable(dm);

	hpd_rx_irq_work_suspend(dm);

	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
@@ -3606,6 +3610,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block)

		dc_resume(dm->dc);

		amdgpu_dm_ism_enable(dm);
		amdgpu_dm_irq_resume_early(adev);

		for (i = 0; i < dc_state->stream_count; i++) {
@@ -3666,6 +3671,9 @@ static int dm_resume(struct amdgpu_ip_block *ip_block)
	/* program HPD filter */
	dc_resume(dm->dc);

	scoped_guard(mutex, &dm->dc_lock)
		amdgpu_dm_ism_enable(dm);

	/*
	 * early enable HPD Rx IRQ, should be done before set mode as short
	 * pulse interrupts are used for MST
@@ -9334,31 +9342,7 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
	if (acrtc_state) {
		timing = &acrtc_state->stream->timing;

		/*
		 * Depending on when the HW latching event of double-buffered
		 * registers happen relative to the PSR SDP deadline, and how
		 * bad the Panel clock has drifted since the last ALPM off
		 * event, there can be up to 3 frames of delay between sending
		 * the PSR exit cmd to DMUB fw, and when the panel starts
		 * displaying live frames.
		 *
		 * We can set:
		 *
		 * 20/100 * offdelay_ms = 3_frames_ms
		 * => offdelay_ms = 5 * 3_frames_ms
		 *
		 * This ensures that `3_frames_ms` will only be experienced as a
		 * 20% delay on top how long the display has been static, and
		 * thus make the delay less perceivable.
		 */
		if (acrtc_state->stream->link->psr_settings.psr_version <
		    DC_PSR_VERSION_UNSUPPORTED) {
			offdelay = DIV64_U64_ROUND_UP((u64)5 * 3 * 10 *
						      timing->v_total *
						      timing->h_total,
						      timing->pix_clk_100hz);
			config.offdelay_ms = offdelay ?: 30;
		} else if (amdgpu_ip_version(adev, DCE_HWIP, 0) <
		if (amdgpu_ip_version(adev, DCE_HWIP, 0) <
			   IP_VERSION(3, 5, 0) ||
			   !(adev->flags & AMD_IS_APU)) {
			/*
+53 −37
Original line number Diff line number Diff line
@@ -124,37 +124,37 @@ bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state)
 * - Enable condition same as above
 * - Disable when vblank counter is enabled
 */
static void amdgpu_dm_crtc_set_panel_sr_feature(
	struct vblank_control_work *vblank_work,
void amdgpu_dm_crtc_set_panel_sr_feature(
	struct amdgpu_display_manager *dm,
	struct amdgpu_crtc *acrtc,
	struct dc_stream_state *stream,
	bool vblank_enabled, bool allow_sr_entry)
{
	struct dc_link *link = vblank_work->stream->link;
	struct dc_link *link = stream->link;
	bool is_sr_active = (link->replay_settings.replay_allow_active ||
				 link->psr_settings.psr_allow_active);
	bool is_crc_window_active = false;
	bool vrr_active = amdgpu_dm_crtc_vrr_active_irq(vblank_work->acrtc);
	bool vrr_active = amdgpu_dm_crtc_vrr_active_irq(acrtc);

#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
	is_crc_window_active =
		amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base);
		amdgpu_dm_crc_window_is_activated(&acrtc->base);
#endif

	if (link->replay_settings.replay_feature_enabled && !vrr_active &&
		allow_sr_entry && !is_sr_active && !is_crc_window_active) {
		amdgpu_dm_replay_enable(vblank_work->stream, true);
		amdgpu_dm_replay_enable(stream, true);
	} else if (vblank_enabled) {
		if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active)
			amdgpu_dm_psr_disable(vblank_work->stream, false);
			amdgpu_dm_psr_disable(stream, false);
	} else if (link->psr_settings.psr_feature_enabled && !vrr_active &&
		allow_sr_entry && !is_sr_active && !is_crc_window_active) {

		struct amdgpu_dm_connector *aconn =
			(struct amdgpu_dm_connector *) vblank_work->stream->dm_stream_context;
			(struct amdgpu_dm_connector *) stream->dm_stream_context;

		if (!aconn->disallow_edp_enter_psr) {
			struct amdgpu_display_manager *dm = vblank_work->dm;

			amdgpu_dm_psr_enable(vblank_work->stream);
			amdgpu_dm_psr_enable(stream);
			if (dm->idle_workqueue &&
			    (dm->dc->config.disable_ips == DMUB_IPS_ENABLE) &&
			    dm->dc->idle_optimizations_allowed &&
@@ -251,33 +251,15 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work)

	mutex_lock(&dm->dc_lock);

	if (vblank_work->enable)
	if (vblank_work->enable) {
		dm->active_vblank_irq_count++;
	else if (dm->active_vblank_irq_count)
		dm->active_vblank_irq_count--;

		amdgpu_dm_ism_commit_event(&vblank_work->acrtc->ism,
				DM_ISM_EVENT_EXIT_IDLE_REQUESTED);
	} else {
		if (dm->active_vblank_irq_count > 0)
		dc_allow_idle_optimizations(dm->dc, false);

	/*
	 * Control PSR based on vblank requirements from OS
	 *
	 * If panel supports PSR SU, there's no need to disable PSR when OS is
	 * submitting fast atomic commits (we infer this by whether the OS
	 * requests vblank events). Fast atomic commits will simply trigger a
	 * full-frame-update (FFU); a specific case of selective-update (SU)
	 * where the SU region is the full hactive*vactive region. See
	 * fill_dc_dirty_rects().
	 */
	if (vblank_work->stream && vblank_work->stream->link && vblank_work->acrtc) {
		amdgpu_dm_crtc_set_panel_sr_feature(
			vblank_work, vblank_work->enable,
			vblank_work->acrtc->dm_irq_params.allow_sr_entry);
	}

	if (dm->active_vblank_irq_count == 0) {
		dc_post_update_surfaces_to_stream(dm->dc);
		dc_allow_idle_optimizations(dm->dc, true);
			dm->active_vblank_irq_count--;
		amdgpu_dm_ism_commit_event(&vblank_work->acrtc->ism,
				DM_ISM_EVENT_ENTER_IDLE_REQUESTED);
	}

	mutex_unlock(&dm->dc_lock);
@@ -476,6 +458,9 @@ static struct drm_crtc_state *amdgpu_dm_crtc_duplicate_state(struct drm_crtc *cr

static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc)
{
	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);

	amdgpu_dm_ism_fini(&acrtc->ism);
	drm_crtc_cleanup(crtc);
	kfree(crtc);
}
@@ -719,6 +704,35 @@ static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = {
	.get_scanout_position = amdgpu_crtc_get_scanout_position,
};

/*
 * Default idle state manager (ISM) configuration, handed to
 * amdgpu_dm_ism_init() by amdgpu_dm_crtc_init().
 *
 * This hysteresis filter as configured will:
 *
 * * Search through the latest 8[filter_history_size] entries in history,
 *   skipping entries that are older than [filter_old_history_threshold] frames
 *   (0 means ignore age)
 * * Search for short-idle-periods that lasted shorter than
 *   4[filter_num_frames] frame-times
 * * If there is at least 1[filter_entry_count] short-idle-period, then a delay
 *   of 4[activation_num_delay_frames] will be applied before allowing idle
 *   optimizations again.
 * * An additional delay of 11[sso_num_frames] is applied before enabling
 *   panel-specific optimizations.
 *
 * The values were determined empirically on another OS, optimizing for Z8
 * residency on APUs when running a productivity + web browsing test.
 *
 * TODO: Run similar tests to determine if these values are also optimal for
 * Linux, and if each APU generation benefits differently.
 */
static struct amdgpu_dm_ism_config default_ism_config = {
	.filter_num_frames = 4,
	.filter_history_size = 8,
	.filter_entry_count = 1,
	.activation_num_delay_frames = 4,
	.filter_old_history_threshold = 0,
	.sso_num_frames = 11,
};

int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
			       struct drm_plane *plane,
			       uint32_t crtc_index)
@@ -749,6 +763,8 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
	if (res)
		goto fail;

	amdgpu_dm_ism_init(&acrtc->ism, &default_ism_config);

	drm_crtc_helper_add(&acrtc->base, &amdgpu_dm_crtc_helper_funcs);

	/* Create (reset) the plane state */
+6 −0
Original line number Diff line number Diff line
@@ -27,6 +27,12 @@
#ifndef __AMDGPU_DM_CRTC_H__
#define __AMDGPU_DM_CRTC_H__

/*
 * amdgpu_dm_crtc_set_panel_sr_feature() - enable or disable panel
 * self-refresh (Replay / PSR) for a stream.
 *
 * @dm: display manager; consulted for idle workqueue / IPS state once PSR
 *      has been enabled
 * @acrtc: CRTC whose VRR-active state (and, with
 *         CONFIG_DRM_AMD_SECURE_DISPLAY, CRC window state) gates entry
 * @stream: stream whose link replay/PSR settings are examined and acted on
 * @vblank_enabled: when true, PSR versions below DC_PSR_VERSION_SU_1 that
 *                  are currently active get disabled
 * @allow_sr_entry: must be true for Replay or PSR to be enabled
 *
 * Exported so that it can be called from outside amdgpu_dm_crtc.c.
 */
void amdgpu_dm_crtc_set_panel_sr_feature(
	struct amdgpu_display_manager *dm,
	struct amdgpu_crtc *acrtc,
	struct dc_stream_state *stream,
	bool vblank_enabled, bool allow_sr_entry);

void amdgpu_dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc);

bool amdgpu_dm_crtc_modeset_required(struct drm_crtc_state *crtc_state,
Loading