Commit 70ec2e8b authored by Ville Syrjälä's avatar Ville Syrjälä Committed by Tvrtko Ursulin
Browse files

drm/i915/dsb: Don't use indexed register writes needlessly



Turns out the DSB indexed register write command has
rather significant initial overhead compared to the normal
MMIO write command. Based on some quick experiments on TGL
you have to write the register at least ~5 times for the
indexed write command to come out ahead. If you write the
register fewer times than that, the MMIO write is faster.

So it seems my automagic indexed write logic was a bit
misguided. Go back to the original approach and only use
indexed writes for the cases we know will benefit from
them (indexed LUT register updates).

Currently we shouldn't have any cases where this truly
matters (just some rare double writes to the precision
LUT index registers), but we will need to switch the
legacy LUT updates to write each LUT register twice (to
avoid some palette anti-collision logic troubles).
This would be close to the worst case for using indexed
writes (two writes per register, and 256 separate registers).
Using the MMIO write command should shave off around 30%
of the execution time compared to using the indexed write
command.

Cc: stable@vger.kernel.org
Fixes: 34d8311f ("drm/i915/dsb: Re-instate DSB for LUT updates")
Fixes: 25ea3411 ("drm/i915/dsb: Use non-posted register writes for legacy LUT")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241120164123.12706-2-ville.syrjala@linux.intel.com


Reviewed-by: Uma Shankar <uma.shankar@intel.com>
(cherry picked from commit ecba559a)
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
parent fac04efc
Loading
Loading
Loading
Loading
+31 −20
Original line number Diff line number Diff line
@@ -1343,6 +1343,17 @@ static void ilk_lut_write(const struct intel_crtc_state *crtc_state,
		intel_de_write_fw(display, reg, val);
}

/*
 * Write @val to the LUT register @reg.
 *
 * When the crtc state carries a dsb_color_vblank DSB, the write is queued
 * there with the indexed write command; otherwise the register is written
 * directly via intel_de_write_fw().
 */
static void ilk_lut_write_indexed(const struct intel_crtc_state *crtc_state,
				  i915_reg_t reg, u32 val)
{
	struct intel_display *display = to_intel_display(crtc_state);

	/* No DSB to queue on: fall back to a direct register write. */
	if (!crtc_state->dsb_color_vblank) {
		intel_de_write_fw(display, reg, val);
		return;
	}

	intel_dsb_reg_write_indexed(crtc_state->dsb_color_vblank, reg, val);
}

static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state,
			   const struct drm_property_blob *blob)
{
@@ -1458,7 +1469,7 @@ static void bdw_load_lut_10(const struct intel_crtc_state *crtc_state,
		      prec_index);

	for (i = 0; i < lut_size; i++)
		ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
		ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
				      ilk_lut_10(&lut[i]));

	/*
@@ -1612,14 +1623,14 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state,
		 * ToDo: Extend to max 7.0. Enable 32 bit input value
		 * as compared to just 16 to achieve this.
		 */
		ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe),
		ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe),
				      DISPLAY_VER(display) >= 14 ?
				      mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i]));
	}

	/* Clamp values > 1.0. */
	while (i++ < glk_degamma_lut_size(display))
		ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe),
		ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe),
				      DISPLAY_VER(display) >= 14 ?
				      1 << 24 : 1 << 16);

@@ -1687,9 +1698,9 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
	for (i = 0; i < 9; i++) {
		const struct drm_color_lut *entry = &lut[i];

		ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
		ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
				      ilk_lut_12p4_ldw(entry));
		ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
		ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
				      ilk_lut_12p4_udw(entry));
	}

@@ -1726,9 +1737,9 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
	for (i = 1; i < 257; i++) {
		entry = &lut[i * 8];

		ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
		ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
				      ilk_lut_12p4_ldw(entry));
		ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
		ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
				      ilk_lut_12p4_udw(entry));
	}

@@ -1747,9 +1758,9 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
	for (i = 0; i < 256; i++) {
		entry = &lut[i * 8 * 128];

		ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
		ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
				      ilk_lut_12p4_ldw(entry));
		ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
		ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
				      ilk_lut_12p4_udw(entry));
	}

+16 −3
Original line number Diff line number Diff line
@@ -273,15 +273,19 @@ static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_
}

/**
 * intel_dsb_reg_write() - Emit register wriite to the DSB context
 * intel_dsb_reg_write_indexed() - Emit indexed register write to the DSB context
 * @dsb: DSB context
 * @reg: register address.
 * @val: value.
 *
 * This function is used for writing register-value pair in command
 * buffer of DSB.
 *
 * Note that indexed writes are slower than normal MMIO writes
 * for a small number (less than 5 or so) of writes to the same
 * register.
 */
void intel_dsb_reg_write(struct intel_dsb *dsb,
void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
				 i915_reg_t reg, u32 val)
{
	/*
@@ -340,6 +344,15 @@ void intel_dsb_reg_write(struct intel_dsb *dsb,
	}
}

/**
 * intel_dsb_reg_write() - Emit register write to the DSB context
 * @dsb: DSB context
 * @reg: register address.
 * @val: value.
 *
 * Emits a single MMIO write command for @reg carrying @val. Unlike
 * intel_dsb_reg_write_indexed(), this always uses the plain
 * DSB_OPCODE_MMIO_WRITE opcode.
 */
void intel_dsb_reg_write(struct intel_dsb *dsb,
			 i915_reg_t reg, u32 val)
{
	/* Command header: opcode, byte-enable field and the register offset. */
	u32 udw = (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
		  (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
		  i915_mmio_reg_offset(reg);

	intel_dsb_emit(dsb, val, udw);
}

static u32 intel_dsb_mask_to_byte_en(u32 mask)
{
	return (!!(mask & 0xff000000) << 3 |
+2 −0
Original line number Diff line number Diff line
@@ -34,6 +34,8 @@ void intel_dsb_finish(struct intel_dsb *dsb);
void intel_dsb_cleanup(struct intel_dsb *dsb);
/* Emit a single MMIO register write into the DSB command buffer. */
void intel_dsb_reg_write(struct intel_dsb *dsb,
			 i915_reg_t reg, u32 val);
/*
 * Indexed variant of intel_dsb_reg_write(). Indexed writes are slower than
 * normal MMIO writes for a small number (less than 5 or so) of writes to
 * the same register.
 */
void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
				 i915_reg_t reg, u32 val);
void intel_dsb_reg_write_masked(struct intel_dsb *dsb,
				i915_reg_t reg, u32 mask, u32 val);
void intel_dsb_noop(struct intel_dsb *dsb, int count);