Commit f170523a authored by Chris Wilson
Browse files

drm/i915/gt: Consolidate the CS timestamp clocks



Pull the GT clock information [used to derive CS timestamps and PM
interval] under the GT so that it is local to the users. In doing so, we
consolidate the two references for the same information, of which the
runtime-info took note of a potential clock source override and scaling
factors.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201223122359.22562-2-chris@chris-wilson.co.uk
parent 8391c9b2
Loading
Loading
Loading
Loading
+10 −10
Original line number Diff line number Diff line
@@ -404,34 +404,34 @@ static int frequency_show(struct seq_file *m, void *unused)
		seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
		seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
		seq_printf(m, "CAGF: %dMHz\n", cagf);
		seq_printf(m, "RP CUR UP EI: %d (%dns)\n",
		seq_printf(m, "RP CUR UP EI: %d (%lldns)\n",
			   rpcurupei,
			   intel_gt_pm_interval_to_ns(gt, rpcurupei));
		seq_printf(m, "RP CUR UP: %d (%dns)\n",
		seq_printf(m, "RP CUR UP: %d (%lldns)\n",
			   rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
		seq_printf(m, "RP PREV UP: %d (%dns)\n",
		seq_printf(m, "RP PREV UP: %d (%lldns)\n",
			   rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
		seq_printf(m, "Up threshold: %d%%\n",
			   rps->power.up_threshold);
		seq_printf(m, "RP UP EI: %d (%dns)\n",
		seq_printf(m, "RP UP EI: %d (%lldns)\n",
			   rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
		seq_printf(m, "RP UP THRESHOLD: %d (%dns)\n",
		seq_printf(m, "RP UP THRESHOLD: %d (%lldns)\n",
			   rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));

		seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n",
		seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n",
			   rpcurdownei,
			   intel_gt_pm_interval_to_ns(gt, rpcurdownei));
		seq_printf(m, "RP CUR DOWN: %d (%dns)\n",
		seq_printf(m, "RP CUR DOWN: %d (%lldns)\n",
			   rpcurdown,
			   intel_gt_pm_interval_to_ns(gt, rpcurdown));
		seq_printf(m, "RP PREV DOWN: %d (%dns)\n",
		seq_printf(m, "RP PREV DOWN: %d (%lldns)\n",
			   rpprevdown,
			   intel_gt_pm_interval_to_ns(gt, rpprevdown));
		seq_printf(m, "Down threshold: %d%%\n",
			   rps->power.down_threshold);
		seq_printf(m, "RP DOWN EI: %d (%dns)\n",
		seq_printf(m, "RP DOWN EI: %d (%lldns)\n",
			   rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
		seq_printf(m, "RP DOWN THRESHOLD: %d (%dns)\n",
		seq_printf(m, "RP DOWN THRESHOLD: %d (%lldns)\n",
			   rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));

		max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
+2 −4
Original line number Diff line number Diff line
@@ -248,16 +248,14 @@ intel_context_clear_nopreempt(struct intel_context *ce)

static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
{
	const u32 period =
		RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
	const u32 period = ce->engine->gt->clock_period_ns;

	return READ_ONCE(ce->runtime.total) * period;
}

static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
{
	const u32 period =
		RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
	const u32 period = ce->engine->gt->clock_period_ns;

	return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
}
+2 −2
Original line number Diff line number Diff line
@@ -46,6 +46,8 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)

int intel_gt_init_mmio(struct intel_gt *gt)
{
	intel_gt_init_clock_frequency(gt);

	intel_uc_init_mmio(&gt->uc);
	intel_sseu_info_init(gt);

@@ -546,8 +548,6 @@ int intel_gt_init(struct intel_gt *gt)
	 */
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

	intel_gt_init_clock_frequency(gt);

	err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
	if (err)
		goto out_fw;
+157 −40
Original line number Diff line number Diff line
@@ -7,34 +7,146 @@
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"

#define MHZ_12   12000000 /* 12MHz (24MHz/2), 83.333ns */
#define MHZ_12_5 12500000 /* 12.5MHz (25MHz/2), 80ns */
#define MHZ_19_2 19200000 /* 19.2MHz, 52.083ns */
/*
 * Compute a reference timestamp frequency from the
 * GEN9_TIMESTAMP_OVERRIDE register.
 *
 * The result is the sum of two parts:
 *  - the US counter divider field, plus one, scaled by 1000000;
 *  - 1000000 divided by the US counter denominator field plus one.
 *
 * NOTE(review): callers treat the result as a frequency in Hz - confirm
 * against the register description.
 */
static u32 read_reference_ts_freq(struct intel_uncore *uncore)
{
	const u32 override = intel_uncore_read(uncore, GEN9_TIMESTAMP_OVERRIDE);
	u32 divider, denominator;

	/* Isolate the two raw register fields first. */
	divider = override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK;
	divider >>= GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT;

	denominator = override &
		      GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK;
	denominator >>= GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT;

	/* Both fields are biased by one before being used. */
	return (divider + 1) * 1000000 + 1000000 / (denominator + 1);
}

static u32 read_clock_frequency(const struct intel_gt *gt)
static u32 gen10_get_crystal_clock_freq(struct intel_uncore *uncore,
					u32 rpm_config_reg)
{
	if (INTEL_GEN(gt->i915) >= 11) {
		u32 config;
	u32 f19_2_mhz = 19200000;
	u32 f24_mhz = 24000000;
	u32 crystal_clock =
		(rpm_config_reg & GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
		GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;

		config = intel_uncore_read(gt->uncore, RPM_CONFIG0);
		config &= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK;
		config >>= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
	switch (crystal_clock) {
	case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
		return f19_2_mhz;
	case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
		return f24_mhz;
	default:
		MISSING_CASE(crystal_clock);
		return 0;
	}
}

static u32 gen11_get_crystal_clock_freq(struct intel_uncore *uncore,
					u32 rpm_config_reg)
{
	u32 f19_2_mhz = 19200000;
	u32 f24_mhz = 24000000;
	u32 f25_mhz = 25000000;
	u32 f38_4_mhz = 38400000;
	u32 crystal_clock =
		(rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
		GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;

		switch (config) {
		case 0: return MHZ_12;
		case 1:
		case 2: return MHZ_19_2;
	switch (crystal_clock) {
	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
		return f24_mhz;
	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
		return f19_2_mhz;
	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
		return f38_4_mhz;
	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
		return f25_mhz;
	default:
		case 3: return MHZ_12_5;
		MISSING_CASE(crystal_clock);
		return 0;
	}
	} else if (INTEL_GEN(gt->i915) >= 9) {
		if (IS_GEN9_LP(gt->i915))
			return MHZ_19_2;
		else
			return MHZ_12;
}

static u32 read_clock_frequency(struct intel_uncore *uncore)
{
	u32 f12_5_mhz = 12500000;
	u32 f19_2_mhz = 19200000;
	u32 f24_mhz = 24000000;

	if (INTEL_GEN(uncore->i915) <= 4) {
		/*
		 * PRMs say:
		 *
		 *     "The value in this register increments once every 16
		 *      hclks." (through the “Clocking Configuration”
		 *      (“CLKCFG”) MCHBAR register)
		 */
		return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16;
	} else if (INTEL_GEN(uncore->i915) <= 8) {
		/*
		 * PRMs say:
		 *
		 *     "The PCU TSC counts 10ns increments; this timestamp
		 *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
		 *      rolling over every 1.5 hours)."
		 */
		return f12_5_mhz;
	} else if (INTEL_GEN(uncore->i915) <= 9) {
		u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE);
		u32 freq = 0;

		if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) {
			freq = read_reference_ts_freq(uncore);
		} else {
		return MHZ_12_5;
			freq = IS_GEN9_LP(uncore->i915) ? f19_2_mhz : f24_mhz;

			/*
			 * Now figure out how the command stream's timestamp
			 * register increments from this frequency (it might
			 * increment only every few clock cycles).
			 */
			freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >>
				      CTC_SHIFT_PARAMETER_SHIFT);
		}

		return freq;
	} else if (INTEL_GEN(uncore->i915) <= 12) {
		u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE);
		u32 freq = 0;

		/*
		 * First figure out the reference frequency. There are 2 ways
		 * we can compute the frequency, either through the
		 * TIMESTAMP_OVERRIDE register or through RPM_CONFIG. CTC_MODE
		 * tells us which one we should use.
		 */
		if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) {
			freq = read_reference_ts_freq(uncore);
		} else {
			u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0);

			if (INTEL_GEN(uncore->i915) <= 10)
				freq = gen10_get_crystal_clock_freq(uncore, c0);
			else
				freq = gen11_get_crystal_clock_freq(uncore, c0);

			/*
			 * Now figure out how the command stream's timestamp
			 * register increments from this frequency (it might
			 * increment only every few clock cycles).
			 */
			freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
				      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
		}

		return freq;
	}

	MISSING_CASE("Unknown gen, unable to read command streamer timestamp frequency\n");
	return 0;
}

void intel_gt_init_clock_frequency(struct intel_gt *gt)
@@ -43,20 +155,27 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt)
	 * Note that on gen11+, the clock frequency may be reconfigured.
	 * We do not, and we assume nobody else does.
	 */
	gt->clock_frequency = read_clock_frequency(gt);
	gt->clock_frequency = read_clock_frequency(gt->uncore);
	if (gt->clock_frequency)
		gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1);

	GT_TRACE(gt,
		 "Using clock frequency: %dkHz\n",
		 gt->clock_frequency / 1000);
		 "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n",
		 gt->clock_frequency / 1000,
		 gt->clock_period_ns,
		 div_u64(mul_u32_u32(gt->clock_period_ns, S32_MAX),
			 USEC_PER_SEC));

}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void intel_gt_check_clock_frequency(const struct intel_gt *gt)
{
	if (gt->clock_frequency != read_clock_frequency(gt)) {
	if (gt->clock_frequency != read_clock_frequency(gt->uncore)) {
		dev_err(gt->i915->drm.dev,
			"GT clock frequency changed, was %uHz, now %uHz!\n",
			gt->clock_frequency,
			read_clock_frequency(gt));
			read_clock_frequency(gt->uncore));
	}
}
#endif
@@ -66,26 +185,24 @@ static u64 div_u64_roundup(u64 nom, u32 den)
	return div_u64(nom + den - 1, den);
}

u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count)
u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count)
{
	return div_u64_roundup(mul_u32_u32(count, 1000 * 1000 * 1000),
			       gt->clock_frequency);
	return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency);
}

u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count)
u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count)
{
	return intel_gt_clock_interval_to_ns(gt, 16 * count);
}

u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns)
u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns)
{
	return div_u64_roundup(mul_u32_u32(gt->clock_frequency, ns),
			       1000 * 1000 * 1000);
	return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC);
}

u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns)
u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns)
{
	u32 val;
	u64 val;

	/*
	 * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS
@@ -94,9 +211,9 @@ u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns)
	 * EI/thresholds are "bad", leading to a very sluggish or even
	 * frozen machine.
	 */
	val = DIV_ROUND_UP(intel_gt_ns_to_clock_interval(gt, ns), 16);
	val = div_u64_roundup(intel_gt_ns_to_clock_interval(gt, ns), 16);
	if (IS_GEN(gt->i915, 6))
		val = roundup(val, 25);
		val = div_u64_roundup(val, 25) * 25;

	return val;
}
+4 −4
Original line number Diff line number Diff line
@@ -18,10 +18,10 @@ void intel_gt_check_clock_frequency(const struct intel_gt *gt);
static inline void intel_gt_check_clock_frequency(const struct intel_gt *gt) {}
#endif

u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count);
u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count);
u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count);
u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count);

u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns);
u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns);
u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns);
u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns);

#endif /* __INTEL_GT_CLOCK_UTILS_H__ */
Loading