Commit f5b00975 authored by Rafael J. Wysocki's avatar Rafael J. Wysocki
Browse files

Merge back cpuidle material for 7.1

parents f4c31b07 a943787d
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -10,5 +10,10 @@
 * check the time till the closest expected timer event.
 */
#define RESIDENCY_THRESHOLD_NS	(15 * NSEC_PER_USEC)
/*
 * If the closest timer is in this range (two scheduler tick periods), the
 * governor idle state selection need not be adjusted after the scheduler tick
 * has been stopped: a timer that close can be regarded as a sufficient safety
 * net against the CPU getting stuck in a shallow idle state.
 */
#define SAFE_TIMER_RANGE_NS	(2 * TICK_NSEC)

#endif /* __CPUIDLE_GOVERNOR_H */
+9 −6
Original line number Diff line number Diff line
@@ -261,13 +261,16 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
		predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
		/*
		 * If the tick is already stopped, the cost of possible short
		 * idle duration misprediction is much higher, because the CPU
		 * may be stuck in a shallow idle state for a long time as a
		 * result of it.  In that case, say we might mispredict and use
		 * the known time till the closest timer event for the idle
		 * state selection.
		 * idle duration misprediction is higher because the CPU may get
		 * stuck in a shallow idle state then.  To avoid that, if
		 * predicted_ns is small enough, say it might be mispredicted
		 * and use the known time till the closest timer for idle state
		 * selection unless that timer is going to trigger within
		 * SAFE_TIMER_RANGE_NS in which case it can be regarded as a
		 * sufficient safety net.
		 */
		if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC)
		if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC &&
		    data->next_timer_ns > SAFE_TIMER_RANGE_NS)
			predicted_ns = data->next_timer_ns;
	} else {
		/*
+34 −47
Original line number Diff line number Diff line
@@ -407,50 +407,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
	 * better choice.
	 */
	if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
		int min_idx = idx0;

		if (tick_nohz_tick_stopped()) {
			/*
			 * Look for the shallowest idle state below the current
			 * candidate one whose target residency is at least
			 * equal to the tick period length.
			 */
			while (min_idx < idx &&
			       drv->states[min_idx].target_residency_ns < TICK_NSEC)
				min_idx++;

			/*
			 * Avoid selecting a state with a lower index, but with
			 * the same target residency as the current candidate
			 * one.
			 */
			if (drv->states[min_idx].target_residency_ns ==
					drv->states[idx].target_residency_ns)
				goto constraint;
		}

		/*
		 * If the minimum state index is greater than or equal to the
		 * index of the state with the maximum intercepts metric and
		 * the corresponding state is enabled, there is no need to look
		 * at the deeper states.
		 */
		if (min_idx >= intercept_max_idx &&
		    !dev->states_usage[min_idx].disable) {
			idx = min_idx;
			goto constraint;
		}

		/*
		 * Look for the deepest enabled idle state, at most as deep as
		 * the one with the maximum intercepts metric, whose target
		 * residency had not been greater than the idle duration in over
		 * a half of the relevant cases in the past.
		 *
		 * Take the possible duration limitation present if the tick
		 * has been stopped already into account.
		 */
		for (i = idx - 1, intercept_sum = 0; i >= min_idx; i--) {
		for (i = idx - 1, intercept_sum = 0; i >= idx0; i--) {
			intercept_sum += cpu_data->state_bins[i].intercepts;

			if (dev->states_usage[i].disable)
@@ -463,7 +426,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
		}
	}

constraint:
	/*
	 * If there is a latency constraint, it may be necessary to select an
	 * idle state shallower than the current candidate one.
@@ -472,13 +434,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
		idx = constraint_idx;

	/*
	 * If either the candidate state is state 0 or its target residency is
	 * low enough, there is basically nothing more to do, but if the sleep
	 * length is not updated, the subsequent wakeup will be counted as an
	 * "intercept" which may be problematic in the cases when timer wakeups
	 * are dominant.  Namely, it may effectively prevent deeper idle states
	 * from being selected at one point even if no imminent timers are
	 * scheduled.
	 * If the tick has not been stopped and either the candidate state is
	 * state 0 or its target residency is low enough, there is basically
	 * nothing more to do, but if the sleep length is not updated, the
	 * subsequent wakeup will be counted as an "intercept".  That may be
	 * problematic in the cases when timer wakeups are dominant because it
	 * may effectively prevent deeper idle states from being selected at one
	 * point even if no imminent timers are scheduled.
	 *
	 * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one
	 * CPU are unlikely (user space has a default 50 us slack value for
@@ -494,7 +456,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
	 * shallow idle states regardless of the wakeup type, so the sleep
	 * length need not be known in that case.
	 */
	if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
	if (!tick_nohz_tick_stopped() && (!idx ||
	     drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) &&
	    (2 * cpu_data->short_idles >= cpu_data->total ||
	     latency_req < LATENCY_THRESHOLD_NS))
		goto out_tick;
@@ -502,6 +465,30 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
	cpu_data->sleep_length_ns = duration_ns;

	/*
	 * If the tick has been stopped and the closest timer is too far away,
	 * update the selection to prevent the CPU from getting stuck in a
	 * shallow idle state for too long.
	 */
	if (tick_nohz_tick_stopped() && duration_ns > SAFE_TIMER_RANGE_NS &&
	    drv->states[idx].target_residency_ns < TICK_NSEC) {
		/*
		 * Look for the deepest enabled idle state with exit latency
		 * within the PM QoS limit and with target residency within
		 * duration_ns.
		 */
		for (i = constraint_idx; i > idx; i--) {
			if (dev->states_usage[i].disable)
				continue;

			if (drv->states[i].target_residency_ns <= duration_ns) {
				idx = i;
				break;
			}
		}
		return idx;
	}

	if (!idx)
		goto out_tick;

+42 −0
Original line number Diff line number Diff line
@@ -983,6 +983,43 @@ static struct cpuidle_state mtl_l_cstates[] __initdata = {
		.enter = NULL }
};

/*
 * Idle state table used by idle_cpu_ptl.
 *
 * Entries are listed in order of increasing exit latency (C1, C1E, C6S, C10).
 * The last entry only sets ->enter to NULL; presumably it serves as the table
 * terminator, like the final entry of the other tables in this file.
 *
 * NOTE(review): exit_latency and target_residency are presumably expressed in
 * microseconds, as is usual for struct cpuidle_state -- confirm against the
 * structure definition.
 */
static struct cpuidle_state ptl_cstates[] __initdata = {
	{
		.name			= "C1",
		.desc			= "MWAIT 0x00",
		.flags			= MWAIT2flg(0x00),
		.exit_latency		= 1,
		.target_residency	= 1,
		.enter			= &intel_idle,
		.enter_s2idle		= intel_idle_s2idle,
	},
	{
		.name			= "C1E",
		.desc			= "MWAIT 0x01",
		.flags			= MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
		.exit_latency		= 10,
		.target_residency	= 10,
		.enter			= &intel_idle,
		.enter_s2idle		= intel_idle_s2idle,
	},
	{
		.name			= "C6S",
		.desc			= "MWAIT 0x21",
		.flags			= MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency		= 300,
		.target_residency	= 300,
		.enter			= &intel_idle,
		.enter_s2idle		= intel_idle_s2idle,
	},
	{
		.name			= "C10",
		.desc			= "MWAIT 0x60",
		.flags			= MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency		= 370,
		.target_residency	= 2500,
		.enter			= &intel_idle,
		.enter_s2idle		= intel_idle_s2idle,
	},
	{
		/* Only ->enter is set here, to NULL. */
		.enter			= NULL,
	},
};

static struct cpuidle_state gmt_cstates[] __initdata = {
	{
		.name = "C1",
@@ -1561,6 +1598,10 @@ static const struct idle_cpu idle_cpu_mtl_l __initconst = {
	.state_table = mtl_l_cstates,
};

/*
 * Idle configuration that uses the ptl_cstates table; it is the match data
 * for INTEL_PANTHERLAKE_L in intel_idle_ids[].
 */
static const struct idle_cpu idle_cpu_ptl __initconst = {
	.state_table = ptl_cstates,
};

/*
 * Idle configuration that uses the gmt_cstates table; it is the match data
 * for INTEL_ATOM_GRACEMONT in intel_idle_ids[].
 */
static const struct idle_cpu idle_cpu_gmt __initconst = {
	.state_table = gmt_cstates,
};
@@ -1669,6 +1710,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
	X86_MATCH_VFM(INTEL_ALDERLAKE,		&idle_cpu_adl),
	X86_MATCH_VFM(INTEL_ALDERLAKE_L,	&idle_cpu_adl_l),
	X86_MATCH_VFM(INTEL_METEORLAKE_L,	&idle_cpu_mtl_l),
	X86_MATCH_VFM(INTEL_PANTHERLAKE_L,	&idle_cpu_ptl),
	X86_MATCH_VFM(INTEL_ATOM_GRACEMONT,	&idle_cpu_gmt),
	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X,	&idle_cpu_spr),
	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X,	&idle_cpu_spr),