Loading drivers/cpuidle/governors/gov.h +5 −0 Original line number Diff line number Diff line Loading @@ -10,5 +10,10 @@ * check the time till the closest expected timer event. */ #define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) /* * If the closest timer is in this range, the governor idle state selection need * not be adjusted after the scheduler tick has been stopped. */ #define SAFE_TIMER_RANGE_NS (2 * TICK_NSEC) #endif /* __CPUIDLE_GOVERNOR_H */ drivers/cpuidle/governors/menu.c +9 −6 Original line number Diff line number Diff line Loading @@ -261,13 +261,16 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns); /* * If the tick is already stopped, the cost of possible short * idle duration misprediction is much higher, because the CPU * may be stuck in a shallow idle state for a long time as a * result of it. In that case, say we might mispredict and use * the known time till the closest timer event for the idle * state selection. * idle duration misprediction is higher because the CPU may get * stuck in a shallow idle state then. To avoid that, if * predicted_ns is small enough, say it might be mispredicted * and use the known time till the closest timer for idle state * selection unless that timer is going to trigger within * SAFE_TIMER_RANGE_NS in which case it can be regarded as a * sufficient safety net. */ if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC) if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC && data->next_timer_ns > SAFE_TIMER_RANGE_NS) predicted_ns = data->next_timer_ns; } else { /* Loading drivers/cpuidle/governors/teo.c +34 −47 Original line number Diff line number Diff line Loading @@ -407,50 +407,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * better choice. 
*/ if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) { int min_idx = idx0; if (tick_nohz_tick_stopped()) { /* * Look for the shallowest idle state below the current * candidate one whose target residency is at least * equal to the tick period length. */ while (min_idx < idx && drv->states[min_idx].target_residency_ns < TICK_NSEC) min_idx++; /* * Avoid selecting a state with a lower index, but with * the same target residency as the current candidate * one. */ if (drv->states[min_idx].target_residency_ns == drv->states[idx].target_residency_ns) goto constraint; } /* * If the minimum state index is greater than or equal to the * index of the state with the maximum intercepts metric and * the corresponding state is enabled, there is no need to look * at the deeper states. */ if (min_idx >= intercept_max_idx && !dev->states_usage[min_idx].disable) { idx = min_idx; goto constraint; } /* * Look for the deepest enabled idle state, at most as deep as * the one with the maximum intercepts metric, whose target * residency had not been greater than the idle duration in over * a half of the relevant cases in the past. * * Take the possible duration limitation present if the tick * has been stopped already into account. */ for (i = idx - 1, intercept_sum = 0; i >= min_idx; i--) { for (i = idx - 1, intercept_sum = 0; i >= idx0; i--) { intercept_sum += cpu_data->state_bins[i].intercepts; if (dev->states_usage[i].disable) Loading @@ -463,7 +426,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, } } constraint: /* * If there is a latency constraint, it may be necessary to select an * idle state shallower than the current candidate one. 
Loading @@ -472,13 +434,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, idx = constraint_idx; /* * If either the candidate state is state 0 or its target residency is * low enough, there is basically nothing more to do, but if the sleep * length is not updated, the subsequent wakeup will be counted as an * "intercept" which may be problematic in the cases when timer wakeups * are dominant. Namely, it may effectively prevent deeper idle states * from being selected at one point even if no imminent timers are * scheduled. * If the tick has not been stopped and either the candidate state is * state 0 or its target residency is low enough, there is basically * nothing more to do, but if the sleep length is not updated, the * subsequent wakeup will be counted as an "intercept". That may be * problematic in the cases when timer wakeups are dominant because it * may effectively prevent deeper idle states from being selected at one * point even if no imminent timers are scheduled. * * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one * CPU are unlikely (user space has a default 50 us slack value for Loading @@ -494,7 +456,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * shallow idle states regardless of the wakeup type, so the sleep * length need not be known in that case. 
*/ if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) && if (!tick_nohz_tick_stopped() && (!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) && (2 * cpu_data->short_idles >= cpu_data->total || latency_req < LATENCY_THRESHOLD_NS)) goto out_tick; Loading @@ -502,6 +465,30 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, duration_ns = tick_nohz_get_sleep_length(&delta_tick); cpu_data->sleep_length_ns = duration_ns; /* * If the tick has been stopped and the closest timer is too far away, * update the selection to prevent the CPU from getting stuck in a * shallow idle state for too long. */ if (tick_nohz_tick_stopped() && duration_ns > SAFE_TIMER_RANGE_NS && drv->states[idx].target_residency_ns < TICK_NSEC) { /* * Look for the deepest enabled idle state with exit latency * within the PM QoS limit and with target residency within * duration_ns. */ for (i = constraint_idx; i > idx; i--) { if (dev->states_usage[i].disable) continue; if (drv->states[i].target_residency_ns <= duration_ns) { idx = i; break; } } return idx; } if (!idx) goto out_tick; Loading drivers/idle/intel_idle.c +42 −0 Original line number Diff line number Diff line Loading @@ -983,6 +983,43 @@ static struct cpuidle_state mtl_l_cstates[] __initdata = { .enter = NULL } }; static struct cpuidle_state ptl_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 10, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6S", .desc = "MWAIT 0x21", .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, .target_residency = 300, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C10", .desc = "MWAIT 0x60", .flags = 
MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 370, .target_residency = 2500, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } }; static struct cpuidle_state gmt_cstates[] __initdata = { { .name = "C1", Loading Loading @@ -1561,6 +1598,10 @@ static const struct idle_cpu idle_cpu_mtl_l __initconst = { .state_table = mtl_l_cstates, }; static const struct idle_cpu idle_cpu_ptl __initconst = { .state_table = ptl_cstates, }; static const struct idle_cpu idle_cpu_gmt __initconst = { .state_table = gmt_cstates, }; Loading Loading @@ -1669,6 +1710,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_VFM(INTEL_ALDERLAKE, &idle_cpu_adl), X86_MATCH_VFM(INTEL_ALDERLAKE_L, &idle_cpu_adl_l), X86_MATCH_VFM(INTEL_METEORLAKE_L, &idle_cpu_mtl_l), X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &idle_cpu_ptl), X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &idle_cpu_gmt), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &idle_cpu_spr), Loading Loading
drivers/cpuidle/governors/gov.h +5 −0 Original line number Diff line number Diff line Loading @@ -10,5 +10,10 @@ * check the time till the closest expected timer event. */ #define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC) /* * If the closest timer is within this range (two tick periods), the governor's * idle state selection need not be adjusted after the scheduler tick has been * stopped: a timer that close is treated as a sufficient safety net against * staying in a shallow idle state for too long. */ #define SAFE_TIMER_RANGE_NS (2 * TICK_NSEC) #endif /* __CPUIDLE_GOVERNOR_H */
drivers/cpuidle/governors/menu.c +9 −6 Original line number Diff line number Diff line Loading @@ -261,13 +261,16 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns); /* * If the tick is already stopped, the cost of possible short * idle duration misprediction is much higher, because the CPU * may be stuck in a shallow idle state for a long time as a * result of it. In that case, say we might mispredict and use * the known time till the closest timer event for the idle * state selection. * idle duration misprediction is higher because the CPU may get * stuck in a shallow idle state then. To avoid that, if * predicted_ns is small enough, say it might be mispredicted * and use the known time till the closest timer for idle state * selection unless that timer is going to trigger within * SAFE_TIMER_RANGE_NS in which case it can be regarded as a * sufficient safety net. */ if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC) if (tick_nohz_tick_stopped() && predicted_ns < TICK_NSEC && data->next_timer_ns > SAFE_TIMER_RANGE_NS) predicted_ns = data->next_timer_ns; } else { /* Loading
drivers/cpuidle/governors/teo.c +34 −47 Original line number Diff line number Diff line Loading @@ -407,50 +407,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * better choice. */ if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) { int min_idx = idx0; if (tick_nohz_tick_stopped()) { /* * Look for the shallowest idle state below the current * candidate one whose target residency is at least * equal to the tick period length. */ while (min_idx < idx && drv->states[min_idx].target_residency_ns < TICK_NSEC) min_idx++; /* * Avoid selecting a state with a lower index, but with * the same target residency as the current candidate * one. */ if (drv->states[min_idx].target_residency_ns == drv->states[idx].target_residency_ns) goto constraint; } /* * If the minimum state index is greater than or equal to the * index of the state with the maximum intercepts metric and * the corresponding state is enabled, there is no need to look * at the deeper states. */ if (min_idx >= intercept_max_idx && !dev->states_usage[min_idx].disable) { idx = min_idx; goto constraint; } /* * Look for the deepest enabled idle state, at most as deep as * the one with the maximum intercepts metric, whose target * residency had not been greater than the idle duration in over * a half of the relevant cases in the past. * * Take the possible duration limitation present if the tick * has been stopped already into account. */ for (i = idx - 1, intercept_sum = 0; i >= min_idx; i--) { for (i = idx - 1, intercept_sum = 0; i >= idx0; i--) { intercept_sum += cpu_data->state_bins[i].intercepts; if (dev->states_usage[i].disable) Loading @@ -463,7 +426,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, } } constraint: /* * If there is a latency constraint, it may be necessary to select an * idle state shallower than the current candidate one. 
Loading @@ -472,13 +434,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, idx = constraint_idx; /* * If either the candidate state is state 0 or its target residency is * low enough, there is basically nothing more to do, but if the sleep * length is not updated, the subsequent wakeup will be counted as an * "intercept" which may be problematic in the cases when timer wakeups * are dominant. Namely, it may effectively prevent deeper idle states * from being selected at one point even if no imminent timers are * scheduled. * If the tick has not been stopped and either the candidate state is * state 0 or its target residency is low enough, there is basically * nothing more to do, but if the sleep length is not updated, the * subsequent wakeup will be counted as an "intercept". That may be * problematic in the cases when timer wakeups are dominant because it * may effectively prevent deeper idle states from being selected at one * point even if no imminent timers are scheduled. * * However, frequent timers in the RESIDENCY_THRESHOLD_NS range on one * CPU are unlikely (user space has a default 50 us slack value for Loading @@ -494,7 +456,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * shallow idle states regardless of the wakeup type, so the sleep * length need not be known in that case. 
*/ if ((!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) && if (!tick_nohz_tick_stopped() && (!idx || drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS) && (2 * cpu_data->short_idles >= cpu_data->total || latency_req < LATENCY_THRESHOLD_NS)) goto out_tick; Loading @@ -502,6 +465,30 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, duration_ns = tick_nohz_get_sleep_length(&delta_tick); cpu_data->sleep_length_ns = duration_ns; /* * If the tick has been stopped and the closest timer is too far away, * update the selection to prevent the CPU from getting stuck in a * shallow idle state for too long. */ if (tick_nohz_tick_stopped() && duration_ns > SAFE_TIMER_RANGE_NS && drv->states[idx].target_residency_ns < TICK_NSEC) { /* * Look for the deepest enabled idle state with exit latency * within the PM QoS limit and with target residency within * duration_ns. */ for (i = constraint_idx; i > idx; i--) { if (dev->states_usage[i].disable) continue; if (drv->states[i].target_residency_ns <= duration_ns) { idx = i; break; } } return idx; } if (!idx) goto out_tick; Loading
drivers/idle/intel_idle.c +42 −0 Original line number Diff line number Diff line Loading @@ -983,6 +983,43 @@ static struct cpuidle_state mtl_l_cstates[] __initdata = { .enter = NULL } }; /* Idle state table referenced by idle_cpu_ptl: four states (C1, C1E, C6S, C10) followed by a terminating entry that sets only .enter = NULL. Each .desc names the same MWAIT hint value that is passed to MWAIT2flg() in .flags. NOTE(review): exit_latency and target_residency are presumably in microseconds - confirm against struct cpuidle_state. */ static struct cpuidle_state ptl_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 10, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .name = "C6S", .desc = "MWAIT 0x21", .flags = MWAIT2flg(0x21) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, .target_residency = 300, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, /* Only state in this table whose target_residency (2500) differs from its exit_latency (370). */ { .name = "C10", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 370, .target_residency = 2500, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { .enter = NULL } }; static struct cpuidle_state gmt_cstates[] __initdata = { { .name = "C1", Loading Loading @@ -1561,6 +1598,10 @@ static const struct idle_cpu idle_cpu_mtl_l __initconst = { .state_table = mtl_l_cstates, }; /* Wraps ptl_cstates as a state table; the intel_idle_ids entry for INTEL_PANTHERLAKE_L points at this object. */ static const struct idle_cpu idle_cpu_ptl __initconst = { .state_table = ptl_cstates, }; static const struct idle_cpu idle_cpu_gmt __initconst = { .state_table = gmt_cstates, }; Loading Loading @@ -1669,6 +1710,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_VFM(INTEL_ALDERLAKE, &idle_cpu_adl), X86_MATCH_VFM(INTEL_ALDERLAKE_L, &idle_cpu_adl_l), X86_MATCH_VFM(INTEL_METEORLAKE_L, &idle_cpu_mtl_l), X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &idle_cpu_ptl), X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &idle_cpu_gmt), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &idle_cpu_spr), Loading