
Commit cb64c51

Merge tag 'pm-6.15-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management fixes from Rafael Wysocki:
 "These are mostly cpufreq fixes, some of which address recent
  regressions and some address older issues that have come to light
  during the last two weeks, and a runtime PM documentation correction:

   - Fix the performance-to-frequency scaling factor computation on
     systems using HWP in the intel_pstate driver after a recent
     incorrect update of it (Rafael Wysocki)

   - Fix the usage of the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver
     flag in the schedutil cpufreq governor after a recent update of
     it that has caused frequency limits changes to be missed
     sometimes (Rafael Wysocki)

   - Address some recently discovered synchronization issues related
     to frequency limits changes in the schedutil cpufreq governor and
     in the cpufreq core (Rafael Wysocki)

   - Fix ITMT support in the amd-pstate cpufreq driver so that it is
     enabled only after asym priorities have been correctly
     initialized for all CPUs (K Prateek Nayak)

   - Fix changing min/max limits in the amd-pstate cpufreq driver
     while the performance governor is in use (Dhananjay Ugwekar)

   - Fix a function name in the runtime PM documentation that was
     previously updated incorrectly (Sakari Ailus)"

* tag 'pm-6.15-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  cpufreq: Avoid using inconsistent policy->min and policy->max
  cpufreq/sched: Set need_freq_update in ignore_dl_rate_limit()
  cpufreq/sched: Explicitly synchronize limits_changed flag handling
  cpufreq/sched: Fix the usage of CPUFREQ_NEED_UPDATE_LIMITS
  Documentation: PM: runtime: Fix a reference to pm_runtime_autosuspend()
  cpufreq: intel_pstate: Fix hwp_get_cpu_scaling()
  cpufreq/amd-pstate: Enable ITMT support after initializing core rankings
  cpufreq/amd-pstate: Fix min_limit perf and freq updation for performance governor
2 parents 4b82886 + f3b25a1 commit cb64c51

File tree: 5 files changed, +84 -37 lines


Documentation/power/runtime_pm.rst

Lines changed: 1 addition & 1 deletion

@@ -154,7 +154,7 @@ suspending the device are satisfied) and to queue up a suspend request for the
 device in that case. If there is no idle callback, or if the callback returns
 0, then the PM core will attempt to carry out a runtime suspend of the device,
 also respecting devices configured for autosuspend. In essence this means a
-call to __pm_runtime_autosuspend() (do note that drivers needs to update the
+call to pm_runtime_autosuspend() (do note that drivers needs to update the
 device last busy mark, pm_runtime_mark_last_busy(), to control the delay under
 this circumstance). To prevent this (for example, if the callback routine has
 started a delayed suspend), the routine must return a non-zero value. Negative
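
For reference, a minimal sketch of the autosuspend pattern this paragraph describes, for a hypothetical foo driver (pm_runtime_mark_last_busy() and pm_runtime_put_autosuspend() are the actual kernel APIs; the driver and function names are made up):

#include <linux/pm_runtime.h>

/* Hypothetical: called when the foo device finishes an I/O burst. */
static void foo_io_done(struct device *dev)
{
        /* Restart the autosuspend delay from "now" ... */
        pm_runtime_mark_last_busy(dev);
        /* ... then drop the usage count; the PM core carries out the
         * equivalent of pm_runtime_autosuspend() once the delay elapses. */
        pm_runtime_put_autosuspend(dev);
}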

drivers/cpufreq/amd-pstate.c

Lines changed: 15 additions & 21 deletions

@@ -607,13 +607,16 @@ static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
         union perf_cached perf = READ_ONCE(cpudata->perf);
 
         perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max);
-        perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
+        WRITE_ONCE(cpudata->max_limit_freq, policy->max);
 
-        if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+        if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
                 perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf);
+                WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq));
+        } else {
+                perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
+                WRITE_ONCE(cpudata->min_limit_freq, policy->min);
+        }
 
-        WRITE_ONCE(cpudata->max_limit_freq, policy->max);
-        WRITE_ONCE(cpudata->min_limit_freq, policy->min);
         WRITE_ONCE(cpudata->perf, perf);
 }
 
@@ -791,16 +794,6 @@ static void amd_perf_ctl_reset(unsigned int cpu)
         wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
 }
 
-/*
- * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks
- * due to locking, so queue the work for later.
- */
-static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
-{
-        sched_set_itmt_support();
-}
-static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
-
 #define CPPC_MAX_PERF        U8_MAX
 
 static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
@@ -811,14 +804,8 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
 
         cpudata->hw_prefcore = true;
 
-        /*
-         * The priorities can be set regardless of whether or not
-         * sched_set_itmt_support(true) has been called and it is valid to
-         * update them at any time after it has been called.
-         */
+        /* Priorities must be initialized before ITMT support can be toggled on. */
         sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);
-
-        schedule_work(&sched_prefcore_work);
 }
 
 static void amd_pstate_update_limits(unsigned int cpu)
@@ -1193,6 +1180,9 @@ static ssize_t show_energy_performance_preference(
 
 static void amd_pstate_driver_cleanup(void)
 {
+        if (amd_pstate_prefcore)
+                sched_clear_itmt_support();
+
         cppc_state = AMD_PSTATE_DISABLE;
         current_pstate_driver = NULL;
 }
@@ -1235,6 +1225,10 @@ static int amd_pstate_register_driver(int mode)
                 return ret;
         }
 
+        /* Enable ITMT support once all CPUs have initialized their asym priorities. */
+        if (amd_pstate_prefcore)
+                sched_set_itmt_support();
+
         return 0;
 }
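
A standalone sketch (plain C, hypothetical frequency values) of the rule the first hunk enforces: under the performance governor the effective minimum tracks min(nominal, max limit) rather than policy->min, and the fix updates the cached min_limit_freq in the same branch as min_limit_perf so the two values can no longer diverge.

#include <stdio.h>

int main(void)
{
        unsigned int nominal_freq = 3000000;        /* kHz, hypothetical */
        unsigned int policy_min = 400000, policy_max = 2500000;
        int performance = 1;        /* performance governor in use? */

        unsigned int max_limit = policy_max;
        unsigned int min_limit = performance ?
                (nominal_freq < max_limit ? nominal_freq : max_limit) :
                policy_min;

        /* Prints 2500000: nominal exceeds the max limit, so clamp to it. */
        printf("min_limit = %u kHz\n", min_limit);
        return 0;
}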

drivers/cpufreq/cpufreq.c

Lines changed: 25 additions & 7 deletions

@@ -540,8 +540,6 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy,
 {
         unsigned int idx;
 
-        target_freq = clamp_val(target_freq, policy->min, policy->max);
-
         if (!policy->freq_table)
                 return target_freq;
 
@@ -565,7 +563,22 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy,
 unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
                                          unsigned int target_freq)
 {
-        return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE);
+        unsigned int min = READ_ONCE(policy->min);
+        unsigned int max = READ_ONCE(policy->max);
+
+        /*
+         * If this function runs in parallel with cpufreq_set_policy(), it may
+         * read policy->min before the update and policy->max after the update
+         * or the other way around, so there is no ordering guarantee.
+         *
+         * Resolve this by always honoring the max (in case it comes from
+         * thermal throttling or similar).
+         */
+        if (unlikely(min > max))
+                min = max;
+
+        return __resolve_freq(policy, clamp_val(target_freq, min, max),
+                              CPUFREQ_RELATION_LE);
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);
 
@@ -2384,6 +2397,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
         if (cpufreq_disabled())
                 return -ENODEV;
 
+        target_freq = clamp_val(target_freq, policy->min, policy->max);
         target_freq = __resolve_freq(policy, target_freq, relation);
 
         pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
@@ -2708,11 +2722,15 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
          * Resolve policy min/max to available frequencies. It ensures
          * no frequency resolution will neither overshoot the requested maximum
          * nor undershoot the requested minimum.
+         *
+         * Avoid storing intermediate values in policy->max or policy->min and
+         * compiler optimizations around them because they may be accessed
+         * concurrently by cpufreq_driver_resolve_freq() during the update.
          */
-        policy->min = new_data.min;
-        policy->max = new_data.max;
-        policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L);
-        policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H);
+        WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H));
+        new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L);
+        WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min);
+
         trace_cpu_frequency_limits(policy);
 
         cpufreq_update_pressure(policy);
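
Why the min > max fallback is needed can be seen in a small userspace model (C11 relaxed atomics standing in for READ_ONCE()/WRITE_ONCE(); values hypothetical): a concurrent limits update may be observed half-applied, so one limit can come from before the update and the other from after it.

#include <stdatomic.h>

static _Atomic unsigned int policy_min = 800000;        /* kHz */
static _Atomic unsigned int policy_max = 3600000;        /* kHz */

static unsigned int resolve_freq(unsigned int target)
{
        /* One tear-free read of each limit, like READ_ONCE(). */
        unsigned int min = atomic_load_explicit(&policy_min, memory_order_relaxed);
        unsigned int max = atomic_load_explicit(&policy_max, memory_order_relaxed);

        /* Inconsistent snapshot (update caught mid-way): honor the max,
         * e.g. because it may reflect thermal throttling. */
        if (min > max)
                min = max;

        return target < min ? min : (target > max ? max : target);
}

int main(void)
{
        /* With a consistent snapshot this is a plain clamp. */
        return resolve_freq(4000000) == 3600000 ? 0 : 1;
}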

drivers/cpufreq/intel_pstate.c

Lines changed: 1 addition & 1 deletion

@@ -2209,7 +2209,7 @@ static int knl_get_turbo_pstate(int cpu)
 static int hwp_get_cpu_scaling(int cpu)
 {
         if (hybrid_scaling_factor) {
-                struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+                struct cpuinfo_x86 *c = &cpu_data(cpu);
                 u8 cpu_type = c->topo.intel_type;
 
                 /*
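
The one-line fix matters because hwp_get_cpu_scaling() receives a target cpu that need not be the CPU executing it, so per-CPU data has to be indexed by the argument rather than by smp_processor_id(). A trivial standalone illustration (hypothetical data):

#include <stdio.h>

struct cpuinfo { int intel_type; };        /* hypothetical: 1 = Core, 2 = Atom */
static struct cpuinfo cpu_data[4] = { {1}, {1}, {2}, {2} };
static int running_cpu;        /* stand-in for smp_processor_id() */

static int cpu_type_of(int cpu)
{
        /* Index by the target cpu; cpu_data[running_cpu] would be wrong
         * whenever the caller asks about a different CPU. */
        return cpu_data[cpu].intel_type;
}

int main(void)
{
        (void)running_cpu;
        printf("cpu 2 type: %d\n", cpu_type_of(2));        /* 2, even on cpu 0 */
        return 0;
}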

kernel/sched/cpufreq_schedutil.c

Lines changed: 42 additions & 7 deletions

@@ -81,9 +81,23 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
         if (!cpufreq_this_cpu_can_update(sg_policy->policy))
                 return false;
 
-        if (unlikely(sg_policy->limits_changed)) {
-                sg_policy->limits_changed = false;
-                sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+        if (unlikely(READ_ONCE(sg_policy->limits_changed))) {
+                WRITE_ONCE(sg_policy->limits_changed, false);
+                sg_policy->need_freq_update = true;
+
+                /*
+                 * The above limits_changed update must occur before the reads
+                 * of policy limits in cpufreq_driver_resolve_freq() or a policy
+                 * limits update might be missed, so use a memory barrier to
+                 * ensure it.
+                 *
+                 * This pairs with the write memory barrier in sugov_limits().
+                 */
+                smp_mb();
+
+                return true;
+        } else if (sg_policy->need_freq_update) {
+                /* ignore_dl_rate_limit() wants a new frequency to be found. */
                 return true;
         }
 
@@ -95,10 +109,22 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
                                    unsigned int next_freq)
 {
-        if (sg_policy->need_freq_update)
+        if (sg_policy->need_freq_update) {
                 sg_policy->need_freq_update = false;
-        else if (sg_policy->next_freq == next_freq)
+                /*
+                 * The policy limits have changed, but if the return value of
+                 * cpufreq_driver_resolve_freq() after applying the new limits
+                 * is still equal to the previously selected frequency, the
+                 * driver callback need not be invoked unless the driver
+                 * specifically wants that to happen on every update of the
+                 * policy limits.
+                 */
+                if (sg_policy->next_freq == next_freq &&
+                    !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS))
+                        return false;
+        } else if (sg_policy->next_freq == next_freq) {
                 return false;
+        }
 
         sg_policy->next_freq = next_freq;
         sg_policy->last_freq_update_time = time;
@@ -365,7 +391,7 @@ static inline bool sugov_hold_freq(struct sugov_cpu *sg_cpu) { return false; }
 static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
 {
         if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min)
-                sg_cpu->sg_policy->limits_changed = true;
+                sg_cpu->sg_policy->need_freq_update = true;
 }
 
 static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
@@ -871,7 +897,16 @@ static void sugov_limits(struct cpufreq_policy *policy)
                 mutex_unlock(&sg_policy->work_lock);
         }
 
-        sg_policy->limits_changed = true;
+        /*
+         * The limits_changed update below must take place before the updates
+         * of policy limits in cpufreq_set_policy() or a policy limits update
+         * might be missed, so use a memory barrier to ensure it.
+         *
+         * This pairs with the memory barrier in sugov_should_update_freq().
+         */
+        smp_wmb();
+
+        WRITE_ONCE(sg_policy->limits_changed, true);
 }
 
 struct cpufreq_governor schedutil_gov = {
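
The two new barriers form a standard flag-and-data pairing: the writer's limit stores must be visible before its limits_changed store, and the reader must clear limits_changed before it reads the limits, so one side always notices the other and an update cannot be lost. A userspace model of the pairing (C11 fences standing in for smp_wmb()/smp_mb(); names hypothetical):

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned int policy_min, policy_max;
static atomic_bool limits_changed;

/* sugov_limits() analogue: publish the new limits, then raise the flag. */
static void writer(unsigned int min, unsigned int max)
{
        atomic_store_explicit(&policy_min, min, memory_order_relaxed);
        atomic_store_explicit(&policy_max, max, memory_order_relaxed);
        atomic_thread_fence(memory_order_release);        /* smp_wmb() analogue */
        atomic_store_explicit(&limits_changed, true, memory_order_relaxed);
}

/* sugov_should_update_freq() analogue: clear the flag, then read the limits.
 * The full fence orders the store to limits_changed before the loads, so
 * either this pass sees the new limits or a later pass sees the flag set
 * again; the update cannot fall through the cracks. */
static bool reader(unsigned int *min, unsigned int *max)
{
        if (!atomic_load_explicit(&limits_changed, memory_order_relaxed))
                return false;
        atomic_store_explicit(&limits_changed, false, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);        /* smp_mb() analogue */
        *min = atomic_load_explicit(&policy_min, memory_order_relaxed);
        *max = atomic_load_explicit(&policy_max, memory_order_relaxed);
        return true;
}

int main(void)
{
        unsigned int min, max;

        writer(1000000, 2000000);
        return reader(&min, &max) ? 0 : 1;
}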
