Skip to content

Commit 7983a0b

Browse files
committed
Merge back earlier cpufreq material for 6.15
2 parents f96d92f + ed7cad0 commit 7983a0b

File tree

4 files changed

+47
-31
lines changed

4 files changed

+47
-31
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2316,6 +2316,9 @@
23162316
per_cpu_perf_limits
23172317
Allow per-logical-CPU P-State performance control limits using
23182318
cpufreq sysfs interface
2319+
no_cas
2320+
Do not enable capacity-aware scheduling (CAS) on
2321+
hybrid systems
23192322

23202323
intremap= [X86-64,Intel-IOMMU,EARLY]
23212324
on enable Interrupt Remapping (default)

Documentation/admin-guide/pm/intel_pstate.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,9 @@ of them have to be prepended with the ``intel_pstate=`` prefix.
696696
Use per-logical-CPU P-State limits (see `Coordination of P-state
697697
Limits`_ for details).
698698

699+
``no_cas``
700+
Do not enable capacity-aware scheduling (CAS), which is enabled by
701+
default on hybrid systems.
699702

700703
Diagnostics and Tuning
701704
======================

drivers/cpufreq/cpufreq_governor.c

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,23 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
145145
time_elapsed = update_time - j_cdbs->prev_update_time;
146146
j_cdbs->prev_update_time = update_time;
147147

148-
idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
148+
/*
149+
* cur_idle_time could be smaller than j_cdbs->prev_cpu_idle if
150+
* it's obtained from get_cpu_idle_time_jiffy() when NOHZ is
151+
* off, where idle_time is calculated by the difference between
152+
* time elapsed in jiffies and "busy time" obtained from CPU
153+
* statistics. If a CPU is 100% busy, the time elapsed and busy
154+
* time should grow by the same amount in two consecutive
155+
* samples, but in practice there could be a tiny difference,
156+
* making the accumulated idle time decrease sometimes. Hence,
157+
* in this case, idle_time should be regarded as 0 in order to
158+
* keep the subsequent load computation correct.
159+
*/
160+
if (cur_idle_time > j_cdbs->prev_cpu_idle)
161+
idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
162+
else
163+
idle_time = 0;
164+
149165
j_cdbs->prev_cpu_idle = cur_idle_time;
150166

151167
if (ignore_nice) {
@@ -162,7 +178,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
162178
* calls, so the previous load value can be used then.
163179
*/
164180
load = j_cdbs->prev_load;
165-
} else if (unlikely((int)idle_time > 2 * sampling_rate &&
181+
} else if (unlikely(idle_time > 2 * sampling_rate &&
166182
j_cdbs->prev_load)) {
167183
/*
168184
* If the CPU had gone completely idle and a task has
@@ -189,30 +205,15 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
189205
load = j_cdbs->prev_load;
190206
j_cdbs->prev_load = 0;
191207
} else {
192-
if (time_elapsed >= idle_time) {
208+
if (time_elapsed > idle_time)
193209
load = 100 * (time_elapsed - idle_time) / time_elapsed;
194-
} else {
195-
/*
196-
* That can happen if idle_time is returned by
197-
* get_cpu_idle_time_jiffy(). In that case
198-
* idle_time is roughly equal to the difference
199-
* between time_elapsed and "busy time" obtained
200-
* from CPU statistics. Then, the "busy time"
201-
* can end up being greater than time_elapsed
202-
* (for example, if jiffies_64 and the CPU
203-
* statistics are updated by different CPUs),
204-
* so idle_time may in fact be negative. That
205-
* means, though, that the CPU was busy all
206-
* the time (on the rough average) during the
207-
* last sampling interval and 100 can be
208-
* returned as the load.
209-
*/
210-
load = (int)idle_time < 0 ? 100 : 0;
211-
}
210+
else
211+
load = 0;
212+
212213
j_cdbs->prev_load = load;
213214
}
214215

215-
if (unlikely((int)idle_time > 2 * sampling_rate)) {
216+
if (unlikely(idle_time > 2 * sampling_rate)) {
216217
unsigned int periods = idle_time / sampling_rate;
217218

218219
if (periods < idle_periods)

drivers/cpufreq/intel_pstate.c

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,8 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
936936
NULL,
937937
};
938938

939+
static bool no_cas __ro_after_init;
940+
939941
static struct cpudata *hybrid_max_perf_cpu __read_mostly;
940942
/*
941943
* Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata,
@@ -1041,6 +1043,10 @@ static void hybrid_refresh_cpu_capacity_scaling(void)
10411043

10421044
static void hybrid_init_cpu_capacity_scaling(bool refresh)
10431045
{
1046+
/* Bail out if enabling capacity-aware scheduling is prohibited. */
1047+
if (no_cas)
1048+
return;
1049+
10441050
/*
10451051
* If hybrid_max_perf_cpu is set at this point, the hybrid CPU capacity
10461052
* scaling has been enabled already and the driver is just changing the
@@ -3688,6 +3694,15 @@ static int __init intel_pstate_init(void)
36883694
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
36893695
return -ENODEV;
36903696

3697+
/*
3698+
* The Intel pstate driver will be ignored if the platform
3699+
* firmware has its own power management modes.
3700+
*/
3701+
if (intel_pstate_platform_pwr_mgmt_exists()) {
3702+
pr_info("P-states controlled by the platform\n");
3703+
return -ENODEV;
3704+
}
3705+
36913706
id = x86_match_cpu(hwp_support_ids);
36923707
if (id) {
36933708
hwp_forced = intel_pstate_hwp_is_enabled();
@@ -3743,15 +3758,6 @@ static int __init intel_pstate_init(void)
37433758
default_driver = &intel_cpufreq;
37443759

37453760
hwp_cpu_matched:
3746-
/*
3747-
* The Intel pstate driver will be ignored if the platform
3748-
* firmware has its own power management modes.
3749-
*/
3750-
if (intel_pstate_platform_pwr_mgmt_exists()) {
3751-
pr_info("P-states controlled by the platform\n");
3752-
return -ENODEV;
3753-
}
3754-
37553761
if (!hwp_active && hwp_only)
37563762
return -ENOTSUPP;
37573763

@@ -3835,6 +3841,9 @@ static int __init intel_pstate_setup(char *str)
38353841
if (!strcmp(str, "no_hwp"))
38363842
no_hwp = 1;
38373843

3844+
if (!strcmp(str, "no_cas"))
3845+
no_cas = true;
3846+
38383847
if (!strcmp(str, "force"))
38393848
force_load = 1;
38403849
if (!strcmp(str, "hwp_only"))

0 commit comments

Comments
 (0)