Commit 1201c50

Merge branches 'pm-cpuidle' and 'pm-cpufreq'
Merge CPU power management updates for 6.6-rc1:

 - Rework the menu and teo cpuidle governors to avoid calling
   tick_nohz_get_sleep_length(), which is likely to become quite
   expensive going forward, too often and improve making decisions
   regarding whether or not to stop the scheduler tick in the teo
   governor (Rafael Wysocki).

 - Improve the performance of cpufreq_stats_create_table() in some
   cases (Liao Chang).

 - Fix two issues in the amd-pstate-ut cpufreq driver (Swapnil Sapkal).

 - Use clamp() helper macro to improve the code readability in
   cpufreq_verify_within_limits() (Liao Chang).

 - Set stale CPU frequency to minimum in intel_pstate (Doug Smythies).

* pm-cpuidle:
  cpuidle: teo: Avoid unnecessary variable assignments
  cpuidle: menu: Skip tick_nohz_get_sleep_length() call in some cases
  cpuidle: teo: Gather statistics regarding whether or not to stop the tick
  cpuidle: teo: Skip tick_nohz_get_sleep_length() call in some cases
  cpuidle: teo: Do not call tick_nohz_get_sleep_length() upfront
  cpuidle: teo: Drop utilized from struct teo_cpu
  cpuidle: teo: Avoid stopping the tick unnecessarily when bailing out
  cpuidle: teo: Update idle duration estimate when choosing shallower state

* pm-cpufreq:
  cpufreq: amd-pstate-ut: Fix kernel panic when loading the driver
  cpufreq: amd-pstate-ut: Remove module parameter access
  cpufreq: Use clamp() helper macro to improve the code readability
  cpufreq: intel_pstate: set stale CPU frequency to minimum
  cpufreq: stats: Improve the performance of cpufreq_stats_create_table()
3 parents: e26a99d + 78aabcb + 60dd283
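The clamp() cleanup listed in the merge message touches cpufreq_verify_within_limits(), whose hunk is not among the files shown below. As a rough standalone sketch of the readability win (the kernel's clamp() lives in <linux/minmax.h> and is type-checked; the macro here is a simplified stand-in):

#include <stdio.h>

/* Simplified stand-in for the kernel's clamp() macro. */
#define clamp(val, lo, hi) \
	((val) < (lo) ? (lo) : ((val) > (hi) ? (hi) : (val)))

int main(void)
{
	unsigned int min = 800000, max = 3600000;	/* policy limits, kHz */
	unsigned int requested = 4200000;		/* out of range */

	/* One clamp() call replaces a min()/max() pair per field. */
	printf("clamped: %u kHz\n", clamp(requested, min, max));
	return 0;
}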

7 files changed (+231, -151 lines)

drivers/cpufreq/amd-pstate-ut.c

Lines changed: 18 additions & 28 deletions
@@ -64,27 +64,9 @@ static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
 static bool get_shared_mem(void)
 {
 	bool result = false;
-	char path[] = "/sys/module/amd_pstate/parameters/shared_mem";
-	char buf[5] = {0};
-	struct file *filp = NULL;
-	loff_t pos = 0;
-	ssize_t ret;
-
-	if (!boot_cpu_has(X86_FEATURE_CPPC)) {
-		filp = filp_open(path, O_RDONLY, 0);
-		if (IS_ERR(filp))
-			pr_err("%s unable to open %s file!\n", __func__, path);
-		else {
-			ret = kernel_read(filp, &buf, sizeof(buf), &pos);
-			if (ret < 0)
-				pr_err("%s read %s file fail ret=%ld!\n",
-					__func__, path, (long)ret);
-			filp_close(filp, NULL);
-		}
-
-		if ('Y' == *buf)
-			result = true;
-	}
+	if (!boot_cpu_has(X86_FEATURE_CPPC))
+		result = true;
 
 	return result;
 }
@@ -158,7 +140,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 		if (ret) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
-			return;
+			goto skip_test;
 		}
 
 		nominal_perf = cppc_perf.nominal_perf;
@@ -169,7 +151,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 		if (ret) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
-			return;
+			goto skip_test;
 		}
 
 		nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
@@ -187,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 				nominal_perf, cpudata->nominal_perf,
 				lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf,
 				lowest_perf, cpudata->lowest_perf);
-			return;
+			goto skip_test;
 		}
 
 		if (!((highest_perf >= nominal_perf) &&
@@ -198,11 +180,15 @@ static void amd_pstate_ut_check_perf(u32 index)
 			pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
 				__func__, cpu, highest_perf, nominal_perf,
 				lowest_nonlinear_perf, lowest_perf);
-			return;
+			goto skip_test;
 		}
+		cpufreq_cpu_put(policy);
 	}
 
 	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+	return;
+skip_test:
+	cpufreq_cpu_put(policy);
 }
 
 /*
@@ -230,14 +216,14 @@ static void amd_pstate_ut_check_freq(u32 index)
 			pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
 				__func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
 				cpudata->lowest_nonlinear_freq, cpudata->min_freq);
-			return;
+			goto skip_test;
 		}
 
 		if (cpudata->min_freq != policy->min) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n",
 				__func__, cpu, cpudata->min_freq, policy->min);
-			return;
+			goto skip_test;
 		}
 
 		if (cpudata->boost_supported) {
@@ -249,16 +235,20 @@ static void amd_pstate_ut_check_freq(u32 index)
 				pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
 					__func__, cpu, policy->max, cpudata->max_freq,
 					cpudata->nominal_freq);
-				return;
+				goto skip_test;
 			}
 		} else {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d must support boost!\n", __func__, cpu);
-			return;
+			goto skip_test;
 		}
+		cpufreq_cpu_put(policy);
 	}
 
 	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+	return;
+skip_test:
+	cpufreq_cpu_put(policy);
 }
 
 static int __init amd_pstate_ut_init(void)
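The hunks above convert early return statements into goto skip_test so that the cpufreq policy reference taken at the top of the per-CPU loop is dropped on every exit path, closing the reference leak. A minimal standalone sketch of this single-exit cleanup idiom (names hypothetical, not the driver's actual code):

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for cpufreq_cpu_get()/cpufreq_cpu_put(). */
static int *get_ref(void) { return malloc(sizeof(int)); }
static void put_ref(int *ref) { free(ref); }

static int check_one(int value)
{
	int *ref = get_ref();
	int ret = -1;

	if (!ref)
		return -1;
	if (value < 0)
		goto skip_test;	/* all failure paths converge here */
	if (value > 100)
		goto skip_test;

	ret = 0;		/* success */
skip_test:
	put_ref(ref);		/* the reference is dropped on every path */
	return ret;
}

int main(void)
{
	printf("%d %d\n", check_one(50), check_one(-1));	/* prints: 0 -1 */
	return 0;
}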

drivers/cpufreq/cpufreq_stats.c

Lines changed: 2 additions & 1 deletion
@@ -243,7 +243,8 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 
 	/* Find valid-unique entries */
 	cpufreq_for_each_valid_entry(pos, policy->freq_table)
-		if (freq_table_get_index(stats, pos->frequency) == -1)
+		if (policy->freq_table_sorted != CPUFREQ_TABLE_UNSORTED ||
+		    freq_table_get_index(stats, pos->frequency) == -1)
 			stats->freq_table[i++] = pos->frequency;
 
 	stats->state_num = i;
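The short-circuit works because a frequency table that cpufreq has classified as sorted is strictly monotonic and therefore duplicate-free, so the linear freq_table_get_index() scan is only needed for unsorted tables; table creation drops from O(n^2) to O(n) in the sorted case. A standalone sketch of the same dedup logic (helper names hypothetical):

#include <stdbool.h>
#include <stdio.h>

/* Linear duplicate scan, mirroring freq_table_get_index(). */
static int find(const unsigned int *t, int n, unsigned int f)
{
	for (int i = 0; i < n; i++)
		if (t[i] == f)
			return i;
	return -1;
}

/* `sorted` mirrors policy->freq_table_sorted != CPUFREQ_TABLE_UNSORTED. */
static int build_table(const unsigned int *in, int n, bool sorted,
		       unsigned int *out)
{
	int count = 0;

	for (int i = 0; i < n; i++)
		/* A strictly sorted table cannot contain duplicates,
		 * so the O(n) duplicate scan is skipped entirely. */
		if (sorted || find(out, count, in[i]) == -1)
			out[count++] = in[i];
	return count;
}

int main(void)
{
	unsigned int freqs[] = { 800, 1200, 1200, 2400 };	/* has one dup */
	unsigned int out[4];

	printf("%d unique\n", build_table(freqs, 4, false, out));	/* 3 */
	return 0;
}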

drivers/cpufreq/intel_pstate.c

Lines changed: 5 additions & 0 deletions
@@ -2609,6 +2609,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		intel_pstate_clear_update_util_hook(policy->cpu);
 		intel_pstate_hwp_set(policy->cpu);
 	}
+	/*
+	 * policy->cur is never updated with the intel_pstate driver, but it
+	 * is used as a stale frequency value. So, keep it within limits.
+	 */
+	policy->cur = policy->min;
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
drivers/cpuidle/governors/gov.h

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Common definitions for cpuidle governors. */
+
+#ifndef __CPUIDLE_GOVERNOR_H
+#define __CPUIDLE_GOVERNOR_H
+
+/*
+ * Idle state target residency threshold used for deciding whether or not to
+ * check the time till the closest expected timer event.
+ */
+#define RESIDENCY_THRESHOLD_NS	(15 * NSEC_PER_USEC)
+
+#endif /* __CPUIDLE_GOVERNOR_H */
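With NSEC_PER_USEC equal to 1000, RESIDENCY_THRESHOLD_NS evaluates to 15,000 ns (15 us): predicted idle intervals at or below this are treated as too short to justify the cost of tick_nohz_get_sleep_length(), which is exactly how the menu.c hunk below uses it.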

drivers/cpuidle/governors/menu.c

Lines changed: 38 additions & 27 deletions
@@ -19,6 +19,8 @@
 #include <linux/sched/stat.h>
 #include <linux/math64.h>
 
+#include "gov.h"
+
 #define BUCKETS 12
 #define INTERVAL_SHIFT 3
 #define INTERVALS (1UL << INTERVAL_SHIFT)
@@ -166,8 +168,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static unsigned int get_typical_interval(struct menu_device *data,
-					 unsigned int predicted_us)
+static unsigned int get_typical_interval(struct menu_device *data)
 {
 	int i, divisor;
 	unsigned int min, max, thresh, avg;
@@ -195,11 +196,7 @@ static unsigned int get_typical_interval(struct menu_device *data,
 		}
 	}
 
-	/*
-	 * If the result of the computation is going to be discarded anyway,
-	 * avoid the computation altogether.
-	 */
-	if (min >= predicted_us)
+	if (!max)
 		return UINT_MAX;
 
 	if (divisor == INTERVALS)
@@ -267,7 +264,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
-	unsigned int predicted_us;
 	u64 predicted_ns;
 	u64 interactivity_req;
 	unsigned int nr_iowaiters;
@@ -279,16 +275,41 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		data->needs_update = 0;
 	}
 
-	/* determine the expected residency time, round up */
-	delta = tick_nohz_get_sleep_length(&delta_tick);
-	if (unlikely(delta < 0)) {
-		delta = 0;
-		delta_tick = 0;
-	}
-	data->next_timer_ns = delta;
-
 	nr_iowaiters = nr_iowait_cpu(dev->cpu);
-	data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+
+	/* Find the shortest expected idle interval. */
+	predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
+	if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
+		unsigned int timer_us;
+
+		/* Determine the time till the closest timer. */
+		delta = tick_nohz_get_sleep_length(&delta_tick);
+		if (unlikely(delta < 0)) {
+			delta = 0;
+			delta_tick = 0;
+		}
+
+		data->next_timer_ns = delta;
+		data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+
+		/* Round up the result for half microseconds. */
+		timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
+				   data->next_timer_ns *
+					data->correction_factor[data->bucket],
+				   RESOLUTION * DECAY * NSEC_PER_USEC);
+		/* Use the lowest expected idle interval to pick the idle state. */
+		predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
+	} else {
+		/*
+		 * Because the next timer event is not going to be determined
+		 * in this case, assume that without the tick the closest timer
+		 * will be in distant future and that the closest tick will occur
+		 * after 1/2 of the tick period.
+		 */
+		data->next_timer_ns = KTIME_MAX;
+		delta_tick = TICK_NSEC / 2;
+		data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
+	}
 
 	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
 	    ((data->next_timer_ns < drv->states[1].target_residency_ns ||
@@ -303,16 +324,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		return 0;
 	}
 
-	/* Round up the result for half microseconds. */
-	predicted_us = div_u64(data->next_timer_ns *
-			       data->correction_factor[data->bucket] +
-			       (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
-			       RESOLUTION * DECAY * NSEC_PER_USEC);
-	/* Use the lowest expected idle interval to pick the idle state. */
-	predicted_ns = (u64)min(predicted_us,
-				get_typical_interval(data, predicted_us)) *
-				NSEC_PER_USEC;
-
 	if (tick_nohz_tick_stopped()) {
 		/*
 		 * If the tick is already stopped, the cost of possible short
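In the reordered menu_select() above, get_typical_interval() now runs first and tick_nohz_get_sleep_length() is queried only when the predicted interval exceeds RESIDENCY_THRESHOLD_NS. The rounding arithmetic can be checked in isolation; a standalone sketch (RESOLUTION = 1024 and DECAY = 8 are assumed from menu.c, and the kernel's div_u64() is replaced by plain division):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC	1000ULL
#define RESOLUTION	1024ULL	/* assumed from menu.c */
#define DECAY		8ULL	/* assumed from menu.c */

int main(void)
{
	uint64_t next_timer_ns = 250500;		/* 250.5 us till next timer */
	uint64_t correction = RESOLUTION * DECAY / 2;	/* learned ratio of ~0.5 */

	/* Same shape as the patched expression: scale the timer distance by
	 * the correction factor, and add half a divisor so the integer
	 * division rounds to nearest instead of truncating. */
	uint64_t timer_us = ((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
			     next_timer_ns * correction) /
			    (RESOLUTION * DECAY * NSEC_PER_USEC);

	printf("predicted: %" PRIu64 " us\n", timer_us);	/* prints: 125 */
	return 0;
}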

0 commit comments

Comments
 (0)