Skip to content

Commit 5a367f7

Browse files
lukaszluba-arm authored and rafaeljw committed
PM: EM: Add performance field to struct em_perf_state and optimize
The performance doesn't scale linearly with the frequency. Also, it may be different in different workloads. Some CPUs are designed to be particularly good at some applications e.g. images or video processing and other CPUs in different. When those different types of CPUs are combined in one SoC they should be properly modeled to get max of the HW in Energy Aware Scheduler (EAS). The Energy Model (EM) provides the power vs. performance curves to the EAS, but assumes the CPUs capacity is fixed and scales linearly with the frequency. This patch allows to adjust the curve on the 'performance' axis as well. Code speed optimization: Removing map_util_freq() allows to avoid one division and one multiplication operations from the EAS hot code path. Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Signed-off-by: Lukasz Luba <lukasz.luba@arm.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
1 parent ee1a198 commit 5a367f7

File tree

2 files changed

+39
-12
lines changed

2 files changed

+39
-12
lines changed

include/linux/energy_model.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
/**
1515
* struct em_perf_state - Performance state of a performance domain
16+
* @performance: CPU performance (capacity) at a given frequency
1617
* @frequency: The frequency in KHz, for consistency with CPUFreq
1718
* @power: The power consumed at this level (by 1 CPU or by a registered
1819
* device). It can be a total power: static and dynamic.
@@ -21,6 +22,7 @@
2122
* @flags: see "em_perf_state flags" description below.
2223
*/
2324
struct em_perf_state {
25+
unsigned long performance;
2426
unsigned long frequency;
2527
unsigned long power;
2628
unsigned long cost;
@@ -196,25 +198,25 @@ void em_table_free(struct em_perf_table __rcu *table);
196198
* em_pd_get_efficient_state() - Get an efficient performance state from the EM
197199
* @table: List of performance states, in ascending order
198200
* @nr_perf_states: Number of performance states
199-
* @freq: Frequency to map with the EM
201+
* @max_util: Max utilization to map with the EM
200202
* @pd_flags: Performance Domain flags
201203
*
202204
* It is called from the scheduler code quite frequently and as a consequence
203205
* doesn't implement any check.
204206
*
205-
* Return: An efficient performance state id, high enough to meet @freq
207+
* Return: An efficient performance state id, high enough to meet @max_util
206208
* requirement.
207209
*/
208210
static inline int
209211
em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
210-
unsigned long freq, unsigned long pd_flags)
212+
unsigned long max_util, unsigned long pd_flags)
211213
{
212214
struct em_perf_state *ps;
213215
int i;
214216

215217
for (i = 0; i < nr_perf_states; i++) {
216218
ps = &table[i];
217-
if (ps->frequency >= freq) {
219+
if (ps->performance >= max_util) {
218220
if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
219221
ps->flags & EM_PERF_STATE_INEFFICIENT)
220222
continue;
@@ -245,9 +247,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
245247
unsigned long max_util, unsigned long sum_util,
246248
unsigned long allowed_cpu_cap)
247249
{
248-
unsigned long freq, ref_freq, scale_cpu;
249250
struct em_perf_table *em_table;
250251
struct em_perf_state *ps;
252+
unsigned long scale_cpu;
251253
int cpu, i;
252254

253255
#ifdef CONFIG_SCHED_DEBUG
@@ -260,25 +262,23 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
260262
/*
261263
* In order to predict the performance state, map the utilization of
262264
* the most utilized CPU of the performance domain to a requested
263-
* frequency, like schedutil. Take also into account that the real
264-
* frequency might be set lower (due to thermal capping). Thus, clamp
265+
* performance, like schedutil. Take also into account that the real
266+
* performance might be set lower (due to thermal capping). Thus, clamp
265267
* max utilization to the allowed CPU capacity before calculating
266-
* effective frequency.
268+
* effective performance.
267269
*/
268270
cpu = cpumask_first(to_cpumask(pd->cpus));
269271
scale_cpu = arch_scale_cpu_capacity(cpu);
270-
ref_freq = arch_scale_freq_ref(cpu);
271272

272273
max_util = min(max_util, allowed_cpu_cap);
273-
freq = map_util_freq(max_util, ref_freq, scale_cpu);
274274

275275
/*
276276
* Find the lowest performance state of the Energy Model above the
277-
* requested frequency.
277+
* requested performance.
278278
*/
279279
em_table = rcu_dereference(pd->em_table);
280280
i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states,
281-
freq, pd->flags);
281+
max_util, pd->flags);
282282
ps = &em_table->state[i];
283283

284284
/*

kernel/power/energy_model.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
4646
debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
4747
debugfs_create_ulong("power", 0444, d, &ps->power);
4848
debugfs_create_ulong("cost", 0444, d, &ps->cost);
49+
debugfs_create_ulong("performance", 0444, d, &ps->performance);
4950
debugfs_create_ulong("inefficient", 0444, d, &ps->flags);
5051
}
5152

@@ -159,6 +160,30 @@ struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd)
159160
return table;
160161
}
161162

163+
static void em_init_performance(struct device *dev, struct em_perf_domain *pd,
164+
struct em_perf_state *table, int nr_states)
165+
{
166+
u64 fmax, max_cap;
167+
int i, cpu;
168+
169+
/* This is needed only for CPUs and EAS skip other devices */
170+
if (!_is_cpu_device(dev))
171+
return;
172+
173+
cpu = cpumask_first(em_span_cpus(pd));
174+
175+
/*
176+
* Calculate the performance value for each frequency with
177+
* linear relationship. The final CPU capacity might not be ready at
178+
* boot time, but the EM will be updated a bit later with correct one.
179+
*/
180+
fmax = (u64) table[nr_states - 1].frequency;
181+
max_cap = (u64) arch_scale_cpu_capacity(cpu);
182+
for (i = 0; i < nr_states; i++)
183+
table[i].performance = div64_u64(max_cap * table[i].frequency,
184+
fmax);
185+
}
186+
162187
static int em_compute_costs(struct device *dev, struct em_perf_state *table,
163188
struct em_data_callback *cb, int nr_states,
164189
unsigned long flags)
@@ -318,6 +343,8 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
318343
table[i].frequency = prev_freq = freq;
319344
}
320345

346+
em_init_performance(dev, pd, table, nr_states);
347+
321348
ret = em_compute_costs(dev, table, cb, nr_states, flags);
322349
if (ret)
323350
return -EINVAL;

0 commit comments

Comments
 (0)