
Commit 3ccf388

lukaszluba-arm authored and Carlos Llamas committed
UPSTREAM: PM: EM: Add min/max available performance state limits
On some devices there are HW dependencies for shared frequency and voltage between devices. It will impact Energy Aware Scheduler (EAS) decision, where CPUs share the voltage & frequency domain with other CPUs or devices, e.g.:
- Mid CPUs + Big CPU
- Little CPU + L3 cache in DSU
- some other device + Little CPUs

Detailed explanation of one example: when the L3 cache frequency is increased, the affected Little CPUs might run at higher voltage and frequency. That higher voltage causes higher CPU power and thus more energy is used for running the tasks. This is important for background running tasks, which try to run on energy efficient CPUs.

Therefore, add performance state limits which are applied for the device (in this case CPU). This is important on SoCs with the HW dependencies mentioned above, so that the Energy Aware Scheduler (EAS) does not use performance states outside the valid min-max range for energy calculation.

(cherry picked from commit 5609296)
Change-Id: Ib6fcbe28698d49b3454eef765549fed7d9629881
Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
Link: https://patch.msgid.link/20241030164126.1263793-2-lukasz.luba@arm.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
1 parent bc95a02 commit 3ccf388
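The commit message above describes a producer/consumer split: platform code that knows about the shared voltage/frequency dependency reports the currently valid frequency window, and EAS then confines its energy estimates to that window. Below is a minimal, hypothetical sketch of the producer side, assuming a driver that learns the Little cluster is pinned between 1000000 kHz and 1800000 kHz; the function name, CPU number and frequency values are illustrative only, and the frequencies must match entries already present in the EM table for the lookup to succeed.

#include <linux/energy_model.h>

/*
 * Hypothetical sketch (not part of this commit): narrow the EM range of a
 * Little CPU's performance domain after a shared-rail constraint is detected.
 */
static int example_apply_shared_rail_limits(int little_cpu)
{
	struct em_perf_domain *pd;

	/* Performance domain (and EM) covering this CPU */
	pd = em_cpu_get(little_cpu);
	if (!pd)
		return -EINVAL;

	/*
	 * Both frequencies are looked up in the EM table; the call returns
	 * -EINVAL if either value is not an exact table entry (assumed
	 * example values in kHz).
	 */
	return em_update_performance_limits(pd, 1000000, 1800000);
}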

2 files changed: 73 additions, 8 deletions

include/linux/energy_model.h

Lines changed: 21 additions & 8 deletions
@@ -55,6 +55,8 @@ struct em_perf_table {
  * struct em_perf_domain - Performance domain
  * @em_table: Pointer to the runtime modifiable em_perf_table
  * @nr_perf_states: Number of performance states
+ * @min_perf_state: Minimum allowed Performance State index
+ * @max_perf_state: Maximum allowed Performance State index
  * @flags: See "em_perf_domain flags"
  * @cpus: Cpumask covering the CPUs of the domain. It's here
  *		for performance reasons to avoid potential cache
@@ -70,6 +72,8 @@ struct em_perf_table {
 struct em_perf_domain {
 	struct em_perf_table __rcu *em_table;
 	int nr_perf_states;
+	int min_perf_state;
+	int max_perf_state;
 	unsigned long flags;
 	unsigned long cpus[];
 };
@@ -173,13 +177,14 @@ void em_table_free(struct em_perf_table __rcu *table);
 int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
 			 int nr_states);
 int em_dev_update_chip_binning(struct device *dev);
+int em_update_performance_limits(struct em_perf_domain *pd,
+				 unsigned long freq_min_khz, unsigned long freq_max_khz);
 
 /**
  * em_pd_get_efficient_state() - Get an efficient performance state from the EM
  * @table: List of performance states, in ascending order
- * @nr_perf_states: Number of performance states
+ * @pd: performance domain for which this must be done
  * @max_util: Max utilization to map with the EM
- * @pd_flags: Performance Domain flags
 *
 * It is called from the scheduler code quite frequently and as a consequence
 * doesn't implement any check.
@@ -188,13 +193,16 @@ int em_dev_update_chip_binning(struct device *dev);
 * requirement.
 */
 static inline int
-em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
-			  unsigned long max_util, unsigned long pd_flags)
+em_pd_get_efficient_state(struct em_perf_state *table,
+			  struct em_perf_domain *pd, unsigned long max_util)
 {
+	unsigned long pd_flags = pd->flags;
+	int min_ps = pd->min_perf_state;
+	int max_ps = pd->max_perf_state;
 	struct em_perf_state *ps;
 	int i;
 
-	for (i = 0; i < nr_perf_states; i++) {
+	for (i = min_ps; i <= max_ps; i++) {
 		ps = &table[i];
 		if (ps->performance >= max_util) {
 			if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
@@ -204,7 +212,7 @@ em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
 		}
 	}
 
-	return nr_perf_states - 1;
+	return max_ps;
 }
 
 /**
@@ -253,8 +261,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	 * requested performance.
 	 */
 	em_table = rcu_dereference(pd->em_table);
-	i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states,
-				      max_util, pd->flags);
+	i = em_pd_get_efficient_state(em_table->state, pd, max_util);
 	ps = &em_table->state[i];
 
 	/*
@@ -391,6 +398,12 @@ static inline int em_dev_update_chip_binning(struct device *dev)
 {
 	return -EINVAL;
 }
+static inline
+int em_update_performance_limits(struct em_perf_domain *pd,
+				 unsigned long freq_min_khz, unsigned long freq_max_khz)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif
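The clamped loop in em_pd_get_efficient_state() above is the core behavioural change for EAS. Below is a small standalone illustration, in plain userspace C with a simplified stand-in for struct em_perf_state and the inefficiency-skipping flag omitted; it only demonstrates the selection rule that the search now starts at min_perf_state and falls back to max_perf_state (rather than the last table entry) when nothing in the allowed window can serve the requested utilization.

#include <stdio.h>

/* Simplified stand-in for struct em_perf_state, illustration only */
struct ps {
	unsigned long performance;
	unsigned long frequency;	/* kHz */
};

/* Mirrors the clamped search above, minus the inefficiency check */
static int pick_state(const struct ps *table, int min_ps, int max_ps,
		      unsigned long max_util)
{
	int i;

	for (i = min_ps; i <= max_ps; i++)
		if (table[i].performance >= max_util)
			return i;

	/* Nothing in the allowed window fits: use the highest allowed state */
	return max_ps;
}

int main(void)
{
	const struct ps table[] = {
		{  256,  500000 },
		{  512, 1000000 },
		{  768, 1500000 },
		{ 1024, 2000000 },
	};

	/* With limits [1, 2], a tiny utilization maps to index 1, not 0 */
	printf("picked %d\n", pick_state(table, 1, 2, 100));

	/* A huge utilization is capped at index 2, not 3 */
	printf("picked %d\n", pick_state(table, 1, 2, 2000));

	return 0;
}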

kernel/power/energy_model.c

Lines changed: 52 additions & 0 deletions
@@ -628,6 +628,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
 		goto unlock;
 
 	dev->em_pd->flags |= flags;
+	dev->em_pd->min_perf_state = 0;
+	dev->em_pd->max_perf_state = nr_states - 1;
 
 	em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state);
 
@@ -856,3 +858,53 @@ int em_dev_update_chip_binning(struct device *dev)
 	return em_recalc_and_update(dev, pd, em_table);
 }
 EXPORT_SYMBOL_GPL(em_dev_update_chip_binning);
+
+
+/**
+ * em_update_performance_limits() - Update Energy Model with performance
+ *				limits information.
+ * @pd			: Performance Domain with EM that has to be updated.
+ * @freq_min_khz	: New minimum allowed frequency for this device.
+ * @freq_max_khz	: New maximum allowed frequency for this device.
+ *
+ * This function allows to update the EM with information about available
+ * performance levels. It takes the minimum and maximum frequency in kHz
+ * and does internal translation to performance levels.
+ * Returns 0 on success or -EINVAL when failed.
+ */
+int em_update_performance_limits(struct em_perf_domain *pd,
+		unsigned long freq_min_khz, unsigned long freq_max_khz)
+{
+	struct em_perf_state *table;
+	int min_ps = -1;
+	int max_ps = -1;
+	int i;
+
+	if (!pd)
+		return -EINVAL;
+
+	rcu_read_lock();
+	table = em_perf_state_from_pd(pd);
+
+	for (i = 0; i < pd->nr_perf_states; i++) {
+		if (freq_min_khz == table[i].frequency)
+			min_ps = i;
+		if (freq_max_khz == table[i].frequency)
+			max_ps = i;
+	}
+	rcu_read_unlock();
+
+	/* Only update when both are found and sane */
+	if (min_ps < 0 || max_ps < 0 || max_ps < min_ps)
+		return -EINVAL;
+
+
+	/* Guard simultaneous updates and make them atomic */
+	mutex_lock(&em_pd_mutex);
+	pd->min_perf_state = min_ps;
+	pd->max_perf_state = max_ps;
+	mutex_unlock(&em_pd_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(em_update_performance_limits);
