PM: EM: Optimize em_cpu_energy() and remove division (1b600da5) · Commits · git / linux-net

include/linux/energy_model.h

+15 −40

Original line number	Diff line number	Diff line
		@@ -115,27 +115,6 @@ struct em_perf_domain {
		#define EM_MAX_NUM_CPUS 16
		#endif

		/*
		* To avoid an overflow on 32bit machines while calculating the energy
		* use a different order in the operation. First divide by the 'cpu_scale'
		* which would reduce big value stored in the 'cost' field, then multiply by
		* the 'sum_util'. This would allow to handle existing platforms, which have
		* e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts.
		* In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util'
		* could be 4096, then multiplication: 'cost' * 'sum_util' would overflow.
		* This reordering of operations has some limitations, we lose small
		* precision in the estimation (comparing to 64bit platform w/o reordering).
		*
		* We are safe on 64bit machine.
		*/
		#ifdef CONFIG_64BIT
		#define em_estimate_energy(cost, sum_util, scale_cpu) \
		(((cost) * (sum_util)) / (scale_cpu))
		#else
		#define em_estimate_energy(cost, sum_util, scale_cpu) \
		(((cost) / (scale_cpu)) * (sum_util))
		#endif

		struct em_data_callback {
		/**
		* active_power() - Provide power at the next performance state of
		@@ -249,8 +228,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
		{
		struct em_perf_table *em_table;
		struct em_perf_state *ps;
		unsigned long scale_cpu;
		int cpu, i;
		int i;

		#ifdef CONFIG_SCHED_DEBUG
		WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
		@@ -267,9 +245,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
		* max utilization to the allowed CPU capacity before calculating
		* effective performance.
		*/
		cpu = cpumask_first(to_cpumask(pd->cpus));
		scale_cpu = arch_scale_cpu_capacity(cpu);

		max_util = map_util_perf(max_util);
		max_util = min(max_util, allowed_cpu_cap);

		/*
		@@ -282,11 +258,11 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
		ps = &em_table->state[i];

		/*
		* The capacity of a CPU in the domain at the performance state (ps)
		* can be computed as:
		* The performance (capacity) of a CPU in the domain at the performance
		* state (ps) can be computed as:
		*
		* ps->freq * scale_cpu
		* ps->cap = -------------------- (1)
		* ps->performance = -------------------- (1)
		* cpu_max_freq
		*
		* So, ignoring the costs of idle states (which are not available in
		@@ -295,9 +271,10 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
		*
		* ps->power * cpu_util
		* cpu_nrg = -------------------- (2)
		* ps->cap
		* ps->performance
		*
		* since 'cpu_util / ps->cap' represents its percentage of busy time.
		* since 'cpu_util / ps->performance' represents its percentage of busy
		* time.
		*
		* NOTE: Although the result of this computation actually is in
		* units of power, it can be manipulated as an energy value
		@@ -307,9 +284,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
		* By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product
		* of two terms:
		*
		* ps->power * cpu_max_freq cpu_util
		* cpu_nrg = ------------------------ * --------- (3)
		* ps->freq scale_cpu
		* ps->power * cpu_max_freq
		* cpu_nrg = ------------------------ * cpu_util (3)
		* ps->freq * scale_cpu
		*
		* The first term is static, and is stored in the em_perf_state struct
		* as 'ps->cost'.
		@@ -319,11 +296,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
		* total energy of the domain (which is the simple sum of the energy of
		* all of its CPUs) can be factorized as:
		*
		* ps->cost * \Sum cpu_util
		* pd_nrg = ------------------------ (4)
		* scale_cpu
		* pd_nrg = ps->cost * \Sum cpu_util (4)
		*/
		return em_estimate_energy(ps->cost, sum_util, scale_cpu);
		return ps->cost * sum_util;
		}

		/**

kernel/power/energy_model.c

+3 −4

Original line number	Diff line number	Diff line
		@@ -192,11 +192,9 @@ static int em_compute_costs(struct device dev, struct em_perf_state table,
		unsigned long flags)
		{
		unsigned long prev_cost = ULONG_MAX;
		u64 fmax;
		int i, ret;

		/* Compute the cost of each performance state. */
		fmax = (u64) table[nr_states - 1].frequency;
		for (i = nr_states - 1; i >= 0; i--) {
		unsigned long power_res, cost;

		@@ -208,8 +206,9 @@ static int em_compute_costs(struct device dev, struct em_perf_state table,
		return -EINVAL;
		}
		} else {
		power_res = table[i].power;
		cost = div64_u64(fmax * power_res, table[i].frequency);
		/* increase resolution of 'cost' precision */
		power_res = table[i].power * 10;
		cost = power_res / table[i].performance;
		}

		table[i].cost = cost;