Commit a2b4d0f8 authored by Len Brown
Browse files

tools/power turbostat: Favor cpu# over core#



Turbostat collects statistics and outputs results in "topology order",
which means it prioritizes the core# over the cpu#.
The strategy is to minimize wakeups to a core -- which is
important when measuring an idle system.

But core order is problematic, because Linux core#'s are physical
(within each package), and thus subject to APIC-id scrambling
that may be done by the hardware or the BIOS.

As a result, users may be faced with rows in a confusing order:

sudo turbostat -q --show topology,Busy%,CPU%c6,UncMHz sleep 1
Core	CPU	Busy%	CPU%c6	UncMHz
-	-	1.25	72.18	3400
0	4	7.74	0.00
1	5	1.77	88.59
2	6	0.48	96.73
3	7	0.21	98.34
4	8	0.14	96.85
5	9	0.26	97.55
6	10	0.44	97.24
7	11	0.12	96.18
8	0	5.41	0.31	3400
8	1	0.19
12	2	0.41	0.22
12	3	0.08
32	12	0.04	99.21
33	13	0.25	94.92

Abandon the legacy "core# topology order" in favor of simply
ordering by cpu#, with a special case to handle HT siblings
that may not have adjacent cpu#'s.

sudo ./turbostat -q --show topology,Busy%,CPU%c6,UncMHz sleep 1
1.003001 sec
Core	CPU	Busy%	CPU%c6	UncMHz
-	-	1.38	80.55	1600
8	0	10.94	0.00	1600
8	1	0.53
12	2	2.90	0.45
12	3	0.11
0	4	1.96	91.20
1	5	0.97	96.40
2	6	0.24	94.72
3	7	0.31	98.01
4	8	0.20	98.20
5	9	0.62	96.00
6	10	0.06	98.15
7	11	0.12	99.31
32	12	0.04	99.07
33	13	0.27	95.09

The result is that cpu#'s now take precedence over core#'s.

Signed-off-by: Len Brown <len.brown@intel.com>
parent 6be5c151
Loading
Loading
Loading
Loading
+69 −54
Original line number Diff line number Diff line
@@ -2187,20 +2187,6 @@ struct pkg_data {
#define ODD_COUNTERS odd.threads, odd.cores, odd.packages
#define EVEN_COUNTERS even.threads, even.cores, even.packages

#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
	((thread_base) +						      \
	 ((pkg_no) *							      \
	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
	 ((core_no) * topo.threads_per_core) +				      \
	 (thread_no))

#define GET_CORE(core_base, core_no, node_no, pkg_no)			\
	((core_base) +							\
	 ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +	\
	 ((node_no) * topo.cores_per_node) +				\
	 (core_no))

/*
 * The accumulated sum of MSR is defined as a monotonic
 * increasing MSR, it will be accumulated periodically,
@@ -2392,6 +2378,8 @@ struct platform_counters {
	struct rapl_counter energy_psys;	/* MSR_PLATFORM_ENERGY_STATUS */
} platform_counters_odd, platform_counters_even;

#define	MAX_HT_ID	3	/* support SMT-4 */

struct cpu_topology {
	int cpu_id;
	int core_id;		/* unique within a package */
@@ -2401,7 +2389,7 @@ struct cpu_topology {
	int physical_node_id;
	int logical_node_id;	/* 0-based count within the package */
	int ht_id;		/* unique within a core */
	int ht_sibling_cpu_id;
	int ht_sibling_cpu_id[MAX_HT_ID + 1];
	int type;
	cpu_set_t *put_ids;	/* Processing Unit/Thread IDs */
} *cpus;
@@ -2458,27 +2446,38 @@ int cpu_is_not_allowed(int cpu)
int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
		 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
{
	int retval, pkg_no, core_no, thread_no, node_no;
	int cpu, retval;

	retval = 0;

	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
		for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
				for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
		struct thread_data *t;
		struct core_data *c;
		struct pkg_data *p;

					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
		int pkg_id = cpus[cpu].package_id;

					if (cpu_is_not_allowed(t->cpu_id))
		if (cpu_is_not_allowed(cpu))
			continue;

					c = GET_CORE(core_base, core_no, node_no, pkg_no);
		if (cpus[cpu].ht_id > 0)	/* skip HT sibling */
			continue;

					retval |= func(t, c, &pkg_base[pkg_no]);
				}
			}
		t = &thread_base[cpu];
		c = &core_base[GLOBAL_CORE_ID(cpus[cpu].core_id, pkg_id)];
		p = &pkg_base[pkg_id];

		retval |= func(t, c, p);

		/* Handle HT sibling now */
		int i;

		for (i = MAX_HT_ID; i > 0; --i) {	/* ht_id 0 is self */
			if (cpus[cpu].ht_sibling_cpu_id[i] <= 0)
				continue;
			t = &thread_base[cpus[cpu].ht_sibling_cpu_id[i]];

			retval |= func(t, c, p);
		}
	}
	return retval;
@@ -6168,7 +6167,7 @@ static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
	return 0;
}

int get_thread_siblings(struct cpu_topology *thiscpu)
int set_thread_siblings(struct cpu_topology *thiscpu)
{
	char path[80], character;
	FILE *filep;
@@ -6206,8 +6205,11 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
				if (sib_core == thiscpu->core_id) {
					CPU_SET_S(so, size, thiscpu->put_ids);
					if ((so != cpu) && (cpus[so].ht_id < 0)) {
						cpus[so].ht_id = thread_id++;
						cpus[cpu].ht_sibling_cpu_id = so;
						cpus[so].ht_id = thread_id;
						cpus[cpu].ht_sibling_cpu_id[thread_id] = so;
						if (debug)
							fprintf(stderr, "%s: cpu%d.ht_sibling_cpu_id[%d] = %d\n", __func__, cpu, thread_id, so);
						thread_id += 1;
					}
				}
			}
@@ -6229,30 +6231,40 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
		   struct core_data *core_base, struct pkg_data *pkg_base,
		   struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
{
	int retval, pkg_no, node_no, core_no, thread_no;
	int cpu, retval;

	retval = 0;

	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
				for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
		struct thread_data *t, *t2;
		struct core_data *c, *c2;
		struct pkg_data *p, *p2;

					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
		if (cpu_is_not_allowed(cpu))
			continue;

					if (cpu_is_not_allowed(t->cpu_id))
		if (cpus[cpu].ht_id > 0)	/* skip HT sibling */
			continue;

					t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
		t = &thread_base[cpu];
		t2 = &thread_base2[cpu];
		c = &core_base[GLOBAL_CORE_ID(cpus[cpu].core_id, cpus[cpu].package_id)];
		c2 = &core_base2[GLOBAL_CORE_ID(cpus[cpu].core_id, cpus[cpu].package_id)];
		p = &pkg_base[cpus[cpu].package_id];
		p2 = &pkg_base2[cpus[cpu].package_id];

					c = GET_CORE(core_base, core_no, node_no, pkg_no);
					c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);
		retval |= func(t, c, p, t2, c2, p2);

					retval |= func(t, c, &pkg_base[pkg_no], t2, c2, &pkg_base2[pkg_no]);
				}
			}
		/* Handle HT sibling now */
		int i;

		for (i = MAX_HT_ID; i > 0; --i) {	/* ht_id 0 is self */
			if (cpus[cpu].ht_sibling_cpu_id[i] <= 0)
				continue;
			t = &thread_base[cpus[cpu].ht_sibling_cpu_id[i]];
			t2 = &thread_base2[cpus[cpu].ht_sibling_cpu_id[i]];

			retval |= func(t, c, p, t2, c2, p2);
		}
	}
	return retval;
@@ -6391,10 +6403,13 @@ int mark_cpu_present(int cpu)
	return 0;
}

int init_ht_id(int cpu)
int clear_ht_id(int cpu)
{
	int i;

	cpus[cpu].ht_id = -1;
	cpus[cpu].ht_sibling_cpu_id = -1;
	for (i = 0; i <= MAX_HT_ID; ++i)
		cpus[cpu].ht_sibling_cpu_id[i] = -1;
	return 0;
}

@@ -9579,7 +9594,7 @@ void topology_probe(bool startup)
	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);

	for_all_proc_cpus(init_ht_id);
	for_all_proc_cpus(clear_ht_id);

	for_all_proc_cpus(set_cpu_hybrid_type);

@@ -9624,7 +9639,7 @@ void topology_probe(bool startup)
			max_core_id = cpus[i].core_id;

		/* get thread information */
		siblings = get_thread_siblings(&cpus[i]);
		siblings = set_thread_siblings(&cpus[i]);
		if (siblings > max_siblings)
			max_siblings = siblings;
		if (cpus[i].ht_id == 0)
@@ -9748,8 +9763,8 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
	if (node_id < 0)
		node_id = 0;

	t = GET_THREAD(thread_base, cpus[cpu_id].ht_id, core_id, node_id, pkg_id);
	c = GET_CORE(core_base, core_id, node_id, pkg_id);
	t = &thread_base[cpu_id];
	c = &core_base[GLOBAL_CORE_ID(core_id, pkg_id)];

	t->cpu_id = cpu_id;
	if (!cpu_is_not_allowed(cpu_id)) {