Commit e8536dd4 authored by Arnaldo Carvalho de Melo
Browse files

perf ftrace latency: Introduce --bucket-range to ask for linear bucketing



In addition to showing it exponentially, using log2() to figure out the
histogram index, allow for showing it linearly:

The preexisting mode, the default:

  # perf ftrace latency --use-nsec --use-bpf \
  			-T switch_mm_irqs_off -a sleep 2
  #   DURATION     |      COUNT | GRAPH                                   |
       0 -    1 ns |          0 |                                         |
       1 -    2 ns |          0 |                                         |
       2 -    4 ns |          0 |                                         |
       4 -    8 ns |          0 |                                         |
       8 -   16 ns |          0 |                                         |
      16 -   32 ns |          0 |                                         |
      32 -   64 ns |          0 |                                         |
      64 -  128 ns |        238 | #                                       |
     128 -  256 ns |       1704 | ##########                              |
     256 -  512 ns |        672 | ###                                     |
     512 - 1024 ns |       4458 | ##########################              |
       1 -    2 us |        677 | ####                                    |
       2 -    4 us |          5 |                                         |
       4 -    8 us |          0 |                                         |
       8 -   16 us |          0 |                                         |
      16 -   32 us |          0 |                                         |
      32 -   64 us |          0 |                                         |
      64 -  128 us |          0 |                                         |
     128 -  256 us |          0 |                                         |
     256 -  512 us |          0 |                                         |
     512 - 1024 us |          0 |                                         |
       1 - ...  ms |          0 |                                         |
  #

The new histogram mode:

  # perf ftrace latency --bucket-range=150 --use-nsec --use-bpf \
  			-T switch_mm_irqs_off -a sleep 2
  #   DURATION     |      COUNT | GRAPH                                   |
       0 -    1 ns |          0 |                                         |
       1 -  151 ns |        265 | #                                       |
     151 -  301 ns |       1797 | ###########                             |
     301 -  451 ns |        258 | #                                       |
     451 -  601 ns |        289 | #                                       |
     601 -  751 ns |       2049 | #############                           |
     751 -  901 ns |        967 | ######                                  |
     901 - 1051 ns |        513 | ###                                     |
    1.05 - 1.20 us |        114 |                                         |
    1.20 - 1.35 us |        559 | ###                                     |
    1.35 - 1.50 us |        189 | #                                       |
    1.50 - 1.65 us |        137 |                                         |
    1.65 - 1.80 us |         32 |                                         |
    1.80 - 1.95 us |          2 |                                         |
    1.95 - 2.10 us |          0 |                                         |
    2.10 - 2.25 us |          1 |                                         |
    2.25 - 2.40 us |          1 |                                         |
    2.40 - 2.55 us |          0 |                                         |
    2.55 - 2.70 us |          0 |                                         |
    2.70 - 2.85 us |          0 |                                         |
    2.85 - 3.00 us |          1 |                                         |
    3.00 - ...  us |          4 |                                         |
  #

Co-developed-by: Gabriele Monaco <gmonaco@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Clark Williams <williams@redhat.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20241112181214.1171244-3-acme@kernel.org


Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 12115c60
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -148,6 +148,9 @@ OPTIONS for 'perf ftrace latency'
--use-nsec::
	Use nano-second instead of micro-second as a base unit of the histogram.

--bucket-range=::
	Bucket range in us or ns (according to -n/--use-nsec), default is log2() mode.


OPTIONS for 'perf ftrace profile'
---------------------------------
+53 −13
Original line number Diff line number Diff line
@@ -777,9 +777,17 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
		if (ftrace->use_nsec)
			num *= 1000;

		if (!ftrace->bucket_range) {
			i = log2(num);
			if (i < 0)
				i = 0;
		} else {
			// Less than 1 unit (ms or ns), or, in the future,
			// than the min latency desired.
			i = 0;
			if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
				i = num / ftrace->bucket_range + 1;
		}
		if (i >= NUM_BUCKET)
			i = NUM_BUCKET - 1;

@@ -815,28 +823,58 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
	       "  DURATION    ", "COUNT", bar_total, "GRAPH");

	bar_len = buckets[0] * bar_total / total;
	printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",

	printf("  %4d - %4d %s | %10d | %.*s%*s |\n",
	       0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, "");

	for (i = 1; i < NUM_BUCKET - 1; i++) {
		int start = (1 << (i - 1));
		int stop = 1 << i;
		int start, stop;
		const char *unit = use_nsec ? "ns" : "us";

		if (!ftrace->bucket_range) {
			start = (1 << (i - 1));
			stop  = 1 << i;

			if (start >= 1024) {
				start >>= 10;
				stop >>= 10;
				unit = use_nsec ? "us" : "ms";
			}
		} else {
			start = (i - 1) * ftrace->bucket_range + 1;
			stop  = i * ftrace->bucket_range + 1;

			if (start >= 1000) {
				double dstart = start / 1000.0,
				       dstop  = stop / 1000.0;
				printf("  %4.2f - %-4.2f", dstart, dstop);
				unit = use_nsec ? "us" : "ms";
				goto print_bucket_info;
			}
		}

		printf("  %4d - %4d", start, stop);
print_bucket_info:
		bar_len = buckets[i] * bar_total / total;
		printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
		       start, stop, unit, buckets[i], bar_len, bar,
		printf(" %s | %10d | %.*s%*s |\n", unit, buckets[i], bar_len, bar,
		       bar_total - bar_len, "");
	}

	bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
	printf("  %4d - %-4s %s | %10d | %.*s%*s |\n",
	       1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
	if (!ftrace->bucket_range) {
		printf("  %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
	} else {
		int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range;

		if (upper_outlier >= 1000) {
			double dstart = upper_outlier / 1000.0;

			printf("  %4.2f - %-4s %s", dstart, "...", use_nsec ? "us" : "ms");
		} else {
			printf("  %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us");
		}
	}
	printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
	       bar_len, bar, bar_total - bar_len, "");

}
@@ -1558,6 +1596,8 @@ int cmd_ftrace(int argc, const char **argv)
#endif
	OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
		    "Use nano-second histogram"),
	OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range,
		    "Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"),
	OPT_PARENT(common_options),
	};
	const struct option profile_options[] = {
+2 −0
Original line number Diff line number Diff line
@@ -36,6 +36,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
		return -1;
	}

	skel->rodata->bucket_range = ftrace->bucket_range;

	/* don't need to set cpu filter for system-wide mode */
	if (ftrace->target.cpu_list) {
		ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
+14 −0
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@ int enabled = 0;
const volatile int has_cpu = 0;
const volatile int has_task = 0;
const volatile int use_nsec = 0;
const volatile unsigned int bucket_range;

SEC("kprobe/func")
int BPF_PROG(func_begin)
@@ -100,12 +101,25 @@ int BPF_PROG(func_end)
		if (delta < 0)
			return 0;

		if (bucket_range != 0) {
			delta /= cmp_base;
			// Less than 1 unit (ms or ns), or, in the future,
			// than the min latency desired.
			key = 0;
			if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
				key = delta / bucket_range + 1;
				if (key >= NUM_BUCKET)
					key = NUM_BUCKET - 1;
			}
			goto do_lookup;
		}
		// calculate index using delta
		for (key = 0; key < (NUM_BUCKET - 1); key++) {
			if (delta < (cmp_base << key))
				break;
		}

do_lookup:
		hist = bpf_map_lookup_elem(&latency, &key);
		if (!hist)
			return 0;
+1 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ struct perf_ftrace {
	unsigned long		percpu_buffer_size;
	bool			inherit;
	bool			use_nsec;
	unsigned int		bucket_range;
	int			graph_depth;
	int			func_stack_trace;
	int			func_irq_info;