Commit 8c984209 authored by Yang Jihong's avatar Yang Jihong Committed by Arnaldo Carvalho de Melo
Browse files

perf kwork top: Implements BPF-based cpu usage statistics



Use BPF to collect statistics on the CPU usage based on perf BPF skeletons.

Example usage:

  # perf kwork top -h

   Usage: perf kwork top [<options>]

      -b, --use-bpf         Use BPF to measure task cpu usage
      -C, --cpu <cpu>       list of cpus to profile
      -i, --input <file>    input file name
      -n, --name <name>     event name to profile
      -s, --sort <key[,key2...]>
                            sort by key(s): rate, runtime, tid
          --time <str>      Time span for analysis (start,stop)

  #
  # perf kwork -k sched top -b
  Starting trace, Hit <Ctrl+C> to stop and report
  ^C
  Total  : 160702.425 ms, 8 cpus
  %Cpu(s):  36.00% id,   0.00% hi,   0.00% si
  %Cpu0   [||||||||||||||||||              61.66%]
  %Cpu1   [||||||||||||||||||              61.27%]
  %Cpu2   [|||||||||||||||||||             66.40%]
  %Cpu3   [||||||||||||||||||              61.28%]
  %Cpu4   [||||||||||||||||||              61.82%]
  %Cpu5   [|||||||||||||||||||||||         77.41%]
  %Cpu6   [||||||||||||||||||              61.73%]
  %Cpu7   [||||||||||||||||||              63.25%]

        PID     SPID    %CPU           RUNTIME  COMMMAND
    -------------------------------------------------------------
          0        0   38.72       8089.463 ms  [swapper/1]
          0        0   38.71       8084.547 ms  [swapper/3]
          0        0   38.33       8007.532 ms  [swapper/0]
          0        0   38.26       7992.985 ms  [swapper/6]
          0        0   38.17       7971.865 ms  [swapper/4]
          0        0   36.74       7447.765 ms  [swapper/7]
          0        0   33.59       6486.942 ms  [swapper/2]
          0        0   22.58       3771.268 ms  [swapper/5]
       9545     9351    2.48        447.136 ms  sched-messaging
       9574     9351    2.09        418.583 ms  sched-messaging
       9724     9351    2.05        372.407 ms  sched-messaging
       9531     9351    2.01        368.804 ms  sched-messaging
       9512     9351    2.00        362.250 ms  sched-messaging
       9514     9351    1.95        357.767 ms  sched-messaging
       9538     9351    1.86        384.476 ms  sched-messaging
       9712     9351    1.84        386.490 ms  sched-messaging
       9723     9351    1.83        380.021 ms  sched-messaging
       9722     9351    1.82        382.738 ms  sched-messaging
       9517     9351    1.81        354.794 ms  sched-messaging
       9559     9351    1.79        344.305 ms  sched-messaging
       9725     9351    1.77        365.315 ms  sched-messaging
  <SNIP>

  # perf kwork -k sched top -b -n perf
  Starting trace, Hit <Ctrl+C> to stop and report
  ^C
  Total  : 151563.332 ms, 8 cpus
  %Cpu(s):  26.49% id,   0.00% hi,   0.00% si
  %Cpu0   [                                 0.01%]
  %Cpu1   [                                 0.00%]
  %Cpu2   [                                 0.00%]
  %Cpu3   [                                 0.00%]
  %Cpu4   [                                 0.00%]
  %Cpu5   [                                 0.00%]
  %Cpu6   [                                 0.00%]
  %Cpu7   [                                 0.00%]

        PID     SPID    %CPU           RUNTIME  COMMMAND
    -------------------------------------------------------------
       9754     9754    0.01          2.303 ms  perf

  #
  # perf kwork -k sched top -b -C 2,3,4
  Starting trace, Hit <Ctrl+C> to stop and report
  ^C
  Total  :  48016.721 ms, 3 cpus
  %Cpu(s):  27.82% id,   0.00% hi,   0.00% si
  %Cpu2   [||||||||||||||||||||||          74.68%]
  %Cpu3   [|||||||||||||||||||||           71.06%]
  %Cpu4   [|||||||||||||||||||||           70.91%]

        PID     SPID    %CPU           RUNTIME  COMMMAND
    -------------------------------------------------------------
          0        0   29.08       4734.998 ms  [swapper/4]
          0        0   28.93       4710.029 ms  [swapper/3]
          0        0   25.31       3912.363 ms  [swapper/2]
      10248    10158    1.62        264.931 ms  sched-messaging
      10253    10158    1.62        265.136 ms  sched-messaging
      10158    10158    1.60        263.013 ms  bash
      10360    10158    1.49        243.639 ms  sched-messaging
      10413    10158    1.48        238.604 ms  sched-messaging
      10531    10158    1.47        234.067 ms  sched-messaging
      10400    10158    1.47        240.631 ms  sched-messaging
      10355    10158    1.47        230.586 ms  sched-messaging
      10377    10158    1.43        234.835 ms  sched-messaging
      10526    10158    1.42        232.045 ms  sched-messaging
      10298    10158    1.41        222.396 ms  sched-messaging
      10410    10158    1.38        221.853 ms  sched-messaging
      10364    10158    1.38        226.042 ms  sched-messaging
      10480    10158    1.36        213.633 ms  sched-messaging
      10370    10158    1.36        223.620 ms  sched-messaging
      10553    10158    1.34        217.169 ms  sched-messaging
      10291    10158    1.34        211.516 ms  sched-messaging
      10251    10158    1.34        218.813 ms  sched-messaging
      10522    10158    1.33        218.498 ms  sched-messaging
      10288    10158    1.33        216.787 ms  sched-messaging
  <SNIP>

Reviewed-by: default avatarIan Rogers <irogers@google.com>
Signed-off-by: default avatarYang Jihong <yangjihong1@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Link: https://lore.kernel.org/r/20230812084917.169338-15-yangjihong1@huawei.com


Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent aa172a5a
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ There are several variants of 'perf kwork':
        perf kwork latency -b
        perf kwork timehist
        perf kwork top
        perf kwork top -b

   By default it shows the individual work events such as irq, workqeueu,
   including the run time and delay (time between raise and actually entry):
@@ -181,6 +182,10 @@ OPTIONS for 'perf kwork timehist'
OPTIONS for 'perf kwork top'
---------------------------------

-b::
--use-bpf::
	Use BPF to measure task cpu usage.

-C::
--cpu::
	Only show events for the given CPU(s) (comma separated list).
+1 −0
Original line number Diff line number Diff line
@@ -1032,6 +1032,7 @@ SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h
SKELETONS += $(SKEL_OUT)/kwork_top.skel.h
SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h
SKELETONS += $(SKEL_OUT)/augmented_raw_syscalls.skel.h

+59 −3
Original line number Diff line number Diff line
@@ -1619,8 +1619,14 @@ static void top_print_header(struct perf_kwork *kwork __maybe_unused)
	int ret;

	printf("\n ");
	ret = printf(" %*s  %*s  %*s  %-*s",
	ret = printf(" %*s %s%*s%s %*s  %*s  %-*s",
		     PRINT_PID_WIDTH, "PID",

		     kwork->use_bpf ? " " : "",
		     kwork->use_bpf ? PRINT_PID_WIDTH : 0,
		     kwork->use_bpf ? "SPID" : "",
		     kwork->use_bpf ? " " : "",

		     PRINT_CPU_USAGE_WIDTH, "%CPU",
		     PRINT_RUNTIME_HEADER_WIDTH + RPINT_DECIMAL_WIDTH, "RUNTIME",
		     PRINT_TASK_NAME_WIDTH, "COMMMAND");
@@ -1639,6 +1645,12 @@ static int top_print_work(struct perf_kwork *kwork __maybe_unused, struct kwork_
	 */
	ret += printf(" %*ld ", PRINT_PID_WIDTH, work->id);

	/*
	 * tgid
	 */
	if (kwork->use_bpf)
		ret += printf(" %*d ", PRINT_PID_WIDTH, work->tgid);

	/*
	 * cpu usage
	 */
@@ -1656,6 +1668,12 @@ static int top_print_work(struct perf_kwork *kwork __maybe_unused, struct kwork_
	/*
	 * command
	 */
	if (kwork->use_bpf)
		ret += printf(" %s%s%s",
			      work->is_kthread ? "[" : "",
			      work->name,
			      work->is_kthread ? "]" : "");
	else
		ret += printf(" %-*s", PRINT_TASK_NAME_WIDTH, work->name);

	printf("\n");
@@ -2153,6 +2171,36 @@ static void perf_kwork__top_report(struct perf_kwork *kwork)
	printf("\n");
}

static int perf_kwork__top_bpf(struct perf_kwork *kwork)
{
	int ret;

	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	ret = perf_kwork__top_prepare_bpf(kwork);
	if (ret)
		return -1;

	printf("Starting trace, Hit <Ctrl+C> to stop and report\n");

	perf_kwork__top_start();

	/*
	 * a simple pause, wait here for stop signal
	 */
	pause();

	perf_kwork__top_finish();

	perf_kwork__top_read_bpf(kwork);

	perf_kwork__top_cleanup_bpf();

	return 0;

}

static int perf_kwork__top(struct perf_kwork *kwork)
{
	struct __top_cpus_runtime *cpus_runtime;
@@ -2165,7 +2213,11 @@ static int perf_kwork__top(struct perf_kwork *kwork)
	kwork->top_stat.cpus_runtime = cpus_runtime;
	bitmap_zero(kwork->top_stat.all_cpus_bitmap, MAX_NR_CPUS);

	if (kwork->use_bpf)
		ret = perf_kwork__top_bpf(kwork);
	else
		ret = perf_kwork__read_events(kwork);

	if (ret)
		goto out;

@@ -2380,6 +2432,10 @@ int cmd_kwork(int argc, const char **argv)
		   "Time span for analysis (start,stop)"),
	OPT_STRING('i', "input", &input_name, "file",
		   "input file name"),
#ifdef HAVE_BPF_SKEL
	OPT_BOOLEAN('b', "use-bpf", &kwork.use_bpf,
		    "Use BPF to measure task cpu usage"),
#endif
	OPT_PARENT(kwork_options)
	};
	const char *kwork_usage[] = {
+1 −0
Original line number Diff line number Diff line
@@ -165,6 +165,7 @@ endif

ifeq ($(CONFIG_LIBTRACEEVENT),y)
  perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
  perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork_top.o
endif

perf-$(CONFIG_LIBELF) += symbol-elf.o
+286 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * bpf_kwork_top.c
 *
 * Copyright (c) 2022  Huawei Inc,  Yang Jihong <yangjihong1@huawei.com>
 */

#include <time.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

#include <linux/time64.h>

#include "util/debug.h"
#include "util/evsel.h"
#include "util/kwork.h"

#include <bpf/bpf.h>
#include <perf/cpumap.h>

#include "util/bpf_skel/kwork_top.skel.h"

/*
 * This should be in sync with "util/kwork_top.bpf.c"
 */
#define MAX_COMMAND_LEN 16

struct time_data {
	__u64 timestamp;
};

struct work_data {
	__u64 runtime;
};

struct task_data {
	__u32 tgid;
	__u32 is_kthread;
	char comm[MAX_COMMAND_LEN];
};

struct work_key {
	__u32 type;
	__u32 pid;
	__u64 task_p;
};

struct task_key {
	__u32 pid;
	__u32 cpu;
};

struct kwork_class_bpf {
	struct kwork_class *class;
	void (*load_prepare)(void);
};

static struct kwork_top_bpf *skel;

void perf_kwork__top_start(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	skel->bss->from_timestamp = (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
	skel->bss->enabled = 1;
	pr_debug("perf kwork top start at: %lld\n", skel->bss->from_timestamp);
}

void perf_kwork__top_finish(void)
{
	struct timespec ts;

	skel->bss->enabled = 0;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	skel->bss->to_timestamp = (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
	pr_debug("perf kwork top finish at: %lld\n", skel->bss->to_timestamp);
}

static void sched_load_prepare(void)
{
	bpf_program__set_autoload(skel->progs.on_switch, true);
}

static struct kwork_class_bpf kwork_sched_bpf = {
	.load_prepare = sched_load_prepare,
};

static struct kwork_class_bpf *
kwork_class_bpf_supported_list[KWORK_CLASS_MAX] = {
	[KWORK_CLASS_SCHED]	= &kwork_sched_bpf,
};

static bool valid_kwork_class_type(enum kwork_class_type type)
{
	return type >= 0 && type < KWORK_CLASS_MAX ? true : false;
}

static int setup_filters(struct perf_kwork *kwork)
{
	u8 val = 1;
	int i, nr_cpus, fd;
	struct perf_cpu_map *map;

	if (kwork->cpu_list) {
		fd = bpf_map__fd(skel->maps.kwork_top_cpu_filter);
		if (fd < 0) {
			pr_debug("Invalid cpu filter fd\n");
			return -1;
		}

		map = perf_cpu_map__new(kwork->cpu_list);
		if (!map) {
			pr_debug("Invalid cpu_list\n");
			return -1;
		}

		nr_cpus = libbpf_num_possible_cpus();
		for (i = 0; i < perf_cpu_map__nr(map); i++) {
			struct perf_cpu cpu = perf_cpu_map__cpu(map, i);

			if (cpu.cpu >= nr_cpus) {
				perf_cpu_map__put(map);
				pr_err("Requested cpu %d too large\n", cpu.cpu);
				return -1;
			}
			bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY);
		}
		perf_cpu_map__put(map);

		skel->bss->has_cpu_filter = 1;
	}

	return 0;
}

int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
{
	struct bpf_program *prog;
	struct kwork_class *class;
	struct kwork_class_bpf *class_bpf;
	enum kwork_class_type type;

	skel = kwork_top_bpf__open();
	if (!skel) {
		pr_debug("Failed to open kwork top skeleton\n");
		return -1;
	}

	/*
	 * set all progs to non-autoload,
	 * then set corresponding progs according to config
	 */
	bpf_object__for_each_program(prog, skel->obj)
		bpf_program__set_autoload(prog, false);

	list_for_each_entry(class, &kwork->class_list, list) {
		type = class->type;
		if (!valid_kwork_class_type(type) ||
		    !kwork_class_bpf_supported_list[type]) {
			pr_err("Unsupported bpf trace class %s\n", class->name);
			goto out;
		}

		class_bpf = kwork_class_bpf_supported_list[type];
		class_bpf->class = class;

		if (class_bpf->load_prepare)
			class_bpf->load_prepare();
	}

	if (kwork_top_bpf__load(skel)) {
		pr_debug("Failed to load kwork top skeleton\n");
		goto out;
	}

	if (setup_filters(kwork))
		goto out;

	if (kwork_top_bpf__attach(skel)) {
		pr_debug("Failed to attach kwork top skeleton\n");
		goto out;
	}

	return 0;

out:
	kwork_top_bpf__destroy(skel);
	return -1;
}

static void read_task_info(struct kwork_work *work)
{
	int fd;
	struct task_data data;
	struct task_key key = {
		.pid = work->id,
		.cpu = work->cpu,
	};

	fd = bpf_map__fd(skel->maps.kwork_top_tasks);
	if (fd < 0) {
		pr_debug("Invalid top tasks map fd\n");
		return;
	}

	if (!bpf_map_lookup_elem(fd, &key, &data)) {
		work->tgid = data.tgid;
		work->is_kthread = data.is_kthread;
		work->name = strdup(data.comm);
	}
}
static int add_work(struct perf_kwork *kwork, struct work_key *key,
		    struct work_data *data, int cpu)
{
	struct kwork_class_bpf *bpf_trace;
	struct kwork_work *work;
	struct kwork_work tmp = {
		.id = key->pid,
		.cpu = cpu,
		.name = NULL,
	};
	enum kwork_class_type type = key->type;

	if (!valid_kwork_class_type(type)) {
		pr_debug("Invalid class type %d to add work\n", type);
		return -1;
	}

	bpf_trace = kwork_class_bpf_supported_list[type];
	tmp.class = bpf_trace->class;

	work = perf_kwork_add_work(kwork, tmp.class, &tmp);
	if (!work)
		return -1;

	work->total_runtime = data->runtime;
	read_task_info(work);

	return 0;
}

int perf_kwork__top_read_bpf(struct perf_kwork *kwork)
{
	int i, fd, nr_cpus;
	struct work_data *data;
	struct work_key key, prev;

	fd = bpf_map__fd(skel->maps.kwork_top_works);
	if (fd < 0) {
		pr_debug("Invalid top runtime fd\n");
		return -1;
	}

	nr_cpus = libbpf_num_possible_cpus();
	data = calloc(nr_cpus, sizeof(struct work_data));
	if (!data)
		return -1;

	memset(&prev, 0, sizeof(prev));
	while (!bpf_map_get_next_key(fd, &prev, &key)) {
		if ((bpf_map_lookup_elem(fd, &key, data)) != 0) {
			pr_debug("Failed to lookup top elem\n");
			return -1;
		}

		for (i = 0; i < nr_cpus; i++) {
			if (data[i].runtime == 0)
				continue;

			if (add_work(kwork, &key, &data[i], i))
				return -1;
		}
		prev = key;
	}
	free(data);

	return 0;
}

void perf_kwork__top_cleanup_bpf(void)
{
	kwork_top_bpf__destroy(skel);
}
Loading