Commit 1bbeaf83 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'perf-tools-for-v6.9-2024-03-13' of...

Merge tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools

Pull perf tools updates from Namhyung Kim:
 "perf stat:

   - Support new 'cluster' aggregation mode for shared resources
     depending on the hardware configuration:

        $ sudo perf stat -a --per-cluster -e cycles,instructions sleep 1

         Performance counter stats for 'system wide':

        S0-D0-CLS0    2         85,051,822      cycles
        S0-D0-CLS0    2         73,909,908      instructions      #    0.87  insn per cycle
        S0-D0-CLS2    2         93,365,918      cycles
        S0-D0-CLS2    2         83,006,158      instructions      #    0.89  insn per cycle
        S0-D0-CLS4    2        104,157,523      cycles
        S0-D0-CLS4    2         53,234,396      instructions      #    0.51  insn per cycle
        S0-D0-CLS6    2         65,891,079      cycles
        S0-D0-CLS6    2         41,478,273      instructions      #    0.63  insn per cycle

               1.002407989 seconds time elapsed

   - Various fixes and cleanups for event metrics including NaN handling

  perf script:

   - Use libcapstone if available to disassemble the instructions. This
     enables 'perf script -F disasm' and 'perf script --insn-trace=disasm'
     (for Intel-PT):

        $ perf script -F event,ip,disasm
        cycles:P:  ffffffffa988d428             wrmsr
        cycles:P:  ffffffffa9839d25             movq %rax, %r14
        cycles:P:  ffffffffa9cdcaf0             endbr64
        cycles:P:  ffffffffa988d428             wrmsr
        cycles:P:  ffffffffa988d428             wrmsr
        cycles:P:  ffffffffaa401f86             iretq
        cycles:P:  ffffffffa99c4de5             movq 0x30(%rcx), %r8
        cycles:P:  ffffffffa988d428             wrmsr
        cycles:P:  ffffffffaa401f86             iretq
        cycles:P:  ffffffffa9907983             movl 0x68(%rbx), %eax
        cycles:P:  ffffffffa988d428             wrmsr

   - Expose sample ID / stream ID to python scripts

  perf test:

   - Add more perf test cases from Redhat internal test suites. This
     time it adds the base infra and a few perf probe tests. More to
     come. :)

   - Add 'perf test -p' for parallel execution and fix some issues found
     by the parallel test

   - Support symbol test to print symbols in given (active) module:

        $ perf test -F -v Symbols --dso /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko
        --- start ---
        Testing /lib/modules/6.5.13-1rodete2-amd64/kernel/fs/ext4/ext4.ko
        Overlapping symbols:
         7a990-7a9a0 l __pfx_ext4_exit_fs
         7a990-7a9a0 g __pfx_cleanup_module
        Overlapping symbols:
         7a9a0-7aa1c l ext4_exit_fs
         7a9a0-7aa1c g cleanup_module
        ...

  JSON metric updates:

   - A new round of Intel metric updates

   - Support Power11 PVR (compatible to Power10)

   - Fix cache latency events on Zen 4 to set SliceId properly

  Internal:

   - Fix reference counting for 'map' data structure, tireless work from
     Ian!

   - More memory optimization for struct thread and annotate histogram.
     Now, 'perf report' (TUI) and 'perf annotate' should be much
     lighter-weight in terms of memory footprint

   - Support cross-arch perf register access. Clean up the build
     configuration so that it can detect arch-register support at
     runtime. This can allow to parse register data in sample which was
     recorded in a different arch

  Others:

   - Sync task state in 'perf sched' to kernel using trace event fields.
     The task states have been changed so tools cannot assume a fixed
     encoding

   - Clean up 'perf mem' to generalize the arch-specific events

   - Add support for local and global variables to data type profiling.
     This would increase the success rate of type resolution with DWARF

   - Add short option -H for --hierarchy in 'perf report' and 'perf top'"

* tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (154 commits)
  perf annotate: Add comments in the data structures
  perf annotate: Remove sym_hist.addr[] array
  perf annotate: Calculate instruction overhead using hashmap
  perf annotate: Add a hashmap for symbol histogram
  perf threads: Reduce table size from 256 to 8
  perf threads: Switch from rbtree to hashmap
  perf threads: Move threads to its own files
  perf machine: Move machine's threads into its own abstraction
  perf machine: Move fprintf to for_each loop and a callback
  perf trace: Ignore thread hashing in summary
  perf report: Sort child tasks by tid
  perf vendor events amd: Fix Zen 4 cache latency events
  perf version: Display availability of OpenCSD support
  perf vendor events intel: Add umasks/occ_sel to PCU events.
  perf map: Fix map reference count issues
  libperf evlist: Avoid out-of-bounds access
  perf lock contention: Account contending locks too
  perf metrics: Fix segv for metrics with no events
  perf metrics: Fix metric matching
  perf pmu: Fix a potential memory leak in perf_pmu__lookup()
  ...
parents 63bd30f2 0f66dfe7
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -87,6 +87,7 @@ FEATURE_TESTS_EXTRA := \
         gtk2-infobar                   \
         hello                          \
         libbabeltrace                  \
         libcapstone                    \
         libbfd-liberty                 \
         libbfd-liberty-z               \
         libopencsd                     \
@@ -134,6 +135,7 @@ FEATURE_DISPLAY ?= \
         libcrypto              \
         libunwind              \
         libdw-dwarf-unwind     \
         libcapstone            \
         zlib                   \
         lzma                   \
         get_cpuid              \
+4 −0
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@ FILES= \
         test-timerfd.bin                       \
         test-libdw-dwarf-unwind.bin            \
         test-libbabeltrace.bin                 \
         test-libcapstone.bin			\
         test-compile-32.bin                    \
         test-compile-x32.bin                   \
         test-zlib.bin                          \
@@ -286,6 +287,9 @@ $(OUTPUT)test-libdw-dwarf-unwind.bin:
$(OUTPUT)test-libbabeltrace.bin:
	$(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace)

$(OUTPUT)test-libcapstone.bin:
	$(BUILD) # -lcapstone provided by $(FEATURE_CHECK_LDFLAGS-libcapstone)

$(OUTPUT)test-compile-32.bin:
	$(CC) -m32 -o $@ test-compile.c

+4 −0
Original line number Diff line number Diff line
@@ -134,6 +134,10 @@
#undef main
#endif

#define main main_test_libcapstone
# include "test-libcapstone.c"
#undef main

#define main main_test_lzma
# include "test-lzma.c"
#undef main
+11 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#include <capstone/capstone.h>

int main(void)
{
	csh handle;

	cs_open(CS_ARCH_X86, CS_MODE_64, &handle);
	return 0;
}
+12 −6
Original line number Diff line number Diff line
@@ -248,10 +248,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist)

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
				 int cpu_map_idx, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread);

	sid->id = id;
	sid->evsel = evsel;
@@ -269,21 +269,27 @@ void perf_evlist__reset_id_hash(struct perf_evlist *evlist)

void perf_evlist__id_add(struct perf_evlist *evlist,
			 struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
			 int cpu_map_idx, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	if (!SID(evsel, cpu_map_idx, thread))
		return;

	perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id);
	evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
			   int cpu_map_idx, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	if (!SID(evsel, cpu_map_idx, thread))
		return -1;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;
@@ -312,7 +318,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id);
	return 0;
}

Loading