Commit 6c4aa896 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'perf-core-2025-01-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:
 "Seqlock optimizations that arose in a perf context and were merged
  into the perf tree:

   - seqlock: Add raw_seqcount_try_begin (Suren Baghdasaryan)
   - mm: Convert mm_lock_seq to a proper seqcount (Suren Baghdasaryan)
   - mm: Introduce mmap_lock_speculate_{try_begin|retry} (Suren
     Baghdasaryan)
   - mm/gup: Use raw_seqcount_try_begin() (Peter Zijlstra)

  Core perf enhancements:

   - Reduce 'struct page' footprint of perf by mapping pages in advance
     (Lorenzo Stoakes)
   - Save raw sample data conditionally based on sample type (Yabin Cui)
   - Reduce sampling overhead by checking sample_type in
     perf_sample_save_callchain() and perf_sample_save_brstack() (Yabin
     Cui)
   - Export perf_exclude_event() (Namhyung Kim)

  Uprobes scalability enhancements: (Andrii Nakryiko)

   - Simplify find_active_uprobe_rcu() VMA checks
   - Add speculative lockless VMA-to-inode-to-uprobe resolution
   - Simplify session consumer tracking
   - Decouple return_instance list traversal and freeing
   - Ensure return_instance is detached from the list before freeing
   - Reuse return_instances between multiple uretprobes within task
   - Guard against kmemdup() failing in dup_return_instance()

  AMD core PMU driver enhancements:

   - Relax privilege filter restriction on AMD IBS (Namhyung Kim)

  AMD RAPL energy counters support: (Dhananjay Ugwekar)

   - Introduce topology_logical_core_id() (K Prateek Nayak)
   - Remove the unused get_rapl_pmu_cpumask() function
   - Remove the cpu_to_rapl_pmu() function
   - Rename rapl_pmu variables
   - Make rapl_model struct global
   - Add arguments to the init and cleanup functions
   - Modify the generic variable names to *_pkg*
   - Remove the global variable rapl_msrs
   - Move the cntr_mask to rapl_pmus struct
   - Add core energy counter support for AMD CPUs

  Intel core PMU driver enhancements:

   - Support RDPMC 'metrics clear mode' feature (Kan Liang)
   - Clarify adaptive PEBS processing (Kan Liang)
   - Factor out functions for PEBS records processing (Kan Liang)
   - Simplify the PEBS records processing for adaptive PEBS (Kan Liang)

  Intel uncore driver enhancements: (Kan Liang)

   - Convert buggy pmu->func_id use to pmu->registered
   - Support more units on Granite Rapids"

* tag 'perf-core-2025-01-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  perf: map pages in advance
  perf/x86/intel/uncore: Support more units on Granite Rapids
  perf/x86/intel/uncore: Clean up func_id
  perf/x86/intel: Support RDPMC metrics clear mode
  uprobes: Guard against kmemdup() failing in dup_return_instance()
  perf/x86: Relax privilege filter restriction on AMD IBS
  perf/core: Export perf_exclude_event()
  uprobes: Reuse return_instances between multiple uretprobes within task
  uprobes: Ensure return_instance is detached from the list before freeing
  uprobes: Decouple return_instance list traversal and freeing
  uprobes: Simplify session consumer tracking
  uprobes: add speculative lockless VMA-to-inode-to-uprobe resolution
  uprobes: simplify find_active_uprobe_rcu() VMA checks
  mm: introduce mmap_lock_speculate_{try_begin|retry}
  mm: convert mm_lock_seq to a proper seqcount
  mm/gup: Use raw_seqcount_try_begin()
  seqlock: add raw_seqcount_try_begin
  perf/x86/rapl: Add core energy counter support for AMD CPUs
  perf/x86/rapl: Move the cntr_mask to rapl_pmus struct
  perf/x86/rapl: Remove the global variable rapl_msrs
  ...
parents a6640c8c b709eb87
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -135,6 +135,10 @@ Thread-related topology information in the kernel:
    The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
    "core_id."

  - topology_logical_core_id();

    The logical core ID to which a thread belongs.



System topology examples
+1 −1
Original line number Diff line number Diff line
@@ -981,7 +981,7 @@ static int cfdiag_push_sample(struct perf_event *event,
	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.frag.size = cpuhw->usedss;
		raw.frag.data = cpuhw->stop;
		perf_sample_save_raw_data(&data, &raw);
		perf_sample_save_raw_data(&data, event, &raw);
	}

	overflow = perf_event_overflow(event, &data, &regs);
+3 −3
Original line number Diff line number Diff line
@@ -981,7 +981,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
	cpuhw->flags &= ~PMU_F_ENABLED;
}

/* perf_exclude_event() - Filter event
/* perf_event_exclude() - Filter event
 * @event:	The perf event
 * @regs:	pt_regs structure
 * @sde_regs:	Sample-data-entry (sde) regs structure
@@ -990,7 +990,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
 *
 * Return non-zero if the event shall be excluded.
 */
static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs,
			      struct perf_sf_sde_regs *sde_regs)
{
	if (event->attr.exclude_user && user_mode(regs))
@@ -1073,7 +1073,7 @@ static int perf_push_sample(struct perf_event *event,
	data.tid_entry.pid = basic->hpp & LPP_PID_MASK;

	overflow = 0;
	if (perf_exclude_event(event, &regs, sde_regs))
	if (perf_event_exclude(event, &regs, sde_regs))
		goto out;
	if (perf_event_overflow(event, &data, &regs)) {
		overflow = 1;
+1 −1
Original line number Diff line number Diff line
@@ -478,7 +478,7 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.frag.size = rawsize;
		raw.frag.data = cpump->save;
		perf_sample_save_raw_data(&data, &raw);
		perf_sample_save_raw_data(&data, event, &raw);
	}

	overflow = perf_event_overflow(event, &data, &regs);
+1 −1
Original line number Diff line number Diff line
@@ -503,7 +503,7 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.frag.size = rawsize;
		raw.frag.data = cpump->save;
		perf_sample_save_raw_data(&data, &raw);
		perf_sample_save_raw_data(&data, event, &raw);
	}

	overflow = perf_event_overflow(event, &data, &regs);
Loading