Commit 22d407b1 authored by Suren Baghdasaryan, committed by Andrew Morton
Browse files

lib: add allocation tagging support for memory allocation profiling

Introduce CONFIG_MEM_ALLOC_PROFILING which provides definitions to easily
instrument memory allocators.  It registers an "alloc_tags" codetag type
with /proc/allocinfo interface to output allocation tag information when
the feature is enabled.

CONFIG_MEM_ALLOC_PROFILING_DEBUG is provided for debugging the memory
allocation profiling instrumentation.

Memory allocation profiling can be enabled or disabled at runtime using
/proc/sys/vm/mem_profiling sysctl when CONFIG_MEM_ALLOC_PROFILING_DEBUG=n.
CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT enables memory allocation
profiling by default.

[surenb@google.com: Documentation/filesystems/proc.rst: fix allocinfo title]
  Link: https://lkml.kernel.org/r/20240326073813.727090-1-surenb@google.com
[surenb@google.com: do limited memory accounting for modules with ARCH_NEEDS_WEAK_PER_CPU]
  Link: https://lkml.kernel.org/r/20240402180933.1663992-2-surenb@google.com
[klarasmodin@gmail.com: explicitly include irqflags.h in alloc_tag.h]
  Link: https://lkml.kernel.org/r/20240407133252.173636-1-klarasmodin@gmail.com
[surenb@google.com: fix alloc_tag_init() to prevent passing NULL to PTR_ERR()]
  Link: https://lkml.kernel.org/r/20240417003349.2520094-1-surenb@google.com
Link: https://lkml.kernel.org/r/20240321163705.3067592-14-surenb@google.com


Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Co-developed-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Klara Modin <klarasmodin@gmail.com>
Tested-by: Kees Cook <keescook@chromium.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alex Gaynor <alex.gaynor@gmail.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andreas Hindborg <a.hindborg@samsung.com>
Cc: Benno Lossin <benno.lossin@proton.me>
Cc: "Björn Roy Baron" <bjorn3_gh@protonmail.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Gary Guo <gary@garyguo.net>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wedson Almeida Filho <wedsonaf@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 47a92dfb
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@ Currently, these files are in /proc/sys/vm:
- legacy_va_layout
- lowmem_reserve_ratio
- max_map_count
- mem_profiling         (only if CONFIG_MEM_ALLOC_PROFILING=y)
- memory_failure_early_kill
- memory_failure_recovery
- min_free_kbytes
@@ -425,6 +426,21 @@ e.g., up to one or two maps per allocation.
The default value is 65530.


mem_profiling
==============

Enable memory profiling (when CONFIG_MEM_ALLOC_PROFILING=y)

1: Enable memory profiling.

0: Disable memory profiling.

Enabling memory profiling introduces a small performance overhead for all
memory allocations.

The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.


memory_failure_early_kill:
==========================

+29 −0
Original line number Diff line number Diff line
@@ -688,6 +688,7 @@ files are there, and which are missing.
 ============ ===============================================================
 File         Content
 ============ ===============================================================
 allocinfo    Memory allocation profiling information
 apm          Advanced power management info
 bootconfig   Kernel command line obtained from boot config,
 	      and, if there were kernel parameters from the
@@ -953,6 +954,34 @@ also be allocatable although a lot of filesystem metadata may have to be
reclaimed to achieve this.


allocinfo
~~~~~~~~~

Provides information about memory allocations at all locations in the code
base. Each allocation in the code is identified by its source file, line
number, module (if it originates from a loadable module) and the function calling
the allocation. The number of bytes allocated and number of calls at each
location are reported.

Example output.

::

    > sort -rn /proc/allocinfo
   127664128    31168 mm/page_ext.c:270 func:alloc_page_ext
    56373248     4737 mm/slub.c:2259 func:alloc_slab_page
    14880768     3633 mm/readahead.c:247 func:page_cache_ra_unbounded
    14417920     3520 mm/mm_init.c:2530 func:alloc_large_system_hash
    13377536      234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs
    11718656     2861 mm/filemap.c:1919 func:__filemap_get_folio
     9192960     2800 kernel/fork.c:307 func:alloc_thread_stack_node
     4206592        4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable
     4136960     1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start
     3940352      962 mm/memory.c:4214 func:alloc_anon_folio
     2894464    22613 fs/kernfs/dir.c:615 func:__kernfs_new_node
     ...


meminfo
~~~~~~~

+14 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ASM_GENERIC_CODETAG_LDS_H
#define __ASM_GENERIC_CODETAG_LDS_H

/*
 * Place all input sections named @_name at the current location and bracket
 * them with the symbols __start_<_name> and __stop_<_name>.
 *
 * The location counter is aligned to 8 bytes first, so __start_<_name> sits
 * on an 8-byte boundary. KEEP() prevents the linker from discarding the
 * section contents during section garbage collection.
 */
#define SECTION_WITH_BOUNDARIES(_name)	\
	. = ALIGN(8);			\
	__start_##_name = .;		\
	KEEP(*(_name))			\
	__stop_##_name = .;

/*
 * All code-tag sections to be emitted by a linker script. Currently this is
 * only "alloc_tags", bounded by __start_alloc_tags / __stop_alloc_tags.
 */
#define CODETAG_SECTIONS()		\
	SECTION_WITH_BOUNDARIES(alloc_tags)

#endif /* __ASM_GENERIC_CODETAG_LDS_H */
+3 −0
Original line number Diff line number Diff line
@@ -50,6 +50,8 @@
 *               [__nosave_begin, __nosave_end] for the nosave data
 */

#include <asm-generic/codetag.lds.h>

#ifndef LOAD_OFFSET
#define LOAD_OFFSET 0
#endif
@@ -366,6 +368,7 @@
	. = ALIGN(8);							\
	BOUNDED_SECTION_BY(__dyndbg_classes, ___dyndbg_classes)		\
	BOUNDED_SECTION_BY(__dyndbg, ___dyndbg)				\
	CODETAG_SECTIONS()						\
	LIKELY_PROFILE()		       				\
	BRANCH_PROFILE()						\
	TRACE_PRINTKS()							\
+156 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * allocation tagging
 */
#ifndef _LINUX_ALLOC_TAG_H
#define _LINUX_ALLOC_TAG_H

#include <linux/bug.h>
#include <linux/codetag.h>
#include <linux/container_of.h>
#include <linux/preempt.h>
#include <asm/percpu.h>
#include <linux/cpumask.h>
#include <linux/static_key.h>
#include <linux/irqflags.h>

/*
 * Counters kept for one allocation tag.
 *
 * Both fields are running net values: alloc_tag_add() increases them and
 * alloc_tag_sub() decreases them.
 */
struct alloc_tag_counters {
	/* Bytes accounted to the tag (added on alloc_tag_add(), subtracted on alloc_tag_sub()). */
	u64 bytes;
	/* Number of references taken on the tag that have not been dropped yet. */
	u64 calls;
};

/*
 * An instance of this structure is created in a special ELF section
 * ("alloc_tags", see DEFINE_ALLOC_TAG()) at every allocation callsite.
 * At runtime, the special section is treated as an array of these.
 * The embedded codetag ties each instance into the codetag framework.
 */
struct alloc_tag {
	/* Embedded code tag; container_of() on this member recovers the alloc_tag. */
	struct codetag			ct;
	/* Per-CPU counters that allocations made through this tag are accounted to. */
	struct alloc_tag_counters __percpu	*counters;
} __aligned(8);

#ifdef CONFIG_MEM_ALLOC_PROFILING

/*
 * Map a codetag back to the alloc_tag that embeds it.
 *
 * @ct must be the 'ct' member of a struct alloc_tag.
 */
static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct)
{
	struct alloc_tag *owner = container_of(ct, struct alloc_tag, ct);

	return owner;
}

#ifdef ARCH_NEEDS_WEAK_PER_CPU
/*
 * When percpu variables are required to be defined as weak, static percpu
 * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION).
 * Instead we will account all module allocations to a single counter.
 */
DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);

/*
 * Define a static alloc_tag named @_alloc_tag in the "alloc_tags" section.
 * Its counters point at the shared per-CPU _shared_alloc_tag, so every tag
 * defined through this variant updates the same counters.
 */
#define DEFINE_ALLOC_TAG(_alloc_tag)						\
	static struct alloc_tag _alloc_tag __used __aligned(8)			\
	__section("alloc_tags") = {						\
		.ct = CODE_TAG_INIT,						\
		.counters = &_shared_alloc_tag };

#else /* ARCH_NEEDS_WEAK_PER_CPU */

/*
 * Define a static alloc_tag named @_alloc_tag in the "alloc_tags" section,
 * together with its own static per-CPU counters.
 *
 * The counter variable is always called _alloc_tag_cntr (the name is not
 * derived from @_alloc_tag), so the macro can be expanded at most once per
 * scope.
 */
#define DEFINE_ALLOC_TAG(_alloc_tag)						\
	static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr);	\
	static struct alloc_tag _alloc_tag __used __aligned(8)			\
	__section("alloc_tags") = {						\
		.ct = CODE_TAG_INIT,						\
		.counters = &_alloc_tag_cntr };

#endif /* ARCH_NEEDS_WEAK_PER_CPU */

/*
 * Static key tested by mem_alloc_profiling_enabled(). Whether it is declared
 * as a default-true or default-false key is selected by
 * CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT. This is only the
 * declaration; the key itself is defined elsewhere.
 */
DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
			mem_alloc_profiling_key);

/*
 * Report whether memory allocation profiling is currently switched on.
 *
 * Returns the state of mem_alloc_profiling_key, tested through the static
 * branch flavour chosen by CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
 */
static inline bool mem_alloc_profiling_enabled(void)
{
	if (static_branch_maybe(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
				&mem_alloc_profiling_key))
		return true;

	return false;
}

static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
{
	struct alloc_tag_counters v = { 0, 0 };
	struct alloc_tag_counters *counter;
	int cpu;

	for_each_possible_cpu(cpu) {
		counter = per_cpu_ptr(tag->counters, cpu);
		v.bytes += counter->bytes;
		v.calls += counter->calls;
	}

	return v;
}

#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
/*
 * Debug-only sanity check run at the start of alloc_tag_add().
 *
 * Warns once if @ref already carries a tag (the previous tag was not
 * cleared) and warns once if @tag is NULL.
 *
 * NOTE(review): ref->ct is dereferenced in the WARN_ONCE() arguments; this
 * assumes WARN_ONCE() evaluates them only when its condition is true - confirm.
 */
static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag)
{
	WARN_ONCE(ref && ref->ct,
		  "alloc_tag was not cleared (got tag for %s:%u)\n",
		  ref->ct->filename, ref->ct->lineno);

	/* Terminate the message with a newline like the other warnings here. */
	WARN_ONCE(!tag, "current->alloc_tag not set\n");
}

/*
 * Debug-only sanity check run at the start of alloc_tag_sub().
 *
 * Warns once if @ref is non-NULL but carries no tag.
 */
static inline void alloc_tag_sub_check(union codetag_ref *ref)
{
	WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
}
#else
/* Without CONFIG_MEM_ALLOC_PROFILING_DEBUG the checks are empty. */
static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
#endif

/*
 * Point @ref at @tag's codetag and count the new reference on this CPU.
 *
 * Caller should verify both ref and tag to be valid.
 */
static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
{
	ref->ct = &tag->ct;
	/*
	 * We need to increment the call counter every time we have a new
	 * allocation or when we split a large allocation into smaller ones.
	 * Each new reference for every sub-allocation needs to increment call
	 * counter because when we free each part the counter will be decremented.
	 */
	this_cpu_inc(tag->counters->calls);
}

/*
 * Account a new allocation of @bytes bytes to @tag and record the tag in @ref.
 *
 * The debug check always runs first. If either @ref or @tag is NULL nothing
 * is accounted. Otherwise @ref is pointed at @tag, the call counter is
 * incremented and @bytes is added to the byte counter, both on this CPU.
 */
static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
{
	alloc_tag_add_check(ref, tag);

	if (ref && tag) {
		__alloc_tag_ref_set(ref, tag);
		this_cpu_add(tag->counters->bytes, bytes);
	}
}

/*
 * Undo the accounting for an allocation of @bytes bytes referenced by @ref.
 *
 * The debug check always runs first. If @ref is NULL or holds no tag nothing
 * is done. Otherwise @bytes is subtracted from the byte counter and the call
 * counter is decremented, both on this CPU, and @ref is cleared.
 */
static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
{
	struct alloc_tag *owner;

	alloc_tag_sub_check(ref);

	if (ref && ref->ct) {
		owner = ct_to_alloc_tag(ref->ct);

		this_cpu_sub(owner->counters->bytes, bytes);
		this_cpu_dec(owner->counters->calls);

		ref->ct = NULL;
	}
}

#else /* CONFIG_MEM_ALLOC_PROFILING */

/*
 * Profiling is compiled out: DEFINE_ALLOC_TAG() expands to nothing,
 * mem_alloc_profiling_enabled() is always false and the accounting helpers
 * are empty.
 */
#define DEFINE_ALLOC_TAG(_alloc_tag)
static inline bool mem_alloc_profiling_enabled(void) { return false; }
static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
				 size_t bytes) {}
static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {}

#endif /* CONFIG_MEM_ALLOC_PROFILING */

#endif /* _LINUX_ALLOC_TAG_H */
Loading