Commit 6c99d4eb authored by Pavel Tikhomirov, committed by Andrew Morton
Browse files

kmemleak: enable tracking for percpu pointers

Patch series "kmemleak: support for percpu memory leak detect".

This is a rework of this series:
https://lore.kernel.org/lkml/20200921020007.35803-1-chenjun102@huawei.com/

Originally I was investigating a percpu leak on our customer nodes and
having this functionality was a huge help, which led to this fix [1].

So probably it's a good idea to have it in mainstream too, especially as
after [2] it became much easier to implement (we already have a separate
tree for percpu pointers).

[1] commit 0af8c09c ("netfilter: x_tables: fix percpu counter block leak on error path when creating new netns")
[2] commit 39042079 ("kmemleak: avoid RCU stalls when freeing metadata for per-CPU pointers")


This patch (of 2):

This basically does:

- Add min_percpu_addr and max_percpu_addr to filter out unrelated data
  similar to min_addr and max_addr;

- Set min_count for percpu pointers to 1 to start tracking them;

- Calculate checksum of percpu area as xor of crc32 for each cpu;

- Split pointer lookup and update refs code into separate helper and use
  it twice: once as if the pointer is a virtual pointer and once as if
  it's percpu.

[ptikhomirov@virtuozzo.com: v2]
  Link: https://lkml.kernel.org/r/20240731025526.157529-2-ptikhomirov@virtuozzo.com
Link: https://lkml.kernel.org/r/20240725041223.872472-1-ptikhomirov@virtuozzo.com
Link: https://lkml.kernel.org/r/20240725041223.872472-2-ptikhomirov@virtuozzo.com


Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Wei Yongjun <weiyongjun1@huawei.com>
Cc: Chen Jun <chenjun102@huawei.com>
Cc: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent fbe76a65
Loading
Loading
Loading
Loading
+94 −59
Original line number Diff line number Diff line
@@ -224,6 +224,10 @@ static int kmemleak_error;
static unsigned long min_addr = ULONG_MAX;
static unsigned long max_addr;

/* minimum and maximum address that may be valid per-CPU pointers */
static unsigned long min_percpu_addr = ULONG_MAX;
static unsigned long max_percpu_addr;

static struct task_struct *scan_thread;
/* used to avoid reporting of recently allocated objects */
static unsigned long jiffies_min_age;
@@ -294,12 +298,19 @@ static void hex_dump_object(struct seq_file *seq,
	const u8 *ptr = (const u8 *)object->pointer;
	size_t len;

	if (WARN_ON_ONCE(object->flags & (OBJECT_PHYS | OBJECT_PERCPU)))
	if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
		return;

	if (object->flags & OBJECT_PERCPU)
		ptr = (const u8 *)this_cpu_ptr((void __percpu *)object->pointer);

	/* limit the number of lines to HEX_MAX_LINES */
	len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE);

	if (object->flags & OBJECT_PERCPU)
		warn_or_seq_printf(seq, "  hex dump (first %zu bytes on cpu %d):\n",
				   len, raw_smp_processor_id());
	else
		warn_or_seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
	kasan_disable_current();
	warn_or_seq_hex_dump(seq, DUMP_PREFIX_NONE, HEX_ROW_SIZE,
@@ -695,10 +706,14 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,

	untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
	/*
	 * Only update min_addr and max_addr with object
	 * storing virtual address.
	 * Only update min_addr and max_addr with object storing virtual
	 * address. And update min_percpu_addr max_percpu_addr for per-CPU
	 * objects.
	 */
	if (!(objflags & (OBJECT_PHYS | OBJECT_PERCPU))) {
	if (objflags & OBJECT_PERCPU) {
		min_percpu_addr = min(min_percpu_addr, untagged_ptr);
		max_percpu_addr = max(max_percpu_addr, untagged_ptr + size);
	} else if (!(objflags & OBJECT_PHYS)) {
		min_addr = min(min_addr, untagged_ptr);
		max_addr = max(max_addr, untagged_ptr + size);
	}
@@ -1055,12 +1070,8 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
{
	pr_debug("%s(0x%px, %zu)\n", __func__, ptr, size);

	/*
	 * Percpu allocations are only scanned and not reported as leaks
	 * (min_count is set to 0).
	 */
	if (kmemleak_enabled && ptr && !IS_ERR(ptr))
		create_object_percpu((unsigned long)ptr, size, 0, gfp);
		create_object_percpu((unsigned long)ptr, size, 1, gfp);
}
EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);

@@ -1304,12 +1315,23 @@ static bool update_checksum(struct kmemleak_object *object)
{
	u32 old_csum = object->checksum;

	if (WARN_ON_ONCE(object->flags & (OBJECT_PHYS | OBJECT_PERCPU)))
	if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
		return false;

	kasan_disable_current();
	kcsan_disable_current();
	if (object->flags & OBJECT_PERCPU) {
		unsigned int cpu;

		object->checksum = 0;
		for_each_possible_cpu(cpu) {
			void *ptr = per_cpu_ptr((void __percpu *)object->pointer, cpu);

			object->checksum ^= crc32(0, kasan_reset_tag((void *)ptr), object->size);
		}
	} else {
		object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
	}
	kasan_enable_current();
	kcsan_enable_current();

@@ -1340,6 +1362,64 @@ static void update_refs(struct kmemleak_object *object)
	}
}

/*
 * Treat @pointer as a candidate reference found while scanning @scanned and,
 * if it resolves to a tracked object, account for that reference.
 *
 * @scanned:  object currently being scanned (may be compared against the
 *            lookup result to discard self/circular references)
 * @pointer:  raw value read from the scanned memory
 * @objflags: 0 to interpret @pointer as a virtual address, OBJECT_PERCPU to
 *            interpret it as a per-CPU pointer
 *
 * The value is first filtered against the address window matching @objflags
 * (min_addr/max_addr or min_percpu_addr/max_percpu_addr) so that unrelated
 * data is rejected before any lookup is attempted.
 */
static void pointer_update_refs(struct kmemleak_object *scanned,
			 unsigned long pointer, unsigned int objflags)
{
	struct kmemleak_object *target;
	unsigned long addr = (unsigned long)kasan_reset_tag((void *)pointer);
	unsigned long surplus = 0;
	unsigned long lo, hi;

	/* pick the address window that corresponds to the pointer kind */
	if (objflags & OBJECT_PERCPU) {
		lo = min_percpu_addr;
		hi = max_percpu_addr;
	} else {
		lo = min_addr;
		hi = max_addr;
	}
	if (addr < lo || addr >= hi)
		return;

	/*
	 * The caller holds kmemleak_lock, so get_object() is not needed:
	 * object->use_count cannot reach 0 while the object is still linked
	 * in object_tree_root and object_list (both are updated under
	 * kmemleak_lock).
	 */
	target = __lookup_object(pointer, 1, objflags);
	/* nothing tracked at this address, or a self reference: ignore */
	if (!target || target == scanned)
		return;

	/*
	 * object->lock was already taken once in scan_object(); use the
	 * nested variant to keep lockdep from flagging recursion. All of
	 * these locks are enclosed by scan_mutex.
	 */
	raw_spin_lock_nested(&target->lock, SINGLE_DEPTH_NESTING);
	if (color_gray(target))
		/* already gray: skip update_refs(), only forward the surplus */
		surplus = target->excess_ref;
	else
		update_refs(target);
	raw_spin_unlock(&target->lock);

	if (!surplus)
		return;

	/* pass the surplus reference on to the object it designates */
	target = lookup_object(surplus, 0);
	/* no such object, or a circular reference back to @scanned: ignore */
	if (!target || target == scanned)
		return;

	raw_spin_lock_nested(&target->lock, SINGLE_DEPTH_NESTING);
	update_refs(target);
	raw_spin_unlock(&target->lock);
}

/*
 * Memory scanning is a long process and it needs to be interruptible. This
 * function checks whether such interrupt condition occurred.
@@ -1372,13 +1452,10 @@ static void scan_block(void *_start, void *_end,
	unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
	unsigned long *end = _end - (BYTES_PER_POINTER - 1);
	unsigned long flags;
	unsigned long untagged_ptr;

	raw_spin_lock_irqsave(&kmemleak_lock, flags);
	for (ptr = start; ptr < end; ptr++) {
		struct kmemleak_object *object;
		unsigned long pointer;
		unsigned long excess_ref;

		if (scan_should_stop())
			break;
@@ -1387,50 +1464,8 @@ static void scan_block(void *_start, void *_end,
		pointer = *(unsigned long *)kasan_reset_tag((void *)ptr);
		kasan_enable_current();

		untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
		if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
			continue;

		/*
		 * No need for get_object() here since we hold kmemleak_lock.
		 * object->use_count cannot be dropped to 0 while the object
		 * is still present in object_tree_root and object_list
		 * (with updates protected by kmemleak_lock).
		 */
		object = lookup_object(pointer, 1);
		if (!object)
			continue;
		if (object == scanned)
			/* self referenced, ignore */
			continue;

		/*
		 * Avoid the lockdep recursive warning on object->lock being
		 * previously acquired in scan_object(). These locks are
		 * enclosed by scan_mutex.
		 */
		raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
		/* only pass surplus references (object already gray) */
		if (color_gray(object)) {
			excess_ref = object->excess_ref;
			/* no need for update_refs() if object already gray */
		} else {
			excess_ref = 0;
			update_refs(object);
		}
		raw_spin_unlock(&object->lock);

		if (excess_ref) {
			object = lookup_object(excess_ref, 0);
			if (!object)
				continue;
			if (object == scanned)
				/* circular reference, ignore */
				continue;
			raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
			update_refs(object);
			raw_spin_unlock(&object->lock);
		}
		pointer_update_refs(scanned, pointer, 0);
		pointer_update_refs(scanned, pointer, OBJECT_PERCPU);
	}
	raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
}