Commit 0f21b911 authored by Vlastimil Babka's avatar Vlastimil Babka Committed by Andrew Morton
Browse files

mm/page_alloc: simplify and cleanup pcp locking

The pcp locking relies on pcp_spin_trylock() which has to be used together
with pcp_trylock_prepare()/pcp_trylock_finish() to work properly on !SMP
!RT configs.  This is tedious and error-prone.

We can remove pcp_spin_lock() and underlying pcpu_spin_lock() because we
don't use it.  Afterwards pcp_spin_unlock() is only used together with
pcp_spin_trylock().  Therefore we can add the UP_flags parameter to them
both and handle pcp_trylock_prepare()/finish() within.

Additionally for the configs where pcp_trylock_prepare()/finish() are
no-op (SMP || RT) make them pass &UP_flags to a no-op inline function. 
This ensures typechecking and makes the local variable "used" so we can
remove the __maybe_unused attributes.

In my compile testing, bloat-o-meter reported no change on SMP config, so
the compiler is capable of optimizing away the no-ops same as before, and
we have simplified the code using pcp_spin_trylock().

Link: https://lkml.kernel.org/r/20251015-b4-pcp-lock-cleanup-v2-1-740d999595d5@suse.cz


Signed-off-by: default avatarVlastimil Babka <vbabka@suse.cz>
Reviewed-by: default avatarJoshua Hahn <joshua.hahnjy@gmail.com>
Reviewed-by: default avatarSuren Baghdasaryan <surenb@google.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 91e69129
Loading
Loading
Loading
Loading
+40 −59
Original line number Diff line number Diff line
@@ -99,9 +99,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
/*
 * On SMP, spin_trylock is sufficient protection.
 * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
 * Pass flags to a no-op inline function to typecheck and silence the unused
 * variable warning.
 */
#define pcp_trylock_prepare(flags)	do { } while (0)
#define pcp_trylock_finish(flag)	do { } while (0)
static inline void __pcp_trylock_noop(unsigned long *flags) { }
#define pcp_trylock_prepare(flags)	__pcp_trylock_noop(&(flags))
#define pcp_trylock_finish(flags)	__pcp_trylock_noop(&(flags))
#else

/* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
@@ -129,15 +132,6 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
 * Generic helper to lookup and a per-cpu variable with an embedded spinlock.
 * Return value should be used with equivalent unlock helper.
 */
#define pcpu_spin_lock(type, member, ptr)				\
({									\
	type *_ret;							\
	pcpu_task_pin();						\
	_ret = this_cpu_ptr(ptr);					\
	spin_lock(&_ret->member);					\
	_ret;								\
})

#define pcpu_spin_trylock(type, member, ptr)				\
({									\
	type *_ret;							\
@@ -157,14 +151,21 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
})

/* struct per_cpu_pages specific helpers. */
#define pcp_spin_lock(ptr)						\
	pcpu_spin_lock(struct per_cpu_pages, lock, ptr)

#define pcp_spin_trylock(ptr)						\
	pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)
#define pcp_spin_trylock(ptr, UP_flags)					\
({									\
	struct per_cpu_pages *__ret;					\
	pcp_trylock_prepare(UP_flags);					\
	__ret = pcpu_spin_trylock(struct per_cpu_pages, lock, ptr);	\
	if (!__ret)							\
		pcp_trylock_finish(UP_flags);				\
	__ret;								\
})

#define pcp_spin_unlock(ptr)						\
	pcpu_spin_unlock(lock, ptr)
#define pcp_spin_unlock(ptr, UP_flags)					\
({									\
	pcpu_spin_unlock(lock, ptr);					\
	pcp_trylock_finish(UP_flags);					\
})

#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DEFINE_PER_CPU(int, numa_node);
@@ -2887,13 +2888,10 @@ static bool free_frozen_page_commit(struct zone *zone,
		if (to_free == 0 || pcp->count == 0)
			break;

		pcp_spin_unlock(pcp);
		pcp_trylock_finish(*UP_flags);
		pcp_spin_unlock(pcp, *UP_flags);

		pcp_trylock_prepare(*UP_flags);
		pcp = pcp_spin_trylock(zone->per_cpu_pageset);
		pcp = pcp_spin_trylock(zone->per_cpu_pageset, *UP_flags);
		if (!pcp) {
			pcp_trylock_finish(*UP_flags);
			ret = false;
			break;
		}
@@ -2904,8 +2902,7 @@ static bool free_frozen_page_commit(struct zone *zone,
		 * returned in an unlocked state.
		 */
		if (smp_processor_id() != cpu) {
			pcp_spin_unlock(pcp);
			pcp_trylock_finish(*UP_flags);
			pcp_spin_unlock(pcp, *UP_flags);
			ret = false;
			break;
		}
@@ -2937,7 +2934,7 @@ static bool free_frozen_page_commit(struct zone *zone,
static void __free_frozen_pages(struct page *page, unsigned int order,
				fpi_t fpi_flags)
{
	unsigned long __maybe_unused UP_flags;
	unsigned long UP_flags;
	struct per_cpu_pages *pcp;
	struct zone *zone;
	unsigned long pfn = page_to_pfn(page);
@@ -2973,17 +2970,15 @@ static void __free_frozen_pages(struct page *page, unsigned int order,
		add_page_to_zone_llist(zone, page, order);
		return;
	}
	pcp_trylock_prepare(UP_flags);
	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
	pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
	if (pcp) {
		if (!free_frozen_page_commit(zone, pcp, page, migratetype,
						order, fpi_flags, &UP_flags))
			return;
		pcp_spin_unlock(pcp);
		pcp_spin_unlock(pcp, UP_flags);
	} else {
		free_one_page(zone, page, pfn, order, fpi_flags);
	}
	pcp_trylock_finish(UP_flags);
}

void free_frozen_pages(struct page *page, unsigned int order)
@@ -2996,7 +2991,7 @@ void free_frozen_pages(struct page *page, unsigned int order)
 */
void free_unref_folios(struct folio_batch *folios)
{
	unsigned long __maybe_unused UP_flags;
	unsigned long UP_flags;
	struct per_cpu_pages *pcp = NULL;
	struct zone *locked_zone = NULL;
	int i, j;
@@ -3039,8 +3034,7 @@ void free_unref_folios(struct folio_batch *folios)
		if (zone != locked_zone ||
		    is_migrate_isolate(migratetype)) {
			if (pcp) {
				pcp_spin_unlock(pcp);
				pcp_trylock_finish(UP_flags);
				pcp_spin_unlock(pcp, UP_flags);
				locked_zone = NULL;
				pcp = NULL;
			}
@@ -3059,10 +3053,8 @@ void free_unref_folios(struct folio_batch *folios)
			 * trylock is necessary as folios may be getting freed
			 * from IRQ or SoftIRQ context after an IO completion.
			 */
			pcp_trylock_prepare(UP_flags);
			pcp = pcp_spin_trylock(zone->per_cpu_pageset);
			pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
			if (unlikely(!pcp)) {
				pcp_trylock_finish(UP_flags);
				free_one_page(zone, &folio->page, pfn,
					      order, FPI_NONE);
				continue;
@@ -3085,10 +3077,8 @@ void free_unref_folios(struct folio_batch *folios)
		}
	}

	if (pcp) {
		pcp_spin_unlock(pcp);
		pcp_trylock_finish(UP_flags);
	}
	if (pcp)
		pcp_spin_unlock(pcp, UP_flags);
	folio_batch_reinit(folios);
}

@@ -3339,15 +3329,12 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
	struct per_cpu_pages *pcp;
	struct list_head *list;
	struct page *page;
	unsigned long __maybe_unused UP_flags;
	unsigned long UP_flags;

	/* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
	pcp_trylock_prepare(UP_flags);
	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
	if (!pcp) {
		pcp_trylock_finish(UP_flags);
	pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
	if (!pcp)
		return NULL;
	}

	/*
	 * On allocation, reduce the number of pages that are batch freed.
@@ -3357,8 +3344,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
	pcp->free_count >>= 1;
	list = &pcp->lists[order_to_pindex(migratetype, order)];
	page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
	pcp_spin_unlock(pcp);
	pcp_trylock_finish(UP_flags);
	pcp_spin_unlock(pcp, UP_flags);
	if (page) {
		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
		zone_statistics(preferred_zone, zone, 1);
@@ -5045,7 +5031,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
			struct page **page_array)
{
	struct page *page;
	unsigned long __maybe_unused UP_flags;
	unsigned long UP_flags;
	struct zone *zone;
	struct zoneref *z;
	struct per_cpu_pages *pcp;
@@ -5139,10 +5125,9 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
		goto failed;

	/* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
	pcp_trylock_prepare(UP_flags);
	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
	pcp = pcp_spin_trylock(zone->per_cpu_pageset, UP_flags);
	if (!pcp)
		goto failed_irq;
		goto failed;

	/* Attempt the batch allocation */
	pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
@@ -5159,8 +5144,8 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
		if (unlikely(!page)) {
			/* Try and allocate at least one page */
			if (!nr_account) {
				pcp_spin_unlock(pcp);
				goto failed_irq;
				pcp_spin_unlock(pcp, UP_flags);
				goto failed;
			}
			break;
		}
@@ -5171,8 +5156,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
		page_array[nr_populated++] = page;
	}

	pcp_spin_unlock(pcp);
	pcp_trylock_finish(UP_flags);
	pcp_spin_unlock(pcp, UP_flags);

	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
	zone_statistics(zonelist_zone(ac.preferred_zoneref), zone, nr_account);
@@ -5180,9 +5164,6 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
out:
	return nr_populated;

failed_irq:
	pcp_trylock_finish(UP_flags);

failed:
	page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask);
	if (page)