Commit 4522aed4 authored by Kairui Song, committed by Andrew Morton
Browse files

mm, swap: rename and move some swap cluster definition and helpers

No feature change. Move cluster-related definitions and helpers to
mm/swap.h; also tidy up and add a "swap_" prefix to the cluster lock/unlock
helpers, so they can be used outside of the swap files.  While at it, add
kerneldoc.

Link: https://lkml.kernel.org/r/20250916160100.31545-7-ryncsn@gmail.com


Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Acked-by: Chris Li <chrisl@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Suggested-by: Chris Li <chrisl@kernel.org>
Acked-by: Nhat Pham <nphamcs@gmail.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: kernel test robot <oliver.sang@intel.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent ae38eb21
Loading
Loading
Loading
Loading
+0 −34
Original line number Diff line number Diff line
@@ -235,40 +235,6 @@ enum {
/* Special value in each swap_map continuation */
#define SWAP_CONT_MAX	0x7f	/* Max count */

/*
 * We use this to track usage of a cluster. A cluster is a block of swap disk
 * space that is SWAPFILE_CLUSTER pages long and naturally aligned on disk. All
 * free clusters are organized into a list. We fetch an entry from the list to
 * get a free cluster.
 *
 * The flags field determines if a cluster is free. This is
 * protected by cluster lock.
 */
struct swap_cluster_info {
	spinlock_t lock;	/*
				 * Protect swap_cluster_info fields
				 * other than list, and swap_info_struct->swap_map
				 * elements corresponding to the swap cluster.
				 */
	u16 count;		/* NOTE(review): presumably the number of slots in use - confirm */
	u8 flags;		/* Holds an enum swap_cluster_flags value */
	u8 order;		/* NOTE(review): presumably the allocation order served - confirm */
	struct list_head list;	/* Cluster list linkage; not covered by @lock (see above) */
};

/*
 * State of a swap cluster, stored in swap_cluster_info.flags.
 * All on-list clusters must have a non-zero flag.
 */
enum swap_cluster_flags {
	CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
	CLUSTER_FLAG_FREE,
	CLUSTER_FLAG_NONFULL,
	CLUSTER_FLAG_FRAG,
	/* Clusters with flags above are allocatable */
	CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, /* Alias for the last allocatable value */
	CLUSTER_FLAG_FULL,
	CLUSTER_FLAG_DISCARD,
	CLUSTER_FLAG_MAX, /* Sentinel: one greater than CLUSTER_FLAG_DISCARD */
};

/*
 * The first page in the swap file is the swap header, which is always marked
 * bad to prevent it from being allocated as an entry. This also prevents the
+70 −0
Original line number Diff line number Diff line
@@ -7,10 +7,80 @@ struct swap_iocb;

extern int page_cluster;

/*
 * SWAPFILE_CLUSTER is the number of pages covered by one swap cluster:
 * HPAGE_PMD_NR when CONFIG_THP_SWAP is enabled, 256 otherwise.
 */
#ifdef CONFIG_THP_SWAP
#define SWAPFILE_CLUSTER	HPAGE_PMD_NR
#define swap_entry_order(order)	(order)
#else
#define SWAPFILE_CLUSTER	256
/*
 * Define swap_entry_order() as a constant to let the compiler optimize
 * out some code if !CONFIG_THP_SWAP
 */
#define swap_entry_order(order)	0
#endif

/*
 * We use this to track usage of a cluster. A cluster is a block of swap disk
 * space that is SWAPFILE_CLUSTER pages long and naturally aligned on disk. All
 * free clusters are organized into a list. We fetch an entry from the list to
 * get a free cluster.
 *
 * The flags field determines if a cluster is free. This is
 * protected by cluster lock.
 */
struct swap_cluster_info {
	spinlock_t lock;	/*
				 * Protect swap_cluster_info fields
				 * other than list, and swap_info_struct->swap_map
				 * elements corresponding to the swap cluster.
				 */
	u16 count;		/* NOTE(review): presumably the number of slots in use - confirm */
	u8 flags;		/* Holds an enum swap_cluster_flags value */
	u8 order;		/* NOTE(review): presumably the allocation order served - confirm */
	struct list_head list;	/* Cluster list linkage; not covered by @lock (see above) */
};

/*
 * State of a swap cluster, stored in swap_cluster_info.flags.
 * All on-list clusters must have a non-zero flag.
 */
enum swap_cluster_flags {
	CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
	CLUSTER_FLAG_FREE,
	CLUSTER_FLAG_NONFULL,
	CLUSTER_FLAG_FRAG,
	/* Clusters with flags above are allocatable */
	CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, /* Alias for the last allocatable value */
	CLUSTER_FLAG_FULL,
	CLUSTER_FLAG_DISCARD,
	CLUSTER_FLAG_MAX, /* Sentinel: one greater than CLUSTER_FLAG_DISCARD */
};

#ifdef CONFIG_SWAP
#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */

/**
 * swp_offset_cluster - Find the cluster that covers a swap offset.
 * @si: swap device whose cluster_info array is indexed.
 * @offset: swap entry offset within @si.
 *
 * Each cluster covers SWAPFILE_CLUSTER consecutive offsets, so the cluster
 * index is @offset / SWAPFILE_CLUSTER. This helper takes no lock and does
 * no range checking.
 *
 * Return: pointer to the matching entry of @si->cluster_info.
 */
static inline struct swap_cluster_info *swp_offset_cluster(
		struct swap_info_struct *si, pgoff_t offset)
{
	pgoff_t idx = offset / SWAPFILE_CLUSTER;

	return si->cluster_info + idx;
}

/**
 * swap_cluster_lock - Lock and return the swap cluster of given offset.
 * @si: swap device the cluster belongs to.
 * @offset: the swap entry offset, pointing to a valid slot.
 *
 * Context: The caller must ensure the offset is in the valid range and
 * protect the swap device with reference count or locks.
 *
 * Return: the cluster covering @offset, with its spinlock held. Release it
 * with swap_cluster_unlock().
 */
static inline struct swap_cluster_info *swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset)
{
	struct swap_cluster_info *ci;

	ci = swp_offset_cluster(si, offset);
	spin_lock(&ci->lock);

	return ci;
}

/**
 * swap_cluster_unlock - Unlock a swap cluster.
 * @ci: the cluster to unlock, as returned by swap_cluster_lock().
 *
 * Releases the spinlock embedded in @ci.
 */
static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
	spin_unlock(&ci->lock);
}

/* linux/mm/page_io.c */
int sio_pool_init(void);
struct swap_iocb;
+29 −68
Original line number Diff line number Diff line
@@ -58,9 +58,6 @@ static void swap_entries_free(struct swap_info_struct *si,
static void swap_range_alloc(struct swap_info_struct *si,
			     unsigned int nr_entries);
static bool folio_swapcache_freeable(struct folio *folio);
static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
					      unsigned long offset);
static inline void unlock_cluster(struct swap_cluster_info *ci);

static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
@@ -258,9 +255,9 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
	 * swap_map is HAS_CACHE only, which means the slots have no page table
	 * reference or pending writeback, and can't be allocated to others.
	 */
	ci = lock_cluster(si, offset);
	ci = swap_cluster_lock(si, offset);
	need_reclaim = swap_only_has_cache(si, offset, nr_pages);
	unlock_cluster(ci);
	swap_cluster_unlock(ci);
	if (!need_reclaim)
		goto out_unlock;

@@ -385,19 +382,6 @@ static void discard_swap_cluster(struct swap_info_struct *si,
	}
}

/*
 * SWAPFILE_CLUSTER is the number of pages covered by one swap cluster:
 * HPAGE_PMD_NR when CONFIG_THP_SWAP is enabled, 256 otherwise.
 */
#ifdef CONFIG_THP_SWAP
#define SWAPFILE_CLUSTER	HPAGE_PMD_NR

#define swap_entry_order(order)	(order)
#else
#define SWAPFILE_CLUSTER	256

/*
 * Define swap_entry_order() as a constant to let the compiler optimize
 * out some code if !CONFIG_THP_SWAP
 */
#define swap_entry_order(order)	0
#endif
#define LATENCY_LIMIT		256

static inline bool cluster_is_empty(struct swap_cluster_info *info)
@@ -425,34 +409,12 @@ static inline unsigned int cluster_index(struct swap_info_struct *si,
	return ci - si->cluster_info;
}

/*
 * Return the cluster of @si that covers swap offset @offset. Each cluster
 * covers SWAPFILE_CLUSTER consecutive offsets. No locking is done here.
 */
static inline struct swap_cluster_info *offset_to_cluster(struct swap_info_struct *si,
							  unsigned long offset)
{
	unsigned long idx = offset / SWAPFILE_CLUSTER;

	return si->cluster_info + idx;
}

/*
 * Return the swap offset of the first slot in cluster @ci of device @si,
 * i.e. the cluster's index multiplied by SWAPFILE_CLUSTER.
 */
static inline unsigned int cluster_offset(struct swap_info_struct *si,
					  struct swap_cluster_info *ci)
{
	unsigned int idx = cluster_index(si, ci);

	return idx * SWAPFILE_CLUSTER;
}

/*
 * Look up the cluster of @si that covers @offset, take its spinlock and
 * return it. The caller releases it with unlock_cluster().
 */
static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
						     unsigned long offset)
{
	struct swap_cluster_info *ci = offset_to_cluster(si, offset);

	spin_lock(&ci->lock);
	return ci;
}

/* Release the spinlock of cluster @ci, as taken by lock_cluster(). */
static inline void unlock_cluster(struct swap_cluster_info *ci)
{
	spin_unlock(&ci->lock);
}

static void move_cluster(struct swap_info_struct *si,
			 struct swap_cluster_info *ci, struct list_head *list,
			 enum swap_cluster_flags new_flags)
@@ -808,7 +770,7 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
	}
out:
	relocate_cluster(si, ci);
	unlock_cluster(ci);
	swap_cluster_unlock(ci);
	if (si->flags & SWP_SOLIDSTATE) {
		this_cpu_write(percpu_swap_cluster.offset[order], next);
		this_cpu_write(percpu_swap_cluster.si[order], si);
@@ -875,7 +837,7 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
		if (ci->flags == CLUSTER_FLAG_NONE)
			relocate_cluster(si, ci);

		unlock_cluster(ci);
		swap_cluster_unlock(ci);
		if (to_scan <= 0)
			break;
	}
@@ -914,7 +876,7 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
		if (offset == SWAP_ENTRY_INVALID)
			goto new_cluster;

		ci = lock_cluster(si, offset);
		ci = swap_cluster_lock(si, offset);
		/* Cluster could have been used by another order */
		if (cluster_is_usable(ci, order)) {
			if (cluster_is_empty(ci))
@@ -922,7 +884,7 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
			found = alloc_swap_scan_cluster(si, ci, offset,
							order, usage);
		} else {
			unlock_cluster(ci);
			swap_cluster_unlock(ci);
		}
		if (found)
			goto done;
@@ -1203,7 +1165,7 @@ static bool swap_alloc_fast(swp_entry_t *entry,
	if (!si || !offset || !get_swap_device_info(si))
		return false;

	ci = lock_cluster(si, offset);
	ci = swap_cluster_lock(si, offset);
	if (cluster_is_usable(ci, order)) {
		if (cluster_is_empty(ci))
			offset = cluster_offset(si, ci);
@@ -1211,7 +1173,7 @@ static bool swap_alloc_fast(swp_entry_t *entry,
		if (found)
			*entry = swp_entry(si->type, found);
	} else {
		unlock_cluster(ci);
		swap_cluster_unlock(ci);
	}

	put_swap_device(si);
@@ -1479,14 +1441,14 @@ static void swap_entries_put_cache(struct swap_info_struct *si,
	unsigned long offset = swp_offset(entry);
	struct swap_cluster_info *ci;

	ci = lock_cluster(si, offset);
	if (swap_only_has_cache(si, offset, nr))
	ci = swap_cluster_lock(si, offset);
	if (swap_only_has_cache(si, offset, nr)) {
		swap_entries_free(si, ci, entry, nr);
	else {
	} else {
		for (int i = 0; i < nr; i++, entry.val++)
			swap_entry_put_locked(si, ci, entry, SWAP_HAS_CACHE);
	}
	unlock_cluster(ci);
	swap_cluster_unlock(ci);
}

static bool swap_entries_put_map(struct swap_info_struct *si,
@@ -1504,7 +1466,7 @@ static bool swap_entries_put_map(struct swap_info_struct *si,
	if (count != 1 && count != SWAP_MAP_SHMEM)
		goto fallback;

	ci = lock_cluster(si, offset);
	ci = swap_cluster_lock(si, offset);
	if (!swap_is_last_map(si, offset, nr, &has_cache)) {
		goto locked_fallback;
	}
@@ -1513,21 +1475,20 @@ static bool swap_entries_put_map(struct swap_info_struct *si,
	else
		for (i = 0; i < nr; i++)
			WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
	unlock_cluster(ci);
	swap_cluster_unlock(ci);

	return has_cache;

fallback:
	ci = lock_cluster(si, offset);
	ci = swap_cluster_lock(si, offset);
locked_fallback:
	for (i = 0; i < nr; i++, entry.val++) {
		count = swap_entry_put_locked(si, ci, entry, 1);
		if (count == SWAP_HAS_CACHE)
			has_cache = true;
	}
	unlock_cluster(ci);
	swap_cluster_unlock(ci);
	return has_cache;

}

/*
@@ -1577,7 +1538,7 @@ static void swap_entries_free(struct swap_info_struct *si,
	unsigned char *map_end = map + nr_pages;

	/* It should never free entries across different clusters */
	VM_BUG_ON(ci != offset_to_cluster(si, offset + nr_pages - 1));
	VM_BUG_ON(ci != swp_offset_cluster(si, offset + nr_pages - 1));
	VM_BUG_ON(cluster_is_empty(ci));
	VM_BUG_ON(ci->count < nr_pages);

@@ -1652,9 +1613,9 @@ bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry)
	struct swap_cluster_info *ci;
	int count;

	ci = lock_cluster(si, offset);
	ci = swap_cluster_lock(si, offset);
	count = swap_count(si->swap_map[offset]);
	unlock_cluster(ci);
	swap_cluster_unlock(ci);
	return !!count;
}

@@ -1677,7 +1638,7 @@ int swp_swapcount(swp_entry_t entry)

	offset = swp_offset(entry);

	ci = lock_cluster(si, offset);
	ci = swap_cluster_lock(si, offset);

	count = swap_count(si->swap_map[offset]);
	if (!(count & COUNT_CONTINUED))
@@ -1700,7 +1661,7 @@ int swp_swapcount(swp_entry_t entry)
		n *= (SWAP_CONT_MAX + 1);
	} while (tmp_count & COUNT_CONTINUED);
out:
	unlock_cluster(ci);
	swap_cluster_unlock(ci);
	return count;
}

@@ -1715,7 +1676,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
	int i;
	bool ret = false;

	ci = lock_cluster(si, offset);
	ci = swap_cluster_lock(si, offset);
	if (nr_pages == 1) {
		if (swap_count(map[roffset]))
			ret = true;
@@ -1728,7 +1689,7 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
		}
	}
unlock_out:
	unlock_cluster(ci);
	swap_cluster_unlock(ci);
	return ret;
}

@@ -2662,8 +2623,8 @@ static void wait_for_allocation(struct swap_info_struct *si)
	BUG_ON(si->flags & SWP_WRITEOK);

	for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) {
		ci = lock_cluster(si, offset);
		unlock_cluster(ci);
		ci = swap_cluster_lock(si, offset);
		swap_cluster_unlock(ci);
	}
}

@@ -3579,7 +3540,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
	offset = swp_offset(entry);
	VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER);
	VM_WARN_ON(usage == 1 && nr > 1);
	ci = lock_cluster(si, offset);
	ci = swap_cluster_lock(si, offset);

	err = 0;
	for (i = 0; i < nr; i++) {
@@ -3634,7 +3595,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr)
	}

unlock_out:
	unlock_cluster(ci);
	swap_cluster_unlock(ci);
	return err;
}

@@ -3733,7 +3694,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)

	offset = swp_offset(entry);

	ci = lock_cluster(si, offset);
	ci = swap_cluster_lock(si, offset);

	count = swap_count(si->swap_map[offset]);

@@ -3793,7 +3754,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
out_unlock_cont:
	spin_unlock(&si->cont_lock);
out:
	unlock_cluster(ci);
	swap_cluster_unlock(ci);
	put_swap_device(si);
outer:
	if (page)