Commit 4984d746 authored by Kairui Song, committed by Andrew Morton
Browse files

mm, swap: check swap table directly for checking cache

Instead of looking at the swap map, check swap table directly to tell if a
swap slot is cached.  Prepares for the removal of SWAP_HAS_CACHE.

Link: https://lkml.kernel.org/r/20251220-swap-table-p2-v5-16-8862a265a033@tencent.com


Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: Baoquan He <bhe@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Rafael J. Wysocki (Intel) <rafael@kernel.org>
Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
Cc: Deepanshu Kartikey <kartikey406@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <ryncsn@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 270f0951
Loading
Loading
Loading
Loading
+8 −3
Original line number Diff line number Diff line
@@ -275,6 +275,7 @@ void __swapcache_clear_cached(struct swap_info_struct *si,
 *   swap entries in the page table, similar to locking swap cache folio.
 * - See the comment of get_swap_device() for more complex usage.
 */
bool swap_cache_has_folio(swp_entry_t entry);
struct folio *swap_cache_get_folio(swp_entry_t entry);
void *swap_cache_get_shadow(swp_entry_t entry);
void swap_cache_del_folio(struct folio *folio);
@@ -335,8 +336,6 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,

static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	struct swap_info_struct *si = __swap_entry_to_info(entry);
	pgoff_t offset = swp_offset(entry);
	int i;

	/*
@@ -345,8 +344,9 @@ static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
	 * be in conflict with the folio in swap cache.
	 */
	for (i = 0; i < max_nr; i++) {
		if ((si->swap_map[offset + i] & SWAP_HAS_CACHE))
		if (swap_cache_has_folio(entry))
			return i;
		entry.val++;
	}

	return i;
@@ -449,6 +449,11 @@ static inline int swap_writeout(struct folio *folio,
	return 0;
}

/*
 * Stub variant of swap_cache_has_folio(): unconditionally reports that
 * @entry has no folio in the swap cache, without looking anything up.
 * NOTE(review): presumably the fallback for builds without swap support,
 * like the neighbouring stubs - confirm against the enclosing #if/#else.
 */
static inline bool swap_cache_has_folio(swp_entry_t entry)
{
	return false;
}

static inline struct folio *swap_cache_get_folio(swp_entry_t entry)
{
	return NULL;
+16 −0
Original line number Diff line number Diff line
@@ -102,6 +102,22 @@ struct folio *swap_cache_get_folio(swp_entry_t entry)
	return NULL;
}

/**
 * swap_cache_has_folio - Test whether a swap slot is backed by a cached folio.
 * @entry: swap entry identifying the slot to test.
 *
 * Reads the slot's swap table entry from its cluster and classifies it
 * with swp_tb_is_folio().
 *
 * Context: Caller must ensure @entry is valid and protect the swap
 * device with reference count or locks.
 * Return: the swp_tb_is_folio() verdict for the slot's swap table entry.
 */
bool swap_cache_has_folio(swp_entry_t entry)
{
	return swp_tb_is_folio(swap_table_get(__swap_entry_to_cluster(entry),
					      swp_cluster_offset(entry)));
}

/**
 * swap_cache_get_shadow - Looks up a shadow in the swap cache.
 * @entry: swap entry used for the lookup.
+29 −26
Original line number Diff line number Diff line
@@ -792,23 +792,18 @@ static bool cluster_reclaim_range(struct swap_info_struct *si,
	unsigned int nr_pages = 1 << order;
	unsigned long offset = start, end = start + nr_pages;
	unsigned char *map = si->swap_map;
	int nr_reclaim;
	unsigned long swp_tb;

	spin_unlock(&ci->lock);
	do {
		switch (READ_ONCE(map[offset])) {
		case 0:
		if (swap_count(READ_ONCE(map[offset])))
			break;
		case SWAP_HAS_CACHE:
			nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY);
			if (nr_reclaim < 0)
				goto out;
		swp_tb = swap_table_get(ci, offset % SWAPFILE_CLUSTER);
		if (swp_tb_is_folio(swp_tb)) {
			if (__try_to_reclaim_swap(si, offset, TTRS_ANYWAY) < 0)
				break;
		default:
			goto out;
		}
	} while (++offset < end);
out:
	spin_lock(&ci->lock);

	/*
@@ -829,37 +824,41 @@ static bool cluster_reclaim_range(struct swap_info_struct *si,
	 * Recheck the range no matter reclaim succeeded or not, the slot
	 * could have been freed while we are not holding the lock.
	 */
	for (offset = start; offset < end; offset++)
		if (READ_ONCE(map[offset]))
	for (offset = start; offset < end; offset++) {
		swp_tb = __swap_table_get(ci, offset % SWAPFILE_CLUSTER);
		if (swap_count(map[offset]) || !swp_tb_is_null(swp_tb))
			return false;
	}

	return true;
}

static bool cluster_scan_range(struct swap_info_struct *si,
			       struct swap_cluster_info *ci,
			       unsigned long start, unsigned int nr_pages,
			       unsigned long offset, unsigned int nr_pages,
			       bool *need_reclaim)
{
	unsigned long offset, end = start + nr_pages;
	unsigned long end = offset + nr_pages;
	unsigned char *map = si->swap_map;
	unsigned long swp_tb;

	if (cluster_is_empty(ci))
		return true;

	for (offset = start; offset < end; offset++) {
		switch (READ_ONCE(map[offset])) {
		case 0:
			continue;
		case SWAP_HAS_CACHE:
	do {
		if (swap_count(map[offset]))
			return false;
		swp_tb = __swap_table_get(ci, offset % SWAPFILE_CLUSTER);
		if (swp_tb_is_folio(swp_tb)) {
			WARN_ON_ONCE(!(map[offset] & SWAP_HAS_CACHE));
			if (!vm_swap_full())
				return false;
			*need_reclaim = true;
			continue;
		default:
			return false;
		}
		} else {
			/* An entry with no count and no cache must be null */
			VM_WARN_ON_ONCE(!swp_tb_is_null(swp_tb));
		}
	} while (++offset < end);

	return true;
}
@@ -1030,7 +1029,8 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
		to_scan--;

		while (offset < end) {
			if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) {
			if (!swap_count(READ_ONCE(map[offset])) &&
			    swp_tb_is_folio(__swap_table_get(ci, offset % SWAPFILE_CLUSTER))) {
				spin_unlock(&ci->lock);
				nr_reclaim = __try_to_reclaim_swap(si, offset,
								   TTRS_ANYWAY);
@@ -1981,6 +1981,7 @@ void swap_put_entries_direct(swp_entry_t entry, int nr)
	struct swap_info_struct *si;
	bool any_only_cache = false;
	unsigned long offset;
	unsigned long swp_tb;

	si = get_swap_device(entry);
	if (WARN_ON_ONCE(!si))
@@ -2005,7 +2006,9 @@ void swap_put_entries_direct(swp_entry_t entry, int nr)
	 */
	for (offset = start_offset; offset < end_offset; offset += nr) {
		nr = 1;
		if (READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
		swp_tb = swap_table_get(__swap_offset_to_cluster(si, offset),
					offset % SWAPFILE_CLUSTER);
		if (!swap_count(READ_ONCE(si->swap_map[offset])) && swp_tb_is_folio(swp_tb)) {
			/*
			 * Folios are always naturally aligned in swap so
			 * advance forward to the next boundary. Zero means no
+3 −7
Original line number Diff line number Diff line
@@ -1190,17 +1190,13 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
		 * Check if the swap entry is cached after acquiring the src_pte
		 * lock. Otherwise, we might miss a newly loaded swap cache folio.
		 *
		 * Check swap_map directly to minimize overhead, READ_ONCE is sufficient.
		 * We are trying to catch newly added swap cache, the only possible case is
		 * when a folio is swapped in and out again staying in swap cache, using the
		 * same entry before the PTE check above. The PTL is acquired and released
		 * twice, each time after updating the swap_map's flag. So holding
		 * the PTL here ensures we see the updated value. False positive is possible,
		 * e.g. SWP_SYNCHRONOUS_IO swapin may set the flag without touching the
		 * cache, or during the tiny synchronization window between swap cache and
		 * swap_map, but it will be gone very quickly, worst result is retry jitters.
		 * twice, each time after updating the swap table. So holding
		 * the PTL here ensures we see the updated value.
		 */
		if (READ_ONCE(si->swap_map[swp_offset(entry)]) & SWAP_HAS_CACHE) {
		if (swap_cache_has_folio(entry)) {
			double_pt_unlock(dst_ptl, src_ptl);
			return -EAGAIN;
		}