Commit de85024b authored by Kairui Song's avatar Kairui Song Committed by Andrew Morton
Browse files

mm, swap: remove workaround for unsynchronized swap map cache state

Remove the "skip if exists" check from commit a65b0e76 ("zswap: make
shrinking memcg-aware").  It was needed because there is a tiny time
window between setting the SWAP_HAS_CACHE bit and actually adding the
folio to the swap cache.  If a user is trying to add the folio into the
swap cache but another user was interrupted after setting SWAP_HAS_CACHE
but hasn't added the folio to the swap cache yet, it might lead to a
deadlock.

We have moved the bit setting to the same critical section as adding the
folio, so this is no longer needed.  Remove it and clean it up.

Link: https://lkml.kernel.org/r/20251220-swap-table-p2-v5-13-8862a265a033@tencent.com


Signed-off-by: default avatarKairui Song <kasong@tencent.com>
Reviewed-by: default avatarBaoquan He <bhe@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Rafael J. Wysocki (Intel) <rafael@kernel.org>
Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
Cc: Deepanshu Kartikey <kartikey406@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <ryncsn@gmail.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 2732acda
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -260,7 +260,7 @@ int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
void swap_cache_del_folio(struct folio *folio);
struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
				     struct mempolicy *mpol, pgoff_t ilx,
				     bool *alloced, bool skip_if_exists);
				     bool *alloced);
/* Below helpers require the caller to lock and pass in the swap cluster. */
void __swap_cache_del_folio(struct swap_cluster_info *ci,
			    struct folio *folio, swp_entry_t entry, void *shadow);
+10 −17
Original line number Diff line number Diff line
@@ -444,8 +444,6 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
 * @folio: folio to be added.
 * @gfp: memory allocation flags for charge, can be 0 if @charged if true.
 * @charged: if the folio is already charged.
 * @skip_if_exists: if the slot is in a cached state, return NULL.
 *                  This is an old workaround that will be removed shortly.
 *
 * Update the swap_map and add folio as swap cache, typically before swapin.
 * All swap slots covered by the folio must have a non-zero swap count.
@@ -456,8 +454,7 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
 */
static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
						  struct folio *folio,
						  gfp_t gfp, bool charged,
						  bool skip_if_exists)
						  gfp_t gfp, bool charged)
{
	struct folio *swapcache = NULL;
	void *shadow;
@@ -477,7 +474,7 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
		 * might return a folio that is irrelevant to the faulting
		 * entry because @entry is aligned down. Just return NULL.
		 */
		if (ret != -EEXIST || skip_if_exists || folio_test_large(folio))
		if (ret != -EEXIST || folio_test_large(folio))
			goto failed;

		swapcache = swap_cache_get_folio(entry);
@@ -510,8 +507,6 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
 * @mpol: NUMA memory allocation policy to be applied
 * @ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE
 * @new_page_allocated: sets true if allocation happened, false otherwise
 * @skip_if_exists: if the slot is a partially cached state, return NULL.
 *                  This is a workaround that would be removed shortly.
 *
 * Allocate a folio in the swap cache for one swap slot, typically before
 * doing IO (e.g. swap in or zswap writeback). The swap slot indicated by
@@ -524,8 +519,7 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
 */
struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
				     struct mempolicy *mpol, pgoff_t ilx,
				     bool *new_page_allocated,
				     bool skip_if_exists)
				     bool *new_page_allocated)
{
	struct swap_info_struct *si = __swap_entry_to_info(entry);
	struct folio *folio;
@@ -546,8 +540,7 @@ struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
	if (!folio)
		return NULL;
	/* Try add the new folio, returns existing folio or NULL on failure. */
	result = __swap_cache_prepare_and_add(entry, folio, gfp_mask,
					      false, skip_if_exists);
	result = __swap_cache_prepare_and_add(entry, folio, gfp_mask, false);
	if (result == folio)
		*new_page_allocated = true;
	else
@@ -576,7 +569,7 @@ struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
	unsigned long nr_pages = folio_nr_pages(folio);

	entry = swp_entry(swp_type(entry), round_down(offset, nr_pages));
	swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true, false);
	swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true);
	if (swapcache == folio)
		swap_read_folio(folio, NULL);
	return swapcache;
@@ -604,7 +597,7 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,

	mpol = get_vma_policy(vma, addr, 0, &ilx);
	folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
					&page_allocated, false);
				       &page_allocated);
	mpol_cond_put(mpol);

	if (page_allocated)
@@ -723,7 +716,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
		/* Ok, do the async read-ahead now */
		folio = swap_cache_alloc_folio(
			swp_entry(swp_type(entry), offset), gfp_mask, mpol, ilx,
			&page_allocated, false);
			&page_allocated);
		if (!folio)
			continue;
		if (page_allocated) {
@@ -741,7 +734,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
skip:
	/* The page was likely read above, so no need for plugging here */
	folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
					&page_allocated, false);
				       &page_allocated);
	if (unlikely(page_allocated))
		swap_read_folio(folio, NULL);
	return folio;
@@ -846,7 +839,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
				continue;
		}
		folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
						&page_allocated, false);
					       &page_allocated);
		if (si)
			put_swap_device(si);
		if (!folio)
@@ -868,7 +861,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
skip:
	/* The folio was likely read above, so no need for plugging here */
	folio = swap_cache_alloc_folio(targ_entry, gfp_mask, mpol, targ_ilx,
					&page_allocated, false);
				       &page_allocated);
	if (unlikely(page_allocated))
		swap_read_folio(folio, NULL);
	return folio;
+1 −1
Original line number Diff line number Diff line
@@ -1014,7 +1014,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,

	mpol = get_task_policy(current);
	folio = swap_cache_alloc_folio(swpentry, GFP_KERNEL, mpol,
				       NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
				       NO_INTERLEAVE_INDEX, &folio_was_allocated);
	put_swap_device(si);
	if (!folio)
		return -ENOMEM;