Commit c2a967f6 authored by Yu Zhao, committed by Andrew Morton
Browse files

mm/hugetlb_vmemmap: don't synchronize_rcu() without HVO

hugetlb_vmemmap_optimize_folio() and hugetlb_vmemmap_restore_folio() are
wrappers meant to be called regardless of whether HVO is enabled. 
Therefore, they should not call synchronize_rcu().  Otherwise, it
regresses use cases not enabling HVO.

So move synchronize_rcu() to __hugetlb_vmemmap_optimize_folio() and
__hugetlb_vmemmap_restore_folio(), and call it once for each batch of
folios when HVO is enabled.

Link: https://lkml.kernel.org/r/20240719042503.2752316-1-yuzhao@google.com


Fixes: bd225530 ("mm/hugetlb_vmemmap: fix race with speculative PFN walkers")
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202407091001.1250ad4a-oliver.sang@intel.com


Reported-by: Janosch Frank <frankja@linux.ibm.com>
Tested-by: Marc Hartmayer <mhartmay@linux.ibm.com>
Acked-by: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 9eace7e8
Loading
Loading
Loading
Loading
+20 −20
Original line number Diff line number Diff line
@@ -43,6 +43,8 @@ struct vmemmap_remap_walk {
#define VMEMMAP_SPLIT_NO_TLB_FLUSH	BIT(0)
/* Skip the TLB flush when we remap the PTE */
#define VMEMMAP_REMAP_NO_TLB_FLUSH	BIT(1)
/* synchronize_rcu() to avoid writes from page_ref_add_unless() */
#define VMEMMAP_SYNCHRONIZE_RCU		BIT(2)
	unsigned long		flags;
};

@@ -457,6 +459,9 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
	if (!folio_test_hugetlb_vmemmap_optimized(folio))
		return 0;

	if (flags & VMEMMAP_SYNCHRONIZE_RCU)
		synchronize_rcu();

	vmemmap_end	= vmemmap_start + hugetlb_vmemmap_size(h);
	vmemmap_reuse	= vmemmap_start;
	vmemmap_start	+= HUGETLB_VMEMMAP_RESERVE_SIZE;
@@ -489,10 +494,7 @@ static int __hugetlb_vmemmap_restore_folio(const struct hstate *h,
 */
int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio)
{
	/*
	 * Do not call synchronize_rcu() here: this wrapper is called regardless
	 * of whether HVO is enabled.  Passing VMEMMAP_SYNCHRONIZE_RCU lets
	 * __hugetlb_vmemmap_restore_folio() wait for an RCU grace period (to
	 * avoid writes from page_ref_add_unless() while unfolding vmemmap) only
	 * after it has confirmed that the folio's vmemmap is optimized.
	 */
	return __hugetlb_vmemmap_restore_folio(h, folio, VMEMMAP_SYNCHRONIZE_RCU);
}

/**
@@ -515,14 +517,14 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
	struct folio *folio, *t_folio;
	long restored = 0;
	long ret = 0;

	/* avoid writes from page_ref_add_unless() while unfolding vmemmap */
	synchronize_rcu();
	unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU;

	list_for_each_entry_safe(folio, t_folio, folio_list, lru) {
		if (folio_test_hugetlb_vmemmap_optimized(folio)) {
			ret = __hugetlb_vmemmap_restore_folio(h, folio,
							      VMEMMAP_REMAP_NO_TLB_FLUSH);
			ret = __hugetlb_vmemmap_restore_folio(h, folio, flags);
			/* only need to synchronize_rcu() once for each batch */
			flags &= ~VMEMMAP_SYNCHRONIZE_RCU;

			if (ret)
				break;
			restored++;
@@ -570,6 +572,9 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
		return ret;

	static_branch_inc(&hugetlb_optimize_vmemmap_key);

	if (flags & VMEMMAP_SYNCHRONIZE_RCU)
		synchronize_rcu();
	/*
	 * Very Subtle
	 * If VMEMMAP_REMAP_NO_TLB_FLUSH is set, TLB flushing is not performed
@@ -617,10 +622,7 @@ void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
{
	LIST_HEAD(vmemmap_pages);

	/* avoid writes from page_ref_add_unless() while folding vmemmap */
	synchronize_rcu();

	__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0);
	__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, VMEMMAP_SYNCHRONIZE_RCU);
	free_vmemmap_page_list(&vmemmap_pages);
}

@@ -647,6 +649,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
{
	struct folio *folio;
	LIST_HEAD(vmemmap_pages);
	unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU;

	list_for_each_entry(folio, folio_list, lru) {
		int ret = hugetlb_vmemmap_split_folio(h, folio);
@@ -663,14 +666,12 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l

	flush_tlb_all();

	/* avoid writes from page_ref_add_unless() while folding vmemmap */
	synchronize_rcu();

	list_for_each_entry(folio, folio_list, lru) {
		int ret;

		ret = __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages,
						       VMEMMAP_REMAP_NO_TLB_FLUSH);
		ret = __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, flags);
		/* only need to synchronize_rcu() once for each batch */
		flags &= ~VMEMMAP_SYNCHRONIZE_RCU;

		/*
		 * Pages to be freed may have been accumulated.  If we
@@ -684,8 +685,7 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
			flush_tlb_all();
			free_vmemmap_page_list(&vmemmap_pages);
			INIT_LIST_HEAD(&vmemmap_pages);
			__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages,
							 VMEMMAP_REMAP_NO_TLB_FLUSH);
			__hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, flags);
		}
	}