Commit a148a204 authored by Jane Chu, committed by Andrew Morton
Browse files

mm/memory-failure: fix missing ->mf_stats count in hugetlb poison

When a newly poisoned subpage ends up in an already poisoned hugetlb
folio, 'num_poisoned_pages' is incremented, but the per node ->mf_stats is
not.  Fix the inconsistency by designating action_result() to update them
both.

While at it, define __get_huge_page_for_hwpoison() return values in terms
of symbolic names for better readability.  Also rename
folio_set_hugetlb_hwpoison() to hugetlb_update_hwpoison() since the
function does more than the conventional bit setting and three possible
return values are expected from it.

Link: https://lkml.kernel.org/r/20260120232234.3462258-1-jane.chu@oracle.com


Fixes: 18f41fa6 ("mm: memory-failure: bump memory failure stats to pglist_data")
Signed-off-by: Jane Chu <jane.chu@oracle.com>
Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Chris Mason <clm@meta.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Jiaqi Yan <jiaqiyan@google.com>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: William Roche <william.roche@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent a0f3c084
Loading
Loading
Loading
Loading
+56 −37
Original line number Diff line number Diff line
@@ -1883,12 +1883,22 @@ static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
	return count;
}

static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
#define	MF_HUGETLB_FREED		0	/* freed hugepage */
#define	MF_HUGETLB_IN_USED		1	/* in-use hugepage */
#define	MF_HUGETLB_NON_HUGEPAGE		2	/* not a hugepage */
#define	MF_HUGETLB_FOLIO_PRE_POISONED	3	/* folio already poisoned */
#define	MF_HUGETLB_PAGE_PRE_POISONED	4	/* exact page already poisoned */
#define	MF_HUGETLB_RETRY		5	/* hugepage is busy, retry */
/*
 * Set hugetlb folio as hwpoisoned, update folio private raw hwpoison list
 * to keep track of the poisoned pages.
 */
static int hugetlb_update_hwpoison(struct folio *folio, struct page *page)
{
	struct llist_head *head;
	struct raw_hwp_page *raw_hwp;
	struct raw_hwp_page *p;
	int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0;
	int ret = folio_test_set_hwpoison(folio) ? MF_HUGETLB_FOLIO_PRE_POISONED : 0;

	/*
	 * Once the hwpoison hugepage has lost reliable raw error info,
@@ -1896,20 +1906,17 @@ static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
	 * so skip to add additional raw error info.
	 */
	if (folio_test_hugetlb_raw_hwp_unreliable(folio))
		return -EHWPOISON;
		return MF_HUGETLB_FOLIO_PRE_POISONED;
	head = raw_hwp_list_head(folio);
	llist_for_each_entry(p, head->first, node) {
		if (p->page == page)
			return -EHWPOISON;
			return MF_HUGETLB_PAGE_PRE_POISONED;
	}

	raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC);
	if (raw_hwp) {
		raw_hwp->page = page;
		llist_add(&raw_hwp->node, head);
		/* the first error event will be counted in action_result(). */
		if (ret)
			num_poisoned_pages_inc(page_to_pfn(page));
	} else {
		/*
		 * Failed to save raw error info.  We no longer trace all
@@ -1957,42 +1964,39 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)

/*
 * Called from hugetlb code with hugetlb_lock held.
 *
 * Return values:
 *   0             - free hugepage
 *   1             - in-use hugepage
 *   2             - not a hugepage
 *   -EBUSY        - the hugepage is busy (try to retry)
 *   -EHWPOISON    - the hugepage is already hwpoisoned
 */
int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
				 bool *migratable_cleared)
{
	struct page *page = pfn_to_page(pfn);
	struct folio *folio = page_folio(page);
	int ret = 2;	/* fallback to normal page handling */
	bool count_increased = false;
	int ret, rc;

	if (!folio_test_hugetlb(folio))
	if (!folio_test_hugetlb(folio)) {
		ret = MF_HUGETLB_NON_HUGEPAGE;
		goto out;

	if (flags & MF_COUNT_INCREASED) {
		ret = 1;
	} else if (flags & MF_COUNT_INCREASED) {
		ret = MF_HUGETLB_IN_USED;
		count_increased = true;
	} else if (folio_test_hugetlb_freed(folio)) {
		ret = 0;
		ret = MF_HUGETLB_FREED;
	} else if (folio_test_hugetlb_migratable(folio)) {
		ret = folio_try_get(folio);
		if (ret)
		if (folio_try_get(folio)) {
			ret = MF_HUGETLB_IN_USED;
			count_increased = true;
		} else {
		ret = -EBUSY;
			ret = MF_HUGETLB_FREED;
		}
	} else {
		ret = MF_HUGETLB_RETRY;
		if (!(flags & MF_NO_RETRY))
			goto out;
	}

	if (folio_set_hugetlb_hwpoison(folio, page)) {
		ret = -EHWPOISON;
	rc = hugetlb_update_hwpoison(folio, page);
	if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) {
		ret = rc;
		goto out;
	}

@@ -2017,10 +2021,16 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 * with basic operations like hugepage allocation/free/demotion.
 * So some of prechecks for hwpoison (pinning, and testing/setting
 * PageHWPoison) should be done in single hugetlb_lock range.
 * Returns:
 *	0		- not hugetlb, or recovered
 *	-EBUSY		- not recovered
 *	-EOPNOTSUPP	- hwpoison_filter'ed
 *	-EHWPOISON	- folio or exact page already poisoned
 *	-EFAULT		- kill_accessing_process finds current->mm null
 */
static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
{
	int res;
	int res, rv;
	struct page *p = pfn_to_page(pfn);
	struct folio *folio;
	unsigned long page_flags;
@@ -2029,22 +2039,31 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
	*hugetlb = 1;
retry:
	res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
	if (res == 2) { /* fallback to normal page handling */
	switch (res) {
	case MF_HUGETLB_NON_HUGEPAGE:	/* fallback to normal page handling */
		*hugetlb = 0;
		return 0;
	} else if (res == -EHWPOISON) {
		if (flags & MF_ACTION_REQUIRED) {
			folio = page_folio(p);
			res = kill_accessing_process(current, folio_pfn(folio), flags);
		}
		action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
		return res;
	} else if (res == -EBUSY) {
	case MF_HUGETLB_RETRY:
		if (!(flags & MF_NO_RETRY)) {
			flags |= MF_NO_RETRY;
			goto retry;
		}
		return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
	case MF_HUGETLB_FOLIO_PRE_POISONED:
	case MF_HUGETLB_PAGE_PRE_POISONED:
		rv = -EHWPOISON;
		if (flags & MF_ACTION_REQUIRED) {
			folio = page_folio(p);
			rv = kill_accessing_process(current, folio_pfn(folio), flags);
		}
		if (res == MF_HUGETLB_PAGE_PRE_POISONED)
			action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
		else
			action_result(pfn, MF_MSG_HUGE, MF_FAILED);
		return rv;
	default:
		WARN_ON((res != MF_HUGETLB_FREED) && (res != MF_HUGETLB_IN_USED));
		break;
	}

	folio = page_folio(p);
@@ -2055,7 +2074,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
		if (migratable_cleared)
			folio_set_hugetlb_migratable(folio);
		folio_unlock(folio);
		if (res == 1)
		if (res == MF_HUGETLB_IN_USED)
			folio_put(folio);
		return -EOPNOTSUPP;
	}
@@ -2064,7 +2083,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
	 * Handling free hugepage.  The possible race with hugepage allocation
	 * or demotion can be prevented by PageHWPoison flag.
	 */
	if (res == 0) {
	if (res == MF_HUGETLB_FREED) {
		folio_unlock(folio);
		if (__page_handle_poison(p) > 0) {
			page_ref_inc(p);