Commit 7c53dfbd authored by Lorenzo Stoakes, committed by Andrew Morton
Browse files

mm: add PTE_MARKER_GUARD PTE marker

Add a new PTE marker that results in any access causing the accessing
process to segfault.

This is preferable to PTE_MARKER_POISONED, which results in the same
handling as hardware poisoned memory, and is thus undesirable for cases
where we simply wish to 'soft' poison a range.

This is in preparation for implementing the ability to specify guard pages
at the page table level, i.e.  ranges that, when accessed, should cause
process termination.

Additionally, rename zap_drop_file_uffd_wp() to zap_drop_markers() - the
function checks the ZAP_FLAG_DROP_MARKER flag so naming it for this single
purpose was simply incorrect.

We then reuse the same logic to determine whether a zap should clear a
guard entry - this should only be performed on teardown and never on
MADV_DONTNEED or MADV_FREE.

We additionally add a WARN_ON_ONCE() in hugetlb logic should a guard
marker be encountered there, as we explicitly do not support this
operation and this should not occur.

Link: https://lkml.kernel.org/r/f47f3d5acca2dcf9bbf655b6d33f3dc713e4a4a0.1730123433.git.lorenzo.stoakes@oracle.com


Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Suggested-by: Jann Horn <jannh@google.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Chris Zankel <chris@zankel.net>
Cc: Helge Deller <deller@gmx.de>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 5f6170a4
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -544,7 +544,7 @@ static inline pte_marker copy_pte_marker(
{
	pte_marker srcm = pte_marker_get(entry);
	/* Always copy error entries. */
	pte_marker dstm = srcm & PTE_MARKER_POISONED;
	pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD);

	/* Only copy PTE markers if UFFD register matches. */
	if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma))
+23 −1
Original line number Diff line number Diff line
@@ -426,9 +426,19 @@ typedef unsigned long pte_marker;
 * "Poisoned" here is meant in the very general sense of "future accesses are
 * invalid", instead of referring very specifically to hardware memory errors.
 * This marker is meant to represent any of various different causes of this.
 *
 * Note that, when encountered by the faulting logic, PTEs with this marker will
 * result in VM_FAULT_HWPOISON and thus regardless trigger hardware memory error
 * logic.
 */
#define  PTE_MARKER_POISONED			BIT(1)
#define  PTE_MARKER_MASK			(BIT(2) - 1)
/*
 * Indicates that, on fault, this PTE will cause a SIGSEGV signal to be
 * sent. This means guard markers behave in effect as if the region were mapped
 * PROT_NONE, rather than if they were a memory hole or equivalent.
 */
#define  PTE_MARKER_GUARD			BIT(2)
#define  PTE_MARKER_MASK			(BIT(3) - 1)

static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
{
@@ -464,6 +474,18 @@ static inline int is_poisoned_swp_entry(swp_entry_t entry)
{
	return is_pte_marker_entry(entry) &&
	    (pte_marker_get(entry) & PTE_MARKER_POISONED);

}

static inline swp_entry_t make_guard_swp_entry(void)
{
	return make_pte_marker_entry(PTE_MARKER_GUARD);
}

/*
 * Report whether @entry is a PTE marker entry with PTE_MARKER_GUARD set.
 * Returns 1 if so, 0 otherwise.
 */
static inline int is_guard_swp_entry(swp_entry_t entry)
{
	/* Only PTE marker entries carry marker bits worth inspecting. */
	if (!is_pte_marker_entry(entry))
		return 0;

	return (pte_marker_get(entry) & PTE_MARKER_GUARD) != 0;
}

/*
+4 −0
Original line number Diff line number Diff line
@@ -6353,6 +6353,10 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
				ret = VM_FAULT_HWPOISON_LARGE |
				      VM_FAULT_SET_HINDEX(hstate_index(h));
				goto out_mutex;
			} else if (WARN_ON_ONCE(marker & PTE_MARKER_GUARD)) {
				/* This isn't supported in hugetlb. */
				ret = VM_FAULT_SIGSEGV;
				goto out_mutex;
			}
		}

+15 −3
Original line number Diff line number Diff line
@@ -1455,7 +1455,7 @@ static inline bool should_zap_folio(struct zap_details *details,
	return !folio_test_anon(folio);
}

static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
static inline bool zap_drop_markers(struct zap_details *details)
{
	if (!details)
		return false;
@@ -1476,7 +1476,7 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
	if (vma_is_anonymous(vma))
		return;

	if (zap_drop_file_uffd_wp(details))
	if (zap_drop_markers(details))
		return;

	for (;;) {
@@ -1671,7 +1671,15 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
			 * drop the marker if explicitly requested.
			 */
			if (!vma_is_anonymous(vma) &&
			    !zap_drop_file_uffd_wp(details))
			    !zap_drop_markers(details))
				continue;
		} else if (is_guard_swp_entry(entry)) {
			/*
			 * Ordinary zapping should not remove guard PTE
			 * markers. Only do so if we should remove PTE markers
			 * in general.
			 */
			if (!zap_drop_markers(details))
				continue;
		} else if (is_hwpoison_entry(entry) ||
			   is_poisoned_swp_entry(entry)) {
@@ -4003,6 +4011,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
	if (marker & PTE_MARKER_POISONED)
		return VM_FAULT_HWPOISON;

	/* Hitting a guard page is always a fatal condition. */
	if (marker & PTE_MARKER_GUARD)
		return VM_FAULT_SIGSEGV;

	if (pte_marker_entry_uffd_wp(entry))
		return pte_marker_handle_uffd_wp(vmf);

+4 −2
Original line number Diff line number Diff line
@@ -236,9 +236,11 @@ static long change_pte_range(struct mmu_gather *tlb,
			} else if (is_pte_marker_entry(entry)) {
				/*
				 * Ignore error swap entries unconditionally,
				 * because any access should sigbus anyway.
				 * because any access should sigbus/sigsegv
				 * anyway.
				 */
				if (is_poisoned_swp_entry(entry))
				if (is_poisoned_swp_entry(entry) ||
				    is_guard_swp_entry(entry))
					continue;
				/*
				 * If this is uffd-wp pte marker and we'd like