Commit d9712937 authored by Axel Rasmussen's avatar Axel Rasmussen Committed by Andrew Morton
Browse files

mm: userfaultfd: combine 'mode' and 'wp_copy' arguments

Many userfaultfd ioctl functions take both a 'mode' and a 'wp_copy'
argument.  In future commits we plan to plumb the flags through to more
places, so we'd be proliferating the very long argument list even further.

Let's take the time to simplify the argument list.  Combine the two
arguments into one - and generalize, so when we add more flags in the
future, it doesn't imply more function arguments.

Since the modes (copy, zeropage, continue) are mutually exclusive, store
them as an integer value (0, 1, 2) in the low bits.  Place combine-able
flag bits in the high bits.

This is quite similar to an earlier patch proposed by Nadav Amit
("userfaultfd: introduce uffd_flags" [1]).  The main difference is that
patch only handled flags, whereas this patch *also* combines the "mode"
argument into the same type to shorten the argument list.

[1]: https://lore.kernel.org/all/20220619233449.181323-2-namit@vmware.com/

Link: https://lkml.kernel.org/r/20230314221250.682452-4-axelrasmussen@google.com


Signed-off-by: default avatarAxel Rasmussen <axelrasmussen@google.com>
Acked-by: default avatarJames Houghton <jthoughton@google.com>
Acked-by: default avatarPeter Xu <peterx@redhat.com>
Acked-by: default avatarMike Rapoport (IBM) <rppt@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 61c50040
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -1729,6 +1729,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
	struct uffdio_copy uffdio_copy;
	struct uffdio_copy __user *user_uffdio_copy;
	struct userfaultfd_wake_range range;
	uffd_flags_t flags = 0;

	user_uffdio_copy = (struct uffdio_copy __user *) arg;

@@ -1755,10 +1756,12 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
		goto out;
	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
		goto out;
	if (uffdio_copy.mode & UFFDIO_COPY_MODE_WP)
		flags |= MFILL_ATOMIC_WP;
	if (mmget_not_zero(ctx->mm)) {
		ret = mfill_atomic_copy(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
					uffdio_copy.len, &ctx->mmap_changing,
					uffdio_copy.mode);
					flags);
		mmput(ctx->mm);
	} else {
		return -ESRCH;
+4 −6
Original line number Diff line number Diff line
@@ -162,9 +162,8 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr,
			     unsigned long src_addr,
			     enum mcopy_atomic_mode mode,
			     struct page **pagep,
			     bool wp_copy);
			     uffd_flags_t flags,
			     struct page **pagep);
#endif /* CONFIG_USERFAULTFD */
bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
						struct vm_area_struct *vma,
@@ -397,9 +396,8 @@ static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
					   struct vm_area_struct *dst_vma,
					   unsigned long dst_addr,
					   unsigned long src_addr,
					   enum mcopy_atomic_mode mode,
					   struct page **pagep,
					   bool wp_copy)
					   uffd_flags_t flags,
					   struct page **pagep)
{
	BUG();
	return 0;
+3 −2
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include <linux/percpu_counter.h>
#include <linux/xattr.h>
#include <linux/fs_parser.h>
#include <linux/userfaultfd_k.h>

/* inode in-kernel data */

@@ -156,11 +157,11 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
				  struct vm_area_struct *dst_vma,
				  unsigned long dst_addr,
				  unsigned long src_addr,
				  bool zeropage, bool wp_copy,
				  uffd_flags_t flags,
				  struct page **pagep);
#else /* !CONFIG_SHMEM */
#define shmem_mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, \
			       src_addr, zeropage, wp_copy, pagep) ({ BUG(); 0; })
			       src_addr, flags, pagep) ({ BUG(); 0; })
#endif /* CONFIG_SHMEM */
#endif /* CONFIG_USERFAULTFD */

+30 −16
Original line number Diff line number Diff line
@@ -40,30 +40,44 @@ extern int sysctl_unprivileged_userfaultfd;

extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);

/*
 * The mode of operation for __mcopy_atomic and its helpers.
 *
 * This is almost an implementation detail (mcopy_atomic below doesn't take this
 * as a parameter), but it's exposed here because memory-kind-specific
 * implementations (e.g. hugetlbfs) need to know the mode of operation.
 */
enum mcopy_atomic_mode {
	/* A normal copy_from_user into the destination range. */
	MCOPY_ATOMIC_NORMAL,
	/* Don't copy; map the destination range to the zero page. */
	MCOPY_ATOMIC_ZEROPAGE,
	/* Just install pte(s) with the existing page(s) in the page cache. */
	MCOPY_ATOMIC_CONTINUE,
/* A combined operation mode + behavior flags. */
typedef unsigned int __bitwise uffd_flags_t;

/* Mutually exclusive modes of operation. */
enum mfill_atomic_mode {
	MFILL_ATOMIC_COPY,
	MFILL_ATOMIC_ZEROPAGE,
	MFILL_ATOMIC_CONTINUE,
	NR_MFILL_ATOMIC_MODES,
};

#define MFILL_ATOMIC_MODE_BITS (const_ilog2(NR_MFILL_ATOMIC_MODES - 1) + 1)
#define MFILL_ATOMIC_BIT(nr) BIT(MFILL_ATOMIC_MODE_BITS + (nr))
#define MFILL_ATOMIC_FLAG(nr) ((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr))
#define MFILL_ATOMIC_MODE_MASK ((__force uffd_flags_t) (MFILL_ATOMIC_BIT(0) - 1))

static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected)
{
	return (flags & MFILL_ATOMIC_MODE_MASK) == ((__force uffd_flags_t) expected);
}

static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_atomic_mode mode)
{
	flags &= ~MFILL_ATOMIC_MODE_MASK;
	return flags | ((__force uffd_flags_t) mode);
}

/* Flags controlling behavior. These behavior changes are mode-independent. */
#define MFILL_ATOMIC_WP MFILL_ATOMIC_FLAG(0)

extern int mfill_atomic_install_pte(pmd_t *dst_pmd,
				    struct vm_area_struct *dst_vma,
				    unsigned long dst_addr, struct page *page,
				    bool newly_allocated, bool wp_copy);
				    bool newly_allocated, uffd_flags_t flags);

extern ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start,
				 unsigned long src_start, unsigned long len,
				 atomic_t *mmap_changing, __u64 mode);
				 atomic_t *mmap_changing, uffd_flags_t flags);
extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm,
				     unsigned long dst_start,
				     unsigned long len,
+6 −6
Original line number Diff line number Diff line
@@ -6163,12 +6163,12 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr,
			     unsigned long src_addr,
			     enum mcopy_atomic_mode mode,
			     struct page **pagep,
			     bool wp_copy)
			     uffd_flags_t flags,
			     struct page **pagep)
{
	struct mm_struct *dst_mm = dst_vma->vm_mm;
	bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
	bool is_continue = uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE);
	bool wp_enabled = (flags & MFILL_ATOMIC_WP);
	struct hstate *h = hstate_vma(dst_vma);
	struct address_space *mapping = dst_vma->vm_file->f_mapping;
	pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
@@ -6303,7 +6303,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
	 * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
	 * with wp flag set, don't set pte write bit.
	 */
	if (wp_copy || (is_continue && !vm_shared))
	if (wp_enabled || (is_continue && !vm_shared))
		writable = 0;
	else
		writable = dst_vma->vm_flags & VM_WRITE;
@@ -6318,7 +6318,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
	_dst_pte = huge_pte_mkdirty(_dst_pte);
	_dst_pte = pte_mkyoung(_dst_pte);

	if (wp_copy)
	if (wp_enabled)
		_dst_pte = huge_pte_mkuffd_wp(_dst_pte);

	set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
Loading