mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
synced 2026-04-18 03:23:53 -04:00
userfaultfd: add UFFDIO_CONTINUE ioctl
This ioctl is how userspace ought to resolve "minor" userfaults. The
idea is, userspace is notified that a minor fault has occurred. It
might change the contents of the page using its second non-UFFD mapping,
or not. Then, it calls UFFDIO_CONTINUE to tell the kernel "I have
ensured the page contents are correct, carry on setting up the mapping".
Note that it doesn't make much sense to use UFFDIO_{COPY,ZEROPAGE} for
MINOR registered VMAs. ZEROPAGE maps the VMA to the zero page; but in
the minor fault case, we already have some pre-existing underlying page.
Likewise, UFFDIO_COPY isn't useful if we have a second non-UFFD mapping.
We'd just use memcpy() or similar instead.
It turns out hugetlb_mcopy_atomic_pte() already does very close to what
we want, if an existing page is provided via `struct page **pagep`. We
already special-case the behavior a bit for the UFFDIO_ZEROPAGE case, so
just extend that design: add an enum for the three modes of operation,
and make the small adjustments needed for the MCOPY_ATOMIC_CONTINUE
case. (Basically, look up the existing page, and avoid adding the
existing page to the page cache or calling set_page_huge_active() on
it.)
Link: https://lkml.kernel.org/r/20210301222728.176417-5-axelrasmussen@google.com
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Cc: Adam Ruprecht <ruprecht@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Cannon Matthews <cannonmatthews@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chinwen Chang <chinwen.chang@mediatek.com>
Cc: David Rientjes <rientjes@google.com>
Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Michal Koutn" <mkoutny@suse.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oliver Upton <oupton@google.com>
Cc: Shaohua Li <shli@fb.com>
Cc: Shawn Anastasio <shawn@anastas.io>
Cc: Steven Price <steven.price@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
committed by
Linus Torvalds
parent
714c189108
commit
f619147104
@@ -37,6 +37,22 @@ extern int sysctl_unprivileged_userfaultfd;
|
||||
|
||||
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
|
||||
|
||||
/*
|
||||
* The mode of operation for __mcopy_atomic and its helpers.
|
||||
*
|
||||
* This is almost an implementation detail (mcopy_atomic below doesn't take this
|
||||
* as a parameter), but it's exposed here because memory-kind-specific
|
||||
* implementations (e.g. hugetlbfs) need to know the mode of operation.
|
||||
*/
|
||||
enum mcopy_atomic_mode {
|
||||
/* A normal copy_from_user into the destination range. */
|
||||
MCOPY_ATOMIC_NORMAL,
|
||||
/* Don't copy; map the destination range to the zero page. */
|
||||
MCOPY_ATOMIC_ZEROPAGE,
|
||||
/* Just install pte(s) with the existing page(s) in the page cache. */
|
||||
MCOPY_ATOMIC_CONTINUE,
|
||||
};
|
||||
|
||||
extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
|
||||
unsigned long src_start, unsigned long len,
|
||||
bool *mmap_changing, __u64 mode);
|
||||
@@ -44,6 +60,8 @@ extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
|
||||
unsigned long dst_start,
|
||||
unsigned long len,
|
||||
bool *mmap_changing);
|
||||
extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
|
||||
unsigned long len, bool *mmap_changing);
|
||||
extern int mwriteprotect_range(struct mm_struct *dst_mm,
|
||||
unsigned long start, unsigned long len,
|
||||
bool enable_wp, bool *mmap_changing);
|
||||
|
||||
Reference in New Issue
Block a user