Commit ab04945f authored by Lorenzo Stoakes's avatar Lorenzo Stoakes Committed by Andrew Morton
Browse files

mm: update mem char driver to use mmap_prepare

Update the mem char driver (backing /dev/mem and /dev/zero) to use
f_op->mmap_prepare hook rather than the deprecated f_op->mmap.

The /dev/zero implementation has a very unique and rather concerning
characteristic in that it converts MAP_PRIVATE mmap() mappings anonymous
when they are, in fact, not.

The new f_op->mmap_prepare() can support this, but rather than introducing
a helper function to perform this hack (and risk introducing other users),
utilise the success hook to do so.

We utilise the newly introduced shmem_zero_setup_desc() to allow for the
shared mapping case via an f_op->mmap_prepare() hook.

We also use the desc->action_error_hook to filter the remap error to
-EAGAIN to keep behaviour consistent.

Link: https://lkml.kernel.org/r/48f60764d7a6901819d1af778fa33b775d2e8c77.1760959442.git.lorenzo.stoakes@oracle.com


Signed-off-by: default avatarLorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: default avatarJason Gunthorpe <jgg@nvidia.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chatre, Reinette <reinette.chatre@intel.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nicolas Pitre <nico@fluxnic.net>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Robin Murohy <robin.murphy@arm.com>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 89646d9c
Loading
Loading
Loading
Loading
+50 −34
Original line number Diff line number Diff line
@@ -304,13 +304,13 @@ static unsigned zero_mmap_capabilities(struct file *file)
}

/* can't do an in-place private mapping if there's no MMU */
static inline int private_mapping_ok(struct vm_area_struct *vma)
static inline int private_mapping_ok(struct vm_area_desc *desc)
{
	return is_nommu_shared_mapping(vma->vm_flags);
	return is_nommu_shared_mapping(desc->vm_flags);
}
#else

static inline int private_mapping_ok(struct vm_area_struct *vma)
static inline int private_mapping_ok(struct vm_area_desc *desc)
{
	return 1;
}
@@ -322,46 +322,49 @@ static const struct vm_operations_struct mmap_mem_ops = {
#endif
};

static int mmap_mem(struct file *file, struct vm_area_struct *vma)
static int mmap_filter_error(int err)
{
	size_t size = vma->vm_end - vma->vm_start;
	phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
	return -EAGAIN;
}

static int mmap_mem_prepare(struct vm_area_desc *desc)
{
	struct file *file = desc->file;
	const size_t size = vma_desc_size(desc);
	const phys_addr_t offset = (phys_addr_t)desc->pgoff << PAGE_SHIFT;

	/* Does it even fit in phys_addr_t? */
	if (offset >> PAGE_SHIFT != vma->vm_pgoff)
	if (offset >> PAGE_SHIFT != desc->pgoff)
		return -EINVAL;

	/* It's illegal to wrap around the end of the physical address space. */
	if (offset + (phys_addr_t)size - 1 < offset)
		return -EINVAL;

	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
	if (!valid_mmap_phys_addr_range(desc->pgoff, size))
		return -EINVAL;

	if (!private_mapping_ok(vma))
	if (!private_mapping_ok(desc))
		return -ENOSYS;

	if (!range_is_allowed(vma->vm_pgoff, size))
	if (!range_is_allowed(desc->pgoff, size))
		return -EPERM;

	if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
						&vma->vm_page_prot))
	if (!phys_mem_access_prot_allowed(file, desc->pgoff, size,
					  &desc->page_prot))
		return -EINVAL;

	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
	desc->page_prot = phys_mem_access_prot(file, desc->pgoff,
					       size,
						 vma->vm_page_prot);
					       desc->page_prot);

	vma->vm_ops = &mmap_mem_ops;
	desc->vm_ops = &mmap_mem_ops;

	/* Remap-pfn-range will mark the range VM_IO. */
	mmap_action_remap_full(desc, desc->pgoff);
	/* We filter remap errors to -EAGAIN. */
	desc->action.error_hook = mmap_filter_error;

	/* Remap-pfn-range will mark the range VM_IO */
	if (remap_pfn_range(vma,
			    vma->vm_start,
			    vma->vm_pgoff,
			    size,
			    vma->vm_page_prot)) {
		return -EAGAIN;
	}
	return 0;
}

@@ -501,14 +504,26 @@ static ssize_t read_zero(struct file *file, char __user *buf,
	return cleared;
}

static int mmap_zero(struct file *file, struct vm_area_struct *vma)
static int mmap_zero_private_success(const struct vm_area_struct *vma)
{
	/*
	 * This is a highly unique situation where we mark a MAP_PRIVATE mapping
	 * of /dev/zero anonymous, despite it not being.
	 */
	vma_set_anonymous((struct vm_area_struct *)vma);

	return 0;
}

static int mmap_zero_prepare(struct vm_area_desc *desc)
{
#ifndef CONFIG_MMU
	return -ENOSYS;
#endif
	if (vma->vm_flags & VM_SHARED)
		return shmem_zero_setup(vma);
	vma_set_anonymous(vma);
	if (desc->vm_flags & VM_SHARED)
		return shmem_zero_setup_desc(desc);

	desc->action.success_hook = mmap_zero_private_success;
	return 0;
}

@@ -526,10 +541,11 @@ static unsigned long get_unmapped_area_zero(struct file *file,
{
	if (flags & MAP_SHARED) {
		/*
		 * mmap_zero() will call shmem_zero_setup() to create a file,
		 * so use shmem's get_unmapped_area in case it can be huge;
		 * and pass NULL for file as in mmap.c's get_unmapped_area(),
		 * so as not to confuse shmem with our handle on "/dev/zero".
		 * mmap_zero_prepare() will call shmem_zero_setup() to create a
		 * file, so use shmem's get_unmapped_area in case it can be
		 * huge; and pass NULL for file as in mmap.c's
		 * get_unmapped_area(), so as not to confuse shmem with our
		 * handle on "/dev/zero".
		 */
		return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
	}
@@ -632,7 +648,7 @@ static const struct file_operations __maybe_unused mem_fops = {
	.llseek		= memory_lseek,
	.read		= read_mem,
	.write		= write_mem,
	.mmap		= mmap_mem,
	.mmap_prepare	= mmap_mem_prepare,
	.open		= open_mem,
#ifndef CONFIG_MMU
	.get_unmapped_area = get_unmapped_area_mem,
@@ -668,7 +684,7 @@ static const struct file_operations zero_fops = {
	.write_iter	= write_iter_zero,
	.splice_read	= copy_splice_read,
	.splice_write	= splice_write_zero,
	.mmap		= mmap_zero,
	.mmap_prepare	= mmap_zero_prepare,
	.get_unmapped_area = get_unmapped_area_zero,
#ifndef CONFIG_MMU
	.mmap_capabilities = zero_mmap_capabilities,