Commit d61f0d59 authored by Lorenzo Stoakes's avatar Lorenzo Stoakes Committed by Andrew Morton
Browse files

mm: move vma_shrink(), vma_expand() to internal header

The vma_shrink() and vma_expand() functions are internal VMA manipulation
functions which we ought to abstract for use outside of memory management
code.

To achieve this, we replace shift_arg_pages() in fs/exec.c with an
invocation of a new relocate_vma_down() function implemented in mm/mmap.c,
which enables us to also move move_page_tables() and vma_iter_prev_range()
to internal.h.

The purpose of doing this is to isolate key VMA manipulation functions in
order that we can both abstract them and later render them easily
testable.

Link: https://lkml.kernel.org/r/3cfcd9ec433e032a85f636fdc0d7d98fafbd19c5.1722251717.git.lorenzo.stoakes@oracle.com


Signed-off-by: default avatarLorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: default avatarVlastimil Babka <vbabka@suse.cz>
Reviewed-by: default avatarLiam R. Howlett <Liam.Howlett@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Gow <davidgow@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Kees Cook <kees@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Rae Moar <rmoar@google.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Pengfei Xu <pengfei.xu@intel.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent fa04c08f
Loading
Loading
Loading
Loading
+6 −75
Original line number Diff line number Diff line
@@ -711,80 +711,6 @@ static int copy_strings_kernel(int argc, const char *const *argv,

#ifdef CONFIG_MMU

/*
 * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX.  Once
 * the binfmt code determines where the new stack should reside, we shift it to
 * its final location.  The process proceeds as follows:
 *
 * 1) Use shift to calculate the new vma endpoints.
 * 2) Extend vma to cover both the old and new ranges.  This ensures the
 *    arguments passed to subsequent functions are consistent.
 * 3) Move vma's page tables to the new range.
 * 4) Free up any cleared pgd range.
 * 5) Shrink the vma to cover only the new range.
 */
static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long old_start = vma->vm_start;
	unsigned long old_end = vma->vm_end;
	unsigned long length = old_end - old_start;
	unsigned long new_start = old_start - shift;
	unsigned long new_end = old_end - shift;
	VMA_ITERATOR(vmi, mm, new_start);
	struct vm_area_struct *next;
	struct mmu_gather tlb;

	BUG_ON(new_start > new_end);

	/*
	 * ensure there are no vmas between where we want to go
	 * and where we are
	 */
	if (vma != vma_next(&vmi))
		return -EFAULT;

	vma_iter_prev_range(&vmi);
	/*
	 * cover the whole range: [new_start, old_end)
	 */
	if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
		return -ENOMEM;

	/*
	 * move the page tables downwards, on failure we rely on
	 * process cleanup to remove whatever mess we made.
	 */
	if (length != move_page_tables(vma, old_start,
				       vma, new_start, length, false, true))
		return -ENOMEM;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm);
	next = vma_next(&vmi);
	if (new_end > old_start) {
		/*
		 * when the old and new regions overlap clear from new_end.
		 */
		free_pgd_range(&tlb, new_end, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	} else {
		/*
		 * otherwise, clean from old_start; this is done to not touch
		 * the address space in [new_end, old_start) some architectures
		 * have constraints on va-space that make this illegal (IA64) -
		 * for the others its just a little faster.
		 */
		free_pgd_range(&tlb, old_start, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	}
	tlb_finish_mmu(&tlb);

	vma_prev(&vmi);
	/* Shrink the vma to just the new range */
	return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
}

/*
 * Finalizes the stack vm_area_struct. The flags and permissions are updated,
 * the stack is optionally relocated, and some extra space is added.
@@ -877,7 +803,12 @@ int setup_arg_pages(struct linux_binprm *bprm,

	/* Move stack pages down in memory. */
	if (stack_shift) {
		ret = shift_arg_pages(vma, stack_shift);
		/*
		 * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX.  Once
		 * the binfmt code determines where the new stack should reside, we shift it to
		 * its final location.
		 */
		ret = relocate_vma_down(vma, stack_shift);
		if (ret)
			goto out_unlock;
	}
+1 −16
Original line number Diff line number Diff line
@@ -1005,12 +1005,6 @@ static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
	return mas_prev(&vmi->mas, 0);
}

static inline
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
{
	return mas_prev_range(&vmi->mas, 0);
}

static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
{
	return vmi->mas.index;
@@ -2520,11 +2514,6 @@ int set_page_dirty_lock(struct page *page);

int get_cmdline(struct task_struct *task, char *buffer, int buflen);

extern unsigned long move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len,
		bool need_rmap_locks, bool for_stack);

/*
 * Flags used by change_protection().  For now we make it a bitmap so
 * that we can pass in multiple flags just like parameters.  However
@@ -3267,11 +3256,6 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);

/* mmap.c */
extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
		      unsigned long start, unsigned long end, pgoff_t pgoff,
		      struct vm_area_struct *next);
extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
		       unsigned long start, unsigned long end, pgoff_t pgoff);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void unlink_file_vma(struct vm_area_struct *);
@@ -3279,6 +3263,7 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift);

static inline int check_data_rlimit(unsigned long rlim,
				    unsigned long new,
+18 −0
Original line number Diff line number Diff line
@@ -1305,6 +1305,12 @@ static inline struct vm_area_struct
			  vma_policy(vma), new_ctx, anon_vma_name(vma));
}

int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
	      unsigned long start, unsigned long end, pgoff_t pgoff,
	      struct vm_area_struct *next);
int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
	       unsigned long start, unsigned long end, pgoff_t pgoff);

enum {
	/* mark page accessed */
	FOLL_TOUCH = 1 << 16,
@@ -1528,6 +1534,12 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
	return 0;
}

static inline
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
{
	return mas_prev_range(&vmi->mas, 0);
}

/*
 * VMA lock generalization
 */
@@ -1639,4 +1651,10 @@ void unlink_file_vma_batch_init(struct unlink_vma_file_batch *);
void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *);
void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);

/* mremap.c */
unsigned long move_page_tables(struct vm_area_struct *vma,
	unsigned long old_addr, struct vm_area_struct *new_vma,
	unsigned long new_addr, unsigned long len,
	bool need_rmap_locks, bool for_stack);

#endif	/* __MM_INTERNAL_H */
+81 −0
Original line number Diff line number Diff line
@@ -4088,3 +4088,84 @@ static int __meminit init_reserve_notifier(void)
	return 0;
}
subsys_initcall(init_reserve_notifier);

/*
 * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
 * this VMA and its relocated range, which will now reside at [vma->vm_start -
 * shift, vma->vm_end - shift).
 *
 * This function is almost certainly NOT what you want for anything other than
 * early executable temporary stack relocation.
 */
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
{
	/*
	 * The process proceeds as follows:
	 *
	 * 1) Use shift to calculate the new vma endpoints.
	 * 2) Extend vma to cover both the old and new ranges.  This ensures the
	 *    arguments passed to subsequent functions are consistent.
	 * 3) Move vma's page tables to the new range.
	 * 4) Free up any cleared pgd range.
	 * 5) Shrink the vma to cover only the new range.
	 */

	struct mm_struct *mm = vma->vm_mm;
	unsigned long old_start = vma->vm_start;
	unsigned long old_end = vma->vm_end;
	unsigned long length = old_end - old_start;
	unsigned long new_start = old_start - shift;
	unsigned long new_end = old_end - shift;
	VMA_ITERATOR(vmi, mm, new_start);
	struct vm_area_struct *next;
	struct mmu_gather tlb;

	BUG_ON(new_start > new_end);

	/*
	 * ensure there are no vmas between where we want to go
	 * and where we are
	 */
	if (vma != vma_next(&vmi))
		return -EFAULT;

	vma_iter_prev_range(&vmi);
	/*
	 * cover the whole range: [new_start, old_end)
	 */
	if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
		return -ENOMEM;

	/*
	 * move the page tables downwards, on failure we rely on
	 * process cleanup to remove whatever mess we made.
	 */
	if (length != move_page_tables(vma, old_start,
				       vma, new_start, length, false, true))
		return -ENOMEM;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm);
	next = vma_next(&vmi);
	if (new_end > old_start) {
		/*
		 * when the old and new regions overlap clear from new_end.
		 */
		free_pgd_range(&tlb, new_end, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	} else {
		/*
		 * otherwise, clean from old_start; this is done to not touch
		 * the address space in [new_end, old_start) some architectures
		 * have constraints on va-space that make this illegal (IA64) -
		 * for the others its just a little faster.
		 */
		free_pgd_range(&tlb, old_start, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	}
	tlb_finish_mmu(&tlb);

	vma_prev(&vmi);
	/* Shrink the vma to just the new range */
	return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
}