Commit d82c0a37 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull execve updates from Kees Cook:

 - Support non-BSS ELF segments with zero filesz

   Eric Biederman and I refactored ELF segment loading to handle the
   case where a segment has a smaller filesz than memsz. Traditionally
   linkers only did this for .bss and it was always the last segment. As
   a result, the kernel only handled this case when it was the last
   segment. We've had two recent cases where linkers were trying to use
   these kinds of segments for other reasons, and the were in the middle
   of the segment list. There was no good reason for the kernel not to
   support this, and the refactor actually ends up making things more
   readable too.

 - Enable namespaced binfmt_misc

   Christian Brauner has made it possible to use binfmt_misc with mount
   namespaces. This means some traditionally root-only interfaces (for
   adding/removing formats) are now more exposed (but believed to be
   safe).

 - Remove struct tag 'dynamic' from ELF UAPI

   Alejandro Colomar noticed that the ELF UAPI has been polluting the
   struct namespace with an unused and overly generic tag named
   "dynamic" for no discernible reason for many many years. After
   double-checking various distro source repositories, it has been
   removed.

 - Clean up binfmt_elf_fdpic debug output (Greg Ungerer)

* tag 'execve-v6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  binfmt_misc: enable sandboxed mounts
  binfmt_misc: cleanup on filesystem umount
  binfmt_elf_fdpic: clean up debug warnings
  mm: Remove unused vm_brk()
  binfmt_elf: Only report padzero() errors when PROT_WRITE
  binfmt_elf: Use elf_load() for library
  binfmt_elf: Use elf_load() for interpreter
  binfmt_elf: elf_bss no longer used by load_elf_binary()
  binfmt_elf: Support segments with 0 filesz and misaligned starts
  elf, uapi: Remove struct tag 'dynamic'
parents 5e372699 21ca59b3
Loading
Loading
Loading
Loading
+75 −140
Original line number Diff line number Diff line
@@ -110,38 +110,19 @@ static struct linux_binfmt elf_format = {

#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))

static int set_brk(unsigned long start, unsigned long end, int prot)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
/*
		 * Map the last of the bss segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error = vm_brk_flags(start, end - start,
				prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			return error;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory
 * We need to explicitly zero any trailing portion of the page that follows
 * p_filesz when it ends before the page ends (e.g. bss), otherwise this
 * memory will contain the junk from the file that should not be present.
 */
static int padzero(unsigned long elf_bss)
static int padzero(unsigned long address)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	nbyte = ELF_PAGEOFFSET(address);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
		if (clear_user((void __user *)address, nbyte))
			return -EFAULT;
	}
	return 0;
@@ -367,6 +348,11 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
	return 0;
}

/*
 * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
 * into memory at "addr". (Note that p_filesz is rounded up to the
 * next page, so any extra bytes from the file must be wiped.)
 */
static unsigned long elf_map(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
@@ -406,6 +392,60 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
	return(map_addr);
}

/*
 * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
 * into memory at "addr". Memory from "p_filesz" through "p_memsz"
 * rounded up to the next page is zeroed.
 */
static unsigned long elf_load(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long zero_start, zero_end;
	unsigned long map_addr;

	if (eppnt->p_filesz) {
		map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
		if (BAD_ADDR(map_addr))
			return map_addr;
		if (eppnt->p_memsz > eppnt->p_filesz) {
			zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
				eppnt->p_filesz;
			zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
				eppnt->p_memsz;

			/*
			 * Zero the end of the last mapped page but ignore
			 * any errors if the segment isn't writable.
			 */
			if (padzero(zero_start) && (prot & PROT_WRITE))
				return -EFAULT;
		}
	} else {
		map_addr = zero_start = ELF_PAGESTART(addr);
		zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
			eppnt->p_memsz;
	}
	if (eppnt->p_memsz > eppnt->p_filesz) {
		/*
		 * Map the last of the segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error;

		zero_start = ELF_PAGEALIGN(zero_start);
		zero_end = ELF_PAGEALIGN(zero_end);

		error = vm_brk_flags(zero_start, zero_end - zero_start,
				     prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			map_addr = error;
	}
	return map_addr;
}


static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
{
	elf_addr_t min_addr = -1;
@@ -596,8 +636,6 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	int bss_prot = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;
@@ -634,7 +672,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
			map_addr = elf_load(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			error = map_addr;
@@ -660,51 +698,9 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
				error = -ENOMEM;
				goto out;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
			if (k > last_bss) {
				last_bss = k;
				bss_prot = elf_prot;
			}
		}
	}

	/*
	 * Now fill out the bss section: first pad the last page from
	 * the file up to the page boundary, and zero it from elf_bss
	 * up to the end of the page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out;
	}
	/*
	 * Next, align both the file and mem bss up to the page size,
	 * since this is where elf_bss was just zeroed up to, and where
	 * last_bss will end after the vm_brk_flags() below.
	 */
	elf_bss = ELF_PAGEALIGN(elf_bss);
	last_bss = ELF_PAGEALIGN(last_bss);
	/* Finally, if there is still more bss to allocate, do it. */
	if (last_bss > elf_bss) {
		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
				bss_prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			goto out;
	}

	error = load_addr;
out:
	return error;
@@ -828,8 +824,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	struct elf_phdr *elf_property_phdata = NULL;
	unsigned long elf_bss, elf_brk;
	int bss_prot = 0;
	unsigned long elf_brk;
	int retval, i;
	unsigned long elf_entry;
	unsigned long e_entry;
@@ -1020,7 +1015,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
	if (retval < 0)
		goto out_free_dentry;

	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
@@ -1040,33 +1034,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely (elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias,
					 bss_prot);
			if (retval)
				goto out_free_dentry;
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
				     !!interpreter, false);

@@ -1162,7 +1129,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
			}
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
		error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, total_size);
		if (BAD_ADDR(error)) {
			retval = IS_ERR_VALUE(error) ?
@@ -1210,40 +1177,24 @@ static int load_elf_binary(struct linux_binprm *bprm)

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk) {
			bss_prot = elf_prot;
		if (k > elf_brk)
			elf_brk = k;
	}
	}

	e_entry = elf_ex->e_entry + load_bias;
	phdr_addr += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk, bss_prot);
	if (retval)
		goto out_free_dentry;
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}
	current->mm->start_brk = current->mm->brk = ELF_PAGEALIGN(elf_brk);

	if (interpreter) {
		elf_entry = load_elf_interp(interp_elf_ex,
@@ -1369,7 +1320,6 @@ static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

@@ -1414,30 +1364,15 @@ static int load_elf_library(struct file *file)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
	error = elf_load(file, ELF_PAGESTART(eppnt->p_vaddr),
			eppnt,
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED_NOREPLACE | MAP_PRIVATE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;
			0);

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;
	}

	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
	if (bss > len) {
		error = vm_brk(len, bss - len);
		if (error)
			goto out_free_ph;
	}
	error = 0;

out_free_ph:
+12 −8
Original line number Diff line number Diff line
@@ -899,10 +899,12 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
	kdebug("- DYNAMIC[]: %lx", params->dynamic_addr);
	seg = loadmap->segs;
	for (loop = 0; loop < loadmap->nsegs; loop++, seg++)
		kdebug("- LOAD[%d] : %08x-%08x [va=%x ms=%x]",
		kdebug("- LOAD[%d] : %08llx-%08llx [va=%llx ms=%llx]",
		       loop,
		       seg->addr, seg->addr + seg->p_memsz - 1,
		       seg->p_vaddr, seg->p_memsz);
		       (unsigned long long) seg->addr,
		       (unsigned long long) seg->addr + seg->p_memsz - 1,
		       (unsigned long long) seg->p_vaddr,
		       (unsigned long long) seg->p_memsz);

	return 0;

@@ -1081,9 +1083,10 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
		maddr = vm_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
				phdr->p_offset - disp);

		kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx",
		       loop, phdr->p_memsz + disp, prot, flags,
		       phdr->p_offset - disp, maddr);
		kdebug("mmap[%d] <file> sz=%llx pr=%x fl=%x of=%llx --> %08lx",
		       loop, (unsigned long long) phdr->p_memsz + disp,
		       prot, flags, (unsigned long long) phdr->p_offset - disp,
		       maddr);

		if (IS_ERR_VALUE(maddr))
			return (int) maddr;
@@ -1145,8 +1148,9 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,

#else
		if (excess > 0) {
			kdebug("clear[%d] ad=%lx sz=%lx",
			       loop, maddr + phdr->p_filesz, excess);
			kdebug("clear[%d] ad=%llx sz=%lx", loop,
			       (unsigned long long) maddr + phdr->p_filesz,
			       excess);
			if (clear_user((void *) maddr + phdr->p_filesz, excess))
				return -EFAULT;
		}
+320 −66

File changed.

Preview size limit exceeded, changes collapsed.

+10 −0
Original line number Diff line number Diff line
@@ -90,6 +90,16 @@ struct linux_binfmt {
#endif
} __randomize_layout;

#if IS_ENABLED(CONFIG_BINFMT_MISC)
struct binfmt_misc {
	struct list_head entries;
	rwlock_t entries_lock;
	bool enabled;
} __randomize_layout;

extern struct binfmt_misc init_binfmt_misc;
#endif

extern void __register_binfmt(struct linux_binfmt *fmt, int insert);

/* Registration of default binfmt handlers */
+1 −2
Original line number Diff line number Diff line
@@ -3308,8 +3308,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
static inline void mm_populate(unsigned long addr, unsigned long len) {}
#endif

/* These take the mm semaphore themselves */
extern int __must_check vm_brk(unsigned long, unsigned long);
/* This takes the mm semaphore itself */
extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
extern int vm_munmap(unsigned long, size_t);
extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
Loading