Commit 6cca1195 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull close_range() fix from Al Viro:
 "Fix the logic in descriptor table trimming"

* tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  close_range(): fix the logics in descriptor table trimming
parents 0c559323 678379e1
Loading
Loading
Loading
Loading
+34 −61
Original line number Diff line number Diff line
@@ -272,59 +272,45 @@ static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt)
	return test_bit(fd, fdt->open_fds);
}

static unsigned int count_open_files(struct fdtable *fdt)
{
	unsigned int size = fdt->max_fds;
	unsigned int i;

	/* Find the last open fd */
	for (i = size / BITS_PER_LONG; i > 0; ) {
		if (fdt->open_fds[--i])
			break;
	}
	i = (i + 1) * BITS_PER_LONG;
	return i;
}

/*
 * Note that a sane fdtable size always has to be a multiple of
 * BITS_PER_LONG, since we have bitmaps that are sized by this.
 *
 * 'max_fds' will normally already be properly aligned, but it
 * turns out that in the close_range() -> __close_range() ->
 * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end
 * up having a 'max_fds' value that isn't already aligned.
 *
 * Rather than make close_range() have to worry about this,
 * just make that BITS_PER_LONG alignment be part of a sane
 * fdtable size. Becuase that's really what it is.
 * punch_hole is optional - when close_range() is asked to unshare
 * and close, we don't need to copy descriptors in that range, so
 * a smaller cloned descriptor table might suffice if the last
 * currently opened descriptor falls into that range.
 */
static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
static unsigned int sane_fdtable_size(struct fdtable *fdt, struct fd_range *punch_hole)
{
	unsigned int count;
	unsigned int last = find_last_bit(fdt->open_fds, fdt->max_fds);

	count = count_open_files(fdt);
	if (max_fds < NR_OPEN_DEFAULT)
		max_fds = NR_OPEN_DEFAULT;
	return ALIGN(min(count, max_fds), BITS_PER_LONG);
	if (last == fdt->max_fds)
		return NR_OPEN_DEFAULT;
	if (punch_hole && punch_hole->to >= last && punch_hole->from <= last) {
		last = find_last_bit(fdt->open_fds, punch_hole->from);
		if (last == punch_hole->from)
			return NR_OPEN_DEFAULT;
	}
	return ALIGN(last + 1, BITS_PER_LONG);
}

/*
 * Allocate a new files structure and copy contents from the
 * passed in files structure.
 * errorp will be valid only when the returned files_struct is NULL.
 * Allocate a new descriptor table and copy contents from the passed in
 * instance.  Returns a pointer to cloned table on success, ERR_PTR()
 * on failure.  For 'punch_hole' see sane_fdtable_size().
 */
struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_hole)
{
	struct files_struct *newf;
	struct file **old_fds, **new_fds;
	unsigned int open_files, i;
	struct fdtable *old_fdt, *new_fdt;
	int error;

	*errorp = -ENOMEM;
	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
	if (!newf)
		goto out;
		return ERR_PTR(-ENOMEM);

	atomic_set(&newf->count, 1);

@@ -341,7 +327,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int

	spin_lock(&oldf->file_lock);
	old_fdt = files_fdtable(oldf);
	open_files = sane_fdtable_size(old_fdt, max_fds);
	open_files = sane_fdtable_size(old_fdt, punch_hole);

	/*
	 * Check whether we need to allocate a larger fd array and fd set.
@@ -354,14 +340,14 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int

		new_fdt = alloc_fdtable(open_files - 1);
		if (!new_fdt) {
			*errorp = -ENOMEM;
			error = -ENOMEM;
			goto out_release;
		}

		/* beyond sysctl_nr_open; nothing to do */
		if (unlikely(new_fdt->max_fds < open_files)) {
			__free_fdtable(new_fdt);
			*errorp = -EMFILE;
			error = -EMFILE;
			goto out_release;
		}

@@ -372,7 +358,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
		 */
		spin_lock(&oldf->file_lock);
		old_fdt = files_fdtable(oldf);
		open_files = sane_fdtable_size(old_fdt, max_fds);
		open_files = sane_fdtable_size(old_fdt, punch_hole);
	}

	copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
@@ -406,8 +392,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int

out_release:
	kmem_cache_free(files_cachep, newf);
out:
	return NULL;
	return ERR_PTR(error);
}

static struct fdtable *close_files(struct files_struct * files)
@@ -748,36 +733,24 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
	if (fd > max_fd)
		return -EINVAL;

	if (flags & CLOSE_RANGE_UNSHARE) {
		int ret;
		unsigned int max_unshare_fds = NR_OPEN_MAX;
	if ((flags & CLOSE_RANGE_UNSHARE) && atomic_read(&cur_fds->count) > 1) {
		struct fd_range range = {fd, max_fd}, *punch_hole = &range;

		/*
		 * If the caller requested all fds to be made cloexec we always
		 * copy all of the file descriptors since they still want to
		 * use them.
		 */
		if (!(flags & CLOSE_RANGE_CLOEXEC)) {
			/*
			 * If the requested range is greater than the current
			 * maximum, we're closing everything so only copy all
			 * file descriptors beneath the lowest file descriptor.
			 */
			rcu_read_lock();
			if (max_fd >= last_fd(files_fdtable(cur_fds)))
				max_unshare_fds = fd;
			rcu_read_unlock();
		}

		ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
		if (ret)
			return ret;
		if (flags & CLOSE_RANGE_CLOEXEC)
			punch_hole = NULL;

		fds = dup_fd(cur_fds, punch_hole);
		if (IS_ERR(fds))
			return PTR_ERR(fds);
		/*
		 * We used to share our file descriptor table, and have now
		 * created a private one, make sure we're using it below.
		 */
		if (fds)
		swap(cur_fds, fds);
	}

+4 −4
Original line number Diff line number Diff line
@@ -22,7 +22,6 @@
 * as this is the granularity returned by copy_fdset().
 */
#define NR_OPEN_DEFAULT BITS_PER_LONG
#define NR_OPEN_MAX ~0U

struct fdtable {
	unsigned int max_fds;
@@ -106,7 +105,10 @@ struct task_struct;

void put_files_struct(struct files_struct *fs);
int unshare_files(void);
struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
struct fd_range {
	unsigned int from, to;
};
struct files_struct *dup_fd(struct files_struct *, struct fd_range *) __latent_entropy;
void do_close_on_exec(struct files_struct *);
int iterate_fd(struct files_struct *, unsigned,
		int (*)(const void *, struct file *, unsigned),
@@ -115,8 +117,6 @@ int iterate_fd(struct files_struct *, unsigned,
extern int close_fd(unsigned int fd);
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
extern struct file *file_close_fd(unsigned int fd);
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
		      struct files_struct **new_fdp);

extern struct kmem_cache *files_cachep;

+14 −18
Original line number Diff line number Diff line
@@ -1756,33 +1756,30 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk,
		      int no_files)
{
	struct files_struct *oldf, *newf;
	int error = 0;

	/*
	 * A background process may not have any files ...
	 */
	oldf = current->files;
	if (!oldf)
		goto out;
		return 0;

	if (no_files) {
		tsk->files = NULL;
		goto out;
		return 0;
	}

	if (clone_flags & CLONE_FILES) {
		atomic_inc(&oldf->count);
		goto out;
		return 0;
	}

	newf = dup_fd(oldf, NR_OPEN_MAX, &error);
	if (!newf)
		goto out;
	newf = dup_fd(oldf, NULL);
	if (IS_ERR(newf))
		return PTR_ERR(newf);

	tsk->files = newf;
	error = 0;
out:
	return error;
	return 0;
}

static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
@@ -3238,17 +3235,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
/*
 * Unshare file descriptor table if it is being shared
 */
int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
	       struct files_struct **new_fdp)
static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
{
	struct files_struct *fd = current->files;
	int error = 0;

	if ((unshare_flags & CLONE_FILES) &&
	    (fd && atomic_read(&fd->count) > 1)) {
		*new_fdp = dup_fd(fd, max_fds, &error);
		if (!*new_fdp)
			return error;
		fd = dup_fd(fd, NULL);
		if (IS_ERR(fd))
			return PTR_ERR(fd);
		*new_fdp = fd;
	}

	return 0;
@@ -3306,7 +3302,7 @@ int ksys_unshare(unsigned long unshare_flags)
	err = unshare_fs(unshare_flags, &new_fs);
	if (err)
		goto bad_unshare_out;
	err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
	err = unshare_fd(unshare_flags, &new_fd);
	if (err)
		goto bad_unshare_cleanup_fs;
	err = unshare_userns(unshare_flags, &new_cred);
@@ -3398,7 +3394,7 @@ int unshare_files(void)
	struct files_struct *old, *copy = NULL;
	int error;

	error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
	error = unshare_fd(CLONE_FILES, &copy);
	if (error || !copy)
		return error;