Commit dd6c438c authored by Linus Torvalds
Browse files
Pull vfs fixes from Christian Brauner:

 - eventpoll: fix ep_remove() UAF and follow-up cleanup

 - fs: aio: set VMA_DONTCOPY_BIT in mmap to fix NULL-pointer-dereference
   error

 - writeback: Fix use after free in inode_switch_wbs_work_fn()

 - fuse: reject oversized dirents in page cache

 - fs: aio: reject partial mremap to avoid Null-pointer-dereference
   error

 - nstree: fix func. parameter kernel-doc warnings

 - fs: Handle multiply claimed blocks more gracefully with mmb

* tag 'vfs-7.1-rc1.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  eventpoll: drop vestigial epi->dying flag
  eventpoll: drop dead bool return from ep_remove_epi()
  eventpoll: refresh eventpoll_release() fast-path comment
  eventpoll: move f_lock acquisition into ep_remove_file()
  eventpoll: fix ep_remove struct eventpoll / struct file UAF
  eventpoll: move epi_fget() up
  eventpoll: rename ep_remove_safe() back to ep_remove()
  eventpoll: drop vestigial __ prefix from ep_remove_{file,epi}()
  eventpoll: kill __ep_remove()
  eventpoll: split __ep_remove()
  eventpoll: use hlist_is_singular_node() in __ep_remove()
  fs: Handle multiply claimed blocks more gracefully with mmb
  nstree: fix func. parameter kernel-doc warnings
  fs: aio: reject partial mremap to avoid Null-pointer-dereference error
  fuse: reject oversized dirents in page cache
  writeback: Fix use after free in inode_switch_wbs_work_fn()
  fs: aio: set VMA_DONTCOPY_BIT in mmap to fix NULL-pointer-dereference error
parents bd1886d6 ac8777cc
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -422,7 +422,8 @@ static int aio_ring_mremap(struct vm_area_struct *vma)

		ctx = rcu_dereference(table->table[i]);
		if (ctx && ctx->aio_ring_file == file) {
			if (!atomic_read(&ctx->dead)) {
			if (!atomic_read(&ctx->dead) &&
			    (ctx->mmap_size == (vma->vm_end - vma->vm_start))) {
				ctx->user_id = ctx->mmap_base = vma->vm_start;
				res = 0;
			}
@@ -447,7 +448,7 @@ static const struct vm_operations_struct aio_ring_vm_ops = {

static int aio_ring_mmap_prepare(struct vm_area_desc *desc)
{
	vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT);
	vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT, VMA_DONTCOPY_BIT);
	desc->vm_ops = &aio_ring_vm_ops;
	return 0;
}
+8 −1
Original line number Diff line number Diff line
@@ -719,8 +719,15 @@ void mmb_mark_buffer_dirty(struct buffer_head *bh,
	mark_buffer_dirty(bh);
	if (!bh->b_mmb) {
		spin_lock(&mmb->lock);
		/*
		 * For a corrupted filesystem with multiply claimed blocks this
		 * can fail. Avoid corrupting the linked list in that case.
		 */
		if (cmpxchg(&bh->b_mmb, NULL, mmb) != NULL) {
			spin_unlock(&mmb->lock);
			return;
		}
		list_move_tail(&bh->b_assoc_buffers, &mmb->list);
		bh->b_mmb = mmb;
		spin_unlock(&mmb->lock);
	}
}
+78 −80
Original line number Diff line number Diff line
@@ -148,13 +148,6 @@ struct epitem {
	/* The file descriptor information this item refers to */
	struct epoll_filefd ffd;

	/*
	 * Protected by file->f_lock, true for to-be-released epitem already
	 * removed from the "struct file" items list; together with
	 * eventpoll->refcount orchestrates "struct eventpoll" disposal
	 */
	bool dying;

	/* List containing poll wait queues */
	struct eppoll_entry *pwqlist;

@@ -220,10 +213,7 @@ struct eventpoll {
	struct hlist_head refs;
	u8 loop_check_depth;

	/*
	 * usage count, used together with epitem->dying to
	 * orchestrate the disposal of this struct
	 */
	/* usage count, orchestrates "struct eventpoll" disposal */
	refcount_t refcount;

	/* used to defer freeing past ep_get_upwards_depth_proc() RCU walk */
@@ -827,36 +817,47 @@ static void ep_free(struct eventpoll *ep)
}

/*
 * Removes a "struct epitem" from the eventpoll RB tree and deallocates
 * all the associated resources. Must be called with "mtx" held.
 * If the dying flag is set, do the removal only if force is true.
 * This prevents ep_clear_and_put() from dropping all the ep references
 * while running concurrently with eventpoll_release_file().
 * Returns true if the eventpoll can be disposed.
 * The ffd.file pointer may be in the process of being torn down due to
 * being closed, but we may not have finished eventpoll_release() yet.
 *
 * Normally, even with the atomic_long_inc_not_zero, the file may have
 * been free'd and then gotten re-allocated to something else (since
 * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU).
 *
 * But for epoll, users hold the ep->mtx mutex, and as such any file in
 * the process of being free'd will block in eventpoll_release_file()
 * and thus the underlying file allocation will not be free'd, and the
 * file re-use cannot happen.
 *
 * For the same reason we can avoid a rcu_read_lock() around the
 * operation - 'ffd.file' cannot go away even if the refcount has
 * reached zero (but we must still not call out to ->poll() functions
 * etc).
 */
static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
static struct file *epi_fget(const struct epitem *epi)
{
	struct file *file = epi->ffd.file;
	struct epitems_head *to_free;
	struct hlist_head *head;
	struct file *file;

	lockdep_assert_irqs_enabled();
	file = epi->ffd.file;
	if (!file_ref_get(&file->f_ref))
		file = NULL;
	return file;
}

/*
	 * Removes poll wait queue hooks.
 * Takes &file->f_lock; returns with it released.
 */
	ep_unregister_pollwait(ep, epi);
static void ep_remove_file(struct eventpoll *ep, struct epitem *epi,
			     struct file *file)
{
	struct epitems_head *to_free = NULL;
	struct hlist_head *head;

	/* Remove the current item from the list of epoll hooks */
	spin_lock(&file->f_lock);
	if (epi->dying && !force) {
		spin_unlock(&file->f_lock);
		return false;
	}
	lockdep_assert_held(&ep->mtx);

	to_free = NULL;
	spin_lock(&file->f_lock);
	head = file->f_ep;
	if (head->first == &epi->fllink && !epi->fllink.next) {
	if (hlist_is_singular_node(&epi->fllink, head)) {
		/* See eventpoll_release() for details. */
		WRITE_ONCE(file->f_ep, NULL);
		if (!is_file_epoll(file)) {
@@ -869,6 +870,11 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
	hlist_del_rcu(&epi->fllink);
	spin_unlock(&file->f_lock);
	free_ephead(to_free);
}

static void ep_remove_epi(struct eventpoll *ep, struct epitem *epi)
{
	lockdep_assert_held(&ep->mtx);

	rb_erase_cached(&epi->rbn, &ep->rbr);

@@ -888,15 +894,31 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
	kfree_rcu(epi, rcu);

	percpu_counter_dec(&ep->user->epoll_watches);
	return true;
}

/*
 * ep_remove variant for callers owning an additional reference to the ep
 */
static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi)
static void ep_remove(struct eventpoll *ep, struct epitem *epi)
{
	if (__ep_remove(ep, epi, false))
	struct file *file __free(fput) = NULL;

	lockdep_assert_irqs_enabled();
	lockdep_assert_held(&ep->mtx);

	ep_unregister_pollwait(ep, epi);

	/*
	 * If we manage to grab a reference it means we're not in
	 * eventpoll_release_file() and aren't going to be: once @file's
	 * refcount has reached zero, file_ref_get() cannot bring it back.
	 */
	file = epi_fget(epi);
	if (!file)
		return;

	ep_remove_file(ep, epi, file);
	ep_remove_epi(ep, epi);
	WARN_ON_ONCE(ep_refcount_dec_and_test(ep));
}

@@ -923,7 +945,7 @@ static void ep_clear_and_put(struct eventpoll *ep)

	/*
	 * Walks through the whole tree and try to free each "struct epitem".
	 * Note that ep_remove_safe() will not remove the epitem in case of a
	 * Note that ep_remove() will not remove the epitem in case of a
	 * racing eventpoll_release_file(); the latter will do the removal.
	 * At this point we are sure no poll callbacks will be lingering around.
	 * Since we still own a reference to the eventpoll struct, the loop can't
@@ -932,7 +954,7 @@ static void ep_clear_and_put(struct eventpoll *ep)
	for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = next) {
		next = rb_next(rbp);
		epi = rb_entry(rbp, struct epitem, rbn);
		ep_remove_safe(ep, epi);
		ep_remove(ep, epi);
		cond_resched();
	}

@@ -1012,34 +1034,6 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
	return res;
}

/*
 * The ffd.file pointer may be in the process of being torn down due to
 * being closed, but we may not have finished eventpoll_release() yet.
 *
 * Normally, even with the atomic_long_inc_not_zero, the file may have
 * been free'd and then gotten re-allocated to something else (since
 * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU).
 *
 * But for epoll, users hold the ep->mtx mutex, and as such any file in
 * the process of being free'd will block in eventpoll_release_file()
 * and thus the underlying file allocation will not be free'd, and the
 * file re-use cannot happen.
 *
 * For the same reason we can avoid a rcu_read_lock() around the
 * operation - 'ffd.file' cannot go away even if the refcount has
 * reached zero (but we must still not call out to ->poll() functions
 * etc).
 */
static struct file *epi_fget(const struct epitem *epi)
{
	struct file *file;

	file = epi->ffd.file;
	if (!file_ref_get(&file->f_ref))
		file = NULL;
	return file;
}

/*
 * Differs from ep_eventpoll_poll() in that internal callers already have
 * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested()
@@ -1117,18 +1111,17 @@ void eventpoll_release_file(struct file *file)
{
	struct eventpoll *ep;
	struct epitem *epi;
	bool dispose;

	/*
	 * Use the 'dying' flag to prevent a concurrent ep_clear_and_put() from
	 * touching the epitems list before eventpoll_release_file() can access
	 * the ep->mtx.
	 * A concurrent ep_remove() cannot outrace us: it pins @file via
	 * epi_fget(), which fails once __fput() has dropped the refcount
	 * to zero -- the path we're on. So any racing ep_remove() bails
	 * and leaves the epi for us to clean up here.
	 */
again:
	spin_lock(&file->f_lock);
	if (file->f_ep && file->f_ep->first) {
		epi = hlist_entry(file->f_ep->first, struct epitem, fllink);
		epi->dying = true;
		spin_unlock(&file->f_lock);

		/*
@@ -1137,10 +1130,15 @@ void eventpoll_release_file(struct file *file)
		 */
		ep = epi->ep;
		mutex_lock(&ep->mtx);
		dispose = __ep_remove(ep, epi, true);

		ep_unregister_pollwait(ep, epi);

		ep_remove_file(ep, epi, file);
		ep_remove_epi(ep, epi);

		mutex_unlock(&ep->mtx);

		if (dispose && ep_refcount_dec_and_test(ep))
		if (ep_refcount_dec_and_test(ep))
			ep_free(ep);
		goto again;
	}
@@ -1619,21 +1617,21 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
		mutex_unlock(&tep->mtx);

	/*
	 * ep_remove_safe() calls in the later error paths can't lead to
	 * ep_remove() calls in the later error paths can't lead to
	 * ep_free() as the ep file itself still holds an ep reference.
	 */
	ep_get(ep);

	/* now check if we've created too many backpaths */
	if (unlikely(full_check && reverse_path_check())) {
		ep_remove_safe(ep, epi);
		ep_remove(ep, epi);
		return -EINVAL;
	}

	if (epi->event.events & EPOLLWAKEUP) {
		error = ep_create_wakeup_source(epi);
		if (error) {
			ep_remove_safe(ep, epi);
			ep_remove(ep, epi);
			return error;
		}
	}
@@ -1657,7 +1655,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
	 * high memory pressure.
	 */
	if (unlikely(!epq.epi)) {
		ep_remove_safe(ep, epi);
		ep_remove(ep, epi);
		return -ENOMEM;
	}

@@ -2352,7 +2350,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
			 * The eventpoll itself is still alive: the refcount
			 * can't go to zero here.
			 */
			ep_remove_safe(ep, epi);
			ep_remove(ep, epi);
			error = 0;
		} else {
			error = -ENOENT;
+19 −17
Original line number Diff line number Diff line
@@ -568,16 +568,19 @@ void inode_switch_wbs_work_fn(struct work_struct *work)
	struct inode_switch_wbs_context *isw, *next_isw;
	struct llist_node *list;

	list = llist_del_all(&new_wb->switch_wbs_ctxs);
	/*
	 * Nothing to do? That would be a problem as references held by isw
	 * items protect wb from freeing...
	 */
	if (WARN_ON_ONCE(!list))
		return;

	/*
	 * Grab out reference to wb so that it cannot get freed under us
	 * Grab our reference to wb so that it cannot get freed under us
	 * after we process all the isw items.
	 */
	wb_get(new_wb);
	while (1) {
		list = llist_del_all(&new_wb->switch_wbs_ctxs);
		/* Nothing to do? */
		if (!list)
			break;
	/*
	 * In addition to synchronizing among switchers, I_WB_SWITCH
	 * tells the RCU protected stat update paths to grab the i_page
@@ -589,7 +592,6 @@ void inode_switch_wbs_work_fn(struct work_struct *work)

	llist_for_each_entry_safe(isw, next_isw, list, list)
		process_inode_switch_wbs(new_wb, isw);
	}
	wb_put(new_wb);
}

+4 −0
Original line number Diff line number Diff line
@@ -41,6 +41,10 @@ static void fuse_add_dirent_to_cache(struct file *file,
	unsigned int offset;
	void *addr;

	/* Dirent doesn't fit in readdir cache page?  Skip caching. */
	if (reclen > PAGE_SIZE)
		return;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
Loading