Unverified Commit 4d428dca authored by Max Kellermann's avatar Max Kellermann Committed by Christian Brauner
Browse files

netfs: fix reference leak



Commit 20d72b00 ("netfs: Fix the request's work item to not
require a ref") modified netfs_alloc_request() to initialize the
reference counter to 2 instead of 1.  The rationale was that the
requet's "work" would release the second reference after completion
(via netfs_{read,write}_collection_worker()).  That works most of the
time if all goes well.

However, it leaks this additional reference if the request is released
before the I/O operation has been submitted: the error code path only
decrements the reference counter once and the work item will never be
queued because there will never be a completion.

This has caused outages of our whole server cluster today because
tasks were blocked in netfs_wait_for_outstanding_io(), leading to
deadlocks in Ceph (another bug that I will address soon in another
patch).  This was caused by a netfs_pgpriv2_begin_copy_to_cache() call
which failed in fscache_begin_write_operation().  The leaked
netfs_io_request was never completed, leaving `netfs_inode.io_count`
with a positive value forever.

All of this is super-fragile code.  Finding out which code paths will
lead to an eventual completion and which do not is hard to see:

- Some functions like netfs_create_write_req() allocate a request, but
  will never submit any I/O.

- netfs_unbuffered_read_iter_locked() calls netfs_unbuffered_read()
  and then netfs_put_request(); however, netfs_unbuffered_read() can
  also fail early before submitting the I/O request, therefore another
  netfs_put_request() call must be added there.

A rule of thumb is that functions that return a `netfs_io_request` do
not submit I/O, and all of their callers must be checked.

For my taste, the whole netfs code needs an overhaul to make reference
counting easier to understand and less fragile & obscure.  But to fix
this bug here and now and produce a patch that is adequate for a
stable backport, I tried a minimal approach that quickly frees the
request object upon early failure.

I decided against adding a second netfs_put_request() each time
because that would cause code duplication which obscures the code
further.  Instead, I added the function netfs_put_failed_request()
which frees such a failed request synchronously under the assumption
that the reference count is exactly 2 (as initially set by
netfs_alloc_request() and never touched), verified by a
WARN_ON_ONCE().  It then deinitializes the request object (without
going through the "cleanup_work" indirection) and frees the allocation
(with RCU protection to protect against concurrent access by
netfs_requests_seq_start()).

All code paths that fail early have been changed to call
netfs_put_failed_request() instead of netfs_put_request().
Additionally, I have added a netfs_put_request() call to
netfs_unbuffered_read() as explained above because the
netfs_put_failed_request() approach does not work there.

Fixes: 20d72b00 ("netfs: Fix the request's work item to not require a ref")
Signed-off-by: default avatarMax Kellermann <max.kellermann@ionos.com>
Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
cc: Paulo Alcantara <pc@manguebit.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
cc: stable@vger.kernel.org
Signed-off-by: default avatarChristian Brauner <brauner@kernel.org>
parent 9158c6bb
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -369,7 +369,7 @@ void netfs_readahead(struct readahead_control *ractl)
	return netfs_put_request(rreq, netfs_rreq_trace_put_return);

cleanup_free:
	return netfs_put_request(rreq, netfs_rreq_trace_put_failed);
	return netfs_put_failed_request(rreq);
}
EXPORT_SYMBOL(netfs_readahead);

@@ -472,7 +472,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, netfs_rreq_trace_put_discard);
	netfs_put_failed_request(rreq);
alloc_error:
	folio_unlock(folio);
	return ret;
@@ -532,7 +532,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
	return ret < 0 ? ret : 0;

discard:
	netfs_put_request(rreq, netfs_rreq_trace_put_discard);
	netfs_put_failed_request(rreq);
alloc_error:
	folio_unlock(folio);
	return ret;
@@ -699,7 +699,7 @@ int netfs_write_begin(struct netfs_inode *ctx,
	return 0;

error_put:
	netfs_put_request(rreq, netfs_rreq_trace_put_failed);
	netfs_put_failed_request(rreq);
error:
	if (folio) {
		folio_unlock(folio);
@@ -754,7 +754,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
	return ret < 0 ? ret : 0;

error_put:
	netfs_put_request(rreq, netfs_rreq_trace_put_discard);
	netfs_put_failed_request(rreq);
error:
	_leave(" = %d", ret);
	return ret;
+6 −1
Original line number Diff line number Diff line
@@ -131,6 +131,7 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)

	if (rreq->len == 0) {
		pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
		netfs_put_request(rreq, netfs_rreq_trace_put_discard);
		return -EIO;
	}

@@ -205,7 +206,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
	if (user_backed_iter(iter)) {
		ret = netfs_extract_user_iter(iter, rreq->len, &rreq->buffer.iter, 0);
		if (ret < 0)
			goto out;
			goto error_put;
		rreq->direct_bv = (struct bio_vec *)rreq->buffer.iter.bvec;
		rreq->direct_bv_count = ret;
		rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
@@ -238,6 +239,10 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
	if (ret > 0)
		orig_count -= ret;
	return ret;

error_put:
	netfs_put_failed_request(rreq);
	return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_read_iter_locked);

+5 −1
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
			n = netfs_extract_user_iter(iter, len, &wreq->buffer.iter, 0);
			if (n < 0) {
				ret = n;
				goto out;
				goto error_put;
			}
			wreq->direct_bv = (struct bio_vec *)wreq->buffer.iter.bvec;
			wreq->direct_bv_count = n;
@@ -101,6 +101,10 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
out:
	netfs_put_request(wreq, netfs_rreq_trace_put_return);
	return ret;

error_put:
	netfs_put_failed_request(wreq);
	return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked);

+1 −0
Original line number Diff line number Diff line
@@ -87,6 +87,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
void netfs_clear_subrequests(struct netfs_io_request *rreq);
void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
void netfs_put_failed_request(struct netfs_io_request *rreq);
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq);

static inline void netfs_see_request(struct netfs_io_request *rreq,
+27 −3
Original line number Diff line number Diff line
@@ -116,10 +116,8 @@ static void netfs_free_request_rcu(struct rcu_head *rcu)
	netfs_stat_d(&netfs_n_rh_rreq);
}

static void netfs_free_request(struct work_struct *work)
static void netfs_deinit_request(struct netfs_io_request *rreq)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, cleanup_work);
	struct netfs_inode *ictx = netfs_inode(rreq->inode);
	unsigned int i;

@@ -149,6 +147,14 @@ static void netfs_free_request(struct work_struct *work)

	if (atomic_dec_and_test(&ictx->io_count))
		wake_up_var(&ictx->io_count);
}

static void netfs_free_request(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, cleanup_work);

	netfs_deinit_request(rreq);
	call_rcu(&rreq->rcu, netfs_free_request_rcu);
}

@@ -167,6 +173,24 @@ void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace
	}
}

/*
 * Free a request (synchronously) that was just allocated but has
 * failed before it could be submitted.
 */
void netfs_put_failed_request(struct netfs_io_request *rreq)
{
	int r = refcount_read(&rreq->ref);

	/* new requests have two references (see
	 * netfs_alloc_request(), and this function is only allowed on
	 * new request objects
	 */
	WARN_ON_ONCE(r != 2);

	trace_netfs_rreq_ref(rreq->debug_id, r, netfs_rreq_trace_put_failed);
	netfs_free_request(&rreq->cleanup_work);
}

/*
 * Allocate and partially initialise an I/O request structure.
 */
Loading