Commit 016dc851 authored by David Howells
Browse files

netfs: Implement unbuffered/DIO read support



Implement support for unbuffered and DIO reads in the netfs library,
utilising the existing read helper code to do block splitting and
individual queuing.  The code also handles extraction of the destination
buffer from the supplied iterator, allowing async unbuffered reads to take
place.

The read will be split up according to the rsize setting and, if supplied,
the ->clamp_length() method.  Note that the next subrequest will be issued
as soon as issue_op returns, without waiting for previous ones to finish.
The network filesystem needs to pause or handle queuing them if it doesn't
want to fire them all at the server simultaneously.

Once all the subrequests have finished, the state will be assessed and the
amount of data to be indicated as having been obtained will be
determined.  As the subrequests may finish in any order, if an intermediate
subrequest is short, any further subrequests may be copied into the buffer
and then abandoned.

In the future, this will also take care of doing an unbuffered read from
encrypted content, with the decryption being done by the library.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
parent e2e2e839
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
netfs-y := \
	buffered_read.o \
	buffered_write.o \
	direct_read.o \
	io.o \
	iterator.o \
	locking.o \

fs/netfs/direct_read.c

0 → 100644
+125 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-or-later
/* Direct I/O support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/netfs.h>
#include "internal.h"

/**
 * netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read
 * @iocb: The I/O control descriptor describing the read
 * @iter: The output buffer (also specifies read length)
 *
 * Read from the file referred to by @iocb straight into the buffer described
 * by @iter, without going through the pagecache.
 *
 * Any appropriate locks must already be held by the caller.
 *
 * Return: 0 if @iter is empty.  A negative value from any of the helpers
 * called here is passed straight back (netfs_begin_read() may give
 * -EIOCBQUEUED).  Otherwise a synchronous @iocb gets the request's
 * transferred byte count and an asynchronous one gets whatever
 * netfs_begin_read() returned.
 */
static ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter)
{
	struct netfs_io_request *rreq;
	size_t orig_count = iov_iter_count(iter);
	ssize_t ret;
	bool sync = is_sync_kiocb(iocb);

	_enter("");

	/* An empty read is a no-op and must not update atime. */
	if (orig_count == 0)
		return 0;

	ret = kiocb_write_and_wait(iocb, orig_count);
	if (ret < 0)
		return ret;
	file_accessed(iocb->ki_filp);

	rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
				   iocb->ki_pos, orig_count, NETFS_DIO_READ);
	if (IS_ERR(rreq))
		return PTR_ERR(rreq);

	netfs_stat(&netfs_n_rh_dio_read);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_dio_read);

	if (!user_backed_iter(iter)) {
		/* Not user-backed: work from a copy of the caller's iterator
		 * and consume the whole of the original.
		 */
		rreq->iter = *iter;
		rreq->len = orig_count;
		rreq->direct_bv_unpin = false;
		iov_iter_advance(iter, orig_count);
	} else {
		/* For an async op we have to keep track of the destination
		 * buffer ourselves, as the caller's iterator will be trashed
		 * when we return.
		 *
		 * Extract an iterator to represent as much of the output
		 * buffer as we can manage.  The extraction might not be able
		 * to allocate a sufficiently large bvec array and so may
		 * shorten the request; the request length is therefore reset
		 * from what was actually extracted.
		 */
		ret = netfs_extract_user_iter(iter, rreq->len, &rreq->iter, 0);
		if (ret < 0)
			goto out;
		rreq->direct_bv = (struct bio_vec *)rreq->iter.bvec;
		rreq->direct_bv_count = ret;
		rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
		rreq->len = iov_iter_count(&rreq->iter);
	}

	// TODO: Set up bounce buffer if needed

	if (!sync)
		rreq->iocb = iocb;

	/* A negative result (which may be -EIOCBQUEUED) goes straight to the
	 * cleanup below; only a synchronous read reports its progress here.
	 */
	ret = netfs_begin_read(rreq, sync);
	if (ret >= 0 && sync) {
		// TODO: Copy from bounce buffer
		iocb->ki_pos += rreq->transferred;
		ret = rreq->transferred;
	}

out:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

	/* A queued op leaves the caller's iterator as it stands. */
	if (ret == -EIOCBQUEUED)
		return ret;

	/* Wind @iter back by whatever was consumed from it beyond the number
	 * of bytes being reported in ret.
	 */
	if (ret > 0)
		orig_count -= ret;
	iov_iter_revert(iter, orig_count - iov_iter_count(iter));
	return ret;
}

/**
 * netfs_unbuffered_read_iter - Perform an unbuffered or direct I/O read
 * @iocb: The I/O control descriptor describing the read
 * @iter: The output buffer (also specifies read length)
 *
 * Wrap netfs_unbuffered_read_iter_locked() in a netfs_start_io_direct() /
 * netfs_end_io_direct() pair taken on the inode behind @iocb's file.  The
 * read goes from the file to the output buffer without using the pagecache.
 *
 * Return: 0 if @iter is empty; the value from netfs_start_io_direct() if
 * that is non-zero; otherwise the result of the locked read.
 */
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode;
	ssize_t ret;

	/* Nothing to read, so leave early and don't update atime. */
	if (iter->count == 0)
		return 0;

	inode = file_inode(iocb->ki_filp);

	ret = netfs_start_io_direct(inode);
	if (ret != 0)
		return ret;

	ret = netfs_unbuffered_read_iter_locked(iocb, iter);
	netfs_end_io_direct(inode);
	return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_read_iter);
+1 −0
Original line number Diff line number Diff line
@@ -100,6 +100,7 @@ int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
 * stats.c
 */
#ifdef CONFIG_NETFS_STATS
extern atomic_t netfs_n_rh_dio_read;
extern atomic_t netfs_n_rh_readahead;
extern atomic_t netfs_n_rh_readpage;
extern atomic_t netfs_n_rh_rreq;
+75 −8
Original line number Diff line number Diff line
@@ -78,7 +78,9 @@ static void netfs_read_from_server(struct netfs_io_request *rreq,
				   struct netfs_io_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_download);
	if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)

	if (rreq->origin != NETFS_DIO_READ &&
	    iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
		pr_warn("R=%08x[%u] ITER PRE-MISMATCH %zx != %zx-%zx %lx\n",
			rreq->debug_id, subreq->debug_index,
			iov_iter_count(&subreq->io_iter), subreq->len,
@@ -341,6 +343,43 @@ static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq)
	}
}

/*
 * Determine how much we can admit to having read from a DIO read.
 *
 * The subrequest list is walked in order and the transferred counts are
 * summed until a subrequest is found that failed, transferred nothing or came
 * up short (a short subrequest's bytes are still counted).  Anything after
 * that point is not reported.
 *
 * The total is stored in rreq->transferred and passed to
 * task_io_account_read().  If the request carries an iocb, its file position
 * is advanced by the total and its completion handler, if set, is called with
 * rreq->error if that is non-zero or the total otherwise.  Finally the
 * filesystem's ->done() hook is called, if provided, followed by
 * inode_dio_end() on the request's inode.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	unsigned int i;
	size_t transferred = 0;

	/* Flush the dcache for every page in the destination bvec array.  A
	 * single pass suffices: nothing in this function writes to those
	 * pages afterwards, so repeating the flush after the list walk below
	 * would only be redundant work.
	 */
	for (i = 0; i < rreq->direct_bv_count; i++)
		flush_dcache_page(rreq->direct_bv[i].bv_page);

	/* Add up the contiguous run of data from the start of the request. */
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->error || subreq->transferred == 0)
			break;
		transferred += subreq->transferred;
		if (subreq->transferred < subreq->len)
			break;
	}

	rreq->transferred = transferred;
	task_io_account_read(transferred);

	if (rreq->iocb) {
		rreq->iocb->ki_pos += transferred;
		if (rreq->iocb->ki_complete)
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : transferred);
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
	inode_dio_end(rreq->inode);
}

/*
 * Assess the state of a read request and decide what to do next.
 *
@@ -361,7 +400,10 @@ static void netfs_rreq_assess(struct netfs_io_request *rreq, bool was_async)
		return;
	}

	if (rreq->origin != NETFS_DIO_READ)
		netfs_rreq_unlock_folios(rreq);
	else
		netfs_rreq_assess_dio(rreq);

	trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
@@ -526,14 +568,16 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
			struct netfs_io_subrequest *subreq,
			struct iov_iter *io_iter)
{
	enum netfs_io_source source;
	enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
	size_t lsize;

	_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);

	if (rreq->origin != NETFS_DIO_READ) {
		source = netfs_cache_prepare_read(subreq, rreq->i_size);
		if (source == NETFS_INVALID_READ)
			goto out;
	}

	if (source == NETFS_DOWNLOAD_FROM_SERVER) {
		/* Call out to the netfs to let it shrink the request to fit
@@ -544,6 +588,8 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
		 */
		if (subreq->len > rreq->i_size - subreq->start)
			subreq->len = rreq->i_size - subreq->start;
		if (rreq->rsize && subreq->len > rreq->rsize)
			subreq->len = rreq->rsize;

		if (rreq->netfs_ops->clamp_length &&
		    !rreq->netfs_ops->clamp_length(subreq)) {
@@ -662,6 +708,10 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
		return -EIO;
	}

	if (rreq->origin == NETFS_DIO_READ)
		inode_dio_begin(rreq->inode);

	// TODO: Use bounce buffer if requested
	rreq->io_iter = rreq->iter;

	INIT_WORK(&rreq->work, netfs_rreq_work);
@@ -673,11 +723,25 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
	atomic_set(&rreq->nr_outstanding, 1);
	io_iter = rreq->io_iter;
	do {
		_debug("submit %llx + %zx >= %llx",
		       rreq->start, rreq->submitted, rreq->i_size);
		if (rreq->origin == NETFS_DIO_READ &&
		    rreq->start + rreq->submitted >= rreq->i_size)
			break;
		if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index))
			break;
		if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
		    test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
			break;

	} while (rreq->submitted < rreq->len);

	if (!rreq->submitted) {
		netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
		ret = 0;
		goto out;
	}

	if (sync) {
		/* Keep nr_outstanding incremented so that the ref always
		 * belongs to us, and the service code isn't punted off to a
@@ -694,7 +758,8 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
			    TASK_UNINTERRUPTIBLE);

		ret = rreq->error;
		if (ret == 0 && rreq->submitted < rreq->len) {
		if (ret == 0 && rreq->submitted < rreq->len &&
		    rreq->origin != NETFS_DIO_READ) {
			trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
			ret = -EIO;
		}
@@ -702,7 +767,9 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
		/* If we decrement nr_outstanding to 0, the ref belongs to us. */
		if (atomic_dec_and_test(&rreq->nr_outstanding))
			netfs_rreq_assess(rreq, false);
		ret = 0;
		ret = -EIOCBQUEUED;
	}

out:
	return ret;
}
+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
	[NETFS_READPAGE]	= "RP",
	[NETFS_READ_FOR_WRITE]	= "RW",
	[NETFS_WRITEBACK]	= "WB",
	[NETFS_DIO_READ]	= "DR",
};

/*
Loading