Unverified Commit d43558ae authored by Joanne Koong's avatar Joanne Koong Committed by Christian Brauner
Browse files

iomap: track pending read bytes more optimally



Instead of incrementing read_bytes_pending for every folio range read in
(which requires acquiring the spinlock to do so), set read_bytes_pending
to the folio size when the first range is asynchronously read in, keep
track of how many bytes total are asynchronously read in, and adjust
read_bytes_pending accordingly after issuing requests to read in all the
necessary ranges.

iomap_read_folio_ctx->cur_folio_in_bio can be removed since a non-zero
value for pending bytes necessarily indicates the folio is in the bio.

Signed-off-by: default avatarJoanne Koong <joannelkoong@gmail.com>
Suggested-by: default avatarDarrick J. Wong <djwong@kernel.org>
Signed-off-by: default avatarChristian Brauner <brauner@kernel.org>
parent 87a13819
Loading
Loading
Loading
Loading
+102 −21
Original line number Diff line number Diff line
@@ -362,7 +362,6 @@ static void iomap_read_end_io(struct bio *bio)

struct iomap_read_folio_ctx {
	struct folio		*cur_folio;
	bool			cur_folio_in_bio;
	void			*read_ctx;
	struct readahead_control *rac;
};
@@ -380,19 +379,11 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter,
{
	struct folio *folio = ctx->cur_folio;
	const struct iomap *iomap = &iter->iomap;
	struct iomap_folio_state *ifs = folio->private;
	size_t poff = offset_in_folio(folio, pos);
	loff_t length = iomap_length(iter);
	sector_t sector;
	struct bio *bio = ctx->read_ctx;

	ctx->cur_folio_in_bio = true;
	if (ifs) {
		spin_lock_irq(&ifs->state_lock);
		ifs->read_bytes_pending += plen;
		spin_unlock_irq(&ifs->state_lock);
	}

	sector = iomap_sector(iomap, pos);
	if (!bio || bio_end_sector(bio) != sector ||
	    !bio_add_folio(bio, folio, plen, poff)) {
@@ -422,8 +413,93 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter,
	}
}

static void iomap_read_init(struct folio *folio)
{
	struct iomap_folio_state *ifs = folio->private;

	if (ifs) {
		size_t len = folio_size(folio);

		/*
		 * ifs->read_bytes_pending is used to track how many bytes are
		 * read in asynchronously by the IO helper. We need to track
		 * this so that we can know when the IO helper has finished
		 * reading in all the necessary ranges of the folio and can end
		 * the read.
		 *
		 * Increase ->read_bytes_pending by the folio size to start, and
		 * add a +1 bias. We'll subtract the bias and any uptodate /
		 * zeroed ranges that did not require IO in iomap_read_end()
		 * after we're done processing the folio.
		 *
		 * We do this because otherwise, we would have to increment
		 * ifs->read_bytes_pending every time a range in the folio needs
		 * to be read in, which can get expensive since the spinlock
		 * needs to be held whenever modifying ifs->read_bytes_pending.
		 *
		 * We add the bias to ensure the read has not been ended on the
		 * folio when iomap_read_end() is called, even if the IO helper
		 * has already finished reading in the entire folio.
		 */
		spin_lock_irq(&ifs->state_lock);
		ifs->read_bytes_pending += len + 1;
		spin_unlock_irq(&ifs->state_lock);
	}
}

/*
 * This ends IO if no bytes were submitted to an IO helper.
 *
 * Otherwise, this calibrates ifs->read_bytes_pending to represent only the
 * submitted bytes (see comment in iomap_read_init()). If all bytes submitted
 * have already been completed by the IO helper, then this will end the read.
 * Else the IO helper will end the read after all submitted ranges have been
 * read.
 */
static void iomap_read_end(struct folio *folio, size_t bytes_pending)
{
	struct iomap_folio_state *ifs;

	/*
	 * If there are no bytes pending, this means we are responsible for
	 * unlocking the folio here, since no IO helper has taken ownership of
	 * it.
	 */
	if (!bytes_pending) {
		folio_unlock(folio);
		return;
	}

	ifs = folio->private;
	if (ifs) {
		bool end_read, uptodate;
		/*
		 * Subtract any bytes that were initially accounted to
		 * read_bytes_pending but skipped for IO.
		 * The +1 accounts for the bias we added in iomap_read_init().
		 */
		size_t bytes_accounted = folio_size(folio) + 1 -
				bytes_pending;

		spin_lock_irq(&ifs->state_lock);
		ifs->read_bytes_pending -= bytes_accounted;
		/*
		 * If !ifs->read_bytes_pending, this means all pending reads
		 * by the IO helper have already completed, which means we need
		 * to end the folio read here. If ifs->read_bytes_pending != 0,
		 * the IO helper will end the folio read.
		 */
		end_read = !ifs->read_bytes_pending;
		if (end_read)
			uptodate = ifs_is_fully_uptodate(folio, ifs);
		spin_unlock_irq(&ifs->state_lock);
		if (end_read)
			folio_end_read(folio, uptodate);
	}
}

static int iomap_read_folio_iter(struct iomap_iter *iter,
		struct iomap_read_folio_ctx *ctx)
		struct iomap_read_folio_ctx *ctx, size_t *bytes_pending)
{
	const struct iomap *iomap = &iter->iomap;
	loff_t pos = iter->pos;
@@ -460,6 +536,9 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
			folio_zero_range(folio, poff, plen);
			iomap_set_range_uptodate(folio, poff, plen);
		} else {
			if (!*bytes_pending)
				iomap_read_init(folio);
			*bytes_pending += plen;
			iomap_bio_read_folio_range(iter, ctx, pos, plen);
		}

@@ -482,17 +561,18 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
	struct iomap_read_folio_ctx ctx = {
		.cur_folio	= folio,
	};
	size_t bytes_pending = 0;
	int ret;

	trace_iomap_readpage(iter.inode, 1);

	while ((ret = iomap_iter(&iter, ops)) > 0)
		iter.status = iomap_read_folio_iter(&iter, &ctx);
		iter.status = iomap_read_folio_iter(&iter, &ctx,
				&bytes_pending);

	iomap_bio_submit_read(&ctx);

	if (!ctx.cur_folio_in_bio)
		folio_unlock(folio);
	iomap_read_end(folio, bytes_pending);

	/*
	 * Just like mpage_readahead and block_read_full_folio, we always
@@ -504,24 +584,23 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
EXPORT_SYMBOL_GPL(iomap_read_folio);

static int iomap_readahead_iter(struct iomap_iter *iter,
		struct iomap_read_folio_ctx *ctx)
		struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_pending)
{
	int ret;

	while (iomap_length(iter)) {
		if (ctx->cur_folio &&
		    offset_in_folio(ctx->cur_folio, iter->pos) == 0) {
			if (!ctx->cur_folio_in_bio)
				folio_unlock(ctx->cur_folio);
			iomap_read_end(ctx->cur_folio, *cur_bytes_pending);
			ctx->cur_folio = NULL;
		}
		if (!ctx->cur_folio) {
			ctx->cur_folio = readahead_folio(ctx->rac);
			if (WARN_ON_ONCE(!ctx->cur_folio))
				return -EINVAL;
			ctx->cur_folio_in_bio = false;
			*cur_bytes_pending = 0;
		}
		ret = iomap_read_folio_iter(iter, ctx);
		ret = iomap_read_folio_iter(iter, ctx, cur_bytes_pending);
		if (ret)
			return ret;
	}
@@ -554,16 +633,18 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
	struct iomap_read_folio_ctx ctx = {
		.rac	= rac,
	};
	size_t cur_bytes_pending;

	trace_iomap_readahead(rac->mapping->host, readahead_count(rac));

	while (iomap_iter(&iter, ops) > 0)
		iter.status = iomap_readahead_iter(&iter, &ctx);
		iter.status = iomap_readahead_iter(&iter, &ctx,
					&cur_bytes_pending);

	iomap_bio_submit_read(&ctx);

	if (ctx.cur_folio && !ctx.cur_folio_in_bio)
		folio_unlock(ctx.cur_folio);
	if (ctx.cur_folio)
		iomap_read_end(ctx.cur_folio, cur_bytes_pending);
}
EXPORT_SYMBOL_GPL(iomap_readahead);