Commit 3304b3fe authored by Linus Torvalds
Browse files
Pull vfs iomap updates from Christian Brauner:

 - Erofs page cache sharing preliminaries:

   Plumb a void *private parameter through iomap_read_folio() and
   iomap_readahead() into iomap_iter->private, matching iomap DIO. Erofs
   uses this to replace a bogus kmap_to_page() call, as preparatory work
   for page cache sharing.

 - Fix for invalid folio access:

   Fix an invalid folio access when a folio without iomap_folio_state
   is fully submitted to the IO helper — the helper may call
   folio_end_read() at any time, so ctx->cur_folio must be invalidated
   after full submission.

* tag 'vfs-7.0-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  iomap: fix invalid folio access after folio_end_read()
  erofs: hold read context in iomap_iter if needed
  iomap: stash iomap read ctx in the private field of iomap_iter
parents 157d3d6e aa35dd5c
Loading
Loading
Loading
Loading
+46 −21
Original line number Diff line number Diff line
@@ -267,13 +267,20 @@ void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
	folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
}

/*
 * EROFS state carried through an iomap iteration via iomap_iter->private.
 *
 * erofs_iomap_begin() fills it in when it maps inline (IOMAP_INLINE) data
 * with erofs_read_metabuf(); erofs_iomap_end() uses the saved values to
 * rebuild the erofs_buf it hands to erofs_put_metabuf().
 */
struct erofs_iomap_iter_ctx {
	struct page *page;	/* buf.page saved by erofs_iomap_begin() */
	void *base;		/* buf.base saved by erofs_iomap_begin(); checked and reset to NULL in erofs_iomap_end() */
};

static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	int ret;
	struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
	struct erofs_iomap_iter_ctx *ctx = iter->private;
	struct super_block *sb = inode->i_sb;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	int ret;

	map.m_la = offset;
	map.m_llen = length;
@@ -284,7 +291,6 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
	iomap->offset = map.m_la;
	iomap->length = map.m_llen;
	iomap->flags = 0;
	iomap->private = NULL;
	iomap->addr = IOMAP_NULL_ADDR;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
@@ -310,16 +316,20 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
	}

	if (map.m_flags & EROFS_MAP_META) {
		void *ptr;
		iomap->type = IOMAP_INLINE;
		/* read context should read the inlined data */
		if (ctx) {
			struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
			void *ptr;

		iomap->type = IOMAP_INLINE;
			ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
						 erofs_inode_in_metabox(inode));
			if (IS_ERR(ptr))
				return PTR_ERR(ptr);
			iomap->inline_data = ptr;
		iomap->private = buf.base;
			ctx->page = buf.page;
			ctx->base = buf.base;
		}
	} else {
		iomap->type = IOMAP_MAPPED;
	}
@@ -329,18 +339,18 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
		ssize_t written, unsigned int flags, struct iomap *iomap)
{
	void *ptr = iomap->private;
	struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
	struct erofs_iomap_iter_ctx *ctx = iter->private;

	if (ptr) {
	if (ctx && ctx->base) {
		struct erofs_buf buf = {
			.page = kmap_to_page(ptr),
			.base = ptr,
			.page = ctx->page,
			.base = ctx->base,
		};

		DBG_BUGON(iomap->type != IOMAP_INLINE);
		erofs_put_metabuf(&buf);
	} else {
		DBG_BUGON(iomap->type == IOMAP_INLINE);
		ctx->base = NULL;
	}
	return written;
}
@@ -370,18 +380,30 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 */
static int erofs_read_folio(struct file *file, struct folio *folio)
{
	struct iomap_read_folio_ctx read_ctx = {
		.ops		= &iomap_bio_read_ops,
		.cur_folio	= folio,
	};
	struct erofs_iomap_iter_ctx iter_ctx = {};

	trace_erofs_read_folio(folio, true);

	iomap_bio_read_folio(folio, &erofs_iomap_ops);
	iomap_read_folio(&erofs_iomap_ops, &read_ctx, &iter_ctx);
	return 0;
}

static void erofs_readahead(struct readahead_control *rac)
{
	struct iomap_read_folio_ctx read_ctx = {
		.ops		= &iomap_bio_read_ops,
		.rac		= rac,
	};
	struct erofs_iomap_iter_ctx iter_ctx = {};

	trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
					readahead_count(rac), true);

	iomap_bio_readahead(rac, &erofs_iomap_ops);
	iomap_readahead(&erofs_iomap_ops, &read_ctx, &iter_ctx);
}

static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
@@ -401,9 +423,12 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
	if (IS_DAX(inode))
		return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
#endif
	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev) {
		struct erofs_iomap_iter_ctx iter_ctx = {};

		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
				    NULL, 0, NULL, 0);
				    NULL, 0, &iter_ctx, 0);
	}
	return filemap_read(iocb, to, 0);
}

+2 −2
Original line number Diff line number Diff line
@@ -979,7 +979,7 @@ static int fuse_read_folio(struct file *file, struct folio *folio)
		return -EIO;
	}

	iomap_read_folio(&fuse_iomap_ops, &ctx);
	iomap_read_folio(&fuse_iomap_ops, &ctx, NULL);
	fuse_invalidate_atime(inode);
	return 0;
}
@@ -1081,7 +1081,7 @@ static void fuse_readahead(struct readahead_control *rac)
	if (fuse_is_bad(inode))
		return;

	iomap_readahead(&fuse_iomap_ops, &ctx);
	iomap_readahead(&fuse_iomap_ops, &ctx, NULL);
}

static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
+31 −26
Original line number Diff line number Diff line
@@ -418,8 +418,6 @@ static void iomap_read_init(struct folio *folio)
	struct iomap_folio_state *ifs = folio->private;

	if (ifs) {
		size_t len = folio_size(folio);

		/*
		 * ifs->read_bytes_pending is used to track how many bytes are
		 * read in asynchronously by the IO helper. We need to track
@@ -427,23 +425,19 @@ static void iomap_read_init(struct folio *folio)
		 * reading in all the necessary ranges of the folio and can end
		 * the read.
		 *
		 * Increase ->read_bytes_pending by the folio size to start, and
		 * add a +1 bias. We'll subtract the bias and any uptodate /
		 * zeroed ranges that did not require IO in iomap_read_end()
		 * after we're done processing the folio.
		 * Increase ->read_bytes_pending by the folio size to start.
		 * We'll subtract any uptodate / zeroed ranges that did not
		 * require IO in iomap_read_end() after we're done processing
		 * the folio.
		 *
		 * We do this because otherwise, we would have to increment
		 * ifs->read_bytes_pending every time a range in the folio needs
		 * to be read in, which can get expensive since the spinlock
		 * needs to be held whenever modifying ifs->read_bytes_pending.
		 *
		 * We add the bias to ensure the read has not been ended on the
		 * folio when iomap_read_end() is called, even if the IO helper
		 * has already finished reading in the entire folio.
		 */
		spin_lock_irq(&ifs->state_lock);
		WARN_ON_ONCE(ifs->read_bytes_pending != 0);
		ifs->read_bytes_pending = len + 1;
		ifs->read_bytes_pending = folio_size(folio);
		spin_unlock_irq(&ifs->state_lock);
	}
}
@@ -474,11 +468,9 @@ static void iomap_read_end(struct folio *folio, size_t bytes_submitted)

		/*
		 * Subtract any bytes that were initially accounted to
		 * read_bytes_pending but skipped for IO. The +1 accounts for
		 * the bias we added in iomap_read_init().
		 * read_bytes_pending but skipped for IO.
		 */
		ifs->read_bytes_pending -=
			(folio_size(folio) + 1 - bytes_submitted);
		ifs->read_bytes_pending -= folio_size(folio) - bytes_submitted;

		/*
		 * If !ifs->read_bytes_pending, this means all pending reads by
@@ -492,14 +484,16 @@ static void iomap_read_end(struct folio *folio, size_t bytes_submitted)
		spin_unlock_irq(&ifs->state_lock);
		if (end_read)
			folio_end_read(folio, uptodate);
	} else if (!bytes_submitted) {
	} else {
		/*
		 * If there were no bytes submitted, this means we are
		 * responsible for unlocking the folio here, since no IO helper
		 * has taken ownership of it. If there were bytes submitted,
		 * then the IO helper will end the read via
		 * iomap_finish_folio_read().
		 * If a folio without an ifs is submitted to the IO helper, the
		 * read must be on the entire folio and the IO helper takes
		 * ownership of the folio. This means we should only enter
		 * iomap_read_end() for the !ifs case if no bytes were submitted
		 * to the IO helper, in which case we are responsible for
		 * unlocking the folio here.
		 */
		WARN_ON_ONCE(bytes_submitted);
		folio_unlock(folio);
	}
}
@@ -511,6 +505,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
	loff_t pos = iter->pos;
	loff_t length = iomap_length(iter);
	struct folio *folio = ctx->cur_folio;
	size_t folio_len = folio_size(folio);
	size_t poff, plen;
	loff_t pos_diff;
	int ret;
@@ -524,8 +519,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,

	ifs_alloc(iter->inode, folio, iter->flags);

	length = min_t(loff_t, length,
			folio_size(folio) - offset_in_folio(folio, pos));
	length = min_t(loff_t, length, folio_len - offset_in_folio(folio, pos));
	while (length) {
		iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff,
				&plen);
@@ -555,7 +549,15 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
						  plen, ret, GFP_NOFS);
			if (ret)
				return ret;

			*bytes_submitted += plen;
			/*
			 * If the entire folio has been read in by the IO
			 * helper, then the helper owns the folio and will end
			 * the read on it.
			 */
			if (*bytes_submitted == folio_len)
				ctx->cur_folio = NULL;
		}

		ret = iomap_iter_advance(iter, plen);
@@ -568,13 +570,14 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
}

void iomap_read_folio(const struct iomap_ops *ops,
		struct iomap_read_folio_ctx *ctx)
		struct iomap_read_folio_ctx *ctx, void *private)
{
	struct folio *folio = ctx->cur_folio;
	struct iomap_iter iter = {
		.inode		= folio->mapping->host,
		.pos		= folio_pos(folio),
		.len		= folio_size(folio),
		.private	= private,
	};
	size_t bytes_submitted = 0;
	int ret;
@@ -588,7 +591,8 @@ void iomap_read_folio(const struct iomap_ops *ops,
	if (ctx->ops->submit_read)
		ctx->ops->submit_read(ctx);

	iomap_read_end(folio, bytes_submitted);
	if (ctx->cur_folio)
		iomap_read_end(ctx->cur_folio, bytes_submitted);
}
EXPORT_SYMBOL_GPL(iomap_read_folio);

@@ -633,13 +637,14 @@ static int iomap_readahead_iter(struct iomap_iter *iter,
 * the filesystem to be reentered.
 */
void iomap_readahead(const struct iomap_ops *ops,
		struct iomap_read_folio_ctx *ctx)
		struct iomap_read_folio_ctx *ctx, void *private)
{
	struct readahead_control *rac = ctx->rac;
	struct iomap_iter iter = {
		.inode	= rac->mapping->host,
		.pos	= readahead_pos(rac),
		.len	= readahead_length(rac),
		.private = private,
	};
	size_t cur_bytes_submitted;

+4 −4
Original line number Diff line number Diff line
@@ -345,9 +345,9 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
		const struct iomap_ops *ops,
		const struct iomap_write_ops *write_ops, void *private);
void iomap_read_folio(const struct iomap_ops *ops,
		struct iomap_read_folio_ctx *ctx);
		struct iomap_read_folio_ctx *ctx, void *private);
void iomap_readahead(const struct iomap_ops *ops,
		struct iomap_read_folio_ctx *ctx);
		struct iomap_read_folio_ctx *ctx, void *private);
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
@@ -599,7 +599,7 @@ static inline void iomap_bio_read_folio(struct folio *folio,
		.cur_folio	= folio,
	};

	iomap_read_folio(ops, &ctx);
	iomap_read_folio(ops, &ctx, NULL);
}

static inline void iomap_bio_readahead(struct readahead_control *rac,
@@ -610,7 +610,7 @@ static inline void iomap_bio_readahead(struct readahead_control *rac,
		.rac		= rac,
	};

	iomap_readahead(ops, &ctx);
	iomap_readahead(ops, &ctx, NULL);
}
#endif /* CONFIG_BLOCK */