Commit e820dbeb authored by Qu Wenruo, committed by David Sterba
Browse files

btrfs: convert btrfs_buffered_write() to use folios



The buffered write path is still heavily utilizing the page interface.
Since we have converted it to do page-by-page copying, it's much easier
to convert all involved functions to the folio interface. This involves:

- btrfs_copy_from_user()
- btrfs_drop_page()
- prepare_uptodate_page()
- prepare_one_page()
- lock_and_cleanup_extent_if_need()
- btrfs_dirty_page()

All functions are changed to accept a folio parameter, and if the word
"page" is in the function name, it is changed to "folio" too.

The function btrfs_dirty_page() is exported for the v1 space cache, so update
the v1 cache call site to convert its page to a folio for the new interface.

And there is a small enhancement for prepare_one_folio(): instead of
manually waiting for the page writeback, let __filemap_get_folio()
handle that by using FGP_WRITEBEGIN, which implies
(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE).

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent c87c2997
Loading
Loading
Loading
Loading
+62 −76
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@
 * calls into generic code.
 */
static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
					 struct page *page, struct iov_iter *i)
					 struct folio *folio, struct iov_iter *i)
{
	size_t copied = 0;
	size_t total_copied = 0;
@@ -53,10 +53,10 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
		/*
		 * Copy data from userspace to the current page
		 */
		copied = copy_page_from_iter_atomic(page, offset, count, i);
		copied = copy_folio_from_iter_atomic(folio, offset, count, i);

		/* Flush processor's dcache for this page */
		flush_dcache_page(page);
		flush_dcache_folio(folio);

		/*
		 * if we get a partial write, we can end up with
@@ -68,7 +68,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
		 * back to page at a time copies after we return 0.
		 */
		if (unlikely(copied < count)) {
			if (!PageUptodate(page)) {
			if (!folio_test_uptodate(folio)) {
				iov_iter_revert(i, copied);
				copied = 0;
			}
@@ -84,9 +84,9 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
}

/*
 * unlocks pages after btrfs_file_write is done with them
 * Unlock folio after btrfs_file_write() is done with it.
 */
static void btrfs_drop_page(struct btrfs_fs_info *fs_info, struct page *page,
static void btrfs_drop_folio(struct btrfs_fs_info *fs_info, struct folio *folio,
			     u64 pos, u64 copied)
{
	u64 block_start = round_down(pos, fs_info->sectorsize);
@@ -94,26 +94,25 @@ static void btrfs_drop_page(struct btrfs_fs_info *fs_info, struct page *page,

	ASSERT(block_len <= U32_MAX);
	/*
	 * Page checked is some magic around finding pages that have been
	 * modified without going through btrfs_set_page_dirty clear it here.
	 * Folio checked is some magic around finding folios that have been
	 * modified without going through btrfs_dirty_folio().  Clear it here.
	 * There should be no need to mark the pages accessed as
	 * prepare_one_page() should have marked them accessed in
	 * prepare_one_page() via find_or_create_page()
	 * prepare_one_folio() should have marked them accessed in
	 * prepare_one_folio() via find_or_create_page()
	 */
	btrfs_folio_clamp_clear_checked(fs_info, page_folio(page), block_start,
					block_len);
	unlock_page(page);
	put_page(page);
	btrfs_folio_clamp_clear_checked(fs_info, folio, block_start, block_len);
	folio_unlock(folio);
	folio_put(folio);
}

/*
 * After btrfs_copy_from_user(), update the following things for delalloc:
 * - Mark newly dirtied pages as DELALLOC in the io tree.
 * - Mark newly dirtied folio as DELALLOC in the io tree.
 *   Used to advise which range is to be written back.
 * - Mark modified pages as Uptodate/Dirty and not needing COW fixup
 * - Mark modified folio as Uptodate/Dirty and not needing COW fixup
 * - Update inode size for past EOF write
 */
int btrfs_dirty_page(struct btrfs_inode *inode, struct page *page, loff_t pos,
int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos,
		      size_t write_bytes, struct extent_state **cached, bool noreserve)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
@@ -122,7 +121,6 @@ int btrfs_dirty_page(struct btrfs_inode *inode, struct page *page, loff_t pos,
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	struct folio *folio = page_folio(page);
	loff_t isize = i_size_read(&inode->vfs_inode);
	unsigned int extra_bits = 0;

@@ -835,14 +833,12 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
}

/*
 * on error we return an unlocked page and the error value
 * on success we return a locked page and 0
 * On error return an unlocked folio and the error value
 * On success return a locked folio and 0
 */
static int prepare_uptodate_page(struct inode *inode,
				 struct page *page, u64 pos,
static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64 pos,
				  u64 len, bool force_uptodate)
{
	struct folio *folio = page_folio(page);
	u64 clamp_start = max_t(u64, pos, folio_pos(folio));
	u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio));
	int ret = 0;
@@ -873,23 +869,13 @@ static int prepare_uptodate_page(struct inode *inode,
	 * The private flag check is essential for subpage as we need to store
	 * extra bitmap using folio private.
	 */
	if (page->mapping != inode->i_mapping || !folio_test_private(folio)) {
	if (folio->mapping != inode->i_mapping || !folio_test_private(folio)) {
		folio_unlock(folio);
		return -EAGAIN;
	}
	return 0;
}

/*
 * Build the FGP flags passed to the page cache lookup in prepare_one_page().
 *
 * Always requests FGP_LOCK | FGP_ACCESSED | FGP_CREAT; FGP_NOWAIT is added
 * when @nowait is true.
 */
static fgf_t get_prepare_fgp_flags(bool nowait)
{
	fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;

	if (nowait)
		fgp_flags |= FGP_NOWAIT;

	return fgp_flags;
}

static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
{
	gfp_t gfp;
@@ -904,60 +890,60 @@ static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
}

/*
 * this just gets page into the page cache and locks them down.
 * Get folio into the page cache and lock it.
 */
static noinline int prepare_one_page(struct inode *inode, struct page **page_ret,
static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_ret,
				      loff_t pos, size_t write_bytes,
				      bool force_uptodate, bool nowait)
{
	unsigned long index = pos >> PAGE_SHIFT;
	gfp_t mask = get_prepare_gfp_flags(inode, nowait);
	fgf_t fgp_flags = get_prepare_fgp_flags(nowait);
	struct page *page;
	fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN);
	struct folio *folio;
	int ret = 0;

again:
	page = pagecache_get_page(inode->i_mapping, index, fgp_flags,
				  mask | __GFP_WRITE);
	if (!page) {
	folio = __filemap_get_folio(inode->i_mapping, index, fgp_flags, mask);
	if (IS_ERR(folio)) {
		if (nowait)
			ret = -EAGAIN;
		else
			ret = -ENOMEM;
			ret = PTR_ERR(folio);
		return ret;
	}
	ret = set_page_extent_mapped(page);
	/* Only support page sized folio yet. */
	ASSERT(folio_order(folio) == 0);
	ret = set_folio_extent_mapped(folio);
	if (ret < 0) {
		unlock_page(page);
		put_page(page);
		folio_unlock(folio);
		folio_put(folio);
		return ret;
	}
	ret = prepare_uptodate_page(inode, page, pos, write_bytes, force_uptodate);
	ret = prepare_uptodate_folio(inode, folio, pos, write_bytes, force_uptodate);
	if (ret) {
		/* The page is already unlocked. */
		put_page(page);
		/* The folio is already unlocked. */
		folio_put(folio);
		if (!nowait && ret == -EAGAIN) {
			ret = 0;
			goto again;
		}
		return ret;
	}
	wait_on_page_writeback(page);
	*page_ret = page;
	*folio_ret = folio;
	return 0;
}

/*
 * This function locks the extent and properly waits for data=ordered extents
 * to finish before allowing the pages to be modified if need.
 * Locks the extent and properly waits for data=ordered extents to finish
 * before allowing the folios to be modified if need.
 *
 * The return value:
 * Return:
 * 1 - the extent is locked
 * 0 - the extent is not locked, and everything is OK
 * -EAGAIN - need re-prepare the pages
 * -EAGAIN - need to prepare the folios again
 */
static noinline int
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
				loff_t pos, size_t write_bytes,
				u64 *lockstart, u64 *lockend, bool nowait,
				struct extent_state **cached_state)
@@ -976,8 +962,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
		if (nowait) {
			if (!try_lock_extent(&inode->io_tree, start_pos, last_pos,
					     cached_state)) {
				unlock_page(page);
				put_page(page);
				folio_unlock(folio);
				folio_put(folio);
				return -EAGAIN;
			}
		} else {
@@ -991,8 +977,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
		    ordered->file_offset <= last_pos) {
			unlock_extent(&inode->io_tree, start_pos, last_pos,
				      cached_state);
			unlock_page(page);
			put_page(page);
			folio_unlock(folio);
			folio_put(folio);
			btrfs_start_ordered_extent(ordered);
			btrfs_put_ordered_extent(ordered);
			return -EAGAIN;
@@ -1006,10 +992,10 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
	}

	/*
	 * We should be called after prepare_one_page() which should have locked
	 * We should be called after prepare_one_folio() which should have locked
	 * all pages in the range.
	 */
	WARN_ON(!PageLocked(page));
	WARN_ON(!folio_test_locked(folio));

	return ret;
}
@@ -1190,12 +1176,12 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
		size_t copied;
		size_t dirty_sectors;
		size_t num_sectors;
		struct page *page = NULL;
		struct folio *folio = NULL;
		int extents_locked;
		bool force_page_uptodate = false;

		/*
		 * Fault pages before locking them in prepare_one_page()
		 * Fault pages before locking them in prepare_one_folio()
		 * to avoid recursive lock
		 */
		if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
@@ -1261,7 +1247,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
			break;
		}

		ret = prepare_one_page(inode, &page, pos, write_bytes,
		ret = prepare_one_folio(inode, &folio, pos, write_bytes,
					force_page_uptodate, false);
		if (ret) {
			btrfs_delalloc_release_extents(BTRFS_I(inode),
@@ -1270,7 +1256,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
		}

		extents_locked = lock_and_cleanup_extent_if_need(BTRFS_I(inode),
						page, pos, write_bytes, &lockstart,
						folio, pos, write_bytes, &lockstart,
						&lockend, nowait, &cached_state);
		if (extents_locked < 0) {
			if (!nowait && extents_locked == -EAGAIN)
@@ -1282,7 +1268,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
			break;
		}

		copied = btrfs_copy_from_user(pos, write_bytes, page, i);
		copied = btrfs_copy_from_user(pos, write_bytes, folio, i);

		num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
		dirty_sectors = round_up(copied + sector_offset,
@@ -1314,7 +1300,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
		release_bytes = round_up(copied + sector_offset,
					fs_info->sectorsize);

		ret = btrfs_dirty_page(BTRFS_I(inode), page, pos, copied,
		ret = btrfs_dirty_folio(BTRFS_I(inode), folio, pos, copied,
					&cached_state, only_release_metadata);

		/*
@@ -1332,7 +1318,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)

		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
		if (ret) {
			btrfs_drop_page(fs_info, page, pos, copied);
			btrfs_drop_folio(fs_info, folio, pos, copied);
			break;
		}

@@ -1340,7 +1326,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
		if (only_release_metadata)
			btrfs_check_nocow_unlock(BTRFS_I(inode));

		btrfs_drop_page(fs_info, page, pos, copied);
		btrfs_drop_folio(fs_info, folio, pos, copied);

		cond_resched();

+2 −2
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
			    const struct btrfs_ioctl_encoded_io_args *encoded);
int btrfs_release_file(struct inode *inode, struct file *file);
int btrfs_dirty_page(struct btrfs_inode *inode, struct page *page, loff_t pos,
int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos,
		      size_t write_bytes, struct extent_state **cached, bool noreserve);
int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end);
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
+2 −2
Original line number Diff line number Diff line
@@ -1464,7 +1464,7 @@ static int __btrfs_write_out_cache(struct inode *inode,
		u64 dirty_start = i * PAGE_SIZE;
		u64 dirty_len = min_t(u64, dirty_start + PAGE_SIZE, i_size) - dirty_start;

		ret = btrfs_dirty_page(BTRFS_I(inode), io_ctl->pages[i],
		ret = btrfs_dirty_folio(BTRFS_I(inode), page_folio(io_ctl->pages[i]),
					dirty_start, dirty_len, &cached_state, false);
		if (ret < 0)
			goto out_nospc;