Commit bd610c09 authored by Qu Wenruo's avatar Qu Wenruo Committed by David Sterba
Browse files

btrfs: only unlock the to-be-submitted ranges inside a folio



[SUBPAGE COMPRESSION LIMITS]
Currently inside writepage_delalloc(), if a delalloc range is going to
be submitted asynchronously (inline or compression, the page
dirty/writeback/unlock are all handled in at different time, not at the
submission time), then we return 1 and extent_writepage() will skip the
submission.

This is fine if every sector matches page size, but if a sector is
smaller than page size (aka, subpage case), then it can be very
problematic, for example for the following 64K page:

     0     16K     32K    48K     64K
     |/|   |///////|      |/|
       |                    |
       4K                   52K

Where |/| is the dirty range we need to submit.

In the above case, we need the following different handling for the 3
ranges:

- [0, 4K) needs to be submitted for regular write
  A single sector cannot be compressed.

- [16K, 32K) needs to be submitted for compressed write

- [48K, 52K) needs to be submitted for regular write.

Above, if we try to submit [16K, 32K) for compressed write, we will
return 1 and immediately, and without submitting the remaining
[48K, 52K) range.

Furthermore, since extent_writepage() will exit without unlocking any
sectors, the submitted range [0, 4K) will not have sector unlocked.

That's the reason why for now subpage is only allowed for full page
range.

[ENHANCEMENT]
- Introduce a submission bitmap at btrfs_bio_ctrl::submit_bitmap
  This records which sectors will be submitted by extent_writepage_io().
  This allows us to track which sectors needs to be submitted thus later
  to be properly unlocked.

  For asynchronously submitted range (inline/compression), the
  corresponding bits will be cleared from that bitmap.

- Only return 1 if no sector needs to be submitted in
  writepage_delalloc()

- Only submit sectors marked by submission bitmap inside
  extent_writepage_io()
  So we won't touch the asynchronously submitted part.

- Introduce btrfs_folio_end_writer_lock_bitmap() helper
  This will only unlock the involved sectors specified by @bitmap
  parameter, to avoid touching the range asynchronously submitted.

Please note that, since subpage compression is still limited to page
aligned range, this change is only a preparation for future sector
perfect compression support for subpage.

Signed-off-by: default avatarQu Wenruo <wqu@suse.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 49a99073
Loading
Loading
Loading
Loading
+51 −38
Original line number Diff line number Diff line
@@ -101,6 +101,13 @@ struct btrfs_bio_ctrl {
	blk_opf_t opf;
	btrfs_bio_end_io_t end_io_func;
	struct writeback_control *wbc;

	/*
	 * The sectors of the page which are going to be submitted by
	 * extent_writepage_io().
	 * This is to avoid touching ranges covered by compression/inline.
	 */
	unsigned long submit_bitmap;
};

static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
@@ -1106,9 +1113,10 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
 */
static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
						 struct folio *folio,
						 struct writeback_control *wbc)
						 struct btrfs_bio_ctrl *bio_ctrl)
{
	struct btrfs_fs_info *fs_info = inode_to_fs_info(&inode->vfs_inode);
	struct writeback_control *wbc = bio_ctrl->wbc;
	const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping);
	const u64 page_start = folio_pos(folio);
	const u64 page_end = page_start + folio_size(folio) - 1;
@@ -1123,6 +1131,14 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
	u64 delalloc_to_write = 0;
	int ret = 0;

	/* Save the dirty bitmap as our submission bitmap will be a subset of it. */
	if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
		ASSERT(fs_info->sectors_per_page > 1);
		btrfs_get_subpage_dirty_bitmap(fs_info, folio, &bio_ctrl->submit_bitmap);
	} else {
		bio_ctrl->submit_bitmap = 1;
	}

	/* Lock all (subpage) delalloc ranges inside the folio first. */
	while (delalloc_start < page_end) {
		delalloc_end = page_end;
@@ -1190,22 +1206,18 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
		}

		/*
		 * We can hit btrfs_run_delalloc_range() with >0 return value.
		 *
		 * This happens when either the IO is already done and folio
		 * unlocked (inline) or the IO submission and folio unlock would
		 * be handled as async (compression).
		 *
		 * Inline is only possible for regular sectorsize for now.
		 *
		 * Compression is possible for both subpage and regular cases,
		 * but even for subpage compression only happens for page aligned
		 * range, thus the found delalloc range must go beyond current
		 * folio.
		 * We have some ranges that's going to be submitted asynchronously
		 * (compression or inline).  These range have their own control
		 * on when to unlock the pages.  We should not touch them
		 * anymore, so clear the range from the submission bitmap.
		 */
		if (ret > 0)
			ASSERT(!is_subpage || found_start + found_len >= page_end);

		if (ret > 0) {
			unsigned int start_bit = (found_start - page_start) >>
						 fs_info->sectorsize_bits;
			unsigned int end_bit = (min(page_end + 1, found_start + found_len) -
						page_start) >> fs_info->sectorsize_bits;
			bitmap_clear(&bio_ctrl->submit_bitmap, start_bit, end_bit - start_bit);
		}
		/*
		 * Above btrfs_run_delalloc_range() may have unlocked the folio,
		 * thus for the last range, we cannot touch the folio anymore.
@@ -1230,10 +1242,10 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
		DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE);

	/*
	 * If btrfs_run_dealloc_range() already started I/O and unlocked
	 * the folios, we just need to account for them here.
	 * If all ranges are submitted asynchronously, we just need to account
	 * for them here.
	 */
	if (ret == 1) {
	if (bitmap_empty(&bio_ctrl->submit_bitmap, fs_info->sectors_per_page)) {
		wbc->nr_to_write -= delalloc_to_write;
		return 1;
	}
@@ -1331,15 +1343,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	unsigned long range_bitmap = 0;
	/*
	 * This is the default value for sectorsize == PAGE_SIZE case.
	 * We known we need to write the dirty sector (aka the page),
	 * even if the page is not dirty (we cleared it before entering).
	 *
	 * For subpage cases we will get the correct bitmap later.
	 */
	unsigned long dirty_bitmap = 1;
	unsigned int bitmap_size = 1;
	bool submitted_io = false;
	const u64 folio_start = folio_pos(folio);
	u64 cur;
@@ -1357,18 +1360,14 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
		return 1;
	}

	if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
		ASSERT(fs_info->sectors_per_page > 1);
		btrfs_get_subpage_dirty_bitmap(fs_info, folio, &dirty_bitmap);
		bitmap_size = fs_info->sectors_per_page;
	}
	for (cur = start; cur < start + len; cur += fs_info->sectorsize)
		set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap);
	bitmap_and(&dirty_bitmap, &dirty_bitmap, &range_bitmap, bitmap_size);
	bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap,
		   fs_info->sectors_per_page);

	bio_ctrl->end_io_func = end_bbio_data_write;

	for_each_set_bit(bit, &dirty_bitmap, bitmap_size) {
	for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) {
		cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits);

		if (cur >= i_size) {
@@ -1421,6 +1420,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl)
{
	struct inode *inode = folio->mapping->host;
	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
	const u64 page_start = folio_pos(folio);
	int ret;
	size_t pg_offset;
@@ -1442,11 +1442,16 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
	if (folio->index == end_index)
		folio_zero_range(folio, pg_offset, folio_size(folio) - pg_offset);

	/*
	 * Default to unlock the whole folio.
	 * The proper bitmap can only be initialized until writepage_delalloc().
	 */
	bio_ctrl->submit_bitmap = (unsigned long)-1;
	ret = set_folio_extent_mapped(folio);
	if (ret < 0)
		goto done;

	ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl->wbc);
	ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl);
	if (ret == 1)
		return 0;
	if (ret)
@@ -1466,8 +1471,11 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
		mapping_set_error(folio->mapping, ret);
	}

	btrfs_folio_end_writer_lock(inode_to_fs_info(inode), folio,
				    page_start, PAGE_SIZE);
	/*
	 * Only unlock ranges that are submitted. As there can be some async
	 * submitted ranges inside the folio.
	 */
	btrfs_folio_end_writer_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap);
	ASSERT(ret <= 0);
	return ret;
}
@@ -2210,6 +2218,11 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
		if (pages_dirty && folio != locked_folio)
			ASSERT(folio_test_dirty(folio));

		/*
		 * Set the submission bitmap to submit all sectors.
		 * extent_writepage_io() will do the truncation correctly.
		 */
		bio_ctrl.submit_bitmap = (unsigned long)-1;
		ret = extent_writepage_io(BTRFS_I(inode), folio, cur, cur_len,
					  &bio_ctrl, i_size);
		if (ret == 1)
+33 −0
Original line number Diff line number Diff line
@@ -424,6 +424,39 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
		folio_unlock(folio);
}

void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
					struct folio *folio, unsigned long bitmap)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked;
	unsigned long flags;
	bool last = false;
	int cleared = 0;
	int bit;

	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
		folio_unlock(folio);
		return;
	}

	if (atomic_read(&subpage->writers) == 0) {
		/* No writers, locked by plain lock_page(). */
		folio_unlock(folio);
		return;
	}

	spin_lock_irqsave(&subpage->lock, flags);
	for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) {
		if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
			cleared++;
	}
	ASSERT(atomic_read(&subpage->writers) >= cleared);
	last = atomic_sub_and_test(cleared, &subpage->writers);
	spin_unlock_irqrestore(&subpage->lock, flags);
	if (last)
		folio_unlock(folio);
}

#define subpage_test_bitmap_all_set(fs_info, subpage, name)		\
	bitmap_test_range_all_set(subpage->bitmaps,			\
			fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
+2 −0
Original line number Diff line number Diff line
@@ -106,6 +106,8 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len);
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len);
void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
					struct folio *folio, unsigned long bitmap);
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
				      struct folio *folio, u64 search_start,
				      u64 *found_start_ret, u32 *found_len_ret);