Commit b5d760d5 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'vfs-6.17-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs iomap updates from Christian Brauner:

 - Refactor the iomap writeback code and split the generic and ioend/bio
   based writeback code.

   There are two methods that define the split between the generic
   writeback code, and the implementation of it, and all knowledge of
   ioends and bios now sits below that layer.

 - Add fuse iomap support for buffered writes and dirty folio writeback.

   This is needed so that granular uptodate and dirty tracking can be
   used in fuse when large folios are enabled. This has two big
   advantages. For writes, instead of the entire folio needing to be
   read into the page cache, only the relevant portions need to be. For
   writeback, only the dirty portions need to be written back instead of
   the entire folio.

* tag 'vfs-6.17-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fuse: refactor writeback to use iomap_writepage_ctx inode
  fuse: hook into iomap for invalidating and checking partial uptodateness
  fuse: use iomap for folio laundering
  fuse: use iomap for writeback
  fuse: use iomap for buffered writes
  iomap: build the writeback code without CONFIG_BLOCK
  iomap: add read_folio_range() handler for buffered writes
  iomap: improve argument passing to iomap_read_folio_sync
  iomap: replace iomap_folio_ops with iomap_write_ops
  iomap: export iomap_writeback_folio
  iomap: move folio_unlock out of iomap_writeback_folio
  iomap: rename iomap_writepage_map to iomap_writeback_folio
  iomap: move all ioend handling to ioend.c
  iomap: add public helpers for uptodate state manipulation
  iomap: hide ioends from the generic writeback code
  iomap: refactor the writeback interface
  iomap: cleanup the pending writeback tracking in iomap_writepage_map_blocks
  iomap: pass more arguments using the iomap writeback context
  iomap: header diet
parents 0965549d d5212d81
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -167,7 +167,6 @@ structure below:
     struct dax_device   *dax_dev;
     void                *inline_data;
     void                *private;
     const struct iomap_folio_ops *folio_ops;
     u64                 validity_cookie;
 };

@@ -292,8 +291,6 @@ The fields are as follows:
   <https://lore.kernel.org/all/20180619164137.13720-7-hch@lst.de/>`_.
   This value will be passed unchanged to ``->iomap_end``.

 * ``folio_ops`` will be covered in the section on pagecache operations.

 * ``validity_cookie`` is a magic freshness value set by the filesystem
   that should be used to detect stale mappings.
   For pagecache operations this is critical for correct operation
+28 −29
Original line number Diff line number Diff line
@@ -57,21 +57,19 @@ The following address space operations can be wrapped easily:
 * ``bmap``
 * ``swap_activate``

``struct iomap_folio_ops``
``struct iomap_write_ops``
--------------------------

The ``->iomap_begin`` function for pagecache operations may set the
``struct iomap::folio_ops`` field to an ops structure to override
default behaviors of iomap:

.. code-block:: c

 struct iomap_folio_ops {
 struct iomap_write_ops {
     struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
                                unsigned len);
     void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
                       struct folio *folio);
     bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
     int (*read_folio_range)(const struct iomap_iter *iter,
     			struct folio *folio, loff_t pos, size_t len);
 };

iomap calls these functions:
@@ -127,6 +125,10 @@ iomap calls these functions:
    ``->iomap_valid``, then the iomap should be considered stale and the
    validation failed.

  - ``read_folio_range``: Called to synchronously read in the range that will
    be written to. If this function is not provided, iomap will default to
    submitting a bio read request.

These ``struct kiocb`` flags are significant for buffered I/O with iomap:

 * ``IOCB_NOWAIT``: Turns on ``IOMAP_NOWAIT``.
@@ -271,7 +273,7 @@ writeback.
It does not lock ``i_rwsem`` or ``invalidate_lock``.

The dirty bit will be cleared for all folios run through the
``->map_blocks`` machinery described below even if the writeback fails.
``->writeback_range`` machinery described below even if the writeback fails.
This is to prevent dirty folio clots when storage devices fail; an
``-EIO`` is recorded for userspace to collect via ``fsync``.

@@ -283,15 +285,14 @@ The ``ops`` structure must be specified and is as follows:
.. code-block:: c

 struct iomap_writeback_ops {
     int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
                       loff_t offset, unsigned len);
     int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
     void (*discard_folio)(struct folio *folio, loff_t pos);
    ssize_t (*writeback_range)(struct iomap_writepage_ctx *wpc,
        struct folio *folio, u64 pos, unsigned int len, u64 end_pos);
    int (*writeback_submit)(struct iomap_writepage_ctx *wpc, int error);
 };

The fields are as follows:

  - ``map_blocks``: Sets ``wpc->iomap`` to the space mapping of the file
  - ``writeback_range``: Sets ``wpc->iomap`` to the space mapping of the file
    range (in bytes) given by ``offset`` and ``len``.
    iomap calls this function for each dirty fs block in each dirty folio,
    though it will `reuse mappings
@@ -306,27 +307,26 @@ The fields are as follows:
    This revalidation must be open-coded by the filesystem; it is
    unclear if ``iomap::validity_cookie`` can be reused for this
    purpose.
    This function must be supplied by the filesystem.

  - ``submit_ioend``: Allows the file systems to hook into writeback bio
    submission.
    This might include pre-write space accounting updates, or installing
    a custom ``->bi_end_io`` function for internal purposes, such as
    deferring the ioend completion to a workqueue to run metadata update
    transactions from process context before submitting the bio.
    This function is optional.

  - ``discard_folio``: iomap calls this function after ``->map_blocks``
    fails to schedule I/O for any part of a dirty folio.
    The function should throw away any reservations that may have been
    made for the write.
    If this method fails to schedule I/O for any part of a dirty folio, it
    should throw away any reservations that may have been made for the write.
    The folio will be marked clean and an ``-EIO`` recorded in the
    pagecache.
    Filesystems can use this callback to `remove
    <https://lore.kernel.org/all/20201029163313.1766967-1-bfoster@redhat.com/>`_
    delalloc reservations to avoid having delalloc reservations for
    clean pagecache.
    This function is optional.
    This function must be supplied by the filesystem.

  - ``writeback_submit``: Submit the previously built writeback context.
    Block based file systems should use the ``iomap_ioend_writeback_submit``
    helper; other file systems can implement their own.
    File systems can optionally hook into writeback bio submission.
    This might include pre-write space accounting updates, or installing
    a custom ``->bi_end_io`` function for internal purposes, such as
    deferring the ioend completion to a workqueue to run metadata update
    transactions from process context before submitting the bio.
    This function must be supplied by the filesystem.

Pagecache Writeback Completion
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -340,10 +340,9 @@ If the write failed, it will also set the error bits on the folios and
the address space.
This can happen in interrupt or process context, depending on the
storage device.

Filesystems that need to update internal bookkeeping (e.g. unwritten
extent conversions) should provide a ``->submit_ioend`` function to
set ``struct iomap_end::bio::bi_end_io`` to its own function.
extent conversions) should set their own ``bi_end_io`` on the bios
submitted by ``->writeback_submit``.
This function should call ``iomap_finish_ioends`` after finishing its
own work (e.g. unwritten extent conversion).

+25 −12
Original line number Diff line number Diff line
@@ -540,30 +540,42 @@ static void blkdev_readahead(struct readahead_control *rac)
	iomap_readahead(rac, &blkdev_iomap_ops);
}

static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
		struct inode *inode, loff_t offset, unsigned int len)
static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc,
		struct folio *folio, u64 offset, unsigned int len, u64 end_pos)
{
	loff_t isize = i_size_read(inode);
	loff_t isize = i_size_read(wpc->inode);

	if (WARN_ON_ONCE(offset >= isize))
		return -EIO;
	if (offset >= wpc->iomap.offset &&
	    offset < wpc->iomap.offset + wpc->iomap.length)
		return 0;
	return blkdev_iomap_begin(inode, offset, isize - offset,

	if (offset < wpc->iomap.offset ||
	    offset >= wpc->iomap.offset + wpc->iomap.length) {
		int error;

		error = blkdev_iomap_begin(wpc->inode, offset, isize - offset,
				IOMAP_WRITE, &wpc->iomap, NULL);
		if (error)
			return error;
	}

	return iomap_add_to_ioend(wpc, folio, offset, end_pos, len);
}

static const struct iomap_writeback_ops blkdev_writeback_ops = {
	.map_blocks		= blkdev_map_blocks,
	.writeback_range	= blkdev_writeback_range,
	.writeback_submit	= iomap_ioend_writeback_submit,
};

static int blkdev_writepages(struct address_space *mapping,
		struct writeback_control *wbc)
{
	struct iomap_writepage_ctx wpc = { };
	struct iomap_writepage_ctx wpc = {
		.inode		= mapping->host,
		.wbc		= wbc,
		.ops		= &blkdev_writeback_ops
	};

	return iomap_writepages(mapping, wbc, &wpc, &blkdev_writeback_ops);
	return iomap_writepages(&wpc);
}

const struct address_space_operations def_blk_aops = {
@@ -714,7 +726,8 @@ blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)

static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from)
{
	return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops, NULL);
	return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops, NULL,
			NULL);
}

/*
+1 −0
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
config FUSE_FS
	tristate "FUSE (Filesystem in Userspace) support"
	select FS_POSIX_ACL
	select FS_IOMAP
	help
	  With FUSE it is possible to implement a fully functional filesystem
	  in a userspace program.
+153 −196
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include <linux/filelock.h>
#include <linux/splice.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/iomap.h>

static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
			  unsigned int open_flags, int opcode,
@@ -788,12 +789,16 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
	}
}

static int fuse_do_readfolio(struct file *file, struct folio *folio)
static int fuse_do_readfolio(struct file *file, struct folio *folio,
			     size_t off, size_t len)
{
	struct inode *inode = folio->mapping->host;
	struct fuse_mount *fm = get_fuse_mount(inode);
	loff_t pos = folio_pos(folio);
	struct fuse_folio_desc desc = { .length = folio_size(folio) };
	loff_t pos = folio_pos(folio) + off;
	struct fuse_folio_desc desc = {
		.offset = off,
		.length = len,
	};
	struct fuse_io_args ia = {
		.ap.args.page_zeroing = true,
		.ap.args.out_pages = true,
@@ -820,8 +825,6 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio)
	if (res < desc.length)
		fuse_short_read(inode, attr_ver, res, &ia.ap);

	folio_mark_uptodate(folio);

	return 0;
}

@@ -834,13 +837,26 @@ static int fuse_read_folio(struct file *file, struct folio *folio)
	if (fuse_is_bad(inode))
		goto out;

	err = fuse_do_readfolio(file, folio);
	err = fuse_do_readfolio(file, folio, 0, folio_size(folio));
	if (!err)
		folio_mark_uptodate(folio);

	fuse_invalidate_atime(inode);
 out:
	folio_unlock(folio);
	return err;
}

/*
 * Read @len bytes of @folio starting at file position @pos by forwarding
 * to fuse_do_readfolio().  The struct file to read through is taken from
 * iter->private.
 *
 * Returns whatever fuse_do_readfolio() returns.
 */
static int fuse_iomap_read_folio_range(const struct iomap_iter *iter,
				       struct folio *folio, loff_t pos,
				       size_t len)
{
	/* fuse_do_readfolio() wants a byte offset relative to the folio. */
	return fuse_do_readfolio(iter->private, folio,
				 offset_in_folio(folio, pos), len);
}

static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
			       int err)
{
@@ -1374,6 +1390,24 @@ static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
	}
}

/*
 * Write ops passed to iomap_file_buffered_write() by the fuse buffered
 * write path; only ->read_folio_range is set here.
 */
static const struct iomap_write_ops fuse_iomap_write_ops = {
	.read_folio_range = fuse_iomap_read_folio_range,
};

/*
 * ->iomap_begin for fuse: describe the whole requested range, starting at
 * @offset and spanning @length bytes, as a single IOMAP_MAPPED extent in
 * @iomap.  @inode, @flags and @srcmap are not used.
 *
 * Always returns 0.
 */
static int fuse_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
			    unsigned int flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	iomap->offset = offset;
	iomap->length = length;
	iomap->type = IOMAP_MAPPED;

	return 0;
}

/*
 * iomap ops passed to iomap_file_buffered_write(); only ->iomap_begin is
 * set here.
 */
static const struct iomap_ops fuse_iomap_ops = {
	.iomap_begin	= fuse_iomap_begin,
};

static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
@@ -1383,6 +1417,7 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
	struct inode *inode = mapping->host;
	ssize_t err, count;
	struct fuse_conn *fc = get_fuse_conn(inode);
	bool writeback = false;

	if (fc->writeback_cache) {
		/* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1391,16 +1426,11 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
		if (err)
			return err;

		if (fc->handle_killpriv_v2 &&
		    setattr_should_drop_suidgid(idmap,
						file_inode(file))) {
			goto writethrough;
		}

		return generic_file_write_iter(iocb, from);
		if (!fc->handle_killpriv_v2 ||
		    !setattr_should_drop_suidgid(idmap, file_inode(file)))
			writeback = true;
	}

writethrough:
	inode_lock(inode);

	err = count = generic_write_checks(iocb, from);
@@ -1419,6 +1449,15 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
			goto out;
		written = direct_write_fallback(iocb, from, written,
				fuse_perform_write(iocb, from));
	} else if (writeback) {
		/*
		 * Use iomap so that we can do granular uptodate reads
		 * and granular dirty tracking for large folios.
		 */
		written = iomap_file_buffered_write(iocb, from,
						    &fuse_iomap_ops,
						    &fuse_iomap_write_ops,
						    file);
	} else {
		written = fuse_perform_write(iocb, from);
	}
@@ -1793,7 +1832,7 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
		 * scope of the fi->lock alleviates xarray lock
		 * contention and noticeably improves performance.
		 */
		folio_end_writeback(ap->folios[i]);
		iomap_finish_folio_write(inode, ap->folios[i], 1);
		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
		wb_writeout_inc(&bdi->wb);
	}
@@ -1980,19 +2019,20 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
}

static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
					  uint32_t folio_index)
					  uint32_t folio_index, loff_t offset, unsigned len)
{
	struct inode *inode = folio->mapping->host;
	struct fuse_args_pages *ap = &wpa->ia.ap;

	ap->folios[folio_index] = folio;
	ap->descs[folio_index].offset = 0;
	ap->descs[folio_index].length = folio_size(folio);
	ap->descs[folio_index].offset = offset;
	ap->descs[folio_index].length = len;

	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
}

static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio,
							     size_t offset,
							     struct fuse_file *ff)
{
	struct inode *inode = folio->mapping->host;
@@ -2005,7 +2045,7 @@ static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio
		return NULL;

	fuse_writepage_add_to_bucket(fc, wpa);
	fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio), 0);
	fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio) + offset, 0);
	wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
	wpa->inode = inode;
	wpa->ia.ff = ff;
@@ -2017,63 +2057,28 @@ static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio
	return wpa;
}

static int fuse_writepage_locked(struct folio *folio)
{
	struct address_space *mapping = folio->mapping;
	struct inode *inode = mapping->host;
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_writepage_args *wpa;
	struct fuse_args_pages *ap;
	struct fuse_file *ff;
	int error = -EIO;

	ff = fuse_write_file_get(fi);
	if (!ff)
		goto err;

	wpa = fuse_writepage_args_setup(folio, ff);
	error = -ENOMEM;
	if (!wpa)
		goto err_writepage_args;

	ap = &wpa->ia.ap;
	ap->num_folios = 1;

	folio_start_writeback(folio);
	fuse_writepage_args_page_fill(wpa, folio, 0);

	spin_lock(&fi->lock);
	list_add_tail(&wpa->queue_entry, &fi->queued_writes);
	fuse_flush_writepages(inode);
	spin_unlock(&fi->lock);

	return 0;

err_writepage_args:
	fuse_file_put(ff, false);
err:
	mapping_set_error(folio->mapping, error);
	return error;
}

struct fuse_fill_wb_data {
	struct fuse_writepage_args *wpa;
	struct fuse_file *ff;
	struct inode *inode;
	unsigned int max_folios;
	unsigned int nr_pages;
	/*
	 * nr_bytes won't overflow since fuse_writepage_need_send() caps
	 * wb requests to never exceed fc->max_pages (which has an upper bound
	 * of U16_MAX).
	 */
	unsigned int nr_bytes;
};

static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
static bool fuse_pages_realloc(struct fuse_fill_wb_data *data,
			       unsigned int max_pages)
{
	struct fuse_args_pages *ap = &data->wpa->ia.ap;
	struct fuse_conn *fc = get_fuse_conn(data->inode);
	struct folio **folios;
	struct fuse_folio_desc *descs;
	unsigned int nfolios = min_t(unsigned int,
				     max_t(unsigned int, data->max_folios * 2,
					   FUSE_DEFAULT_MAX_PAGES_PER_REQ),
				    fc->max_pages);
				    max_pages);
	WARN_ON(nfolios <= data->max_folios);

	folios = fuse_folios_alloc(nfolios, GFP_NOFS, &descs);
@@ -2090,10 +2095,10 @@ static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
	return true;
}

static void fuse_writepages_send(struct fuse_fill_wb_data *data)
static void fuse_writepages_send(struct inode *inode,
				 struct fuse_fill_wb_data *data)
{
	struct fuse_writepage_args *wpa = data->wpa;
	struct inode *inode = data->inode;
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fi->lock);
@@ -2102,199 +2107,150 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
	spin_unlock(&fi->lock);
}

static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio,
				     struct fuse_args_pages *ap,
static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
				     unsigned len, struct fuse_args_pages *ap,
				     struct fuse_fill_wb_data *data)
{
	struct folio *prev_folio;
	struct fuse_folio_desc prev_desc;
	unsigned bytes = data->nr_bytes + len;
	loff_t prev_pos;

	WARN_ON(!ap->num_folios);

	/* Reached max pages */
	if (data->nr_pages + folio_nr_pages(folio) > fc->max_pages)
	if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages)
		return true;

	/* Reached max write bytes */
	if ((data->nr_pages * PAGE_SIZE) + folio_size(folio) > fc->max_write)
	if (bytes > fc->max_write)
		return true;

	/* Discontinuity */
	if (folio_next_index(ap->folios[ap->num_folios - 1]) != folio->index)
	prev_folio = ap->folios[ap->num_folios - 1];
	prev_desc = ap->descs[ap->num_folios - 1];
	prev_pos = folio_pos(prev_folio) + prev_desc.offset + prev_desc.length;
	if (prev_pos != pos)
		return true;

	/* Need to grow the pages array?  If so, did the expansion fail? */
	if (ap->num_folios == data->max_folios && !fuse_pages_realloc(data))
	if (ap->num_folios == data->max_folios &&
	    !fuse_pages_realloc(data, fc->max_pages))
		return true;

	return false;
}

static int fuse_writepages_fill(struct folio *folio,
		struct writeback_control *wbc, void *_data)
static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
					  struct folio *folio, u64 pos,
					  unsigned len, u64 end_pos)
{
	struct fuse_fill_wb_data *data = _data;
	struct fuse_fill_wb_data *data = wpc->wb_ctx;
	struct fuse_writepage_args *wpa = data->wpa;
	struct fuse_args_pages *ap = &wpa->ia.ap;
	struct inode *inode = data->inode;
	struct inode *inode = wpc->inode;
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;
	loff_t offset = offset_in_folio(folio, pos);

	WARN_ON_ONCE(!data);
	/* len will always be page aligned */
	WARN_ON_ONCE(len & (PAGE_SIZE - 1));

	if (!data->ff) {
		err = -EIO;
		data->ff = fuse_write_file_get(fi);
		if (!data->ff)
			goto out_unlock;
			return -EIO;
	}

	if (wpa && fuse_writepage_need_send(fc, folio, ap, data)) {
		fuse_writepages_send(data);
	if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) {
		fuse_writepages_send(inode, data);
		data->wpa = NULL;
		data->nr_pages = 0;
		data->nr_bytes = 0;
	}

	if (data->wpa == NULL) {
		err = -ENOMEM;
		wpa = fuse_writepage_args_setup(folio, data->ff);
		wpa = fuse_writepage_args_setup(folio, offset, data->ff);
		if (!wpa)
			goto out_unlock;
			return -ENOMEM;
		fuse_file_get(wpa->ia.ff);
		data->max_folios = 1;
		ap = &wpa->ia.ap;
	}
	folio_start_writeback(folio);

	fuse_writepage_args_page_fill(wpa, folio, ap->num_folios);
	data->nr_pages += folio_nr_pages(folio);
	iomap_start_folio_write(inode, folio, 1);
	fuse_writepage_args_page_fill(wpa, folio, ap->num_folios,
				      offset, len);
	data->nr_bytes += len;

	err = 0;
	ap->num_folios++;
	if (!data->wpa)
		data->wpa = wpa;
out_unlock:
	folio_unlock(folio);

	return err;
	return len;
}

static int fuse_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
static int fuse_iomap_writeback_submit(struct iomap_writepage_ctx *wpc,
				       int error)
{
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_fill_wb_data data;
	int err;

	err = -EIO;
	if (fuse_is_bad(inode))
		goto out;

	if (wbc->sync_mode == WB_SYNC_NONE &&
	    fc->num_background >= fc->congestion_threshold)
		return 0;

	data.inode = inode;
	data.wpa = NULL;
	data.ff = NULL;
	data.nr_pages = 0;
	struct fuse_fill_wb_data *data = wpc->wb_ctx;

	err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
	if (data.wpa) {
		WARN_ON(!data.wpa->ia.ap.num_folios);
		fuse_writepages_send(&data);
	}
	if (data.ff)
		fuse_file_put(data.ff, false);
	WARN_ON_ONCE(!data);

out:
	return err;
	if (data->wpa) {
		WARN_ON(!data->wpa->ia.ap.num_folios);
		fuse_writepages_send(wpc->inode, data);
	}

/*
 * It's worthy to make sure that space is reserved on disk for the write,
 * but how to implement it without killing performance need more thinking.
 */
static int fuse_write_begin(const struct kiocb *iocb,
			    struct address_space *mapping,
			    loff_t pos, unsigned len, struct folio **foliop,
			    void **fsdata)
{
	pgoff_t index = pos >> PAGE_SHIFT;
	struct file *file = iocb->ki_filp;
	struct fuse_conn *fc = get_fuse_conn(file_inode(file));
	struct folio *folio;
	loff_t fsize;
	int err = -ENOMEM;

	WARN_ON(!fc->writeback_cache);

	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
			mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		goto error;
	if (data->ff)
		fuse_file_put(data->ff, false);

	if (folio_test_uptodate(folio) || len >= folio_size(folio))
		goto success;
	/*
	 * Check if the start of this folio comes after the end of file,
	 * in which case the readpage can be optimized away.
	 */
	fsize = i_size_read(mapping->host);
	if (fsize <= folio_pos(folio)) {
		size_t off = offset_in_folio(folio, pos);
		if (off)
			folio_zero_segment(folio, 0, off);
		goto success;
	return error;
}
	err = fuse_do_readfolio(file, folio);
	if (err)
		goto cleanup;
success:
	*foliop = folio;
	return 0;

cleanup:
	folio_unlock(folio);
	folio_put(folio);
error:
	return err;
}
/*
 * Writeback ops installed in the iomap_writepage_ctx by fuse_writepages()
 * and fuse_launder_folio().
 */
static const struct iomap_writeback_ops fuse_writeback_ops = {
	.writeback_range	= fuse_iomap_writeback_range,
	.writeback_submit	= fuse_iomap_writeback_submit,
};

static int fuse_write_end(const struct kiocb *iocb,
			  struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct folio *folio, void *fsdata)
static int fuse_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct inode *inode = folio->mapping->host;

	/* Haven't copied anything?  Skip zeroing, size extending, dirtying. */
	if (!copied)
		goto unlock;

	pos += copied;
	if (!folio_test_uptodate(folio)) {
		/* Zero any unwritten bytes at the end of the page */
		size_t endoff = pos & ~PAGE_MASK;
		if (endoff)
			folio_zero_segment(folio, endoff, PAGE_SIZE);
		folio_mark_uptodate(folio);
	}

	if (pos > inode->i_size)
		i_size_write(inode, pos);
	struct inode *inode = mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_fill_wb_data data = {};
	struct iomap_writepage_ctx wpc = {
		.inode = inode,
		.iomap.type = IOMAP_MAPPED,
		.wbc = wbc,
		.ops = &fuse_writeback_ops,
		.wb_ctx	= &data,
	};

	folio_mark_dirty(folio);
	if (fuse_is_bad(inode))
		return -EIO;

unlock:
	folio_unlock(folio);
	folio_put(folio);
	if (wbc->sync_mode == WB_SYNC_NONE &&
	    fc->num_background >= fc->congestion_threshold)
		return 0;

	return copied;
	return iomap_writepages(&wpc);
}

static int fuse_launder_folio(struct folio *folio)
{
	int err = 0;
	struct fuse_fill_wb_data data = {};
	struct iomap_writepage_ctx wpc = {
		.inode = folio->mapping->host,
		.iomap.type = IOMAP_MAPPED,
		.ops = &fuse_writeback_ops,
		.wb_ctx	= &data,
	};

	if (folio_clear_dirty_for_io(folio)) {
		err = fuse_writepage_locked(folio);
		err = iomap_writeback_folio(&wpc, folio);
		err = fuse_iomap_writeback_submit(&wpc, err);
		if (!err)
			folio_wait_writeback(folio);
	}
@@ -3147,12 +3103,13 @@ static const struct address_space_operations fuse_file_aops = {
	.readahead	= fuse_readahead,
	.writepages	= fuse_writepages,
	.launder_folio	= fuse_launder_folio,
	.dirty_folio	= filemap_dirty_folio,
	.dirty_folio	= iomap_dirty_folio,
	.release_folio	= iomap_release_folio,
	.invalidate_folio = iomap_invalidate_folio,
	.is_partially_uptodate = iomap_is_partially_uptodate,
	.migrate_folio	= filemap_migrate_folio,
	.bmap		= fuse_bmap,
	.direct_IO	= fuse_direct_IO,
	.write_begin	= fuse_write_begin,
	.write_end	= fuse_write_end,
};

void fuse_init_file_inode(struct inode *inode, unsigned int flags)
Loading