Commit 2ff1e975 authored by David Howells
Browse files

netfs: Replace PG_fscache by setting folio->private and marking dirty



When dirty data is being written to the cache, setting/waiting on/clearing
the fscache flag is always done in tandem with setting/waiting on/clearing
the writeback flag.  The netfslib buffered write routines wait on and set
both flags and the write request cleanup clears both flags, so the fscache
flag is almost superfluous.

The reason it isn't superfluous is that the fscache flag is also used to
indicate that data just read from the server is being written to the cache.
The flag is used to prevent a race involving overlapping direct-I/O writes
to the cache.

Change this to indicate that a page is in need of being copied to the cache
by placing a magic value in folio->private and marking the folios dirty.
Then when the writeback code sees a folio marked in this way, it only
writes it to the cache and not to the server.

If a folio that has this magic value set is modified, the value is just
replaced and the folio will then be uploaded too.

With this, PG_fscache is no longer required by the netfslib core, 9p and
afs.

Ceph and nfs, however, still need to use the old PG_fscache-based tracking.
To deal with this, a flag, NETFS_ICTX_USE_PGPRIV2, now has to be set on the
flags in the netfs_inode struct for those filesystems.  This reenables the
use of PG_fscache in that inode.  9p and afs use the netfslib write helpers
so get switched over; cifs, for the moment, does page-by-page manual access
to the cache, so doesn't use PG_fscache and is unaffected.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Ronnie Sahlberg <ronniesahlberg@gmail.com>
cc: Shyam Prasad N <sprasad@microsoft.com>
cc: Tom Talpey <tom@talpey.com>
cc: Bharath SM <bharathsm@microsoft.com>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: Anna Schumaker <anna@kernel.org>
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: linux-nfs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
parent 5f24162f
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -517,7 +517,7 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
	struct fscache_cookie *cookie = ceph_fscache_cookie(ci);

	fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode),
			       ceph_fscache_write_terminated, inode, caching);
			       ceph_fscache_write_terminated, inode, true, caching);
}
#else
static inline void ceph_set_page_fscache(struct page *page)
+2 −0
Original line number Diff line number Diff line
@@ -577,6 +577,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)

	/* Set parameters for the netfs library */
	netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
	/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
	__set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags);

	spin_lock_init(&ci->i_ceph_lock);

+26 −10
Original line number Diff line number Diff line
@@ -10,8 +10,11 @@
#include "internal.h"

/*
 * Unlock the folios in a read operation.  We need to set PG_fscache on any
 * Unlock the folios in a read operation.  We need to set PG_writeback on any
 * folios we're going to write back before we unlock them.
 *
 * Note that if the deprecated NETFS_RREQ_USE_PGPRIV2 is set then we use
 * PG_private_2 and do a direct write to the cache from here instead.
 */
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
@@ -48,14 +51,14 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
	xas_for_each(&xas, folio, last_page) {
		loff_t pg_end;
		bool pg_failed = false;
		bool folio_started;
		bool wback_to_cache = false;
		bool folio_started = false;

		if (xas_retry(&xas, folio))
			continue;

		pg_end = folio_pos(folio) + folio_size(folio) - 1;

		folio_started = false;
		for (;;) {
			loff_t sreq_end;

@@ -63,11 +66,17 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
				pg_failed = true;
				break;
			}
			if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
			if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
				if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE,
							       &subreq->flags)) {
					trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
					folio_start_fscache(folio);
					folio_started = true;
				}
			} else {
				wback_to_cache |=
					test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
			}
			pg_failed |= subreq_failed;
			sreq_end = subreq->start + subreq->len - 1;
			if (pg_end < sreq_end)
@@ -98,6 +107,11 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
				kfree(finfo);
			}
			folio_mark_uptodate(folio);
			if (wback_to_cache && !WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				filemap_dirty_folio(folio->mapping, folio);
			}
		}

		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
@@ -491,9 +505,11 @@ int netfs_write_begin(struct netfs_inode *ctx,
	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

have_folio:
	if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) {
		ret = folio_wait_fscache_killable(folio);
		if (ret < 0)
			goto error;
	}
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
+44 −49
Original line number Diff line number Diff line
@@ -30,21 +30,13 @@ static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq);

static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
{
	if (netfs_group && !folio_get_private(folio))
		folio_attach_private(folio, netfs_get_group(netfs_group));
}
	void *priv = folio_get_private(folio);

#if IS_ENABLED(CONFIG_FSCACHE)
static void netfs_folio_start_fscache(bool caching, struct folio *folio)
{
	if (caching)
		folio_start_fscache(folio);
}
#else
static void netfs_folio_start_fscache(bool caching, struct folio *folio)
{
	if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
		folio_attach_private(folio, netfs_get_group(netfs_group));
	else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
		folio_detach_private(folio);
}
#endif

/*
 * Decide how we should modify a folio.  We might be attempting to do
@@ -63,11 +55,12 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
						    bool maybe_trouble)
{
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_group *group = netfs_folio_group(folio);
	loff_t pos = folio_file_pos(folio);

	_enter("");

	if (netfs_folio_group(folio) != netfs_group)
	if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE)
		return NETFS_FLUSH_CONTENT;

	if (folio_test_uptodate(folio))
@@ -396,9 +389,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
				folio_clear_dirty_for_io(folio);
			/* We make multiple writes to the folio... */
			if (!folio_test_writeback(folio)) {
				folio_wait_fscache(folio);
				folio_start_writeback(folio);
				folio_start_fscache(folio);
				if (wreq->iter.count == 0)
					trace_netfs_folio(folio, netfs_folio_trace_wthru);
				else
@@ -528,6 +519,7 @@ EXPORT_SYMBOL(netfs_file_write_iter);
 */
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group)
{
	struct netfs_group *group;
	struct folio *folio = page_folio(vmf->page);
	struct file *file = vmf->vma->vm_file;
	struct inode *inode = file_inode(file);
@@ -550,7 +542,8 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
		goto out;
	}

	if (netfs_folio_group(folio) != netfs_group) {
	group = netfs_folio_group(folio);
	if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) {
		folio_unlock(folio);
		err = filemap_fdatawait_range(inode->i_mapping,
					      folio_pos(folio),
@@ -606,8 +599,6 @@ static void netfs_kill_pages(struct address_space *mapping,

		trace_netfs_folio(folio, netfs_folio_trace_kill);
		folio_clear_uptodate(folio);
		if (folio_test_fscache(folio))
			folio_end_fscache(folio);
		folio_end_writeback(folio);
		folio_lock(folio);
		generic_error_remove_folio(mapping, folio);
@@ -643,8 +634,6 @@ static void netfs_redirty_pages(struct address_space *mapping,
		next = folio_next_index(folio);
		trace_netfs_folio(folio, netfs_folio_trace_redirty);
		filemap_dirty_folio(mapping, folio);
		if (folio_test_fscache(folio))
			folio_end_fscache(folio);
		folio_end_writeback(folio);
		folio_put(folio);
	} while (index = next, index <= last);
@@ -700,6 +689,10 @@ static void netfs_pages_written_back(struct netfs_io_request *wreq)
				if (!folio_test_dirty(folio)) {
					folio_detach_private(folio);
					gcount++;
					if (group == NETFS_FOLIO_COPY_TO_CACHE)
						trace_netfs_folio(folio,
								  netfs_folio_trace_end_copy);
					else
						trace_netfs_folio(folio, netfs_folio_trace_clear_g);
				} else {
					trace_netfs_folio(folio, netfs_folio_trace_redirtied);
@@ -724,8 +717,6 @@ static void netfs_pages_written_back(struct netfs_io_request *wreq)
			trace_netfs_folio(folio, netfs_folio_trace_clear);
		}
	end_wb:
		if (folio_test_fscache(folio))
			folio_end_fscache(folio);
		xas_advance(&xas, folio_next_index(folio) - 1);
		folio_end_writeback(folio);
	}
@@ -795,7 +786,6 @@ static void netfs_extend_writeback(struct address_space *mapping,
				   long *_count,
				   loff_t start,
				   loff_t max_len,
				   bool caching,
				   size_t *_len,
				   size_t *_top)
{
@@ -846,8 +836,7 @@ static void netfs_extend_writeback(struct address_space *mapping,
				break;
			}
			if (!folio_test_dirty(folio) ||
			    folio_test_writeback(folio) ||
			    folio_test_fscache(folio)) {
			    folio_test_writeback(folio)) {
				folio_unlock(folio);
				folio_put(folio);
				xas_reset(xas);
@@ -860,7 +849,8 @@ static void netfs_extend_writeback(struct address_space *mapping,
			if ((const struct netfs_group *)priv != group) {
				stop = true;
				finfo = netfs_folio_info(folio);
				if (finfo->netfs_group != group ||
				if (!finfo ||
				    finfo->netfs_group != group ||
				    finfo->dirty_offset > 0) {
					folio_unlock(folio);
					folio_put(folio);
@@ -894,12 +884,14 @@ static void netfs_extend_writeback(struct address_space *mapping,

		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			folio = fbatch.folios[i];
			if (group == NETFS_FOLIO_COPY_TO_CACHE)
				trace_netfs_folio(folio, netfs_folio_trace_copy_plus);
			else
				trace_netfs_folio(folio, netfs_folio_trace_store_plus);

			if (!folio_clear_dirty_for_io(folio))
				BUG();
			folio_start_writeback(folio);
			netfs_folio_start_fscache(caching, folio);
			folio_unlock(folio);
		}

@@ -925,14 +917,14 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long i_size = i_size_read(&ctx->inode);
	size_t len, max_len;
	bool caching = netfs_is_cache_enabled(ctx);
	long count = wbc->nr_to_write;
	int ret;

	_enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching);
	_enter(",%lx,%llx-%llx", folio->index, start, end);

	wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio),
				   NETFS_WRITEBACK);
				   group == NETFS_FOLIO_COPY_TO_CACHE ?
				   NETFS_COPY_TO_CACHE : NETFS_WRITEBACK);
	if (IS_ERR(wreq)) {
		folio_unlock(folio);
		return PTR_ERR(wreq);
@@ -941,7 +933,6 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
	if (!folio_clear_dirty_for_io(folio))
		BUG();
	folio_start_writeback(folio);
	netfs_folio_start_fscache(caching, folio);

	count -= folio_nr_pages(folio);

@@ -950,6 +941,9 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
	 * immediately lockable, is not dirty or is missing, or we reach the
	 * end of the range.
	 */
	if (group == NETFS_FOLIO_COPY_TO_CACHE)
		trace_netfs_folio(folio, netfs_folio_trace_copy);
	else
		trace_netfs_folio(folio, netfs_folio_trace_store);

	len = wreq->len;
@@ -973,7 +967,7 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,

		if (len < max_len)
			netfs_extend_writeback(mapping, group, xas, &count, start,
					       max_len, caching, &len, &wreq->upper_len);
					       max_len, &len, &wreq->upper_len);
	}

cant_expand:
@@ -997,15 +991,18 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,

		iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start,
				wreq->upper_len);
		if (group != NETFS_FOLIO_COPY_TO_CACHE) {
			__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
			ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback);
		} else {
			ret = netfs_begin_write(wreq, true, netfs_write_trace_copy_to_cache);
		}
		if (ret == 0 || ret == -EIOCBQUEUED)
			wbc->nr_to_write -= len / PAGE_SIZE;
	} else {
		_debug("write discard %zx @%llx [%llx]", len, start, i_size);

		/* The dirty region was entirely beyond the EOF. */
		fscache_clear_page_bits(mapping, start, len, caching);
		netfs_pages_written_back(wreq);
		ret = 0;
	}
@@ -1058,9 +1055,11 @@ static ssize_t netfs_writepages_begin(struct address_space *mapping,

		/* Skip any dirty folio that's not in the group of interest. */
		priv = folio_get_private(folio);
		if ((const struct netfs_group *)priv != group) {
			finfo = netfs_folio_info(folio);
			if (finfo->netfs_group != group) {
		if ((const struct netfs_group *)priv == NETFS_FOLIO_COPY_TO_CACHE) {
			group = NETFS_FOLIO_COPY_TO_CACHE;
		} else if ((const struct netfs_group *)priv != group) {
			finfo = __netfs_folio_info(priv);
			if (!finfo || finfo->netfs_group != group) {
				folio_put(folio);
				continue;
			}
@@ -1099,14 +1098,10 @@ static ssize_t netfs_writepages_begin(struct address_space *mapping,
		goto search_again;
	}

	if (folio_test_writeback(folio) ||
	    folio_test_fscache(folio)) {
	if (folio_test_writeback(folio)) {
		folio_unlock(folio);
		if (wbc->sync_mode != WB_SYNC_NONE) {
			folio_wait_writeback(folio);
#ifdef CONFIG_FSCACHE
			folio_wait_fscache(folio);
#endif
			goto lock_again;
		}

@@ -1265,6 +1260,7 @@ int netfs_launder_folio(struct folio *folio)

	bvec_set_folio(&bvec, folio, len, offset);
	iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len);
	if (group != NETFS_FOLIO_COPY_TO_CACHE)
		__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
	ret = netfs_begin_write(wreq, true, netfs_write_trace_launder);

@@ -1274,7 +1270,6 @@ int netfs_launder_folio(struct folio *folio)
	kfree(finfo);
	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
out:
	folio_wait_fscache(folio);
	_leave(" = %d", ret);
	return ret;
}
+8 −4
Original line number Diff line number Diff line
@@ -166,6 +166,7 @@ struct fscache_write_request {
	loff_t			start;
	size_t			len;
	bool			set_bits;
	bool			using_pgpriv2;
	netfs_io_terminated_t	term_func;
	void			*term_func_priv;
};
@@ -197,6 +198,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
{
	struct fscache_write_request *wreq = priv;

	if (wreq->using_pgpriv2)
		fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len,
					wreq->set_bits);

@@ -212,7 +214,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
			      loff_t start, size_t len, loff_t i_size,
			      netfs_io_terminated_t term_func,
			      void *term_func_priv,
			      bool cond)
			      bool using_pgpriv2, bool cond)
{
	struct fscache_write_request *wreq;
	struct netfs_cache_resources *cres;
@@ -230,6 +232,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
	wreq->mapping		= mapping;
	wreq->start		= start;
	wreq->len		= len;
	wreq->using_pgpriv2	= using_pgpriv2;
	wreq->set_bits		= cond;
	wreq->term_func		= term_func;
	wreq->term_func_priv	= term_func_priv;
@@ -257,6 +260,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
abandon_free:
	kfree(wreq);
abandon:
	if (using_pgpriv2)
		fscache_clear_page_bits(mapping, start, len, cond);
	if (term_func)
		term_func(term_func_priv, ret, false);
Loading