Commit a6293b3e authored by Amir Goldstein
Browse files

fs: factor out backing_file_{read,write}_iter() helpers



Overlayfs submits file I/O to backing files on other filesystems.
Factor out some common helpers for performing I/O on backing files into
fs/backing-file.c.

Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com


Signed-off-by: Amir Goldstein <amir73il@gmail.com>
parent f91a704f
Loading
Loading
Loading
Loading
+210 −0
Original line number Diff line number Diff line
@@ -2,6 +2,9 @@
/*
 * Common helpers for stackable filesystems and backing files.
 *
 * Forked from fs/overlayfs/file.c.
 *
 * Copyright (C) 2017 Red Hat, Inc.
 * Copyright (C) 2023 CTERA Networks.
 */

@@ -46,3 +49,210 @@ struct file *backing_file_open(const struct path *user_path, int flags,
	return f;
}
EXPORT_SYMBOL_GPL(backing_file_open);

/*
 * Per-request state for asynchronous I/O that is forwarded to a backing file.
 * Allocated from backing_aio_cachep and freed on the final backing_aio_put().
 */
struct backing_aio {
	/* kiocb submitted to the backing file; cloned from orig_iocb */
	struct kiocb iocb;
	/* set to 2 at submission time; the last put releases the object */
	refcount_t ref;
	/* the caller's kiocb; its ki_complete is invoked on completion */
	struct kiocb *orig_iocb;
	/* used for aio completion */
	/* optional callback, invoked with orig_iocb->ki_filp during cleanup */
	void (*end_write)(struct file *);
	/* work item used to defer write completion to a workqueue */
	struct work_struct work;
	/* completion result saved for the deferred completion work */
	long res;
};

/* Slab cache for struct backing_aio, created by backing_aio_init(). */
static struct kmem_cache *backing_aio_cachep;

#define BACKING_IOCB_MASK \
	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)

static rwf_t iocb_to_rw_flags(int flags)
{
	return (__force rwf_t)(flags & BACKING_IOCB_MASK);
}

/*
 * Drop one reference on @aio.  When the last reference goes away, release
 * the file reference held in aio->iocb.ki_filp and return @aio to its cache.
 */
static void backing_aio_put(struct backing_aio *aio)
{
	if (!refcount_dec_and_test(&aio->ref))
		return;

	fput(aio->iocb.ki_filp);
	kmem_cache_free(backing_aio_cachep, aio);
}

/*
 * Finish a request: run the optional end_write callback on the caller's
 * file, copy the final position back into the caller's kiocb and drop one
 * reference on @aio.  @res is accepted but not used here.
 */
static void backing_aio_cleanup(struct backing_aio *aio, long res)
{
	struct kiocb *caller_iocb = aio->orig_iocb;
	void (*end_write)(struct file *) = aio->end_write;

	if (end_write)
		end_write(caller_iocb->ki_filp);

	caller_iocb->ki_pos = aio->iocb.ki_pos;
	backing_aio_put(aio);
}

static void backing_aio_rw_complete(struct kiocb *iocb, long res)
{
	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
	struct kiocb *orig_iocb = aio->orig_iocb;

	if (iocb->ki_flags & IOCB_WRITE)
		kiocb_end_write(iocb);

	backing_aio_cleanup(aio, res);
	orig_iocb->ki_complete(orig_iocb, res);
}

/*
 * Work function: run the regular completion handler with the result that
 * was stashed in the request when the work was queued.
 */
static void backing_aio_complete_work(struct work_struct *work)
{
	struct backing_aio *aio;

	aio = container_of(work, struct backing_aio, work);
	backing_aio_rw_complete(&aio->iocb, aio->res);
}

static void backing_aio_queue_completion(struct kiocb *iocb, long res)
{
	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);

	/*
	 * Punt to a work queue to serialize updates of mtime/size.
	 */
	aio->res = res;
	INIT_WORK(&aio->work, backing_aio_complete_work);
	queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
		   &aio->work);
}

/*
 * Make sure the superblock of @iocb's file has an s_dio_done_wq.
 * Returns 0 if it is already set, otherwise the result of
 * sb_init_dio_done_wq().
 */
static int backing_aio_init_wq(struct kiocb *iocb)
{
	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;

	return sb->s_dio_done_wq ? 0 : sb_init_dio_done_wq(sb);
}


/*
 * backing_file_read_iter - read from a backing file on behalf of a user file
 * @file:  backing file to read from; must have FMODE_BACKING set
 * @iter:  destination of the data
 * @iocb:  the caller's kiocb; supplies the position and decides whether the
 *         read is issued synchronously or asynchronously
 * @flags: kiocb flags; converted to rwf flags for the synchronous path only
 * @ctx:   credentials to use for the read, the user file and an optional
 *         ->accessed callback
 *
 * Returns 0 if @iter is empty, -EIO if @file is not a backing file, -EINVAL
 * if direct I/O was requested but @file lacks FMODE_CAN_ODIRECT, -ENOMEM if
 * the async request could not be allocated, otherwise whatever the
 * underlying vfs read helper returned (including -EIOCBQUEUED).
 */
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
			       struct kiocb *iocb, int flags,
			       struct backing_file_ctx *ctx)
{
	struct backing_aio *aio = NULL;
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!iov_iter_count(iter))
		return 0;

	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(file->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	/* The I/O on the backing file is issued with ctx->cred. */
	old_cred = override_creds(ctx->cred);
	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(flags);

		ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
	} else {
		ret = -ENOMEM;
		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
		if (!aio)
			goto out;

		aio->orig_iocb = iocb;
		/* The clone holds its own file reference, dropped on final put. */
		kiocb_clone(&aio->iocb, iocb, get_file(file));
		aio->iocb.ki_complete = backing_aio_rw_complete;
		/*
		 * Two references: one is dropped right after submission, the
		 * other by backing_aio_cleanup().
		 */
		refcount_set(&aio->ref, 2);
		ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
		backing_aio_put(aio);
		/*
		 * Not queued: clean up here.  Otherwise cleanup is left to the
		 * ki_complete handler installed above.
		 */
		if (ret != -EIOCBQUEUED)
			backing_aio_cleanup(aio, ret);
	}
out:
	revert_creds(old_cred);

	/* Called on every path that reaches here, including failures. */
	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_read_iter);

/*
 * backing_file_write_iter - write to a backing file on behalf of a user file
 * @file:  backing file to write to; must have FMODE_BACKING set
 * @iter:  source of the data
 * @iocb:  the caller's kiocb; supplies the position and decides whether the
 *         write is issued synchronously or asynchronously
 * @flags: kiocb flags to use for the backing write (IOCB_DIO_CALLER_COMP is
 *         always cleared)
 * @ctx:   credentials to use for the write, the user file and an optional
 *         ->end_write callback
 *
 * Returns 0 if @iter is empty, -EIO if @file is not a backing file, the
 * error from file_remove_privs() if that fails, -EINVAL if direct I/O was
 * requested but @file lacks FMODE_CAN_ODIRECT, an error from setting up the
 * completion workqueue, -ENOMEM if the async request could not be allocated,
 * otherwise whatever the underlying vfs write helper returned (including
 * -EIOCBQUEUED).
 */
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
				struct kiocb *iocb, int flags,
				struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!iov_iter_count(iter))
		return 0;

	/* Applied to the user file, not to the backing file. */
	ret = file_remove_privs(ctx->user_file);
	if (ret)
		return ret;

	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(file->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	/*
	 * Stacked filesystems don't support deferred completions, don't copy
	 * this property in case it is set by the issuer.
	 */
	flags &= ~IOCB_DIO_CALLER_COMP;

	/* The I/O on the backing file is issued with ctx->cred. */
	old_cred = override_creds(ctx->cred);
	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(flags);

		ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
		/* Invoked regardless of the write result. */
		if (ctx->end_write)
			ctx->end_write(ctx->user_file);
	} else {
		struct backing_aio *aio;

		/* Completion is deferred to s_dio_done_wq; make sure it exists. */
		ret = backing_aio_init_wq(iocb);
		if (ret)
			goto out;

		ret = -ENOMEM;
		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
		if (!aio)
			goto out;

		aio->orig_iocb = iocb;
		aio->end_write = ctx->end_write;
		/* The clone holds its own file reference, dropped on final put. */
		kiocb_clone(&aio->iocb, iocb, get_file(file));
		/* Use the filtered @flags for the backing kiocb. */
		aio->iocb.ki_flags = flags;
		aio->iocb.ki_complete = backing_aio_queue_completion;
		/*
		 * Two references: one is dropped right after submission, the
		 * other by backing_aio_cleanup().
		 */
		refcount_set(&aio->ref, 2);
		ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
		backing_aio_put(aio);
		/*
		 * Not queued: clean up here.  Otherwise cleanup is left to the
		 * ki_complete handler installed above.
		 */
		if (ret != -EIOCBQUEUED)
			backing_aio_cleanup(aio, ret);
	}
out:
	revert_creds(old_cred);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_write_iter);

/*
 * Create the slab cache for struct backing_aio.
 * Returns 0 on success or -ENOMEM if the cache could not be created.
 */
static int __init backing_aio_init(void)
{
	struct kmem_cache *cachep;

	cachep = kmem_cache_create("backing_aio", sizeof(struct backing_aio),
				   0, SLAB_HWCACHE_ALIGN, NULL);
	backing_aio_cachep = cachep;

	return cachep ? 0 : -ENOMEM;
}
fs_initcall(backing_aio_init);
+13 −175
Original line number Diff line number Diff line
@@ -16,19 +16,6 @@
#include <linux/backing-file.h>
#include "overlayfs.h"

#include "../internal.h"	/* for sb_init_dio_done_wq */

/*
 * Per-request state for asynchronous I/O that overlayfs forwards to a real
 * file.  Allocated from ovl_aio_request_cachep.
 */
struct ovl_aio_req {
	/* kiocb submitted to the real file; cloned from orig_iocb */
	struct kiocb iocb;
	/* set to 2 at submission time; the last put releases the object */
	refcount_t ref;
	/* the caller's kiocb; its ki_complete is invoked on completion */
	struct kiocb *orig_iocb;
	/* used for aio completion */
	struct work_struct work;
	/* completion result saved for the deferred completion work */
	long res;
};

/* Slab cache for struct ovl_aio_req. */
static struct kmem_cache *ovl_aio_request_cachep;

static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
@@ -275,84 +262,16 @@ static void ovl_file_accessed(struct file *file)
	touch_atime(&file->f_path);
}

#define OVL_IOCB_MASK \
	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)

static rwf_t iocb_to_rw_flags(int flags)
{
	return (__force rwf_t)(flags & OVL_IOCB_MASK);
}

/*
 * Drop one reference on @aio_req.  On the last put, release the file
 * reference held in aio_req->iocb.ki_filp and free the request.
 */
static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
{
	if (!refcount_dec_and_test(&aio_req->ref))
		return;

	fput(aio_req->iocb.ki_filp);
	kmem_cache_free(ovl_aio_request_cachep, aio_req);
}

static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
{
	struct kiocb *iocb = &aio_req->iocb;
	struct kiocb *orig_iocb = aio_req->orig_iocb;

	if (iocb->ki_flags & IOCB_WRITE)
		ovl_file_modified(orig_iocb->ki_filp);

	orig_iocb->ki_pos = iocb->ki_pos;
	ovl_aio_put(aio_req);
}

static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
{
	struct ovl_aio_req *aio_req = container_of(iocb,
						   struct ovl_aio_req, iocb);
	struct kiocb *orig_iocb = aio_req->orig_iocb;

	if (iocb->ki_flags & IOCB_WRITE)
		kiocb_end_write(iocb);

	ovl_aio_cleanup_handler(aio_req);
	orig_iocb->ki_complete(orig_iocb, res);
}

/*
 * Work function: run the regular completion handler with the result that
 * was stashed in the request when the work was queued.
 */
static void ovl_aio_complete_work(struct work_struct *work)
{
	struct ovl_aio_req *req;

	req = container_of(work, struct ovl_aio_req, work);
	ovl_aio_rw_complete(&req->iocb, req->res);
}

static void ovl_aio_queue_completion(struct kiocb *iocb, long res)
{
	struct ovl_aio_req *aio_req = container_of(iocb,
						   struct ovl_aio_req, iocb);
	struct kiocb *orig_iocb = aio_req->orig_iocb;

	/*
	 * Punt to a work queue to serialize updates of mtime/size.
	 */
	aio_req->res = res;
	INIT_WORK(&aio_req->work, ovl_aio_complete_work);
	queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
		   &aio_req->work);
}

/*
 * Make sure @sb has an s_dio_done_wq.  Returns 0 if it is already set,
 * otherwise the result of sb_init_dio_done_wq().
 */
static int ovl_init_aio_done_wq(struct super_block *sb)
{
	return sb->s_dio_done_wq ? 0 : sb_init_dio_done_wq(sb);
}

static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct fd real;
	const struct cred *old_cred;
	ssize_t ret;
	struct backing_file_ctx ctx = {
		.cred = ovl_creds(file_inode(file)->i_sb),
		.user_file = file,
		.accessed = ovl_file_accessed,
	};

	if (!iov_iter_count(iter))
		return 0;
@@ -361,37 +280,8 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
	if (ret)
		return ret;

	ret = -EINVAL;
	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(real.file->f_mode & FMODE_CAN_ODIRECT))
		goto out_fdput;

	old_cred = ovl_override_creds(file_inode(file)->i_sb);
	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags);

		ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf);
	} else {
		struct ovl_aio_req *aio_req;

		ret = -ENOMEM;
		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
		if (!aio_req)
			goto out;

		aio_req->orig_iocb = iocb;
		kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
		refcount_set(&aio_req->ref, 2);
		ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
		ovl_aio_put(aio_req);
		if (ret != -EIOCBQUEUED)
			ovl_aio_cleanup_handler(aio_req);
	}
out:
	revert_creds(old_cred);
	ovl_file_accessed(file);
out_fdput:
	ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
				     &ctx);
	fdput(real);

	return ret;
@@ -402,9 +292,13 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct fd real;
	const struct cred *old_cred;
	ssize_t ret;
	int ifl = iocb->ki_flags;
	struct backing_file_ctx ctx = {
		.cred = ovl_creds(inode->i_sb),
		.user_file = file,
		.end_write = ovl_file_modified,
	};

	if (!iov_iter_count(iter))
		return 0;
@@ -412,19 +306,11 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
	inode_lock(inode);
	/* Update mode */
	ovl_copyattr(inode);
	ret = file_remove_privs(file);
	if (ret)
		goto out_unlock;

	ret = ovl_real_fdget(file, &real);
	if (ret)
		goto out_unlock;

	ret = -EINVAL;
	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(real.file->f_mode & FMODE_CAN_ODIRECT))
		goto out_fdput;

	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);

@@ -433,39 +319,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
	 * this property in case it is set by the issuer.
	 */
	ifl &= ~IOCB_DIO_CALLER_COMP;

	old_cred = ovl_override_creds(file_inode(file)->i_sb);
	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(ifl);

		ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf);
		/* Update size */
		ovl_file_modified(file);
	} else {
		struct ovl_aio_req *aio_req;

		ret = ovl_init_aio_done_wq(inode->i_sb);
		if (ret)
			goto out;

		ret = -ENOMEM;
		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
		if (!aio_req)
			goto out;

		aio_req->orig_iocb = iocb;
		kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
		aio_req->iocb.ki_flags = ifl;
		aio_req->iocb.ki_complete = ovl_aio_queue_completion;
		refcount_set(&aio_req->ref, 2);
		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
		ovl_aio_put(aio_req);
		if (ret != -EIOCBQUEUED)
			ovl_aio_cleanup_handler(aio_req);
	}
out:
	revert_creds(old_cred);
out_fdput:
	ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx);
	fdput(real);

out_unlock:
@@ -777,19 +631,3 @@ const struct file_operations ovl_file_operations = {
	.copy_file_range	= ovl_copy_file_range,
	.remap_file_range	= ovl_remap_file_range,
};

/*
 * Create the slab cache for struct ovl_aio_req.
 * Returns 0 on success or -ENOMEM if the cache could not be created.
 */
int __init ovl_aio_request_cache_init(void)
{
	struct kmem_cache *cachep;

	cachep = kmem_cache_create("ovl_aio_req", sizeof(struct ovl_aio_req),
				   0, SLAB_HWCACHE_ALIGN, NULL);
	ovl_aio_request_cachep = cachep;

	return cachep ? 0 : -ENOMEM;
}

/* Destroy the slab cache set up by ovl_aio_request_cache_init(). */
void ovl_aio_request_cache_destroy(void)
{
	kmem_cache_destroy(ovl_aio_request_cachep);
}
+6 −2
Original line number Diff line number Diff line
@@ -417,6 +417,12 @@ int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);

static inline const struct cred *ovl_creds(struct super_block *sb)
{
	return OVL_FS(sb)->creator_cred;
}

int ovl_can_decode_fh(struct super_block *sb);
struct dentry *ovl_indexdir(struct super_block *sb);
bool ovl_index_all(struct super_block *sb);
@@ -829,8 +835,6 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,

/* file.c */
extern const struct file_operations ovl_file_operations;
int __init ovl_aio_request_cache_init(void);
void ovl_aio_request_cache_destroy(void);
int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
+3 −8
Original line number Diff line number Diff line
@@ -1501,14 +1501,10 @@ static int __init ovl_init(void)
	if (ovl_inode_cachep == NULL)
		return -ENOMEM;

	err = ovl_aio_request_cache_init();
	if (!err) {
	err = register_filesystem(&ovl_fs_type);
	if (!err)
		return 0;

		ovl_aio_request_cache_destroy();
	}
	kmem_cache_destroy(ovl_inode_cachep);

	return err;
@@ -1524,7 +1520,6 @@ static void __exit ovl_exit(void)
	 */
	rcu_barrier();
	kmem_cache_destroy(ovl_inode_cachep);
	ovl_aio_request_cache_destroy();
}

module_init(ovl_init);
+15 −0
Original line number Diff line number Diff line
@@ -9,9 +9,24 @@
#define _LINUX_BACKING_FILE_H

#include <linux/file.h>
#include <linux/uio.h>
#include <linux/fs.h>

/*
 * Context passed by a stackable filesystem to the backing file I/O helpers.
 */
struct backing_file_ctx {
	/* credentials to override with while doing I/O on the backing file */
	const struct cred *cred;
	/* the user-visible file; handed to the callbacks below */
	struct file *user_file;
	/* optional; called with user_file by backing_file_read_iter() */
	void (*accessed)(struct file *);
	/* optional; called with the user file once a write has finished */
	void (*end_write)(struct file *);
};

struct file *backing_file_open(const struct path *user_path, int flags,
			       const struct path *real_path,
			       const struct cred *cred);
/*
 * Read from / write to the backing file @file on behalf of @ctx->user_file,
 * using @ctx->cred.  @iocb is the caller's kiocb and @flags are the kiocb
 * flags to use for the backing I/O.
 */
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
			       struct kiocb *iocb, int flags,
			       struct backing_file_ctx *ctx);
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
				struct kiocb *iocb, int flags,
				struct backing_file_ctx *ctx);

#endif /* _LINUX_BACKING_FILE_H */