Commit 4b6b4321 authored by Linus Torvalds
Browse files
Pull fuse updates from Miklos Szeredi:

 - Add mechanism for cleaning out unused, stale dentries; controlled via
   a module option (Luis Henriques)

 - Fix various bugs

 - Cleanups

* tag 'fuse-update-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: Uninitialized variable in fuse_epoch_work()
  fuse: fix io-uring list corruption for terminated non-committed requests
  fuse: signal that a fuse inode should exhibit local fs behaviors
  fuse: Always flush the page cache before FOPEN_DIRECT_IO write
  fuse: Invalidate the page cache after FOPEN_DIRECT_IO write
  fuse: rename 'namelen' to 'namesize'
  fuse: use strscpy instead of strcpy
  fuse: refactor fuse_conn_put() to remove negative logic.
  fuse: new work queue to invalidate dentries from old epochs
  fuse: new work queue to periodically invalidate expired dentries
  dcache: export shrink_dentry_list() and add new helper d_dispose_if_unused()
  fuse: add WARN_ON and comment for RCU revalidate
  fuse: Fix whitespace for fuse_uring_args_to_ring() comment
  fuse: missing copy_finish in fuse-over-io-uring argument copies
  fuse: fix readahead reclaim deadlock
parents 7cd122b5 8da059f2
Loading
Loading
Loading
Loading
+12 −6
Original line number Diff line number Diff line
@@ -1104,6 +1104,15 @@ struct dentry *d_find_alias_rcu(struct inode *inode)
	return de;
}

/**
 * d_dispose_if_unused - hand a dentry to a dispose list when its count is zero
 * @dentry: dentry to examine
 * @dispose: list passed on to to_shrink_list()
 *
 * With @dentry->d_lock held, the lockref count is inspected; a dentry whose
 * count is zero is passed to to_shrink_list() together with @dispose.  A
 * dentry with a non-zero count is left untouched.
 */
void d_dispose_if_unused(struct dentry *dentry, struct list_head *dispose)
{
	spin_lock(&dentry->d_lock);
	if (dentry->d_lockref.count == 0)
		to_shrink_list(dentry, dispose);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_dispose_if_unused);

/*
 *	Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
@@ -1114,12 +1123,8 @@ void d_prune_aliases(struct inode *inode)
	struct dentry *dentry;

	spin_lock(&inode->i_lock);
	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_lockref.count)
			to_shrink_list(dentry, &dispose);
		spin_unlock(&dentry->d_lock);
	}
	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias)
		d_dispose_if_unused(dentry, &dispose);
	spin_unlock(&inode->i_lock);
	shrink_dentry_list(&dispose);
}
@@ -1159,6 +1164,7 @@ void shrink_dentry_list(struct list_head *list)
		shrink_kill(dentry);
	}
}
EXPORT_SYMBOL(shrink_dentry_list);

static enum lru_status dentry_lru_isolate(struct list_head *item,
		struct list_lru_one *lru, void *arg)
+5 −4
Original line number Diff line number Diff line
@@ -846,7 +846,7 @@ void fuse_copy_init(struct fuse_copy_state *cs, bool write,
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
void fuse_copy_finish(struct fuse_copy_state *cs)
{
	if (cs->currbuf) {
		struct pipe_buffer *buf = cs->currbuf;
@@ -2041,13 +2041,14 @@ static int fuse_notify_resend(struct fuse_conn *fc)

/*
 * Increments the fuse connection epoch.  This will cause dentries from
 * previous epochs to be invalidated.
 *
 * XXX optimization: add call to shrink_dcache_sb()?
 * previous epochs to be invalidated.  Additionally, if inval_wq is set, a work
 * queue is scheduled to trigger the invalidation.
 */
static int fuse_notify_inc_epoch(struct fuse_conn *fc)
{
	atomic_inc(&fc->epoch);

	/* The epoch work item is only kicked when inval_wq is non-zero. */
	if (!inval_wq)
		return 0;

	schedule_work(&fc->epoch_work);
	return 0;
}
+8 −4
Original line number Diff line number Diff line
@@ -86,6 +86,7 @@ static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req,
	lockdep_assert_not_held(&queue->lock);
	spin_lock(&queue->lock);
	ent->fuse_req = NULL;
	list_del_init(&req->list);
	if (test_bit(FR_BACKGROUND, &req->flags)) {
		queue->active_background--;
		spin_lock(&fc->bg_lock);
@@ -598,7 +599,9 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
	cs.is_uring = true;
	cs.req = req;

	return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
	err = fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
	fuse_copy_finish(&cs);
	return err;
}

/*
@@ -649,6 +652,7 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
	/* copy the payload */
	err = fuse_copy_args(&cs, num_args, args->in_pages,
			     (struct fuse_arg *)in_args, 0);
	fuse_copy_finish(&cs);
	if (err) {
		pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
		return err;
+221 −27
Original line number Diff line number Diff line
@@ -27,40 +27,219 @@ module_param(allow_sys_admin_access, bool, 0644);
MODULE_PARM_DESC(allow_sys_admin_access,
		 "Allow users with CAP_SYS_ADMIN in initial userns to bypass allow_other access check");

static void fuse_advise_use_readdirplus(struct inode *dir)
/*
 * One bucket of the dentry expiry index: an rbtree of struct fuse_dentry
 * nodes keyed by their 'time' field, plus the spinlock that is held for
 * every insertion into and removal from that tree.
 */
struct dentry_bucket {
	struct rb_root tree;
	spinlock_t lock;
};

/*
 * Dentries are distributed over HASH_SIZE (1 << HASH_BITS) buckets; the
 * bucket is selected with hash_ptr() on the dentry pointer.
 */
#define HASH_BITS	5
#define HASH_SIZE	(1 << HASH_BITS)
static struct dentry_bucket dentry_hash[HASH_SIZE];
/*
 * Delayed work that runs fuse_dentry_tree_work().  It is armed when inval_wq
 * goes from zero to non-zero and re-arms itself every inval_wq seconds.
 * NOTE(review): this object is not static -- confirm whether anything outside
 * this file needs to reference it.
 */
struct delayed_work dentry_tree_work;

/* Minimum invalidation work queue frequency */
#define FUSE_DENTRY_INVAL_FREQ_MIN 5

unsigned __read_mostly inval_wq;
static int inval_wq_set(const char *val, const struct kernel_param *kp)
{
	struct fuse_inode *fi = get_fuse_inode(dir);
	unsigned int num;
	unsigned int old = inval_wq;
	int ret;

	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
	if (!val)
		return -EINVAL;

	ret = kstrtouint(val, 0, &num);
	if (ret)
		return ret;

	if ((num < FUSE_DENTRY_INVAL_FREQ_MIN) && (num != 0))
		return -EINVAL;

	/* This should prevent overflow in secs_to_jiffies() */
	if (num > USHRT_MAX)
		return -EINVAL;

	*((unsigned int *)kp->arg) = num;

	if (num && !old)
		schedule_delayed_work(&dentry_tree_work,
				      secs_to_jiffies(num));
	else if (!num && old)
		cancel_delayed_work_sync(&dentry_tree_work);

	return 0;
}
static const struct kernel_param_ops inval_wq_ops = {
	.set = inval_wq_set,
	.get = param_get_uint,
};
module_param_cb(inval_wq, &inval_wq_ops, &inval_wq, 0644);
__MODULE_PARM_TYPE(inval_wq, "uint");
MODULE_PARM_DESC(inval_wq,
		 "Dentries invalidation work queue period in secs (>= "
		 __stringify(FUSE_DENTRY_INVAL_FREQ_MIN) ").");

#if BITS_PER_LONG >= 64
static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
static inline struct dentry_bucket *get_dentry_bucket(struct dentry *dentry)
{
	entry->d_fsdata = (void *) time;
	int i = hash_ptr(dentry, HASH_BITS);

	return &dentry_hash[i];
}

static inline u64 fuse_dentry_time(const struct dentry *entry)
static void fuse_advise_use_readdirplus(struct inode *dir)
{
	return (u64)entry->d_fsdata;
	struct fuse_inode *fi = get_fuse_inode(dir);

	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
}

#else
union fuse_dentry {
struct fuse_dentry {
	u64 time;
	union {
		struct rcu_head rcu;
		struct rb_node node;
	};
	struct dentry *dentry;
};

/*
 * Unlink @fd from @bucket's tree if it is currently linked, and mark the node
 * as empty afterwards.  The callers in this file invoke it with bucket->lock
 * held.
 */
static void __fuse_dentry_tree_del_node(struct fuse_dentry *fd,
					struct dentry_bucket *bucket)
{
	if (RB_EMPTY_NODE(&fd->node))
		return;

	rb_erase(&fd->node, &bucket->tree);
	RB_CLEAR_NODE(&fd->node);
}

/*
 * Locked wrapper around __fuse_dentry_tree_del_node(): looks up the bucket
 * for @dentry and removes its fuse_dentry from that bucket's tree under the
 * bucket lock.
 */
static void fuse_dentry_tree_del_node(struct dentry *dentry)
{
	struct dentry_bucket *bucket = get_dentry_bucket(dentry);
	struct fuse_dentry *fd = dentry->d_fsdata;

	spin_lock(&bucket->lock);
	__fuse_dentry_tree_del_node(fd, bucket);
	spin_unlock(&bucket->lock);
}

/*
 * (Re)insert the fuse_dentry of @dentry into its bucket's tree, ordered by
 * the 'time' field.  Does nothing while inval_wq is zero.  A node that is
 * already linked is removed first so it can be re-linked at the position
 * matching its current time.
 */
static void fuse_dentry_tree_add_node(struct dentry *dentry)
{
	struct fuse_dentry *fd = dentry->d_fsdata;
	struct rb_node *parent = NULL;
	struct rb_node **link;
	struct dentry_bucket *bucket;

	if (!inval_wq)
		return;

	bucket = get_dentry_bucket(dentry);

	spin_lock(&bucket->lock);

	__fuse_dentry_tree_del_node(fd, bucket);

	/* Descend to a leaf slot: smaller times go left, everything else right. */
	link = &bucket->tree.rb_node;
	while (*link) {
		struct fuse_dentry *entry;

		parent = *link;
		entry = rb_entry(parent, struct fuse_dentry, node);
		link = (fd->time < entry->time) ? &parent->rb_left
						: &parent->rb_right;
	}
	rb_link_node(&fd->node, parent, link);
	rb_insert_color(&fd->node, &bucket->tree);

	spin_unlock(&bucket->lock);
}

/*
 * Delayed-work handler which, when enabled, periodically checks for expired
 * dentries in the dentries trees.
 *
 * Every bucket is scanned starting at rb_first().  A node whose 'time' is
 * already in the past is unlinked from the tree and its dentry is passed to
 * d_dispose_if_unused(); the scan of a bucket stops at the first node that has
 * not expired yet.  The dentries collected for a bucket are then handed to
 * shrink_dentry_list().  The handler re-arms itself for as long as inval_wq is
 * non-zero.
 */
static void fuse_dentry_tree_work(struct work_struct *work)
{
	LIST_HEAD(dispose);
	struct fuse_dentry *fd;
	struct rb_node *node;
	int i;

	for (i = 0; i < HASH_SIZE; i++) {
		spin_lock(&dentry_hash[i].lock);
		node = rb_first(&dentry_hash[i].tree);
		while (node) {
			fd = rb_entry(node, struct fuse_dentry, node);
			if (time_after64(get_jiffies_64(), fd->time)) {
				rb_erase(&fd->node, &dentry_hash[i].tree);
				RB_CLEAR_NODE(&fd->node);
				/*
				 * The bucket lock is released around the
				 * disposal step (which takes dentry->d_lock)
				 * and the reschedule point, then taken again.
				 */
				spin_unlock(&dentry_hash[i].lock);
				d_dispose_if_unused(fd->dentry, &dispose);
				cond_resched();
				spin_lock(&dentry_hash[i].lock);
			} else
				break;
			/*
			 * The tree may have changed while the lock was
			 * dropped, so restart from the first node.
			 */
			node = rb_first(&dentry_hash[i].tree);
		}
		spin_unlock(&dentry_hash[i].lock);
		/* Dispose of what was collected for this bucket. */
		shrink_dentry_list(&dispose);
	}

	/* Re-arm only while the module option is still enabled. */
	if (inval_wq)
		schedule_delayed_work(&dentry_tree_work,
				      secs_to_jiffies(inval_wq));
}

/*
 * Work handler for fc->epoch_work.
 *
 * With fc->killsb held for reading, looks up the root inode of the
 * connection and, if found, shrinks the dcache of the superblock of the
 * mount returned by that lookup.  A warning is logged when the root inode
 * cannot be found.
 */
void fuse_epoch_work(struct work_struct *work)
{
	struct fuse_conn *fc = container_of(work, struct fuse_conn,
					    epoch_work);
	struct fuse_mount *fm;
	struct inode *inode;

	down_read(&fc->killsb);

	/* fm is dereferenced only when the lookup returned an inode. */
	inode = fuse_ilookup(fc, FUSE_ROOT_ID, &fm);
	if (inode) {
		iput(inode);
		/* Remove all possible active references to cached inodes */
		shrink_dcache_sb(fm->sb);
	} else {
		/* Terminate the message so the log record is complete. */
		pr_warn("Failed to get root inode\n");
	}

	up_read(&fc->killsb);
}

/*
 * Set up the dentry expiry index: give every bucket an empty tree and an
 * initialised lock, and bind the delayed work to fuse_dentry_tree_work().
 */
void fuse_dentry_tree_init(void)
{
	int i;

	for (i = 0; i < HASH_SIZE; i++) {
		dentry_hash[i].tree = RB_ROOT;
		spin_lock_init(&dentry_hash[i].lock);
	}

	INIT_DELAYED_WORK(&dentry_tree_work, fuse_dentry_tree_work);
}

/*
 * Tear down the dentry expiry index: stop the periodic work and warn (once
 * per call site) if any bucket still has nodes linked.
 */
void fuse_dentry_tree_cleanup(void)
{
	int i;

	/*
	 * inval_wq is cleared before cancelling: fuse_dentry_tree_work()
	 * re-arms itself only while inval_wq is non-zero.
	 */
	inval_wq = 0;
	cancel_delayed_work_sync(&dentry_tree_work);

	for (i = 0; i < HASH_SIZE; i++)
		WARN_ON_ONCE(!RB_EMPTY_ROOT(&dentry_hash[i].tree));
}

static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	((union fuse_dentry *) dentry->d_fsdata)->time = time;
	((struct fuse_dentry *) dentry->d_fsdata)->time = time;
}

static inline u64 fuse_dentry_time(const struct dentry *entry)
{
	return ((union fuse_dentry *) entry->d_fsdata)->time;
	return ((struct fuse_dentry *) entry->d_fsdata)->time;
}
#endif

static void fuse_dentry_settime(struct dentry *dentry, u64 time)
{
@@ -81,6 +260,7 @@ static void fuse_dentry_settime(struct dentry *dentry, u64 time)
	}

	__fuse_dentry_settime(dentry, time);
	fuse_dentry_tree_add_node(dentry);
}

/*
@@ -283,21 +463,36 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
	goto out;
}

#if BITS_PER_LONG < 64
static int fuse_dentry_init(struct dentry *dentry)
{
	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
	struct fuse_dentry *fd;

	fd = kzalloc(sizeof(struct fuse_dentry),
			  GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
	if (!fd)
		return -ENOMEM;

	fd->dentry = dentry;
	RB_CLEAR_NODE(&fd->node);
	dentry->d_fsdata = fd;

	return 0;
}

	return dentry->d_fsdata ? 0 : -ENOMEM;
/*
 * ->d_prune callback: take the dentry out of the expiry tree if it is linked
 * there.  The emptiness test here is done without the bucket lock;
 * fuse_dentry_tree_del_node() repeats it with the lock held.
 */
static void fuse_dentry_prune(struct dentry *dentry)
{
	struct fuse_dentry *fd = dentry->d_fsdata;

	if (RB_EMPTY_NODE(&fd->node))
		return;

	fuse_dentry_tree_del_node(dentry);
}

static void fuse_dentry_release(struct dentry *dentry)
{
	union fuse_dentry *fd = dentry->d_fsdata;
	struct fuse_dentry *fd = dentry->d_fsdata;

	kfree_rcu(fd, rcu);
}
#endif

static int fuse_dentry_delete(const struct dentry *dentry)
{
@@ -331,10 +526,9 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
const struct dentry_operations fuse_dentry_operations = {
	.d_revalidate	= fuse_dentry_revalidate,
	.d_delete	= fuse_dentry_delete,
#if BITS_PER_LONG < 64
	.d_init		= fuse_dentry_init,
	.d_prune	= fuse_dentry_prune,
	.d_release	= fuse_dentry_release,
#endif
	.d_automount	= fuse_dentry_automount,
};

@@ -471,7 +665,7 @@ static int get_security_context(struct dentry *entry, umode_t mode,
	u32 total_len = sizeof(*header);
	int err, nr_ctx = 0;
	const char *name = NULL;
	size_t namelen;
	size_t namesize;

	err = security_dentry_init_security(entry, mode, &entry->d_name,
					    &name, &lsmctx);
@@ -482,12 +676,12 @@ static int get_security_context(struct dentry *entry, umode_t mode,

	if (lsmctx.len) {
		nr_ctx = 1;
		namelen = strlen(name) + 1;
		namesize = strlen(name) + 1;
		err = -EIO;
		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 ||
		if (WARN_ON(namesize > XATTR_NAME_MAX + 1 ||
		    lsmctx.len > S32_MAX))
			goto out_err;
		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen +
		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namesize +
					    lsmctx.len);
	}

@@ -504,8 +698,8 @@ static int get_security_context(struct dentry *entry, umode_t mode,
		fctx->size = lsmctx.len;
		ptr += sizeof(*fctx);

		strcpy(ptr, name);
		ptr += namelen;
		strscpy(ptr, name, namesize);
		ptr += namesize;

		memcpy(ptr, lsmctx.context, lsmctx.len);
	}
+29 −8
Original line number Diff line number Diff line
@@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
			fuse_file_io_release(ff, ra->inode);

		if (!args) {
			/* Do nothing when server does not implement 'open' */
			/* Do nothing when server does not implement 'opendir' */
		} else if (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) {
			fuse_release_end(ff->fm, args, 0);
		} else if (sync) {
			fuse_simple_request(ff->fm, args);
			fuse_release_end(ff->fm, args, 0);
@@ -131,8 +133,17 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
	struct fuse_file *ff;
	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
	bool open = isdir ? !fc->no_opendir : !fc->no_open;
	bool release = !isdir || open;

	ff = fuse_file_alloc(fm, open);
	/*
	 * ff->args->release_args still needs to be allocated (so we can hold an
	 * inode reference while there are pending inflight file operations when
	 * ->release() is called, see fuse_prepare_release()) even if
	 * fc->no_open is set else it becomes possible for reclaim to deadlock
	 * if while servicing the readahead request the server triggers reclaim
	 * and reclaim evicts the inode of the file being read ahead.
	 */
	ff = fuse_file_alloc(fm, release);
	if (!ff)
		return ERR_PTR(-ENOMEM);

@@ -152,15 +163,16 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
			fuse_file_free(ff);
			return ERR_PTR(err);
		} else {
			if (isdir) {
				/* No release needed */
				kfree(ff->args);
				ff->args = NULL;
			if (isdir)
				fc->no_opendir = 1;
			else
			} else {
				fc->no_open = 1;
			}
		}
	}

	if (isdir)
		ff->open_flags &= ~FOPEN_DIRECT_IO;
@@ -1652,7 +1664,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
	if (!ia)
		return -ENOMEM;

	if (fopen_direct_io && fc->direct_io_allow_mmap) {
	if (fopen_direct_io) {
		res = filemap_write_and_wait_range(mapping, pos, pos + count - 1);
		if (res) {
			fuse_io_free(ia);
@@ -1726,6 +1738,15 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
	if (res > 0)
		*ppos = pos;

	if (res > 0 && write && fopen_direct_io) {
		/*
		 * As in generic_file_direct_write(), invalidate after the
		 * write, to invalidate read-ahead cache that may have competed
		 * with the write.
		 */
		invalidate_inode_pages2_range(mapping, idx_from, idx_to);
	}

	return res > 0 ? res : err;
}
EXPORT_SYMBOL_GPL(fuse_direct_io);
Loading