Unverified commit c0410adf, authored by David Howells and committed by Christian Brauner
Browse files

afs: Fix the locking used by afs_get_link()

The afs filesystem in the kernel doesn't do locking correctly for symbolic
links.  There are a number of problems:

 (1) It doesn't do any locking around afs_read_single() to prevent races
     between multiple ->get_link() calls, thereby allowing the possibility
     of leaks.

 (2) It doesn't use RCU barriering when accessing the buffer pointers
     during RCU pathwalk.

 (3) It can race with another thread updating the contents of the symlink
     if a third party updated it on the server.

Fix this by the following means:

 (0) Move symlink handling into its own file as this change makes it more
     complicated.

 (1) Take the validate_lock around afs_read_single() to prevent races
     between multiple ->get_link() calls.

 (2) Keep a separate copy of the symlink contents with an rcu_head.  This
     is always going to be a lot smaller than a page, so it can be
     kmalloc'd and save quite a bit of memory.  It also needs a refcount
     for non-RCU pathwalk.

 (3) Split the symlink read and write-to-cache routines in afs from those
     for directories.

 (4) Discard the I/O buffer as soon as the write-to-cache completes as this
     is a full page (plus a folio_queue).

 (5) If there's no cache, discard the I/O buffer immediately after reading
     and copying.

Fixes: eae9e789 ("afs: Use netfslib for symlinks, allowing them to be cached")
Fixes: 6698c02d ("afs: Locally initialise the contents of a new symlink on creation")
Closes: https://sashiko.dev/#/patchset/20260326104544.509518-1-dhowells%40redhat.com


Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://patch.msgid.link/20260512123404.719402-25-dhowells@redhat.com


cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent 9871938f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ kafs-y := \
	server.o \
	server_list.o \
	super.o \
	symlink.o \
	validation.o \
	vlclient.o \
	vl_alias.o \
+34 −34
Original line number Diff line number Diff line
@@ -44,6 +44,8 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
		      struct dentry *old_dentry, struct inode *new_dir,
		      struct dentry *new_dentry, unsigned int flags);
static int afs_dir_writepages(struct address_space *mapping,
			      struct writeback_control *wbc);

const struct file_operations afs_dir_file_operations = {
	.open		= afs_dir_open,
@@ -68,7 +70,7 @@ const struct inode_operations afs_dir_inode_operations = {
};

const struct address_space_operations afs_dir_aops = {
	.writepages	= afs_single_writepages,
	.writepages	= afs_dir_writepages,
};

const struct dentry_operations afs_fs_dentry_operations = {
@@ -233,23 +235,14 @@ static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
	struct iov_iter iter;
	ssize_t ret;
	loff_t i_size;
	bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
		       !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));

	i_size = i_size_read(&dvnode->netfs.inode);
	if (is_dir) {
	if (i_size < AFS_DIR_BLOCK_SIZE)
		return afs_bad(dvnode, afs_file_error_dir_small);
	if (i_size > AFS_DIR_BLOCK_SIZE * 1024) {
		trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
		return -EFBIG;
	}
	} else {
		if (i_size > AFSPATHMAX) {
			trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
			return -EFBIG;
		}
	}

	/* Expand the storage.  TODO: Shrink the storage too. */
	if (dvnode->directory_size < i_size) {
@@ -277,24 +270,18 @@ static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
			 * buffer.
			 */
			ret = -ESTALE;
		} else if (is_dir) {
		} else {
			int ret2 = afs_dir_check(dvnode);

			if (ret2 < 0)
				ret = ret2;
		} else if (i_size < folioq_folio_size(dvnode->directory, 0)) {
			/* NUL-terminate a symlink. */
			char *symlink = kmap_local_folio(folioq_folio(dvnode->directory, 0), 0);

			symlink[i_size] = 0;
			kunmap_local(symlink);
		}
	}

	return ret;
}

ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
static ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
{
	ssize_t ret;

@@ -1763,13 +1750,20 @@ static int afs_link(struct dentry *from, struct inode *dir,
	return ret;
}

/*
 * Clean up a symlink-creation operation: free whatever symlink content copy
 * is still attached to the operation, clear the pointer, then pass the
 * operation on to afs_create_put() for the remaining cleanup.
 */
static void afs_symlink_put(struct afs_operation *op)
{
	/* NOTE(review): presumably NULL here if ownership of the copy was
	 * handed over elsewhere during the operation - confirm.
	 */
	kfree(op->create.symlink);
	op->create.symlink = NULL;
	afs_create_put(op);
}

static const struct afs_operation_ops afs_symlink_operation = {
	.issue_afs_rpc	= afs_fs_symlink,
	.issue_yfs_rpc	= yfs_fs_symlink,
	.success	= afs_create_success,
	.aborted	= afs_check_for_remote_deletion,
	.edit_dir	= afs_create_edit_dir,
	.put		= afs_create_put,
	.put		= afs_symlink_put,
};

/*
@@ -1779,7 +1773,9 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
		       struct dentry *dentry, const char *content)
{
	struct afs_operation *op;
	struct afs_symlink *symlink;
	struct afs_vnode *dvnode = AFS_FS_I(dir);
	size_t clen = strlen(content);
	int ret;

	_enter("{%llx:%llu},{%pd},%s",
@@ -1791,12 +1787,20 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
		goto error;

	ret = -EINVAL;
	if (strlen(content) >= AFSPATHMAX)
	if (clen >= AFSPATHMAX)
		goto error;

	ret = -ENOMEM;
	symlink = kmalloc_flex(struct afs_symlink, content, clen + 1, GFP_KERNEL);
	if (!symlink)
		goto error;
	refcount_set(&symlink->ref, 1);
	memcpy(symlink->content, content, clen + 1);

	op = afs_alloc_operation(NULL, dvnode->volume);
	if (IS_ERR(op)) {
		ret = PTR_ERR(op);
		kfree(symlink);
		goto error;
	}

@@ -1808,7 +1812,7 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
	op->dentry		= dentry;
	op->ops			= &afs_symlink_operation;
	op->create.reason	= afs_edit_dir_for_symlink;
	op->create.symlink	= content;
	op->create.symlink	= symlink;
	op->mtime		= current_time(dir);
	ret = afs_do_sync_operation(op);
	afs_dir_unuse_cookie(dvnode, ret);
@@ -2192,15 +2196,13 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
}

/*
 * Write the file contents to the cache as a single blob.
 * Write the directory contents to the cache as a single blob.
 */
int afs_single_writepages(struct address_space *mapping,
static int afs_dir_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	struct afs_vnode *dvnode = AFS_FS_I(mapping->host);
	struct iov_iter iter;
	bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
		       !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
	int ret = 0;

	/* Need to lock to prevent the folio queue and folios from being thrown
@@ -2215,9 +2217,7 @@ int afs_single_writepages(struct address_space *mapping,
		down_read(&dvnode->validate_lock);
	}

	if (is_dir ?
	    test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) :
	    atomic64_read(&dvnode->cb_expires_at) != AFS_NO_CB_PROMISE) {
	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
		iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0,
				     i_size_read(&dvnode->netfs.inode));
		ret = netfs_writeback_single(mapping, wbc, &iter);
+2 −2
Original line number Diff line number Diff line
@@ -886,7 +886,7 @@ void afs_fs_symlink(struct afs_operation *op)
	namesz = name->len;
	padsz = (4 - (namesz & 3)) & 3;

	c_namesz = strlen(op->create.symlink);
	c_namesz = strlen(op->create.symlink->content);
	c_padsz = (4 - (c_namesz & 3)) & 3;

	reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4);
@@ -910,7 +910,7 @@ void afs_fs_symlink(struct afs_operation *op)
		bp = (void *) bp + padsz;
	}
	*bp++ = htonl(c_namesz);
	memcpy(bp, op->create.symlink, c_namesz);
	memcpy(bp, op->create.symlink->content, c_namesz);
	bp = (void *) bp + c_namesz;
	if (c_padsz > 0) {
		memset(bp, 0, c_padsz);
+4 −92
Original line number Diff line number Diff line
@@ -25,96 +25,6 @@
#include "internal.h"
#include "afs_fs.h"

/*
 * Locally initialise the content buffer of a newly created symlink from the
 * string carried in op->create.symlink, so that it is marked as already read.
 *
 * A folio-queue buffer large enough for the string plus its NUL terminator is
 * allocated and attached to vnode->directory.  If that allocation fails the
 * function returns without setting any state and without reporting the
 * failure to the caller.
 */
void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
{
	/* Size includes the terminating NUL. */
	size_t size = strlen(op->create.symlink) + 1;
	size_t dsize = 0;
	char *p;

	if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size,
				      mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
		return;

	/* Record the size the allocator reported back through dsize. */
	vnode->directory_size = dsize;

	/* Copy the string (with its NUL) to the start of the first folio. */
	p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
	memcpy(p, op->create.symlink, size);
	kunmap_local(p);

	/* Flag the content as read and mark the inode dirty. */
	set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
	netfs_single_mark_inode_dirty(&vnode->netfs.inode);
}

/*
 * Delayed-call handler registered by afs_get_link(): @arg is the mapped
 * address of the symlink content.  Undo the mapping and drop the folio
 * reference that afs_get_link() took.
 */
static void afs_put_link(void *arg)
{
	/* Look the folio up from the address before the mapping is undone. */
	struct folio *folio = virt_to_folio(arg);

	kunmap_local(arg);
	folio_put(folio);
}

/*
 * Return a pointer to the symlink's content.
 *
 * A NULL @dentry is treated as RCU pathwalk: neither afs_validate() nor
 * afs_read_single() is called, and -ECHILD is returned unless the content has
 * already been read (AFS_VNODE_DIR_READ) and afs_check_validity() passes.
 * Otherwise the vnode is validated and/or the content is read with
 * afs_read_single().
 *
 * On success a reference is taken on the first folio of vnode->directory, the
 * folio is mapped, and afs_put_link() is registered on @callback to undo
 * both.  Failures are returned as ERR_PTR() values.
 *
 * No lock is taken in this function around afs_read_single() or around the
 * accesses to vnode->directory.
 */
const char *afs_get_link(struct dentry *dentry, struct inode *inode,
			 struct delayed_call *callback)
{
	struct afs_vnode *vnode = AFS_FS_I(inode);
	struct folio *folio;
	char *content;
	ssize_t ret;

	if (!dentry) {
		/* RCU pathwalk. */
		if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode))
			return ERR_PTR(-ECHILD);
		goto good;
	}

	/* NOTE(review): this jumps straight to the fetch when the content is
	 * already flagged as read, skipping afs_validate(); the test looks
	 * inverted - confirm the intent.
	 */
	if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
		goto fetch;

	ret = afs_validate(vnode, NULL);
	if (ret < 0)
		return ERR_PTR(ret);

	/* Reuse the buffer only if no data zap was pending (the flag is
	 * cleared by this test) and the content has been read.
	 */
	if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
	    test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
		goto good;

fetch:
	ret = afs_read_single(vnode, NULL);
	if (ret < 0)
		return ERR_PTR(ret);
	set_bit(AFS_VNODE_DIR_READ, &vnode->flags);

good:
	/* Pin and map the first folio; afs_put_link() reverses both. */
	folio = folioq_folio(vnode->directory, 0);
	folio_get(folio);
	content = kmap_local_folio(folio, 0);
	set_delayed_call(callback, afs_put_link, content);
	return content;
}

/*
 * Copy the symlink's content to a userspace buffer.
 *
 * The content is obtained through afs_get_link() and at most @buflen bytes of
 * it are copied to @buffer; the NUL terminator is not copied.
 *
 * Returns the number of bytes copied, -EFAULT if the copy to userspace
 * fails, or the error reported by afs_get_link().
 */
int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
	DEFINE_DELAYED_CALL(done);
	const char *content;
	int len;

	content = afs_get_link(dentry, d_inode(dentry), &done);
	if (IS_ERR(content)) {
		/* afs_get_link() only arms the delayed call on success, so
		 * this is presumably a no-op on the error path.
		 */
		do_delayed_call(&done);
		return PTR_ERR(content);
	}

	len = umin(strlen(content), buflen);
	if (copy_to_user(buffer, content, len))
		len = -EFAULT;
	/* Release the mapping and folio reference taken by afs_get_link(). */
	do_delayed_call(&done);
	return len;
}

/*
 * Inode operations installed on AFS symlink inodes (S_IFLNK) by
 * afs_inode_init_from_status().
 */
static const struct inode_operations afs_symlink_inode_operations = {
	.get_link	= afs_get_link,
	.readlink	= afs_readlink,
};

static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
{
	static unsigned long once_only;
@@ -214,7 +124,7 @@ static int afs_inode_init_from_status(struct afs_operation *op,
			inode->i_mode	= S_IFLNK | status->mode;
			inode->i_op	= &afs_symlink_inode_operations;
		}
		inode->i_mapping->a_ops	= &afs_dir_aops;
		inode->i_mapping->a_ops	= &afs_symlink_aops;
		inode_nohighmem(inode);
		mapping_set_release_always(inode->i_mapping);
		break;
@@ -769,12 +679,14 @@ void afs_evict_inode(struct inode *inode)
			.range_end = LLONG_MAX,
		};

		afs_single_writepages(inode->i_mapping, &wbc);
		inode->i_mapping->a_ops->writepages(inode->i_mapping, &wbc);
	}

	netfs_wait_for_outstanding_io(inode);
	truncate_inode_pages_final(&inode->i_data);
	netfs_free_folioq_buffer(vnode->directory);
	if (vnode->symlink)
		afs_evict_symlink(vnode);

	afs_set_cache_aux(vnode, &aux);
	netfs_clear_inode_writeback(inode, &aux);
+26 −8
Original line number Diff line number Diff line
@@ -710,6 +710,7 @@ struct afs_vnode {
#define AFS_VNODE_DIR_READ	11		/* Set if we've read a dir's contents */

	struct folio_queue	*directory;	/* Directory contents */
	struct afs_symlink __rcu *symlink;	/* Symlink content */
	struct list_head	wb_keys;	/* List of keys available for writeback */
	struct list_head	pending_locks;	/* locks waiting to be granted */
	struct list_head	granted_locks;	/* locks granted on this file */
@@ -776,6 +777,15 @@ struct afs_permits {
	struct afs_permit	permits[] __counted_by(nr_permits);	/* List of permits sorted by key pointer */
};

/*
 * Copy of symlink content for normal use.
 */
struct afs_symlink {
	struct rcu_head		rcu;		/* Presumably for RCU-deferred freeing - confirm against symlink.c */
	refcount_t		ref;		/* Reference count; set to 1 when allocated in afs_symlink() */
	char			content[];	/* Symlink target, allocated and copied with its NUL terminator */
};

/*
 * Error prioritisation and accumulation.
 */
@@ -887,7 +897,7 @@ struct afs_operation {
		struct {
			int	reason;		/* enum afs_edit_dir_reason */
			mode_t	mode;
			const char *symlink;
			struct afs_symlink *symlink;
		} create;
		struct {
			bool	need_rehash;
@@ -1098,13 +1108,10 @@ extern const struct inode_operations afs_dir_inode_operations;
extern const struct address_space_operations afs_dir_aops;
extern const struct dentry_operations afs_fs_dentry_operations;

ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file);
ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
	__acquires(&dvnode->validate_lock);
extern void afs_d_release(struct dentry *);
extern void afs_check_for_remote_deletion(struct afs_operation *);
int afs_single_writepages(struct address_space *mapping,
			  struct writeback_control *wbc);

/*
 * dir_edit.c
@@ -1247,10 +1254,6 @@ extern void afs_fs_probe_cleanup(struct afs_net *);
 */
extern const struct afs_operation_ops afs_fetch_status_operation;

void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
const char *afs_get_link(struct dentry *dentry, struct inode *inode,
			 struct delayed_call *callback);
int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *);
extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
extern int afs_ilookup5_test_by_fid(struct inode *, void *);
@@ -1600,6 +1603,21 @@ void afs_detach_volume_from_servers(struct afs_volume *volume, struct afs_server
extern int __init afs_fs_init(void);
extern void afs_fs_exit(void);

/*
 * symlink.c
 */
extern const struct inode_operations afs_symlink_inode_operations;
extern const struct address_space_operations afs_symlink_aops;

void afs_invalidate_symlink(struct afs_vnode *vnode);
void afs_evict_symlink(struct afs_vnode *vnode);
void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
const char *afs_get_link(struct dentry *dentry, struct inode *inode,
			 struct delayed_call *callback);
int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
int afs_symlink_writepages(struct address_space *mapping,
			   struct writeback_control *wbc);

/*
 * validation.c
 */
Loading