Commit 5cd64d4f authored by Linus Torvalds
Browse files

Merge tag 'ceph-for-6.17-rc6' of https://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "A fix for a race condition around r_parent tracking that took a long
  time to track down from Alex and some fixes for potential crashes on
  accessing invalid memory from Max and myself.

  All marked for stable"

* tag 'ceph-for-6.17-rc6' of https://github.com/ceph/ceph-client:
  libceph: fix invalid accesses to ceph_connection_v1_info
  ceph: fix crash after fscrypt_encrypt_pagecache_blocks() error
  ceph: always call ceph_shift_unused_folios_left()
  ceph: fix race condition where r_parent becomes stale before sending message
  ceph: fix race condition validating r_parent before applying state
parents 395d68e5 cdbc9836
Loading
Loading
Loading
Loading
+5 −4
Original line number Diff line number Diff line
@@ -1264,7 +1264,9 @@ static inline int move_dirty_folio_in_page_array(struct address_space *mapping,
								0,
								gfp_flags);
		if (IS_ERR(pages[index])) {
			if (PTR_ERR(pages[index]) == -EINVAL) {
			int err = PTR_ERR(pages[index]);

			if (err == -EINVAL) {
				pr_err_client(cl, "inode->i_blkbits=%hhu\n",
						inode->i_blkbits);
			}
@@ -1273,7 +1275,7 @@ static inline int move_dirty_folio_in_page_array(struct address_space *mapping,
			BUG_ON(ceph_wbc->locked_pages == 0);

			pages[index] = NULL;
			return PTR_ERR(pages[index]);
			return err;
		}
	} else {
		pages[index] = &folio->page;
@@ -1687,6 +1689,7 @@ static int ceph_writepages_start(struct address_space *mapping,

process_folio_batch:
		rc = ceph_process_folio_batch(mapping, wbc, &ceph_wbc);
		ceph_shift_unused_folios_left(&ceph_wbc.fbatch);
		if (rc)
			goto release_folios;

@@ -1695,8 +1698,6 @@ static int ceph_writepages_start(struct address_space *mapping,
			goto release_folios;

		if (ceph_wbc.processed_in_fbatch) {
			ceph_shift_unused_folios_left(&ceph_wbc.fbatch);

			if (folio_batch_count(&ceph_wbc.fbatch) == 0 &&
			    ceph_wbc.locked_pages < ceph_wbc.max_pages) {
				doutc(cl, "reached end fbatch, trying for more\n");
+6 −8
Original line number Diff line number Diff line
@@ -55,8 +55,6 @@ static int mdsc_show(struct seq_file *s, void *p)
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_mds_request *req;
	struct rb_node *rp;
	int pathlen = 0;
	u64 pathbase;
	char *path;

	mutex_lock(&mdsc->mutex);
@@ -81,8 +79,8 @@ static int mdsc_show(struct seq_file *s, void *p)
		if (req->r_inode) {
			seq_printf(s, " #%llx", ceph_ino(req->r_inode));
		} else if (req->r_dentry) {
			path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen,
						    &pathbase, 0);
			struct ceph_path_info path_info;
			path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0);
			if (IS_ERR(path))
				path = NULL;
			spin_lock(&req->r_dentry->d_lock);
@@ -91,7 +89,7 @@ static int mdsc_show(struct seq_file *s, void *p)
				   req->r_dentry,
				   path ? path : "");
			spin_unlock(&req->r_dentry->d_lock);
			ceph_mdsc_free_path(path, pathlen);
			ceph_mdsc_free_path_info(&path_info);
		} else if (req->r_path1) {
			seq_printf(s, " #%llx/%s", req->r_ino1.ino,
				   req->r_path1);
@@ -100,8 +98,8 @@ static int mdsc_show(struct seq_file *s, void *p)
		}

		if (req->r_old_dentry) {
			path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &pathlen,
						    &pathbase, 0);
			struct ceph_path_info path_info;
			path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, &path_info, 0);
			if (IS_ERR(path))
				path = NULL;
			spin_lock(&req->r_old_dentry->d_lock);
@@ -111,7 +109,7 @@ static int mdsc_show(struct seq_file *s, void *p)
				   req->r_old_dentry,
				   path ? path : "");
			spin_unlock(&req->r_old_dentry->d_lock);
			ceph_mdsc_free_path(path, pathlen);
			ceph_mdsc_free_path_info(&path_info);
		} else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
			if (req->r_ino2.ino)
				seq_printf(s, " #%llx/%s", req->r_ino2.ino,
+7 −10
Original line number Diff line number Diff line
@@ -1271,10 +1271,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,

	/* If op failed, mark everyone involved for errors */
	if (result) {
		int pathlen = 0;
		u64 base = 0;
		char *path = ceph_mdsc_build_path(mdsc, dentry, &pathlen,
						  &base, 0);
		struct ceph_path_info path_info = {0};
		char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);

		/* mark error on parent + clear complete */
		mapping_set_error(req->r_parent->i_mapping, result);
@@ -1288,8 +1286,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
		mapping_set_error(req->r_old_inode->i_mapping, result);

		pr_warn_client(cl, "failure path=(%llx)%s result=%d!\n",
			       base, IS_ERR(path) ? "<<bad>>" : path, result);
		ceph_mdsc_free_path(path, pathlen);
			       path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
		ceph_mdsc_free_path_info(&path_info);
	}
out:
	iput(req->r_old_inode);
@@ -1347,8 +1345,6 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
	int err = -EROFS;
	int op;
	char *path;
	int pathlen;
	u64 pathbase;

	if (ceph_snap(dir) == CEPH_SNAPDIR) {
		/* rmdir .snap/foo is RMSNAP */
@@ -1367,14 +1363,15 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
	if (!dn) {
		try_async = false;
	} else {
		path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
		struct ceph_path_info path_info;
		path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0);
		if (IS_ERR(path)) {
			try_async = false;
			err = 0;
		} else {
			err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
		}
		ceph_mdsc_free_path(path, pathlen);
		ceph_mdsc_free_path_info(&path_info);
		dput(dn);

		/* For non-EACCES cases, let the MDS do the mds auth check */
+10 −14
Original line number Diff line number Diff line
@@ -368,8 +368,6 @@ int ceph_open(struct inode *inode, struct file *file)
	int flags, fmode, wanted;
	struct dentry *dentry;
	char *path;
	int pathlen;
	u64 pathbase;
	bool do_sync = false;
	int mask = MAY_READ;

@@ -399,14 +397,15 @@ int ceph_open(struct inode *inode, struct file *file)
	if (!dentry) {
		do_sync = true;
	} else {
		path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
		struct ceph_path_info path_info;
		path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
		if (IS_ERR(path)) {
			do_sync = true;
			err = 0;
		} else {
			err = ceph_mds_check_access(mdsc, path, mask);
		}
		ceph_mdsc_free_path(path, pathlen);
		ceph_mdsc_free_path_info(&path_info);
		dput(dentry);

		/* For non-EACCES cases, let the MDS do the mds auth check */
@@ -614,15 +613,13 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
	mapping_set_error(req->r_parent->i_mapping, result);

	if (result) {
		int pathlen = 0;
		u64 base = 0;
		char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &pathlen,
						  &base, 0);
		struct ceph_path_info path_info = {0};
		char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, &path_info, 0);

		pr_warn_client(cl,
			"async create failure path=(%llx)%s result=%d!\n",
			base, IS_ERR(path) ? "<<bad>>" : path, result);
		ceph_mdsc_free_path(path, pathlen);
			path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
		ceph_mdsc_free_path_info(&path_info);

		ceph_dir_clear_complete(req->r_parent);
		if (!d_unhashed(dentry))
@@ -791,8 +788,6 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
	int mask;
	int err;
	char *path;
	int pathlen;
	u64 pathbase;

	doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n",
	      dir, ceph_vinop(dir), dentry, dentry,
@@ -814,7 +809,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
	if (!dn) {
		try_async = false;
	} else {
		path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0);
		struct ceph_path_info path_info;
		path = ceph_mdsc_build_path(mdsc, dn, &path_info, 0);
		if (IS_ERR(path)) {
			try_async = false;
			err = 0;
@@ -826,7 +822,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
				mask |= MAY_WRITE;
			err = ceph_mds_check_access(mdsc, path, mask);
		}
		ceph_mdsc_free_path(path, pathlen);
		ceph_mdsc_free_path_info(&path_info);
		dput(dn);

		/* For non-EACCES cases, let the MDS do the mds auth check */
+72 −16
Original line number Diff line number Diff line
@@ -55,6 +55,52 @@ static int ceph_set_ino_cb(struct inode *inode, void *data)
	return 0;
}

/*
 * Report whether @parent is the inode identified by @vino, i.e. whether both
 * its inode number and its snapshot id equal the ones carried in @vino.
 */
static inline bool ceph_vino_matches_parent(struct inode *parent,
					    struct ceph_vino vino)
{
	/* A different inode number settles it; the snapshot id is not consulted. */
	if (ceph_ino(parent) != vino.ino)
		return false;

	return ceph_snap(parent) == vino.snap;
}

/*
 * Pick the directory inode that the reply's directory record (rinfo->diri)
 * should be applied to.
 *
 * @parent (normally req->r_parent) is compared against the inode number and
 * snapshot id carried in rinfo->diri.in.  The return value is one of:
 *
 *  - @parent itself, when the reply carries no directory record or when the
 *    record matches @parent.  No additional reference is taken on this path.
 *  - NULL, when the reply carries a directory record but @parent is NULL.
 *  - the result of ceph_get_inode() for the reply's vino, when the record does
 *    not match @parent.  A one-time warning is emitted before the lookup.
 *
 * NOTE(review): the caller in ceph_fill_trace() tests the result with IS_ERR()
 * and iput()s it only when it differs from req->r_parent, so the
 * ceph_get_inode() result is presumably either an ERR_PTR or an inode with a
 * reference the caller must drop -- confirm against ceph_get_inode().
 */
static struct inode *ceph_get_reply_dir(struct super_block *sb,
					struct inode *parent,
					struct ceph_mds_reply_info_parsed *rinfo)
{
	struct ceph_vino vino;

	if (unlikely(!rinfo->diri.in))
		return parent; /* no directory record: nothing to compare against */

	/* Without a parent inode to validate there is nothing to return. */
	if (!parent)
		return NULL;

	/* The reply stores ino and snapid as little-endian 64-bit values. */
	vino.ino  = le64_to_cpu(rinfo->diri.in->ino);
	vino.snap = le64_to_cpu(rinfo->diri.in->snapid);

	if (likely(ceph_vino_matches_parent(parent, vino)))
		return parent; /* matches: hand back the caller's own pointer */

	/* Mismatch: warn once, then look up the inode the reply refers to. */
	WARN_ONCE(1, "ceph: reply dir mismatch (parent valid %llx.%llx reply %llx.%llx)\n",
		  ceph_ino(parent), ceph_snap(parent), vino.ino, vino.snap);

	return ceph_get_inode(sb, vino, NULL);
}

/**
 * ceph_new_inode - allocate a new inode in advance of an expected create
 * @dir: parent directory for new inode
@@ -1523,6 +1569,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
	struct ceph_vino tvino, dvino;
	struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
	struct ceph_client *cl = fsc->client;
	struct inode *parent_dir = NULL;
	int err = 0;

	doutc(cl, "%p is_dentry %d is_target %d\n", req,
@@ -1536,10 +1583,17 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
	}

	if (rinfo->head->is_dentry) {
		struct inode *dir = req->r_parent;

		if (dir) {
			err = ceph_fill_inode(dir, NULL, &rinfo->diri,
		/*
		 * r_parent may be stale, in cases when R_PARENT_LOCKED is not set,
		 * so we need to get the correct inode
		 */
		parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo);
		if (unlikely(IS_ERR(parent_dir))) {
			err = PTR_ERR(parent_dir);
			goto done;
		}
		if (parent_dir) {
			err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri,
					      rinfo->dirfrag, session, -1,
					      &req->r_caps_reservation);
			if (err < 0)
@@ -1548,14 +1602,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
			WARN_ON_ONCE(1);
		}

		if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
		if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
		    test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
		    !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
			bool is_nokey = false;
			struct qstr dname;
			struct dentry *dn, *parent;
			struct fscrypt_str oname = FSTR_INIT(NULL, 0);
			struct ceph_fname fname = { .dir	= dir,
			struct ceph_fname fname = { .dir	= parent_dir,
						    .name	= rinfo->dname,
						    .ctext	= rinfo->altname,
						    .name_len	= rinfo->dname_len,
@@ -1564,10 +1618,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
			BUG_ON(!rinfo->head->is_target);
			BUG_ON(req->r_dentry);

			parent = d_find_any_alias(dir);
			parent = d_find_any_alias(parent_dir);
			BUG_ON(!parent);

			err = ceph_fname_alloc_buffer(dir, &oname);
			err = ceph_fname_alloc_buffer(parent_dir, &oname);
			if (err < 0) {
				dput(parent);
				goto done;
@@ -1576,7 +1630,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
			err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
			if (err < 0) {
				dput(parent);
				ceph_fname_free_buffer(dir, &oname);
				ceph_fname_free_buffer(parent_dir, &oname);
				goto done;
			}
			dname.name = oname.name;
@@ -1595,7 +1649,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
				      dname.len, dname.name, dn);
				if (!dn) {
					dput(parent);
					ceph_fname_free_buffer(dir, &oname);
					ceph_fname_free_buffer(parent_dir, &oname);
					err = -ENOMEM;
					goto done;
				}
@@ -1610,12 +1664,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
				    ceph_snap(d_inode(dn)) != tvino.snap)) {
				doutc(cl, " dn %p points to wrong inode %p\n",
				      dn, d_inode(dn));
				ceph_dir_clear_ordered(dir);
				ceph_dir_clear_ordered(parent_dir);
				d_delete(dn);
				dput(dn);
				goto retry_lookup;
			}
			ceph_fname_free_buffer(dir, &oname);
			ceph_fname_free_buffer(parent_dir, &oname);

			req->r_dentry = dn;
			dput(parent);
@@ -1794,6 +1848,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
					    &dvino, ptvino);
	}
done:
	/* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */
	if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent))
		iput(parent_dir);
	doutc(cl, "done err=%d\n", err);
	return err;
}
@@ -2487,22 +2544,21 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode,
	int truncate_retry = 20; /* The RMW will take around 50ms */
	struct dentry *dentry;
	char *path;
	int pathlen;
	u64 pathbase;
	bool do_sync = false;

	dentry = d_find_alias(inode);
	if (!dentry) {
		do_sync = true;
	} else {
		path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0);
		struct ceph_path_info path_info;
		path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0);
		if (IS_ERR(path)) {
			do_sync = true;
			err = 0;
		} else {
			err = ceph_mds_check_access(mdsc, path, MAY_WRITE);
		}
		ceph_mdsc_free_path(path, pathlen);
		ceph_mdsc_free_path_info(&path_info);
		dput(dentry);

		/* For non-EACCES cases, let the MDS do the mds auth check */
Loading