Commit 8bd9238e authored by Linus Torvalds
Browse files

Merge tag 'ceph-for-6.18-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:

 - some messenger improvements (Eric and Max)

 - address an issue (which also affected userspace) of incorrect permissions
   being granted to users who have access to multiple different CephFS
   instances within the same cluster (Kotresh)

 - a bunch of assorted CephFS fixes (Slava)

* tag 'ceph-for-6.18-rc1' of https://github.com/ceph/ceph-client:
  ceph: add bug tracking system info to MAINTAINERS
  ceph: fix multifs mds auth caps issue
  ceph: cleanup in ceph_alloc_readdir_reply_buffer()
  ceph: fix potential NULL dereference issue in ceph_fill_trace()
  libceph: add empty check to ceph_con_get_out_msg()
  libceph: pass the message pointer instead of loading con->out_msg
  libceph: make ceph_con_get_out_msg() return the message pointer
  ceph: fix potential race condition on operations with CEPH_I_ODIRECT flag
  ceph: refactor wake_up_bit() pattern of calling
  ceph: fix potential race condition in ceph_ioctl_lazyio()
  ceph: fix overflowed constant issue in ceph_do_objects_copy()
  ceph: fix wrong sizeof argument issue in register_session()
  ceph: add checking of wait_for_completion_killable() return value
  ceph: make ceph_start_io_*() killable
  libceph: Use HMAC-SHA256 library instead of crypto_shash
parents 91b436fc d74d6c0e
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -5694,6 +5694,7 @@ M: Xiubo Li <xiubli@redhat.com>
L:	ceph-devel@vger.kernel.org
S:	Supported
W:	http://ceph.com/
B:	https://tracker.ceph.com/
T:	git https://github.com/ceph/ceph-client.git
F:	include/linux/ceph/
F:	include/linux/crush/
@@ -5705,6 +5706,7 @@ M: Ilya Dryomov <idryomov@gmail.com>
L:	ceph-devel@vger.kernel.org
S:	Supported
W:	http://ceph.com/
B:	https://tracker.ceph.com/
T:	git https://github.com/ceph/ceph-client.git
F:	Documentation/filesystems/ceph.rst
F:	fs/ceph/
@@ -21357,6 +21359,7 @@ R: Dongsheng Yang <dongsheng.yang@easystack.cn>
L:	ceph-devel@vger.kernel.org
S:	Supported
W:	http://ceph.com/
B:	https://tracker.ceph.com/
T:	git https://github.com/ceph/ceph-client.git
F:	Documentation/ABI/testing/sysfs-bus-rbd
F:	drivers/block/rbd.c
+1 −2
Original line number Diff line number Diff line
@@ -1260,8 +1260,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
	spin_unlock(&fsc->async_unlink_conflict_lock);

	spin_lock(&dentry->d_lock);
	di->flags &= ~CEPH_DENTRY_ASYNC_UNLINK;
	wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT);
	clear_and_wake_up_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags);
	spin_unlock(&dentry->d_lock);

	synchronize_rcu();
+15 −15
Original line number Diff line number Diff line
@@ -579,8 +579,7 @@ static void wake_async_create_waiters(struct inode *inode,

	spin_lock(&ci->i_ceph_lock);
	if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) {
		ci->i_ceph_flags &= ~CEPH_I_ASYNC_CREATE;
		wake_up_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT);
		clear_and_wake_up_bit(CEPH_ASYNC_CREATE_BIT, &ci->i_ceph_flags);

		if (ci->i_ceph_flags & CEPH_I_ASYNC_CHECK_CAPS) {
			ci->i_ceph_flags &= ~CEPH_I_ASYNC_CHECK_CAPS;
@@ -762,8 +761,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode,
	}

	spin_lock(&dentry->d_lock);
	di->flags &= ~CEPH_DENTRY_ASYNC_CREATE;
	wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_CREATE_BIT);
	clear_and_wake_up_bit(CEPH_DENTRY_ASYNC_CREATE_BIT, &di->flags);
	spin_unlock(&dentry->d_lock);

	return ret;
@@ -2121,10 +2119,10 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
	if (ceph_inode_is_shutdown(inode))
		return -ESTALE;

	if (direct_lock)
		ceph_start_io_direct(inode);
	else
	ret = direct_lock ? ceph_start_io_direct(inode) :
			    ceph_start_io_read(inode);
	if (ret)
		return ret;

	if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
		want |= CEPH_CAP_FILE_CACHE;
@@ -2277,7 +2275,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
	    (fi->flags & CEPH_F_SYNC))
		return copy_splice_read(in, ppos, pipe, len, flags);

	ceph_start_io_read(inode);
	ret = ceph_start_io_read(inode);
	if (ret)
		return ret;

	want = CEPH_CAP_FILE_CACHE;
	if (fi->fmode & CEPH_FILE_MODE_LAZY)
@@ -2356,10 +2356,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
		direct_lock = true;

retry_snap:
	if (direct_lock)
		ceph_start_io_direct(inode);
	else
	err = direct_lock ? ceph_start_io_direct(inode) :
			    ceph_start_io_write(inode);
	if (err)
		goto out_unlocked;

	if (iocb->ki_flags & IOCB_APPEND) {
		err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
@@ -2878,7 +2878,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
	struct ceph_object_id src_oid, dst_oid;
	struct ceph_osd_client *osdc;
	struct ceph_osd_request *req;
	size_t bytes = 0;
	ssize_t bytes = 0;
	u64 src_objnum, src_objoff, dst_objnum, dst_objoff;
	u32 src_objlen, dst_objlen;
	u32 object_size = src_ci->i_layout.object_size;
@@ -2928,7 +2928,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
					"OSDs don't support copy-from2; disabling copy offload\n");
			}
			doutc(cl, "returned %d\n", ret);
			if (!bytes)
			if (bytes <= 0)
				bytes = ret;
			goto out;
		}
+11 −0
Original line number Diff line number Diff line
@@ -1794,6 +1794,11 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
			goto done;
		}

		if (unlikely(!in)) {
			err = -EINVAL;
			goto done;
		}

		/* attach proper inode */
		if (d_really_is_negative(dn)) {
			ceph_dir_clear_ordered(dir);
@@ -1829,6 +1834,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
		doutc(cl, " linking snapped dir %p to dn %p\n", in,
		      req->r_dentry);
		ceph_dir_clear_ordered(dir);

		if (unlikely(!in)) {
			err = -EINVAL;
			goto done;
		}

		ihold(in);
		err = splice_dentry(&req->r_dentry, in);
		if (err < 0)
+75 −25
Original line number Diff line number Diff line
@@ -21,15 +21,24 @@
/* Call with exclusively locked inode->i_rwsem */
static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
{
	bool is_odirect;

	lockdep_assert_held_write(&inode->i_rwsem);

	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT) {
	spin_lock(&ci->i_ceph_lock);
		ci->i_ceph_flags &= ~CEPH_I_ODIRECT;
	/* ensure that bit state is consistent */
	smp_mb__before_atomic();
	is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
	if (is_odirect) {
		clear_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
		/* ensure modified bit is visible */
		smp_mb__after_atomic();
	}
	spin_unlock(&ci->i_ceph_lock);

	if (is_odirect)
		inode_dio_wait(inode);
}
}

/**
 * ceph_start_io_read - declare the file is being used for buffered reads
@@ -47,20 +56,35 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
 * Note that buffered writes and truncates both take a write lock on
 * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
 */
void
ceph_start_io_read(struct inode *inode)
int ceph_start_io_read(struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	bool is_odirect;
	int err;

	/* Be an optimist! */
	down_read(&inode->i_rwsem);
	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
		return;
	err = down_read_killable(&inode->i_rwsem);
	if (err)
		return err;

	spin_lock(&ci->i_ceph_lock);
	/* ensure that bit state is consistent */
	smp_mb__before_atomic();
	is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
	spin_unlock(&ci->i_ceph_lock);
	if (!is_odirect)
		return 0;
	up_read(&inode->i_rwsem);

	/* Slow path.... */
	down_write(&inode->i_rwsem);
	err = down_write_killable(&inode->i_rwsem);
	if (err)
		return err;

	ceph_block_o_direct(ci, inode);
	downgrade_write(&inode->i_rwsem);

	return 0;
}

/**
@@ -83,11 +107,12 @@ ceph_end_io_read(struct inode *inode)
 * Declare that a buffered write operation is about to start, and ensure
 * that we block all direct I/O.
 */
void
ceph_start_io_write(struct inode *inode)
int ceph_start_io_write(struct inode *inode)
{
	down_write(&inode->i_rwsem);
	int err = down_write_killable(&inode->i_rwsem);
	if (!err)
		ceph_block_o_direct(ceph_inode(inode), inode);
	return err;
}

/**
@@ -106,12 +131,22 @@ ceph_end_io_write(struct inode *inode)
/* Call with exclusively locked inode->i_rwsem */
static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
{
	bool is_odirect;

	lockdep_assert_held_write(&inode->i_rwsem);

	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)) {
	spin_lock(&ci->i_ceph_lock);
		ci->i_ceph_flags |= CEPH_I_ODIRECT;
	/* ensure that bit state is consistent */
	smp_mb__before_atomic();
	is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
	if (!is_odirect) {
		set_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
		/* ensure modified bit is visible */
		smp_mb__after_atomic();
	}
	spin_unlock(&ci->i_ceph_lock);

	if (!is_odirect) {
		/* FIXME: unmap_mapping_range? */
		filemap_write_and_wait(inode->i_mapping);
	}
@@ -133,20 +168,35 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
 * Note that buffered writes and truncates both take a write lock on
 * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
 */
void
ceph_start_io_direct(struct inode *inode)
int ceph_start_io_direct(struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	bool is_odirect;
	int err;

	/* Be an optimist! */
	down_read(&inode->i_rwsem);
	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
		return;
	err = down_read_killable(&inode->i_rwsem);
	if (err)
		return err;

	spin_lock(&ci->i_ceph_lock);
	/* ensure that bit state is consistent */
	smp_mb__before_atomic();
	is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
	spin_unlock(&ci->i_ceph_lock);
	if (is_odirect)
		return 0;
	up_read(&inode->i_rwsem);

	/* Slow path.... */
	down_write(&inode->i_rwsem);
	err = down_write_killable(&inode->i_rwsem);
	if (err)
		return err;

	ceph_block_buffered(ci, inode);
	downgrade_write(&inode->i_rwsem);

	return 0;
}

/**
Loading