Commit 6467dfdf authored by Linus Torvalds
Browse files

Merge tag 'ceph-for-6.11-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "A small patchset to address bogus I/O errors and ultimately an
  assertion failure in the face of watch errors with -o exclusive
  mappings in RBD marked for stable and some assorted CephFS fixes"

* tag 'ceph-for-6.11-rc1' of https://github.com/ceph/ceph-client:
  rbd: don't assume rbd_is_lock_owner() for exclusive mappings
  rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings
  rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait
  ceph: fix incorrect kmalloc size of pagevec mempool
  ceph: periodically flush the cap releases
  ceph: convert comma to semicolon in __ceph_dentry_dir_lease_touch()
  ceph: use cap_wait_list only if debugfs is enabled
parents 732c2753 3ceccb14
Loading
Loading
Loading
Loading
+15 −20
Original line number Diff line number Diff line
@@ -362,7 +362,7 @@ enum rbd_watch_state {
enum rbd_lock_state {
	RBD_LOCK_STATE_UNLOCKED,
	RBD_LOCK_STATE_LOCKED,
	RBD_LOCK_STATE_RELEASING,
	RBD_LOCK_STATE_QUIESCING,
};

/* WatchNotify::ClientId */
@@ -422,7 +422,7 @@ struct rbd_device {
	struct list_head	running_list;
	struct completion	acquire_wait;
	int			acquire_err;
	struct completion	releasing_wait;
	struct completion	quiescing_wait;

	spinlock_t		object_map_lock;
	u8			*object_map;
@@ -525,7 +525,7 @@ static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
	lockdep_assert_held(&rbd_dev->lock_rwsem);

	return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED ||
	       rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING;
	       rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING;
}

static bool rbd_is_lock_owner(struct rbd_device *rbd_dev)
@@ -3457,13 +3457,14 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req)
	lockdep_assert_held(&rbd_dev->lock_rwsem);
	spin_lock(&rbd_dev->lock_lists_lock);
	if (!list_empty(&img_req->lock_item)) {
		rbd_assert(!list_empty(&rbd_dev->running_list));
		list_del_init(&img_req->lock_item);
		need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
		need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
			       list_empty(&rbd_dev->running_list));
	}
	spin_unlock(&rbd_dev->lock_lists_lock);
	if (need_wakeup)
		complete(&rbd_dev->releasing_wait);
		complete(&rbd_dev->quiescing_wait);
}

static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
	if (rbd_lock_add_request(img_req))
		return 1;

	if (rbd_dev->opts->exclusive) {
		WARN_ON(1); /* lock got released? */
		return -EROFS;
	}

	/*
	 * Note the use of mod_delayed_work() in rbd_acquire_lock()
	 * and cancel_delayed_work() in wake_lock_waiters().
@@ -4181,16 +4177,16 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
	/*
	 * Ensure that all in-flight IO is flushed.
	 */
	rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
	rbd_assert(!completion_done(&rbd_dev->releasing_wait));
	rbd_dev->lock_state = RBD_LOCK_STATE_QUIESCING;
	rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
	if (list_empty(&rbd_dev->running_list))
		return true;

	up_write(&rbd_dev->lock_rwsem);
	wait_for_completion(&rbd_dev->releasing_wait);
	wait_for_completion(&rbd_dev->quiescing_wait);

	down_write(&rbd_dev->lock_rwsem);
	if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
	if (rbd_dev->lock_state != RBD_LOCK_STATE_QUIESCING)
		return false;

	rbd_assert(list_empty(&rbd_dev->running_list));
@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
			rbd_warn(rbd_dev, "failed to update lock cookie: %d",
				 ret);

		if (rbd_dev->opts->exclusive)
			rbd_warn(rbd_dev,
			     "temporarily releasing lock on exclusive mapping");

		/*
		 * Lock cookie cannot be updated on older OSDs, so do
		 * a manual release and queue an acquire.
@@ -5376,7 +5376,7 @@ static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec)
	INIT_LIST_HEAD(&rbd_dev->acquiring_list);
	INIT_LIST_HEAD(&rbd_dev->running_list);
	init_completion(&rbd_dev->acquire_wait);
	init_completion(&rbd_dev->releasing_wait);
	init_completion(&rbd_dev->quiescing_wait);

	spin_lock_init(&rbd_dev->object_map_lock);

@@ -6582,11 +6582,6 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
	if (ret)
		return ret;

	/*
	 * The lock may have been released by now, unless automatic lock
	 * transitions are disabled.
	 */
	rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev));
	return 0;
}

+6 −0
Original line number Diff line number Diff line
@@ -3067,10 +3067,13 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
				       flags, &_got);
		WARN_ON_ONCE(ret == -EAGAIN);
		if (!ret) {
#ifdef CONFIG_DEBUG_FS
			struct ceph_mds_client *mdsc = fsc->mdsc;
			struct cap_wait cw;
#endif
			DEFINE_WAIT_FUNC(wait, woken_wake_function);

#ifdef CONFIG_DEBUG_FS
			cw.ino = ceph_ino(inode);
			cw.tgid = current->tgid;
			cw.need = need;
@@ -3079,6 +3082,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
			spin_lock(&mdsc->caps_list_lock);
			list_add(&cw.list, &mdsc->cap_wait_list);
			spin_unlock(&mdsc->caps_list_lock);
#endif

			/* make sure used fmode not timeout */
			ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS);
@@ -3097,9 +3101,11 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
			remove_wait_queue(&ci->i_cap_wq, &wait);
			ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS);

#ifdef CONFIG_DEBUG_FS
			spin_lock(&mdsc->caps_list_lock);
			list_del(&cw.list);
			spin_unlock(&mdsc->caps_list_lock);
#endif

			if (ret == -EAGAIN)
				continue;
+1 −1
Original line number Diff line number Diff line
@@ -1589,7 +1589,7 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di)
	}

	spin_lock(&mdsc->dentry_list_lock);
	__dentry_dir_lease_touch(mdsc, di),
	__dentry_dir_lease_touch(mdsc, di);
	spin_unlock(&mdsc->dentry_list_lock);
}

+4 −0
Original line number Diff line number Diff line
@@ -5446,6 +5446,8 @@ static void delayed_work(struct work_struct *work)
		}
		mutex_unlock(&mdsc->mutex);

		ceph_flush_cap_releases(mdsc, s);

		mutex_lock(&s->s_mutex);
		if (renew_caps)
			send_renew_caps(mdsc, s);
@@ -5505,7 +5507,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
	INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
	mdsc->last_renew_caps = jiffies;
	INIT_LIST_HEAD(&mdsc->cap_delay_list);
#ifdef CONFIG_DEBUG_FS
	INIT_LIST_HEAD(&mdsc->cap_wait_list);
#endif
	spin_lock_init(&mdsc->cap_delay_lock);
	INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
	INIT_LIST_HEAD(&mdsc->snap_flush_list);
+6 −0
Original line number Diff line number Diff line
@@ -416,6 +416,8 @@ struct ceph_quotarealm_inode {
	struct inode *inode;
};

#ifdef CONFIG_DEBUG_FS

struct cap_wait {
	struct list_head	list;
	u64			ino;
@@ -424,6 +426,8 @@ struct cap_wait {
	int			want;
};

#endif

enum {
	CEPH_MDSC_STOPPING_BEGIN = 1,
	CEPH_MDSC_STOPPING_FLUSHING = 2,
@@ -512,7 +516,9 @@ struct ceph_mds_client {
	spinlock_t	caps_list_lock;
	struct		list_head caps_list; /* unused (reserved or
						unreserved) */
#ifdef CONFIG_DEBUG_FS
	struct		list_head cap_wait_list;
#endif
	int		caps_total_count;    /* total caps allocated */
	int		caps_use_count;      /* in use */
	int		caps_use_max;	     /* max used caps */
Loading