Commit 894d1b3d authored by Peter Zijlstra's avatar Peter Zijlstra
Browse files

locking/mutex: Remove wakeups from under mutex::wait_lock



In preparation to nest mutex::wait_lock under rq::lock we need
to remove wakeups from under it.

Do this by utilizing wake_qs to defer the wakeup until after the
lock is dropped.

[Heavily changed after 55f036ca ("locking: WW mutex cleanup") and
08295b3b ("locking: Implement an algorithm choice for Wound-Wait
mutexes")]
[jstultz: rebased to mainline, added extra wake_up_q & init
 to avoid hangs, similar to Connor's rework of this patch]

Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: default avatarJuri Lelli <juri.lelli@redhat.com>
Signed-off-by: default avatarJohn Stultz <jstultz@google.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarMetin Kaya <metin.kaya@arm.com>
Acked-by: default avatarDavidlohr Bueso <dave@stgolabs.net>
Tested-by: default avatarK Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: default avatarMetin Kaya <metin.kaya@arm.com>
Link: https://lore.kernel.org/r/20241009235352.1614323-2-jstultz@google.com
parent 7e019dcc
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -922,6 +922,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
	struct rt_mutex_waiter rt_waiter;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	DEFINE_WAKE_Q(wake_q);
	int res, ret;

	if (!IS_ENABLED(CONFIG_FUTEX_PI))
@@ -1018,8 +1019,11 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
	 * such that futex_unlock_pi() is guaranteed to observe the waiter when
	 * it sees the futex_q::pi_state.
	 */
	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q);
	preempt_disable();
	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
	wake_up_q(&wake_q);
	preempt_enable();

	if (ret) {
		if (ret == 1)
+12 −4
Original line number Diff line number Diff line
@@ -575,6 +575,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
		    struct lockdep_map *nest_lock, unsigned long ip,
		    struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
	DEFINE_WAKE_Q(wake_q);
	struct mutex_waiter waiter;
	struct ww_mutex *ww;
	int ret;
@@ -625,7 +626,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
	 */
	if (__mutex_trylock(lock)) {
		if (ww_ctx)
			__ww_mutex_check_waiters(lock, ww_ctx);
			__ww_mutex_check_waiters(lock, ww_ctx, &wake_q);

		goto skip_wait;
	}
@@ -645,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
		 * Add in stamp order, waking up waiters that must kill
		 * themselves.
		 */
		ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
		ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx, &wake_q);
		if (ret)
			goto err_early_kill;
	}
@@ -681,6 +682,10 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
		}

		raw_spin_unlock(&lock->wait_lock);
		/* Make sure we do wakeups before calling schedule */
		wake_up_q(&wake_q);
		wake_q_init(&wake_q);

		schedule_preempt_disabled();

		first = __mutex_waiter_is_first(lock, &waiter);
@@ -714,7 +719,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
		 */
		if (!ww_ctx->is_wait_die &&
		    !__mutex_waiter_is_first(lock, &waiter))
			__ww_mutex_check_waiters(lock, ww_ctx);
			__ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
	}

	__mutex_remove_waiter(lock, &waiter);
@@ -730,6 +735,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
		ww_mutex_lock_acquired(ww, ww_ctx);

	raw_spin_unlock(&lock->wait_lock);
	wake_up_q(&wake_q);
	preempt_enable();
	return 0;

@@ -741,6 +747,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
	raw_spin_unlock(&lock->wait_lock);
	debug_mutex_free_waiter(&waiter);
	mutex_release(&lock->dep_map, ip);
	wake_up_q(&wake_q);
	preempt_enable();
	return ret;
}
@@ -951,9 +958,10 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
	if (owner & MUTEX_FLAG_HANDOFF)
		__mutex_handoff(lock, next);

	preempt_disable();
	raw_spin_unlock(&lock->wait_lock);

	wake_up_q(&wake_q);
	preempt_enable();
}

#ifndef CONFIG_DEBUG_LOCK_ALLOC
+37 −14
Original line number Diff line number Diff line
@@ -34,13 +34,15 @@

static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
					struct rt_mutex *lock,
					struct ww_acquire_ctx *ww_ctx)
					struct ww_acquire_ctx *ww_ctx,
					struct wake_q_head *wake_q)
{
	return 0;
}

static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
					    struct ww_acquire_ctx *ww_ctx)
					    struct ww_acquire_ctx *ww_ctx,
					    struct wake_q_head *wake_q)
{
}

@@ -1201,7 +1203,8 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
					   struct rt_mutex_waiter *waiter,
					   struct task_struct *task,
					   struct ww_acquire_ctx *ww_ctx,
					   enum rtmutex_chainwalk chwalk)
					   enum rtmutex_chainwalk chwalk,
					   struct wake_q_head *wake_q)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
@@ -1245,7 +1248,10 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,

		/* Check whether the waiter should back out immediately */
		rtm = container_of(lock, struct rt_mutex, rtmutex);
		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
		preempt_disable();
		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q);
		wake_up_q(wake_q);
		preempt_enable();
		if (res) {
			raw_spin_lock(&task->pi_lock);
			rt_mutex_dequeue(lock, waiter);
@@ -1674,12 +1680,14 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
 * @state:	The task state for sleeping
 * @chwalk:	Indicator whether full or partial chainwalk is requested
 * @waiter:	Initializer waiter for blocking
 * @wake_q:	The wake_q to wake tasks after we release the wait_lock
 */
static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
				       struct ww_acquire_ctx *ww_ctx,
				       unsigned int state,
				       enum rtmutex_chainwalk chwalk,
				       struct rt_mutex_waiter *waiter)
				       struct rt_mutex_waiter *waiter,
				       struct wake_q_head *wake_q)
{
	struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
	struct ww_mutex *ww = ww_container_of(rtm);
@@ -1690,7 +1698,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		if (build_ww_mutex() && ww_ctx) {
			__ww_mutex_check_waiters(rtm, ww_ctx);
			__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
		return 0;
@@ -1700,7 +1708,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,

	trace_contention_begin(lock, LCB_F_RT);

	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk, wake_q);
	if (likely(!ret))
		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);

@@ -1708,7 +1716,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
		/* acquired the lock */
		if (build_ww_mutex() && ww_ctx) {
			if (!ww_ctx->is_wait_die)
				__ww_mutex_check_waiters(rtm, ww_ctx);
				__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
			ww_mutex_lock_acquired(ww, ww_ctx);
		}
	} else {
@@ -1730,7 +1738,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,

static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
					     struct ww_acquire_ctx *ww_ctx,
					     unsigned int state)
					     unsigned int state,
					     struct wake_q_head *wake_q)
{
	struct rt_mutex_waiter waiter;
	int ret;
@@ -1739,7 +1748,7 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
	waiter.ww_ctx = ww_ctx;

	ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
				  &waiter);
				  &waiter, wake_q);

	debug_rt_mutex_free_waiter(&waiter);
	return ret;
@@ -1755,6 +1764,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
				     struct ww_acquire_ctx *ww_ctx,
				     unsigned int state)
{
	DEFINE_WAKE_Q(wake_q);
	unsigned long flags;
	int ret;

@@ -1776,8 +1786,11 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
	 * irqsave/restore variants.
	 */
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q);
	preempt_disable();
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	wake_up_q(&wake_q);
	preempt_enable();
	rt_mutex_post_schedule();

	return ret;
@@ -1803,8 +1816,10 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
/**
 * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
 * @lock:	The underlying RT mutex
 * @wake_q:	The wake_q to wake tasks after we release the wait_lock
 */
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock,
					   struct wake_q_head *wake_q)
{
	struct rt_mutex_waiter waiter;
	struct task_struct *owner;
@@ -1821,7 +1836,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)

	trace_contention_begin(lock, LCB_F_RT);

	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK, wake_q);

	for (;;) {
		/* Try to acquire the lock again */
@@ -1832,7 +1847,11 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
			owner = rt_mutex_owner(lock);
		else
			owner = NULL;
		preempt_disable();
		raw_spin_unlock_irq(&lock->wait_lock);
		wake_up_q(wake_q);
		wake_q_init(wake_q);
		preempt_enable();

		if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
			schedule_rtlock();
@@ -1857,10 +1876,14 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
{
	unsigned long flags;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	rtlock_slowlock_locked(lock);
	rtlock_slowlock_locked(lock, &wake_q);
	preempt_disable();
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	wake_up_q(&wake_q);
	preempt_enable();
}

#endif /* RT_MUTEX_BUILD_SPINLOCKS */
+9 −3
Original line number Diff line number Diff line
@@ -275,6 +275,7 @@ void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 * @wake_q:		the wake_q to wake tasks after we release the wait_lock
 *
 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
@@ -291,7 +292,8 @@ void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
 */
int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
					struct rt_mutex_waiter *waiter,
					struct task_struct *task)
					struct task_struct *task,
					struct wake_q_head *wake_q)
{
	int ret;

@@ -302,7 +304,7 @@ int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL,
				      RT_MUTEX_FULL_CHAINWALK);
				      RT_MUTEX_FULL_CHAINWALK, wake_q);

	if (ret && !rt_mutex_owner(lock)) {
		/*
@@ -341,12 +343,16 @@ int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
				      struct task_struct *task)
{
	int ret;
	DEFINE_WAKE_Q(wake_q);

	raw_spin_lock_irq(&lock->wait_lock);
	ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
	ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q);
	if (unlikely(ret))
		remove_waiter(lock, waiter);
	preempt_disable();
	raw_spin_unlock_irq(&lock->wait_lock);
	wake_up_q(&wake_q);
	preempt_enable();

	return ret;
}
+2 −1
Original line number Diff line number Diff line
@@ -83,7 +83,8 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock);
extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
				     struct rt_mutex_waiter *waiter,
				     struct task_struct *task);
				     struct task_struct *task,
				     struct wake_q_head *);
extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
				     struct rt_mutex_waiter *waiter,
				     struct task_struct *task);
Loading