futex/pi: Fix recursive rt_mutex waiter state (fbeb558b) · Commits · git / linux-net

kernel/futex/pi.c

+48 −28

Original line number	Diff line number	Diff line
		@@ -611,29 +611,16 @@ int futex_lock_pi_atomic(u32 __user uaddr, struct futex_hash_bucket hb,
		/*
		* Caller must hold a reference on @pi_state.
		*/
		static int wake_futex_pi(u32 __user uaddr, u32 uval, struct futex_pi_state pi_state)
		static int wake_futex_pi(u32 __user *uaddr, u32 uval,
		struct futex_pi_state *pi_state,
		struct rt_mutex_waiter *top_waiter)
		{
		struct rt_mutex_waiter *top_waiter;
		struct task_struct *new_owner;
		bool postunlock = false;
		DEFINE_RT_WAKE_Q(wqh);
		u32 curval, newval;
		int ret = 0;

		top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
		if (WARN_ON_ONCE(!top_waiter)) {
		/*
		* As per the comment in futex_unlock_pi() this should not happen.
		*
		* When this happens, give up our locks and try again, giving
		* the futex_lock_pi() instance time to complete, either by
		* waiting on the rtmutex or removing itself from the futex
		* queue.
		*/
		ret = -EAGAIN;
		goto out_unlock;
		}

		new_owner = top_waiter->task;

		/*
		@@ -1046,19 +1033,33 @@ int futex_lock_pi(u32 __user uaddr, unsigned int flags, ktime_t time, int tryl
		ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);

		cleanup:
		spin_lock(q.lock_ptr);
		/*
		* If we failed to acquire the lock (deadlock/signal/timeout), we must
		* first acquire the hb->lock before removing the lock from the
		* rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
		* lists consistent.
		* must unwind the above, however we canont lock hb->lock because
		* rt_mutex already has a waiter enqueued and hb->lock can itself try
		* and enqueue an rt_waiter through rtlock.
		*
		* Doing the cleanup without holding hb->lock can cause inconsistent
		* state between hb and pi_state, but only in the direction of not
		* seeing a waiter that is leaving.
		*
		* See futex_unlock_pi(), it deals with this inconsistency.
		*
		* There be dragons here, since we must deal with the inconsistency on
		* the way out (here), it is impossible to detect/warn about the race
		* the other way around (missing an incoming waiter).
		*
		* In particular; it is important that futex_unlock_pi() can not
		* observe this inconsistency.
		* What could possibly go wrong...
		*/
		if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
		ret = 0;

		/*
		* Now that the rt_waiter has been dequeued, it is safe to use
		* spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
		* the
		*/
		spin_lock(q.lock_ptr);
		/*
		* Waiter is unqueued.
		*/
		@@ -1143,6 +1144,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
		top_waiter = futex_top_waiter(hb, &key);
		if (top_waiter) {
		struct futex_pi_state *pi_state = top_waiter->pi_state;
		struct rt_mutex_waiter *rt_waiter;

		ret = -EINVAL;
		if (!pi_state)
		@@ -1155,22 +1157,39 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
		if (pi_state->owner != current)
		goto out_unlock;

		get_pi_state(pi_state);
		/*
		* By taking wait_lock while still holding hb->lock, we ensure
		* there is no point where we hold neither; and therefore
		* wake_futex_p() must observe a state consistent with what we
		* observed.
		* there is no point where we hold neither; and thereby
		* wake_futex_pi() must observe any new waiters.
		*
		* Since the cleanup: case in futex_lock_pi() removes the
		* rt_waiter without holding hb->lock, it is possible for
		* wake_futex_pi() to not find a waiter while the above does,
		* in this case the waiter is on the way out and it can be
		* ignored.
		*
		* In particular; this forces __rt_mutex_start_proxy() to
		* complete such that we're guaranteed to observe the
		* rt_waiter. Also see the WARN in wake_futex_pi().
		* rt_waiter.
		*/
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

		/*
		* Futex vs rt_mutex waiter state -- if there are no rt_mutex
		* waiters even though futex thinks there are, then the waiter
		* is leaving and the uncontended path is safe to take.
		*/
		rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
		if (!rt_waiter) {
		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
		goto do_uncontended;
		}

		get_pi_state(pi_state);
		spin_unlock(&hb->lock);

		/* drops pi_state->pi_mutex.wait_lock */
		ret = wake_futex_pi(uaddr, uval, pi_state);
		ret = wake_futex_pi(uaddr, uval, pi_state, rt_waiter);

		put_pi_state(pi_state);

		@@ -1198,6 +1217,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
		return ret;
		}

		do_uncontended:
		/*
		* We have no kernel internal state, i.e. no waiters in the
		* kernel. Waiters which are about to queue themselves are stuck

kernel/futex/requeue.c

+4 −2

Original line number	Diff line number	Diff line
		@@ -850,11 +850,13 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);

		/* Current is not longer pi_blocked_on */
		spin_lock(q.lock_ptr);
		/*
		* See futex_unlock_pi()'s cleanup: comment.
		*/
		if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
		ret = 0;

		spin_lock(q.lock_ptr);
		debug_rt_mutex_free_waiter(&rt_waiter);
		/*
		* Fixup the pi_state owner and possibly acquire the lock if we