Commit 0958d657 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'wq-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue fix from Tejun Heo:

 - Fix false positive stall reports on weakly ordered architectures
   where the lockless worklist/timestamp check in the watchdog can
   observe stale values due to memory reordering.

   Recheck under pool->lock to confirm.

* tag 'wq-for-7.0-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Better describe stall check
  workqueue: Fix false positive stall reports
parents 53d85a20 e398978d
Loading
Loading
Loading
Loading
+22 −3
Original line number Diff line number Diff line
@@ -7699,8 +7699,29 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
		else
			ts = touched;

		/* did we stall? */
		/*
		 * Did we stall?
		 *
		 * Do a lockless check first to do not disturb the system.
		 *
		 * Prevent false positives by double checking the timestamp
		 * under pool->lock. The lock makes sure that the check reads
		 * an updated pool->last_progress_ts when this CPU saw
		 * an already updated pool->worklist above. It seems better
		 * than adding another barrier into __queue_work() which
		 * is a hotter path.
		 */
		if (time_after(now, ts + thresh)) {
			scoped_guard(raw_spinlock_irqsave, &pool->lock) {
				pool_ts = pool->last_progress_ts;
				if (time_after(pool_ts, touched))
					ts = pool_ts;
				else
					ts = touched;
			}
			if (!time_after(now, ts + thresh))
				continue;

			lockup_detected = true;
			stall_time = jiffies_to_msecs(now - pool_ts) / 1000;
			max_stall_time = max(max_stall_time, stall_time);
@@ -7712,8 +7733,6 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
			pr_cont_pool_info(pool);
			pr_cont(" stuck for %us!\n", stall_time);
		}


	}

	if (lockup_detected)