Commit 0b2b066f authored by Jens Axboe
Browse files

io_uring/io-wq: only create a new worker if it can make progress



Hashed work is serialized by io-wq, intended to be used for cases like
serializing buffered writes to a regular file, where the file system
will serialize the workers anyway with a mutex or similar. Since they
would be forcibly serialized and blocked, it's more efficient for io-wq
to handle these individually rather than issue them in parallel.

If a worker is currently handling a hashed work item and gets blocked,
don't create a new worker if the next work item is also hashed and
mapped to the same bucket. That new worker would not be able to make any
progress anyway.

Reported-by: Fengnan Chang <changfengnan@bytedance.com>
Reported-by: Diangang Li <lidiangang@bytedance.com>
Link: https://lore.kernel.org/io-uring/20250522090909.73212-1-changfengnan@bytedance.com/


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 8343cae3
Loading
Loading
Loading
Loading
+28 −0
Original line number Diff line number Diff line
@@ -419,6 +419,30 @@ static bool io_queue_worker_create(struct io_worker *worker,
	return false;
}

/*
 * Decide whether worker creation should be deferred for @work.
 *
 * Returns true when @work is hashed and the first entry on @acct's work
 * list is hashed to the same value, or when the list is unexpectedly
 * empty. Returns false when @work is not hashed, or when the first queued
 * entry is unhashed or hashes differently.
 *
 * Must be called with acct->lock held.
 */
static bool io_wq_hash_defer(struct io_wq_work *work, struct io_wq_acct *acct)
{
	unsigned int cur_flags, next_flags;
	struct io_wq_work *next;

	lockdep_assert_held(&acct->lock);

	/* Unhashed current work never gives a reason to defer */
	cur_flags = atomic_read(&work->flags);
	if (!__io_wq_is_hashed(cur_flags))
		return false;

	/* should not happen, io_acct_run_queue() said we had work */
	if (wq_list_empty(&acct->work_list))
		return true;

	/* Only the head of the pending list is compared against @work */
	next = container_of(acct->work_list.first, struct io_wq_work, list);
	next_flags = atomic_read(&next->flags);

	return __io_wq_is_hashed(next_flags) &&
	       __io_get_work_hash(cur_flags) == __io_get_work_hash(next_flags);
}

static void io_wq_dec_running(struct io_worker *worker)
{
	struct io_wq_acct *acct = io_wq_get_acct(worker);
@@ -433,6 +457,10 @@ static void io_wq_dec_running(struct io_worker *worker)
		return;
	if (!io_acct_run_queue(acct))
		return;
	if (io_wq_hash_defer(worker->cur_work, acct)) {
		raw_spin_unlock(&acct->lock);
		return;
	}

	raw_spin_unlock(&acct->lock);
	atomic_inc(&acct->nr_running);