Commit 38aa434a authored by Li Chen, committed by Jens Axboe
Browse files

io_uring/io-wq: add exit-on-idle state



io-wq uses an idle timeout to shrink the pool, but keeps the last worker
around indefinitely to avoid churn.

For tasks that used io_uring for file I/O and then stop using io_uring,
this can leave an iou-wrk-* thread behind even after all io_uring
instances are gone. This is unnecessary overhead and also gets in the
way of process checkpoint/restore.

Add an exit-on-idle state that makes all io-wq workers exit as soon as
they become idle, and provide io_wq_set_exit_on_idle() to toggle it.

Signed-off-by: Li Chen <me@linux.beauty>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 806ae939
Loading
Loading
Loading
Loading
+25 −2
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@ enum {

/* Bit numbers used with the bitops on wq->state. */
enum {
	/* wq exiting */
	IO_WQ_BIT_EXIT = 0,
	/* allow all workers to exit on idle */
	IO_WQ_BIT_EXIT_ON_IDLE = 1,
};

enum {
@@ -707,9 +708,13 @@ static int io_wq_worker(void *data)
		raw_spin_lock(&acct->workers_lock);
		/*
		 * Last sleep timed out. Exit if we're not the last worker,
		 * or if someone modified our affinity.
		 * or if someone modified our affinity. If wq is marked
		 * idle-exit, drop the worker as well. This is used to avoid
		 * keeping io-wq workers around for tasks that no longer have
		 * any active io_uring instances.
		 */
		if (last_timeout && (exit_mask || acct->nr_workers > 1)) {
		if ((last_timeout && (exit_mask || acct->nr_workers > 1)) ||
		    test_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state)) {
			acct->nr_workers--;
			raw_spin_unlock(&acct->workers_lock);
			__set_current_state(TASK_RUNNING);
@@ -967,6 +972,24 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
	return false;
}

/*
 * Toggle the exit-on-idle state of @wq.
 *
 * @wq:     io-wq instance whose state is changed
 * @enable: true to set IO_WQ_BIT_EXIT_ON_IDLE, false to clear it
 *
 * Does nothing when wq->task is NULL. Disabling only clears the bit.
 * Enabling sets the bit and, if it was not already set, calls
 * io_wq_worker_wake() for every worker under rcu_read_lock() --
 * presumably so that idle workers wake up and notice the new state
 * (NOTE(review): confirm against io_wq_worker_wake()).
 */
void io_wq_set_exit_on_idle(struct io_wq *wq, bool enable)
{
	bool already_set;

	if (!wq->task)
		return;

	if (enable) {
		already_set = test_and_set_bit(IO_WQ_BIT_EXIT_ON_IDLE,
					       &wq->state);
		/* Only the caller that flips the bit walks the workers. */
		if (!already_set) {
			rcu_read_lock();
			io_wq_for_each_worker(wq, io_wq_worker_wake, NULL);
			rcu_read_unlock();
		}
	} else {
		clear_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state);
	}
}

static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
{
	do {
+1 −0
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@ struct io_wq_data {
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
void io_wq_exit_start(struct io_wq *wq);
void io_wq_put_and_exit(struct io_wq *wq);
/*
 * Toggle the exit-on-idle state of @wq: while enabled, io-wq workers exit
 * as soon as they become idle; pass false to turn the state back off.
 */
void io_wq_set_exit_on_idle(struct io_wq *wq, bool enable);

void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);