Commit 36b9818a authored by Pavel Begunkov, committed by Jens Axboe
Browse files

io_uring/rsrc: don't offload node free



struct delayed_work rsrc_put_work was used to offload node freeing
because io_rsrc_node_ref_zero() used to be called by RCU in IRQ
context. Now that percpu refcounting is gone, we can free the node
eagerly on the spot without pushing it to a worker.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/13fb1aac1e8d068ad8fd4a0c6d0d157ab61b90c0.1680576071.git.asml.silence@gmail.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ff7c75ec
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -330,9 +330,6 @@ struct io_ring_ctx {
	struct io_rsrc_data		*file_data;
	struct io_rsrc_data		*buf_data;

	struct delayed_work		rsrc_put_work;
	struct callback_head		rsrc_put_tw;
	struct llist_head		rsrc_put_llist;
	/* protected by ->uring_lock */
	struct list_head		rsrc_ref_list;

+0 −6
Original line number Diff line number Diff line
@@ -326,9 +326,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
	INIT_LIST_HEAD(&ctx->timeout_list);
	INIT_LIST_HEAD(&ctx->ltimeout_list);
	INIT_LIST_HEAD(&ctx->rsrc_ref_list);
	INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
	init_task_work(&ctx->rsrc_put_tw, io_rsrc_put_tw);
	init_llist_head(&ctx->rsrc_put_llist);
	init_llist_head(&ctx->work_llist);
	INIT_LIST_HEAD(&ctx->tctx_list);
	ctx->submit_state.free_list.next = NULL;
@@ -2821,11 +2818,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
		io_rsrc_node_destroy(ctx->rsrc_node);
	if (ctx->rsrc_backup_node)
		io_rsrc_node_destroy(ctx->rsrc_backup_node);
	flush_delayed_work(&ctx->rsrc_put_work);
	flush_delayed_work(&ctx->fallback_work);

	WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
	WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));

#if defined(CONFIG_UNIX)
	if (ctx->ring_sock) {
+4 −55
Original line number Diff line number Diff line
@@ -145,15 +145,8 @@ static void io_rsrc_put_work_one(struct io_rsrc_data *rsrc_data,
{
	struct io_ring_ctx *ctx = rsrc_data->ctx;

	if (prsrc->tag) {
		if (ctx->flags & IORING_SETUP_IOPOLL) {
			mutex_lock(&ctx->uring_lock);
			io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
			mutex_unlock(&ctx->uring_lock);
		} else {
	if (prsrc->tag)
		io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
		}
	}
	rsrc_data->do_put(ctx, prsrc);
}

@@ -176,32 +169,6 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
		complete(&rsrc_data->done);
}

/*
 * Drain ctx->rsrc_put_llist: detach every node currently queued on it and
 * hand each one to __io_rsrc_put_work().
 *
 * @work: the work_struct embedded in the ctx's rsrc_put_work delayed work;
 *        it is used only to recover the owning io_ring_ctx.
 */
void io_rsrc_put_work(struct work_struct *work)
{
	struct io_ring_ctx *ctx;
	struct llist_node *node;

	ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work);
	/* take ownership of everything queued so far in a single operation */
	node = llist_del_all(&ctx->rsrc_put_llist);

	while (node) {
		struct io_rsrc_node *ref_node;
		/*
		 * Read the link before processing the node - presumably
		 * __io_rsrc_put_work() may release it; TODO confirm.
		 */
		struct llist_node *next = node->next;

		ref_node = llist_entry(node, struct io_rsrc_node, llist);
		__io_rsrc_put_work(ref_node);
		node = next;
	}
}

/*
 * Callback wrapper around io_rsrc_put_work(): maps the callback_head back
 * to its io_ring_ctx (via the rsrc_put_tw member) and runs the put handler
 * synchronously on that ctx's work item.
 *
 * @cb: the callback_head embedded in the ctx as rsrc_put_tw.
 */
void io_rsrc_put_tw(struct callback_head *cb)
{
	struct io_ring_ctx *ctx = container_of(cb, struct io_ring_ctx,
					       rsrc_put_tw);

	/* call the handler directly rather than scheduling the delayed work */
	io_rsrc_put_work(&ctx->rsrc_put_work.work);
}

void io_wait_rsrc_data(struct io_rsrc_data *data)
{
	if (data && !atomic_dec_and_test(&data->refs))
@@ -217,34 +184,18 @@ void io_rsrc_node_ref_zero(struct io_rsrc_node *node)
	__must_hold(&node->rsrc_data->ctx->uring_lock)
{
	struct io_ring_ctx *ctx = node->rsrc_data->ctx;
	bool first_add = false;
	unsigned long delay = HZ;

	node->done = true;

	/* if we are mid-quiesce then do not delay */
	if (node->rsrc_data->quiesce)
		delay = 0;

	while (!list_empty(&ctx->rsrc_ref_list)) {
		node = list_first_entry(&ctx->rsrc_ref_list,
					    struct io_rsrc_node, node);
		/* recycle ref nodes in order */
		if (!node->done)
			break;
		list_del(&node->node);
		first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist);
	}

	if (!first_add)
		return;

	if (ctx->submitter_task) {
		if (!task_work_add(ctx->submitter_task, &ctx->rsrc_put_tw,
				   ctx->notify_method))
			return;
		list_del(&node->node);
		__io_rsrc_put_work(node);
	}
	mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay);
}

static struct io_rsrc_node *io_rsrc_node_alloc(void)
@@ -320,13 +271,11 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
		if (ret < 0) {
			atomic_inc(&data->refs);
			/* wait for all works potentially completing data->done */
			flush_delayed_work(&ctx->rsrc_put_work);
			reinit_completion(&data->done);
			mutex_lock(&ctx->uring_lock);
			break;
		}

		flush_delayed_work(&ctx->rsrc_put_work);
		ret = wait_for_completion_interruptible(&data->done);
		if (!ret) {
			mutex_lock(&ctx->uring_lock);