Commit b6f58a3f authored by Jens Axboe's avatar Jens Axboe
Browse files

io_uring: move struct io_kiocb from task_struct to io_uring_task



Rather than store the task_struct itself in struct io_kiocb, store
the io_uring specific task_struct. The life times are the same in terms
of io_uring, and this avoids doing some dereferences through the
task_struct. For the hot path of putting local task references, we can
deref req->tctx instead, which we'll need anyway in that function
regardless of whether it's local or remote references.

This is mostly straight forward, except the original task PF_EXITING
check needs a bit of tweaking. task_work is _always_ run from the
originating task, except in the fallback case, where it's run from a
kernel thread. Replace the potentially racy (in case of fallback work)
checks for req->task->flags with current->flags. It's either the still
the original task, in which case PF_EXITING will be sane, or it has
PF_KTHREAD set, in which case it's fallback work. Both cases should
prevent moving forward with the given request.

Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 6ed368cc
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -110,7 +110,7 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,

static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
{
	return cmd_to_io_kiocb(cmd)->task;
	return cmd_to_io_kiocb(cmd)->tctx->task;
}

#endif /* _LINUX_IO_URING_CMD_H */
+2 −1
Original line number Diff line number Diff line
@@ -84,6 +84,7 @@ struct io_uring_task {
	/* submission side */
	int				cached_refs;
	const struct io_ring_ctx 	*last;
	struct task_struct		*task;
	struct io_wq			*io_wq;
	struct file			*registered_rings[IO_RINGFD_REG_MAX];

@@ -625,7 +626,7 @@ struct io_kiocb {
	struct io_cqe			cqe;

	struct io_ring_ctx		*ctx;
	struct task_struct		*task;
	struct io_uring_task		*tctx;

	union {
		/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
+1 −1
Original line number Diff line number Diff line
@@ -205,7 +205,7 @@ int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
		.opcode	= cancel->opcode,
		.seq	= atomic_inc_return(&req->ctx->cancel_seq),
	};
	struct io_uring_task *tctx = req->task->io_uring;
	struct io_uring_task *tctx = req->tctx;
	int ret;

	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
+1 −1
Original line number Diff line number Diff line
@@ -203,7 +203,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)

		hlist_for_each_entry(req, &hb->list, hash_node)
			seq_printf(m, "  op=%d, task_works=%d\n", req->opcode,
					task_work_pending(req->task));
					task_work_pending(req->tctx->task));
	}

	if (has_lock)
+15 −19
Original line number Diff line number Diff line
@@ -206,7 +206,7 @@ bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
{
	bool matched;

	if (tctx && head->task->io_uring != tctx)
	if (tctx && head->tctx != tctx)
		return false;
	if (cancel_all)
		return true;
@@ -407,11 +407,8 @@ static void io_clean_op(struct io_kiocb *req)
		kfree(req->apoll);
		req->apoll = NULL;
	}
	if (req->flags & REQ_F_INFLIGHT) {
		struct io_uring_task *tctx = req->task->io_uring;

		atomic_dec(&tctx->inflight_tracked);
	}
	if (req->flags & REQ_F_INFLIGHT)
		atomic_dec(&req->tctx->inflight_tracked);
	if (req->flags & REQ_F_CREDS)
		put_cred(req->creds);
	if (req->flags & REQ_F_ASYNC_DATA) {
@@ -425,7 +422,7 @@ static inline void io_req_track_inflight(struct io_kiocb *req)
{
	if (!(req->flags & REQ_F_INFLIGHT)) {
		req->flags |= REQ_F_INFLIGHT;
		atomic_inc(&req->task->io_uring->inflight_tracked);
		atomic_inc(&req->tctx->inflight_tracked);
	}
}

@@ -514,7 +511,7 @@ static void io_prep_async_link(struct io_kiocb *req)
static void io_queue_iowq(struct io_kiocb *req)
{
	struct io_kiocb *link = io_prep_linked_timeout(req);
	struct io_uring_task *tctx = req->task->io_uring;
	struct io_uring_task *tctx = req->tctx;

	BUG_ON(!tctx);
	BUG_ON(!tctx->io_wq);
@@ -529,7 +526,7 @@ static void io_queue_iowq(struct io_kiocb *req)
	 * procedure rather than attempt to run this request (or create a new
	 * worker for it).
	 */
	if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
	if (WARN_ON_ONCE(!same_thread_group(tctx->task, current)))
		atomic_or(IO_WQ_WORK_CANCEL, &req->work.flags);

	trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));
@@ -678,17 +675,17 @@ static void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx)
}

/* must to be called somewhat shortly after putting a request */
static inline void io_put_task(struct task_struct *task)
static inline void io_put_task(struct io_kiocb *req)
{
	struct io_uring_task *tctx = task->io_uring;
	struct io_uring_task *tctx = req->tctx;

	if (likely(task == current)) {
	if (likely(tctx->task == current)) {
		tctx->cached_refs++;
	} else {
		percpu_counter_sub(&tctx->inflight, 1);
		if (unlikely(atomic_read(&tctx->in_cancel)))
			wake_up(&tctx->wait);
		put_task_struct(task);
		put_task_struct(tctx->task);
	}
}

@@ -1207,7 +1204,7 @@ static inline void io_req_local_work_add(struct io_kiocb *req,

static void io_req_normal_work_add(struct io_kiocb *req)
{
	struct io_uring_task *tctx = req->task->io_uring;
	struct io_uring_task *tctx = req->tctx;
	struct io_ring_ctx *ctx = req->ctx;

	/* task_work already pending, we're done */
@@ -1226,7 +1223,7 @@ static void io_req_normal_work_add(struct io_kiocb *req)
		return;
	}

	if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
	if (likely(!task_work_add(tctx->task, &tctx->task_work, ctx->notify_method)))
		return;

	io_fallback_tw(tctx, false);
@@ -1343,8 +1340,7 @@ static void io_req_task_cancel(struct io_kiocb *req, struct io_tw_state *ts)
void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts)
{
	io_tw_lock(req->ctx, ts);
	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
	if (unlikely(io_should_terminate_tw()))
		io_req_defer_failed(req, -EFAULT);
	else if (req->flags & REQ_F_FORCE_ASYNC)
		io_queue_iowq(req);
@@ -1403,7 +1399,7 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
		}
		io_put_file(req);
		io_req_put_rsrc_nodes(req);
		io_put_task(req->task);
		io_put_task(req);

		node = req->comp_list.next;
		io_req_add_to_cache(req, ctx);
@@ -2019,7 +2015,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
	req->flags = (__force io_req_flags_t) sqe_flags;
	req->cqe.user_data = READ_ONCE(sqe->user_data);
	req->file = NULL;
	req->task = current;
	req->tctx = current->io_uring;
	req->cancel_seq_set = false;

	if (unlikely(opcode >= IORING_OP_LAST)) {
Loading