Commit 5d4740fc authored by Linus Torvalds
Browse files

Merge tag 'io_uring-6.2-2022-12-19' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Improve the locking for timeouts. This was originally queued up for
   the initial pull, but I messed up and it got missed. (Pavel)

 - Fix an issue with running task_work from the wait path, causing some
   inefficiencies (me)

 - Add a clear of ->free_iov upfront in the 32-bit compat data
   importing, so we ensure that it's always sane at completion time (me)

 - Use call_rcu_hurry() for the eventfd signaling (Dylan)

 - Ordering fix for multishot recv completions (Pavel)

 - Add the io_uring trace header to the MAINTAINERS entry (Ammar)

* tag 'io_uring-6.2-2022-12-19' of git://git.kernel.dk/linux:
  MAINTAINERS: io_uring: Add include/trace/events/io_uring.h
  io_uring/net: fix cleanup after recycle
  io_uring/net: ensure compat import handlers clear free_iov
  io_uring: include task_work run after scheduling in wait for events
  io_uring: don't use TIF_NOTIFY_SIGNAL to test for availability of task_work
  io_uring: use call_rcu_hurry if signaling an eventfd
  io_uring: fix overflow handling regression
  io_uring: ease timeout flush locking requirements
  io_uring: revise completion_lock locking
  io_uring: protect cq_timeouts with timeout_lock
parents 0a924817 5ad70eb2
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -10878,6 +10878,7 @@ T: git git://git.kernel.dk/liburing
F:	io_uring/
F:	include/linux/io_uring.h
F:	include/linux/io_uring_types.h
F:	include/trace/events/io_uring.h
F:	include/uapi/linux/io_uring.h
F:	tools/io_uring/
+30 −10
Original line number Diff line number Diff line
@@ -538,7 +538,7 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
	} else {
		atomic_inc(&ev_fd->refs);
		if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
			call_rcu(&ev_fd->rcu, io_eventfd_ops);
			call_rcu_hurry(&ev_fd->rcu, io_eventfd_ops);
		else
			atomic_dec(&ev_fd->refs);
	}
@@ -572,11 +572,10 @@ static void io_eventfd_flush_signal(struct io_ring_ctx *ctx)

void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
{
	if (ctx->off_timeout_used || ctx->drain_active) {
		spin_lock(&ctx->completion_lock);
	if (ctx->off_timeout_used)
		io_flush_timeouts(ctx);
		if (ctx->drain_active)
	if (ctx->drain_active) {
		spin_lock(&ctx->completion_lock);
		io_queue_deferred(ctx);
		spin_unlock(&ctx->completion_lock);
	}
@@ -597,6 +596,18 @@ static inline void __io_cq_unlock(struct io_ring_ctx *ctx)
		spin_unlock(&ctx->completion_lock);
}

/*
 * Acquire ctx->completion_lock. The __acquires() annotation declares, for
 * static lock-context checking, that the lock is held when this returns.
 */
static inline void io_cq_lock(struct io_ring_ctx *ctx)
	__acquires(ctx->completion_lock)
{
	spin_lock(&ctx->completion_lock);
}

/*
 * Release ctx->completion_lock. The __releases() annotation declares, for
 * static lock-context checking, that the lock is dropped when this returns.
 */
static inline void io_cq_unlock(struct io_ring_ctx *ctx)
	__releases(ctx->completion_lock)
{
	spin_unlock(&ctx->completion_lock);
}

/* keep it inlined for io_submit_flush_completions() */
static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
	__releases(ctx->completion_lock)
@@ -916,7 +927,7 @@ static void __io_req_complete_post(struct io_kiocb *req)

	io_cq_lock(ctx);
	if (!(req->flags & REQ_F_CQE_SKIP))
		__io_fill_cqe_req(ctx, req);
		io_fill_cqe_req(ctx, req);

	/*
	 * If we're the last reference to this request, add to our locked
@@ -1074,9 +1085,9 @@ static void __io_req_find_next_prep(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;

	io_cq_lock(ctx);
	spin_lock(&ctx->completion_lock);
	io_disarm_next(req);
	io_cq_unlock_post(ctx);
	spin_unlock(&ctx->completion_lock);
}

static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
@@ -2470,7 +2481,14 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
	}
	if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
		return -ETIME;
	return 1;

	/*
	 * Run task_work after scheduling. If we got woken because of
	 * task_work being processed, run it now rather than let the caller
	 * do another wait loop.
	 */
	ret = io_run_task_work_sig(ctx);
	return ret < 0 ? ret : 1;
}

/*
@@ -2535,6 +2553,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
		prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
						TASK_INTERRUPTIBLE);
		ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
		if (__io_cqring_events_user(ctx) >= min_events)
			break;
		cond_resched();
	} while (ret > 0);

+1 −13
Original line number Diff line number Diff line
@@ -87,17 +87,6 @@ static inline void io_req_task_work_add(struct io_kiocb *req)
/*
 * Walk a chain of entries connected through their ->link member, starting
 * at @head. @pos is the loop cursor; iteration stops once it becomes NULL.
 */
#define io_for_each_link(pos, head) \
	for (pos = (head); pos; pos = pos->link)

/*
 * Acquire ctx->completion_lock. The __acquires() annotation declares, for
 * static lock-context checking, that the lock is held when this returns.
 */
static inline void io_cq_lock(struct io_ring_ctx *ctx)
	__acquires(ctx->completion_lock)
{
	spin_lock(&ctx->completion_lock);
}

/*
 * Release ctx->completion_lock.
 *
 * NOTE(review): this helper carries no __releases() annotation, although
 * the matching io_cq_lock() is annotated with __acquires() - confirm
 * whether the lock-context annotations are meant to balance.
 */
static inline void io_cq_unlock(struct io_ring_ctx *ctx)
{
	spin_unlock(&ctx->completion_lock);
}

void io_cq_unlock_post(struct io_ring_ctx *ctx);

static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
@@ -277,8 +266,7 @@ static inline int io_run_task_work(void)

/*
 * Report whether there is task_work waiting to be run for this ring.
 *
 * Returns true if the current task has task_work pending, or if the ring's
 * own work_llist is non-empty; false otherwise.
 *
 * The check goes through task_work_pending(current) instead of testing the
 * TIF_NOTIFY_SIGNAL thread flag: per the change description, that flag must
 * not be used to test for availability of task_work.
 */
static inline bool io_task_work_pending(struct io_ring_ctx *ctx)
{
	return task_work_pending(current) || !wq_list_empty(&ctx->work_llist);
}

static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx)
+2 −1
Original line number Diff line number Diff line
@@ -494,6 +494,7 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
	if (req->flags & REQ_F_BUFFER_SELECT) {
		compat_ssize_t clen;

		iomsg->free_iov = NULL;
		if (msg.msg_iovlen == 0) {
			sr->len = 0;
		} else if (msg.msg_iovlen > 1) {
@@ -819,10 +820,10 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
		goto retry_multishot;

	if (mshot_finished) {
		io_netmsg_recycle(req, issue_flags);
		/* fast path, check for non-NULL to avoid function call */
		if (kmsg->free_iov)
			kfree(kmsg->free_iov);
		io_netmsg_recycle(req, issue_flags);
		req->flags &= ~REQ_F_NEED_CLEANUP;
	}

+1 −1
Original line number Diff line number Diff line
@@ -1062,7 +1062,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
			continue;

		req->cqe.flags = io_put_kbuf(req, 0);
		__io_fill_cqe_req(req->ctx, req);
		io_fill_cqe_req(req->ctx, req);
	}

	if (unlikely(!nr_events))
Loading