Commit 8c930747 authored by Linus Torvalds
Browse files

Merge tag 'io_uring-6.11-20240726' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Fix a syzbot issue for the msg ring cache added in this release. No
   ill effects from this one, but it did make KMSAN unhappy (me)

 - Sanitize the NAPI timeout handling, by unifying the value handling
   into all ktime_t rather than converting back and forth (Pavel)

 - Fail NAPI registration for IOPOLL rings, it's not supported (Pavel)

 - Fix a theoretical issue with ring polling and cancelations (Pavel)

 - Various little cleanups and fixes (Pavel)

* tag 'io_uring-6.11-20240726' of git://git.kernel.dk/linux:
  io_uring/napi: pass ktime to io_napi_adjust_timeout
  io_uring/napi: use ktime in busy polling
  io_uring/msg_ring: fix uninitialized use of target_req->flags
  io_uring: align iowq and task request error handling
  io_uring: kill REQ_F_CANCEL_SEQ
  io_uring: simplify io_uring_cmd return
  io_uring: fix io_match_task must_hold
  io_uring: don't allow netpolling with SETUP_IOPOLL
  io_uring: tighten task exit cancellations
parents bc4eee85 35816961
Loading
Loading
Loading
Loading
+1 −4
Original line number Diff line number Diff line
@@ -404,7 +404,7 @@ struct io_ring_ctx {
	spinlock_t		napi_lock;	/* napi_list lock */

	/* napi busy poll default timeout */
	unsigned int		napi_busy_poll_to;
	ktime_t			napi_busy_poll_dt;
	bool			napi_prefer_busy_poll;
	bool			napi_enabled;

@@ -461,7 +461,6 @@ enum {
	REQ_F_SUPPORT_NOWAIT_BIT,
	REQ_F_ISREG_BIT,
	REQ_F_POLL_NO_LAZY_BIT,
	REQ_F_CANCEL_SEQ_BIT,
	REQ_F_CAN_POLL_BIT,
	REQ_F_BL_EMPTY_BIT,
	REQ_F_BL_NO_RECYCLE_BIT,
@@ -536,8 +535,6 @@ enum {
	REQ_F_HASH_LOCKED	= IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
	/* don't use lazy poll wake for this request */
	REQ_F_POLL_NO_LAZY	= IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
	/* cancel sequence is set and valid */
	REQ_F_CANCEL_SEQ	= IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT),
	/* file is pollable */
	REQ_F_CAN_POLL		= IO_REQ_FLAG(REQ_F_CAN_POLL_BIT),
	/* buffer list was empty after selection of buffer */
+9 −4
Original line number Diff line number Diff line
@@ -1849,7 +1849,7 @@ void io_wq_submit_work(struct io_wq_work *work)
	} while (1);

	/* avoid locking problems by failing it from a clean context */
	if (ret < 0)
	if (ret)
		io_req_task_queue_fail(req, ret);
}

@@ -2416,12 +2416,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,

	if (uts) {
		struct timespec64 ts;
		ktime_t dt;

		if (get_timespec64(&ts, uts))
			return -EFAULT;

		iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
		io_napi_adjust_timeout(ctx, &iowq, &ts);
		dt = timespec64_to_ktime(ts);
		iowq.timeout = ktime_add(dt, ktime_get());
		io_napi_adjust_timeout(ctx, &iowq, dt);
	}

	if (sig) {
@@ -3031,8 +3033,11 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
		bool loop = false;

		io_uring_drop_tctx_refs(current);
		if (!tctx_inflight(tctx, !cancel_all))
			break;

		/* read completions before cancelations */
		inflight = tctx_inflight(tctx, !cancel_all);
		inflight = tctx_inflight(tctx, false);
		if (!inflight)
			break;

+1 −1
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ struct io_wait_queue {
	ktime_t timeout;

#ifdef CONFIG_NET_RX_BUSY_POLL
	unsigned int napi_busy_poll_to;
	ktime_t napi_busy_poll_dt;
	bool napi_prefer_busy_poll;
#endif
};
+3 −3
Original line number Diff line number Diff line
@@ -110,10 +110,10 @@ static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
	if (spin_trylock(&ctx->msg_lock)) {
		req = io_alloc_cache_get(&ctx->msg_cache);
		spin_unlock(&ctx->msg_lock);
	}
		if (req)
			return req;
	return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN);
	}
	return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
}

static int io_msg_data_remote(struct io_kiocb *req)
+30 −30
Original line number Diff line number Diff line
@@ -33,6 +33,12 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
	return NULL;
}

/*
 * Convert a timestamp expressed in the busy-poll time base back into a
 * ktime_t.
 *
 * @t: value in the units produced by busy_loop_current_time(); each unit
 *     is scaled by 1024 (1 << 10) to obtain nanoseconds.
 *
 * Returns the ktime_t corresponding to t * 1024 nanoseconds.
 */
static inline ktime_t net_to_ktime(unsigned long t)
{
	/*
	 * napi approximating usecs, reverse busy_loop_current_time.
	 *
	 * Widen before shifting: the shift is otherwise evaluated in
	 * unsigned long, and where that type is 32 bits wide the top 10
	 * bits of t would be discarded before the value reaches
	 * ns_to_ktime().
	 */
	return ns_to_ktime((unsigned long long)t << 10);
}

void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
	struct hlist_head *hash_list;
@@ -102,14 +108,14 @@ static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
		__io_napi_remove_stale(ctx);
}

static inline bool io_napi_busy_loop_timeout(unsigned long start_time,
					     unsigned long bp_usec)
static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
					     ktime_t bp)
{
	if (bp_usec) {
		unsigned long end_time = start_time + bp_usec;
		unsigned long now = busy_loop_current_time();
	if (bp) {
		ktime_t end_time = ktime_add(start_time, bp);
		ktime_t now = net_to_ktime(busy_loop_current_time());

		return time_after(now, end_time);
		return ktime_after(now, end_time);
	}

	return true;
@@ -124,7 +130,8 @@ static bool io_napi_busy_loop_should_end(void *data,
		return true;
	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
		return true;
	if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to))
	if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
				      iowq->napi_busy_poll_dt))
		return true;

	return false;
@@ -181,10 +188,12 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;

	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
	ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
}

/*
@@ -217,11 +226,13 @@ void io_napi_free(struct io_ring_ctx *ctx)
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to 	  = ctx->napi_busy_poll_to,
		.busy_poll_to 	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};
	struct io_uring_napi napi;

	if (ctx->flags & IORING_SETUP_IOPOLL)
		return -EINVAL;
	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
@@ -230,7 +241,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);
	WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
	WRITE_ONCE(ctx->napi_enabled, true);
	return 0;
@@ -247,14 +258,14 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to 	  = ctx->napi_busy_poll_to,
		.busy_poll_to 	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};

	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
	WRITE_ONCE(ctx->napi_enabled, false);
	return 0;
@@ -271,25 +282,14 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
 * the NAPI timeout accordingly.
 */
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
			      struct timespec64 *ts)
			      ktime_t to_wait)
{
	unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);

	if (ts) {
		struct timespec64 poll_to_ts;

		poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
		if (timespec64_compare(ts, &poll_to_ts) < 0) {
			s64 poll_to_ns = timespec64_to_ns(ts);
			if (poll_to_ns > 0) {
				u64 val = poll_to_ns + 999;
				do_div(val, 1000);
				poll_to = val;
			}
		}
	}
	ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);

	if (to_wait)
		poll_dt = min(poll_dt, to_wait);

	iowq->napi_busy_poll_to = poll_to;
	iowq->napi_busy_poll_dt = poll_dt;
}

/*
@@ -318,7 +318,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
	LIST_HEAD(napi_list);
	bool is_stale = false;

	if (!READ_ONCE(ctx->napi_busy_poll_to))
	if (!READ_ONCE(ctx->napi_busy_poll_dt))
		return 0;
	if (list_empty_careful(&ctx->napi_list))
		return 0;
Loading