Commit fa58e6e9 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'io_uring-7.1-20260424' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull io_uring fixes from Jens Axboe:

 - Fix for a NOMMU bug with io_uring, where NOMMU doesn't grab page refs
   at mmap time. NOMMU also has entirely broken FOLL_PIN support, yet
   here we are

 - A few fixes covering minor issues introduced in this merge window

 - data race annotation to shut up KCSAN for when io-wq limits are
   applied

 - A nospec addition for direct descriptor file updating. Rest of the
   direct descriptor path already had this, but for some reason the
   update did not. Now they are all the same

 - Various minor defensive changes that claude identified and suggested
   terrible fixes for, turned into actually useful cleanups:

       - Use kvfree() for the imu cache. These can come from kmalloc or
         vmalloc depending on size, but the in-cache ones are capped
         where it's always kmalloc based. Change to kvfree() in the
         cleanup path, making future changes unlikely to mess that up

       - Negative kbuf consumption lengths. Can't happen right now, but
         cqe->res is used directly, which, if other code changes, could
         then be an error value

 - Fix for an issue with the futex code, where partial wakes on a
   vectored futex would potentially wake the same futex twice, rather
   than move on to the next one. This could confuse an application as it
   would've expected the next futex to have been woken

 - Fix for a bug with ring resizing, where SQEs or CQEs might not have
   been copied correctly if large SQEs or CQEs are used in the ring.
   Application side issue, where SQEs or CQEs might have been lost
   during resize

 - Fix for a bug where EPOLL_URING_WAKE might have been lost, causing a
   multishot poll to not be terminated when it's nested, like it should
   have been

 - Fix for an issue with signed comparison of poll references for the
   slow path

 - Fix for a user struct UAF in the zcrx code

 - Two minor zcrx cleanups

* tag 'io_uring-7.1-20260424' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring: take page references for NOMMU pbuf_ring mmaps
  io_uring/poll: ensure EPOLL_ONESHOT is propagated for EPOLL_URING_WAKE
  io_uring/zcrx: warn on freelist violations
  io_uring/zcrx: clear RQ headers on init
  io_uring/zcrx: fix user_struct uaf
  io_uring/register: fix ring resizing with mixed/large SQEs/CQEs
  io_uring/futex: ensure partial wakes are appropriately dequeued
  io_uring/rw: add defensive hardening for negative kbuf lengths
  io_uring/rsrc: use kvfree() for the imu cache
  io_uring/rsrc: unify nospec indexing for direct descriptors
  io_uring: fix spurious fput in registered ring path
  io_uring: fix iowq_limits data race in tctx node addition
  io_uring/tctx: mark io_wq as exiting before error path teardown
  io_uring/tctx: check for setup tctx->io_wq before teardown
  io_uring/poll: fix signed comparison in io_poll_get_ownership()
parents b85900e9 d0be8884
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -64,7 +64,7 @@ static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp)
/*
 * Hand @obj back to @cache. When io_alloc_cache_put() returns false the
 * object is released here instead (presumably because the cache did not
 * keep it -- confirm against io_alloc_cache_put()).
 *
 * kvfree() is used rather than kfree() so the release is correct whether
 * the object was allocated with kmalloc or vmalloc.
 */
static inline void io_cache_free(struct io_alloc_cache *cache, void *obj)
{
	if (!io_alloc_cache_put(cache, obj))
		kvfree(obj);
}

#endif
+3 −1
Original line number Diff line number Diff line
@@ -159,8 +159,10 @@ static void io_futex_wakev_fn(struct wake_q_head *wake_q, struct futex_q *q)
	struct io_kiocb *req = q->wake_data;
	struct io_futexv_data *ifd = req->async_data;

	if (!io_futexv_claim(ifd))
	if (!io_futexv_claim(ifd)) {
		__futex_wake_mark(q);
		return;
	}
	if (unlikely(!__futex_wake_mark(q)))
		return;

+2 −1
Original line number Diff line number Diff line
@@ -2575,6 +2575,7 @@ struct file *io_uring_ctx_get_file(unsigned int fd, bool registered)
		return ERR_PTR(-EBADF);
	if (io_is_uring_fops(file))
		return file;
	if (!registered)
		fput(file);
	return ERR_PTR(-EOPNOTSUPP);
}
+45 −1
Original line number Diff line number Diff line
@@ -366,9 +366,53 @@ unsigned long io_uring_get_unmapped_area(struct file *filp, unsigned long addr,

#else /* !CONFIG_MMU */

/*
 * ->close handler for NOMMU io_uring mappings: release one page reference
 * for every page the VMA spans, undoing the references taken when the
 * mapping was set up in io_uring_mmap(). mremap() is not supported here and
 * a file backed mapping cannot be split or partially unmapped, so the VMA
 * range is expected to be exactly the range that was referenced originally.
 */
static void io_uring_nommu_vm_close(struct vm_area_struct *vma)
{
	unsigned long addr = vma->vm_start;

	/* Walk the mapping one page at a time, dropping a reference on each. */
	while (addr < vma->vm_end) {
		put_page(virt_to_page((void *) addr));
		addr += PAGE_SIZE;
	}
}

/*
 * VMA operations for NOMMU io_uring mappings. Only ->close is provided; it
 * points at io_uring_nommu_vm_close() so that tearing down the VMA drops
 * the page references held on behalf of the mapping.
 */
static const struct vm_operations_struct io_uring_nommu_vm_ops = {
	.close = io_uring_nommu_vm_close,
};

/*
 * mmap handler for io_uring files on !CONFIG_MMU builds.
 *
 * Looks up the region selected by vma->vm_pgoff, takes one reference on
 * each of its pages and installs io_uring_nommu_vm_ops so the references
 * are dropped again from ->close.
 *
 * Returns 0 on success, or -EINVAL when the mapping is not a NOMMU shared
 * mapping, when no populated region matches the offset, or when the VMA
 * length differs from the region size.
 */
int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct io_ring_ctx *ctx = file->private_data;
	struct io_mapped_region *region;
	unsigned long i;

	if (!is_nommu_shared_mapping(vma->vm_flags))
		return -EINVAL;

	/* Region lookup and page referencing are done under ctx->mmap_lock. */
	guard(mutex)(&ctx->mmap_lock);
	region = io_mmap_get_region(ctx, vma->vm_pgoff);
	if (!region || !io_region_is_set(region))
		return -EINVAL;

	/*
	 * Only whole-region mappings are accepted: ->close walks the entire
	 * VMA and drops one reference per page, so the VMA must cover exactly
	 * the pages referenced below.
	 */
	if ((vma->vm_end - vma->vm_start) !=
	    (unsigned long) region->nr_pages << PAGE_SHIFT)
		return -EINVAL;

	/*
	 * Pin the pages so io_free_region()'s release_pages() does not
	 * drop the last reference while this VMA exists. delete_vma()
	 * in mm/nommu.c calls vma_close() which runs ->close above.
	 */
	for (i = 0; i < region->nr_pages; i++)
		get_page(region->pages[i]);

	vma->vm_ops = &io_uring_nommu_vm_ops;
	return 0;
}

unsigned int io_uring_nommu_mmap_capabilities(struct file *file)
+4 −2
Original line number Diff line number Diff line
@@ -93,7 +93,7 @@ static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	/*
	 * atomic_read() returns a signed int. Compare as unsigned so that a
	 * poll_refs value with the top bit set is not seen as negative and
	 * still takes the slow path.
	 */
	if (unlikely((unsigned int)atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
		return io_poll_get_ownership_slowpath(req);
	/* Ownership is obtained only if no reference bits were set before the increment. */
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}
@@ -417,8 +417,10 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
		 * disable multishot as there is a circular dependency between
		 * CQ posting and triggering the event.
		 */
		if (mask & EPOLL_URING_WAKE)
		if (mask & EPOLL_URING_WAKE) {
			poll->events |= EPOLLONESHOT;
			req->apoll_events |= EPOLLONESHOT;
		}

		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
Loading