Commit a312e170 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-6.14/io_uring-20250119' of git://git.kernel.dk/linux

Pull io_uring updates from Jens Axboe:
 "Not a lot in terms of features this time around, mostly just cleanups
  and code consolidation:

   - Support for PI meta data read/write via io_uring, with NVMe and
     SCSI covered

   - Cleanup the per-op structure caching, making it consistent across
     various command types

   - Consolidate the various user mapped features into a concept called
     regions, making the various users of that consistent

   - Various cleanups and fixes"

* tag 'for-6.14/io_uring-20250119' of git://git.kernel.dk/linux: (56 commits)
  io_uring/fdinfo: fix io_uring_show_fdinfo() misuse of ->d_iname
  io_uring: reuse io_should_terminate_tw() for cmds
  io_uring: Factor out a function to parse restrictions
  io_uring/rsrc: require cloned buffers to share accounting contexts
  io_uring: simplify the SQPOLL thread check when cancelling requests
  io_uring: expose read/write attribute capability
  io_uring/rw: don't gate retry on completion context
  io_uring/rw: handle -EAGAIN retry at IO completion time
  io_uring/rw: use io_rw_recycle() from cleanup path
  io_uring/rsrc: simplify the bvec iter count calculation
  io_uring: ensure io_queue_deferred() is out-of-line
  io_uring/rw: always clear ->bytes_done on io_async_rw setup
  io_uring/rw: use NULL for rw->free_iovec assigment
  io_uring/rw: don't mask in f_iocb_flags
  io_uring/msg_ring: Drop custom destructor
  io_uring: Move old async data allocation helper to header
  io_uring/rw: Allocate async data through helper
  io_uring/net: Allocate msghdr async data through helper
  io_uring/uring_cmd: Allocate async data through generic helper
  io_uring/poll: Allocate apoll with generic alloc_cache helper
  ...
parents 1cbfb828 561e3a0c
Loading
Loading
Loading
Loading
+69 −15
Original line number Diff line number Diff line
@@ -118,17 +118,18 @@ static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,

static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
{
	unsigned short nr_vecs = bip->bip_max_vcnt - 1;
	struct bio_vec *copy = &bip->bip_vec[1];
	size_t bytes = bip->bip_iter.bi_size;
	struct iov_iter iter;
	unsigned short orig_nr_vecs = bip->bip_max_vcnt - 1;
	struct bio_vec *orig_bvecs = &bip->bip_vec[1];
	struct bio_vec *bounce_bvec = &bip->bip_vec[0];
	size_t bytes = bounce_bvec->bv_len;
	struct iov_iter orig_iter;
	int ret;

	iov_iter_bvec(&iter, ITER_DEST, copy, nr_vecs, bytes);
	ret = copy_to_iter(bvec_virt(bip->bip_vec), bytes, &iter);
	iov_iter_bvec(&orig_iter, ITER_DEST, orig_bvecs, orig_nr_vecs, bytes);
	ret = copy_to_iter(bvec_virt(bounce_bvec), bytes, &orig_iter);
	WARN_ON_ONCE(ret != bytes);

	bio_integrity_unpin_bvec(copy, nr_vecs, true);
	bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs, true);
}

/**
@@ -301,16 +302,15 @@ static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
	return nr_bvecs;
}

int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes)
int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits);
	struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
	struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
	size_t offset, bytes = iter->count;
	unsigned int direction, nr_bvecs;
	struct iov_iter iter;
	int ret, nr_vecs;
	size_t offset;
	bool copy;

	if (bio_integrity(bio))
@@ -323,8 +323,7 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes)
	else
		direction = ITER_SOURCE;

	iov_iter_ubuf(&iter, direction, ubuf, bytes);
	nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
	nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS + 1);
	if (nr_vecs > BIO_MAX_VECS)
		return -E2BIG;
	if (nr_vecs > UIO_FASTIOV) {
@@ -334,8 +333,8 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes)
		pages = NULL;
	}

	copy = !iov_iter_is_aligned(&iter, align, align);
	ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
	copy = !iov_iter_is_aligned(iter, align, align);
	ret = iov_iter_extract_pages(iter, &pages, bytes, nr_vecs, 0, &offset);
	if (unlikely(ret < 0))
		goto free_bvec;

@@ -365,6 +364,55 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes)
	return ret;
}

static void bio_uio_meta_to_bip(struct bio *bio, struct uio_meta *meta)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);

	if (meta->flags & IO_INTEGRITY_CHK_GUARD)
		bip->bip_flags |= BIP_CHECK_GUARD;
	if (meta->flags & IO_INTEGRITY_CHK_APPTAG)
		bip->bip_flags |= BIP_CHECK_APPTAG;
	if (meta->flags & IO_INTEGRITY_CHK_REFTAG)
		bip->bip_flags |= BIP_CHECK_REFTAG;

	bip->app_tag = meta->app_tag;
}

int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	unsigned int integrity_bytes;
	int ret;
	struct iov_iter it;

	if (!bi)
		return -EINVAL;
	/*
	 * original meta iterator can be bigger.
	 * process integrity info corresponding to current data buffer only.
	 */
	it = meta->iter;
	integrity_bytes = bio_integrity_bytes(bi, bio_sectors(bio));
	if (it.count < integrity_bytes)
		return -EINVAL;

	/* should fit into two bytes */
	BUILD_BUG_ON(IO_INTEGRITY_VALID_FLAGS >= (1 << 16));

	if (meta->flags && (meta->flags & ~IO_INTEGRITY_VALID_FLAGS))
		return -EINVAL;

	it.count = integrity_bytes;
	ret = bio_integrity_map_user(bio, &it);
	if (!ret) {
		bio_uio_meta_to_bip(bio, meta);
		bip_set_seed(bio_integrity(bio), meta->seed);
		iov_iter_advance(&meta->iter, integrity_bytes);
		meta->seed += bio_integrity_intervals(bi, bio_sectors(bio));
	}
	return ret;
}

/**
 * bio_integrity_prep - Prepare bio for integrity I/O
 * @bio:	bio to prepare
@@ -435,6 +483,11 @@ bool bio_integrity_prep(struct bio *bio)
	if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
		bip->bip_flags |= BIP_IP_CHECKSUM;

	/* describe what tags to check in payload */
	if (bi->csum_type)
		bip->bip_flags |= BIP_CHECK_GUARD;
	if (bi->flags & BLK_INTEGRITY_REF_TAG)
		bip->bip_flags |= BIP_CHECK_REFTAG;
	if (bio_integrity_add_page(bio, virt_to_page(buf), len,
			offset_in_page(buf)) < len) {
		printk(KERN_ERR "could not attach integrity payload\n");
@@ -559,7 +612,8 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,

	bip->bip_vec = bip_src->bip_vec;
	bip->bip_iter = bip_src->bip_iter;
	bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
	bip->bip_flags = bip_src->bip_flags & BIP_CLONE_FLAGS;
	bip->app_tag = bip_src->app_tag;

	return 0;
}
+9 −1
Original line number Diff line number Diff line
@@ -115,8 +115,16 @@ EXPORT_SYMBOL(blk_rq_map_integrity_sg);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
			      ssize_t bytes)
{
	int ret = bio_integrity_map_user(rq->bio, ubuf, bytes);
	int ret;
	struct iov_iter iter;
	unsigned int direction;

	if (op_is_write(req_op(rq)))
		direction = ITER_DEST;
	else
		direction = ITER_SOURCE;
	iov_iter_ubuf(&iter, direction, ubuf, bytes);
	ret = bio_integrity_map_user(rq->bio, &iter);
	if (ret)
		return ret;

+35 −10
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
	struct bio bio;
	ssize_t ret;

	WARN_ON_ONCE(iocb->ki_flags & IOCB_HAS_METADATA);
	if (nr_pages <= DIO_INLINE_BIO_VECS)
		vecs = inline_vecs;
	else {
@@ -124,12 +125,16 @@ static void blkdev_bio_end_io(struct bio *bio)
{
	struct blkdev_dio *dio = bio->bi_private;
	bool should_dirty = dio->flags & DIO_SHOULD_DIRTY;
	bool is_sync = dio->flags & DIO_IS_SYNC;

	if (bio->bi_status && !dio->bio.bi_status)
		dio->bio.bi_status = bio->bi_status;

	if (!is_sync && (dio->iocb->ki_flags & IOCB_HAS_METADATA))
		bio_integrity_unmap_user(bio);

	if (atomic_dec_and_test(&dio->ref)) {
		if (!(dio->flags & DIO_IS_SYNC)) {
		if (!is_sync) {
			struct kiocb *iocb = dio->iocb;
			ssize_t ret;

@@ -221,14 +226,16 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			 * a retry of this from blocking context.
			 */
			if (unlikely(iov_iter_count(iter))) {
				bio_release_pages(bio, false);
				bio_clear_flag(bio, BIO_REFFED);
				bio_put(bio);
				blk_finish_plug(&plug);
				return -EAGAIN;
				ret = -EAGAIN;
				goto fail;
			}
			bio->bi_opf |= REQ_NOWAIT;
		}
		if (!is_sync && (iocb->ki_flags & IOCB_HAS_METADATA)) {
			ret = bio_integrity_map_iter(bio, iocb->private);
			if (unlikely(ret))
				goto fail;
		}

		if (is_read) {
			if (dio->flags & DIO_SHOULD_DIRTY)
@@ -269,6 +276,12 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,

	bio_put(&dio->bio);
	return ret;
fail:
	bio_release_pages(bio, false);
	bio_clear_flag(bio, BIO_REFFED);
	bio_put(bio);
	blk_finish_plug(&plug);
	return ret;
}

static void blkdev_bio_end_io_async(struct bio *bio)
@@ -286,6 +299,9 @@ static void blkdev_bio_end_io_async(struct bio *bio)
		ret = blk_status_to_errno(bio->bi_status);
	}

	if (iocb->ki_flags & IOCB_HAS_METADATA)
		bio_integrity_unmap_user(bio);

	iocb->ki_complete(iocb, ret);

	if (dio->flags & DIO_SHOULD_DIRTY) {
@@ -330,10 +346,8 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
		bio_iov_bvec_set(bio, iter);
	} else {
		ret = bio_iov_iter_get_pages(bio, iter);
		if (unlikely(ret)) {
			bio_put(bio);
			return ret;
		}
		if (unlikely(ret))
			goto out_bio_put;
	}
	dio->size = bio->bi_iter.bi_size;

@@ -346,6 +360,13 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
		task_io_account_write(bio->bi_iter.bi_size);
	}

	if (iocb->ki_flags & IOCB_HAS_METADATA) {
		ret = bio_integrity_map_iter(bio, iocb->private);
		WRITE_ONCE(iocb->private, NULL);
		if (unlikely(ret))
			goto out_bio_put;
	}

	if (iocb->ki_flags & IOCB_ATOMIC)
		bio->bi_opf |= REQ_ATOMIC;

@@ -360,6 +381,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
		submit_bio(bio);
	}
	return -EIOCBQUEUED;

out_bio_put:
	bio_put(bio);
	return ret;
}

static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+13 −8
Original line number Diff line number Diff line
@@ -885,6 +885,12 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
	return BLK_STS_OK;
}

static void nvme_set_app_tag(struct request *req, struct nvme_command *cmnd)
{
	cmnd->rw.lbat = cpu_to_le16(bio_integrity(req->bio)->app_tag);
	cmnd->rw.lbatm = cpu_to_le16(0xffff);
}

static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd,
			      struct request *req)
{
@@ -1017,18 +1023,17 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
			control |= NVME_RW_PRINFO_PRACT;
		}

		switch (ns->head->pi_type) {
		case NVME_NS_DPS_PI_TYPE3:
		if (bio_integrity_flagged(req->bio, BIP_CHECK_GUARD))
			control |= NVME_RW_PRINFO_PRCHK_GUARD;
			break;
		case NVME_NS_DPS_PI_TYPE1:
		case NVME_NS_DPS_PI_TYPE2:
			control |= NVME_RW_PRINFO_PRCHK_GUARD |
					NVME_RW_PRINFO_PRCHK_REF;
		if (bio_integrity_flagged(req->bio, BIP_CHECK_REFTAG)) {
			control |= NVME_RW_PRINFO_PRCHK_REF;
			if (op == nvme_cmd_zone_append)
				control |= NVME_RW_APPEND_PIREMAP;
			nvme_set_ref_tag(ns, cmnd, req);
			break;
		}
		if (bio_integrity_flagged(req->bio, BIP_CHECK_APPTAG)) {
			control |= NVME_RW_PRINFO_PRCHK_APP;
			nvme_set_app_tag(req, cmnd);
		}
	}

+2 −2
Original line number Diff line number Diff line
@@ -809,14 +809,14 @@ static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd,
		if (bio_integrity_flagged(bio, BIP_IP_CHECKSUM))
			scmd->prot_flags |= SCSI_PROT_IP_CHECKSUM;

		if (bio_integrity_flagged(bio, BIP_CTRL_NOCHECK) == false)
		if (bio_integrity_flagged(bio, BIP_CHECK_GUARD))
			scmd->prot_flags |= SCSI_PROT_GUARD_CHECK;
	}

	if (dif != T10_PI_TYPE3_PROTECTION) {	/* DIX/DIF Type 0, 1, 2 */
		scmd->prot_flags |= SCSI_PROT_REF_INCREMENT;

		if (bio_integrity_flagged(bio, BIP_CTRL_NOCHECK) == false)
		if (bio_integrity_flagged(bio, BIP_CHECK_REFTAG))
			scmd->prot_flags |= SCSI_PROT_REF_CHECK;
	}

Loading