Commit 59a7d12a authored by Anuj Gupta, committed by Jens Axboe
Browse files

io_uring: introduce attributes for read/write and PI support



Add the ability to pass additional attributes along with read/write.
An application can prepare attribute-specific information and pass its
address using the SQE field:
	__u64	attr_ptr;

Along with setting a mask indicating attributes being passed:
	__u64	attr_type_mask;

Overall 64 attributes are allowed and currently one attribute
'IORING_RW_ATTR_FLAG_PI' is supported.

With the PI attribute, userspace can pass the following information:
- flags: integrity check flags IO_INTEGRITY_CHK_{GUARD/APPTAG/REFTAG}
- len: length of PI/metadata buffer
- addr: address of metadata buffer
- seed: seed value for reftag remapping
- app_tag: application defined 16b value

Process this information to prepare uio_meta_descriptor and pass it down
using kiocb->private.

PI attribute is supported only for direct IO.

Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20241128112240.8867-7-anuj20.g@samsung.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 4de2ce04
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -98,6 +98,10 @@ struct io_uring_sqe {
			__u64	addr3;
			__u64	__pad2[1];
		};
		struct {
			__u64	attr_ptr; /* pointer to attribute information */
			__u64	attr_type_mask; /* bit mask of attributes */
		};
		__u64	optval;
		/*
		 * If the ring is initialized with IORING_SETUP_SQE128, then
@@ -107,6 +111,18 @@ struct io_uring_sqe {
	};
};

/*
 * sqe->attr_type_mask flags
 *
 * Each bit selects one attribute whose payload is read from the address in
 * sqe->attr_ptr. PI is the only attribute accepted at present; any other
 * mask value is rejected with -EINVAL at prep time.
 */
#define IORING_RW_ATTR_FLAG_PI	(1U << 0)
/*
 * PI attribute information
 *
 * Copied from userspace when IORING_RW_ATTR_FLAG_PI is set. The PI
 * attribute is only supported for direct IO.
 */
struct io_uring_attr_pi {
		__u16	flags;		/* integrity check flags, IO_INTEGRITY_CHK_{GUARD/APPTAG/REFTAG} */
		__u16	app_tag;	/* application defined 16-bit value */
		__u32	len;		/* length of the PI/metadata buffer */
		__u64	addr;		/* userspace address of the metadata buffer */
		__u64	seed;		/* seed value for reftag remapping */
		__u64	rsvd;		/* reserved: must be zero, else -EINVAL */
};

/*
 * If sqe->file_index is set to this for opcodes that instantiate a new
 * direct descriptor (like openat/openat2/accept), then io_uring will allocate
+2 −0
Original line number Diff line number Diff line
@@ -3876,6 +3876,8 @@ static int __init io_uring_init(void)
	BUILD_BUG_SQE_ELEM(46, __u16,  __pad3[0]);
	BUILD_BUG_SQE_ELEM(48, __u64,  addr3);
	BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd);
	BUILD_BUG_SQE_ELEM(48, __u64, attr_ptr);
	BUILD_BUG_SQE_ELEM(56, __u64, attr_type_mask);
	BUILD_BUG_SQE_ELEM(56, __u64,  __pad2);

	BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
+81 −2
Original line number Diff line number Diff line
@@ -257,11 +257,53 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
	return 0;
}

/*
 * Snapshot the metadata iterator position and the seed so that a later
 * io_meta_restore() can put both back before the request is issued again.
 */
static inline void io_meta_save_state(struct io_async_rw *io)
{
	iov_iter_save_state(&io->meta.iter, &io->meta_state.iter_meta);
	io->meta_state.seed = io->meta.seed;
}

/*
 * Rewind the metadata iterator and seed to the values captured by
 * io_meta_save_state(). Does nothing unless the kiocb is flagged as
 * carrying metadata: the meta fields share storage with wpq in
 * struct io_async_rw, so they must not be touched for other requests.
 */
static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb)
{
	if (!(kiocb->ki_flags & IOCB_HAS_METADATA))
		return;

	iov_iter_restore(&io->meta.iter, &io->meta_state.iter_meta);
	io->meta.seed = io->meta_state.seed;
}

/*
 * Fetch the PI attribute from userspace and attach it to the request.
 *
 * @req:            request whose async_data receives the metadata descriptor
 * @rw:             read/write command; its kiocb gets IOCB_HAS_METADATA set
 * @ddir:           data direction handed to import_ubuf() for the PI buffer
 * @attr_ptr:       userspace address of a struct io_uring_attr_pi
 * @attr_type_mask: attribute mask from the SQE (not consulted here)
 *
 * Returns -EFAULT if the attribute cannot be copied in, -EINVAL if its
 * reserved field is non-zero, otherwise the result of import_ubuf().
 */
static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
			 u64 attr_ptr, u64 attr_type_mask)
{
	struct io_async_rw *io = req->async_data;
	struct io_uring_attr_pi attr;
	int err;

	if (copy_from_user(&attr, u64_to_user_ptr(attr_ptr), sizeof(attr)))
		return -EFAULT;

	/* reserved space has to be zero so it can be given meaning later */
	if (attr.rsvd)
		return -EINVAL;

	/* describe the PI buffer in the per-request metadata descriptor */
	io->meta.seed = attr.seed;
	io->meta.app_tag = attr.app_tag;
	io->meta.flags = attr.flags;

	err = import_ubuf(ddir, u64_to_user_ptr(attr.addr), attr.len,
			  &io->meta.iter);
	if (unlikely(err < 0))
		return err;

	/* flag the kiocb first, then snapshot state for later restores */
	rw->kiocb.ki_flags |= IOCB_HAS_METADATA;
	io_meta_save_state(io);
	return err;
}

static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
		      int ddir, bool do_import)
{
	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
	unsigned ioprio;
	u64 attr_type_mask;
	int ret;

	rw->kiocb.ki_pos = READ_ONCE(sqe->off);
@@ -279,11 +321,28 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
		rw->kiocb.ki_ioprio = get_current_ioprio();
	}
	rw->kiocb.dio_complete = NULL;
	rw->kiocb.ki_flags = 0;

	rw->addr = READ_ONCE(sqe->addr);
	rw->len = READ_ONCE(sqe->len);
	rw->flags = READ_ONCE(sqe->rw_flags);
	return io_prep_rw_setup(req, ddir, do_import);
	ret = io_prep_rw_setup(req, ddir, do_import);

	if (unlikely(ret))
		return ret;

	attr_type_mask = READ_ONCE(sqe->attr_type_mask);
	if (attr_type_mask) {
		u64 attr_ptr;

		/* only PI attribute is supported currently */
		if (attr_type_mask != IORING_RW_ATTR_FLAG_PI)
			return -EINVAL;

		attr_ptr = READ_ONCE(sqe->attr_ptr);
		ret = io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask);
	}
	return ret;
}

int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -409,7 +468,9 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
/*
 * Put the request's iterators back to their saved state ahead of a
 * resubmission: the data iterator always, the PI/metadata state only when
 * the kiocb carries metadata (io_meta_restore() checks that itself).
 */
static void io_resubmit_prep(struct io_kiocb *req)
{
	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
	struct io_async_rw *io = req->async_data;

	iov_iter_restore(&io->iter, &io->iter_state);
	io_meta_restore(io, &rw->kiocb);
}

@@ -744,6 +805,10 @@ static bool io_rw_should_retry(struct io_kiocb *req)
	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
		return false;

	/* never retry for meta io */
	if (kiocb->ki_flags & IOCB_HAS_METADATA)
		return false;

	/*
	 * just use poll if we can, and don't attempt if the fs doesn't
	 * support callback based unlocks
@@ -794,7 +859,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
	if (!(req->flags & REQ_F_FIXED_FILE))
		req->flags |= io_file_get_flags(file);

	kiocb->ki_flags = file->f_iocb_flags;
	kiocb->ki_flags |= file->f_iocb_flags;
	ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
	if (unlikely(ret))
		return ret;
@@ -828,6 +893,18 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
		kiocb->ki_complete = io_complete_rw;
	}

	if (kiocb->ki_flags & IOCB_HAS_METADATA) {
		struct io_async_rw *io = req->async_data;

		/*
		 * We have a union of meta fields with wpq used for buffered-io
		 * in io_async_rw, so fail it here.
		 */
		if (!(req->file->f_flags & O_DIRECT))
			return -EOPNOTSUPP;
		kiocb->private = &io->meta;
	}

	return 0;
}

@@ -902,6 +979,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
	 * manually if we need to.
	 */
	iov_iter_restore(&io->iter, &io->iter_state);
	io_meta_restore(io, kiocb);

	do {
		/*
@@ -1125,6 +1203,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
	} else {
ret_eagain:
		iov_iter_restore(&io->iter, &io->iter_state);
		io_meta_restore(io, kiocb);
		if (kiocb->ki_flags & IOCB_WRITE)
			io_req_end_write(req);
		return -EAGAIN;
+13 −1
Original line number Diff line number Diff line
@@ -2,6 +2,11 @@

#include <linux/pagemap.h>

struct io_meta_state {
	u32			seed;
	struct iov_iter_state	iter_meta;
};

struct io_async_rw {
	size_t				bytes_done;
	struct iov_iter			iter;
@@ -9,7 +14,14 @@ struct io_async_rw {
	struct iovec			fast_iov;
	struct iovec			*free_iovec;
	int				free_iov_nr;
	/* wpq is for buffered io, while meta fields are used with direct io */
	union {
		struct wait_page_queue		wpq;
		struct {
			struct uio_meta			meta;
			struct io_meta_state		meta_state;
		};
	};
};

int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);