Commit f2fed441 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Jens Axboe
Browse files

loop: stop using vfs_iter_{read,write} for buffered I/O



vfs_iter_{read,write} always perform direct I/O when the file has the
O_DIRECT flag set, which breaks disabling direct I/O using the
LOOP_SET_STATUS / LOOP_SET_STATUS64 ioctls.

This was recenly reported as a regression, but as far as I can tell
was only uncovered by better checking for block sizes and has been
around since the direct I/O support was added.

Fix this by using the existing aio code that calls the raw read/write
iter methods instead.  Note that despite the comments there is no need
for block drivers to ever call flush_dcache_page themselves, and the
call is a left-over from prehistoric times.

Fixes: ab1cb278 ("block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO")
Reported-by: default avatarDarrick J. Wong <djwong@kernel.org>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarMing Lei <ming.lei@redhat.com>
Tested-by: default avatarDarrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20250409130940.3685677-1-hch@lst.de


Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 0dba7a05
Loading
Loading
Loading
Loading
+17 −95
Original line number Diff line number Diff line
@@ -211,72 +211,6 @@ static void loop_set_size(struct loop_device *lo, loff_t size)
		kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE);
}

static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
{
	struct iov_iter i;
	ssize_t bw;

	iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len);

	bw = vfs_iter_write(file, &i, ppos, 0);

	if (likely(bw ==  bvec->bv_len))
		return 0;

	printk_ratelimited(KERN_ERR
		"loop: Write error at byte offset %llu, length %i.\n",
		(unsigned long long)*ppos, bvec->bv_len);
	if (bw >= 0)
		bw = -EIO;
	return bw;
}

static int lo_write_simple(struct loop_device *lo, struct request *rq,
		loff_t pos)
{
	struct bio_vec bvec;
	struct req_iterator iter;
	int ret = 0;

	rq_for_each_segment(bvec, rq, iter) {
		ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos);
		if (ret < 0)
			break;
		cond_resched();
	}

	return ret;
}

static int lo_read_simple(struct loop_device *lo, struct request *rq,
		loff_t pos)
{
	struct bio_vec bvec;
	struct req_iterator iter;
	struct iov_iter i;
	ssize_t len;

	rq_for_each_segment(bvec, rq, iter) {
		iov_iter_bvec(&i, ITER_DEST, &bvec, 1, bvec.bv_len);
		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
		if (len < 0)
			return len;

		flush_dcache_page(bvec.bv_page);

		if (len != bvec.bv_len) {
			struct bio *bio;

			__rq_for_each_bio(bio, rq)
				zero_fill_bio(bio);
			break;
		}
		cond_resched();
	}

	return 0;
}

static void loop_clear_limits(struct loop_device *lo, int mode)
{
	struct queue_limits lim = queue_limits_start_update(lo->lo_queue);
@@ -342,7 +276,7 @@ static void lo_complete_rq(struct request *rq)
	struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	blk_status_t ret = BLK_STS_OK;

	if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
	if (cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
	    req_op(rq) != REQ_OP_READ) {
		if (cmd->ret < 0)
			ret = errno_to_blk_status(cmd->ret);
@@ -358,14 +292,13 @@ static void lo_complete_rq(struct request *rq)
		cmd->ret = 0;
		blk_mq_requeue_request(rq, true);
	} else {
		if (cmd->use_aio) {
		struct bio *bio = rq->bio;

		while (bio) {
			zero_fill_bio(bio);
			bio = bio->bi_next;
		}
		}

		ret = BLK_STS_IOERR;
end_io:
		blk_mq_end_request(rq, ret);
@@ -445,9 +378,14 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,

	cmd->iocb.ki_pos = pos;
	cmd->iocb.ki_filp = file;
	cmd->iocb.ki_ioprio = req_get_ioprio(rq);
	if (cmd->use_aio) {
		cmd->iocb.ki_complete = lo_rw_aio_complete;
		cmd->iocb.ki_flags = IOCB_DIRECT;
	cmd->iocb.ki_ioprio = req_get_ioprio(rq);
	} else {
		cmd->iocb.ki_complete = NULL;
		cmd->iocb.ki_flags = 0;
	}

	if (rw == ITER_SOURCE)
		ret = file->f_op->write_iter(&cmd->iocb, &iter);
@@ -458,7 +396,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,

	if (ret != -EIOCBQUEUED)
		lo_rw_aio_complete(&cmd->iocb, ret);
	return 0;
	return -EIOCBQUEUED;
}

static int do_req_filebacked(struct loop_device *lo, struct request *rq)
@@ -466,15 +404,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
	struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
	loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;

	/*
	 * lo_write_simple and lo_read_simple should have been covered
	 * by io submit style function like lo_rw_aio(), one blocker
	 * is that lo_read_simple() need to call flush_dcache_page after
	 * the page is written from kernel, and it isn't easy to handle
	 * this in io submit style function which submits all segments
	 * of the req at one time. And direct read IO doesn't need to
	 * run flush_dcache_page().
	 */
	switch (req_op(rq)) {
	case REQ_OP_FLUSH:
		return lo_req_flush(lo, rq);
@@ -490,15 +419,9 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
	case REQ_OP_DISCARD:
		return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
	case REQ_OP_WRITE:
		if (cmd->use_aio)
		return lo_rw_aio(lo, cmd, pos, ITER_SOURCE);
		else
			return lo_write_simple(lo, rq, pos);
	case REQ_OP_READ:
		if (cmd->use_aio)
		return lo_rw_aio(lo, cmd, pos, ITER_DEST);
		else
			return lo_read_simple(lo, rq, pos);
	default:
		WARN_ON_ONCE(1);
		return -EIO;
@@ -1922,7 +1845,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
	struct loop_device *lo = rq->q->queuedata;
	int ret = 0;
	struct mem_cgroup *old_memcg = NULL;
	const bool use_aio = cmd->use_aio;

	if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
		ret = -EIO;
@@ -1952,7 +1874,7 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
	}
 failed:
	/* complete non-aio request */
	if (!use_aio || ret) {
	if (ret != -EIOCBQUEUED) {
		if (ret == -EOPNOTSUPP)
			cmd->ret = ret;
		else