Commit ba58f57f authored by Yu Kuai's avatar Yu Kuai Committed by Song Liu
Browse files

md/raid1: factor out the code to manage sequential IO



There is no functional change for now, make read_balance() cleaner and
prepare to fix problems and refactor the handler of sequential IO.

Co-developed-by: default avatarPaul Luse <paul.e.luse@linux.intel.com>
Signed-off-by: default avatarPaul Luse <paul.e.luse@linux.intel.com>
Signed-off-by: default avatarYu Kuai <yukuai3@huawei.com>
Reviewed-by: default avatarXiao Ni <xni@redhat.com>
Signed-off-by: default avatarSong Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240229095714.926789-11-yukuai1@huaweicloud.com
parent 9f3ced79
Loading
Loading
Loading
Loading
+37 −34
Original line number Diff line number Diff line
@@ -705,6 +705,31 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
	return bb_disk;
}

static bool is_sequential(struct r1conf *conf, int disk, struct r1bio *r1_bio)
{
	/* TODO: address issues with this check and concurrency. */
	return conf->mirrors[disk].next_seq_sect == r1_bio->sector ||
	       conf->mirrors[disk].head_position == r1_bio->sector;
}

/*
 * If buffered sequential IO size exceeds optimal iosize, check if there is idle
 * disk. If yes, choose the idle disk.
 */
static bool should_choose_next(struct r1conf *conf, int disk)
{
	struct raid1_info *mirror = &conf->mirrors[disk];
	int opt_iosize;

	if (!test_bit(Nonrot, &mirror->rdev->flags))
		return false;

	opt_iosize = bdev_io_opt(mirror->rdev->bdev) >> 9;
	return opt_iosize > 0 && mirror->seq_start != MaxSector &&
	       mirror->next_seq_sect > opt_iosize &&
	       mirror->next_seq_sect - opt_iosize >= mirror->seq_start;
}

/*
 * This routine returns the disk from which the requested read should
 * be done. There is a per-array 'next expected sequential IO' sector
@@ -768,29 +793,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
		pending = atomic_read(&rdev->nr_pending);
		dist = abs(this_sector - conf->mirrors[disk].head_position);
		/* Don't change to another disk for sequential reads */
		if (conf->mirrors[disk].next_seq_sect == this_sector
		    || dist == 0) {
			int opt_iosize = bdev_io_opt(rdev->bdev) >> 9;
			struct raid1_info *mirror = &conf->mirrors[disk];

			/*
			 * If buffered sequential IO size exceeds optimal
			 * iosize, check if there is idle disk. If yes, choose
			 * the idle disk. read_balance could already choose an
			 * idle disk before noticing it's a sequential IO in
			 * this disk. This doesn't matter because this disk
			 * will idle, next time it will be utilized after the
			 * first disk has IO size exceeds optimal iosize. In
			 * this way, iosize of the first disk will be optimal
			 * iosize at least. iosize of the second disk might be
			 * small, but not a big deal since when the second disk
			 * starts IO, the first disk is likely still busy.
			 */
			if (test_bit(Nonrot, &rdev->flags) && opt_iosize > 0 &&
			    mirror->seq_start != MaxSector &&
			    mirror->next_seq_sect > opt_iosize &&
			    mirror->next_seq_sect - opt_iosize >=
			    mirror->seq_start) {
		if (is_sequential(conf, disk, r1_bio)) {
			if (!should_choose_next(conf, disk)) {
				best_disk = disk;
				break;
			}
			/*
			 * Add 'pending' to avoid choosing this disk if
			 * there is other idle disk.
@@ -801,10 +808,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
			 * will be chosen.
			 */
			sequential_disk = disk;
			} else {
				best_disk = disk;
				break;
			}
		}

		if (min_pending > pending) {