Commit f63f1735 authored by Christoph Hellwig, committed by Song Liu
Browse files

md/raid5: use the atomic queue limit update APIs



Build the queue limits outside the queue and apply them using
queue_limits_set.  To make the code more obvious also split the queue
limits handling into separate helpers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Song Liu <song@kernel.org>
Tested-by: Song Liu <song@kernel.org>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240303140150.5435-8-hch@lst.de
parent 97894f7d
Loading
Loading
Loading
Loading
+65 −65
Original line number Diff line number Diff line
@@ -7691,10 +7691,65 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
	return 0;
}

static void raid5_set_io_opt(struct r5conf *conf)
/*
 * raid5_set_limits - build the queue limits for a raid5 array in a local
 * struct queue_limits and apply them with a single queue_limits_set() call.
 *
 * @mddev: array being configured; mddev->private is read as the r5conf.
 *
 * Returns the value of queue_limits_set(); the caller in raid5_run() treats
 * any non-zero value as a failure.
 */
static int raid5_set_limits(struct mddev *mddev)
{
	/*
	 * NOTE(review): the statement below looks like the removed body of the
	 * old raid5_set_io_opt() helper, shown inline by the diff view -- confirm
	 * against the real tree before reading it as part of this function.
	 */
	blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) *
			 (conf->raid_disks - conf->max_degraded));
	struct r5conf *conf = mddev->private;
	struct queue_limits lim;
	int data_disks, stripe;
	struct md_rdev *rdev;

	/*
	 * The read-ahead size must cover two whole stripes, which is
	 * 2 * data_disks * chunk size, where data_disks is
	 * previous_raid_disks - max_degraded.
	 *
	 * NOTE(review): nothing in this function sets a read-ahead value;
	 * data_disks is only used for the stripe size computed below.
	 */
	data_disks = conf->previous_raid_disks - conf->max_degraded;

	/*
	 * We can only discard a whole stripe: it makes no sense to discard
	 * the data disks but still write the parity disk.  The stripe size is
	 * in bytes (chunk_sectors << 9), rounded up to a power of two.
	 */
	stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));

	/* Start from the stacking defaults, then fill in the raid5 values. */
	blk_set_stacking_limits(&lim);
	lim.io_min = mddev->chunk_sectors << 9;
	lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
	lim.raid_partial_stripes_expensive = 1;
	lim.discard_granularity = stripe;
	lim.max_write_zeroes_sectors = 0;
	/*
	 * NOTE(review): mddev_stack_rdev_limits() is defined elsewhere;
	 * presumably it stacks every rdev at its current data_offset, and the
	 * loop below additionally stacks each rdev at new_data_offset -- verify.
	 */
	mddev_stack_rdev_limits(mddev, &lim);
	rdev_for_each(rdev, mddev)
		queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset,
				mddev->gendisk->disk_name);

	/*
	 * Zeroing is required for discard, otherwise data could be lost.
	 *
	 * Consider a scenario: discard a stripe (the stripe could be
	 * inconsistent if discard_zeroes_data is 0); write one disk of the
	 * stripe (the stripe could be inconsistent again depending on which
	 * disks are used to calculate parity); the disk is broken; The stripe
	 * data of this disk is lost.
	 *
	 * We only allow DISCARD if the sysadmin has confirmed that only safe
	 * devices are in use by setting a module parameter.  A better idea
	 * might be to turn DISCARD into WRITE_ZEROES requests, as that is
	 * required to be safe.
	 */
	if (!devices_handle_discard_safely ||
	    lim.max_discard_sectors < (stripe >> 9) ||
	    lim.discard_granularity < stripe)
		lim.max_hw_discard_sectors = 0;

	/*
	 * Requests require having a bitmap for each stripe.
	 * Limit the max sectors based on this.
	 */
	lim.max_hw_sectors = RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf);

	/* No restrictions on the number of segments in the request */
	lim.max_segments = USHRT_MAX;

	/* Apply the whole set of limits to the array's queue at once. */
	return queue_limits_set(mddev->queue, &lim);
}

static int raid5_run(struct mddev *mddev)
@@ -7707,6 +7762,7 @@ static int raid5_run(struct mddev *mddev)
	int i;
	long long min_offset_diff = 0;
	int first = 1;
	int ret = -EIO;

	if (mddev->recovery_cp != MaxSector)
		pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
@@ -7960,65 +8016,9 @@ static int raid5_run(struct mddev *mddev)
	md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));

	if (!mddev_is_dm(mddev)) {
		int chunk_size;
		/* read-ahead size must cover two whole stripes, which
		 * is 2 * (datadisks) * chunksize where 'n' is the
		 * number of raid devices
		 */
		int data_disks = conf->previous_raid_disks - conf->max_degraded;
		int stripe = data_disks *
			((mddev->chunk_sectors << 9) / PAGE_SIZE);

		chunk_size = mddev->chunk_sectors << 9;
		blk_queue_io_min(mddev->queue, chunk_size);
		raid5_set_io_opt(conf);
		mddev->queue->limits.raid_partial_stripes_expensive = 1;
		/*
		 * We can only discard a whole stripe. It doesn't make sense to
		 * discard data disk but write parity disk
		 */
		stripe = stripe * PAGE_SIZE;
		stripe = roundup_pow_of_two(stripe);
		mddev->queue->limits.discard_granularity = stripe;

		blk_queue_max_write_zeroes_sectors(mddev->queue, 0);

		rdev_for_each(rdev, mddev) {
			disk_stack_limits(mddev->gendisk, rdev->bdev,
					  rdev->data_offset << 9);
			disk_stack_limits(mddev->gendisk, rdev->bdev,
					  rdev->new_data_offset << 9);
		}

		/*
		 * zeroing is required, otherwise data
		 * could be lost. Consider a scenario: discard a stripe
		 * (the stripe could be inconsistent if
		 * discard_zeroes_data is 0); write one disk of the
		 * stripe (the stripe could be inconsistent again
		 * depending on which disks are used to calculate
		 * parity); the disk is broken; The stripe data of this
		 * disk is lost.
		 *
		 * We only allow DISCARD if the sysadmin has confirmed that
		 * only safe devices are in use by setting a module parameter.
		 * A better idea might be to turn DISCARD into WRITE_ZEROES
		 * requests, as that is required to be safe.
		 */
		if (!devices_handle_discard_safely ||
		    mddev->queue->limits.max_discard_sectors < (stripe >> 9) ||
		    mddev->queue->limits.discard_granularity < stripe)
			blk_queue_max_discard_sectors(mddev->queue, 0);

		/*
		 * Requests require having a bitmap for each stripe.
		 * Limit the max sectors based on this.
		 */
		blk_queue_max_hw_sectors(mddev->queue,
			RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf));

		/* No restrictions on the number of segments in the request */
		blk_queue_max_segments(mddev->queue, USHRT_MAX);
		ret = raid5_set_limits(mddev);
		if (ret)
			goto abort;
	}

	if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
@@ -8031,7 +8031,7 @@ static int raid5_run(struct mddev *mddev)
	free_conf(conf);
	mddev->private = NULL;
	pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
	return -EIO;
	return ret;
}

static void raid5_free(struct mddev *mddev, void *priv)
@@ -8563,8 +8563,8 @@ static void end_reshape(struct r5conf *conf)
		spin_unlock_irq(&conf->device_lock);
		wake_up(&conf->wait_for_overlap);

		if (!mddev_is_dm(conf->mddev))
			raid5_set_io_opt(conf);
		mddev_update_io_opt(conf->mddev,
			conf->raid_disks - conf->max_degraded);
	}
}