Commit d37977f0 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'md-6.9-20240306' of...

Merge tag 'md-6.9-20240306' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.9/block

Pull MD atomic queue limits changes from Song.

* tag 'md-6.9-20240306' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  block: remove disk_stack_limits
  md: remove mddev->queue
  md: don't initialize queue limits
  md/raid10: use the atomic queue limit update APIs
  md/raid5: use the atomic queue limit update APIs
  md/raid1: use the atomic queue limit update APIs
  md/raid0: use the atomic queue limit update APIs
  md: add queue limit helpers
  md: add a mddev_is_dm helper
  md: add a mddev_add_trace_msg helper
  md: add a mddev_trace_remap helper
parents 34a2cf3f dd27a84b
Loading
Loading
Loading
Loading
+0 −24
Original line number Diff line number Diff line
@@ -916,30 +916,6 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
}
EXPORT_SYMBOL_GPL(queue_limits_stack_bdev);

/**
 * disk_stack_limits - adjust queue limits for stacked drivers
 * @disk:  MD/DM gendisk (top)
 * @bdev:  the underlying block device (bottom)
 * @offset:  byte offset to the beginning of data within the component device
 *
 * Description:
 *    Stacks the queue limits of the bottom block_device onto those of the
 *    top level gendisk.  A notice is logged when blk_stack_limits() returns
 *    a negative value, and disk_update_readahead() is called on @disk
 *    afterwards in either case.
 */
void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
		       sector_t offset)
{
	struct queue_limits *top = &disk->queue->limits;
	struct queue_limits *bottom = &bdev_get_queue(bdev)->limits;
	/* @offset is in bytes; convert it to 512-byte sectors */
	sector_t start = get_start_sect(bdev) + (offset >> 9);

	if (blk_stack_limits(top, bottom, start) < 0) {
		pr_notice("%s: Warning: Device %pg is misaligned\n",
			disk->disk_name, bdev);
	}

	disk_update_readahead(disk);
}
EXPORT_SYMBOL(disk_stack_limits);

/**
 * blk_queue_update_dma_pad - update pad mask
 * @q:     the request queue for the device
+3 −6
Original line number Diff line number Diff line
@@ -1046,8 +1046,7 @@ void md_bitmap_unplug(struct bitmap *bitmap)
		if (dirty || need_write) {
			if (!writing) {
				md_bitmap_wait_writes(bitmap);
				if (bitmap->mddev->queue)
					blk_add_trace_msg(bitmap->mddev->queue,
				mddev_add_trace_msg(bitmap->mddev,
					"md bitmap_unplug");
			}
			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
@@ -1319,9 +1318,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
	}
	bitmap->allclean = 1;

	if (bitmap->mddev->queue)
		blk_add_trace_msg(bitmap->mddev->queue,
				  "md bitmap_daemon_work");
	mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work");

	/* Any file-page which is PENDING now needs to be written.
	 * So set NEEDWRITE now, then after we make any last-minute changes
+64 −25
Original line number Diff line number Diff line
@@ -65,7 +65,6 @@
#include <linux/percpu-refcount.h>
#include <linux/part_stat.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"
@@ -2411,7 +2410,7 @@ int md_integrity_register(struct mddev *mddev)

	if (list_empty(&mddev->disks))
		return 0; /* nothing to do */
	if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
	if (mddev_is_dm(mddev) || blk_get_integrity(mddev->gendisk))
		return 0; /* shouldn't register, or already is */
	rdev_for_each(rdev, mddev) {
		/* skip spares and non-functional disks */
@@ -2464,7 +2463,7 @@ int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	struct blk_integrity *bi_mddev;

	if (!mddev->gendisk)
	if (mddev_is_dm(mddev))
		return 0;

	bi_mddev = blk_get_integrity(mddev->gendisk);
@@ -2857,8 +2856,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
	pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
		 mdname(mddev), mddev->in_sync);

	if (mddev->queue)
		blk_add_trace_msg(mddev->queue, "md md_update_sb");
	mddev_add_trace_msg(mddev, "md md_update_sb");
rewrite:
	md_bitmap_update_sb(mddev->bitmap);
	rdev_for_each(rdev, mddev) {
@@ -4166,7 +4164,6 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
		mddev->in_sync = 1;
		del_timer_sync(&mddev->safemode_timer);
	}
	blk_set_stacking_limits(&mddev->queue->limits);
	pers->run(mddev);
	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
	if (!mddev->thread)
@@ -5753,6 +5750,51 @@ static const struct kobj_type md_ktype = {

int mdp_major = 0;

/* stack the limit for all rdevs into lim */
void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev) {
		queue_limits_stack_bdev(lim, rdev->bdev, rdev->data_offset,
					mddev->gendisk->disk_name);
	}
}
EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits);

/*
 * Fold the stacking limits of a newly added rdev into the array's queue
 * limits.  Returns 0 without touching anything when the array is
 * DM-managed, otherwise the result of committing the updated limits.
 */
int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	struct request_queue *q;
	struct queue_limits lim;

	if (mddev_is_dm(mddev))
		return 0;

	q = mddev->gendisk->queue;
	lim = queue_limits_start_update(q);
	queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset,
				mddev->gendisk->disk_name);
	return queue_limits_commit_update(q, &lim);
}
EXPORT_SYMBOL_GPL(mddev_stack_new_rdev);

/*
 * Recompute the optimal I/O size after a reshape as io_min * @nr_stripes.
 * The array is suspended around the limits update and resumed afterwards.
 */
void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes)
{
	struct request_queue *q;
	struct queue_limits lim;

	/*
	 * Nothing to do for DM-managed arrays, and don't bother updating
	 * io_opt if we can't suspend the array.
	 */
	if (mddev_is_dm(mddev) || mddev_suspend(mddev, false) < 0)
		return;

	q = mddev->gendisk->queue;
	lim = queue_limits_start_update(q);
	lim.io_opt = lim.io_min * nr_stripes;
	queue_limits_commit_update(q, &lim);
	mddev_resume(mddev);
}
EXPORT_SYMBOL_GPL(mddev_update_io_opt);

static void mddev_delayed_delete(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, del_work);
@@ -5835,9 +5877,7 @@ struct mddev *md_alloc(dev_t dev, char *name)
	disk->fops = &md_fops;
	disk->private_data = mddev;

	mddev->queue = disk->queue;
	blk_set_stacking_limits(&mddev->queue->limits);
	blk_queue_write_cache(mddev->queue, true, true);
	blk_queue_write_cache(disk->queue, true, true);
	disk->events |= DISK_EVENT_MEDIA_CHANGE;
	mddev->gendisk = disk;
	error = add_disk(disk);
@@ -5979,7 +6019,7 @@ int md_run(struct mddev *mddev)
		invalidate_bdev(rdev->bdev);
		if (mddev->ro != MD_RDONLY && rdev_read_only(rdev)) {
			mddev->ro = MD_RDONLY;
			if (mddev->gendisk)
			if (!mddev_is_dm(mddev))
				set_disk_ro(mddev->gendisk, 1);
		}

@@ -6141,7 +6181,8 @@ int md_run(struct mddev *mddev)
		}
	}

	if (mddev->queue) {
	if (!mddev_is_dm(mddev)) {
		struct request_queue *q = mddev->gendisk->queue;
		bool nonrot = true;

		rdev_for_each(rdev, mddev) {
@@ -6153,14 +6194,14 @@ int md_run(struct mddev *mddev)
		if (mddev->degraded)
			nonrot = false;
		if (nonrot)
			blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
			blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
		else
			blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
		blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
			blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
		blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q);

		/* Set the NOWAIT flags if all underlying devices support it */
		if (nowait)
			blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
			blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
	}
	if (pers->sync_request) {
		if (mddev->kobj.sd &&
@@ -6406,8 +6447,10 @@ static void mddev_detach(struct mddev *mddev)
		mddev->pers->quiesce(mddev, 0);
	}
	md_unregister_thread(mddev, &mddev->thread);
	if (mddev->queue)
		blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/

	/* the unplug fn references 'conf' */
	if (!mddev_is_dm(mddev))
		blk_sync_queue(mddev->gendisk->queue);
}

static void __md_stop(struct mddev *mddev)
@@ -7125,7 +7168,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
	if (!bdev_nowait(rdev->bdev)) {
		pr_info("%s: Disabling nowait because %pg does not support nowait\n",
			mdname(mddev), rdev->bdev);
		blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->queue);
		blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->gendisk->queue);
	}
	/*
	 * Kick recovery, maybe this spare has to be added to the
@@ -7362,11 +7405,10 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
	if (!rv) {
		if (mddev_is_clustered(mddev))
			md_cluster_ops->update_size(mddev, old_dev_sectors);
		else if (mddev->queue) {
		else if (!mddev_is_dm(mddev))
			set_capacity_and_notify(mddev->gendisk,
						mddev->array_sectors);
	}
	}
	return rv;
}

@@ -8686,10 +8728,7 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,

	bio_chain(discard_bio, bio);
	bio_clone_blkg_association(discard_bio, bio);
	if (mddev->gendisk)
		trace_block_bio_remap(discard_bio,
				disk_devt(mddev->gendisk),
				bio->bi_iter.bi_sector);
	mddev_trace_remap(mddev, discard_bio, bio->bi_iter.bi_sector);
	submit_bio_noacct(discard_bio);
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
@@ -9182,7 +9221,7 @@ void md_do_sync(struct md_thread *thread)
			mddev->delta_disks > 0 &&
			mddev->pers->finish_reshape &&
			mddev->pers->size &&
			mddev->queue) {
			!mddev_is_dm(mddev)) {
		mddev_lock_nointr(mddev);
		md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
		mddev_unlock(mddev);
+26 −2
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <trace/events/block.h>
#include "md-cluster.h"

#define MaxSector (~(sector_t)0)
@@ -479,7 +480,6 @@ struct mddev {
	struct timer_list		safemode_timer;
	struct percpu_ref		writes_pending;
	int				sync_checkers;	/* # of threads checking writes_pending */
	struct request_queue		*queue;	/* for plugging ... */

	struct bitmap			*bitmap; /* the bitmap for the device */
	struct {
@@ -868,7 +868,7 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
{
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
	    !bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors)
		mddev->queue->limits.max_write_zeroes_sectors = 0;
		mddev->gendisk->queue->limits.max_write_zeroes_sectors = 0;
}

static inline int mddev_suspend_and_lock(struct mddev *mddev)
@@ -907,7 +907,31 @@ void md_autostart_arrays(int part);
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
int do_md_run(struct mddev *mddev);
void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim);
int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev);
void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes);

extern const struct block_device_operations md_fops;

/*
 * MD devices can be used undeneath by DM, in which case ->gendisk is NULL.
 */
static inline bool mddev_is_dm(struct mddev *mddev)
{
	return !mddev->gendisk;
}

/*
 * Emit a block_bio_remap trace event for @bio against the array's device
 * number, with @sector as the original sector.  Skipped for DM-managed
 * arrays, which have no gendisk to take the device number from.
 */
static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio,
		sector_t sector)
{
	if (mddev_is_dm(mddev))
		return;

	trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector);
}

/*
 * Forward a formatted message to blk_add_trace_msg() on the request queue of
 * the array's gendisk.  Expands to a no-op check for DM-managed arrays (see
 * mddev_is_dm()), which have no ->gendisk to take the queue from.
 *
 * Note: the @mddev argument is evaluated twice, so it must not have side
 * effects.
 */
#define mddev_add_trace_msg(mddev, fmt, args...)			\
do {									\
	if (!mddev_is_dm(mddev))					\
		blk_add_trace_msg((mddev)->gendisk->queue, fmt, ##args); \
} while (0)

#endif /* _MD_MD_H */
+22 −20
Original line number Diff line number Diff line
@@ -379,6 +379,19 @@ static void raid0_free(struct mddev *mddev, void *priv)
	free_conf(mddev, conf);
}

static int raid0_set_limits(struct mddev *mddev)
{
	struct queue_limits lim;

	blk_set_stacking_limits(&lim);
	lim.max_hw_sectors = mddev->chunk_sectors;
	lim.max_write_zeroes_sectors = mddev->chunk_sectors;
	lim.io_min = mddev->chunk_sectors << 9;
	lim.io_opt = lim.io_min * mddev->raid_disks;
	mddev_stack_rdev_limits(mddev, &lim);
	return queue_limits_set(mddev->gendisk->queue, &lim);
}

static int raid0_run(struct mddev *mddev)
{
	struct r0conf *conf;
@@ -399,20 +412,10 @@ static int raid0_run(struct mddev *mddev)
		mddev->private = conf;
	}
	conf = mddev->private;
	if (mddev->queue) {
		struct md_rdev *rdev;

		blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
		blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);

		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
		blk_queue_io_opt(mddev->queue,
				 (mddev->chunk_sectors << 9) * mddev->raid_disks);

		rdev_for_each(rdev, mddev) {
			disk_stack_limits(mddev->gendisk, rdev->bdev,
					  rdev->data_offset << 9);
		}
	if (!mddev_is_dm(mddev)) {
		ret = raid0_set_limits(mddev);
		if (ret)
			goto out_free_conf;
	}

	/* calculate array device size */
@@ -426,8 +429,10 @@ static int raid0_run(struct mddev *mddev)

	ret = md_integrity_register(mddev);
	if (ret)
		goto out_free_conf;
	return 0;
out_free_conf:
	free_conf(mddev, conf);

	return ret;
}

@@ -578,10 +583,7 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
	bio_set_dev(bio, tmp_dev->bdev);
	bio->bi_iter.bi_sector = sector + zone->dev_start +
		tmp_dev->data_offset;

	if (mddev->gendisk)
		trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
				      bio_sector);
	mddev_trace_remap(mddev, bio, bio_sector);
	mddev_check_write_zeroes(mddev, bio);
	submit_bio_noacct(bio);
}
Loading