Commit 0b66deb1 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'md-6.13-20241105' of...

Merge tag 'md-6.13-20241105' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.13/block

Pull MD changes from Song:

"1. Enhance handling of faulty and blocked devices, by Yu Kuai.
 2. raid5-ppl atomic improvement, by Uros Bizjak.
 3. md-bitmap fix, by Yuan Can."

* tag 'md-6.13-20241105' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md/md-bitmap: Add missing destroy_work_on_stack()
  md/raid5: don't set Faulty rdev for blocked_rdev
  md/raid10: don't wait for Faulty rdev in wait_blocked_rdev()
  md/raid1: don't wait for Faulty rdev in wait_blocked_rdev()
  md/raid1: factor out helper to handle blocked rdev from raid1_write_request()
  md: don't record new badblocks for faulty rdev
  md: don't wait faulty rdev in md_wait_for_blocked_rdev()
  md: add a new helper rdev_blocked()
  md/raid5-ppl: Use atomic64_inc_return() in ppl_new_iounit()
parents 91ff97a7 6012169e
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1285,6 +1285,7 @@ static void bitmap_unplug_async(struct bitmap *bitmap)

	queue_work(md_bitmap_wq, &unplug_work.work);
	wait_for_completion(&done);
	destroy_work_on_stack(&unplug_work.work);
}

static void bitmap_unplug(struct mddev *mddev, bool sync)
+12 −3
Original line number Diff line number Diff line
@@ -9762,9 +9762,7 @@ EXPORT_SYMBOL(md_reap_sync_thread);
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	sysfs_notify_dirent_safe(rdev->sysfs_state);
	wait_event_timeout(rdev->blocked_wait,
			   !test_bit(Blocked, &rdev->flags) &&
			   !test_bit(BlockedBadBlocks, &rdev->flags),
	wait_event_timeout(rdev->blocked_wait, !rdev_blocked(rdev),
			   msecs_to_jiffies(5000));
	rdev_dec_pending(rdev, mddev);
}
@@ -9793,6 +9791,17 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
{
	struct mddev *mddev = rdev->mddev;
	int rv;

	/*
	 * Recording new badblocks for a faulty rdev forces an unnecessary
	 * superblock update. This is fragile for external management, because
	 * the userspace daemon may be trying to remove this device and a
	 * deadlock may occur. This will probably be solved in mdadm, but it is
	 * safer to avoid it here.
	 */
	if (test_bit(Faulty, &rdev->flags))
		return 1;

	if (is_new)
		s += rdev->new_data_offset;
	else
+24 −0
Original line number Diff line number Diff line
@@ -1002,6 +1002,30 @@ static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio,
		trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector);
}

/*
 * rdev_blocked() - report whether @rdev is currently considered blocked.
 *
 * Returns true when the Blocked flag is set, or when BlockedBadBlocks is set
 * on a device that is not Faulty. Returns false in every other case.
 */
static inline bool rdev_blocked(struct md_rdev *rdev)
{
	/*
	 * The flags are evaluated strictly in this order:
	 *
	 * 1) Blocked - set by the error handler and cleared by the daemon once
	 *    the superblock has been updated. Write IO has to be held back in
	 *    the meantime so that old data is not read after a power failure.
	 *
	 * 2) Faulty - a faulty device should not be accessed anymore, so there
	 *    is nothing to gain from waiting for a bad block to be
	 *    acknowledged on it.
	 *
	 * 3) BlockedBadBlocks - the rdev is blocked by badblocks.
	 */
	return test_bit(Blocked, &rdev->flags) ||
	       (!test_bit(Faulty, &rdev->flags) &&
		test_bit(BlockedBadBlocks, &rdev->flags));
}

#define mddev_add_trace_msg(mddev, fmt, args...)			\
do {									\
	if (!mddev_is_dm(mddev))					\
+39 −36
Original line number Diff line number Diff line
@@ -1412,6 +1412,40 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
	submit_bio_noacct(read_bio);
}

/*
 * wait_blocked_rdev() - make sure no member device is blocked before a write.
 * @mddev: array the write is aimed at; its ->private is the r1conf.
 * @bio:   the write bio; its start sector and length are used for the bad
 *         block lookup, and REQ_NOWAIT in ->bi_opf forbids sleeping.
 *
 * Walks all conf->raid_disks * 2 mirror slots. Whenever a blocked rdev is
 * found, waits for it and then rescans from the first slot.
 *
 * Returns true once a complete scan finds no blocked rdev, or false if a
 * blocked rdev is met while @bio carries REQ_NOWAIT.
 */
static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio)
{
	struct r1conf *conf = mddev->private;
	int disks = conf->raid_disks * 2;
	bool rescan;
	int i;

	do {
		rescan = false;

		for (i = 0; i < disks; i++) {
			struct md_rdev *rdev = conf->mirrors[i].rdev;

			if (!rdev)
				continue;

			/*
			 * An unacknowledged bad block in the target range
			 * means this device must not be written yet.
			 */
			if (test_bit(WriteErrorSeen, &rdev->flags) &&
			    rdev_has_badblock(rdev, bio->bi_iter.bi_sector,
					      bio_sectors(bio)) < 0)
				set_bit(BlockedBadBlocks, &rdev->flags);

			if (!rdev_blocked(rdev))
				continue;

			/* The caller asked not to sleep: report the block. */
			if (bio->bi_opf & REQ_NOWAIT)
				return false;

			mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked",
					    rdev->raid_disk);
			/* Pin the rdev for the duration of the wait. */
			atomic_inc(&rdev->nr_pending);
			md_wait_for_blocked_rdev(rdev, rdev->mddev);

			/* Flags may have changed while waiting; start over. */
			rescan = true;
			break;
		}
	} while (rescan);

	return true;
}

static void raid1_write_request(struct mddev *mddev, struct bio *bio,
				int max_write_sectors)
{
@@ -1419,7 +1453,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
	struct r1bio *r1_bio;
	int i, disks;
	unsigned long flags;
	struct md_rdev *blocked_rdev;
	int first_clone;
	int max_sectors;
	bool write_behind = false;
@@ -1457,7 +1490,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
		return;
	}

 retry_write:
	if (!wait_blocked_rdev(mddev, bio)) {
		bio_wouldblock_error(bio);
		return;
	}

	r1_bio = alloc_r1bio(mddev, bio);
	r1_bio->sectors = max_write_sectors;

@@ -1473,7 +1510,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
	 */

	disks = conf->raid_disks * 2;
	blocked_rdev = NULL;
	max_sectors = r1_bio->sectors;
	for (i = 0;  i < disks; i++) {
		struct md_rdev *rdev = conf->mirrors[i].rdev;
@@ -1486,11 +1522,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
		if (!is_discard && rdev && test_bit(WriteMostly, &rdev->flags))
			write_behind = true;

		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
			atomic_inc(&rdev->nr_pending);
			blocked_rdev = rdev;
			break;
		}
		r1_bio->bios[i] = NULL;
		if (!rdev || test_bit(Faulty, &rdev->flags)) {
			if (i < conf->raid_disks)
@@ -1506,13 +1537,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,

			is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
					     &first_bad, &bad_sectors);
			if (is_bad < 0) {
				/* mustn't write here until the bad block is
				 * acknowledged*/
				set_bit(BlockedBadBlocks, &rdev->flags);
				blocked_rdev = rdev;
				break;
			}
			if (is_bad && first_bad <= r1_bio->sector) {
				/* Cannot write here at all */
				bad_sectors -= (r1_bio->sector - first_bad);
@@ -1543,27 +1567,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
		r1_bio->bios[i] = bio;
	}

	if (unlikely(blocked_rdev)) {
		/* Wait for this device to become unblocked */
		int j;

		for (j = 0; j < i; j++)
			if (r1_bio->bios[j])
				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
		mempool_free(r1_bio, &conf->r1bio_pool);
		allow_barrier(conf, bio->bi_iter.bi_sector);

		if (bio->bi_opf & REQ_NOWAIT) {
			bio_wouldblock_error(bio);
			return;
		}
		mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked",
				blocked_rdev->raid_disk);
		md_wait_for_blocked_rdev(blocked_rdev, mddev);
		wait_barrier(conf, bio->bi_iter.bi_sector, false);
		goto retry_write;
	}

	/*
	 * When using a bitmap, we may call alloc_behind_master_bio below.
	 * alloc_behind_master_bio allocates a copy of the data payload a page
+18 −22
Original line number Diff line number Diff line
@@ -1285,9 +1285,9 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,

static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
{
	int i;
	struct r10conf *conf = mddev->private;
	struct md_rdev *blocked_rdev;
	int i;

retry_wait:
	blocked_rdev = NULL;
@@ -1295,40 +1295,36 @@ static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
		struct md_rdev *rdev, *rrdev;

		rdev = conf->mirrors[i].rdev;
		rrdev = conf->mirrors[i].replacement;
		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
			atomic_inc(&rdev->nr_pending);
			blocked_rdev = rdev;
			break;
		}
		if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
			atomic_inc(&rrdev->nr_pending);
			blocked_rdev = rrdev;
			break;
		}

		if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
		if (rdev) {
			sector_t dev_sector = r10_bio->devs[i].addr;

			/*
			 * Discard request doesn't care the write result
			 * so it doesn't need to wait blocked disk here.
			 */
			if (!r10_bio->sectors)
				continue;

			if (rdev_has_badblock(rdev, dev_sector,
					      r10_bio->sectors) < 0) {
			if (test_bit(WriteErrorSeen, &rdev->flags) &&
			    r10_bio->sectors &&
			    rdev_has_badblock(rdev, dev_sector,
					      r10_bio->sectors) < 0)
				/*
				 * Mustn't write here until the bad block
				 * is acknowledged
				 * Mustn't write here until the bad
				 * block is acknowledged
				 */
				atomic_inc(&rdev->nr_pending);
				set_bit(BlockedBadBlocks, &rdev->flags);

			if (rdev_blocked(rdev)) {
				blocked_rdev = rdev;
				atomic_inc(&rdev->nr_pending);
				break;
			}
		}

		rrdev = conf->mirrors[i].replacement;
		if (rrdev && rdev_blocked(rrdev)) {
			atomic_inc(&rrdev->nr_pending);
			blocked_rdev = rrdev;
			break;
		}
	}

	if (unlikely(blocked_rdev)) {
Loading