Commit c9b39e51 authored by Song Liu
Browse files

Merge branch 'md-6.14-bitmap' into md-6.14

Move the bitmap_{start, end}write calls to the md layer. These changes help
address hangs in bitmap_startwrite() [1][2].

[1] https://lore.kernel.org/all/CAJpMwyjmHQLvm6zg1cmQErttNNQPDAAXPKM3xgTjMhbfts986Q@mail.gmail.com/
[2] https://lore.kernel.org/all/ADF7D720-5764-4AF3-B68E-1845988737AA@flyingcircus.io/

* md-6.14-bitmap:
  md/md-bitmap: move bitmap_{start, end}write to md upper layer
  md/raid5: implement pers->bitmap_sector()
  md: add a new callback pers->bitmap_sector()
  md/md-bitmap: remove the last parameter for bitmap_ops->endwrite()
  md/md-bitmap: factor behind write counters out from bitmap_{start/end}write()
parents 4fa91616 cd5fc653
Loading
Loading
Loading
Loading
+45 −29
Original line number Diff line number Diff line
@@ -1671,24 +1671,13 @@ __acquires(bitmap->lock)
}

static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
			     unsigned long sectors, bool behind)
			     unsigned long sectors)
{
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap)
		return 0;

	if (behind) {
		int bw;
		atomic_inc(&bitmap->behind_writes);
		bw = atomic_read(&bitmap->behind_writes);
		if (bw > bitmap->behind_writes_used)
			bitmap->behind_writes_used = bw;

		pr_debug("inc write-behind count %d/%lu\n",
			 bw, bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		sector_t blocks;
		bitmap_counter_t *bmc;
@@ -1737,21 +1726,13 @@ static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
}

static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
			    unsigned long sectors, bool success, bool behind)
			    unsigned long sectors)
{
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap)
		return;

	if (behind) {
		if (atomic_dec_and_test(&bitmap->behind_writes))
			wake_up(&bitmap->behind_wait);
		pr_debug("dec write-behind count %d/%lu\n",
			 atomic_read(&bitmap->behind_writes),
			 bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		sector_t blocks;
		unsigned long flags;
@@ -1764,15 +1745,16 @@ static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
			return;
		}

		if (success && !bitmap->mddev->degraded &&
		    bitmap->events_cleared < bitmap->mddev->events) {
		if (!bitmap->mddev->degraded) {
			if (bitmap->events_cleared < bitmap->mddev->events) {
				bitmap->events_cleared = bitmap->mddev->events;
				bitmap->need_sync = 1;
			sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
				sysfs_notify_dirent_safe(
						bitmap->sysfs_can_clear);
			}

		if (!success && !NEEDED(*bmc))
		} else if (!NEEDED(*bmc)) {
			*bmc |= NEEDED_MASK;
		}

		if (COUNTER(*bmc) == COUNTER_MAX)
			wake_up(&bitmap->overflow_wait);
@@ -2062,6 +2044,37 @@ static void md_bitmap_free(void *data)
	kfree(bitmap);
}

/*
 * Account for one more in-flight write-behind request on @mddev's bitmap.
 *
 * Does nothing when the array has no bitmap.  Otherwise increments
 * bitmap->behind_writes and raises bitmap->behind_writes_used whenever the
 * new count exceeds the value recorded so far.
 */
static void bitmap_start_behind_write(struct mddev *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;
	int bw;

	if (!bitmap)
		return;

	/*
	 * Use the value produced by our own increment.  A separate
	 * atomic_read() could observe a count that a concurrent start/end
	 * has already changed, so the maximum tracked below might miss the
	 * peak this call created.
	 */
	bw = atomic_inc_return(&bitmap->behind_writes);
	if (bw > bitmap->behind_writes_used)
		bitmap->behind_writes_used = bw;

	pr_debug("inc write-behind count %d/%lu\n",
		 bw, bitmap->mddev->bitmap_info.max_write_behind);
}

static void bitmap_end_behind_write(struct mddev *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap)
		return;

	if (atomic_dec_and_test(&bitmap->behind_writes))
		wake_up(&bitmap->behind_wait);
	pr_debug("dec write-behind count %d/%lu\n",
		 atomic_read(&bitmap->behind_writes),
		 bitmap->mddev->bitmap_info.max_write_behind);
}

static void bitmap_wait_behind_writes(struct mddev *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;
@@ -2981,6 +2994,9 @@ static struct bitmap_operations bitmap_ops = {
	.dirty_bits		= bitmap_dirty_bits,
	.unplug			= bitmap_unplug,
	.daemon_work		= bitmap_daemon_work,

	.start_behind_write	= bitmap_start_behind_write,
	.end_behind_write	= bitmap_end_behind_write,
	.wait_behind_writes	= bitmap_wait_behind_writes,

	.startwrite		= bitmap_startwrite,
+5 −2
Original line number Diff line number Diff line
@@ -84,12 +84,15 @@ struct bitmap_operations {
			   unsigned long e);
	void (*unplug)(struct mddev *mddev, bool sync);
	void (*daemon_work)(struct mddev *mddev);

	void (*start_behind_write)(struct mddev *mddev);
	void (*end_behind_write)(struct mddev *mddev);
	void (*wait_behind_writes)(struct mddev *mddev);

	int (*startwrite)(struct mddev *mddev, sector_t offset,
			  unsigned long sectors, bool behind);
			  unsigned long sectors);
	void (*endwrite)(struct mddev *mddev, sector_t offset,
			 unsigned long sectors, bool success, bool behind);
			 unsigned long sectors);
	bool (*start_sync)(struct mddev *mddev, sector_t offset,
			   sector_t *blocks, bool degraded);
	void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks);
+29 −0
Original line number Diff line number Diff line
@@ -8745,12 +8745,32 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);

static void md_bitmap_start(struct mddev *mddev,
			    struct md_io_clone *md_io_clone)
{
	if (mddev->pers->bitmap_sector)
		mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
					   &md_io_clone->sectors);

	mddev->bitmap_ops->startwrite(mddev, md_io_clone->offset,
				      md_io_clone->sectors);
}

static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
{
	mddev->bitmap_ops->endwrite(mddev, md_io_clone->offset,
				    md_io_clone->sectors);
}

static void md_end_clone_io(struct bio *bio)
{
	struct md_io_clone *md_io_clone = bio->bi_private;
	struct bio *orig_bio = md_io_clone->orig_bio;
	struct mddev *mddev = md_io_clone->mddev;

	if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
		md_bitmap_end(mddev, md_io_clone);

	if (bio->bi_status && !orig_bio->bi_status)
		orig_bio->bi_status = bio->bi_status;

@@ -8775,6 +8795,12 @@ static void md_clone_bio(struct mddev *mddev, struct bio **bio)
	if (blk_queue_io_stat(bdev->bd_disk->queue))
		md_io_clone->start_time = bio_start_io_acct(*bio);

	if (bio_data_dir(*bio) == WRITE && mddev->bitmap) {
		md_io_clone->offset = (*bio)->bi_iter.bi_sector;
		md_io_clone->sectors = bio_sectors(*bio);
		md_bitmap_start(mddev, md_io_clone);
	}

	clone->bi_end_io = md_end_clone_io;
	clone->bi_private = md_io_clone;
	*bio = clone;
@@ -8793,6 +8819,9 @@ void md_free_cloned_bio(struct bio *bio)
	struct bio *orig_bio = md_io_clone->orig_bio;
	struct mddev *mddev = md_io_clone->mddev;

	if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
		md_bitmap_end(mddev, md_io_clone);

	if (bio->bi_status && !orig_bio->bi_status)
		orig_bio->bi_status = bio->bi_status;

+5 −0
Original line number Diff line number Diff line
@@ -746,6 +746,9 @@ struct md_personality
	void *(*takeover) (struct mddev *mddev);
	/* Changes the consistency policy of an active array. */
	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
	/* convert io ranges from array to bitmap */
	void (*bitmap_sector)(struct mddev *mddev, sector_t *offset,
			      unsigned long *sectors);
};

struct md_sysfs_entry {
@@ -828,6 +831,8 @@ struct md_io_clone {
	struct mddev	*mddev;
	struct bio	*orig_bio;
	unsigned long	start_time;
	sector_t	offset;
	unsigned long	sectors;
	struct bio	bio_clone;
};

+6 −28
Original line number Diff line number Diff line
@@ -420,10 +420,8 @@ static void close_write(struct r1bio *r1_bio)
		r1_bio->behind_master_bio = NULL;
	}

	/* clear the bitmap if all writes complete successfully */
	mddev->bitmap_ops->endwrite(mddev, r1_bio->sector, r1_bio->sectors,
				    !test_bit(R1BIO_Degraded, &r1_bio->state),
				    test_bit(R1BIO_BehindIO, &r1_bio->state));
	if (test_bit(R1BIO_BehindIO, &r1_bio->state))
		mddev->bitmap_ops->end_behind_write(mddev);
	md_write_end(mddev);
}

@@ -480,8 +478,6 @@ static void raid1_end_write_request(struct bio *bio)
		if (!test_bit(Faulty, &rdev->flags))
			set_bit(R1BIO_WriteError, &r1_bio->state);
		else {
			/* Fail the request */
			set_bit(R1BIO_Degraded, &r1_bio->state);
			/* Finished with this branch */
			r1_bio->bios[mirror] = NULL;
			to_put = bio;
@@ -1535,11 +1531,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
			write_behind = true;

		r1_bio->bios[i] = NULL;
		if (!rdev || test_bit(Faulty, &rdev->flags)) {
			if (i < conf->raid_disks)
				set_bit(R1BIO_Degraded, &r1_bio->state);
		if (!rdev || test_bit(Faulty, &rdev->flags))
			continue;
		}

		atomic_inc(&rdev->nr_pending);
		if (test_bit(WriteErrorSeen, &rdev->flags)) {
@@ -1558,16 +1551,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
					 */
					max_sectors = bad_sectors;
				rdev_dec_pending(rdev, mddev);
				/* We don't set R1BIO_Degraded as that
				 * only applies if the disk is
				 * missing, so it might be re-added,
				 * and we want to know to recover this
				 * chunk.
				 * In this case the device is here,
				 * and the fact that this chunk is not
				 * in-sync is recorded in the bad
				 * block log
				 */
				continue;
			}
			if (is_bad) {
@@ -1645,9 +1628,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
			    stats.behind_writes < max_write_behind)
				alloc_behind_master_bio(r1_bio, bio);

			mddev->bitmap_ops->startwrite(
				mddev, r1_bio->sector, r1_bio->sectors,
				test_bit(R1BIO_BehindIO, &r1_bio->state));
			if (test_bit(R1BIO_BehindIO, &r1_bio->state))
				mddev->bitmap_ops->start_behind_write(mddev);
			first_clone = 0;
		}

@@ -2614,12 +2596,10 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
			 * errors.
			 */
			fail = true;
			if (!narrow_write_error(r1_bio, m)) {
			if (!narrow_write_error(r1_bio, m))
				md_error(conf->mddev,
					 conf->mirrors[m].rdev);
				/* an I/O failed, we can't clear the bitmap */
				set_bit(R1BIO_Degraded, &r1_bio->state);
			}
			rdev_dec_pending(conf->mirrors[m].rdev,
					 conf->mddev);
		}
@@ -2710,8 +2690,6 @@ static void raid1d(struct md_thread *thread)
			list_del(&r1_bio->retry_list);
			idx = sector_to_idx(r1_bio->sector);
			atomic_dec(&conf->nr_queued[idx]);
			if (mddev->degraded)
				set_bit(R1BIO_Degraded, &r1_bio->state);
			if (test_bit(R1BIO_WriteError, &r1_bio->state))
				close_write(r1_bio);
			raid_end_bio_io(r1_bio);
Loading