Commit 254c271d authored by Xiao Ni's avatar Xiao Ni Committed by Song Liu
Browse files

md/raid10: improve discard request for far layout



For far layout, the discard region is not continuous on disks. So it needs
far copies r10bio to cover all regions. It needs a way to know all r10bios
have finish or not. Similar with raid10_sync_request, only the first r10bio
master_bio records the discard bio. Other r10bios master_bio record the
first r10bio. The first r10bio can finish after other r10bios finish and
then return the discard bio.

Tested-by: default avatarAdrian Huang <ahuang12@lenovo.com>
Signed-off-by: default avatarXiao Ni <xni@redhat.com>
Signed-off-by: default avatarSong Liu <songliubraving@fb.com>
parent d30588b2
Loading
Loading
Loading
Loading
+60 −19
Original line number Diff line number Diff line
@@ -1518,6 +1518,28 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
		raid10_write_request(mddev, bio, r10_bio);
}

static void raid_end_discard_bio(struct r10bio *r10bio)
{
	struct r10conf *conf = r10bio->mddev->private;
	struct r10bio *first_r10bio;

	while (atomic_dec_and_test(&r10bio->remaining)) {

		allow_barrier(conf);

		if (!test_bit(R10BIO_Discard, &r10bio->state)) {
			first_r10bio = (struct r10bio *)r10bio->master_bio;
			free_r10bio(r10bio);
			r10bio = first_r10bio;
		} else {
			md_write_end(r10bio->mddev);
			bio_endio(r10bio->master_bio);
			free_r10bio(r10bio);
			break;
		}
	}
}

static void raid10_end_discard_request(struct bio *bio)
{
	struct r10bio *r10_bio = bio->bi_private;
@@ -1545,11 +1567,7 @@ static void raid10_end_discard_request(struct bio *bio)
		rdev = conf->mirrors[dev].rdev;
	}

	if (atomic_dec_and_test(&r10_bio->remaining)) {
		md_write_end(r10_bio->mddev);
		raid_end_bio_io(r10_bio);
	}

	raid_end_discard_bio(r10_bio);
	rdev_dec_pending(rdev, conf->mddev);
}

@@ -1563,7 +1581,9 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
{
	struct r10conf *conf = mddev->private;
	struct geom *geo = &conf->geo;
	struct r10bio *r10_bio;
	int far_copies = geo->far_copies;
	bool first_copy = true;
	struct r10bio *r10_bio, *first_r10bio;
	struct bio *split;
	int disk;
	sector_t chunk;
@@ -1637,16 +1657,6 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
		wait_barrier(conf);
	}

	r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
	r10_bio->mddev = mddev;
	r10_bio->state = 0;
	r10_bio->sectors = 0;
	memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);

	wait_blocked_dev(mddev, r10_bio);

	r10_bio->master_bio = bio;

	bio_start = bio->bi_iter.bi_sector;
	bio_end = bio_end_sector(bio);

@@ -1673,6 +1683,29 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
	end_disk_offset = (bio_end & geo->chunk_mask) +
				(last_stripe_index << geo->chunk_shift);

retry_discard:
	r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
	r10_bio->mddev = mddev;
	r10_bio->state = 0;
	r10_bio->sectors = 0;
	memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
	wait_blocked_dev(mddev, r10_bio);

	/*
	 * For far layout it needs more than one r10bio to cover all regions.
	 * Inspired by raid10_sync_request, we can use the first r10bio->master_bio
	 * to record the discard bio. Other r10bio->master_bio record the first
	 * r10bio. The first r10bio only release after all other r10bios finish.
	 * The discard bio returns only first r10bio finishes
	 */
	if (first_copy) {
		r10_bio->master_bio = bio;
		set_bit(R10BIO_Discard, &r10_bio->state);
		first_copy = false;
		first_r10bio = r10_bio;
	} else
		r10_bio->master_bio = (struct bio *)first_r10bio;

	rcu_read_lock();
	for (disk = 0; disk < geo->raid_disks; disk++) {
		struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
@@ -1764,11 +1797,19 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
		}
	}

	if (atomic_dec_and_test(&r10_bio->remaining)) {
		md_write_end(r10_bio->mddev);
		raid_end_bio_io(r10_bio);
	if (!geo->far_offset && --far_copies) {
		first_stripe_index += geo->stride >> geo->chunk_shift;
		start_disk_offset += geo->stride;
		last_stripe_index += geo->stride >> geo->chunk_shift;
		end_disk_offset += geo->stride;
		atomic_inc(&first_r10bio->remaining);
		raid_end_discard_bio(r10_bio);
		wait_barrier(conf);
		goto retry_discard;
	}

	raid_end_discard_bio(r10_bio);

	return 0;
out:
	allow_barrier(conf);
+1 −0
Original line number Diff line number Diff line
@@ -179,5 +179,6 @@ enum r10bio_state {
	R10BIO_Previous,
/* failfast devices did receive failfast requests. */
	R10BIO_FailFast,
	R10BIO_Discard,
};
#endif