Commit 03f7b57a authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'md-next-20230927' of...

Merge tag 'md-next-20230927' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.7/block

Pull MD updates from Song:

"1. Make rdev add/remove independent from daemon thread, by Yu Kuai;
 2. Refactor code around quiesce() and mddev_suspend(), by Yu Kuai."

* tag 'md-next-20230927' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: replace deprecated strncpy with memcpy
  md/md-linear: Annotate struct linear_conf with __counted_by
  md: don't check 'mddev->pers' and 'pers->quiesce' from suspend_lo_store()
  md: don't check 'mddev->pers' from suspend_hi_store()
  md-bitmap: suspend array earlier in location_store()
  md-bitmap: remove the checking of 'pers->quiesce' from location_store()
  md: don't rely on 'mddev->pers' to be set in mddev_suspend()
  md: initialize 'writes_pending' while allocating mddev
  md: initialize 'active_io' while allocating mddev
  md: delay remove_and_add_spares() for read only array to md_start_sync()
  md: factor out a helper rdev_addable() from remove_and_add_spares()
  md: factor out a helper rdev_is_spare() from remove_and_add_spares()
  md: factor out a helper rdev_removeable() from remove_and_add_spares()
  md: delay choosing sync action to md_start_sync()
  md: factor out a helper to choose sync action from md_check_recovery()
  md: use separate work_struct for md_start_sync()
parents aa511ff8 ceb04163
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -749,7 +749,11 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
		return ERR_PTR(-ENOMEM);
	}

	mddev_init(&rs->md);
	if (mddev_init(&rs->md)) {
		kfree(rs);
		ti->error = "Cannot initialize raid context";
		return ERR_PTR(-ENOMEM);
	}

	rs->raid_disks = raid_devs;
	rs->delta_disks = 0;
@@ -798,6 +802,7 @@ static void raid_set_free(struct raid_set *rs)
			dm_put_device(rs->ti, rs->dev[i].data_dev);
	}

	mddev_destroy(&rs->md);
	kfree(rs);
}

+20 −27
Original line number Diff line number Diff line
@@ -2351,11 +2351,9 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
	rv = mddev_lock(mddev);
	if (rv)
		return rv;

	mddev_suspend(mddev);
	if (mddev->pers) {
		if (!mddev->pers->quiesce) {
			rv = -EBUSY;
			goto out;
		}
		if (mddev->recovery || mddev->sync_thread) {
			rv = -EBUSY;
			goto out;
@@ -2369,11 +2367,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
			rv = -EBUSY;
			goto out;
		}
		if (mddev->pers) {
			mddev_suspend(mddev);

		md_bitmap_destroy(mddev);
			mddev_resume(mddev);
		}
		mddev->bitmap_info.offset = 0;
		if (mddev->bitmap_info.file) {
			struct file *f = mddev->bitmap_info.file;
@@ -2383,6 +2378,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
	} else {
		/* No bitmap, OK to set a location */
		long long offset;
		struct bitmap *bitmap;

		if (strncmp(buf, "none", 4) == 0)
			/* nothing to be done */;
		else if (strncmp(buf, "file:", 5) == 0) {
@@ -2406,26 +2403,21 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
				rv = -EINVAL;
				goto out;
			}

			mddev->bitmap_info.offset = offset;
			if (mddev->pers) {
				struct bitmap *bitmap;
			bitmap = md_bitmap_create(mddev, -1);
				mddev_suspend(mddev);
				if (IS_ERR(bitmap))
			if (IS_ERR(bitmap)) {
				rv = PTR_ERR(bitmap);
				else {
				goto out;
			}

			mddev->bitmap = bitmap;
			rv = md_bitmap_load(mddev);
					if (rv)
						mddev->bitmap_info.offset = 0;
				}
			if (rv) {
				mddev->bitmap_info.offset = 0;
				md_bitmap_destroy(mddev);
					mddev_resume(mddev);
				goto out;
			}
				mddev_resume(mddev);
			}
		}
	}
	if (!mddev->external) {
@@ -2437,6 +2429,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
	}
	rv = 0;
out:
	mddev_resume(mddev);
	mddev_unlock(mddev);
	if (rv)
		return rv;
+13 −13
Original line number Diff line number Diff line
@@ -69,6 +69,19 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
	if (!conf)
		return NULL;

	/*
	 * conf->raid_disks is copy of mddev->raid_disks. The reason to
	 * keep a copy of mddev->raid_disks in struct linear_conf is,
	 * mddev->raid_disks may not be consistent with pointers number of
	 * conf->disks[] when it is updated in linear_add() and used to
	 * iterate old conf->disks[] earray in linear_congested().
	 * Here conf->raid_disks is always consitent with number of
	 * pointers in conf->disks[] array, and mddev->private is updated
	 * with rcu_assign_pointer() in linear_addr(), such race can be
	 * avoided.
	 */
	conf->raid_disks = raid_disks;

	cnt = 0;
	conf->array_sectors = 0;

@@ -112,19 +125,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
			conf->disks[i-1].end_sector +
			conf->disks[i].rdev->sectors;

	/*
	 * conf->raid_disks is copy of mddev->raid_disks. The reason to
	 * keep a copy of mddev->raid_disks in struct linear_conf is,
	 * mddev->raid_disks may not be consistent with pointers number of
	 * conf->disks[] when it is updated in linear_add() and used to
	 * iterate old conf->disks[] earray in linear_congested().
	 * Here conf->raid_disks is always consitent with number of
	 * pointers in conf->disks[] array, and mddev->private is updated
	 * with rcu_assign_pointer() in linear_addr(), such race can be
	 * avoided.
	 */
	conf->raid_disks = raid_disks;

	return conf;

out:
+1 −1
Original line number Diff line number Diff line
@@ -12,6 +12,6 @@ struct linear_conf
	struct rcu_head		rcu;
	sector_t		array_sectors;
	int			raid_disks; /* a copy of mddev->raid_disks */
	struct dev_info		disks[];
	struct dev_info		disks[] __counted_by(raid_disks);
};
#endif
+266 −148
Original line number Diff line number Diff line
@@ -449,7 +449,7 @@ void mddev_suspend(struct mddev *mddev)
	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
	percpu_ref_kill(&mddev->active_io);

	if (mddev->pers->prepare_suspend)
	if (mddev->pers && mddev->pers->prepare_suspend)
		mddev->pers->prepare_suspend(mddev);

	wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
@@ -631,16 +631,39 @@ void mddev_put(struct mddev *mddev)
		 * flush_workqueue() after mddev_find will succeed in waiting
		 * for the work to be done.
		 */
		INIT_WORK(&mddev->del_work, mddev_delayed_delete);
		queue_work(md_misc_wq, &mddev->del_work);
	}
	spin_unlock(&all_mddevs_lock);
}

static void md_safemode_timeout(struct timer_list *t);
static void md_start_sync(struct work_struct *ws);

void mddev_init(struct mddev *mddev)
static void active_io_release(struct percpu_ref *ref)
{
	struct mddev *mddev = container_of(ref, struct mddev, active_io);

	wake_up(&mddev->sb_wait);
}

static void no_op(struct percpu_ref *r) {}

int mddev_init(struct mddev *mddev)
{

	if (percpu_ref_init(&mddev->active_io, active_io_release,
			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
		return -ENOMEM;

	if (percpu_ref_init(&mddev->writes_pending, no_op,
			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
		percpu_ref_exit(&mddev->active_io);
		return -ENOMEM;
	}

	/* We want to start with the refcount at zero */
	percpu_ref_put(&mddev->writes_pending);

	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->sync_mutex);
@@ -662,9 +685,21 @@ void mddev_init(struct mddev *mddev)
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;

	INIT_WORK(&mddev->sync_work, md_start_sync);
	INIT_WORK(&mddev->del_work, mddev_delayed_delete);

	return 0;
}
EXPORT_SYMBOL_GPL(mddev_init);

void mddev_destroy(struct mddev *mddev)
{
	percpu_ref_exit(&mddev->active_io);
	percpu_ref_exit(&mddev->writes_pending);
}
EXPORT_SYMBOL_GPL(mddev_destroy);

static struct mddev *mddev_find_locked(dev_t unit)
{
	struct mddev *mddev;
@@ -708,13 +743,16 @@ static struct mddev *mddev_alloc(dev_t unit)
	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return ERR_PTR(-ENOMEM);
	mddev_init(new);

	error = mddev_init(new);
	if (error)
		goto out_free_new;

	spin_lock(&all_mddevs_lock);
	if (unit) {
		error = -EEXIST;
		if (mddev_find_locked(unit))
			goto out_free_new;
			goto out_destroy_new;
		new->unit = unit;
		if (MAJOR(unit) == MD_MAJOR)
			new->md_minor = MINOR(unit);
@@ -725,7 +763,7 @@ static struct mddev *mddev_alloc(dev_t unit)
		error = -ENODEV;
		new->unit = mddev_alloc_unit();
		if (!new->unit)
			goto out_free_new;
			goto out_destroy_new;
		new->md_minor = MINOR(new->unit);
		new->hold_active = UNTIL_STOP;
	}
@@ -733,8 +771,11 @@ static struct mddev *mddev_alloc(dev_t unit)
	list_add(&new->all_mddevs, &all_mddevs);
	spin_unlock(&all_mddevs_lock);
	return new;
out_free_new:

out_destroy_new:
	spin_unlock(&all_mddevs_lock);
	mddev_destroy(new);
out_free_new:
	kfree(new);
	return ERR_PTR(error);
}
@@ -745,6 +786,7 @@ static void mddev_free(struct mddev *mddev)
	list_del(&mddev->all_mddevs);
	spin_unlock(&all_mddevs_lock);

	mddev_destroy(mddev);
	kfree(mddev);
}

@@ -3879,7 +3921,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
		return rv;

	if (mddev->pers == NULL) {
		strncpy(mddev->clevel, buf, slen);
		memcpy(mddev->clevel, buf, slen);
		if (mddev->clevel[slen-1] == '\n')
			slen--;
		mddev->clevel[slen] = 0;
@@ -3912,7 +3954,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
	}

	/* Now find the new personality */
	strncpy(clevel, buf, slen);
	memcpy(clevel, buf, slen);
	if (clevel[slen-1] == '\n')
		slen--;
	clevel[slen] = 0;
@@ -4698,7 +4740,7 @@ metadata_store(struct mddev *mddev, const char *buf, size_t len)
		size_t namelen = len-9;
		if (namelen >= sizeof(mddev->metadata_type))
			namelen = sizeof(mddev->metadata_type)-1;
		strncpy(mddev->metadata_type, buf+9, namelen);
		memcpy(mddev->metadata_type, buf+9, namelen);
		mddev->metadata_type[namelen] = 0;
		if (namelen && mddev->metadata_type[namelen-1] == '\n')
			mddev->metadata_type[--namelen] = 0;
@@ -4872,6 +4914,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
		/* A write to sync_action is enough to justify
		 * canceling read-auto mode
		 */
		flush_work(&mddev->sync_work);
		mddev->ro = MD_RDWR;
		md_wakeup_thread(mddev->sync_thread);
	}
@@ -5146,18 +5189,13 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
	err = mddev_lock(mddev);
	if (err)
		return err;
	err = -EINVAL;
	if (mddev->pers == NULL ||
	    mddev->pers->quiesce == NULL)
		goto unlock;

	mddev_suspend(mddev);
	mddev->suspend_lo = new;
	mddev_resume(mddev);

	err = 0;
unlock:
	mddev_unlock(mddev);
	return err ?: len;
	return len;
}
static struct md_sysfs_entry md_suspend_lo =
__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
@@ -5183,18 +5221,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
	err = mddev_lock(mddev);
	if (err)
		return err;
	err = -EINVAL;
	if (mddev->pers == NULL)
		goto unlock;

	mddev_suspend(mddev);
	mddev->suspend_hi = new;
	mddev_resume(mddev);

	err = 0;
unlock:
	mddev_unlock(mddev);
	return err ?: len;
	return len;
}
static struct md_sysfs_entry md_suspend_hi =
__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
@@ -5597,21 +5630,6 @@ static void mddev_delayed_delete(struct work_struct *ws)
	kobject_put(&mddev->kobj);
}

static void no_op(struct percpu_ref *r) {}

int mddev_init_writes_pending(struct mddev *mddev)
{
	if (mddev->writes_pending.percpu_count_ptr)
		return 0;
	if (percpu_ref_init(&mddev->writes_pending, no_op,
			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
		return -ENOMEM;
	/* We want to start with the refcount at zero */
	percpu_ref_put(&mddev->writes_pending);
	return 0;
}
EXPORT_SYMBOL_GPL(mddev_init_writes_pending);

struct mddev *md_alloc(dev_t dev, char *name)
{
	/*
@@ -5783,12 +5801,6 @@ static void md_safemode_timeout(struct timer_list *t)
}

static int start_dirty_degraded;
static void active_io_release(struct percpu_ref *ref)
{
	struct mddev *mddev = container_of(ref, struct mddev, active_io);

	wake_up(&mddev->sb_wait);
}

int md_run(struct mddev *mddev)
{
@@ -5869,15 +5881,10 @@ int md_run(struct mddev *mddev)
		nowait = nowait && bdev_nowait(rdev->bdev);
	}

	err = percpu_ref_init(&mddev->active_io, active_io_release,
				PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
	if (err)
		return err;

	if (!bioset_initialized(&mddev->bio_set)) {
		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
		if (err)
			goto exit_active_io;
			return err;
	}
	if (!bioset_initialized(&mddev->sync_set)) {
		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
@@ -6074,8 +6081,6 @@ int md_run(struct mddev *mddev)
	bioset_exit(&mddev->sync_set);
exit_bio_set:
	bioset_exit(&mddev->bio_set);
exit_active_io:
	percpu_ref_exit(&mddev->active_io);
	return err;
}
EXPORT_SYMBOL_GPL(md_run);
@@ -6291,7 +6296,6 @@ static void __md_stop(struct mddev *mddev)
	module_put(pers->owner);
	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);

	percpu_ref_exit(&mddev->active_io);
	bioset_exit(&mddev->bio_set);
	bioset_exit(&mddev->sync_set);
	bioset_exit(&mddev->io_clone_set);
@@ -6306,7 +6310,6 @@ void md_stop(struct mddev *mddev)
	 */
	__md_stop_writes(mddev);
	__md_stop(mddev);
	percpu_ref_exit(&mddev->writes_pending);
}

EXPORT_SYMBOL_GPL(md_stop);
@@ -7646,6 +7649,10 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
		mutex_unlock(&mddev->open_mutex);
		sync_blockdev(bdev);
	}

	if (!md_is_rdwr(mddev))
		flush_work(&mddev->sync_work);

	err = mddev_lock(mddev);
	if (err) {
		pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
@@ -7886,7 +7893,6 @@ static void md_free_disk(struct gendisk *disk)
{
	struct mddev *mddev = disk->private_data;

	percpu_ref_exit(&mddev->writes_pending);
	mddev_free(mddev);
}

@@ -8570,6 +8576,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
	BUG_ON(mddev->ro == MD_RDONLY);
	if (mddev->ro == MD_AUTO_READ) {
		/* need to switch to read/write */
		flush_work(&mddev->sync_work);
		mddev->ro = MD_RDWR;
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
@@ -9161,6 +9168,85 @@ void md_do_sync(struct md_thread *thread)
}
EXPORT_SYMBOL_GPL(md_do_sync);

static bool rdev_removeable(struct md_rdev *rdev)
{
	/* rdev is not used. */
	if (rdev->raid_disk < 0)
		return false;

	/* There are still inflight io, don't remove this rdev. */
	if (atomic_read(&rdev->nr_pending))
		return false;

	/*
	 * An error occurred but has not yet been acknowledged by the metadata
	 * handler, don't remove this rdev.
	 */
	if (test_bit(Blocked, &rdev->flags))
		return false;

	/* Fautly rdev is not used, it's safe to remove it. */
	if (test_bit(Faulty, &rdev->flags))
		return true;

	/* Journal disk can only be removed if it's faulty. */
	if (test_bit(Journal, &rdev->flags))
		return false;

	/*
	 * 'In_sync' is cleared while 'raid_disk' is valid, which means
	 * replacement has just become active from pers->spare_active(), and
	 * then pers->hot_remove_disk() will replace this rdev with replacement.
	 */
	if (!test_bit(In_sync, &rdev->flags))
		return true;

	return false;
}

static bool rdev_is_spare(struct md_rdev *rdev)
{
	return !test_bit(Candidate, &rdev->flags) && rdev->raid_disk >= 0 &&
	       !test_bit(In_sync, &rdev->flags) &&
	       !test_bit(Journal, &rdev->flags) &&
	       !test_bit(Faulty, &rdev->flags);
}

static bool rdev_addable(struct md_rdev *rdev)
{
	/* rdev is already used, don't add it again. */
	if (test_bit(Candidate, &rdev->flags) || rdev->raid_disk >= 0 ||
	    test_bit(Faulty, &rdev->flags))
		return false;

	/* Allow to add journal disk. */
	if (test_bit(Journal, &rdev->flags))
		return true;

	/* Allow to add if array is read-write. */
	if (md_is_rdwr(rdev->mddev))
		return true;

	/*
	 * For read-only array, only allow to readd a rdev. And if bitmap is
	 * used, don't allow to readd a rdev that is too old.
	 */
	if (rdev->saved_raid_disk >= 0 && !test_bit(Bitmap_sync, &rdev->flags))
		return true;

	return false;
}

static bool md_spares_need_change(struct mddev *mddev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		if (rdev_removeable(rdev) || rdev_addable(rdev))
			return true;
	return false;
}

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this)
{
@@ -9193,12 +9279,8 @@ static int remove_and_add_spares(struct mddev *mddev,
		synchronize_rcu();
	rdev_for_each(rdev, mddev) {
		if ((this == NULL || rdev == this) &&
		    rdev->raid_disk >= 0 &&
		    !test_bit(Blocked, &rdev->flags) &&
		    ((test_bit(RemoveSynchronized, &rdev->flags) ||
		     (!test_bit(In_sync, &rdev->flags) &&
		      !test_bit(Journal, &rdev->flags))) &&
		    atomic_read(&rdev->nr_pending)==0)) {
		    (test_bit(RemoveSynchronized, &rdev->flags) ||
		     rdev_removeable(rdev))) {
			if (mddev->pers->hot_remove_disk(
				    mddev, rdev) == 0) {
				sysfs_unlink_rdev(mddev, rdev);
@@ -9220,25 +9302,12 @@ static int remove_and_add_spares(struct mddev *mddev,
	rdev_for_each(rdev, mddev) {
		if (this && this != rdev)
			continue;
		if (test_bit(Candidate, &rdev->flags))
			continue;
		if (rdev->raid_disk >= 0 &&
		    !test_bit(In_sync, &rdev->flags) &&
		    !test_bit(Journal, &rdev->flags) &&
		    !test_bit(Faulty, &rdev->flags))
		if (rdev_is_spare(rdev))
			spares++;
		if (rdev->raid_disk >= 0)
			continue;
		if (test_bit(Faulty, &rdev->flags))
			continue;
		if (!test_bit(Journal, &rdev->flags)) {
			if (!md_is_rdwr(mddev) &&
			    !(rdev->saved_raid_disk >= 0 &&
			      !test_bit(Bitmap_sync, &rdev->flags)))
		if (!rdev_addable(rdev))
			continue;

		if (!test_bit(Journal, &rdev->flags))
			rdev->recovery_offset = 0;
		}
		if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
			/* failure here is OK */
			sysfs_link_rdev(mddev, rdev);
@@ -9254,9 +9323,81 @@ static int remove_and_add_spares(struct mddev *mddev,
	return spares;
}

static bool md_choose_sync_action(struct mddev *mddev, int *spares)
{
	/* Check if reshape is in progress first. */
	if (mddev->reshape_position != MaxSector) {
		if (mddev->pers->check_reshape == NULL ||
		    mddev->pers->check_reshape(mddev) != 0)
			return false;

		set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
		clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		return true;
	}

	/*
	 * Remove any failed drives, then add spares if possible. Spares are
	 * also removed and re-added, to allow the personality to fail the
	 * re-add.
	 */
	*spares = remove_and_add_spares(mddev, NULL);
	if (*spares) {
		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
		clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);

		/* Start new recovery. */
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		return true;
	}

	/* Check if recovery is in progress. */
	if (mddev->recovery_cp < MaxSector) {
		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		return true;
	}

	/* Delay to choose resync/check/repair in md_do_sync(). */
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
		return true;

	/* Nothing to be done */
	return false;
}

static void md_start_sync(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, del_work);
	struct mddev *mddev = container_of(ws, struct mddev, sync_work);
	int spares = 0;

	mddev_lock_nointr(mddev);

	if (!md_is_rdwr(mddev)) {
		/*
		 * On a read-only array we can:
		 * - remove failed devices
		 * - add already-in_sync devices if the array itself is in-sync.
		 * As we only add devices that are already in-sync, we can
		 * activate the spares immediately.
		 */
		remove_and_add_spares(mddev, NULL);
		goto not_running;
	}

	if (!md_choose_sync_action(mddev, &spares))
		goto not_running;

	if (!mddev->pers->sync_request)
		goto not_running;

	/*
	 * We are adding a device or devices to an array which has the bitmap
	 * stored on all devices. So make sure all bitmap pages get written.
	 */
	if (spares)
		md_bitmap_write_all(mddev->bitmap);

	rcu_assign_pointer(mddev->sync_thread,
			   md_register_thread(md_do_sync, mddev, "resync"));
@@ -9264,20 +9405,27 @@ static void md_start_sync(struct work_struct *ws)
		pr_warn("%s: could not start resync thread...\n",
			mdname(mddev));
		/* leave the spares where they are, it shouldn't hurt */
		goto not_running;
	}

	mddev_unlock(mddev);
	md_wakeup_thread(mddev->sync_thread);
	sysfs_notify_dirent_safe(mddev->sysfs_action);
	md_new_event();
	return;

not_running:
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	mddev_unlock(mddev);

	wake_up(&resync_wait);
		if (test_and_clear_bit(MD_RECOVERY_RECOVER,
				       &mddev->recovery))
			if (mddev->sysfs_action)
				sysfs_notify_dirent_safe(mddev->sysfs_action);
	} else
		md_wakeup_thread(mddev->sync_thread);
	if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
	    mddev->sysfs_action)
		sysfs_notify_dirent_safe(mddev->sysfs_action);
	md_new_event();
}

/*
@@ -9345,7 +9493,6 @@ void md_check_recovery(struct mddev *mddev)
		return;

	if (mddev_trylock(mddev)) {
		int spares = 0;
		bool try_set_sync = mddev->safemode != 0;

		if (!mddev->external && mddev->safemode == 1)
@@ -9353,30 +9500,43 @@ void md_check_recovery(struct mddev *mddev)

		if (!md_is_rdwr(mddev)) {
			struct md_rdev *rdev;

			if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
				/* sync_work already queued. */
				clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
				goto unlock;
			}

			if (!mddev->external && mddev->in_sync)
				/* 'Blocked' flag not needed as failed devices
				/*
				 * 'Blocked' flag not needed as failed devices
				 * will be recorded if array switched to read/write.
				 * Leaving it set will prevent the device
				 * from being removed.
				 */
				rdev_for_each(rdev, mddev)
					clear_bit(Blocked, &rdev->flags);
			/* On a read-only array we can:
			 * - remove failed devices
			 * - add already-in_sync devices if the array itself
			 *   is in-sync.
			 * As we only add devices that are already in-sync,
			 * we can activate the spares immediately.
			 */
			remove_and_add_spares(mddev, NULL);
			/* There is no thread, but we need to call

			/*
			 * There is no thread, but we need to call
			 * ->spare_active and clear saved_raid_disk
			 */
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			md_reap_sync_thread(mddev);

			/*
			 * Let md_start_sync() to remove and add rdevs to the
			 * array.
			 */
			if (md_spares_need_change(mddev)) {
				set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
				queue_work(md_misc_wq, &mddev->sync_work);
			}

			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);

			goto unlock;
		}

@@ -9432,56 +9592,14 @@ void md_check_recovery(struct mddev *mddev)
		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);

		if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		    test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
			goto not_running;
		/* no recovery is running.
		 * remove any failed drives, then
		 * add spares if possible.
		 * Spares are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */

		if (mddev->reshape_position != MaxSector) {
			if (mddev->pers->check_reshape == NULL ||
			    mddev->pers->check_reshape(mddev) != 0)
				/* Cannot proceed */
				goto not_running;
			set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if ((spares = remove_and_add_spares(mddev, NULL))) {
			clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
			clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (mddev->recovery_cp < MaxSector) {
			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing to be done ... */
			goto not_running;

		if (mddev->pers->sync_request) {
			if (spares) {
				/* We are adding a device or devices to an array
				 * which has the bitmap stored on all devices.
				 * So make sure all bitmap pages get written
				 */
				md_bitmap_write_all(mddev->bitmap);
			}
			INIT_WORK(&mddev->del_work, md_start_sync);
			queue_work(md_misc_wq, &mddev->del_work);
			goto unlock;
		}
	not_running:
		if (!mddev->sync_thread) {
		if (test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) &&
		    !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
			queue_work(md_misc_wq, &mddev->sync_work);
		} else {
			clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
			wake_up(&resync_wait);
			if (test_and_clear_bit(MD_RECOVERY_RECOVER,
					       &mddev->recovery))
				if (mddev->sysfs_action)
					sysfs_notify_dirent_safe(mddev->sysfs_action);
		}

	unlock:
		wake_up(&mddev->sb_wait);
		mddev_unlock(mddev);
Loading