Commit c594de04 authored by Yu Kuai's avatar Yu Kuai Committed by Yu Kuai
Browse files

md: don't export md_cluster_ops

Add a new field 'cluster_ops' and initialize it md_setup_cluster(), so
that the gloable variable 'md_cluter_ops' doesn't need to be exported.
Also prepare to switch md-cluster to use md_submod_head.

Link: https://lore.kernel.org/linux-raid/20250215092225.2427977-7-yukuai1@huaweicloud.com


Signed-off-by: default avatarYu Kuai <yukuai3@huawei.com>
Reviewed-by: default avatarSu Yue <glass.su@suse.com>
parent ff84e1b1
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -944,7 +944,7 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
				bmname(bitmap), err);
			goto out_no_sb;
		}
		bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
		bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev);
		goto re_read;
	}

@@ -2023,7 +2023,7 @@ static void md_bitmap_free(void *data)
		sysfs_put(bitmap->sysfs_can_clear);

	if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
		bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
		bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev))
		md_cluster_stop(bitmap->mddev);

	/* Shouldn't be needed - but just in case.... */
@@ -2231,7 +2231,7 @@ static int bitmap_load(struct mddev *mddev)
		mddev_create_serial_pool(mddev, rdev);

	if (mddev_is_clustered(mddev))
		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
		mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);

	/* Clear out old bitmap info first:  Either there is none, or we
	 * are resuming after someone else has possibly changed things,
+55 −39
Original line number Diff line number Diff line
@@ -85,8 +85,7 @@ static DEFINE_SPINLOCK(pers_lock);

static const struct kobj_type md_ktype;

const struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static const struct md_cluster_operations *md_cluster_ops;
static struct module *md_cluster_mod;

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
@@ -2663,11 +2662,11 @@ void md_update_sb(struct mddev *mddev, int force_change)
			force_change = 1;
		if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
			nospares = 1;
		ret = md_cluster_ops->metadata_update_start(mddev);
		ret = mddev->cluster_ops->metadata_update_start(mddev);
		/* Has someone else has updated the sb */
		if (!does_sb_need_changing(mddev)) {
			if (ret == 0)
				md_cluster_ops->metadata_update_cancel(mddev);
				mddev->cluster_ops->metadata_update_cancel(mddev);
			bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
							 BIT(MD_SB_CHANGE_DEVS) |
							 BIT(MD_SB_CHANGE_CLEAN));
@@ -2807,7 +2806,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
	/* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */

	if (mddev_is_clustered(mddev) && ret == 0)
		md_cluster_ops->metadata_update_finish(mddev);
		mddev->cluster_ops->metadata_update_finish(mddev);

	if (mddev->in_sync != sync_req ||
	    !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
@@ -2966,7 +2965,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
		else {
			err = 0;
			if (mddev_is_clustered(mddev))
				err = md_cluster_ops->remove_disk(mddev, rdev);
				err = mddev->cluster_ops->remove_disk(mddev, rdev);

			if (err == 0) {
				md_kick_rdev_from_array(rdev);
@@ -3076,7 +3075,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
			 * by this node eventually
			 */
			if (!mddev_is_clustered(rdev->mddev) ||
			    (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
			    (err = mddev->cluster_ops->gather_bitmaps(rdev)) == 0) {
				clear_bit(Faulty, &rdev->flags);
				err = add_bound_rdev(rdev);
			}
@@ -6994,7 +6993,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
				set_bit(Candidate, &rdev->flags);
			else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
				/* --add initiated by this node */
				err = md_cluster_ops->add_new_disk(mddev, rdev);
				err = mddev->cluster_ops->add_new_disk(mddev, rdev);
				if (err) {
					export_rdev(rdev, mddev);
					return err;
@@ -7011,14 +7010,14 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
		if (mddev_is_clustered(mddev)) {
			if (info->state & (1 << MD_DISK_CANDIDATE)) {
				if (!err) {
					err = md_cluster_ops->new_disk_ack(mddev,
						err == 0);
					err = mddev->cluster_ops->new_disk_ack(
							mddev, err == 0);
					if (err)
						md_kick_rdev_from_array(rdev);
				}
			} else {
				if (err)
					md_cluster_ops->add_new_disk_cancel(mddev);
					mddev->cluster_ops->add_new_disk_cancel(mddev);
				else
					err = add_bound_rdev(rdev);
			}
@@ -7098,10 +7097,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
		goto busy;

kick_rdev:
	if (mddev_is_clustered(mddev)) {
		if (md_cluster_ops->remove_disk(mddev, rdev))
	if (mddev_is_clustered(mddev) &&
	    mddev->cluster_ops->remove_disk(mddev, rdev))
		goto busy;
	}

	md_kick_rdev_from_array(rdev);
	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
@@ -7404,7 +7402,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
	rv = mddev->pers->resize(mddev, num_sectors);
	if (!rv) {
		if (mddev_is_clustered(mddev))
			md_cluster_ops->update_size(mddev, old_dev_sectors);
			mddev->cluster_ops->update_size(mddev, old_dev_sectors);
		else if (!mddev_is_dm(mddev))
			set_capacity_and_notify(mddev->gendisk,
						mddev->array_sectors);
@@ -7452,6 +7450,27 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
	return rv;
}

static int get_cluster_ops(struct mddev *mddev)
{
	spin_lock(&pers_lock);
	mddev->cluster_ops = md_cluster_ops;
	if (mddev->cluster_ops && !try_module_get(md_cluster_mod))
		mddev->cluster_ops = NULL;
	spin_unlock(&pers_lock);

	return mddev->cluster_ops == NULL ? -ENOENT : 0;
}

static void put_cluster_ops(struct mddev *mddev)
{
	if (!mddev->cluster_ops)
		return;

	mddev->cluster_ops->leave(mddev);
	module_put(md_cluster_mod);
	mddev->cluster_ops = NULL;
}

/*
 * update_array_info is used to change the configuration of an
 * on-line array.
@@ -7560,16 +7579,15 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)

			if (mddev->bitmap_info.nodes) {
				/* hold PW on all the bitmap lock */
				if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
				if (mddev->cluster_ops->lock_all_bitmaps(mddev) <= 0) {
					pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
					rv = -EPERM;
					md_cluster_ops->unlock_all_bitmaps(mddev);
					mddev->cluster_ops->unlock_all_bitmaps(mddev);
					goto err;
				}

				mddev->bitmap_info.nodes = 0;
				md_cluster_ops->leave(mddev);
				module_put(md_cluster_mod);
				put_cluster_ops(mddev);
				mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
			}
			mddev->bitmap_ops->destroy(mddev);
@@ -7853,7 +7871,7 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,

	case CLUSTERED_DISK_NACK:
		if (mddev_is_clustered(mddev))
			md_cluster_ops->new_disk_ack(mddev, false);
			mddev->cluster_ops->new_disk_ack(mddev, false);
		else
			err = -EINVAL;
		goto unlock;
@@ -8568,19 +8586,20 @@ EXPORT_SYMBOL(unregister_md_cluster_operations);

int md_setup_cluster(struct mddev *mddev, int nodes)
{
	int ret;
	if (!md_cluster_ops)
	int ret = get_cluster_ops(mddev);

	if (ret) {
		request_module("md-cluster");
	spin_lock(&pers_lock);
		ret = get_cluster_ops(mddev);
	}

	/* ensure module won't be unloaded */
	if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
	if (ret) {
		pr_warn("can't find md-cluster module or get its reference.\n");
		spin_unlock(&pers_lock);
		return -ENOENT;
		return ret;
	}
	spin_unlock(&pers_lock);

	ret = md_cluster_ops->join(mddev, nodes);
	ret = mddev->cluster_ops->join(mddev, nodes);
	if (!ret)
		mddev->safemode_delay = 0;
	return ret;
@@ -8588,10 +8607,7 @@ int md_setup_cluster(struct mddev *mddev, int nodes)

void md_cluster_stop(struct mddev *mddev)
{
	if (!md_cluster_ops)
		return;
	md_cluster_ops->leave(mddev);
	module_put(md_cluster_mod);
	put_cluster_ops(mddev);
}

static int is_mddev_idle(struct mddev *mddev, int init)
@@ -8984,7 +9000,7 @@ void md_do_sync(struct md_thread *thread)
	}

	if (mddev_is_clustered(mddev)) {
		ret = md_cluster_ops->resync_start(mddev);
		ret = mddev->cluster_ops->resync_start(mddev);
		if (ret)
			goto skip;

@@ -9011,7 +9027,7 @@ void md_do_sync(struct md_thread *thread)
	 *
	 */
	if (mddev_is_clustered(mddev))
		md_cluster_ops->resync_start_notify(mddev);
		mddev->cluster_ops->resync_start_notify(mddev);
	do {
		int mddev2_minor = -1;
		mddev->curr_resync = MD_RESYNC_DELAYED;
@@ -9795,7 +9811,7 @@ void md_reap_sync_thread(struct mddev *mddev)
	 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
	 * clustered raid */
	if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
		md_cluster_ops->resync_finish(mddev);
		mddev->cluster_ops->resync_finish(mddev);
	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -9803,13 +9819,13 @@ void md_reap_sync_thread(struct mddev *mddev)
	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	/*
	 * We call md_cluster_ops->update_size here because sync_size could
	 * We call mddev->cluster_ops->update_size here because sync_size could
	 * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
	 * so it is time to update size across cluster.
	 */
	if (mddev_is_clustered(mddev) && is_reshaped
				      && !test_bit(MD_CLOSING, &mddev->flags))
		md_cluster_ops->update_size(mddev, old_dev_sectors);
		mddev->cluster_ops->update_size(mddev, old_dev_sectors);
	/* flag recovery needed just to double check */
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	sysfs_notify_dirent_safe(mddev->sysfs_completed);
@@ -10035,7 +10051,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
			if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
			    !(le32_to_cpu(sb->feature_map) &
			      MD_FEATURE_RESHAPE_ACTIVE) &&
			    !md_cluster_ops->resync_status_get(mddev)) {
			    !mddev->cluster_ops->resync_status_get(mddev)) {
				/*
				 * -1 to make raid1_add_disk() set conf->fullsync
				 * to 1. This could avoid skipping sync when the
+2 −1
Original line number Diff line number Diff line
@@ -320,6 +320,7 @@ extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;
struct md_cluster_operations;

/**
 * enum mddev_flags - md device flags.
@@ -602,6 +603,7 @@ struct mddev {
	mempool_t *serial_info_pool;
	void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
	struct md_cluster_info		*cluster_info;
	const struct md_cluster_operations *cluster_ops;
	unsigned int			good_device_nr;	/* good device num within cluster raid */
	unsigned int			noio_flag; /* for memalloc scope API */

@@ -947,7 +949,6 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
	}
}

extern const struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
+2 −2
Original line number Diff line number Diff line
@@ -287,7 +287,7 @@ static inline bool raid1_should_read_first(struct mddev *mddev,
		return true;

	if (mddev_is_clustered(mddev) &&
	    md_cluster_ops->area_resyncing(mddev, READ, this_sector,
	    mddev->cluster_ops->area_resyncing(mddev, READ, this_sector,
					       this_sector + len))
		return true;

+5 −5
Original line number Diff line number Diff line
@@ -1469,7 +1469,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
	bool is_discard = (bio_op(bio) == REQ_OP_DISCARD);

	if (mddev_is_clustered(mddev) &&
	     md_cluster_ops->area_resyncing(mddev, WRITE,
	    mddev->cluster_ops->area_resyncing(mddev, WRITE,
		     bio->bi_iter.bi_sector, bio_end_sector(bio))) {

		DEFINE_WAIT(w);
@@ -1480,7 +1480,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
		for (;;) {
			prepare_to_wait(&conf->wait_barrier,
					&w, TASK_IDLE);
			if (!md_cluster_ops->area_resyncing(mddev, WRITE,
			if (!mddev->cluster_ops->area_resyncing(mddev, WRITE,
							bio->bi_iter.bi_sector,
							bio_end_sector(bio)))
				break;
@@ -3040,7 +3040,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
		conf->cluster_sync_low = mddev->curr_resync_completed;
		conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
		/* Send resync message */
		md_cluster_ops->resync_info_update(mddev,
		mddev->cluster_ops->resync_info_update(mddev,
						       conf->cluster_sync_low,
						       conf->cluster_sync_high);
	}
Loading