Commit 984ff00c authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'md-6.17-20250803' of...

Merge tag 'md-6.17-20250803' of gitolite.kernel.org:pub/scm/linux/kernel/git/mdraid/linux into block-6.17

Pull MD changes from Yu:

"- mddev null-ptr-dereference fix, by Erkun
 - md-cluster fail to remove the faulty disk regression fix, by Heming
 - minor cleanup, by Li Nan and Jinchao
 - mdadm lifetime regression fix reported by syzkaller, by Yu Kuai"

* tag 'md-6.17-20250803' of gitolite.kernel.org:pub/scm/linux/kernel/git/mdraid/linux:
  md: make rdev_addable usable for rcu mode
  md/raid1: remove struct pool_info and related code
  md/raid1: change r1conf->r1bio_pool to a pointer type
  md: rename recovery_cp to resync_offset
  md/md-cluster: handle REMOVE message earlier
  md: fix create on open mddev lifetime regression
parents fad6551f 13017b42
Loading
Loading
Loading
Loading
+21 −21
Original line number Diff line number Diff line
@@ -439,7 +439,7 @@ static bool rs_is_reshapable(struct raid_set *rs)
/* Return true, if raid set in @rs is recovering */
static bool rs_is_recovering(struct raid_set *rs)
{
	return rs->md.recovery_cp < rs->md.dev_sectors;
	return rs->md.resync_offset < rs->md.dev_sectors;
}

/* Return true, if raid set in @rs is reshaping */
@@ -769,7 +769,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
	rs->md.layout = raid_type->algorithm;
	rs->md.new_layout = rs->md.layout;
	rs->md.delta_disks = 0;
	rs->md.recovery_cp = MaxSector;
	rs->md.resync_offset = MaxSector;

	for (i = 0; i < raid_devs; i++)
		md_rdev_init(&rs->dev[i].rdev);
@@ -913,7 +913,7 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
		rs->md.external = 0;
		rs->md.persistent = 1;
		rs->md.major_version = 2;
	} else if (rebuild && !rs->md.recovery_cp) {
	} else if (rebuild && !rs->md.resync_offset) {
		/*
		 * Without metadata, we will not be able to tell if the array
		 * is in-sync or not - we must assume it is not.  Therefore,
@@ -1696,20 +1696,20 @@ static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
{
	/* raid0 does not recover */
	if (rs_is_raid0(rs))
		rs->md.recovery_cp = MaxSector;
		rs->md.resync_offset = MaxSector;
	/*
	 * A raid6 set has to be recovered either
	 * completely or for the grown part to
	 * ensure proper parity and Q-Syndrome
	 */
	else if (rs_is_raid6(rs))
		rs->md.recovery_cp = dev_sectors;
		rs->md.resync_offset = dev_sectors;
	/*
	 * Other raid set types may skip recovery
	 * depending on the 'nosync' flag.
	 */
	else
		rs->md.recovery_cp = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)
		rs->md.resync_offset = test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)
				     ? MaxSector : dev_sectors;
}

@@ -2144,7 +2144,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
	sb->events = cpu_to_le64(mddev->events);

	sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset);
	sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp);
	sb->array_resync_offset = cpu_to_le64(mddev->resync_offset);

	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);
@@ -2335,18 +2335,18 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
	}

	if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
		mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset);
		mddev->resync_offset = le64_to_cpu(sb->array_resync_offset);

	/*
	 * During load, we set FirstUse if a new superblock was written.
	 * There are two reasons we might not have a superblock:
	 * 1) The raid set is brand new - in which case, all of the
	 *    devices must have their In_sync bit set.	Also,
	 *    recovery_cp must be 0, unless forced.
	 *    resync_offset must be 0, unless forced.
	 * 2) This is a new device being added to an old raid set
	 *    and the new device needs to be rebuilt - in which
	 *    case the In_sync bit will /not/ be set and
	 *    recovery_cp must be MaxSector.
	 *    resync_offset must be MaxSector.
	 * 3) This is/are a new device(s) being added to an old
	 *    raid set during takeover to a higher raid level
	 *    to provide capacity for redundancy or during reshape
@@ -2391,8 +2391,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
			      new_devs > 1 ? "s" : "");
			return -EINVAL;
		} else if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) && rs_is_recovering(rs)) {
			DMERR("'rebuild' specified while raid set is not in-sync (recovery_cp=%llu)",
			      (unsigned long long) mddev->recovery_cp);
			DMERR("'rebuild' specified while raid set is not in-sync (resync_offset=%llu)",
			      (unsigned long long) mddev->resync_offset);
			return -EINVAL;
		} else if (rs_is_reshaping(rs)) {
			DMERR("'rebuild' specified while raid set is being reshaped (reshape_position=%llu)",
@@ -2697,11 +2697,11 @@ static int rs_adjust_data_offsets(struct raid_set *rs)
	}
out:
	/*
	 * Raise recovery_cp in case data_offset != 0 to
	 * Raise resync_offset in case data_offset != 0 to
	 * avoid false recovery positives in the constructor.
	 */
	if (rs->md.recovery_cp < rs->md.dev_sectors)
		rs->md.recovery_cp += rs->dev[0].rdev.data_offset;
	if (rs->md.resync_offset < rs->md.dev_sectors)
		rs->md.resync_offset += rs->dev[0].rdev.data_offset;

	/* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */
	rdev_for_each(rdev, &rs->md) {
@@ -2756,7 +2756,7 @@ static int rs_setup_takeover(struct raid_set *rs)
	}

	clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
	mddev->recovery_cp = MaxSector;
	mddev->resync_offset = MaxSector;

	while (d--) {
		rdev = &rs->dev[d].rdev;
@@ -2764,7 +2764,7 @@ static int rs_setup_takeover(struct raid_set *rs)
		if (test_bit(d, (void *) rs->rebuild_disks)) {
			clear_bit(In_sync, &rdev->flags);
			clear_bit(Faulty, &rdev->flags);
			mddev->recovery_cp = rdev->recovery_offset = 0;
			mddev->resync_offset = rdev->recovery_offset = 0;
			/* Bitmap has to be created when we do an "up" takeover */
			set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
		}
@@ -3222,7 +3222,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
			if (r)
				goto bad;

			rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors);
			rs_setup_recovery(rs, rs->md.resync_offset < rs->md.dev_sectors ? rs->md.resync_offset : rs->md.dev_sectors);
		} else {
			/* This is no size change or it is shrinking, update size and record in superblocks */
			r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
@@ -3446,7 +3446,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,

	} else {
		if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
			r = mddev->recovery_cp;
			r = mddev->resync_offset;
		else
			r = mddev->curr_resync_completed;

@@ -4074,9 +4074,9 @@ static int raid_preresume(struct dm_target *ti)
	}

	/* Check for any resize/reshape on @rs and adjust/initiate */
	if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
	if (mddev->resync_offset && mddev->resync_offset < MaxSector) {
		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
		mddev->resync_min = mddev->recovery_cp;
		mddev->resync_min = mddev->resync_offset;
		if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags))
			mddev->resync_max_sectors = mddev->dev_sectors;
	}
+4 −4
Original line number Diff line number Diff line
@@ -1987,12 +1987,12 @@ static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s,

		md_bitmap_set_memory_bits(bitmap, sec, 1);
		md_bitmap_file_set_bit(bitmap, sec);
		if (sec < bitmap->mddev->recovery_cp)
		if (sec < bitmap->mddev->resync_offset)
			/* We are asserting that the array is dirty,
			 * so move the recovery_cp address back so
			 * so move the resync_offset address back so
			 * that it is obvious that it is dirty
			 */
			bitmap->mddev->recovery_cp = sec;
			bitmap->mddev->resync_offset = sec;
	}
}

@@ -2258,7 +2258,7 @@ static int bitmap_load(struct mddev *mddev)
	    || bitmap->events_cleared == mddev->events)
		/* no need to keep dirty bits to optimise a
		 * re-add of a missing device */
		start = mddev->recovery_cp;
		start = mddev->resync_offset;

	mutex_lock(&mddev->bitmap_info.mutex);
	err = md_bitmap_init_from_disk(bitmap, start);
+8 −8
Original line number Diff line number Diff line
@@ -337,11 +337,11 @@ static void recover_bitmaps(struct md_thread *thread)
			md_wakeup_thread(mddev->sync_thread);

		if (hi > 0) {
			if (lo < mddev->recovery_cp)
				mddev->recovery_cp = lo;
			if (lo < mddev->resync_offset)
				mddev->resync_offset = lo;
			/* wake up thread to continue resync in case resync
			 * is not finished */
			if (mddev->recovery_cp != MaxSector) {
			if (mddev->resync_offset != MaxSector) {
				/*
				 * clear the REMOTE flag since we will launch
				 * resync thread in current node.
@@ -863,9 +863,9 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
			lockres_free(bm_lockres);
			continue;
		}
		if ((hi > 0) && (lo < mddev->recovery_cp)) {
		if ((hi > 0) && (lo < mddev->resync_offset)) {
			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			mddev->recovery_cp = lo;
			mddev->resync_offset = lo;
			md_check_recovery(mddev);
		}

@@ -1027,7 +1027,7 @@ static int leave(struct mddev *mddev)
	 * Also, we should send BITMAP_NEEDS_SYNC message in
	 * case reshaping is interrupted.
	 */
	if ((cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector) ||
	if ((cinfo->slot_number > 0 && mddev->resync_offset != MaxSector) ||
	    (mddev->reshape_position != MaxSector &&
	     test_bit(MD_CLOSING, &mddev->flags)))
		resync_bitmap(mddev);
@@ -1605,8 +1605,8 @@ static int gather_bitmaps(struct md_rdev *rdev)
			pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
			goto out;
		}
		if ((hi > 0) && (lo < mddev->recovery_cp))
			mddev->recovery_cp = lo;
		if ((hi > 0) && (lo < mddev->resync_offset))
			mddev->resync_offset = lo;
	}
out:
	return err;
+44 −29
Original line number Diff line number Diff line
@@ -636,6 +636,12 @@ static void __mddev_put(struct mddev *mddev)
	    mddev->ctime || mddev->hold_active)
		return;

	/*
	 * If array is freed by stopping array, MD_DELETED is set by
	 * do_md_stop(), MD_DELETED is still set here in case mddev is freed
	 * directly by closing a mddev that is created by create_on_open.
	 */
	set_bit(MD_DELETED, &mddev->flags);
	/*
	 * Call queue_work inside the spinlock so that flush_workqueue() after
	 * mddev_find will succeed in waiting for the work to be done.
@@ -1409,13 +1415,13 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru
			mddev->layout = -1;

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
			mddev->resync_offset = MaxSector;
		else {
			if (sb->events_hi == sb->cp_events_hi &&
				sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
				mddev->resync_offset = sb->resync_offset;
			} else
				mddev->recovery_cp = 0;
				mddev->resync_offset = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
@@ -1541,13 +1547,13 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync)
	{
		sb->recovery_cp = mddev->recovery_cp;
		sb->resync_offset = mddev->resync_offset;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
		if (mddev->resync_offset == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;
		sb->resync_offset = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;
@@ -1895,7 +1901,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		mddev->resync_offset = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks =  (4096-256)/2;
@@ -2081,7 +2087,7 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
		sb->resync_offset = cpu_to_le64(mddev->resync_offset);
	else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
		sb->resync_offset = cpu_to_le64(MaxSector);
	else
@@ -2761,7 +2767,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
	/* If this is just a dirty<->clean transition, and the array is clean
	 * and 'events' is odd, we can roll back to the previous clean state */
	if (nospares
	    && (mddev->in_sync && mddev->recovery_cp == MaxSector)
	    && (mddev->in_sync && mddev->resync_offset == MaxSector)
	    && mddev->can_decrease_events
	    && mddev->events != 1) {
		mddev->events--;
@@ -4297,9 +4303,9 @@ __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
static ssize_t
resync_start_show(struct mddev *mddev, char *page)
{
	if (mddev->recovery_cp == MaxSector)
	if (mddev->resync_offset == MaxSector)
		return sprintf(page, "none\n");
	return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
	return sprintf(page, "%llu\n", (unsigned long long)mddev->resync_offset);
}

static ssize_t
@@ -4325,7 +4331,7 @@ resync_start_store(struct mddev *mddev, const char *buf, size_t len)
		err = -EBUSY;

	if (!err) {
		mddev->recovery_cp = n;
		mddev->resync_offset = n;
		if (mddev->pers)
			set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
	}
@@ -6417,7 +6423,7 @@ static void md_clean(struct mddev *mddev)
	mddev->external_size = 0;
	mddev->dev_sectors = 0;
	mddev->raid_disks = 0;
	mddev->recovery_cp = 0;
	mddev->resync_offset = 0;
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->reshape_position = MaxSector;
@@ -7362,9 +7368,9 @@ int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info)
	 * openned
	 */
	if (info->state & (1<<MD_SB_CLEAN))
		mddev->recovery_cp = MaxSector;
		mddev->resync_offset = MaxSector;
	else
		mddev->recovery_cp = 0;
		mddev->resync_offset = 0;
	mddev->persistent    = ! info->not_persistent;
	mddev->external	     = 0;

@@ -8303,7 +8309,7 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
				seq_printf(seq, "\tresync=REMOTE");
			return 1;
		}
		if (mddev->recovery_cp < MaxSector) {
		if (mddev->resync_offset < MaxSector) {
			seq_printf(seq, "\tresync=PENDING");
			return 1;
		}
@@ -8946,7 +8952,7 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
		return mddev->resync_min;
	case ACTION_RESYNC:
		if (!mddev->bitmap)
			return mddev->recovery_cp;
			return mddev->resync_offset;
		return 0;
	case ACTION_RESHAPE:
		/*
@@ -9184,8 +9190,8 @@ void md_do_sync(struct md_thread *thread)
				   atomic_read(&mddev->recovery_active) == 0);
			mddev->curr_resync_completed = j;
			if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
			    j > mddev->recovery_cp)
				mddev->recovery_cp = j;
			    j > mddev->resync_offset)
				mddev->resync_offset = j;
			update_time = jiffies;
			set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
			sysfs_notify_dirent_safe(mddev->sysfs_completed);
@@ -9305,19 +9311,19 @@ void md_do_sync(struct md_thread *thread)
	    mddev->curr_resync > MD_RESYNC_ACTIVE) {
		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
				if (mddev->curr_resync >= mddev->recovery_cp) {
				if (mddev->curr_resync >= mddev->resync_offset) {
					pr_debug("md: checkpointing %s of %s.\n",
						 desc, mdname(mddev));
					if (test_bit(MD_RECOVERY_ERROR,
						&mddev->recovery))
						mddev->recovery_cp =
						mddev->resync_offset =
							mddev->curr_resync_completed;
					else
						mddev->recovery_cp =
						mddev->resync_offset =
							mddev->curr_resync;
				}
			} else
				mddev->recovery_cp = MaxSector;
				mddev->resync_offset = MaxSector;
		} else {
			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
				mddev->curr_resync = MaxSector;
@@ -9421,6 +9427,12 @@ static bool rdev_is_spare(struct md_rdev *rdev)

static bool rdev_addable(struct md_rdev *rdev)
{
	struct mddev *mddev;

	mddev = READ_ONCE(rdev->mddev);
	if (!mddev)
		return false;

	/* rdev is already used, don't add it again. */
	if (test_bit(Candidate, &rdev->flags) || rdev->raid_disk >= 0 ||
	    test_bit(Faulty, &rdev->flags))
@@ -9431,7 +9443,7 @@ static bool rdev_addable(struct md_rdev *rdev)
		return true;

	/* Allow to add if array is read-write. */
	if (md_is_rdwr(rdev->mddev))
	if (md_is_rdwr(mddev))
		return true;

	/*
@@ -9533,7 +9545,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
	}

	/* Check if resync is in progress. */
	if (mddev->recovery_cp < MaxSector) {
	if (mddev->resync_offset < MaxSector) {
		remove_spares(mddev, NULL);
		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
@@ -9714,7 +9726,7 @@ void md_check_recovery(struct mddev *mddev)
		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
		(mddev->external == 0 && mddev->safemode == 1) ||
		(mddev->safemode == 2
		 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
		 && !mddev->in_sync && mddev->resync_offset == MaxSector)
		))
		return;

@@ -9771,8 +9783,8 @@ void md_check_recovery(struct mddev *mddev)
			 * remove disk.
			 */
			rdev_for_each_safe(rdev, tmp, mddev) {
				if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
						rdev->raid_disk < 0)
				if (rdev->raid_disk < 0 &&
				    test_and_clear_bit(ClusterRemove, &rdev->flags))
					md_kick_rdev_from_array(rdev);
			}
		}
@@ -10078,8 +10090,11 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)

	/* Check for change of roles in the active devices */
	rdev_for_each_safe(rdev2, tmp, mddev) {
		if (test_bit(Faulty, &rdev2->flags))
		if (test_bit(Faulty, &rdev2->flags)) {
			if (test_bit(ClusterRemove, &rdev2->flags))
				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			continue;
		}

		/* Check if the roles changed */
		role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
+1 −1
Original line number Diff line number Diff line
@@ -523,7 +523,7 @@ struct mddev {
	unsigned long			normal_io_events; /* IO event timestamp */
	atomic_t			recovery_active; /* blocks scheduled, but not written */
	wait_queue_head_t		recovery_wait;
	sector_t			recovery_cp;
	sector_t			resync_offset;
	sector_t			resync_min;	/* user requested sync
							 * starts here */
	sector_t			resync_max;	/* resync should pause
Loading