Commit 7f67fdae authored by Song Liu
Browse files

Merge branch 'md-6.12-bitmap' into md-6.12



From Yu Kuai (with minor changes by Song Liu):

Background: the bitmap currently uses a global spin_lock, which causes
lock contention and a huge IO performance degradation for all raid
levels.

However, a new lock-free bitmap cannot be implemented as things stand,
because md-bitmap exposes its internal implementation through lots of
exported APIs. Hence bitmap_operations is introduced to describe the
bitmap core implementation. A new bitmap can then be introduced with its
own bitmap_operations, and we only need to switch to the new one during
initialization.

With this, the bitmap could also be built as a kernel module, but that
is not our concern for now.

This version was tested with mdadm tests and lvm2 tests. This set does
not introduce new errors in these tests.

* md-6.12-bitmap: (42 commits)
  md/md-bitmap: make in memory structure internal
  md/md-bitmap: merge md_bitmap_enabled() into bitmap_operations
  md/md-bitmap: merge md_bitmap_wait_behind_writes() into bitmap_operations
  md/md-bitmap: merge md_bitmap_free() into bitmap_operations
  md/md-bitmap: merge md_bitmap_set_pages() into struct bitmap_operations
  md/md-bitmap: merge md_bitmap_copy_from_slot() into struct bitmap_operation.
  md/md-bitmap: merge get_bitmap_from_slot() into bitmap_operations
  md/md-bitmap: merge md_bitmap_resize() into bitmap_operations
  md/md-bitmap: pass in mddev directly for md_bitmap_resize()
  md/md-bitmap: merge md_bitmap_daemon_work() into bitmap_operations
  md/md-bitmap: merge bitmap_unplug() into bitmap_operations
  md/md-bitmap: merge md_bitmap_unplug_async() into md_bitmap_unplug()
  md/md-bitmap: merge md_bitmap_sync_with_cluster() into bitmap_operations
  md/md-bitmap: merge md_bitmap_cond_end_sync() into bitmap_operations
  md/md-bitmap: merge md_bitmap_close_sync() into bitmap_operations
  md/md-bitmap: merge md_bitmap_end_sync() into bitmap_operations
  md/md-bitmap: remove the parameter 'aborted' for md_bitmap_end_sync()
  md/md-bitmap: merge md_bitmap_start_sync() into bitmap_operations
  md/md-bitmap: merge md_bitmap_endwrite() into bitmap_operations
  md/md-bitmap: merge md_bitmap_startwrite() into bitmap_operations
  ...

Signed-off-by: Song Liu <song@kernel.org>
parents b75197e8 59fdd433
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -3949,7 +3949,9 @@ static int __load_dirty_region_bitmap(struct raid_set *rs)
	/* Try loading the bitmap unless "raid0", which does not have one */
	if (!rs_is_raid0(rs) &&
	    !test_and_set_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags)) {
		r = md_bitmap_load(&rs->md);
		struct mddev *mddev = &rs->md;

		r = mddev->bitmap_ops->load(mddev);
		if (r)
			DMERR("Failed to load bitmap");
	}
@@ -4066,7 +4068,8 @@ static int raid_preresume(struct dm_target *ti)
	       mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) {
		int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize;

		r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);
		r = mddev->bitmap_ops->resize(mddev, mddev->dev_sectors,
					      chunksize, false);
		if (r)
			DMERR("Failed to resize bitmap");
	}
+432 −136

File changed.

Preview size limit exceeded, changes collapsed.

+48 −220
Original line number Diff line number Diff line
@@ -7,81 +7,7 @@
#ifndef BITMAP_H
#define BITMAP_H 1

#define BITMAP_MAJOR_LO 3
/* version 4 insists the bitmap is in little-endian order
 * with version 3, it is host-endian which is non-portable
 * Version 5 is currently set only for clustered devices
 */
#define BITMAP_MAJOR_HI 4
#define BITMAP_MAJOR_CLUSTERED 5
#define	BITMAP_MAJOR_HOSTENDIAN 3

/*
 * in-memory bitmap:
 *
 * Use 16 bit block counters to track pending writes to each "chunk".
 * The 2 high order bits are special-purpose, the first is a flag indicating
 * whether a resync is needed.  The second is a flag indicating whether a
 * resync is active.
 * This means that the counter is actually 14 bits:
 *
 * +--------+--------+------------------------------------------------+
 * | resync | resync |               counter                          |
 * | needed | active |                                                |
 * |  (0-1) |  (0-1) |              (0-16383)                         |
 * +--------+--------+------------------------------------------------+
 *
 * The "resync needed" bit is set when:
 *    a '1' bit is read from storage at startup.
 *    a write request fails on some drives
 *    a resync is aborted on a chunk with 'resync active' set
 * It is cleared (and resync-active set) when a resync starts across all drives
 * of the chunk.
 *
 *
 * The "resync active" bit is set when:
 *    a resync is started on all drives, and resync_needed is set.
 *       resync_needed will be cleared (as long as resync_active wasn't already set).
 * It is cleared when a resync completes.
 *
 * The counter counts pending write requests, plus the on-disk bit.
 * When the counter is '1' and the resync bits are clear, the on-disk
 * bit can be cleared as well, thus setting the counter to 0.
 * When we set a bit, or in the counter (to start a write), if the fields is
 * 0, we first set the disk bit and set the counter to 1.
 *
 * If the counter is 0, the on-disk bit is clear and the stripe is clean
 * Anything that dirties the stripe pushes the counter to 2 (at least)
 * and sets the on-disk bit (lazily).
 * If a periodic sweep find the counter at 2, it is decremented to 1.
 * If the sweep find the counter at 1, the on-disk bit is cleared and the
 * counter goes to zero.
 *
 * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
 * counters as a fallback when "page" memory cannot be allocated:
 *
 * Normal case (page memory allocated):
 *
 *     page pointer (32-bit)
 *
 *     [ ] ------+
 *               |
 *               +-------> [   ][   ]..[   ] (4096 byte page == 2048 counters)
 *                          c1   c2    c2048
 *
 * Hijacked case (page memory allocation failed):
 *
 *     hijacked page pointer (32-bit)
 *
 *     [		  ][		  ] (no page memory allocated)
 *      counter #1 (16-bit) counter #2 (16-bit)
 *
 */

#ifdef __KERNEL__

#define PAGE_BITS (PAGE_SIZE << 3)
#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
#define BITMAP_MAGIC 0x6d746962

typedef __u16 bitmap_counter_t;
#define COUNTER_BITS 16
@@ -91,26 +17,6 @@ typedef __u16 bitmap_counter_t;
#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)

/* how many counters per page? */
#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
/* same, except a shift value for more efficient bitops */
#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
/* same, except a mask value for more efficient bitops */
#define PAGE_COUNTER_MASK  (PAGE_COUNTER_RATIO - 1)

#define BITMAP_BLOCK_SHIFT 9

#endif

/*
 * bitmap structures:
 */

#define BITMAP_MAGIC 0x6d746962

/* use these for bitmap->flags and bitmap->sb->state bit-fields */
enum bitmap_state {
@@ -152,136 +58,58 @@ typedef struct bitmap_super_s {
 *    devices.  For raid10 it is the size of the array.
 */

#ifdef __KERNEL__
struct md_bitmap_stats {
	u64		events_cleared;
	int		behind_writes;
	bool		behind_wait;

/* the in-memory bitmap is represented by bitmap_pages */
struct bitmap_page {
	/*
	 * map points to the actual memory page
	 */
	char *map;
	/*
	 * in emergencies (when map cannot be alloced), hijack the map
	 * pointer and use it as two counters itself
	 */
	unsigned int hijacked:1;
	/*
	 * If any counter in this page is '1' or '2' - and so could be
	 * cleared then that page is marked as 'pending'
	 */
	unsigned int pending:1;
	/*
	 * count of dirty bits on the page
	 */
	unsigned int  count:30;
};

/* the main bitmap structure - one per mddev */
struct bitmap {

	struct bitmap_counts {
		spinlock_t lock;
		struct bitmap_page *bp;
		unsigned long pages;		/* total number of pages
						 * in the bitmap */
		unsigned long missing_pages;	/* number of pages
						 * not yet allocated */
		unsigned long chunkshift;	/* chunksize = 2^chunkshift
						 * (for bitops) */
		unsigned long chunks;		/* Total number of data
						 * chunks for the array */
	} counts;

	struct mddev *mddev; /* the md device that the bitmap is for */

	__u64	events_cleared;
	int need_sync;

	struct bitmap_storage {
		struct file *file;		/* backing disk file */
		struct page *sb_page;		/* cached copy of the bitmap
						 * file superblock */
		unsigned long sb_index;
		struct page **filemap;		/* list of cache pages for
						 * the file */
		unsigned long *filemap_attr;	/* attributes associated
						 * w/ filemap pages */
		unsigned long file_pages;	/* number of pages in the file*/
		unsigned long bytes;		/* total bytes in the bitmap */
	} storage;

	unsigned long flags;

	int allclean;

	atomic_t behind_writes;
	unsigned long behind_writes_used; /* highest actual value at runtime */

	/*
	 * the bitmap daemon - periodically wakes up and sweeps the bitmap
	 * file, cleaning up bits and flushing out pages to disk as necessary
	 */
	unsigned long daemon_lastrun; /* jiffies of last run */
	unsigned long last_end_sync; /* when we lasted called end_sync to
				      * update bitmap with resync progress */

	atomic_t pending_writes; /* pending writes to the bitmap file */
	wait_queue_head_t write_wait;
	wait_queue_head_t overflow_wait;
	wait_queue_head_t behind_wait;

	struct kernfs_node *sysfs_can_clear;
	int cluster_slot;		/* Slot offset for clustered env */
	unsigned long	missing_pages;
	unsigned long	file_pages;
	unsigned long	sync_size;
	unsigned long	pages;
	struct file	*file;
};

/* the bitmap API */

/* these are used only by md/bitmap */
struct bitmap *md_bitmap_create(struct mddev *mddev, int slot);
int md_bitmap_load(struct mddev *mddev);
void md_bitmap_flush(struct mddev *mddev);
void md_bitmap_destroy(struct mddev *mddev);

void md_bitmap_print_sb(struct bitmap *bitmap);
void md_bitmap_update_sb(struct bitmap *bitmap);
void md_bitmap_status(struct seq_file *seq, struct bitmap *bitmap);

int  md_bitmap_setallbits(struct bitmap *bitmap);
void md_bitmap_write_all(struct bitmap *bitmap);

void md_bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e);

/* these are exported */
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
			 unsigned long sectors, int behind);
void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
			unsigned long sectors, int success, int behind);
int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
void md_bitmap_close_sync(struct bitmap *bitmap);
void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
void md_bitmap_sync_with_cluster(struct mddev *mddev,
/*
 * Table of callbacks describing a bitmap implementation.  Users reach the
 * bitmap through mddev->bitmap_ops rather than calling the bitmap code
 * directly.
 *
 * Callbacks taking "void *data" receive an opaque bitmap handle: callers
 * pass either mddev->bitmap or the pointer returned by get_from_slot().
 */
struct bitmap_operations {
	bool (*enabled)(struct mddev *mddev);
	/* Returns 0 on success; callers treat any other value as an error code. */
	int (*create)(struct mddev *mddev, int slot);
	int (*resize)(struct mddev *mddev, sector_t blocks, int chunksize,
		      bool init);

	/* Returns 0 on success; callers destroy the bitmap when it fails. */
	int (*load)(struct mddev *mddev);
	void (*destroy)(struct mddev *mddev);
	void (*flush)(struct mddev *mddev);
	void (*write_all)(struct mddev *mddev);
	void (*dirty_bits)(struct mddev *mddev, unsigned long s,
			   unsigned long e);
	/*
	 * Callers pass sync == true to flush the bits to disk.
	 * NOTE(review): sync == false presumably requests an asynchronous
	 * unplug - confirm against the implementation.
	 */
	void (*unplug)(struct mddev *mddev, bool sync);
	void (*daemon_work)(struct mddev *mddev);
	void (*wait_behind_writes)(struct mddev *mddev);

	int (*startwrite)(struct mddev *mddev, sector_t offset,
			  unsigned long sectors, bool behind);
	void (*endwrite)(struct mddev *mddev, sector_t offset,
			 unsigned long sectors, bool success, bool behind);
	bool (*start_sync)(struct mddev *mddev, sector_t offset,
			   sector_t *blocks, bool degraded);
	void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks);
	void (*cond_end_sync)(struct mddev *mddev, sector_t sector, bool force);
	void (*close_sync)(struct mddev *mddev);

	void (*update_sb)(void *data);
	/*
	 * Fills *stats and returns 0 on success; callers treat a non-zero
	 * return as "no statistics available" and do not read *stats.
	 */
	int (*get_stats)(void *data, struct md_bitmap_stats *stats);

	void (*sync_with_cluster)(struct mddev *mddev,
				  sector_t old_lo, sector_t old_hi,
				  sector_t new_lo, sector_t new_hi);
	/*
	 * Returns an opaque bitmap handle for @slot.  Callers check the
	 * result with IS_ERR() and release it with free().
	 */
	void *(*get_from_slot)(struct mddev *mddev, int slot);
	int (*copy_from_slot)(struct mddev *mddev, int slot, sector_t *lo,
			      sector_t *hi, bool clear_bits);
	void (*set_pages)(void *data, unsigned long pages);
	/* Releases a handle obtained from get_from_slot(). */
	void (*free)(void *data);
};

void md_bitmap_unplug(struct bitmap *bitmap);
void md_bitmap_unplug_async(struct bitmap *bitmap);
void md_bitmap_daemon_work(struct mddev *mddev);

int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
		     int chunksize, int init);
struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot);
int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
			     sector_t *lo, sector_t *hi, bool clear_bits);
void md_bitmap_free(struct bitmap *bitmap);
void md_bitmap_wait_behind_writes(struct mddev *mddev);

static inline bool md_bitmap_enabled(struct bitmap *bitmap)
{
	return bitmap && bitmap->storage.filemap &&
	       !test_bit(BITMAP_STALE, &bitmap->flags);
}

#endif
/* the bitmap API */
void mddev_set_bitmap_ops(struct mddev *mddev);

#endif
+53 −38
Original line number Diff line number Diff line
@@ -317,7 +317,7 @@ static void recover_bitmaps(struct md_thread *thread)
					str, ret);
			goto clear_bit;
		}
		ret = md_bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
		ret = mddev->bitmap_ops->copy_from_slot(mddev, slot, &lo, &hi, true);
		if (ret) {
			pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
			goto clear_bit;
@@ -497,7 +497,7 @@ static void process_suspend_info(struct mddev *mddev,
	 * we don't want to trigger lots of WARN.
	 */
	if (sb && !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE))
		md_bitmap_sync_with_cluster(mddev, cinfo->sync_low,
		mddev->bitmap_ops->sync_with_cluster(mddev, cinfo->sync_low,
						     cinfo->sync_hi, lo, hi);
	cinfo->sync_low = lo;
	cinfo->sync_hi = hi;
@@ -628,8 +628,9 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
		break;
	case BITMAP_RESIZE:
		if (le64_to_cpu(msg->high) != mddev->pers->size(mddev, 0, 0))
			ret = md_bitmap_resize(mddev->bitmap,
					    le64_to_cpu(msg->high), 0, 0);
			ret = mddev->bitmap_ops->resize(mddev,
							le64_to_cpu(msg->high),
							0, false);
		break;
	default:
		ret = -1;
@@ -856,7 +857,7 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
		}

		/* Read the disk bitmap sb and check if it needs recovery */
		ret = md_bitmap_copy_from_slot(mddev, i, &lo, &hi, false);
		ret = mddev->bitmap_ops->copy_from_slot(mddev, i, &lo, &hi, false);
		if (ret) {
			pr_warn("md-cluster: Could not gather bitmaps from slot %d", i);
			lockres_free(bm_lockres);
@@ -1143,13 +1144,16 @@ static int update_bitmap_size(struct mddev *mddev, sector_t size)

static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsize)
{
	struct bitmap_counts *counts;
	char str[64];
	struct dlm_lock_resource *bm_lockres;
	struct bitmap *bitmap = mddev->bitmap;
	unsigned long my_pages = bitmap->counts.pages;
	void *bitmap = mddev->bitmap;
	struct md_bitmap_stats stats;
	unsigned long my_pages;
	int i, rv;

	rv = mddev->bitmap_ops->get_stats(bitmap, &stats);
	if (rv)
		return rv;

	my_pages = stats.pages;
	/*
	 * We need to ensure all the nodes can grow to a larger
	 * bitmap size before make the reshaping.
@@ -1159,17 +1163,22 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz
		return rv;

	for (i = 0; i < mddev->bitmap_info.nodes; i++) {
		struct dlm_lock_resource *bm_lockres;
		char str[64];

		if (i == md_cluster_ops->slot_number(mddev))
			continue;

		bitmap = get_bitmap_from_slot(mddev, i);
		bitmap = mddev->bitmap_ops->get_from_slot(mddev, i);
		if (IS_ERR(bitmap)) {
			pr_err("can't get bitmap from slot %d\n", i);
			bitmap = NULL;
			goto out;
		}
		counts = &bitmap->counts;

		rv = mddev->bitmap_ops->get_stats(bitmap, &stats);
		if (rv)
			goto out;
		/*
		 * If we can hold the bitmap lock of one node then
		 * the slot is not occupied, update the pages.
@@ -1183,21 +1192,21 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz
		bm_lockres->flags |= DLM_LKF_NOQUEUE;
		rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
		if (!rv)
			counts->pages = my_pages;
			mddev->bitmap_ops->set_pages(bitmap, my_pages);
		lockres_free(bm_lockres);

		if (my_pages != counts->pages)
		if (my_pages != stats.pages)
			/*
			 * Let's revert the bitmap size if one node
			 * can't resize bitmap
			 */
			goto out;
		md_bitmap_free(bitmap);
		mddev->bitmap_ops->free(bitmap);
	}

	return 0;
out:
	md_bitmap_free(bitmap);
	mddev->bitmap_ops->free(bitmap);
	update_bitmap_size(mddev, oldsize);
	return -1;
}
@@ -1207,24 +1216,27 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz
 */
static int cluster_check_sync_size(struct mddev *mddev)
{
	int i, rv;
	bitmap_super_t *sb;
	unsigned long my_sync_size, sync_size = 0;
	int node_num = mddev->bitmap_info.nodes;
	int current_slot = md_cluster_ops->slot_number(mddev);
	struct bitmap *bitmap = mddev->bitmap;
	char str[64];
	int node_num = mddev->bitmap_info.nodes;
	struct dlm_lock_resource *bm_lockres;
	struct md_bitmap_stats stats;
	void *bitmap = mddev->bitmap;
	unsigned long sync_size = 0;
	unsigned long my_sync_size;
	char str[64];
	int i, rv;

	sb = kmap_atomic(bitmap->storage.sb_page);
	my_sync_size = sb->sync_size;
	kunmap_atomic(sb);
	rv = mddev->bitmap_ops->get_stats(bitmap, &stats);
	if (rv)
		return rv;

	my_sync_size = stats.sync_size;

	for (i = 0; i < node_num; i++) {
		if (i == current_slot)
			continue;

		bitmap = get_bitmap_from_slot(mddev, i);
		bitmap = mddev->bitmap_ops->get_from_slot(mddev, i);
		if (IS_ERR(bitmap)) {
			pr_err("can't get bitmap from slot %d\n", i);
			return -1;
@@ -1238,25 +1250,28 @@ static int cluster_check_sync_size(struct mddev *mddev)
		bm_lockres = lockres_init(mddev, str, NULL, 1);
		if (!bm_lockres) {
			pr_err("md-cluster: Cannot initialize %s\n", str);
			md_bitmap_free(bitmap);
			mddev->bitmap_ops->free(bitmap);
			return -1;
		}
		bm_lockres->flags |= DLM_LKF_NOQUEUE;
		rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
		if (!rv)
			md_bitmap_update_sb(bitmap);
			mddev->bitmap_ops->update_sb(bitmap);
		lockres_free(bm_lockres);

		sb = kmap_atomic(bitmap->storage.sb_page);
		if (sync_size == 0)
			sync_size = sb->sync_size;
		else if (sync_size != sb->sync_size) {
			kunmap_atomic(sb);
			md_bitmap_free(bitmap);
		rv = mddev->bitmap_ops->get_stats(bitmap, &stats);
		if (rv) {
			mddev->bitmap_ops->free(bitmap);
			return rv;
		}

		if (sync_size == 0) {
			sync_size = stats.sync_size;
		} else if (sync_size != stats.sync_size) {
			mddev->bitmap_ops->free(bitmap);
			return -1;
		}
		kunmap_atomic(sb);
		md_bitmap_free(bitmap);
		mddev->bitmap_ops->free(bitmap);
	}

	return (my_sync_size == sync_size) ? 0 : -1;
@@ -1585,7 +1600,7 @@ static int gather_bitmaps(struct md_rdev *rdev)
	for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) {
		if (sn == (cinfo->slot_number - 1))
			continue;
		err = md_bitmap_copy_from_slot(mddev, sn, &lo, &hi, false);
		err = mddev->bitmap_ops->copy_from_slot(mddev, sn, &lo, &hi, false);
		if (err) {
			pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
			goto out;
+100 −55
Original line number Diff line number Diff line
@@ -664,6 +664,7 @@ int mddev_init(struct mddev *mddev)
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
	mddev_set_bitmap_ops(mddev);

	INIT_WORK(&mddev->sync_work, md_start_sync);
	INIT_WORK(&mddev->del_work, mddev_delayed_delete);
@@ -1264,6 +1265,18 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
	return ret;
}

/*
 * Fetch the bitmap's events_cleared counter through bitmap_ops->get_stats().
 *
 * Returns 0 when the statistics cannot be obtained, otherwise the reported
 * events_cleared value.
 */
static u64 md_bitmap_events_cleared(struct mddev *mddev)
{
	struct md_bitmap_stats stats;

	if (mddev->bitmap_ops->get_stats(mddev->bitmap, &stats))
		return 0;

	return stats.events_cleared;
}

/*
 * validate_super for 0.90.0
 * note: we are not using "freshest" for 0.9 superblock
@@ -1356,7 +1369,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
		if (ev1 < md_bitmap_events_cleared(mddev))
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
@@ -1883,7 +1896,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
		if (ev1 < md_bitmap_events_cleared(mddev))
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
@@ -2215,7 +2228,6 @@ super_1_allow_new_offset(struct md_rdev *rdev,
			 unsigned long long new_offset)
{
	/* All necessary checks on new >= old have been done */
	struct bitmap *bitmap;
	if (new_offset >= rdev->data_offset)
		return 1;

@@ -2232,11 +2244,18 @@ super_1_allow_new_offset(struct md_rdev *rdev,
	 */
	if (rdev->sb_start + (32+4)*2 > new_offset)
		return 0;
	bitmap = rdev->mddev->bitmap;
	if (bitmap && !rdev->mddev->bitmap_info.file &&
	    rdev->sb_start + rdev->mddev->bitmap_info.offset +
	    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)

	if (!rdev->mddev->bitmap_info.file) {
		struct mddev *mddev = rdev->mddev;
		struct md_bitmap_stats stats;
		int err;

		err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
		if (!err && rdev->sb_start + mddev->bitmap_info.offset +
		    stats.file_pages * (PAGE_SIZE >> 9) > new_offset)
			return 0;
	}

	if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
		return 0;

@@ -2712,7 +2731,7 @@ void md_update_sb(struct mddev *mddev, int force_change)

	mddev_add_trace_msg(mddev, "md md_update_sb");
rewrite:
	md_bitmap_update_sb(mddev->bitmap);
	mddev->bitmap_ops->update_sb(mddev->bitmap);
	rdev_for_each(rdev, mddev) {
		if (rdev->sb_loaded != 1)
			continue; /* no noise on spare devices */
@@ -4572,17 +4591,23 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len)
	/* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */
	while (*buf) {
		chunk = end_chunk = simple_strtoul(buf, &end, 0);
		if (buf == end) break;
		if (buf == end)
			break;

		if (*end == '-') { /* range */
			buf = end + 1;
			end_chunk = simple_strtoul(buf, &end, 0);
			if (buf == end) break;
			if (buf == end)
				break;
		}
		if (*end && !isspace(*end)) break;
		md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);

		if (*end && !isspace(*end))
			break;

		mddev->bitmap_ops->dirty_bits(mddev, chunk, end_chunk);
		buf = skip_spaces(end);
	}
	md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
	mddev->bitmap_ops->unplug(mddev, true); /* flush the bits to disk */
out:
	mddev_unlock(mddev);
	return len;
@@ -6098,16 +6123,10 @@ int md_run(struct mddev *mddev)
	}
	if (err == 0 && pers->sync_request &&
	    (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
		struct bitmap *bitmap;

		bitmap = md_bitmap_create(mddev, -1);
		if (IS_ERR(bitmap)) {
			err = PTR_ERR(bitmap);
		err = mddev->bitmap_ops->create(mddev, -1);
		if (err)
			pr_warn("%s: failed to create bitmap (%d)\n",
				mdname(mddev), err);
		} else
			mddev->bitmap = bitmap;

	}
	if (err)
		goto bitmap_abort;
@@ -6177,7 +6196,7 @@ int md_run(struct mddev *mddev)
		pers->free(mddev, mddev->private);
	mddev->private = NULL;
	module_put(pers->owner);
	md_bitmap_destroy(mddev);
	mddev->bitmap_ops->destroy(mddev);
abort:
	bioset_exit(&mddev->io_clone_set);
exit_sync_set:
@@ -6196,9 +6215,10 @@ int do_md_run(struct mddev *mddev)
	err = md_run(mddev);
	if (err)
		goto out;
	err = md_bitmap_load(mddev);

	err = mddev->bitmap_ops->load(mddev);
	if (err) {
		md_bitmap_destroy(mddev);
		mddev->bitmap_ops->destroy(mddev);
		goto out;
	}

@@ -6342,7 +6362,8 @@ static void __md_stop_writes(struct mddev *mddev)
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
	}
	md_bitmap_flush(mddev);

	mddev->bitmap_ops->flush(mddev);

	if (md_is_rdwr(mddev) &&
	    ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
@@ -6369,7 +6390,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);

static void mddev_detach(struct mddev *mddev)
{
	md_bitmap_wait_behind_writes(mddev);
	mddev->bitmap_ops->wait_behind_writes(mddev);
	if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) {
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
@@ -6384,7 +6405,8 @@ static void mddev_detach(struct mddev *mddev)
static void __md_stop(struct mddev *mddev)
{
	struct md_personality *pers = mddev->pers;
	md_bitmap_destroy(mddev);

	mddev->bitmap_ops->destroy(mddev);
	mddev_detach(mddev);
	spin_lock(&mddev->lock);
	mddev->pers = NULL;
@@ -7162,22 +7184,19 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
	err = 0;
	if (mddev->pers) {
		if (fd >= 0) {
			struct bitmap *bitmap;
			err = mddev->bitmap_ops->create(mddev, -1);
			if (!err)
				err = mddev->bitmap_ops->load(mddev);

			bitmap = md_bitmap_create(mddev, -1);
			if (!IS_ERR(bitmap)) {
				mddev->bitmap = bitmap;
				err = md_bitmap_load(mddev);
			} else
				err = PTR_ERR(bitmap);
			if (err) {
				md_bitmap_destroy(mddev);
				mddev->bitmap_ops->destroy(mddev);
				fd = -1;
			}
		} else if (fd < 0) {
			md_bitmap_destroy(mddev);
			mddev->bitmap_ops->destroy(mddev);
		}
	}

	if (fd < 0) {
		struct file *f = mddev->bitmap_info.file;
		if (f) {
@@ -7446,7 +7465,6 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
			goto err;
		}
		if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
			struct bitmap *bitmap;
			/* add the bitmap */
			if (mddev->bitmap) {
				rv = -EEXIST;
@@ -7460,24 +7478,24 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
			bitmap = md_bitmap_create(mddev, -1);
			if (!IS_ERR(bitmap)) {
				mddev->bitmap = bitmap;
				rv = md_bitmap_load(mddev);
			} else
				rv = PTR_ERR(bitmap);
			rv = mddev->bitmap_ops->create(mddev, -1);
			if (!rv)
				rv = mddev->bitmap_ops->load(mddev);

			if (rv)
				md_bitmap_destroy(mddev);
				mddev->bitmap_ops->destroy(mddev);
		} else {
			/* remove the bitmap */
			if (!mddev->bitmap) {
				rv = -ENOENT;
			struct md_bitmap_stats stats;

			rv = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
			if (rv)
				goto err;
			}
			if (mddev->bitmap->storage.file) {

			if (stats.file) {
				rv = -EINVAL;
				goto err;
			}

			if (mddev->bitmap_info.nodes) {
				/* hold PW on all the bitmap lock */
				if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
@@ -7492,7 +7510,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
				module_put(md_cluster_mod);
				mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
			}
			md_bitmap_destroy(mddev);
			mddev->bitmap_ops->destroy(mddev);
			mddev->bitmap_info.offset = 0;
		}
	}
@@ -8262,6 +8280,33 @@ static void md_seq_stop(struct seq_file *seq, void *v)
	spin_unlock(&all_mddevs_lock);
}

/*
 * Print the one-line bitmap summary for @mddev into @seq.
 *
 * The numbers come from bitmap_ops->get_stats(); if that call fails nothing
 * is written.  The chunk size is shown in KB when it is at least 1KB,
 * otherwise in bytes, and the backing file path is appended when the stats
 * report one.
 */
static void md_bitmap_status(struct seq_file *seq, struct mddev *mddev)
{
	struct md_bitmap_stats stats;
	unsigned long in_use;
	unsigned long kb;
	unsigned long chunk_val;
	const char *chunk_unit;

	if (mddev->bitmap_ops->get_stats(mddev->bitmap, &stats))
		return;

	in_use = stats.pages - stats.missing_pages;
	kb = mddev->bitmap_info.chunksize >> 10;
	if (kb) {
		chunk_val = kb;
		chunk_unit = "KB";
	} else {
		/* Chunks smaller than 1KB are reported in bytes. */
		chunk_val = mddev->bitmap_info.chunksize;
		chunk_unit = "B";
	}

	seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], %lu%s chunk",
		   in_use, stats.pages, in_use << (PAGE_SHIFT - 10),
		   chunk_val, chunk_unit);

	if (stats.file) {
		seq_puts(seq, ", file: ");
		seq_file_path(seq, stats.file, " \t\n");
	}

	seq_putc(seq, '\n');
}

static int md_seq_show(struct seq_file *seq, void *v)
{
	struct mddev *mddev;
@@ -8345,7 +8390,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
		} else
			seq_printf(seq, "\n       ");

		md_bitmap_status(seq, mddev->bitmap);
		md_bitmap_status(seq, mddev);

		seq_printf(seq, "\n");
	}
@@ -9397,7 +9442,7 @@ static void md_start_sync(struct work_struct *ws)
	 * stored on all devices. So make sure all bitmap pages get written.
	 */
	if (spares)
		md_bitmap_write_all(mddev->bitmap);
		mddev->bitmap_ops->write_all(mddev);

	name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ?
			"reshape" : "resync";
@@ -9485,7 +9530,7 @@ static void unregister_sync_thread(struct mddev *mddev)
void md_check_recovery(struct mddev *mddev)
{
	if (mddev->bitmap)
		md_bitmap_daemon_work(mddev);
		mddev->bitmap_ops->daemon_work(mddev);

	if (signal_pending(current)) {
		if (mddev->pers->sync_request && !mddev->external) {
@@ -9856,7 +9901,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
		if (ret)
			pr_info("md-cluster: resize failed\n");
		else
			md_bitmap_update_sb(mddev->bitmap);
			mddev->bitmap_ops->update_sb(mddev->bitmap);
	}

	/* Check for change of roles in the active devices */
Loading