Commit 61e62959 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-7.0/dm-changes' of...

Merge tag 'for-7.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mikulas Patocka:

 - dm-verity:
     - various optimizations and fixes related to forward error correction
     - add a .dm-verity keyring

 - dm-integrity: fix bugs with growing a device in bitmap mode

 - dm-mpath:
     - fix leaking fake timeout requests
     - fix UAF bug caused by stale rq->bio
     - fix minor bugs in device creation

 - dm-core:
     - fix a bug related to blkg association
     - avoid unnecessary blk-crypto work on invalid keys

 - dm-bufio:
     - dm-bufio cleanup and optimization (reducing hash table lookups)

 - various other minor fixes and cleanups

* tag 'for-7.0/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (35 commits)
  dm mpath: make pg_init_delay_msecs settable
  Revert "dm: fix a race condition in retrieve_deps"
  dm mpath: Add missing dm_put_device when failing to get scsi dh name
  dm vdo encodings: clean up header and version functions
  dm: use bio_clone_blkg_association
  dm: fix excessive blk-crypto operations for invalid keys
  dm-verity: fix section mismatch error
  dm-unstripe: fix mapping bug when there are multiple targets in a table
  dm-integrity: fix recalculation in bitmap mode
  dm-bufio: avoid redundant buffer_tree lookups
  dm-bufio: merge cache_put() into cache_put_and_wake()
  selftests: add dm-verity keyring selftests
  dm-verity: add dm-verity keyring
  dm: clear cloned request bio pointer when last clone bio completes
  dm-verity: fix up various workqueue-related comments
  dm-verity: switch to bio_advance_iter_single()
  dm-verity: consolidate the BH and normal work structs
  dm: add WQ_PERCPU to alloc_workqueue users
  dm-integrity: fix a typo in the code for write/discard race
  dm: use READ_ONCE in dm_blk_report_zones
  ...
parents 1e0ea4df 218b1699
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -433,7 +433,7 @@ Table line examples:
  8192 1960886272 linear 8:0 0 2048 # previous data segment

# Mapping table for e.g. raid5_rs reshape causing the size of the raid device to double-fold once the reshape finishes.
# Check the status output (e.g. "dmsetup status $RaidDev") for progess.
# Check the status output (e.g. "dmsetup status $RaidDev") for progress.

  0 $((2 * 1960886272)) raid raid5 7 0 region_size 2048 data_offset 8192 delta_disk 1 2 /dev/dm-0 /dev/dm-1 /dev/dm-2 /dev/dm-3

+7 −0
Original line number Diff line number Diff line
@@ -1372,6 +1372,13 @@ Kernel parameters
			For details see:
			Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst

	dm_verity.keyring_unsealed=
			[KNL] When set to 1, leave the dm-verity keyring
			unsealed after initialization so userspace can
			provision keys. Once the keyring is restricted
			it becomes active and is searched during signature
			verification.

	driver_async_probe=  [KNL]
			List of driver names to be probed asynchronously. *
			matches with all driver names. If * is specified, the
+2 −2
Original line number Diff line number Diff line
@@ -549,6 +549,8 @@ config DM_VERITY
	select CRYPTO_HASH
	select CRYPTO_LIB_SHA256
	select DM_BUFIO
	select REED_SOLOMON if DM_VERITY_FEC
	select REED_SOLOMON_DEC8 if DM_VERITY_FEC
	help
	  This device-mapper target creates a read-only device that
	  transparently validates the data on one underlying device against
@@ -598,8 +600,6 @@ config DM_VERITY_VERIFY_ROOTHASH_SIG_PLATFORM_KEYRING
config DM_VERITY_FEC
	bool "Verity forward error correction support"
	depends on DM_VERITY
	select REED_SOLOMON
	select REED_SOLOMON_DEC8
	help
	  Add forward error correction support to dm-verity. This option
	  makes it possible to use pre-generated error correction data to
+99 −74
Original line number Diff line number Diff line
@@ -369,8 +369,8 @@ struct dm_buffer {
 *  - IO
 *  - Eviction or cache sizing.
 *
 * cache_get() and cache_put() are threadsafe, you do not need to
 * protect these calls with a surrounding mutex.  All the other
 * cache_get() and cache_put_and_wake() are threadsafe, you do not need
 * to protect these calls with a surrounding mutex.  All the other
 * methods are not threadsafe; they do use locking primitives, but
 * only enough to ensure get/put are threadsafe.
 */
@@ -401,36 +401,51 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
	return dm_hash_locks_index(block, num_locks);
}

static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
/* Get the buffer tree in the cache for the given block.  Doesn't lock it. */
static inline struct buffer_tree *cache_get_tree(struct dm_buffer_cache *bc,
						 sector_t block)
{
	return &bc->trees[cache_index(block, bc->num_locks)];
}

/* Lock the given buffer tree in the cache for reading. */
static inline void cache_read_lock(struct dm_buffer_cache *bc,
				   struct buffer_tree *tree)
{
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
		read_lock_bh(&tree->u.spinlock);
	else
		down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
		down_read(&tree->u.lock);
}

static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
/* Unlock the given buffer tree in the cache for reading. */
static inline void cache_read_unlock(struct dm_buffer_cache *bc,
				     struct buffer_tree *tree)
{
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
		read_unlock_bh(&tree->u.spinlock);
	else
		up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
		up_read(&tree->u.lock);
}

static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
/* Lock the given buffer tree in the cache for writing. */
static inline void cache_write_lock(struct dm_buffer_cache *bc,
				    struct buffer_tree *tree)
{
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
		write_lock_bh(&tree->u.spinlock);
	else
		down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
		down_write(&tree->u.lock);
}

static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
/* Unlock the given buffer tree in the cache for writing. */
static inline void cache_write_unlock(struct dm_buffer_cache *bc,
				      struct buffer_tree *tree)
{
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
		write_unlock_bh(&tree->u.spinlock);
	else
		up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
		up_write(&tree->u.lock);
}

/*
@@ -602,41 +617,25 @@ static void __cache_inc_buffer(struct dm_buffer *b)
	WRITE_ONCE(b->last_accessed, jiffies);
}

static struct dm_buffer *cache_get(struct dm_buffer_cache *bc, sector_t block)
static struct dm_buffer *cache_get(struct dm_buffer_cache *bc,
				   struct buffer_tree *tree, sector_t block)
{
	struct dm_buffer *b;

	cache_read_lock(bc, block);
	b = __cache_get(&bc->trees[cache_index(block, bc->num_locks)].root, block);
	/* Assuming tree == cache_get_tree(bc, block) */
	cache_read_lock(bc, tree);
	b = __cache_get(&tree->root, block);
	if (b) {
		lru_reference(&b->lru);
		__cache_inc_buffer(b);
	}
	cache_read_unlock(bc, block);
	cache_read_unlock(bc, tree);

	return b;
}

/*--------------*/

/*
 * Returns true if the hold count hits zero.
 * threadsafe
 */
static bool cache_put(struct dm_buffer_cache *bc, struct dm_buffer *b)
{
	bool r;

	cache_read_lock(bc, b->block);
	BUG_ON(!atomic_read(&b->hold_count));
	r = atomic_dec_and_test(&b->hold_count);
	cache_read_unlock(bc, b->block);

	return r;
}

/*--------------*/

typedef enum evict_result (*b_predicate)(struct dm_buffer *, void *);

/*
@@ -681,7 +680,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode

	b = le_to_buffer(le);
	/* __evict_pred will have locked the appropriate tree. */
	rb_erase(&b->node, &bc->trees[cache_index(b->block, bc->num_locks)].root);
	rb_erase(&b->node, &cache_get_tree(bc, b->block)->root);

	return b;
}
@@ -704,15 +703,17 @@ static struct dm_buffer *cache_evict(struct dm_buffer_cache *bc, int list_mode,
/*
 * Mark a buffer as clean or dirty. Not threadsafe.
 */
static void cache_mark(struct dm_buffer_cache *bc, struct dm_buffer *b, int list_mode)
static void cache_mark(struct dm_buffer_cache *bc, struct buffer_tree *tree,
		       struct dm_buffer *b, int list_mode)
{
	cache_write_lock(bc, b->block);
	/* Assuming tree == cache_get_tree(bc, b->block) */
	cache_write_lock(bc, tree);
	if (list_mode != b->list_mode) {
		lru_remove(&bc->lru[b->list_mode], &b->lru);
		b->list_mode = list_mode;
		lru_insert(&bc->lru[b->list_mode], &b->lru);
	}
	cache_write_unlock(bc, b->block);
	cache_write_unlock(bc, tree);
}

/*--------------*/
@@ -838,19 +839,21 @@ static bool __cache_insert(struct rb_root *root, struct dm_buffer *b)
	return true;
}

static bool cache_insert(struct dm_buffer_cache *bc, struct dm_buffer *b)
static bool cache_insert(struct dm_buffer_cache *bc, struct buffer_tree *tree,
			 struct dm_buffer *b)
{
	bool r;

	if (WARN_ON_ONCE(b->list_mode >= LIST_SIZE))
		return false;

	cache_write_lock(bc, b->block);
	/* Assuming tree == cache_get_tree(bc, b->block) */
	cache_write_lock(bc, tree);
	BUG_ON(atomic_read(&b->hold_count) != 1);
	r = __cache_insert(&bc->trees[cache_index(b->block, bc->num_locks)].root, b);
	r = __cache_insert(&tree->root, b);
	if (r)
		lru_insert(&bc->lru[b->list_mode], &b->lru);
	cache_write_unlock(bc, b->block);
	cache_write_unlock(bc, tree);

	return r;
}
@@ -863,21 +866,23 @@ static bool cache_insert(struct dm_buffer_cache *bc, struct dm_buffer *b)
 *
 * Not threadsafe.
 */
static bool cache_remove(struct dm_buffer_cache *bc, struct dm_buffer *b)
static bool cache_remove(struct dm_buffer_cache *bc, struct buffer_tree *tree,
			 struct dm_buffer *b)
{
	bool r;

	cache_write_lock(bc, b->block);
	/* Assuming tree == cache_get_tree(bc, b->block) */
	cache_write_lock(bc, tree);

	if (atomic_read(&b->hold_count) != 1) {
		r = false;
	} else {
		r = true;
		rb_erase(&b->node, &bc->trees[cache_index(b->block, bc->num_locks)].root);
		rb_erase(&b->node, &tree->root);
		lru_remove(&bc->lru[b->list_mode], &b->lru);
	}

	cache_write_unlock(bc, b->block);
	cache_write_unlock(bc, tree);

	return r;
}
@@ -1743,14 +1748,22 @@ static void __check_watermark(struct dm_bufio_client *c,
 *--------------------------------------------------------------
 */

static void cache_put_and_wake(struct dm_bufio_client *c, struct dm_buffer *b)
static void cache_put_and_wake(struct dm_bufio_client *c,
			       struct buffer_tree *tree, struct dm_buffer *b)
{
	bool wake;

	/* Assuming tree == cache_get_tree(&c->cache, b->block) */
	cache_read_lock(&c->cache, tree);
	BUG_ON(!atomic_read(&b->hold_count));
	wake = atomic_dec_and_test(&b->hold_count);
	cache_read_unlock(&c->cache, tree);

	/*
	 * Relying on waitqueue_active() is racey, but we sleep
	 * with schedule_timeout anyway.
	 */
	if (cache_put(&c->cache, b) &&
	    unlikely(waitqueue_active(&c->free_buffer_wait)))
	if (wake && unlikely(waitqueue_active(&c->free_buffer_wait)))
		wake_up(&c->free_buffer_wait);
}

@@ -1758,7 +1771,8 @@ static void cache_put_and_wake(struct dm_bufio_client *c, struct dm_buffer *b)
 * This assumes you have already checked the cache to see if the buffer
 * is already present (it will recheck after dropping the lock for allocation).
 */
static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
static struct dm_buffer *__bufio_new(struct dm_bufio_client *c,
				     struct buffer_tree *tree, sector_t block,
				     enum new_flag nf, int *need_submit,
				     struct list_head *write_list)
{
@@ -1778,7 +1792,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
	 * We've had a period where the mutex was unlocked, so need to
	 * recheck the buffer tree.
	 */
	b = cache_get(&c->cache, block);
	b = cache_get(&c->cache, tree, block);
	if (b) {
		__free_buffer_wake(new_b);
		goto found_buffer;
@@ -1806,13 +1820,13 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
	 * is set.  Otherwise another thread could get it and use
	 * it before it had been read.
	 */
	cache_insert(&c->cache, b);
	cache_insert(&c->cache, tree, b);

	return b;

found_buffer:
	if (nf == NF_PREFETCH) {
		cache_put_and_wake(c, b);
		cache_put_and_wake(c, tree, b);
		return NULL;
	}

@@ -1824,7 +1838,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
	 * the same buffer, it would deadlock if we waited.
	 */
	if (nf == NF_GET && unlikely(test_bit_acquire(B_READING, &b->state))) {
		cache_put_and_wake(c, b);
		cache_put_and_wake(c, tree, b);
		return NULL;
	}

@@ -1858,6 +1872,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
		      enum new_flag nf, struct dm_buffer **bp,
		      unsigned short ioprio)
{
	struct buffer_tree *tree;
	int need_submit = 0;
	struct dm_buffer *b;

@@ -1869,10 +1884,11 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
	 * Fast path, hopefully the block is already in the cache.  No need
	 * to get the client lock for this.
	 */
	b = cache_get(&c->cache, block);
	tree = cache_get_tree(&c->cache, block);
	b = cache_get(&c->cache, tree, block);
	if (b) {
		if (nf == NF_PREFETCH) {
			cache_put_and_wake(c, b);
			cache_put_and_wake(c, tree, b);
			return NULL;
		}

@@ -1884,7 +1900,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
		 * the same buffer, it would deadlock if we waited.
		 */
		if (nf == NF_GET && unlikely(test_bit_acquire(B_READING, &b->state))) {
			cache_put_and_wake(c, b);
			cache_put_and_wake(c, tree, b);
			return NULL;
		}
	}
@@ -1894,7 +1910,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
			return NULL;

		dm_bufio_lock(c);
		b = __bufio_new(c, block, nf, &need_submit, &write_list);
		b = __bufio_new(c, tree, block, nf, &need_submit, &write_list);
		dm_bufio_unlock(c);
	}

@@ -1981,18 +1997,20 @@ static void __dm_bufio_prefetch(struct dm_bufio_client *c,
	blk_start_plug(&plug);

	for (; n_blocks--; block++) {
		int need_submit;
		struct buffer_tree *tree;
		struct dm_buffer *b;
		int need_submit;

		b = cache_get(&c->cache, block);
		tree = cache_get_tree(&c->cache, block);
		b = cache_get(&c->cache, tree, block);
		if (b) {
			/* already in cache */
			cache_put_and_wake(c, b);
			cache_put_and_wake(c, tree, b);
			continue;
		}

		dm_bufio_lock(c);
		b = __bufio_new(c, block, NF_PREFETCH, &need_submit,
		b = __bufio_new(c, tree, block, NF_PREFETCH, &need_submit,
				&write_list);
		if (unlikely(!list_empty(&write_list))) {
			dm_bufio_unlock(c);
@@ -2037,6 +2055,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_prefetch_with_ioprio);
void dm_bufio_release(struct dm_buffer *b)
{
	struct dm_bufio_client *c = b->c;
	struct buffer_tree *tree = cache_get_tree(&c->cache, b->block);

	/*
	 * If there were errors on the buffer, and the buffer is not
@@ -2050,7 +2069,7 @@ void dm_bufio_release(struct dm_buffer *b)
		dm_bufio_lock(c);

		/* cache remove can fail if there are other holders */
		if (cache_remove(&c->cache, b)) {
		if (cache_remove(&c->cache, tree, b)) {
			__free_buffer_wake(b);
			dm_bufio_unlock(c);
			return;
@@ -2059,7 +2078,7 @@ void dm_bufio_release(struct dm_buffer *b)
		dm_bufio_unlock(c);
	}

	cache_put_and_wake(c, b);
	cache_put_and_wake(c, tree, b);
}
EXPORT_SYMBOL_GPL(dm_bufio_release);

@@ -2078,7 +2097,8 @@ void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
	if (!test_and_set_bit(B_DIRTY, &b->state)) {
		b->dirty_start = start;
		b->dirty_end = end;
		cache_mark(&c->cache, b, LIST_DIRTY);
		cache_mark(&c->cache, cache_get_tree(&c->cache, b->block), b,
			   LIST_DIRTY);
	} else {
		if (start < b->dirty_start)
			b->dirty_start = start;
@@ -2143,6 +2163,7 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
	lru_iter_begin(&c->cache.lru[LIST_DIRTY], &it);
	while ((e = lru_iter_next(&it, is_writing, c))) {
		struct dm_buffer *b = le_to_buffer(e);
		struct buffer_tree *tree;
		__cache_inc_buffer(b);

		BUG_ON(test_bit(B_READING, &b->state));
@@ -2156,10 +2177,12 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
			wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
		}

		tree = cache_get_tree(&c->cache, b->block);

		if (!test_bit(B_DIRTY, &b->state) && !test_bit(B_WRITING, &b->state))
			cache_mark(&c->cache, b, LIST_CLEAN);
			cache_mark(&c->cache, tree, b, LIST_CLEAN);

		cache_put_and_wake(c, b);
		cache_put_and_wake(c, tree, b);

		cond_resched();
	}
@@ -2227,17 +2250,18 @@ EXPORT_SYMBOL_GPL(dm_bufio_issue_discard);

static void forget_buffer(struct dm_bufio_client *c, sector_t block)
{
	struct buffer_tree *tree = cache_get_tree(&c->cache, block);
	struct dm_buffer *b;

	b = cache_get(&c->cache, block);
	b = cache_get(&c->cache, tree, block);
	if (b) {
		if (likely(!smp_load_acquire(&b->state))) {
			if (cache_remove(&c->cache, b))
			if (cache_remove(&c->cache, tree, b))
				__free_buffer_wake(b);
			else
				cache_put_and_wake(c, b);
				cache_put_and_wake(c, tree, b);
		} else {
			cache_put_and_wake(c, b);
			cache_put_and_wake(c, tree, b);
		}
	}
}
@@ -2833,7 +2857,8 @@ static int __init dm_bufio_init(void)
	__cache_size_refresh();
	mutex_unlock(&dm_bufio_clients_lock);

	dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0);
	dm_bufio_wq = alloc_workqueue("dm_bufio_cache",
				      WQ_MEM_RECLAIM | WQ_PERCPU, 0);
	if (!dm_bufio_wq)
		return -ENOMEM;

+2 −8
Original line number Diff line number Diff line
@@ -2099,7 +2099,6 @@ static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	sector_t origin_sectors;
	int r;

	if (!at_least_one_arg(as, error))
@@ -2112,12 +2111,6 @@ static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
		return r;
	}

	origin_sectors = get_dev_size(ca->origin_dev);
	if (ca->ti->len > origin_sectors) {
		*error = "Device size larger than cached device";
		return -EINVAL;
	}

	return 0;
}

@@ -2533,7 +2526,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
		goto bad;
	}

	cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
	cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX,
				    WQ_MEM_RECLAIM | WQ_PERCPU, 0);
	if (!cache->wq) {
		*error = "could not create workqueue for metadata object";
		goto bad;
Loading