Commit 05aa69b0 authored by Linus Torvalds
Browse files

Merge tag 'for-6.7/dm-fixes' of...

Merge tag 'for-6.7/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Various fixes for the DM delay target to address regressions
   introduced during the 6.7 merge window

 - Fixes to both DM bufio and the verity target for no-sleep mode,
   to address sleeping while atomic issues

 - Update DM crypt target in response to the treewide change that
   made MAX_ORDER inclusive

* tag 'for-6.7/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm-crypt: start allocating with MAX_ORDER
  dm-verity: don't use blocking calls from tasklets
  dm-bufio: fix no-sleep mode
  dm-delay: avoid duplicate logic
  dm-delay: fix bugs introduced by kthread mode
  dm-delay: fix a race between delay_presuspend and delay_bio
parents 23dfa043 13648e04
Loading
Loading
Loading
Loading
+62 −25
Original line number Diff line number Diff line
@@ -254,7 +254,7 @@ enum evict_result {

typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);

static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
{
	unsigned long tested = 0;
	struct list_head *h = lru->cursor;
@@ -295,6 +295,7 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con

		h = h->next;

		if (!no_sleep)
			cond_resched();
	}

@@ -382,7 +383,10 @@ struct dm_buffer {
 */

/*
 * One shard of the buffer cache: an rb-tree root plus the lock taken
 * around accesses to it.
 *
 * The lock is a union because a given cache uses exactly one flavour for
 * its whole lifetime.  cache_init() initialises u.spinlock when the cache
 * is in no-sleep mode and u.lock otherwise, and the cache_*_lock() /
 * __lh_lock() helpers pick the matching member from bc->no_sleep.
 */
struct buffer_tree {
	union {
		struct rw_semaphore lock;	/* used when the cache is not in no-sleep mode */
		rwlock_t spinlock;		/* used in no-sleep mode, taken with the _bh variants */
	} u;
	struct rb_root root;			/* tree of buffers belonging to this shard */
} ____cacheline_aligned_in_smp;

@@ -393,9 +397,12 @@ struct dm_buffer_cache {
	 * on the locks.
	 */
	unsigned int num_locks;
	bool no_sleep;
	struct buffer_tree trees[];
};

static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);

static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
{
	return dm_hash_locks_index(block, num_locks);
@@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)

static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
{
	down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
{
	up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
{
	down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
{
	up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

/*
@@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool

static void __lh_lock(struct lock_history *lh, unsigned int index)
{
	if (lh->write)
		down_write(&lh->cache->trees[index].lock);
	if (lh->write) {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			write_lock_bh(&lh->cache->trees[index].u.spinlock);
		else
			down_write(&lh->cache->trees[index].u.lock);
	} else {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			read_lock_bh(&lh->cache->trees[index].u.spinlock);
		else
		down_read(&lh->cache->trees[index].lock);
			down_read(&lh->cache->trees[index].u.lock);
	}
}

static void __lh_unlock(struct lock_history *lh, unsigned int index)
{
	if (lh->write)
		up_write(&lh->cache->trees[index].lock);
	if (lh->write) {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			write_unlock_bh(&lh->cache->trees[index].u.spinlock);
		else
			up_write(&lh->cache->trees[index].u.lock);
	} else {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			read_unlock_bh(&lh->cache->trees[index].u.spinlock);
		else
		up_read(&lh->cache->trees[index].lock);
			up_read(&lh->cache->trees[index].u.lock);
	}
}

/*
@@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
	return le_to_buffer(le);
}

static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
{
	unsigned int i;

	bc->num_locks = num_locks;
	bc->no_sleep = no_sleep;

	for (i = 0; i < bc->num_locks; i++) {
		init_rwsem(&bc->trees[i].lock);
		if (no_sleep)
			rwlock_init(&bc->trees[i].u.spinlock);
		else
			init_rwsem(&bc->trees[i].u.lock);
		bc->trees[i].root = RB_ROOT;
	}

@@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
	struct lru_entry *le;
	struct dm_buffer *b;

	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
	if (!le)
		return NULL;

@@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
	struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};

	while (true) {
		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
		if (!le)
			break;

@@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
{
	unsigned int i;

	BUG_ON(bc->no_sleep);
	for (i = 0; i < bc->num_locks; i++) {
		down_write(&bc->trees[i].lock);
		down_write(&bc->trees[i].u.lock);
		__remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
		up_write(&bc->trees[i].lock);
		up_write(&bc->trees[i].u.lock);
	}
}

@@ -979,8 +1017,6 @@ struct dm_bufio_client {
	struct dm_buffer_cache cache; /* must be last member */
};

static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);

/*----------------------------------------------------------------*/

#define dm_bufio_in_request()	(!!current->bio_list)
@@ -1871,6 +1907,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
	if (need_submit)
		submit_io(b, REQ_OP_READ, read_endio);

	if (nf != NF_GET)	/* we already tested this condition above */
		wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);

	if (b->read_error) {
@@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
		r = -ENOMEM;
		goto bad_client;
	}
	cache_init(&c->cache, num_locks);
	cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);

	c->bdev = bdev;
	c->block_size = block_size;
+1 −1
Original line number Diff line number Diff line
@@ -1673,7 +1673,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size)
	unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
	unsigned int remaining_size;
	unsigned int order = MAX_ORDER - 1;
	unsigned int order = MAX_ORDER;

retry:
	if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
+52 −60
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ struct delay_c {
	struct work_struct flush_expired_bios;
	struct list_head delayed_bios;
	struct task_struct *worker;
	atomic_t may_delay;
	bool may_delay;

	struct delay_class read;
	struct delay_class write;
@@ -73,39 +73,6 @@ static inline bool delay_is_fast(struct delay_c *dc)
	return !!dc->worker;
}

static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
{
	struct dm_delay_info *delayed, *next;

	mutex_lock(&delayed_bios_lock);
	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
			struct bio *bio = dm_bio_from_per_bio_data(delayed,
						sizeof(struct dm_delay_info));
			list_del(&delayed->list);
			dm_submit_bio_remap(bio, NULL);
			delayed->class->ops--;
		}
	}
	mutex_unlock(&delayed_bios_lock);
}

static int flush_worker_fn(void *data)
{
	struct delay_c *dc = data;

	while (1) {
		flush_delayed_bios_fast(dc, false);
		if (unlikely(list_empty(&dc->delayed_bios))) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
		} else
			cond_resched();
	}

	return 0;
}

static void flush_bios(struct bio *bio)
{
	struct bio *n;
@@ -118,36 +85,61 @@ static void flush_bios(struct bio *bio)
	}
}

static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
	struct dm_delay_info *delayed, *next;
	struct bio_list flush_bio_list;
	unsigned long next_expires = 0;
	unsigned long start_timer = 0;
	struct bio_list flush_bios = { };
	bool start_timer = false;
	bio_list_init(&flush_bio_list);

	mutex_lock(&delayed_bios_lock);
	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
		cond_resched();
		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
			struct bio *bio = dm_bio_from_per_bio_data(delayed,
						sizeof(struct dm_delay_info));
			list_del(&delayed->list);
			bio_list_add(&flush_bios, bio);
			bio_list_add(&flush_bio_list, bio);
			delayed->class->ops--;
			continue;
		}

		if (!delay_is_fast(dc)) {
			if (!start_timer) {
			start_timer = 1;
				start_timer = true;
				next_expires = delayed->expires;
		} else
			} else {
				next_expires = min(next_expires, delayed->expires);
			}
		}
	}
	mutex_unlock(&delayed_bios_lock);

	if (start_timer)
		queue_timeout(dc, next_expires);

	return bio_list_get(&flush_bios);
	flush_bios(bio_list_get(&flush_bio_list));
}

/*
 * Thread function for the dm-delay flush worker.
 *
 * @data: the struct delay_c this worker serves.
 *
 * Repeatedly calls flush_delayed_bios(dc, false) until kthread_should_stop()
 * reports that the thread was asked to stop, then returns 0.
 *
 * After each pass the delayed list is re-checked under delayed_bios_lock:
 * if it is empty the thread marks itself TASK_INTERRUPTIBLE and calls
 * schedule(); otherwise it only yields with cond_resched() and goes round
 * again.
 */
static int flush_worker_fn(void *data)
{
	struct delay_c *dc = data;

	while (!kthread_should_stop()) {
		flush_delayed_bios(dc, false);
		mutex_lock(&delayed_bios_lock);
		if (unlikely(list_empty(&dc->delayed_bios))) {
			/*
			 * The task state is changed while the lock is still
			 * held, before schedule() is called.  Presumably this
			 * keeps a producer that queues a bio under the same
			 * lock and then wakes the worker from slipping in
			 * between the emptiness check and the state change;
			 * the wake-up call is not visible here - confirm.
			 *
			 * NOTE(review): kthread_should_stop() is tested before
			 * the state is set, so verify that a stop request
			 * arriving in that window cannot leave the thread
			 * asleep.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			mutex_unlock(&delayed_bios_lock);
			schedule();
		} else {
			/* Bios are still pending: yield the CPU and poll again. */
			mutex_unlock(&delayed_bios_lock);
			cond_resched();
		}
	}

	return 0;
}

static void flush_expired_bios(struct work_struct *work)
@@ -155,10 +147,7 @@ static void flush_expired_bios(struct work_struct *work)
	struct delay_c *dc;

	dc = container_of(work, struct delay_c, flush_expired_bios);
	if (delay_is_fast(dc))
		flush_delayed_bios_fast(dc, false);
	else
		flush_bios(flush_delayed_bios(dc, false));
	flush_delayed_bios(dc, false);
}

static void delay_dtr(struct dm_target *ti)
@@ -177,7 +166,6 @@ static void delay_dtr(struct dm_target *ti)
	if (dc->worker)
		kthread_stop(dc->worker);

	if (!delay_is_fast(dc))
	mutex_destroy(&dc->timer_lock);

	kfree(dc);
@@ -236,7 +224,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)

	ti->private = dc;
	INIT_LIST_HEAD(&dc->delayed_bios);
	atomic_set(&dc->may_delay, 1);
	mutex_init(&dc->timer_lock);
	dc->may_delay = true;
	dc->argc = argc;

	ret = delay_class_ctr(ti, &dc->read, argv);
@@ -282,12 +271,12 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
					    "dm-delay-flush-worker");
		if (IS_ERR(dc->worker)) {
			ret = PTR_ERR(dc->worker);
			dc->worker = NULL;
			goto bad;
		}
	} else {
		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
		mutex_init(&dc->timer_lock);
		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
		if (!dc->kdelayd_wq) {
			ret = -EINVAL;
@@ -312,7 +301,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
	struct dm_delay_info *delayed;
	unsigned long expires = 0;

	if (!c->delay || !atomic_read(&dc->may_delay))
	if (!c->delay)
		return DM_MAPIO_REMAPPED;

	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
@@ -321,6 +310,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);

	mutex_lock(&delayed_bios_lock);
	if (unlikely(!dc->may_delay)) {
		mutex_unlock(&delayed_bios_lock);
		return DM_MAPIO_REMAPPED;
	}
	c->ops++;
	list_add_tail(&delayed->list, &dc->delayed_bios);
	mutex_unlock(&delayed_bios_lock);
@@ -337,21 +330,20 @@ static void delay_presuspend(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	atomic_set(&dc->may_delay, 0);
	mutex_lock(&delayed_bios_lock);
	dc->may_delay = false;
	mutex_unlock(&delayed_bios_lock);

	if (delay_is_fast(dc))
		flush_delayed_bios_fast(dc, true);
	else {
	if (!delay_is_fast(dc))
		del_timer_sync(&dc->delay_timer);
		flush_bios(flush_delayed_bios(dc, true));
	}
	flush_delayed_bios(dc, true);
}

static void delay_resume(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	atomic_set(&dc->may_delay, 1);
	dc->may_delay = true;
}

static int delay_map(struct dm_target *ti, struct bio *bio)
+2 −2
Original line number Diff line number Diff line
@@ -185,7 +185,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
{
	if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
				 data, 1 << v->data_dev_block_bits,
				 verity_io_real_digest(v, io))))
				 verity_io_real_digest(v, io), true)))
		return 0;

	return memcmp(verity_io_real_digest(v, io), want_digest,
@@ -386,7 +386,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
	/* Always re-validate the corrected block against the expected hash */
	r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
			1 << v->data_dev_block_bits,
			verity_io_real_digest(v, io));
			verity_io_real_digest(v, io), true);
	if (unlikely(r < 0))
		return r;

+12 −11
Original line number Diff line number Diff line
@@ -135,19 +135,20 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
 * Wrapper for crypto_ahash_init, which handles verity salting.
 */
static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
				struct crypto_wait *wait)
				struct crypto_wait *wait, bool may_sleep)
{
	int r;

	ahash_request_set_tfm(req, v->tfm);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
					CRYPTO_TFM_REQ_MAY_BACKLOG,
	ahash_request_set_callback(req,
		may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
		crypto_req_done, (void *)wait);
	crypto_init_wait(wait);

	r = crypto_wait_req(crypto_ahash_init(req), wait);

	if (unlikely(r < 0)) {
		if (r != -ENOMEM)
			DMERR("crypto_ahash_init failed: %d", r);
		return r;
	}
@@ -179,12 +180,12 @@ static int verity_hash_final(struct dm_verity *v, struct ahash_request *req,
}

int verity_hash(struct dm_verity *v, struct ahash_request *req,
		const u8 *data, size_t len, u8 *digest)
		const u8 *data, size_t len, u8 *digest, bool may_sleep)
{
	int r;
	struct crypto_wait wait;

	r = verity_hash_init(v, req, &wait);
	r = verity_hash_init(v, req, &wait, may_sleep);
	if (unlikely(r < 0))
		goto out;

@@ -322,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,

		r = verity_hash(v, verity_io_hash_req(v, io),
				data, 1 << v->hash_dev_block_bits,
				verity_io_real_digest(v, io));
				verity_io_real_digest(v, io), !io->in_tasklet);
		if (unlikely(r < 0))
			goto release_ret_r;

@@ -556,7 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io)
			continue;
		}

		r = verity_hash_init(v, req, &wait);
		r = verity_hash_init(v, req, &wait, !io->in_tasklet);
		if (unlikely(r < 0))
			return r;

@@ -652,7 +653,7 @@ static void verity_tasklet(unsigned long data)

	io->in_tasklet = true;
	err = verity_verify_io(io);
	if (err == -EAGAIN) {
	if (err == -EAGAIN || err == -ENOMEM) {
		/* fallback to retrying with work-queue */
		INIT_WORK(&io->work, verity_work);
		queue_work(io->v->verify_wq, &io->work);
@@ -1033,7 +1034,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
		goto out;

	r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
			v->zero_digest);
			v->zero_digest, true);

out:
	kfree(req);
Loading