Commit 2a695062 authored by Mikulas Patocka's avatar Mikulas Patocka Committed by Mike Snitzer
Browse files

dm-bufio: fix no-sleep mode



dm-bufio has a no-sleep mode. When activated (with the
DM_BUFIO_CLIENT_NO_SLEEP flag), the bufio client is read-only and we
could call dm_bufio_get from tasklets. This is used by dm-verity.

Unfortunately, commit 450e8dee ("dm bufio: improve concurrent IO
performance") broke this and the kernel would warn that cache_get()
was calling down_read() from no-sleeping context. The bug can be
reproduced by using "veritysetup open" with the "--use-tasklets"
flag.

This commit fixes dm-bufio, so that the tasklet mode works again, by
expanding use of the 'no_sleep_enabled' static_key to conditionally
use either a rw_semaphore or rwlock_t (which are colocated in the
buffer_tree structure using a union).

Signed-off-by: default avatarMikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org	# v6.4
Fixes: 450e8dee ("dm bufio: improve concurrent IO performance")
Signed-off-by: default avatarMike Snitzer <snitzer@kernel.org>
parent ccadc8a2
Loading
Loading
Loading
Loading
+62 −25
Original line number Diff line number Diff line
@@ -254,7 +254,7 @@ enum evict_result {

typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);

static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
{
	unsigned long tested = 0;
	struct list_head *h = lru->cursor;
@@ -295,6 +295,7 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con

		h = h->next;

		if (!no_sleep)
			cond_resched();
	}

@@ -382,7 +383,10 @@ struct dm_buffer {
 */

struct buffer_tree {
	union {
		struct rw_semaphore lock;
		rwlock_t spinlock;
	} u;
	struct rb_root root;
} ____cacheline_aligned_in_smp;

@@ -393,9 +397,12 @@ struct dm_buffer_cache {
	 * on the locks.
	 */
	unsigned int num_locks;
	bool no_sleep;
	struct buffer_tree trees[];
};

static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);

static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
{
	return dm_hash_locks_index(block, num_locks);
@@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)

static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
{
	down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
{
	up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
{
	down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
{
	up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
		write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
	else
		up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}

/*
@@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool

static void __lh_lock(struct lock_history *lh, unsigned int index)
{
	if (lh->write)
		down_write(&lh->cache->trees[index].lock);
	if (lh->write) {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			write_lock_bh(&lh->cache->trees[index].u.spinlock);
		else
			down_write(&lh->cache->trees[index].u.lock);
	} else {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			read_lock_bh(&lh->cache->trees[index].u.spinlock);
		else
		down_read(&lh->cache->trees[index].lock);
			down_read(&lh->cache->trees[index].u.lock);
	}
}

static void __lh_unlock(struct lock_history *lh, unsigned int index)
{
	if (lh->write)
		up_write(&lh->cache->trees[index].lock);
	if (lh->write) {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			write_unlock_bh(&lh->cache->trees[index].u.spinlock);
		else
			up_write(&lh->cache->trees[index].u.lock);
	} else {
		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
			read_unlock_bh(&lh->cache->trees[index].u.spinlock);
		else
		up_read(&lh->cache->trees[index].lock);
			up_read(&lh->cache->trees[index].u.lock);
	}
}

/*
@@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
	return le_to_buffer(le);
}

static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
{
	unsigned int i;

	bc->num_locks = num_locks;
	bc->no_sleep = no_sleep;

	for (i = 0; i < bc->num_locks; i++) {
		init_rwsem(&bc->trees[i].lock);
		if (no_sleep)
			rwlock_init(&bc->trees[i].u.spinlock);
		else
			init_rwsem(&bc->trees[i].u.lock);
		bc->trees[i].root = RB_ROOT;
	}

@@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
	struct lru_entry *le;
	struct dm_buffer *b;

	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
	if (!le)
		return NULL;

@@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
	struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};

	while (true) {
		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
		if (!le)
			break;

@@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
{
	unsigned int i;

	BUG_ON(bc->no_sleep);
	for (i = 0; i < bc->num_locks; i++) {
		down_write(&bc->trees[i].lock);
		down_write(&bc->trees[i].u.lock);
		__remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
		up_write(&bc->trees[i].lock);
		up_write(&bc->trees[i].u.lock);
	}
}

@@ -979,8 +1017,6 @@ struct dm_bufio_client {
	struct dm_buffer_cache cache; /* must be last member */
};

static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);

/*----------------------------------------------------------------*/

#define dm_bufio_in_request()	(!!current->bio_list)
@@ -1871,6 +1907,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
	if (need_submit)
		submit_io(b, REQ_OP_READ, read_endio);

	if (nf != NF_GET)	/* we already tested this condition above */
		wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);

	if (b->read_error) {
@@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
		r = -ENOMEM;
		goto bad_client;
	}
	cache_init(&c->cache, num_locks);
	cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);

	c->bdev = bdev;
	c->block_size = block_size;