Commit 23f57ed9 authored by Eric Biggers, committed by Mikulas Patocka
Browse files

dm-verity: use 2-way interleaved SHA-256 hashing when supported



When the crypto library provides an optimized implementation of
sha256_finup_2x(), use it to interleave the hashing of pairs of data
blocks.  On some CPUs this nearly doubles hashing performance.  The
increase in overall throughput of cold-cache dm-verity reads that I'm
seeing on arm64 and x86_64 is roughly 35% (though this metric is hard to
measure as it jumps around a lot).

For now this is done only on data blocks, not Merkle tree blocks.  We
could use sha256_finup_2x() on Merkle tree blocks too, but that is less
important as there aren't as many Merkle tree blocks as data blocks, and
that would require some additional code restructuring.

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
parent 379475dc
Loading
Loading
Loading
Loading
+86 −27
Original line number Diff line number Diff line
@@ -417,9 +417,12 @@ static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io,
static int verity_handle_data_hash_mismatch(struct dm_verity *v,
					    struct dm_verity_io *io,
					    struct bio *bio,
					    const u8 *want_digest,
					    sector_t blkno, u8 *data)
					    struct pending_block *block)
{
	const u8 *want_digest = block->want_digest;
	sector_t blkno = block->blkno;
	u8 *data = block->data;

	if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
		/*
		 * Error handling code (FEC included) cannot be run in the
@@ -448,6 +451,58 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v,
	return 0;
}

static void verity_clear_pending_blocks(struct dm_verity_io *io)
{
	int i;

	for (i = io->num_pending - 1; i >= 0; i--) {
		kunmap_local(io->pending_blocks[i].data);
		io->pending_blocks[i].data = NULL;
	}
	io->num_pending = 0;
}

/*
 * Hash all data blocks currently queued on @io and compare each computed
 * digest against the expected one.
 *
 * A matching block is marked in v->validated_blocks when that bitmap exists;
 * a mismatching block is passed to verity_handle_data_hash_mismatch().
 *
 * Returns 0 after every queued block has been processed and the queue has been
 * cleared.  If hashing or mismatch handling fails, the nonzero result is
 * returned immediately and the queue is left as-is, so the caller is
 * responsible for clearing it.
 */
static int verity_verify_pending_blocks(struct dm_verity *v,
					struct dm_verity_io *io,
					struct bio *bio)
{
	const unsigned int block_size = 1 << v->data_dev_block_bits;
	struct pending_block *queue = io->pending_blocks;
	int idx, err;

	if (io->num_pending != 2) {
		/* Zero or one block queued: hash sequentially. */
		for (idx = 0; idx < io->num_pending; idx++) {
			err = verity_hash(v, io, queue[idx].data, block_size,
					  queue[idx].real_digest);
			if (unlikely(err))
				return err;
		}
	} else {
		/*
		 * Two blocks are only ever queued when the algorithm is
		 * SHA-256, so both can be hashed in one interleaved pass.
		 */
		sha256_finup_2x(v->initial_hashstate.sha256,
				queue[0].data, queue[1].data, block_size,
				queue[0].real_digest, queue[1].real_digest);
	}

	for (idx = 0; idx < io->num_pending; idx++) {
		struct pending_block *pb = &queue[idx];

		if (unlikely(memcmp(pb->real_digest, pb->want_digest,
				    v->digest_size) != 0)) {
			err = verity_handle_data_hash_mismatch(v, io, bio, pb);
			if (unlikely(err))
				return err;
			continue;
		}

		/* Digest matched: remember the block as validated, if tracked. */
		if (v->validated_blocks)
			set_bit(pb->blkno, v->validated_blocks);
	}

	verity_clear_pending_blocks(io);
	return 0;
}

/*
 * Verify one "dm_verity_io" structure.
 */
@@ -455,10 +510,14 @@ static int verity_verify_io(struct dm_verity_io *io)
{
	struct dm_verity *v = io->v;
	const unsigned int block_size = 1 << v->data_dev_block_bits;
	const int max_pending = v->use_sha256_finup_2x ? 2 : 1;
	struct bvec_iter iter_copy;
	struct bvec_iter *iter;
	struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
	unsigned int b;
	int r;

	io->num_pending = 0;

	if (static_branch_unlikely(&use_bh_wq_enabled) && io->in_bh) {
		/*
@@ -472,21 +531,22 @@ static int verity_verify_io(struct dm_verity_io *io)

	for (b = 0; b < io->n_blocks;
	     b++, bio_advance_iter(bio, iter, block_size)) {
		int r;
		sector_t cur_block = io->block + b;
		sector_t blkno = io->block + b;
		struct pending_block *block;
		bool is_zero;
		struct bio_vec bv;
		void *data;

		if (v->validated_blocks && bio->bi_status == BLK_STS_OK &&
		    likely(test_bit(cur_block, v->validated_blocks)))
		    likely(test_bit(blkno, v->validated_blocks)))
			continue;

		r = verity_hash_for_block(v, io, cur_block,
					  verity_io_want_digest(v, io),
		block = &io->pending_blocks[io->num_pending];

		r = verity_hash_for_block(v, io, blkno, block->want_digest,
					  &is_zero);
		if (unlikely(r < 0))
			return r;
			goto error;

		bv = bio_iter_iovec(bio, *iter);
		if (unlikely(bv.bv_len < block_size)) {
@@ -497,7 +557,8 @@ static int verity_verify_io(struct dm_verity_io *io)
			 * data block size to be greater than PAGE_SIZE.
			 */
			DMERR_LIMIT("unaligned io (data block spans pages)");
			return -EIO;
			r = -EIO;
			goto error;
		}

		data = bvec_kmap_local(&bv);
@@ -511,30 +572,26 @@ static int verity_verify_io(struct dm_verity_io *io)
			kunmap_local(data);
			continue;
		}

		r = verity_hash(v, io, data, block_size,
				verity_io_real_digest(v, io));
		if (unlikely(r < 0)) {
			kunmap_local(data);
			return r;
		block->data = data;
		block->blkno = blkno;
		if (++io->num_pending == max_pending) {
			r = verity_verify_pending_blocks(v, io, bio);
			if (unlikely(r))
				goto error;
		}

		if (likely(memcmp(verity_io_real_digest(v, io),
				  verity_io_want_digest(v, io), v->digest_size) == 0)) {
			if (v->validated_blocks)
				set_bit(cur_block, v->validated_blocks);
			kunmap_local(data);
			continue;
	}
		r = verity_handle_data_hash_mismatch(v, io, bio,
						     verity_io_want_digest(v, io),
						     cur_block, data);
		kunmap_local(data);

	if (io->num_pending) {
		r = verity_verify_pending_blocks(v, io, bio);
		if (unlikely(r))
			return r;
			goto error;
	}

	return 0;

error:
	verity_clear_pending_blocks(io);
	return r;
}

/*
@@ -1277,6 +1334,8 @@ static int verity_setup_hash_alg(struct dm_verity *v, const char *alg_name)
		 * interleaved hashing support.
		 */
		v->use_sha256_lib = true;
		if (sha256_finup_2x_is_optimized())
			v->use_sha256_finup_2x = true;
		ti->per_io_data_size =
			offsetofend(struct dm_verity_io, hash_ctx.sha256);
	} else {
+17 −14
Original line number Diff line number Diff line
@@ -64,6 +64,7 @@ struct dm_verity {
	bool hash_failed:1;	/* set if hash of any block failed */
	bool use_bh_wq:1;	/* try to verify in BH wq before normal work-queue */
	bool use_sha256_lib:1;	/* use SHA-256 library instead of generic crypto API */
	bool use_sha256_finup_2x:1; /* use interleaved hashing optimization */
	unsigned int digest_size;	/* digest size for the current hash algorithm */
	enum verity_mode mode;	/* mode for handling verification errors */
	enum verity_mode error_mode;/* mode for handling I/O errors */
@@ -83,6 +84,13 @@ struct dm_verity {
	mempool_t recheck_pool;
};

/*
 * One data block that has been queued for verification but not yet hashed
 * and compared.
 */
struct pending_block {
	void *data;	/* locally mapped block contents; unmapped when the queue is cleared */
	sector_t blkno;	/* number of this data block */
	u8 want_digest[HASH_MAX_DIGESTSIZE];	/* expected digest for this block */
	u8 real_digest[HASH_MAX_DIGESTSIZE];	/* digest computed from @data */
};

struct dm_verity_io {
	struct dm_verity *v;

@@ -100,8 +108,15 @@ struct dm_verity_io {
	struct work_struct bh_work;

	u8 tmp_digest[HASH_MAX_DIGESTSIZE];
	u8 real_digest[HASH_MAX_DIGESTSIZE];
	u8 want_digest[HASH_MAX_DIGESTSIZE];

	/*
	 * This is the queue of data blocks that are pending verification.  When
	 * the crypto layer supports interleaved hashing, we allow multiple
	 * blocks to be queued up in order to utilize it.  This can improve
	 * performance significantly vs. sequential hashing of each block.
	 */
	int num_pending;
	struct pending_block pending_blocks[2];

	/*
	 * Temporary space for hashing.  Either sha256 or shash is used,
@@ -116,18 +131,6 @@ struct dm_verity_io {
	} hash_ctx;
};

/*
 * Return the per-I/O buffer that holds the digest computed from the data.
 * @v is not used.
 */
static inline u8 *verity_io_real_digest(struct dm_verity *v,
					struct dm_verity_io *io)
{
	return &io->real_digest[0];
}

/*
 * Return the per-I/O buffer that holds the expected digest.
 * @v is not used.
 */
static inline u8 *verity_io_want_digest(struct dm_verity *v,
					struct dm_verity_io *io)
{
	return &io->want_digest[0];
}

/*
 * Hash @len bytes at @data and store the result in @digest.
 * Callers treat a nonzero return value as an error.
 */
extern int verity_hash(struct dm_verity *v, struct dm_verity_io *io,
		       const u8 *data, size_t len, u8 *digest);