Commit fe11ac19 authored by Eric Biggers, committed by David Sterba
Browse files

btrfs: switch to library APIs for checksums



Make btrfs use the library APIs instead of crypto_shash, for all
checksum computations.  This has many benefits:

- Allows future checksum types, e.g. XXH3 or CRC64, to be more easily
  supported.  Only a library API will be needed, not crypto_shash too.

- Eliminates the overhead of the generic crypto layer, including an
  indirect call for every function call and other API overhead.  A
  microbenchmark of btrfs_check_read_bio() with crc32c checksums shows a
  speedup from 658 cycles to 608 cycles per 4096-byte block.

- Decreases the stack usage of btrfs by reducing the size of checksum
  contexts from 384 bytes to 240 bytes, and by eliminating the need for
  some functions to declare a checksum context at all.

- Increases reliability.  The library functions always succeed and
  return void.  In contrast, crypto_shash can fail and return errors.
  Also, the library functions are guaranteed to be available when btrfs
  is loaded; there's no longer any need to use module softdeps to try to
  work around the crypto modules sometimes not being loaded.

- Fixes a bug where blake2b checksums didn't work on kernels booted with
  fips=1.  Since btrfs checksums are for integrity only, it's fine for
  them to use non-FIPS-approved algorithms.

Note that with having to handle 4 algorithms instead of just 1-2, this
commit does result in a slightly positive diffstat.  That being said,
this wouldn't have been the case if btrfs had actually checked for
errors from crypto_shash, which technically it should have been doing.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Neal Gompa <neal@gompa.dev>
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent b39b26e0
Loading
Loading
Loading
Loading
+3 −5
Original line number Diff line number Diff line
@@ -4,11 +4,8 @@ config BTRFS_FS
	tristate "Btrfs filesystem support"
	select BLK_CGROUP_PUNT_BIO
	select CRC32
	select CRYPTO
	select CRYPTO_CRC32C
	select CRYPTO_XXHASH
	select CRYPTO_SHA256
	select CRYPTO_BLAKE2B
	select CRYPTO_LIB_BLAKE2B
	select CRYPTO_LIB_SHA256
	select ZLIB_INFLATE
	select ZLIB_DEFLATE
	select LZO_COMPRESS
@@ -18,6 +15,7 @@ config BTRFS_FS
	select FS_IOMAP
	select RAID6_PQ
	select XOR_BLOCKS
	select XXHASH
	depends on PAGE_SIZE_LESS_THAN_256KB

	help
+0 −1
Original line number Diff line number Diff line
@@ -21,7 +21,6 @@
#include <linux/sched/mm.h>
#include <linux/log2.h>
#include <linux/shrinker.h>
#include <crypto/hash.h>
#include "misc.h"
#include "ctree.h"
#include "fs.h"
+17 −51
Original line number Diff line number Diff line
@@ -18,7 +18,6 @@
#include <linux/crc32c.h>
#include <linux/sched/mm.h>
#include <linux/unaligned.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -62,12 +61,6 @@
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);

static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
{
	if (fs_info->csum_shash)
		crypto_free_shash(fs_info->csum_shash);
}

/*
 * Compute the csum of a btree block and store the result to provided buffer.
 */
@@ -76,12 +69,11 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
	struct btrfs_fs_info *fs_info = buf->fs_info;
	int num_pages;
	u32 first_page_part;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	struct btrfs_csum_ctx csum;
	char *kaddr;
	int i;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);
	btrfs_csum_init(&csum, fs_info->csum_type);

	if (buf->addr) {
		/* Pages are contiguous, handle them as a big one. */
@@ -94,21 +86,21 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
		num_pages = num_extent_pages(buf);
	}

	crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
	btrfs_csum_update(&csum, kaddr + BTRFS_CSUM_SIZE,
			  first_page_part - BTRFS_CSUM_SIZE);

	/*
	 * Multiple single-page folios case would reach here.
	 *
	 * nodesize <= PAGE_SIZE and large folio all handled by above
	 * crypto_shash_update() already.
	 * btrfs_csum_update() already.
	 */
	for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
		kaddr = folio_address(buf->folios[i]);
		crypto_shash_update(shash, kaddr, PAGE_SIZE);
		btrfs_csum_update(&csum, kaddr, PAGE_SIZE);
	}
	memset(result, 0, BTRFS_CSUM_SIZE);
	crypto_shash_final(shash, result);
	btrfs_csum_final(&csum, result);
}

/*
@@ -160,17 +152,14 @@ static bool btrfs_supported_super_csum(u16 csum_type)
int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
			   const struct btrfs_super_block *disk_sb)
{
	char result[BTRFS_CSUM_SIZE];
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);

	shash->tfm = fs_info->csum_shash;
	u8 result[BTRFS_CSUM_SIZE];

	/*
	 * The super_block structure does not span the whole
	 * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
	 * filled with zeros and is included in the checksum.
	 */
	crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE,
	btrfs_csum(fs_info->csum_type, (const u8 *)disk_sb + BTRFS_CSUM_SIZE,
		   BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);

	if (memcmp(disk_sb->csum, result, fs_info->csum_size))
@@ -1229,7 +1218,6 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
		ASSERT(percpu_counter_sum_positive(em_counter) == 0);
	percpu_counter_destroy(em_counter);
	percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
	btrfs_free_csum_hash(fs_info);
	btrfs_free_stripe_hash_table(fs_info);
	btrfs_free_ref_cache(fs_info);
	kfree(fs_info->balance_ctl);
@@ -1983,21 +1971,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
	return 0;
}

static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
static void btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
{
	struct crypto_shash *csum_shash;
	const char *csum_driver = btrfs_super_csum_driver(csum_type);

	csum_shash = crypto_alloc_shash(csum_driver, 0, 0);

	if (IS_ERR(csum_shash)) {
		btrfs_err(fs_info, "error allocating %s hash for checksum",
			  csum_driver);
		return PTR_ERR(csum_shash);
	}

	fs_info->csum_shash = csum_shash;

	/* Check if the checksum implementation is a fast accelerated one. */
	switch (csum_type) {
	case BTRFS_CSUM_TYPE_CRC32:
@@ -2011,10 +1986,8 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
		break;
	}

	btrfs_info(fs_info, "using %s (%s) checksum algorithm",
			btrfs_super_csum_name(csum_type),
			crypto_shash_driver_name(csum_shash));
	return 0;
	btrfs_info(fs_info, "using %s checksum algorithm",
		   btrfs_super_csum_name(csum_type));
}

static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
@@ -3302,12 +3275,9 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
	}

	fs_info->csum_size = btrfs_super_csum_size(disk_super);
	fs_info->csum_type = csum_type;

	ret = btrfs_init_csum_hash(fs_info, csum_type);
	if (ret) {
		btrfs_release_disk_super(disk_super);
		goto fail_alloc;
	}
	btrfs_init_csum_hash(fs_info, csum_type);

	/*
	 * We want to check superblock checksum, the type is stored inside.
@@ -3709,7 +3679,6 @@ static int write_dev_supers(struct btrfs_device *device,
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct address_space *mapping = device->bdev->bd_mapping;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	int i;
	int ret;
	u64 bytenr, bytenr_orig;
@@ -3719,8 +3688,6 @@ static int write_dev_supers(struct btrfs_device *device,
	if (max_mirrors == 0)
		max_mirrors = BTRFS_SUPER_MIRROR_MAX;

	shash->tfm = fs_info->csum_shash;

	for (i = 0; i < max_mirrors; i++) {
		struct folio *folio;
		struct bio *bio;
@@ -3744,9 +3711,8 @@ static int write_dev_supers(struct btrfs_device *device,

		btrfs_set_super_bytenr(sb, bytenr_orig);

		crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
				    BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
				    sb->csum);
		btrfs_csum(fs_info->csum_type, (const u8 *)sb + BTRFS_CSUM_SIZE,
			   BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, sb->csum);

		folio = __filemap_get_folio(mapping, bytenr >> PAGE_SHIFT,
					    FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+0 −4
Original line number Diff line number Diff line
@@ -8,7 +8,6 @@
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
@@ -769,7 +768,6 @@ static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src)
{
	struct btrfs_inode *inode = bbio->inode;
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	struct bio *bio = &bbio->bio;
	struct btrfs_ordered_sum *sums = bbio->sums;
	struct bvec_iter iter = *src;
@@ -781,8 +779,6 @@ static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src)
	u32 offset = 0;
	int index = 0;

	shash->tfm = fs_info->csum_shash;

	btrfs_bio_for_each_block(paddr, bio, &iter, step) {
		paddrs[(offset / step) % nr_steps] = paddr;
		offset += step;
+86 −14
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#include <linux/crc32.h>
#include "messages.h"
#include "fs.h"
#include "accessors.h"
@@ -8,13 +9,11 @@
static const struct btrfs_csums {
	u16		size;
	const char	name[10];
	const char	driver[12];
} btrfs_csums[] = {
	[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
	[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
	[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
	[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
				     .driver = "blake2b-256" },
	[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b" },
};

/* This exists for btrfs-progs usages. */
@@ -37,21 +36,94 @@ const char *btrfs_super_csum_name(u16 csum_type)
	return btrfs_csums[csum_type].name;
}

/*
 * Return driver name if defined, otherwise the name that's also a valid driver
 * name.
 */
const char *btrfs_super_csum_driver(u16 csum_type)
size_t __attribute_const__ btrfs_get_num_csums(void)
{
	/* csum type is validated at mount time */
	return btrfs_csums[csum_type].driver[0] ?
		btrfs_csums[csum_type].driver :
		btrfs_csums[csum_type].name;
	return ARRAY_SIZE(btrfs_csums);
}

size_t __attribute_const__ btrfs_get_num_csums(void)
void btrfs_csum(u16 csum_type, const u8 *data, size_t len, u8 *out)
{
	return ARRAY_SIZE(btrfs_csums);
	switch (csum_type) {
	case BTRFS_CSUM_TYPE_CRC32:
		put_unaligned_le32(~crc32c(~0, data, len), out);
		break;
	case BTRFS_CSUM_TYPE_XXHASH:
		put_unaligned_le64(xxh64(data, len, 0), out);
		break;
	case BTRFS_CSUM_TYPE_SHA256:
		sha256(data, len, out);
		break;
	case BTRFS_CSUM_TYPE_BLAKE2:
		blake2b(NULL, 0, data, len, out, 32);
		break;
	default:
		/* Checksum type is validated at mount time. */
		BUG();
	}
}

/*
 * Begin an incremental checksum computation.
 *
 * @ctx:       context to set up; the chosen type is stored in it
 * @csum_type: one of the BTRFS_CSUM_TYPE_* values
 *
 * Only the per-algorithm state matching @csum_type is initialized.
 */
void btrfs_csum_init(struct btrfs_csum_ctx *ctx, u16 csum_type)
{
	ctx->csum_type = csum_type;

	if (ctx->csum_type == BTRFS_CSUM_TYPE_CRC32) {
		/* crc32c state starts as all ones. */
		ctx->crc32 = ~0;
	} else if (ctx->csum_type == BTRFS_CSUM_TYPE_XXHASH) {
		xxh64_reset(&ctx->xxh64, 0);
	} else if (ctx->csum_type == BTRFS_CSUM_TYPE_SHA256) {
		sha256_init(&ctx->sha256);
	} else if (ctx->csum_type == BTRFS_CSUM_TYPE_BLAKE2) {
		/*
		 * NOTE(review): 32 is presumably the digest length in bytes,
		 * matching the 32-byte blake2b entry in btrfs_csums[].
		 */
		blake2b_init(&ctx->blake2b, 32);
	} else {
		/* Checksum type is validated at mount time. */
		BUG();
	}
}

/*
 * Feed more data into an in-progress checksum.
 *
 * @ctx:  context whose csum_type selects the algorithm to advance
 * @data: start of the bytes to add
 * @len:  number of bytes at @data
 */
void btrfs_csum_update(struct btrfs_csum_ctx *ctx, const u8 *data, size_t len)
{
	if (ctx->csum_type == BTRFS_CSUM_TYPE_CRC32) {
		/* crc32c keeps its running value directly in the context. */
		ctx->crc32 = crc32c(ctx->crc32, data, len);
	} else if (ctx->csum_type == BTRFS_CSUM_TYPE_XXHASH) {
		xxh64_update(&ctx->xxh64, data, len);
	} else if (ctx->csum_type == BTRFS_CSUM_TYPE_SHA256) {
		sha256_update(&ctx->sha256, data, len);
	} else if (ctx->csum_type == BTRFS_CSUM_TYPE_BLAKE2) {
		blake2b_update(&ctx->blake2b, data, len);
	} else {
		/* Checksum type is validated at mount time. */
		BUG();
	}
}

/*
 * Finish an incremental checksum and write the result to @out.
 *
 * @ctx: context whose csum_type selects the algorithm to finalize
 * @out: destination buffer for the checksum bytes
 */
void btrfs_csum_final(struct btrfs_csum_ctx *ctx, u8 *out)
{
	if (ctx->csum_type == BTRFS_CSUM_TYPE_CRC32) {
		/* The running crc32c value is bit-inverted before being stored. */
		put_unaligned_le32(~ctx->crc32, out);
	} else if (ctx->csum_type == BTRFS_CSUM_TYPE_XXHASH) {
		put_unaligned_le64(xxh64_digest(&ctx->xxh64), out);
	} else if (ctx->csum_type == BTRFS_CSUM_TYPE_SHA256) {
		sha256_final(&ctx->sha256, out);
	} else if (ctx->csum_type == BTRFS_CSUM_TYPE_BLAKE2) {
		blake2b_final(&ctx->blake2b, out);
	} else {
		/* Checksum type is validated at mount time. */
		BUG();
	}
}

/*
Loading