Commit ec7f31b2 authored by Christoph Hellwig, committed by Jens Axboe
Browse files

block: make bio auto-integrity deadlock safe



The current block layer automatic integrity protection allocates the
actual integrity buffer, which has three problems:

 - because it happens at the bottom of the I/O stack and doesn't use a
   mempool it can deadlock under load
 - because the data size in a bio is almost unbounded when using large
   folios it can relatively easily exceed the maximum kmalloc size
 - even when it does not exceed the maximum kmalloc size, it could
   exceed the maximum segment size of the device

Fix this by limiting the I/O size so that we can allocate at least a
2MiB integrity buffer, i.e. 128MiB for 8 byte PI and 512 byte integrity
intervals, and create a mempool as a last resort for this maximum size,
mirroring the scheme used for bvecs.  As a nice upside none of this
can fail now, so we remove the error handling and open code the
trivial addition of the bip vec.

The new allocation helpers sit outside of bio-integrity-auto.c because
I plan to reuse them for file system based PI in the near future.

Fixes: 7ba1ba12 ("block: Block layer data integrity support")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent eef09f74
Loading
Loading
Loading
Loading
+3 −19
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@ static void bio_integrity_finish(struct bio_integrity_data *bid)
{
	bid->bio->bi_integrity = NULL;
	bid->bio->bi_opf &= ~REQ_INTEGRITY;
	kfree(bvec_virt(bid->bip.bip_vec));
	bio_integrity_free_buf(&bid->bip);
	mempool_free(bid, &bid_pool);
}

@@ -110,8 +110,6 @@ bool bio_integrity_prep(struct bio *bio)
	struct bio_integrity_data *bid;
	bool set_flags = true;
	gfp_t gfp = GFP_NOIO;
	unsigned int len;
	void *buf;

	if (!bi)
		return true;
@@ -152,17 +150,12 @@ bool bio_integrity_prep(struct bio *bio)
	if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
		return true;

	/* Allocate kernel buffer for protection data */
	len = bio_integrity_bytes(bi, bio_sectors(bio));
	buf = kmalloc(len, gfp);
	if (!buf)
		goto err_end_io;
	bid = mempool_alloc(&bid_pool, GFP_NOIO);
	bio_integrity_init(bio, &bid->bip, &bid->bvec, 1);

	bid->bio = bio;

	bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
	bio_integrity_alloc_buf(bio, gfp & __GFP_ZERO);

	bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);

	if (set_flags) {
@@ -174,21 +167,12 @@ bool bio_integrity_prep(struct bio *bio)
			bid->bip.bip_flags |= BIP_CHECK_REFTAG;
	}

	if (bio_integrity_add_page(bio, virt_to_page(buf), len,
			offset_in_page(buf)) < len)
		goto err_end_io;

	/* Auto-generate integrity metadata if this is a write */
	if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
		blk_integrity_generate(bio);
	else
		bid->saved_bio_iter = bio->bi_iter;
	return true;

err_end_io:
	bio->bi_status = BLK_STS_RESOURCE;
	bio_endio(bio);
	return false;
}
EXPORT_SYMBOL(bio_integrity_prep);

+48 −0
Original line number Diff line number Diff line
@@ -14,6 +14,45 @@ struct bio_integrity_alloc {
	struct bio_vec			bvecs[];
};

static mempool_t integrity_buf_pool;

/**
 * bio_integrity_alloc_buf - attach a single-segment integrity buffer to a bio
 * @bio:		bio that already has an integrity payload set up
 * @zero_buffer:	if true, the buffer contents are zeroed before use
 *
 * Sizes the buffer from the integrity profile of the bio's disk and the
 * number of sectors in @bio, then installs it as the only entry of the
 * payload's bvec array.
 *
 * A kmalloc() that is not allowed to enter direct reclaim, retry, warn or
 * dip into emergency reserves is tried first.  If it fails, a page from
 * integrity_buf_pool is used instead and BIP_MEMPOOL is set so that
 * bio_integrity_free_buf() returns it to the right place.
 *
 * NOTE(review): the mempool fallback assumes the computed length never
 * exceeds the pool element size; that appears to be enforced through the
 * queue limits rather than here - confirm against the limits code.
 */
void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct bio_integrity_payload *bip = bio_integrity(bio);
	unsigned int len = bio_integrity_bytes(bi, bio_sectors(bio));
	gfp_t gfp = GFP_NOIO | (zero_buffer ? __GFP_ZERO : 0);
	void *buf;

	/* Opportunistic attempt only: fail fast and quietly under pressure. */
	buf = kmalloc(len, (gfp & ~__GFP_DIRECT_RECLAIM) |
			__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN);
	if (unlikely(!buf)) {
		struct page *page;

		/*
		 * Use GFP_NOIO like the other allocations on this path:
		 * GFP_NOFS would permit reclaim to start new I/O while we
		 * are in the middle of submitting I/O ourselves.
		 */
		page = mempool_alloc(&integrity_buf_pool, GFP_NOIO);
		/* Pool pages are recycled, so zero them by hand if asked. */
		if (zero_buffer)
			memset(page_address(page), 0, len);
		bvec_set_page(&bip->bip_vec[0], page, len, 0);
		bip->bip_flags |= BIP_MEMPOOL;
	} else {
		bvec_set_page(&bip->bip_vec[0], virt_to_page(buf), len,
				offset_in_page(buf));
	}

	bip->bip_vcnt = 1;
	bip->bip_iter.bi_size = len;
}

/**
 * bio_integrity_free_buf - release the buffer behind a payload's first bvec
 * @bip:	integrity payload whose first bvec holds the buffer
 *
 * Buffers flagged with BIP_MEMPOOL are handed back to integrity_buf_pool;
 * everything else is released with kfree().
 */
void bio_integrity_free_buf(struct bio_integrity_payload *bip)
{
	struct bio_vec *vec = bip->bip_vec;

	if (!(bip->bip_flags & BIP_MEMPOOL)) {
		kfree(bvec_virt(vec));
		return;
	}

	mempool_free(vec->bv_page, &integrity_buf_pool);
}

/**
 * bio_integrity_free - Free bio integrity payload
 * @bio:	bio containing bip to be freed
@@ -438,3 +477,12 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,

	return 0;
}

/*
 * Set up the page mempool that backs integrity buffers when the regular
 * allocation fails.  Each element is large enough to hold
 * BLK_INTEGRITY_MAX_SIZE bytes; failure to create the pool is fatal.
 */
static int __init bio_integrity_initfn(void)
{
	int err;

	err = mempool_init_page_pool(&integrity_buf_pool, BIO_POOL_SIZE,
				     get_order(BLK_INTEGRITY_MAX_SIZE));
	if (err)
		panic("bio: can't create integrity buf pool\n");

	return 0;
}
subsys_initcall(bio_integrity_initfn);
+21 −0
Original line number Diff line number Diff line
@@ -123,6 +123,19 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
	return 0;
}

/*
 * Maximum size of I/O that needs a block layer integrity buffer.  Limited
 * by the number of intervals for which we can fit the integrity buffer into
 * the buffer size.  Because the buffer is a single segment it is also limited
 * by the maximum segment size.
 */
static inline unsigned int max_integrity_io_size(struct queue_limits *lim)
{
	return min_t(unsigned int, lim->max_segment_size,
		(BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) <<
			lim->integrity.interval_exp);
}

static int blk_validate_integrity_limits(struct queue_limits *lim)
{
	struct blk_integrity *bi = &lim->integrity;
@@ -184,6 +197,14 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
	if (!bi->interval_exp)
		bi->interval_exp = ilog2(lim->logical_block_size);

	/*
	 * The block layer automatically adds integrity data for bios that don't
	 * already have it.  Limit the I/O size so that a single maximum size
	 * metadata segment can cover the integrity data for the entire I/O.
	 */
	lim->max_sectors = min(lim->max_sectors,
		max_integrity_io_size(lim) >> SECTOR_SHIFT);

	return 0;
}

+6 −0
Original line number Diff line number Diff line
@@ -14,6 +14,8 @@ enum bip_flags {
	BIP_CHECK_REFTAG	= 1 << 6, /* reftag check */
	BIP_CHECK_APPTAG	= 1 << 7, /* apptag check */
	BIP_P2P_DMA		= 1 << 8, /* using P2P address */

	BIP_MEMPOOL		= 1 << 15, /* buffer backed by mempool */
};

struct bio_integrity_payload {
@@ -140,4 +142,8 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
	return 0;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer);
void bio_integrity_free_buf(struct bio_integrity_payload *bip);

#endif /* _LINUX_BIO_INTEGRITY_H */
+5 −0
Original line number Diff line number Diff line
@@ -8,6 +8,11 @@

struct request;

/*
 * Maximum contiguous integrity buffer allocation.
 *
 * Used both to size the elements of the integrity buffer mempool and to
 * derive the largest I/O size for which a single integrity buffer suffices.
 */
#define BLK_INTEGRITY_MAX_SIZE		SZ_2M

enum blk_integrity_flags {
	BLK_INTEGRITY_NOVERIFY		= 1 << 0,
	BLK_INTEGRITY_NOGENERATE	= 1 << 1,