Commit f68ff6bc authored by Jens Axboe
Browse files

Merge branch 'autopi-deadlock' into for-6.19/block

Currently the automatic block layer PI generation allocates the integrity
buffer using kmalloc, and thus could deadlock or fail I/O requests under
memory pressure.

Fix this by adding a mempool, and capping the maximum I/O size on PI
capable devices to not exceed the allocation size of the mempool.

Link: https://lore.kernel.org/linux-block/20251103101653.2083310-1-hch@lst.de/


Signed-off-by: Jens Axboe <axboe@kernel.dk>

* autopi-deadlock:
  block: make bio auto-integrity deadlock safe
  block: blocking mempool_alloc doesn't fail
parents 3f5b1169 ec7f31b2
Loading
Loading
Loading
Loading
+3 −23
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@ static void bio_integrity_finish(struct bio_integrity_data *bid)
{
	bid->bio->bi_integrity = NULL;
	bid->bio->bi_opf &= ~REQ_INTEGRITY;
	kfree(bvec_virt(bid->bip.bip_vec));
	bio_integrity_free_buf(&bid->bip);
	mempool_free(bid, &bid_pool);
}

@@ -110,8 +110,6 @@ bool bio_integrity_prep(struct bio *bio)
	struct bio_integrity_data *bid;
	bool set_flags = true;
	gfp_t gfp = GFP_NOIO;
	unsigned int len;
	void *buf;

	if (!bi)
		return true;
@@ -152,19 +150,12 @@ bool bio_integrity_prep(struct bio *bio)
	if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
		return true;

	/* Allocate kernel buffer for protection data */
	len = bio_integrity_bytes(bi, bio_sectors(bio));
	buf = kmalloc(len, gfp);
	if (!buf)
		goto err_end_io;
	bid = mempool_alloc(&bid_pool, GFP_NOIO);
	if (!bid)
		goto err_free_buf;
	bio_integrity_init(bio, &bid->bip, &bid->bvec, 1);

	bid->bio = bio;

	bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
	bio_integrity_alloc_buf(bio, gfp & __GFP_ZERO);

	bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);

	if (set_flags) {
@@ -176,23 +167,12 @@ bool bio_integrity_prep(struct bio *bio)
			bid->bip.bip_flags |= BIP_CHECK_REFTAG;
	}

	if (bio_integrity_add_page(bio, virt_to_page(buf), len,
			offset_in_page(buf)) < len)
		goto err_end_io;

	/* Auto-generate integrity metadata if this is a write */
	if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
		blk_integrity_generate(bio);
	else
		bid->saved_bio_iter = bio->bi_iter;
	return true;

err_free_buf:
	kfree(buf);
err_end_io:
	bio->bi_status = BLK_STS_RESOURCE;
	bio_endio(bio);
	return false;
}
EXPORT_SYMBOL(bio_integrity_prep);

+48 −0
Original line number Diff line number Diff line
@@ -14,6 +14,45 @@ struct bio_integrity_alloc {
	struct bio_vec			bvecs[];
};

static mempool_t integrity_buf_pool;

/**
 * bio_integrity_alloc_buf - attach a single-segment integrity buffer to a bio
 * @bio:		bio whose integrity payload receives the buffer
 * @zero_buffer:	if true, the buffer contents are zeroed
 *
 * The buffer length is derived from the bio's sector count and the integrity
 * profile of the disk the bio targets.  The buffer is stored as the one and
 * only bvec of the payload returned by bio_integrity(), which must already be
 * set up by the caller.
 *
 * A kmalloc() that is not allowed to enter direct reclaim is tried first.  If
 * it fails, a page from integrity_buf_pool is used instead and the payload is
 * tagged with BIP_MEMPOOL so that bio_integrity_free_buf() hands the page back
 * to the pool rather than to kfree().
 */
void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct bio_integrity_payload *bip = bio_integrity(bio);
	unsigned int len = bio_integrity_bytes(bi, bio_sectors(bio));
	gfp_t gfp = GFP_NOIO | (zero_buffer ? __GFP_ZERO : 0);
	void *buf;

	/*
	 * Opportunistic attempt: no direct reclaim, no emergency reserves, no
	 * retries and no allocation-failure warning, because the mempool
	 * below is the real fallback.
	 */
	buf = kmalloc(len, (gfp & ~__GFP_DIRECT_RECLAIM) |
			__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN);
	if (unlikely(!buf)) {
		struct page *page;

		/*
		 * Use GFP_NOIO like the rest of this function and its caller:
		 * this runs while a bio is being prepared, so reclaim must
		 * not be allowed to start new I/O.  The mask still permits
		 * blocking, so mempool_alloc() waits for an element instead
		 * of failing and no NULL check is needed.
		 *
		 * NOTE(review): len is assumed to fit in one pool element
		 * (BLK_INTEGRITY_MAX_SIZE bytes); confirm the queue limits
		 * enforce that for every caller.
		 */
		page = mempool_alloc(&integrity_buf_pool, GFP_NOIO);
		/* __GFP_ZERO is not passed to the pool, so zero by hand. */
		if (zero_buffer)
			memset(page_address(page), 0, len);
		bvec_set_page(&bip->bip_vec[0], page, len, 0);
		bip->bip_flags |= BIP_MEMPOOL;
	} else {
		bvec_set_page(&bip->bip_vec[0], virt_to_page(buf), len,
				offset_in_page(buf));
	}

	bip->bip_vcnt = 1;
	bip->bip_iter.bi_size = len;
}

/**
 * bio_integrity_free_buf - release the buffer behind the first integrity bvec
 * @bip:	integrity payload whose first bvec holds the buffer
 *
 * Payloads flagged BIP_MEMPOOL give their page back to integrity_buf_pool;
 * all others have the buffer address passed to kfree().
 */
void bio_integrity_free_buf(struct bio_integrity_payload *bip)
{
	struct bio_vec *vec = &bip->bip_vec[0];

	if (!(bip->bip_flags & BIP_MEMPOOL)) {
		kfree(bvec_virt(vec));
		return;
	}

	mempool_free(vec->bv_page, &integrity_buf_pool);
}

/**
 * bio_integrity_free - Free bio integrity payload
 * @bio:	bio containing bip to be freed
@@ -438,3 +477,12 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,

	return 0;
}

/*
 * Set up integrity_buf_pool with BIO_POOL_SIZE elements, each a page
 * allocation of order get_order(BLK_INTEGRITY_MAX_SIZE).  Failure to create
 * the pool is fatal.
 */
static int __init bio_integrity_initfn(void)
{
	int err;

	err = mempool_init_page_pool(&integrity_buf_pool, BIO_POOL_SIZE,
			get_order(BLK_INTEGRITY_MAX_SIZE));
	if (err)
		panic("bio: can't create integrity buf pool\n");

	return 0;
}
subsys_initcall(bio_integrity_initfn);
+21 −0
Original line number Diff line number Diff line
@@ -123,6 +123,19 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
	return 0;
}

/*
 * Maximum size of I/O that needs a block layer integrity buffer.  Limited
 * by the number of intervals for which we can fit the integrity buffer into
 * the buffer size.  Because the buffer is a single segment it is also limited
 * by the maximum segment size.
 */
static inline unsigned int max_integrity_io_size(struct queue_limits *lim)
{
	return min_t(unsigned int, lim->max_segment_size,
		(BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) <<
			lim->integrity.interval_exp);
}

static int blk_validate_integrity_limits(struct queue_limits *lim)
{
	struct blk_integrity *bi = &lim->integrity;
@@ -184,6 +197,14 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
	if (!bi->interval_exp)
		bi->interval_exp = ilog2(lim->logical_block_size);

	/*
	 * The block layer automatically adds integrity data for bios that don't
	 * already have it.  Limit the I/O size so that a single maximum size
	 * metadata segment can cover the integrity data for the entire I/O.
	 */
	lim->max_sectors = min(lim->max_sectors,
		max_integrity_io_size(lim) >> SECTOR_SHIFT);

	return 0;
}

+6 −0
Original line number Diff line number Diff line
@@ -14,6 +14,8 @@ enum bip_flags {
	BIP_CHECK_REFTAG	= 1 << 6, /* reftag check */
	BIP_CHECK_APPTAG	= 1 << 7, /* apptag check */
	BIP_P2P_DMA		= 1 << 8, /* using P2P address */

	BIP_MEMPOOL		= 1 << 15, /* buffer backed by mempool */
};

struct bio_integrity_payload {
@@ -140,4 +142,8 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
	return 0;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer);
void bio_integrity_free_buf(struct bio_integrity_payload *bip);

#endif /* _LINUX_BIO_INTEGRITY_H */
+5 −0
Original line number Diff line number Diff line
@@ -8,6 +8,11 @@

struct request;

/*
 * Maximum contiguous integrity buffer allocation.
 */
#define BLK_INTEGRITY_MAX_SIZE		SZ_2M

enum blk_integrity_flags {
	BLK_INTEGRITY_NOVERIFY		= 1 << 0,
	BLK_INTEGRITY_NOGENERATE	= 1 << 1,