Commit 3997e3bb authored by Linus Torvalds
Browse files
Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
      - Fix memory leak for peer-to-peer addresses
      - Fix dma map leaks on resource errors

 - Another bio integrity fix, fixing a recent regression

 - Fix for an issue with the request pre-allocation and caching when IO
   is queued, where if a bio split occurred and ended up blocking, the
   list could be corrupted

* tag 'block-7.1-20260522' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  block: avoid use-after-free in disk_free_zone_resources()
  blk-mq: pop cached request if it is usable
  nvme-pci: fix dma mapping leak on data setup error
  nvme-pci: fix dma_vecs leak on p2p memory
  bio-integrity-fs: pass data iter to bio_integrity_verify()
parents dbae42cf f6982769
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -55,6 +55,10 @@ int fs_bio_integrity_verify(struct bio *bio, sector_t sector, unsigned int size)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct bvec_iter data_iter = {
		.bi_sector	= sector,
		.bi_size	= size,
	};

	/*
	 * Reinitialize bip->bip_iter.
@@ -65,7 +69,7 @@ int fs_bio_integrity_verify(struct bio *bio, sector_t sector, unsigned int size)
	memset(&bip->bip_iter, 0, sizeof(bip->bip_iter));
	bip->bip_iter.bi_sector = sector;
	bip->bip_iter.bi_size = bio_integrity_bytes(bi, size >> SECTOR_SHIFT);
	return blk_status_to_errno(bio_integrity_verify(bio, &bip->bip_iter));
	return blk_status_to_errno(bio_integrity_verify(bio, &data_iter));
}

static int __init fs_bio_integrity_init(void)
+9 −25
Original line number Diff line number Diff line
@@ -3077,7 +3077,7 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
/*
 * Check if there is a suitable cached request and return it.
 */
static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
static struct request *blk_mq_get_cached_request(struct blk_plug *plug,
		struct request_queue *q, blk_opf_t opf)
{
	enum hctx_type type = blk_mq_get_hctx_type(opf);
@@ -3093,27 +3093,10 @@ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
		return NULL;
	if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
		return NULL;
	rq_list_pop(&plug->cached_rqs);
	return rq;
}

/*
 * Take a previously peeked cached request off the plug and prepare it for
 * @bio.
 *
 * @rq:   the cached request the caller intends to use
 * @plug: the plug whose ->cached_rqs list holds @rq
 * @bio:  the bio the request is being set up for
 *
 * Pops the head of plug->cached_rqs and warns once if that entry is not @rq,
 * then runs rq_qos_throttle() for @bio, re-initializes the request's time
 * stamps, copies bio->bi_opf into rq->cmd_flags and resets rq->queuelist.
 */
static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
		struct bio *bio)
{
	/* The head of the cached list is expected to be the peeked request. */
	if (rq_list_pop(&plug->cached_rqs) != rq)
		WARN_ON_ONCE(1);

	/*
	 * If any qos ->throttle() ends up blocking, we will have flushed the
	 * plug and hence killed the cached_rq list as well. That is why the
	 * entry is popped above, before we throttle.
	 */
	rq_qos_throttle(rq->q, bio);

	blk_mq_rq_time_init(rq, blk_time_get_ns());
	rq->cmd_flags = bio->bi_opf;
	INIT_LIST_HEAD(&rq->queuelist);
}

static bool bio_unaligned(const struct bio *bio, struct request_queue *q)
{
	unsigned int bs_mask = queue_logical_block_size(q) - 1;
@@ -3152,7 +3135,7 @@ void blk_mq_submit_bio(struct bio *bio)
	/*
	 * If the plug has a cached request for this queue, try to use it.
	 */
	rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf);
	rq = blk_mq_get_cached_request(plug, q, bio->bi_opf);

	/*
	 * A BIO that was released from a zone write plug has already been
@@ -3211,7 +3194,10 @@ void blk_mq_submit_bio(struct bio *bio)

new_request:
	if (rq) {
		blk_mq_use_cached_rq(rq, plug, bio);
		rq_qos_throttle(rq->q, bio);
		blk_mq_rq_time_init(rq, blk_time_get_ns());
		rq->cmd_flags = bio->bi_opf;
		INIT_LIST_HEAD(&rq->queuelist);
	} else {
		rq = blk_mq_get_new_requests(q, plug, bio);
		if (unlikely(!rq)) {
@@ -3257,12 +3243,10 @@ void blk_mq_submit_bio(struct bio *bio)
	return;

queue_exit:
	/*
	 * Don't drop the queue reference if we were trying to use a cached
	 * request and thus didn't acquire one.
	 */
	if (!rq)
		blk_queue_exit(q);
	else
		blk_mq_free_request(rq);
}

#ifdef CONFIG_BLK_MQ_STACKING
+3 −4
Original line number Diff line number Diff line
@@ -2001,8 +2001,10 @@ static void disk_set_zones_cond_array(struct gendisk *disk, u8 *zones_cond)

void disk_free_zone_resources(struct gendisk *disk)
{
	if (disk->zone_wplugs_worker)
	if (disk->zone_wplugs_worker) {
		kthread_stop(disk->zone_wplugs_worker);
		disk->zone_wplugs_worker = NULL;
	}
	WARN_ON_ONCE(!list_empty(&disk->zone_wplugs_list));

	if (disk->zone_wplugs_wq) {
@@ -2135,9 +2137,6 @@ static int disk_update_zone_resources(struct gendisk *disk,
	ret = queue_limits_commit_update(q, &lim);

unfreeze:
	if (ret)
		disk_free_zone_resources(disk);

	blk_mq_unfreeze_queue(q, memflags);

	return ret;
+30 −4
Original line number Diff line number Diff line
@@ -966,7 +966,8 @@ static bool nvme_pci_prp_save_mapping(struct request *req,
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

	if (dma_use_iova(&iod->dma_state) || !dma_need_unmap(dma_dev))
	if (dma_use_iova(&iod->dma_state) || !dma_need_unmap(dma_dev) ||
	    (iod->flags & IOD_DATA_P2P))
		return true;

	if (!iod->nr_dma_vecs) {
@@ -996,6 +997,23 @@ static bool nvme_pci_prp_iter_next(struct request *req, struct device *dma_dev,
	return nvme_pci_prp_save_mapping(req, dma_dev, iter);
}

/*
 * Unmap the single DMA range currently described by @iter.
 *
 * @req:   request the mapping belongs to
 * @iter:  DMA iterator; its ->addr, ->len and ->p2pdma.map are read here
 * @state: IOVA state handed to blk_rq_dma_unmap() (callers pass either the
 *         data or the metadata DMA state of the iod)
 *
 * Used on setup error paths so that a range that was already mapped is not
 * left mapped when the function bails out.
 */
static void nvme_unmap_iter(struct request *req, struct blk_dma_iter *iter,
			    struct dma_iova_state *state)
{
	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
	struct device *dev = nvmeq->dev->dev;

	/*
	 * NOTE(review): presumably a false return means the block layer
	 * helper did not perform the unmap itself, so it is done by hand
	 * below -- confirm against blk_rq_dma_unmap().
	 */
	if (!blk_rq_dma_unmap(req, dev, state, iter->len, iter->p2pdma.map)) {
		unsigned int attrs = 0;

		/* Ranges mapped through the host bridge carry the MMIO attribute. */
		if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
			attrs |= DMA_ATTR_MMIO;

		dma_unmap_phys(dev, iter->addr, iter->len, rq_dma_dir(req),
			       attrs);
	}
}

static blk_status_t nvme_pci_setup_data_prp(struct request *req,
		struct blk_dma_iter *iter)
{
@@ -1006,8 +1024,10 @@ static blk_status_t nvme_pci_setup_data_prp(struct request *req,
	unsigned int prp_len, i;
	__le64 *prp_list;

	if (!nvme_pci_prp_save_mapping(req, nvmeq->dev->dev, iter))
	if (!nvme_pci_prp_save_mapping(req, nvmeq->dev->dev, iter)) {
		nvme_unmap_iter(req, iter, &iod->dma_state);
		return iter->status;
	}

	/*
	 * PRP1 always points to the start of the DMA transfers.
@@ -1112,6 +1132,7 @@ static blk_status_t nvme_pci_setup_data_prp(struct request *req,
	dev_err_once(nvmeq->dev->dev,
		"Incorrectly formed request for payload:%d nents:%d\n",
		blk_rq_payload_bytes(req), blk_rq_nr_phys_segments(req));
	nvme_unmap_data(req);
	return BLK_STS_IOERR;
}

@@ -1155,8 +1176,11 @@ static blk_status_t nvme_pci_setup_data_sgl(struct request *req,

	sg_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC,
			&sgl_dma);
	if (!sg_list)
	if (!sg_list) {
		nvme_unmap_iter(req, iter, &iod->dma_state);
		return BLK_STS_RESOURCE;
	}

	iod->descriptors[iod->nr_descriptors++] = sg_list;

	do {
@@ -1313,8 +1337,10 @@ static blk_status_t nvme_pci_setup_meta_iter(struct request *req)

	sg_list = dma_pool_alloc(nvmeq->descriptor_pools.small, GFP_ATOMIC,
			&sgl_dma);
	if (!sg_list)
	if (!sg_list) {
		nvme_unmap_iter(req, &iter, &iod->meta_dma_state);
		return BLK_STS_RESOURCE;
	}

	iod->meta_descriptor = sg_list;
	iod->meta_dma = sgl_dma;