Commit a43d304f authored by Christoph Hellwig
Browse files

nvme-pci: use a better encoding for small prp pool allocations



Add a separate flag to encode that the transfer is using the small
page sized pool, and use a normal 0..n count for the number of
descriptors.

Contains improvements and suggestions from Kanchan Joshi
<joshi.k@samsung.com> and Leon Romanovsky <leon@kernel.org>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Leon Romanovsky <leon@kernel.org>
parent 357b536b
Loading
Loading
Loading
Loading
+39 −43
Original line number Diff line number Diff line
@@ -229,6 +229,9 @@ struct nvme_queue {
/* Per-command state bits kept in the flags field of struct nvme_iod. */
enum nvme_iod_flags {
	/* the timeout handler has aborted this command */
	IOD_ABORTED			= (1U << 0),

	/* descriptors for this command are taken from the small pool */
	IOD_SMALL_DESCRIPTOR		= (1U << 1),
};

/*
@@ -238,7 +241,7 @@ struct nvme_iod {
	struct nvme_request req;
	struct nvme_command cmd;
	u8 flags;
	s8 nr_descriptors;
	u8 nr_descriptors;
	unsigned int dma_len;	/* length of single DMA segment mapping */
	dma_addr_t first_dma;
	dma_addr_t meta_dma;
@@ -589,13 +592,27 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
	return true;
}

static void nvme_free_prps(struct nvme_queue *nvmeq, struct request *req)
/*
 * Select the descriptor pool for a request: the queue's small pool when
 * IOD_SMALL_DESCRIPTOR is set in iod->flags, its large pool otherwise.
 */
static inline struct dma_pool *nvme_dma_pool(struct nvme_queue *nvmeq,
		struct nvme_iod *iod)
{
	return (iod->flags & IOD_SMALL_DESCRIPTOR) ?
		nvmeq->descriptor_pools.small : nvmeq->descriptor_pools.large;
}

static void nvme_free_descriptors(struct nvme_queue *nvmeq, struct request *req)
{
	const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	dma_addr_t dma_addr = iod->first_dma;
	int i;

	if (iod->nr_descriptors == 1) {
		dma_pool_free(nvme_dma_pool(nvmeq, iod), iod->descriptors[0],
				dma_addr);
		return;
	}

	for (i = 0; i < iod->nr_descriptors; i++) {
		__le64 *prp_list = iod->descriptors[i];
		dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
@@ -620,15 +637,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_queue *nvmeq,
	WARN_ON_ONCE(!iod->sgt.nents);

	dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0);

	if (iod->nr_descriptors == 0)
		dma_pool_free(nvmeq->descriptor_pools.small,
				iod->descriptors[0], iod->first_dma);
	else if (iod->nr_descriptors == 1)
		dma_pool_free(nvmeq->descriptor_pools.large,
				iod->descriptors[0], iod->first_dma);
	else
		nvme_free_prps(nvmeq, req);
	nvme_free_descriptors(nvmeq, req);
	mempool_free(iod->sgt.sgl, dev->iod_mempool);
}

@@ -650,7 +659,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_queue *nvmeq,
		struct request *req, struct nvme_rw_command *cmnd)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct dma_pool *pool;
	int length = blk_rq_payload_bytes(req);
	struct scatterlist *sg = iod->sgt.sgl;
	int dma_len = sg_dma_len(sg);
@@ -658,7 +666,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_queue *nvmeq,
	int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1);
	__le64 *prp_list;
	dma_addr_t prp_dma;
	int nprps, i;
	int i;

	length -= (NVME_CTRL_PAGE_SIZE - offset);
	if (length <= 0) {
@@ -680,27 +688,23 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_queue *nvmeq,
		goto done;
	}

	nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
	if (nprps <= (256 / 8)) {
		pool = nvmeq->descriptor_pools.small;
		iod->nr_descriptors = 0;
	} else {
		pool = nvmeq->descriptor_pools.large;
		iod->nr_descriptors = 1;
	}
	if (DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE) <=
	    256 / sizeof(__le64))
		iod->flags |= IOD_SMALL_DESCRIPTOR;

	prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
	if (!prp_list) {
		iod->nr_descriptors = -1;
	prp_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC,
			&prp_dma);
	if (!prp_list)
		return BLK_STS_RESOURCE;
	}
	iod->descriptors[0] = prp_list;
	iod->descriptors[iod->nr_descriptors++] = prp_list;
	iod->first_dma = prp_dma;
	i = 0;
	for (;;) {
		if (i == NVME_CTRL_PAGE_SIZE >> 3) {
			__le64 *old_prp_list = prp_list;
			prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);

			prp_list = dma_pool_alloc(nvmeq->descriptor_pools.large,
					GFP_ATOMIC, &prp_dma);
			if (!prp_list)
				goto free_prps;
			iod->descriptors[iod->nr_descriptors++] = prp_list;
@@ -727,7 +731,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_queue *nvmeq,
	cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
	return BLK_STS_OK;
free_prps:
	nvme_free_prps(nvmeq, req);
	nvme_free_descriptors(nvmeq, req);
	return BLK_STS_RESOURCE;
bad_sgl:
	WARN(DO_ONCE(nvme_print_sgl, iod->sgt.sgl, iod->sgt.nents),
@@ -756,7 +760,6 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_queue *nvmeq,
		struct request *req, struct nvme_rw_command *cmd)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct dma_pool *pool;
	struct nvme_sgl_desc *sg_list;
	struct scatterlist *sg = iod->sgt.sgl;
	unsigned int entries = iod->sgt.nents;
@@ -771,21 +774,14 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_queue *nvmeq,
		return BLK_STS_OK;
	}

	if (entries <= (256 / sizeof(struct nvme_sgl_desc))) {
		pool = nvmeq->descriptor_pools.small;
		iod->nr_descriptors = 0;
	} else {
		pool = nvmeq->descriptor_pools.large;
		iod->nr_descriptors = 1;
	}
	if (entries <= 256 / sizeof(*sg_list))
		iod->flags |= IOD_SMALL_DESCRIPTOR;

	sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
	if (!sg_list) {
		iod->nr_descriptors = -1;
	sg_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC,
			&sgl_dma);
	if (!sg_list)
		return BLK_STS_RESOURCE;
	}

	iod->descriptors[0] = sg_list;
	iod->descriptors[iod->nr_descriptors++] = sg_list;
	iod->first_dma = sgl_dma;

	nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries);
@@ -982,7 +978,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
	blk_status_t ret;

	iod->flags = 0;
	iod->nr_descriptors = -1;
	iod->nr_descriptors = 0;
	iod->sgt.nents = 0;
	iod->meta_sgt.nents = 0;