Commit 15da3dd3 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'nvme-6.13-2024-11-13' of git://git.infradead.org/nvme into for-6.13/block

Pull NVMe updates from Keith:

"nvme updates for Linux 6.13

 - Use uring_cmd helper (Pavel)
 - Host Memory Buffer allocation enhancements (Christoph)
 - Target persistent reservation support (Guixin)
 - Persistent reservation tracing (Guixin)
 - NVMe 2.1 specification support (Keith)
 - Rotational Meta Support (Matias, Wang, Keith)
 - Volatile cache detection enhancement (Guixin)"

* tag 'nvme-6.13-2024-11-13' of git://git.infradead.org/nvme: (22 commits)
  nvmet: add tracing of reservation commands
  nvme: parse reservation commands's action and rtype to string
  nvmet: report ns's vwc not present
  nvme: check ns's volatile write cache not present
  nvme: add rotational support
  nvme: use command set independent id ns if available
  nvmet: support for csi identify ns
  nvmet: implement rotational media information log
  nvmet: implement endurance groups
  nvmet: declare 2.1 version compliance
  nvmet: implement crto property
  nvmet: implement supported features log
  nvmet: implement supported log pages
  nvmet: implement active command set ns list
  nvmet: implement id ns for nvm command set
  nvmet: support reservation feature
  nvme: add reservation command's defines
  nvme-core: remove repeated wq flags
  nvmet: make nvmet_wq visible in sysfs
  nvme-pci: use dma_alloc_noncontigous if possible
  ...
parents 6975c1a4 50bee385
Loading
Loading
Loading
Loading
+19 −10
Original line number Diff line number Diff line
@@ -42,6 +42,8 @@ struct nvme_ns_info {
	bool is_readonly;
	bool is_ready;
	bool is_removed;
	bool is_rotational;
	bool no_vwc;
};

unsigned int admin_timeout = 60;
@@ -1615,6 +1617,8 @@ static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
		info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
		info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
		info->is_ready = id->nstat & NVME_NSTAT_NRDY;
		info->is_rotational = id->nsfeat & NVME_NS_ROTATIONAL;
		info->no_vwc = id->nsfeat & NVME_NS_VWC_NOT_PRESENT;
	}
	kfree(id);
	return ret;
@@ -2157,11 +2161,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
	    ns->head->ids.csi == NVME_CSI_ZNS)
		nvme_update_zone_info(ns, &lim, &zi);

	if (ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT)
	if ((ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT) && !info->no_vwc)
		lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
	else
		lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);

	if (info->is_rotational)
		lim.features |= BLK_FEAT_ROTATIONAL;

	/*
	 * Register a metadata profile for PI, or the plain non-integrity NVMe
	 * metadata masquerading as Type 0 if supported, otherwise reject block
@@ -3608,6 +3615,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
	head->ns_id = info->nsid;
	head->ids = info->ids;
	head->shared = info->is_shared;
	head->rotational = info->is_rotational;
	ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
	ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);
	kref_init(&head->ref);
@@ -3988,7 +3996,7 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
	struct nvme_ns_info info = { .nsid = nsid };
	struct nvme_ns *ns;
	int ret;
	int ret = 1;

	if (nvme_identify_ns_descs(ctrl, &info))
		return;
@@ -4005,9 +4013,10 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
	 * set up a namespace.  If not fall back to the legacy version.
	 */
	if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) ||
	    (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS))
	    (info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS) ||
	    ctrl->vs >= NVME_VS(2, 0, 0))
		ret = nvme_ns_info_from_id_cs_indep(ctrl, &info);
	else
	if (ret > 0)
		ret = nvme_ns_info_from_identify(ctrl, &info);

	if (info.is_removed)
@@ -5006,6 +5015,8 @@ static inline void _nvme_check_size(void)
	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE);
	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
	BUILD_BUG_ON(sizeof(struct nvme_endurance_group_log) != 512);
	BUILD_BUG_ON(sizeof(struct nvme_rotational_media_log) != 512);
	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_feat_host_behavior) != 512);
@@ -5014,22 +5025,20 @@ static inline void _nvme_check_size(void)

static int __init nvme_core_init(void)
{
	unsigned int wq_flags = WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS;
	int result = -ENOMEM;

	_nvme_check_size();

	nvme_wq = alloc_workqueue("nvme-wq",
			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
	nvme_wq = alloc_workqueue("nvme-wq", wq_flags, 0);
	if (!nvme_wq)
		goto out;

	nvme_reset_wq = alloc_workqueue("nvme-reset-wq",
			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
	nvme_reset_wq = alloc_workqueue("nvme-reset-wq", wq_flags, 0);
	if (!nvme_reset_wq)
		goto destroy_wq;

	nvme_delete_wq = alloc_workqueue("nvme-delete-wq",
			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
	nvme_delete_wq = alloc_workqueue("nvme-delete-wq", wq_flags, 0);
	if (!nvme_delete_wq)
		goto destroy_reset_wq;

+1 −3
Original line number Diff line number Diff line
@@ -401,7 +401,7 @@ struct nvme_uring_cmd_pdu {
/*
 * Return the NVMe-private per-command data kept in the pdu area of an
 * io_uring command, typed as struct nvme_uring_cmd_pdu.
 *
 * NOTE(review): this view shows both sides of the diff for the return
 * statement: the open-coded cast of &ioucmd->pdu and the
 * io_uring_cmd_to_pdu() helper.  Only one of them belongs in the final
 * file; as written, the first return is the one that executes.
 */
static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
		struct io_uring_cmd *ioucmd)
{
	return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
	return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu);
}

static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
@@ -631,8 +631,6 @@ static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd,
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;

	BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu));

	ret = nvme_uring_cmd_checks(issue_flags);
	if (ret)
		return ret;
+1 −0
Original line number Diff line number Diff line
@@ -474,6 +474,7 @@ struct nvme_ns_head {
	struct list_head	entry;
	struct kref		ref;
	bool			shared;
	bool			rotational;
	bool			passthru_err_log_enabled;
	struct nvme_effects_log *effects;
	u64			nuse;
+62 −12
Original line number Diff line number Diff line
@@ -141,6 +141,7 @@ struct nvme_dev {
	struct nvme_ctrl ctrl;
	u32 last_ps;
	bool hmb;
	struct sg_table *hmb_sgt;

	mempool_t *iod_mempool;

@@ -153,6 +154,7 @@ struct nvme_dev {
	/* host memory buffer support: */
	u64 host_mem_size;
	u32 nr_host_mem_descs;
	u32 host_mem_descs_size;
	dma_addr_t host_mem_descs_dma;
	struct nvme_host_mem_buf_desc *host_mem_descs;
	void **host_mem_desc_bufs;
@@ -1951,7 +1953,7 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
	return ret;
}

static void nvme_free_host_mem(struct nvme_dev *dev)
static void nvme_free_host_mem_multi(struct nvme_dev *dev)
{
	int i;

@@ -1966,18 +1968,54 @@ static void nvme_free_host_mem(struct nvme_dev *dev)

	kfree(dev->host_mem_desc_bufs);
	dev->host_mem_desc_bufs = NULL;
	dma_free_coherent(dev->dev,
			dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
}

/*
 * Release the host memory buffer and its descriptor table.
 *
 * The buffer itself is freed through the path that matches how it was
 * allocated: the single non-contiguous allocation when dev->hmb_sgt is set,
 * otherwise the per-descriptor chunks via nvme_free_host_mem_multi().  The
 * descriptor table is freed in both cases and all bookkeeping in @dev is
 * reset so that no stale pointer or size survives the call.
 */
static void nvme_free_host_mem(struct nvme_dev *dev)
{
	if (dev->hmb_sgt) {
		dma_free_noncontiguous(dev->dev, dev->host_mem_size,
				dev->hmb_sgt, DMA_BIDIRECTIONAL);
		/*
		 * Clear the pointer: it selects the free path above, so a
		 * stale value would make a later call free the same
		 * sg_table again.
		 */
		dev->hmb_sgt = NULL;
	} else {
		nvme_free_host_mem_multi(dev);
	}

	dma_free_coherent(dev->dev, dev->host_mem_descs_size,
			dev->host_mem_descs, dev->host_mem_descs_dma);
	dev->host_mem_descs = NULL;
	dev->host_mem_descs_size = 0;
	dev->nr_host_mem_descs = 0;
}

static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
/*
 * Try to back the whole host memory buffer with one non-contiguous DMA
 * allocation that is described to the controller by a single descriptor.
 *
 * @dev:  device to allocate the buffer for
 * @size: length of the buffer in bytes
 *
 * On success the sg_table, the one-entry descriptor table and the size
 * bookkeeping in @dev are filled in and 0 is returned.  On failure nothing
 * stays allocated, dev->hmb_sgt is NULL and -ENOMEM is returned.
 */
static int nvme_alloc_host_mem_single(struct nvme_dev *dev, u64 size)
{
	dev->hmb_sgt = dma_alloc_noncontiguous(dev->dev, size,
				DMA_BIDIRECTIONAL, GFP_KERNEL, 0);
	if (!dev->hmb_sgt)
		return -ENOMEM;

	dev->host_mem_descs = dma_alloc_coherent(dev->dev,
			sizeof(*dev->host_mem_descs), &dev->host_mem_descs_dma,
			GFP_KERNEL);
	if (!dev->host_mem_descs) {
		/*
		 * dev->host_mem_size is only updated below, so the buffer
		 * must be released with the length it was just allocated
		 * with, not with whatever the field currently holds.
		 */
		dma_free_noncontiguous(dev->dev, size, dev->hmb_sgt,
				DMA_BIDIRECTIONAL);
		dev->hmb_sgt = NULL;
		return -ENOMEM;
	}
	dev->host_mem_size = size;
	dev->host_mem_descs_size = sizeof(*dev->host_mem_descs);
	dev->nr_host_mem_descs = 1;

	/* The single descriptor covers the buffer from its first DMA address. */
	dev->host_mem_descs[0].addr =
		cpu_to_le64(dev->hmb_sgt->sgl->dma_address);
	/* The descriptor size is expressed in NVME_CTRL_PAGE_SIZE units. */
	dev->host_mem_descs[0].size = cpu_to_le32(size / NVME_CTRL_PAGE_SIZE);
	return 0;
}

static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred,
		u32 chunk_size)
{
	struct nvme_host_mem_buf_desc *descs;
	u32 max_entries, len;
	u32 max_entries, len, descs_size;
	dma_addr_t descs_dma;
	int i = 0;
	void **bufs;
@@ -1990,8 +2028,9 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
	if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
		max_entries = dev->ctrl.hmmaxd;

	descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
				   &descs_dma, GFP_KERNEL);
	descs_size = max_entries * sizeof(*descs);
	descs = dma_alloc_coherent(dev->dev, descs_size, &descs_dma,
			GFP_KERNEL);
	if (!descs)
		goto out;

@@ -2020,6 +2059,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
	dev->host_mem_size = size;
	dev->host_mem_descs = descs;
	dev->host_mem_descs_dma = descs_dma;
	dev->host_mem_descs_size = descs_size;
	dev->host_mem_desc_bufs = bufs;
	return 0;

@@ -2034,8 +2074,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,

	kfree(bufs);
out_free_descs:
	dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
			descs_dma);
	dma_free_coherent(dev->dev, descs_size, descs, descs_dma);
out:
	dev->host_mem_descs = NULL;
	return -ENOMEM;
@@ -2047,9 +2086,18 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
	u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
	u64 chunk_size;

	/*
	 * If there is an IOMMU that can merge pages, try a virtually
	 * non-contiguous allocation for a single segment first.
	 */
	if (!(PAGE_SIZE & dma_get_merge_boundary(dev->dev))) {
		if (!nvme_alloc_host_mem_single(dev, preferred))
			return 0;
	}

	/* start big and work our way down */
	for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) {
		if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
		if (!nvme_alloc_host_mem_multi(dev, preferred, chunk_size)) {
			if (!min || dev->host_mem_size >= min)
				return 0;
			nvme_free_host_mem(dev);
@@ -2097,8 +2145,10 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
		}

		dev_info(dev->ctrl.device,
			"allocated %lld MiB host memory buffer.\n",
			dev->host_mem_size >> ilog2(SZ_1M));
			"allocated %lld MiB host memory buffer (%u segment%s).\n",
			dev->host_mem_size >> ilog2(SZ_1M),
			dev->nr_host_mem_descs,
			str_plural(dev->nr_host_mem_descs));
	}

	ret = nvme_set_host_mem(dev, enable_bits);
+52 −6
Original line number Diff line number Diff line
@@ -228,27 +228,61 @@ static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)

/*
 * Append a textual decoding of the cdw10 bytes of a reservation register
 * command to the trace sequence @p.
 *
 * Returns the trace buffer position captured before anything was written
 * by this call.
 *
 * NOTE(review): this view shows both sides of the diff for the
 * trace_seq_printf() call: the numeric-only format and the one that also
 * prints the action name.  Only one of them belongs in the final file.
 */
static const char *nvme_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
{
	/* Names of the known register actions, indexed by the rrega value. */
	static const char * const rrega_strs[] = {
		[0x00] = "register",
		[0x01] = "unregister",
		[0x02] = "replace",
	};
	const char *ret = trace_seq_buffer_ptr(p);
	/* Byte 0: bits 2:0 are the action, bit 3 is iekey. */
	u8 rrega = cdw10[0] & 0x7;
	u8 iekey = (cdw10[0] >> 3) & 0x1;
	/* Byte 3: the top two bits are ptpl. */
	u8 ptpl = (cdw10[3] >> 6) & 0x3;
	const char *rrega_str;

	/* Values outside the table (or holes in it) are shown as "reserved". */
	if (rrega < ARRAY_SIZE(rrega_strs) && rrega_strs[rrega])
		rrega_str = rrega_strs[rrega];
	else
		rrega_str = "reserved";

	trace_seq_printf(p, "rrega=%u, iekey=%u, ptpl=%u",
			 rrega, iekey, ptpl);
	trace_seq_printf(p, "rrega=%u:%s, iekey=%u, ptpl=%u",
			 rrega, rrega_str, iekey, ptpl);
	/* Terminate the text with a NUL byte. */
	trace_seq_putc(p, 0);

	return ret;
}

/*
 * Names of the reservation types, indexed by the raw rtype value taken
 * from the command.  Index 0 is itself named "reserved".
 */
static const char * const rtype_strs[] = {
	[0] = "reserved",
	[1] = "write exclusive",
	[2] = "exclusive access",
	[3] = "write exclusive registrants only",
	[4] = "exclusive access registrants only",
	[5] = "write exclusive all registrants",
	[6] = "exclusive access all registrants",
};

static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
{
	static const char * const racqa_strs[] = {
		[0x00] = "acquire",
		[0x01] = "preempt",
		[0x02] = "preempt and abort",
	};
	const char *ret = trace_seq_buffer_ptr(p);
	u8 racqa = cdw10[0] & 0x7;
	u8 iekey = (cdw10[0] >> 3) & 0x1;
	u8 rtype = cdw10[1];
	const char *racqa_str = "reserved";
	const char *rtype_str = "reserved";

	trace_seq_printf(p, "racqa=%u, iekey=%u, rtype=%u",
			 racqa, iekey, rtype);
	if (racqa < ARRAY_SIZE(racqa_strs) && racqa_strs[racqa])
		racqa_str = racqa_strs[racqa];

	if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
		rtype_str = rtype_strs[rtype];

	trace_seq_printf(p, "racqa=%u:%s, iekey=%u, rtype=%u:%s",
			 racqa, racqa_str, iekey, rtype, rtype_str);
	trace_seq_putc(p, 0);

	return ret;
@@ -256,13 +290,25 @@ static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)

static const char *nvme_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
{
	static const char * const rrela_strs[] = {
		[0x00] = "release",
		[0x01] = "clear",
	};
	const char *ret = trace_seq_buffer_ptr(p);
	u8 rrela = cdw10[0] & 0x7;
	u8 iekey = (cdw10[0] >> 3) & 0x1;
	u8 rtype = cdw10[1];
	const char *rrela_str = "reserved";
	const char *rtype_str = "reserved";

	if (rrela < ARRAY_SIZE(rrela_strs) && rrela_strs[rrela])
		rrela_str = rrela_strs[rrela];

	if (rtype < ARRAY_SIZE(rtype_strs) && rtype_strs[rtype])
		rtype_str = rtype_strs[rtype];

	trace_seq_printf(p, "rrela=%u, iekey=%u, rtype=%u",
			 rrela, iekey, rtype);
	trace_seq_printf(p, "rrela=%u:%s, iekey=%u, rtype=%u:%s",
			 rrela, rrela_str, iekey, rtype, rtype_str);
	trace_seq_putc(p, 0);

	return ret;
Loading