Commit 5a47c208 authored by Guixin Liu, committed by Keith Busch
Browse files

nvmet: support reservation feature



This patch implements the reservation feature, including:
  1. reservation register(register, unregister and replace).
  2. reservation acquire(acquire, preempt, preempt and abort).
  3. reservation release(release and clear).
  4. reservation report.
  5. set feature and get feature of reservation notify mask.
  6. get log page of reservation event.

Not supported:
  1. persistent reservation through power loss.

Test cases:
  Use nvme-cli and fio to test all implemented sub features:
  1. use nvme resv-register to register the host as a registrant,
     unregister it, or replace its key with a new one.
  2. use nvme resv-acquire to make the host the reservation holder,
     and use fio to send read and write I/O under every reservation
     type. Also test preempt and "preempt and abort".
  3. use nvme resv-report to show all registrants and reservation
     status.
  4. use nvme resv-release to release the reservation or clear all
     registrants.
  5. use nvme get-log to get events generated by the preceding
     operations.

In addition, make reservations configurable: a namespace can be set
to support reservations before it is enabled. resv_enable defaults
to false.

Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
Reviewed-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
parent 1900e1a4
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
obj-$(CONFIG_NVME_TARGET_TCP)		+= nvmet-tcp.o

nvmet-y		+= core.o configfs.o admin-cmd.o fabrics-cmd.o \
			discovery.o io-cmd-file.o io-cmd-bdev.o
			discovery.o io-cmd-file.o io-cmd-bdev.o pr.o
nvmet-$(CONFIG_NVME_TARGET_DEBUGFS)	+= debugfs.o
nvmet-$(CONFIG_NVME_TARGET_PASSTHRU)	+= passthru.o
nvmet-$(CONFIG_BLK_DEV_ZONED)		+= zns.o
+23 −1
Original line number Diff line number Diff line
@@ -176,6 +176,10 @@ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
	log->iocs[nvme_cmd_read] =
	log->iocs[nvme_cmd_flush] =
	log->iocs[nvme_cmd_dsm]	=
	log->iocs[nvme_cmd_resv_acquire] =
	log->iocs[nvme_cmd_resv_register] =
	log->iocs[nvme_cmd_resv_release] =
	log->iocs[nvme_cmd_resv_report] =
		cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
	log->iocs[nvme_cmd_write] =
	log->iocs[nvme_cmd_write_zeroes] =
@@ -340,6 +344,8 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
		return nvmet_execute_get_log_cmd_effects_ns(req);
	case NVME_LOG_ANA:
		return nvmet_execute_get_log_page_ana(req);
	case NVME_LOG_RESERVATION:
		return nvmet_execute_get_log_page_resv(req);
	}
	pr_debug("unhandled lid %d on qid %d\n",
	       req->cmd->get_log_page.lid, req->sq->qid);
@@ -433,7 +439,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
	id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
	id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
	id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
			NVME_CTRL_ONCS_WRITE_ZEROES);
			NVME_CTRL_ONCS_WRITE_ZEROES |
			NVME_CTRL_ONCS_RESERVATIONS);

	/* XXX: don't report vwc if the underlying device is write through */
	id->vwc = NVME_CTRL_VWC_PRESENT;
@@ -551,6 +558,15 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
	id->nmic = NVME_NS_NMIC_SHARED;
	id->anagrpid = cpu_to_le32(req->ns->anagrpid);

	if (req->ns->pr.enable)
		id->rescap = NVME_PR_SUPPORT_WRITE_EXCLUSIVE |
			NVME_PR_SUPPORT_EXCLUSIVE_ACCESS |
			NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY |
			NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY |
			NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS |
			NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS |
			NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF;

	memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid));

	id->lbaf[0].ds = req->ns->blksize_shift;
@@ -861,6 +877,9 @@ void nvmet_execute_set_features(struct nvmet_req *req)
	case NVME_FEAT_WRITE_PROTECT:
		status = nvmet_set_feat_write_protect(req);
		break;
	case NVME_FEAT_RESV_MASK:
		status = nvmet_set_feat_resv_notif_mask(req, cdw11);
		break;
	default:
		req->error_loc = offsetof(struct nvme_common_command, cdw10);
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
@@ -959,6 +978,9 @@ void nvmet_execute_get_features(struct nvmet_req *req)
	case NVME_FEAT_WRITE_PROTECT:
		status = nvmet_get_feat_write_protect(req);
		break;
	case NVME_FEAT_RESV_MASK:
		status = nvmet_get_feat_resv_notif_mask(req);
		break;
	default:
		req->error_loc =
			offsetof(struct nvme_common_command, cdw10);
+27 −0
Original line number Diff line number Diff line
@@ -769,6 +769,32 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,

CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);

static ssize_t nvmet_ns_resv_enable_show(struct config_item *item, char *page)
{
	return sysfs_emit(page, "%d\n", to_nvmet_ns(item)->pr.enable);
}

/*
 * configfs "resv_enable" write handler: parse a boolean from @page and
 * store it in the namespace's pr.enable flag.
 *
 * The flag may only be changed while the namespace is not enabled; the
 * check and the update are both done under the subsystem lock.
 *
 * Returns @count on success, or -EINVAL if @page is not a valid boolean
 * or the namespace is already enabled.
 */
static ssize_t nvmet_ns_resv_enable_store(struct config_item *item,
		const char *page, size_t count)
{
	struct nvmet_ns *ns = to_nvmet_ns(item);
	ssize_t ret = count;
	bool enable;

	if (kstrtobool(page, &enable))
		return -EINVAL;

	mutex_lock(&ns->subsys->lock);
	if (ns->enabled) {
		pr_err("the ns:%d is already enabled.\n", ns->nsid);
		ret = -EINVAL;
		goto out_unlock;
	}
	ns->pr.enable = enable;
out_unlock:
	/* Single unlock point shared by the success and error paths. */
	mutex_unlock(&ns->subsys->lock);
	return ret;
}
CONFIGFS_ATTR(nvmet_ns_, resv_enable);

static struct configfs_attribute *nvmet_ns_attrs[] = {
	&nvmet_ns_attr_device_path,
	&nvmet_ns_attr_device_nguid,
@@ -777,6 +803,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
	&nvmet_ns_attr_enable,
	&nvmet_ns_attr_buffered_io,
	&nvmet_ns_attr_revalidate_size,
	&nvmet_ns_attr_resv_enable,
#ifdef CONFIG_PCI_P2PDMA
	&nvmet_ns_attr_p2pmem,
#endif
+56 −6
Original line number Diff line number Diff line
@@ -611,6 +611,12 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
	if (ret)
		goto out_restore_subsys_maxnsid;

	if (ns->pr.enable) {
		ret = nvmet_pr_init_ns(ns);
		if (ret)
			goto out_remove_from_subsys;
	}

	subsys->nr_namespaces++;

	nvmet_ns_changed(subsys, ns->nsid);
@@ -620,6 +626,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
	mutex_unlock(&subsys->lock);
	return ret;

out_remove_from_subsys:
	xa_erase(&subsys->namespaces, ns->nsid);
out_restore_subsys_maxnsid:
	subsys->max_nsid = nvmet_max_nsid(subsys);
	percpu_ref_exit(&ns->ref);
@@ -663,6 +671,9 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	if (ns->pr.enable)
		nvmet_pr_exit_ns(ns);

	mutex_lock(&subsys->lock);

	subsys->nr_namespaces--;
@@ -754,6 +765,7 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status)
static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	struct nvmet_ns *ns = req->ns;
	struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref;

	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
@@ -766,6 +778,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
	trace_nvmet_req_complete(req);

	req->ops->queue_response(req);

	if (pc_ref)
		nvmet_pr_put_ns_pc_ref(pc_ref);
	if (ns)
		nvmet_put_namespace(ns);
}
@@ -929,18 +944,39 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
		return ret;
	}

	if (req->ns->pr.enable) {
		ret = nvmet_parse_pr_cmd(req);
		if (!ret)
			return ret;
	}

	switch (req->ns->csi) {
	case NVME_CSI_NVM:
		if (req->ns->file)
			return nvmet_file_parse_io_cmd(req);
		return nvmet_bdev_parse_io_cmd(req);
			ret = nvmet_file_parse_io_cmd(req);
		else
			ret = nvmet_bdev_parse_io_cmd(req);
		break;
	case NVME_CSI_ZNS:
		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
			return nvmet_bdev_zns_parse_io_cmd(req);
		return NVME_SC_INVALID_IO_CMD_SET;
			ret = nvmet_bdev_zns_parse_io_cmd(req);
		else
			ret = NVME_SC_INVALID_IO_CMD_SET;
		break;
	default:
		return NVME_SC_INVALID_IO_CMD_SET;
		ret = NVME_SC_INVALID_IO_CMD_SET;
	}
	if (ret)
		return ret;

	if (req->ns->pr.enable) {
		ret = nvmet_pr_check_cmd_access(req);
		if (ret)
			return ret;

		ret = nvmet_pr_get_ns_pc_ref(req);
	}
	return ret;
}

bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
@@ -964,6 +1000,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;
	req->pc_ref = NULL;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
@@ -1015,6 +1052,8 @@ EXPORT_SYMBOL_GPL(nvmet_req_init);
/*
 * Drop the references held by @req: the submission queue reference
 * first, then the reservation per-controller reference (if one is set),
 * and finally the namespace reference (if one is set).
 */
void nvmet_req_uninit(struct nvmet_req *req)
{
	struct nvmet_pr_per_ctrl_ref *pc_ref;
	struct nvmet_ns *ns;

	percpu_ref_put(&req->sq->ref);

	pc_ref = req->pc_ref;
	if (pc_ref)
		nvmet_pr_put_ns_pc_ref(pc_ref);

	ns = req->ns;
	if (ns)
		nvmet_put_namespace(ns);
}
@@ -1383,7 +1422,8 @@ static void nvmet_fatal_error_handler(struct work_struct *work)
}

u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp,
		uuid_t *hostid)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
@@ -1462,6 +1502,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
	}
	ctrl->cntlid = ret;

	uuid_copy(&ctrl->hostid, hostid);

	/*
	 * Discovery controllers may use some arbitrary high value
	 * in order to cleanup stale discovery sessions
@@ -1478,6 +1520,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	ret = nvmet_ctrl_init_pr(ctrl);
	if (ret)
		goto init_pr_fail;
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	nvmet_setup_p2p_ns_map(ctrl, req);
	nvmet_debugfs_ctrl_setup(ctrl);
@@ -1486,6 +1531,10 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
	*ctrlp = ctrl;
	return 0;

init_pr_fail:
	mutex_unlock(&subsys->lock);
	nvmet_stop_keep_alive_timer(ctrl);
	ida_free(&cntlid_ida, ctrl->cntlid);
out_free_sqs:
	kfree(ctrl->sqs);
out_free_changed_ns_list:
@@ -1504,6 +1553,7 @@ static void nvmet_ctrl_free(struct kref *ref)
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	nvmet_ctrl_destroy_pr(ctrl);
	nvmet_release_p2p_ns_map(ctrl);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);
+1 −3
Original line number Diff line number Diff line
@@ -245,12 +245,10 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
	d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
	d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
	status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
				  le32_to_cpu(c->kato), &ctrl);
				  le32_to_cpu(c->kato), &ctrl, &d->hostid);
	if (status)
		goto out;

	uuid_copy(&ctrl->hostid, &d->hostid);

	dhchap_status = nvmet_setup_auth(ctrl);
	if (dhchap_status) {
		pr_err("Failed to setup authentication, dhchap status %u\n",
Loading