Commit 22b9a896 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'nvme-6.7-2023-12-7' of git://git.infradead.org/nvme into block-6.7

Pull NVMe fixes from Keith:

"nvme fixes for Linux 6.7

 - Proper nvme ctrl state setting (Keith)
 - Passthrough command optimization (Keith)
 - Spectre fix (Nitesh)
 - Kconfig clarifications (Shin'ichiro)
 - Frozen state deadlock fix (Bitao)
 - Power setting quirk (Georg)"

* tag 'nvme-6.7-2023-12-7' of git://git.infradead.org/nvme:
  nvme-pci: Add sleep quirk for Kingston drives
  nvme: fix deadlock between reset and scan
  nvme: prevent potential spectre v1 gadget
  nvme: improve NVME_HOST_AUTH and NVME_TARGET_AUTH config descriptions
  nvme-ioctl: move capable() admin check to the end
  nvme: ensure reset state check ordering
  nvme: introduce helper function to get ctrl state
parents 7d2affce 107b4e06
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -107,11 +107,12 @@ config NVME_TCP_TLS
	  If unsure, say N.

config NVME_HOST_AUTH
	bool "NVM Express over Fabrics In-Band Authentication"
	bool "NVMe over Fabrics In-Band Authentication in host side"
	depends on NVME_CORE
	select NVME_AUTH
	help
	  This provides support for NVMe over Fabrics In-Band Authentication.
	  This provides support for NVMe over Fabrics In-Band Authentication in
	  host side.

	  If unsure, say N.

+32 −20
Original line number Diff line number Diff line
@@ -131,7 +131,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
	/*
	 * Only new queue scan work when admin and IO queues are both alive
	 */
	if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
	if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE && ctrl->tagset)
		queue_work(nvme_wq, &ctrl->scan_work);
}

@@ -143,7 +143,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
 */
int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
{
	if (ctrl->state != NVME_CTRL_RESETTING)
	if (nvme_ctrl_state(ctrl) != NVME_CTRL_RESETTING)
		return -EBUSY;
	if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
		return -EBUSY;
@@ -156,7 +156,7 @@ static void nvme_failfast_work(struct work_struct *work)
	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvme_ctrl, failfast_work);

	if (ctrl->state != NVME_CTRL_CONNECTING)
	if (nvme_ctrl_state(ctrl) != NVME_CTRL_CONNECTING)
		return;

	set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
@@ -200,7 +200,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
	ret = nvme_reset_ctrl(ctrl);
	if (!ret) {
		flush_work(&ctrl->reset_work);
		if (ctrl->state != NVME_CTRL_LIVE)
		if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
			ret = -ENETRESET;
	}

@@ -499,7 +499,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,

	spin_lock_irqsave(&ctrl->lock, flags);

	old_state = ctrl->state;
	old_state = nvme_ctrl_state(ctrl);
	switch (new_state) {
	case NVME_CTRL_LIVE:
		switch (old_state) {
@@ -567,7 +567,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
	}

	if (changed) {
		ctrl->state = new_state;
		WRITE_ONCE(ctrl->state, new_state);
		wake_up_all(&ctrl->state_wq);
	}

@@ -575,11 +575,11 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
	if (!changed)
		return false;

	if (ctrl->state == NVME_CTRL_LIVE) {
	if (new_state == NVME_CTRL_LIVE) {
		if (old_state == NVME_CTRL_CONNECTING)
			nvme_stop_failfast_work(ctrl);
		nvme_kick_requeue_lists(ctrl);
	} else if (ctrl->state == NVME_CTRL_CONNECTING &&
	} else if (new_state == NVME_CTRL_CONNECTING &&
		old_state == NVME_CTRL_RESETTING) {
		nvme_start_failfast_work(ctrl);
	}
@@ -592,7 +592,7 @@ EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
 */
static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
{
	switch (ctrl->state) {
	switch (nvme_ctrl_state(ctrl)) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_LIVE:
	case NVME_CTRL_RESETTING:
@@ -617,7 +617,7 @@ bool nvme_wait_reset(struct nvme_ctrl *ctrl)
	wait_event(ctrl->state_wq,
		   nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
		   nvme_state_terminal(ctrl));
	return ctrl->state == NVME_CTRL_RESETTING;
	return nvme_ctrl_state(ctrl) == NVME_CTRL_RESETTING;
}
EXPORT_SYMBOL_GPL(nvme_wait_reset);

@@ -704,9 +704,11 @@ EXPORT_SYMBOL_GPL(nvme_init_request);
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
		struct request *rq)
{
	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
	    ctrl->state != NVME_CTRL_DELETING &&
	    ctrl->state != NVME_CTRL_DEAD &&
	enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);

	if (state != NVME_CTRL_DELETING_NOIO &&
	    state != NVME_CTRL_DELETING &&
	    state != NVME_CTRL_DEAD &&
	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
		return BLK_STS_RESOURCE;
@@ -736,7 +738,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
		 * command, which is require to set the queue live in the
		 * appropinquate states.
		 */
		switch (ctrl->state) {
		switch (nvme_ctrl_state(ctrl)) {
		case NVME_CTRL_CONNECTING:
			if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
			    (req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
@@ -2550,7 +2552,7 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val)

	if (ctrl->ps_max_latency_us != latency) {
		ctrl->ps_max_latency_us = latency;
		if (ctrl->state == NVME_CTRL_LIVE)
		if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
			nvme_configure_apst(ctrl);
	}
}
@@ -3238,7 +3240,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
	struct nvme_ctrl *ctrl =
		container_of(inode->i_cdev, struct nvme_ctrl, cdev);

	switch (ctrl->state) {
	switch (nvme_ctrl_state(ctrl)) {
	case NVME_CTRL_LIVE:
		break;
	default:
@@ -3660,6 +3662,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
		goto out_unlink_ns;

	down_write(&ctrl->namespaces_rwsem);
	/*
	 * Ensure that no namespaces are added to the ctrl list after the queues
	 * are frozen, thereby avoiding a deadlock between scan and reset.
	 */
	if (test_bit(NVME_CTRL_FROZEN, &ctrl->flags)) {
		up_write(&ctrl->namespaces_rwsem);
		goto out_unlink_ns;
	}
	nvme_ns_add_to_ctrl_list(ns);
	up_write(&ctrl->namespaces_rwsem);
	nvme_get_ctrl(ctrl);
@@ -3924,7 +3934,7 @@ static void nvme_scan_work(struct work_struct *work)
	int ret;

	/* No tagset on a live ctrl means IO queues could not created */
	if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
	if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE || !ctrl->tagset)
		return;

	/*
@@ -3994,7 +4004,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
	 * removing the namespaces' disks; fail all the queues now to avoid
	 * potentially having to clean up the failed sync later.
	 */
	if (ctrl->state == NVME_CTRL_DEAD)
	if (nvme_ctrl_state(ctrl) == NVME_CTRL_DEAD)
		nvme_mark_namespaces_dead(ctrl);

	/* this is a no-op when called from the controller reset handler */
@@ -4076,7 +4086,7 @@ static void nvme_async_event_work(struct work_struct *work)
	 * flushing ctrl async_event_work after changing the controller state
	 * from LIVE and before freeing the admin queue.
	*/
	if (ctrl->state == NVME_CTRL_LIVE)
	if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
		ctrl->ops->submit_async_event(ctrl);
}

@@ -4471,7 +4481,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
{
	int ret;

	ctrl->state = NVME_CTRL_NEW;
	WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
	spin_lock_init(&ctrl->lock);
	mutex_init(&ctrl->scan_lock);
@@ -4581,6 +4591,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
	list_for_each_entry(ns, &ctrl->namespaces, list)
		blk_mq_unfreeze_queue(ns->queue);
	up_read(&ctrl->namespaces_rwsem);
	clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
}
EXPORT_SYMBOL_GPL(nvme_unfreeze);

@@ -4614,6 +4625,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	set_bit(NVME_CTRL_FROZEN, &ctrl->flags);
	down_read(&ctrl->namespaces_rwsem);
	list_for_each_entry(ns, &ctrl->namespaces, list)
		blk_freeze_queue_start(ns->queue);
+3 −3
Original line number Diff line number Diff line
@@ -557,7 +557,7 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
static void
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
{
	switch (ctrl->ctrl.state) {
	switch (nvme_ctrl_state(&ctrl->ctrl)) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_CONNECTING:
		/*
@@ -793,7 +793,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
		"NVME-FC{%d}: controller connectivity lost. Awaiting "
		"Reconnect", ctrl->cnum);

	switch (ctrl->ctrl.state) {
	switch (nvme_ctrl_state(&ctrl->ctrl)) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_LIVE:
		/*
@@ -3319,7 +3319,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
	unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
	bool recon = true;

	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
	if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_CONNECTING)
		return;

	if (portptr->port_state == FC_OBJSTATE_ONLINE) {
+11 −10
Original line number Diff line number Diff line
@@ -18,15 +18,12 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
{
	u32 effects;

	if (capable(CAP_SYS_ADMIN))
		return true;

	/*
	 * Do not allow unprivileged passthrough on partitions, as that allows an
	 * escape from the containment of the partition.
	 */
	if (flags & NVME_IOCTL_PARTITION)
		return false;
		goto admin;

	/*
	 * Do not allow unprivileged processes to send vendor specific or fabrics
@@ -34,7 +31,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
	 */
	if (c->common.opcode >= nvme_cmd_vendor_start ||
	    c->common.opcode == nvme_fabrics_command)
		return false;
		goto admin;

	/*
	 * Do not allow unprivileged passthrough of admin commands except
@@ -53,7 +50,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
				return true;
			}
		}
		return false;
		goto admin;
	}

	/*
@@ -63,7 +60,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
	 */
	effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
	if (!(effects & NVME_CMD_EFFECTS_CSUPP))
		return false;
		goto admin;

	/*
	 * Don't allow passthrough for command that have intrusive (or unknown)
@@ -72,16 +69,20 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
	if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
			NVME_CMD_EFFECTS_UUID_SEL |
			NVME_CMD_EFFECTS_SCOPE_MASK))
		return false;
		goto admin;

	/*
	 * Only allow I/O commands that transfer data to the controller or that
	 * change the logical block contents if the file descriptor is open for
	 * writing.
	 */
	if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
		return open_for_write;
	if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) &&
	    !open_for_write)
		goto admin;

	return true;
admin:
	return capable(CAP_SYS_ADMIN);
}

/*
+11 −0
Original line number Diff line number Diff line
@@ -156,6 +156,11 @@ enum nvme_quirks {
	 * No temperature thresholds for channels other than 0 (Composite).
	 */
	NVME_QUIRK_NO_SECONDARY_TEMP_THRESH	= (1 << 19),

	/*
	 * Disables simple suspend/resume path.
	 */
	NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND	= (1 << 20),
};

/*
@@ -251,6 +256,7 @@ enum nvme_ctrl_flags {
	NVME_CTRL_STOPPED		= 3,
	NVME_CTRL_SKIP_ID_CNS_CS	= 4,
	NVME_CTRL_DIRTY_CAPABILITY	= 5,
	NVME_CTRL_FROZEN		= 6,
};

struct nvme_ctrl {
@@ -387,6 +393,11 @@ struct nvme_ctrl {
	enum nvme_dctype dctype;
};

static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
{
	return READ_ONCE(ctrl->state);
}

enum nvme_iopolicy {
	NVME_IOPOLICY_NUMA,
	NVME_IOPOLICY_RR,
Loading