Commit de7007e9 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'nvme-6.12-2024-10-18' of git://git.infradead.org/nvme into block-6.12

Pull NVMe fixes from Keith:

"nvme fixes for Linux 6.12

 - Fix target passthrough identifier (Nilay)
 - Fix tcp locking (Hannes)
 - Replace list with sbitmap for tracking RDMA rsp tags (Guixen)
 - Remove unnecessary fallthrough statements (Tokunori)
 - Remove ready-without-media support (Greg)
 - Fix multipath partition scan deadlock (Keith)
 - Fix concurrent PCI reset and remove queue mapping (Maurizio)
 - Fabrics shutdown fixes (Nilay)"

* tag 'nvme-6.12-2024-10-18' of git://git.infradead.org/nvme:
  nvme: use helper nvme_ctrl_state in nvme_keep_alive_finish function
  nvme: make keep-alive synchronous operation
  nvme-loop: flush off pending I/O while shutting down loop controller
  nvme-pci: fix race condition between reset and nvme_dev_disable()
  nvme-multipath: defer partition scanning
  nvme: disable CC.CRIME (NVME_CC_CRIME)
  nvme: delete unnecessary fallthru comment
  nvmet-rdma: use sbitmap to replace rsp free list
  nvme: tcp: avoid race between queue_lock lock and destroy
  nvmet-passthru: clear EUID/NGUID/UUID while using loop target
  block: fix blk_rq_map_integrity_sg kernel-doc
parents 42aafd8b 599d9f3a
Loading
Loading
Loading
Loading
+17 −24
Original line number Diff line number Diff line
@@ -1292,14 +1292,12 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
	queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
}

static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
						 blk_status_t status)
static void nvme_keep_alive_finish(struct request *rq,
		blk_status_t status, struct nvme_ctrl *ctrl)
{
	struct nvme_ctrl *ctrl = rq->end_io_data;
	unsigned long flags;
	bool startka = false;
	unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
	unsigned long delay = nvme_keep_alive_work_period(ctrl);
	enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);

	/*
	 * Subtract off the keepalive RTT so nvme_keep_alive_work runs
@@ -1313,25 +1311,17 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
		delay = 0;
	}

	blk_mq_free_request(rq);

	if (status) {
		dev_err(ctrl->device,
			"failed nvme_keep_alive_end_io error=%d\n",
				status);
		return RQ_END_IO_NONE;
		return;
	}

	ctrl->ka_last_check_time = jiffies;
	ctrl->comp_seen = false;
	spin_lock_irqsave(&ctrl->lock, flags);
	if (ctrl->state == NVME_CTRL_LIVE ||
	    ctrl->state == NVME_CTRL_CONNECTING)
		startka = true;
	spin_unlock_irqrestore(&ctrl->lock, flags);
	if (startka)
	if (state == NVME_CTRL_LIVE || state == NVME_CTRL_CONNECTING)
		queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
	return RQ_END_IO_NONE;
}

static void nvme_keep_alive_work(struct work_struct *work)
@@ -1340,6 +1330,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
			struct nvme_ctrl, ka_work);
	bool comp_seen = ctrl->comp_seen;
	struct request *rq;
	blk_status_t status;

	ctrl->ka_last_check_time = jiffies;

@@ -1362,9 +1353,9 @@ static void nvme_keep_alive_work(struct work_struct *work)
	nvme_init_request(rq, &ctrl->ka_cmd);

	rq->timeout = ctrl->kato * HZ;
	rq->end_io = nvme_keep_alive_end_io;
	rq->end_io_data = ctrl;
	blk_execute_rq_nowait(rq, false);
	status = blk_execute_rq(rq, false);
	nvme_keep_alive_finish(rq, status, ctrl);
	blk_mq_free_request(rq);
}

static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
@@ -2458,8 +2449,13 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
	else
		ctrl->ctrl_config = NVME_CC_CSS_NVM;

	if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS)
		ctrl->ctrl_config |= NVME_CC_CRIME;
	/*
	 * Setting CRIME results in CSTS.RDY before the media is ready. This
	 * makes it possible for media related commands to return the error
	 * NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY. Until the driver is
	 * restructured to handle retries, disable CC.CRIME.
	 */
	ctrl->ctrl_config &= ~NVME_CC_CRIME;

	ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
	ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
@@ -2489,9 +2485,6 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
		 * devices are known to get this wrong. Use the larger of the
		 * two values.
		 */
		if (ctrl->ctrl_config & NVME_CC_CRIME)
			ready_timeout = NVME_CRTO_CRIMT(crto);
		else
		ready_timeout = NVME_CRTO_CRWMT(crto);

		if (ready_timeout < timeout)
+33 −7
Original line number Diff line number Diff line
@@ -431,7 +431,6 @@ static bool nvme_available_path(struct nvme_ns_head *head)
		case NVME_CTRL_LIVE:
		case NVME_CTRL_RESETTING:
		case NVME_CTRL_CONNECTING:
			/* fallthru */
			return true;
		default:
			break;
@@ -580,6 +579,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
	return ret;
}

static void nvme_partition_scan_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
		container_of(work, struct nvme_ns_head, partition_scan_work);

	if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
					     &head->disk->state)))
		return;

	mutex_lock(&head->disk->open_mutex);
	bdev_disk_changed(head->disk, false);
	mutex_unlock(&head->disk->open_mutex);
}

static void nvme_requeue_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
@@ -606,6 +619,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
	bio_list_init(&head->requeue_list);
	spin_lock_init(&head->requeue_lock);
	INIT_WORK(&head->requeue_work, nvme_requeue_work);
	INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);

	/*
	 * Add a multipath node if the subsystems supports multiple controllers.
@@ -629,6 +643,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
		return PTR_ERR(head->disk);
	head->disk->fops = &nvme_ns_head_ops;
	head->disk->private_data = head;

	/*
	 * We need to suppress the partition scan from occurring within the
	 * controller's scan_work context. If a path error occurs here, the IO
	 * will wait until a path becomes available or all paths are torn down,
	 * but that action also occurs within scan_work, so it would deadlock.
	 * Defer the partition scan to a different context that does not block
	 * scan_work.
	 */
	set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state);
	sprintf(head->disk->disk_name, "nvme%dn%d",
			ctrl->subsys->instance, head->instance);
	return 0;
@@ -655,6 +679,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
			return;
		}
		nvme_add_ns_head_cdev(head);
		kblockd_schedule_work(&head->partition_scan_work);
	}

	mutex_lock(&head->lock);
@@ -974,14 +999,14 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
		return;
	if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
		nvme_cdev_del(&head->cdev, &head->cdev_device);
		del_gendisk(head->disk);
	}
		/*
		 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
		 * to allow multipath to fail all I/O.
		 */
		synchronize_srcu(&head->srcu);
		kblockd_schedule_work(&head->requeue_work);
		del_gendisk(head->disk);
	}
}

void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -991,6 +1016,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
	/* make sure all pending bios are cleaned up */
	kblockd_schedule_work(&head->requeue_work);
	flush_work(&head->requeue_work);
	flush_work(&head->partition_scan_work);
	put_disk(head->disk);
}

+1 −0
Original line number Diff line number Diff line
@@ -494,6 +494,7 @@ struct nvme_ns_head {
	struct bio_list		requeue_list;
	spinlock_t		requeue_lock;
	struct work_struct	requeue_work;
	struct work_struct	partition_scan_work;
	struct mutex		lock;
	unsigned long		flags;
#define NVME_NSHEAD_DISK_LIVE	0
+16 −3
Original line number Diff line number Diff line
@@ -2506,17 +2506,29 @@ static unsigned int nvme_pci_nr_maps(struct nvme_dev *dev)
	return 1;
}

static void nvme_pci_update_nr_queues(struct nvme_dev *dev)
static bool nvme_pci_update_nr_queues(struct nvme_dev *dev)
{
	if (!dev->ctrl.tagset) {
		nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops,
				nvme_pci_nr_maps(dev), sizeof(struct nvme_iod));
		return;
		return true;
	}

	/* Give up if we are racing with nvme_dev_disable() */
	if (!mutex_trylock(&dev->shutdown_lock))
		return false;

	/* Check if nvme_dev_disable() has been executed already */
	if (!dev->online_queues) {
		mutex_unlock(&dev->shutdown_lock);
		return false;
	}

	blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
	/* free previously allocated queues that are no longer usable */
	nvme_free_queues(dev, dev->online_queues);
	mutex_unlock(&dev->shutdown_lock);
	return true;
}

static int nvme_pci_enable(struct nvme_dev *dev)
@@ -2797,7 +2809,8 @@ static void nvme_reset_work(struct work_struct *work)
		nvme_dbbuf_set(dev);
		nvme_unquiesce_io_queues(&dev->ctrl);
		nvme_wait_freeze(&dev->ctrl);
		nvme_pci_update_nr_queues(dev);
		if (!nvme_pci_update_nr_queues(dev))
			goto out;
		nvme_unfreeze(&dev->ctrl);
	} else {
		dev_warn(dev->ctrl.device, "IO queues lost\n");
+4 −3
Original line number Diff line number Diff line
@@ -2644,10 +2644,11 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)

	len = nvmf_get_address(ctrl, buf, size);

	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
		return len;

	mutex_lock(&queue->queue_lock);

	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
		goto done;
	ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr);
	if (ret > 0) {
		if (len > 0)
@@ -2655,7 +2656,7 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
		len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n",
				(len) ? "," : "", &src_addr);
	}
done:

	mutex_unlock(&queue->queue_lock);

	return len;
Loading