Commit b2da1975 authored by Jens Axboe
Browse files

Merge tag 'nvme-6.8-2024-1-10' of git://git.infradead.org/nvme into for-6.8/block

Pull NVMe changes from Keith:

"nvme follow-up updates for Linux 6.8

 - tcp, fc, and rdma target fixes (Maurizio, Daniel, Hannes, Christoph)
 - discard fixes and improvements (Christoph)
 - timeout debug improvements (Keith, Max)
 - various cleanups (Daniel, Max, Giuxen)
 - trace event string fixes (Arnd)
 - shadow doorbell setup on reset fix (William)
 - a write zeroes quirk for SK Hynix (Jim)"

* tag 'nvme-6.8-2024-1-10' of git://git.infradead.org/nvme: (25 commits)
  nvmet-rdma: avoid circular locking dependency on install_queue()
  nvmet-tcp: avoid circular locking dependency on install_queue()
  nvme-pci: set doorbell config before unquiescing
  nvmet-tcp: Fix the H2C expected PDU len calculation
  nvme-tcp: enhance timeout kernel log
  nvme-rdma: enhance timeout kernel log
  nvme-pci: enhance timeout kernel log
  nvme: trace: avoid memcpy overflow warning
  nvmet: re-fix tracing strncpy() warning
  nvme: introduce nvme_disk_is_ns_head helper
  nvme-pci: disable write zeroes for SK Hynix BC901
  nvmet-fcloop: Remove remote port from list when unlinking
  nvmet-trace: avoid dereferencing pointer too early
  nvmet-fc: remove unnecessary bracket
  nvme: simplify the max_discard_segments calculation
  nvme: fix max_discard_sectors calculation
  nvme: also skip discard granularity updates in nvme_config_discard
  nvme: update the explanation for not updating the limits in nvme_config_discard
  nvmet-tcp: fix a missing endianess conversion in nvmet_tcp_try_peek_pdu
  nvme-common: mark nvme_tls_psk_prio static
  ...
parents 748dc0b6 31deaeb1
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -111,7 +111,7 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring,
 * should be preferred to 'generated' PSKs,
 * and SHA-384 should be preferred to SHA-256.
 */
struct nvme_tls_psk_priority_list {
static struct nvme_tls_psk_priority_list {
	bool generated;
	enum nvme_tcp_tls_cipher cipher;
} nvme_tls_psk_prio[] = {
+20 −21
Original line number Diff line number Diff line
@@ -1727,13 +1727,13 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
		struct nvme_ns_head *head)
{
	struct request_queue *queue = disk->queue;
	u32 size = queue_logical_block_size(queue);
	u32 max_discard_sectors;

	if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX))
		ctrl->max_discard_sectors =
			nvme_lba_to_sect(head, ctrl->dmrsl);

	if (ctrl->max_discard_sectors == 0) {
	if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) {
		max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl);
	} else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
		max_discard_sectors = UINT_MAX;
	} else {
		blk_queue_max_discard_sectors(queue, 0);
		return;
	}
@@ -1741,14 +1741,22 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
			NVME_DSM_MAX_RANGES);

	queue->limits.discard_granularity = size;

	/* If discard is already enabled, don't reset queue limits */
	/*
	 * If discard is already enabled, don't reset queue limits.
	 *
	 * This works around the fact that the block layer can't cope well with
	 * updating the hardware limits when overridden through sysfs.  This is
	 * harmless because discard limits in NVMe are purely advisory.
	 */
	if (queue->limits.max_discard_sectors)
		return;

	blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
	blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
	blk_queue_max_discard_sectors(queue, max_discard_sectors);
	if (ctrl->dmrl)
		blk_queue_max_discard_segments(queue, ctrl->dmrl);
	else
		blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
	queue->limits.discard_granularity = queue_logical_block_size(queue);

	if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
		blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@@ -2907,14 +2915,6 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
	struct nvme_id_ctrl_nvm *id;
	int ret;

	if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
		ctrl->max_discard_sectors = UINT_MAX;
		ctrl->max_discard_segments = NVME_DSM_MAX_RANGES;
	} else {
		ctrl->max_discard_sectors = 0;
		ctrl->max_discard_segments = 0;
	}

	/*
	 * Even though NVMe spec explicitly states that MDTS is not applicable
	 * to the write-zeroes, we are cautious and limit the size to the
@@ -2944,8 +2944,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
	if (ret)
		goto free_data;

	if (id->dmrl)
		ctrl->max_discard_segments = id->dmrl;
	ctrl->dmrl = id->dmrl;
	ctrl->dmrsl = le32_to_cpu(id->dmrsl);
	if (id->wzsl)
		ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
+13 −3
Original line number Diff line number Diff line
@@ -297,14 +297,13 @@ struct nvme_ctrl {
	u32 max_hw_sectors;
	u32 max_segments;
	u32 max_integrity_segments;
	u32 max_discard_sectors;
	u32 max_discard_segments;
	u32 max_zeroes_sectors;
#ifdef CONFIG_BLK_DEV_ZONED
	u32 max_zone_append;
#endif
	u16 crdt[3];
	u16 oncs;
	u8 dmrl;
	u32 dmrsl;
	u16 oacs;
	u16 sqsize;
@@ -921,6 +920,10 @@ extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;

/*
 * Report whether @disk is a namespace-head disk.
 *
 * The check is purely by identity of the operations table: returns true
 * exactly when disk->fops points at nvme_ns_head_ops, false otherwise.
 */
static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
{
	return disk->fops == &nvme_ns_head_ops;
}
#else
#define multipath false
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
@@ -998,6 +1001,10 @@ static inline void nvme_mpath_start_request(struct request *rq)
static inline void nvme_mpath_end_request(struct request *rq)
{
}
/*
 * Stub used when CONFIG_NVME_MULTIPATH is not enabled: @disk is ignored
 * and no disk is ever reported as a namespace-head disk.
 */
static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
{
	return false;
}
#endif /* CONFIG_NVME_MULTIPATH */

int nvme_revalidate_zones(struct nvme_ns *ns);
@@ -1026,7 +1033,10 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)

static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
	return dev_to_disk(dev)->private_data;
	struct gendisk *disk = dev_to_disk(dev);

	WARN_ON(nvme_disk_is_ns_head(disk));
	return disk->private_data;
}

#ifdef CONFIG_NVME_HWMON
+16 −11
Original line number Diff line number Diff line
@@ -1284,6 +1284,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
	struct request *abort_req;
	struct nvme_command cmd = { };
	u32 csts = readl(dev->bar + NVME_REG_CSTS);
	u8 opcode;

	/* If PCI error recovery process is happening, we cannot reset or
	 * the recovery mechanism will surely fail.
@@ -1310,8 +1311,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)

	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) {
		dev_warn(dev->ctrl.device,
			 "I/O %d QID %d timeout, completion polled\n",
			 req->tag, nvmeq->qid);
			 "I/O tag %d (%04x) QID %d timeout, completion polled\n",
			 req->tag, nvme_cid(req), nvmeq->qid);
		return BLK_EH_DONE;
	}

@@ -1327,8 +1328,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
		fallthrough;
	case NVME_CTRL_DELETING:
		dev_warn_ratelimited(dev->ctrl.device,
			 "I/O %d QID %d timeout, disable controller\n",
			 req->tag, nvmeq->qid);
			 "I/O tag %d (%04x) QID %d timeout, disable controller\n",
			 req->tag, nvme_cid(req), nvmeq->qid);
		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
		nvme_dev_disable(dev, true);
		return BLK_EH_DONE;
@@ -1343,10 +1344,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
	 * command was already aborted once before and still hasn't been
	 * returned to the driver, or if this is the admin queue.
	 */
	opcode = nvme_req(req)->cmd->common.opcode;
	if (!nvmeq->qid || iod->aborted) {
		dev_warn(dev->ctrl.device,
			 "I/O %d QID %d timeout, reset controller\n",
			 req->tag, nvmeq->qid);
			 "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
			 req->tag, nvme_cid(req), opcode,
			 nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid);
		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
		goto disable;
	}
@@ -1362,10 +1365,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
	cmd.abort.sqid = cpu_to_le16(nvmeq->qid);

	dev_warn(nvmeq->dev->ctrl.device,
		"I/O %d (%s) QID %d timeout, aborting\n",
		 req->tag,
		 nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode),
		 nvmeq->qid);
		 "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, aborting req_op:%s(%u) size:%u\n",
		 req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode),
		 nvmeq->qid, blk_op_str(req_op(req)), req_op(req),
		 blk_rq_bytes(req));

	abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
					 BLK_MQ_REQ_NOWAIT);
@@ -2743,10 +2746,10 @@ static void nvme_reset_work(struct work_struct *work)
	 * controller around but remove all namespaces.
	 */
	if (dev->online_queues > 1) {
		nvme_dbbuf_set(dev);
		nvme_unquiesce_io_queues(&dev->ctrl);
		nvme_wait_freeze(&dev->ctrl);
		nvme_pci_update_nr_queues(dev);
		nvme_dbbuf_set(dev);
		nvme_unfreeze(&dev->ctrl);
	} else {
		dev_warn(dev->ctrl.device, "IO queues lost\n");
@@ -3394,6 +3397,8 @@ static const struct pci_device_id nvme_id_table[] = {
		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_DEVICE(0x1c5c, 0x174a),   /* SK Hynix P31 SSD */
		.driver_data = NVME_QUIRK_BOGUS_NID, },
	{ PCI_DEVICE(0x1c5c, 0x1D59),   /* SK Hynix BC901 */
		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_DEVICE(0x15b7, 0x2001),   /*  Sandisk Skyhawk */
		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_DEVICE(0x1d97, 0x2263),   /* SPCC */
+1 −1
Original line number Diff line number Diff line
@@ -98,7 +98,7 @@ static int nvme_send_pr_command(struct block_device *bdev,
		struct nvme_command *c, void *data, unsigned int data_len)
{
	if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
	    bdev->bd_disk->fops == &nvme_ns_head_ops)
	    nvme_disk_is_ns_head(bdev->bd_disk))
		return nvme_send_ns_head_pr_command(bdev, c, data, data_len);

	return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data,
Loading