Commit c9bb139d authored by Jens Axboe
Browse files

Merge tag 'nvme-6.15-2025-05-15' of git://git.infradead.org/nvme into block-6.15

Pull NVMe fixes from Christoph:

"nvme fixes for linux 6.15

 - fixes for atomic writes (Alan Adamson)
 - fixes for polled CQs in nvmet-epf (Damien Le Moal)
 - fix for polled CQs in nvme-pci (Keith Busch)
 - fix compile on odd configs where nvme_pci_npages_prp() needs to be
   forced inline (Kees Cook)
 - one more quirk (Ilya Guterman)"

* tag 'nvme-6.15-2025-05-15' of git://git.infradead.org/nvme:
  nvme-pci: add NVME_QUIRK_NO_DEEPEST_PS quirk for SOLIDIGM P44 Pro
  nvme: all namespaces in a subsystem must adhere to a common atomic write size
  nvme: multipath: enable BLK_FEAT_ATOMIC_WRITES for multipathing
  nvmet: pci-epf: remove NVMET_PCI_EPF_Q_IS_SQ
  nvmet: pci-epf: improve debug message
  nvmet: pci-epf: cleanup nvmet_pci_epf_raise_irq()
  nvmet: pci-epf: do not fall back to using INTX if not supported
  nvmet: pci-epf: clear completion queue IRQ flag on delete
  nvme-pci: acquire cq_poll_lock in nvme_poll_irqdisable
  nvme-pci: make nvme_pci_npages_prp() __always_inline
parents 8098514b e765bf89
Loading
Loading
Loading
Loading
+27 −3
Original line number Diff line number Diff line
@@ -2059,7 +2059,21 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
		if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
			atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
		else
			atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
			atomic_bs = (1 + ns->ctrl->awupf) * bs;

		/*
		 * Set subsystem atomic bs.
		 */
		if (ns->ctrl->subsys->atomic_bs) {
			if (atomic_bs != ns->ctrl->subsys->atomic_bs) {
				dev_err_ratelimited(ns->ctrl->device,
					"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
					ns->disk ? ns->disk->disk_name : "?",
					ns->ctrl->subsys->atomic_bs,
					atomic_bs);
			}
		} else
			ns->ctrl->subsys->atomic_bs = atomic_bs;

		nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
	}
@@ -2201,6 +2215,17 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
	nvme_set_chunk_sectors(ns, id, &lim);
	if (!nvme_update_disk_info(ns, id, &lim))
		capacity = 0;

	/*
	 * Validate the max atomic write size fits within the subsystem's
	 * atomic write capabilities.
	 */
	if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
		blk_mq_unfreeze_queue(ns->disk->queue, memflags);
		ret = -ENXIO;
		goto out;
	}

	nvme_config_discard(ns, &lim);
	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
	    ns->head->ids.csi == NVME_CSI_ZNS)
@@ -3031,7 +3056,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
		kfree(subsys);
		return -EINVAL;
	}
	subsys->awupf = le16_to_cpu(id->awupf);
	nvme_mpath_default_iopolicy(subsys);

	subsys->dev.class = &nvme_subsys_class;
@@ -3441,7 +3465,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
		dev_pm_qos_expose_latency_tolerance(ctrl->device);
	else if (!ctrl->apst_enabled && prev_apst_enabled)
		dev_pm_qos_hide_latency_tolerance(ctrl->device);

	ctrl->awupf = le16_to_cpu(id->awupf);
out_free:
	kfree(id);
	return ret;
+2 −1
Original line number Diff line number Diff line
@@ -638,7 +638,8 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)

	blk_set_stacking_limits(&lim);
	lim.dma_alignment = 3;
	lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
	lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT |
		BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES;
	if (head->ids.csi == NVME_CSI_ZNS)
		lim.features |= BLK_FEAT_ZONED;

+2 −1
Original line number Diff line number Diff line
@@ -410,6 +410,7 @@ struct nvme_ctrl {

	enum nvme_ctrl_type cntrltype;
	enum nvme_dctype dctype;
	u16 awupf; /* 0's based value. */
};

static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
@@ -442,11 +443,11 @@ struct nvme_subsystem {
	u8			cmic;
	enum nvme_subsys_type	subtype;
	u16			vendor_id;
	u16			awupf;	/* 0's based awupf value. */
	struct ida		ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
	enum nvme_iopolicy	iopolicy;
#endif
	u32			atomic_bs;
};

/*
+5 −1
Original line number Diff line number Diff line
@@ -390,7 +390,7 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db,
 * as it only leads to a small amount of wasted memory for the lifetime of
 * the I/O.
 */
static int nvme_pci_npages_prp(void)
static __always_inline int nvme_pci_npages_prp(void)
{
	unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE;
	unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE);
@@ -1202,7 +1202,9 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
	WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags));

	disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
	spin_lock(&nvmeq->cq_poll_lock);
	nvme_poll_cq(nvmeq, NULL);
	spin_unlock(&nvmeq->cq_poll_lock);
	enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
}

@@ -3737,6 +3739,8 @@ static const struct pci_device_id nvme_id_table[] = {
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
	{ PCI_DEVICE(0x1e49, 0x0041),   /* ZHITAI TiPro7000 NVMe SSD */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
	{ PCI_DEVICE(0x025e, 0xf1ac),   /* SOLIDIGM  P44 pro SSDPFKKW020X7  */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
	{ PCI_DEVICE(0xc0a9, 0x540a),   /* Crucial P2 */
		.driver_data = NVME_QUIRK_BOGUS_NID, },
	{ PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */
+23 −16
Original line number Diff line number Diff line
@@ -62,8 +62,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex);
#define NVMET_PCI_EPF_CQ_RETRY_INTERVAL	msecs_to_jiffies(1)

enum nvmet_pci_epf_queue_flags {
	NVMET_PCI_EPF_Q_IS_SQ = 0,	/* The queue is a submission queue */
	NVMET_PCI_EPF_Q_LIVE,		/* The queue is live */
	NVMET_PCI_EPF_Q_LIVE = 0,	/* The queue is live */
	NVMET_PCI_EPF_Q_IRQ_ENABLED,	/* IRQ is enabled for this queue */
};

@@ -596,9 +595,6 @@ static bool nvmet_pci_epf_should_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
	struct nvmet_pci_epf_irq_vector *iv = cq->iv;
	bool ret;

	if (!test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
		return false;

	/* IRQ coalescing for the admin queue is not allowed. */
	if (!cq->qid)
		return true;
@@ -625,7 +621,8 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
	struct pci_epf *epf = nvme_epf->epf;
	int ret = 0;

	if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
	if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) ||
	    !test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
		return;

	mutex_lock(&ctrl->irq_lock);
@@ -636,14 +633,16 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
	switch (nvme_epf->irq_type) {
	case PCI_IRQ_MSIX:
	case PCI_IRQ_MSI:
		/*
		 * If we fail to raise an MSI or MSI-X interrupt, it is likely
		 * because the host is using legacy INTX IRQs (e.g. BIOS,
		 * grub), but we can fallback to the INTX type only if the
		 * endpoint controller supports this type.
		 */
		ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no,
					nvme_epf->irq_type, cq->vector + 1);
		if (!ret)
		if (!ret || !nvme_epf->epc_features->intx_capable)
			break;
		/*
		 * If we got an error, it is likely because the host is using
		 * legacy IRQs (e.g. BIOS, grub).
		 */
		fallthrough;
	case PCI_IRQ_INTX:
		ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no,
@@ -656,7 +655,9 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl,
	}

	if (ret)
		dev_err(ctrl->dev, "Failed to raise IRQ (err=%d)\n", ret);
		dev_err_ratelimited(ctrl->dev,
				    "CQ[%u]: Failed to raise IRQ (err=%d)\n",
				    cq->qid, ret);

unlock:
	mutex_unlock(&ctrl->irq_lock);
@@ -1319,8 +1320,14 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,

	set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);

	dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
	if (test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
		dev_dbg(ctrl->dev,
			"CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
			cqid, qsize, cq->qes, cq->vector);
	else
		dev_dbg(ctrl->dev,
			"CQ[%u]: %u entries of %zu B, IRQ disabled\n",
			cqid, qsize, cq->qes);

	return NVME_SC_SUCCESS;

@@ -1344,6 +1351,7 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)

	cancel_delayed_work_sync(&cq->work);
	nvmet_pci_epf_drain_queue(cq);
	if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
		nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
	nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);

@@ -1533,7 +1541,6 @@ static void nvmet_pci_epf_init_queue(struct nvmet_pci_epf_ctrl *ctrl,

	if (sq) {
		queue = &ctrl->sq[qid];
		set_bit(NVMET_PCI_EPF_Q_IS_SQ, &queue->flags);
	} else {
		queue = &ctrl->cq[qid];
		INIT_DELAYED_WORK(&queue->work, nvmet_pci_epf_cq_work);