Commit 0f7223a3 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'nvme-6.9-2024-03-07' of git://git.infradead.org/nvme into for-6.9/block

Pull NVMe updates from Keith:

"nvme updates for Linux 6.9

 - RDMA target enhancements (Max)
 - Fabrics fixes (Max, Guixin, Hannes)
 - Atomic queue_limits usage (Christoph)
 - Const use for class_register (Ricardo)
 - Identification error handling fixes (Shin'ichiro, Keith)"

* tag 'nvme-6.9-2024-03-07' of git://git.infradead.org/nvme: (31 commits)
  nvme: clear caller pointer on identify failure
  nvme: host: fix double-free of struct nvme_id_ns in ns_update_nuse()
  nvme: fcloop: make fcloop_class constant
  nvme: fabrics: make nvmf_class constant
  nvme: core: constify struct class usage
  nvme-fabrics: typo in nvmf_parse_key()
  nvme-multipath: use atomic queue limits API for stacking limits
  nvme-multipath: pass queue_limits to blk_alloc_disk
  nvme: use the atomic queue limits update API
  nvme: cleanup nvme_configure_metadata
  nvme: don't query identify data in configure_metadata
  nvme: split out a nvme_identify_ns_nvm helper
  nvme: move common logic into nvme_update_ns_info
  nvme: move setting the write cache flags out of nvme_set_queue_limits
  nvme: move a few things out of nvme_update_disk_info
  nvme: don't use nvme_update_disk_info for the multipath disk
  nvme: move blk_integrity_unregister into nvme_init_integrity
  nvme: cleanup the nvme_init_integrity calling conventions
  nvme: move max_integrity_segments handling out of nvme_init_integrity
  nvme: remove nvme_revalidate_zones
  ...
parents d37977f0 7e80eb79
Loading
Loading
Loading
Loading
+238 −218
Original line number Diff line number Diff line
@@ -114,12 +114,21 @@ static DEFINE_MUTEX(nvme_subsystems_lock);

static DEFINE_IDA(nvme_instance_ida);
static dev_t nvme_ctrl_base_chr_devt;
static struct class *nvme_class;
static struct class *nvme_subsys_class;
static int nvme_class_uevent(const struct device *dev, struct kobj_uevent_env *env);
static const struct class nvme_class = {
	.name = "nvme",
	.dev_uevent = nvme_class_uevent,
};

static const struct class nvme_subsys_class = {
	.name = "nvme-subsystem",
};

static DEFINE_IDA(nvme_ns_chr_minor_ida);
static dev_t nvme_ns_chr_devt;
static struct class *nvme_ns_chr_class;
static const struct class nvme_ns_chr_class = {
	.name = "nvme-generic",
};

static void nvme_put_subsystem(struct nvme_subsystem *subsys);
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
@@ -1394,8 +1403,10 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)

	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
			sizeof(struct nvme_id_ctrl));
	if (error)
	if (error) {
		kfree(*id);
		*id = NULL;
	}
	return error;
}

@@ -1524,6 +1535,7 @@ int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
	if (error) {
		dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
		kfree(*id);
		*id = NULL;
	}
	return error;
}
@@ -1723,12 +1735,23 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
	return 0;
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static void nvme_init_integrity(struct gendisk *disk,
		struct nvme_ns_head *head, u32 max_integrity_segments)
static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head)
{
	struct blk_integrity integrity = { };

	blk_integrity_unregister(disk);

	if (!head->ms)
		return true;

	/*
	 * PI can always be supported as we can ask the controller to simply
	 * insert/strip it, which is not possible for other kinds of metadata.
	 */
	if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) ||
	    !(head->features & NVME_NS_METADATA_SUPPORTED))
		return nvme_ns_has_pi(head);

	switch (head->pi_type) {
	case NVME_NS_DPS_PI_TYPE3:
		switch (head->guard_type) {
@@ -1773,52 +1796,30 @@ static void nvme_init_integrity(struct gendisk *disk,
	integrity.tuple_size = head->ms;
	integrity.pi_offset = head->pi_offset;
	blk_integrity_register(disk, &integrity);
	blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
}
#else
static void nvme_init_integrity(struct gendisk *disk,
		struct nvme_ns_head *head, u32 max_integrity_segments)
{
	return true;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
		struct nvme_ns_head *head)
static void nvme_config_discard(struct nvme_ns *ns, struct queue_limits *lim)
{
	struct request_queue *queue = disk->queue;
	u32 max_discard_sectors;

	if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) {
		max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl);
	} else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
		max_discard_sectors = UINT_MAX;
	} else {
		blk_queue_max_discard_sectors(queue, 0);
		return;
	}
	struct nvme_ctrl *ctrl = ns->ctrl;

	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
			NVME_DSM_MAX_RANGES);

	/*
	 * If discard is already enabled, don't reset queue limits.
	 *
	 * This works around the fact that the block layer can't cope well with
	 * updating the hardware limits when overridden through sysfs.  This is
	 * harmless because discard limits in NVMe are purely advisory.
	 */
	if (queue->limits.max_discard_sectors)
		return;
	if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX))
		lim->max_hw_discard_sectors =
			nvme_lba_to_sect(ns->head, ctrl->dmrsl);
	else if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
		lim->max_hw_discard_sectors = UINT_MAX;
	else
		lim->max_hw_discard_sectors = 0;

	lim->discard_granularity = lim->logical_block_size;

	blk_queue_max_discard_sectors(queue, max_discard_sectors);
	if (ctrl->dmrl)
		blk_queue_max_discard_segments(queue, ctrl->dmrl);
		lim->max_discard_segments = ctrl->dmrl;
	else
		blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
	queue->limits.discard_granularity = queue_logical_block_size(queue);

	if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
		blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
		lim->max_discard_segments = NVME_DSM_MAX_RANGES;
}

static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
@@ -1829,42 +1830,38 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
		a->csi == b->csi;
}

static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
		struct nvme_id_ns *id)
static int nvme_identify_ns_nvm(struct nvme_ctrl *ctrl, unsigned int nsid,
		struct nvme_id_ns_nvm **nvmp)
{
	bool first = id->dps & NVME_NS_DPS_PI_FIRST;
	unsigned lbaf = nvme_lbaf_index(id->flbas);
	struct nvme_command c = { };
	struct nvme_command c = {
		.identify.opcode	= nvme_admin_identify,
		.identify.nsid		= cpu_to_le32(nsid),
		.identify.cns		= NVME_ID_CNS_CS_NS,
		.identify.csi		= NVME_CSI_NVM,
	};
	struct nvme_id_ns_nvm *nvm;
	int ret = 0;
	u32 elbaf;

	head->pi_size = 0;
	head->ms = le16_to_cpu(id->lbaf[lbaf].ms);
	if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
		head->pi_size = sizeof(struct t10_pi_tuple);
		head->guard_type = NVME_NVM_NS_16B_GUARD;
		goto set_pi;
	}
	int ret;

	nvm = kzalloc(sizeof(*nvm), GFP_KERNEL);
	if (!nvm)
		return -ENOMEM;

	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(head->ns_id);
	c.identify.cns = NVME_ID_CNS_CS_NS;
	c.identify.csi = NVME_CSI_NVM;

	ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm));
	if (ret)
		goto free_data;
		kfree(nvm);
	else
		*nvmp = nvm;
	return ret;
}

	elbaf = le32_to_cpu(nvm->elbaf[lbaf]);
static void nvme_configure_pi_elbas(struct nvme_ns_head *head,
		struct nvme_id_ns *id, struct nvme_id_ns_nvm *nvm)
{
	u32 elbaf = le32_to_cpu(nvm->elbaf[nvme_lbaf_index(id->flbas)]);

	/* no support for storage tag formats right now */
	if (nvme_elbaf_sts(elbaf))
		goto free_data;
		return;

	head->guard_type = nvme_elbaf_guard_type(elbaf);
	switch (head->guard_type) {
@@ -1877,35 +1874,31 @@ static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
	default:
		break;
	}
}

free_data:
	kfree(nvm);
set_pi:
	if (head->pi_size && head->ms >= head->pi_size)
		head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
	else
static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
		struct nvme_ns_head *head, struct nvme_id_ns *id,
		struct nvme_id_ns_nvm *nvm)
{
	head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
	head->pi_type = 0;

	if (first)
	head->pi_size = 0;
	head->pi_offset = 0;
	else
		head->pi_offset = head->ms - head->pi_size;
	head->ms = le16_to_cpu(id->lbaf[nvme_lbaf_index(id->flbas)].ms);
	if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
		return;

	return ret;
	if (nvm && (ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
		nvme_configure_pi_elbas(head, id, nvm);
	} else {
		head->pi_size = sizeof(struct t10_pi_tuple);
		head->guard_type = NVME_NVM_NS_16B_GUARD;
	}

static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
		struct nvme_ns_head *head, struct nvme_id_ns *id)
{
	int ret;

	ret = nvme_init_ms(ctrl, head, id);
	if (ret)
		return ret;

	head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
	if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
		return 0;
	if (head->pi_size && head->ms >= head->pi_size)
		head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
	if (!(id->dps & NVME_NS_DPS_PI_FIRST))
		head->pi_offset = head->ms - head->pi_size;

	if (ctrl->ops->flags & NVME_F_FABRICS) {
		/*
@@ -1914,7 +1907,7 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
		 * remap the separate metadata buffer from the block layer.
		 */
		if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
			return 0;
			return;

		head->features |= NVME_NS_EXT_LBAS;

@@ -1941,33 +1934,32 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
		else
			head->features |= NVME_NS_METADATA_SUPPORTED;
	}
	return 0;
}

static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
		struct request_queue *q)
static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
{
	bool vwc = ctrl->vwc & NVME_CTRL_VWC_PRESENT;

	if (ctrl->max_hw_sectors) {
		u32 max_segments =
			(ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1;

		max_segments = min_not_zero(max_segments, ctrl->max_segments);
		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
	return ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> SECTOR_SHIFT) + 1;
}
	blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
	blk_queue_dma_alignment(q, 3);
	blk_queue_write_cache(q, vwc, vwc);

static void nvme_set_ctrl_limits(struct nvme_ctrl *ctrl,
		struct queue_limits *lim)
{
	lim->max_hw_sectors = ctrl->max_hw_sectors;
	lim->max_segments = min_t(u32, USHRT_MAX,
		min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments));
	lim->max_integrity_segments = ctrl->max_integrity_segments;
	lim->virt_boundary_mask = NVME_CTRL_PAGE_SIZE - 1;
	lim->max_segment_size = UINT_MAX;
	lim->dma_alignment = 3;
}

static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
		struct nvme_ns_head *head, struct nvme_id_ns *id)
static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
		struct queue_limits *lim)
{
	sector_t capacity = nvme_lba_to_sect(head, le64_to_cpu(id->nsze));
	struct nvme_ns_head *head = ns->head;
	u32 bs = 1U << head->lba_shift;
	u32 atomic_bs, phys_bs, io_opt = 0;
	bool valid = true;

	/*
	 * The block layer can't support LBA sizes larger than the page size
@@ -1975,12 +1967,10 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
	 * allow block I/O.
	 */
	if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) {
		capacity = 0;
		bs = (1 << 9);
		valid = false;
	}

	blk_integrity_unregister(disk);

	atomic_bs = phys_bs = bs;
	if (id->nabo == 0) {
		/*
@@ -1991,7 +1981,7 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
		if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
			atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
		else
			atomic_bs = (1 + ctrl->subsys->awupf) * bs;
			atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
	}

	if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
@@ -2001,36 +1991,20 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
		io_opt = bs * (1 + le16_to_cpu(id->nows));
	}

	blk_queue_logical_block_size(disk->queue, bs);
	/*
	 * Linux filesystems assume writing a single physical block is
	 * an atomic operation. Hence limit the physical block size to the
	 * value of the Atomic Write Unit Power Fail parameter.
	 */
	blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
	blk_queue_io_min(disk->queue, phys_bs);
	blk_queue_io_opt(disk->queue, io_opt);

	/*
	 * Register a metadata profile for PI, or the plain non-integrity NVMe
	 * metadata masquerading as Type 0 if supported, otherwise reject block
	 * I/O to namespaces with metadata except when the namespace supports
	 * PI, as it can strip/insert in that case.
	 */
	if (head->ms) {
		if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
		    (head->features & NVME_NS_METADATA_SUPPORTED))
			nvme_init_integrity(disk, head,
					    ctrl->max_integrity_segments);
		else if (!nvme_ns_has_pi(head))
			capacity = 0;
	}

	set_capacity_and_notify(disk, capacity);

	nvme_config_discard(ctrl, disk, head);
	blk_queue_max_write_zeroes_sectors(disk->queue,
					   ctrl->max_zeroes_sectors);
	lim->logical_block_size = bs;
	lim->physical_block_size = min(phys_bs, atomic_bs);
	lim->io_min = phys_bs;
	lim->io_opt = io_opt;
	if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
		lim->max_write_zeroes_sectors = UINT_MAX;
	else
		lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors;
	return valid;
}

static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info)
@@ -2044,7 +2018,8 @@ static inline bool nvme_first_scan(struct gendisk *disk)
	return !disk_live(disk);
}

static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id,
		struct queue_limits *lim)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	u32 iob;
@@ -2072,38 +2047,36 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
		return;
	}

	blk_queue_chunk_sectors(ns->queue, iob);
	lim->chunk_sectors = iob;
}

static int nvme_update_ns_info_generic(struct nvme_ns *ns,
		struct nvme_ns_info *info)
{
	struct queue_limits lim;
	int ret;

	blk_mq_freeze_queue(ns->disk->queue);
	nvme_set_queue_limits(ns->ctrl, ns->queue);
	lim = queue_limits_start_update(ns->disk->queue);
	nvme_set_ctrl_limits(ns->ctrl, &lim);
	ret = queue_limits_commit_update(ns->disk->queue, &lim);
	set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
	blk_mq_unfreeze_queue(ns->disk->queue);

	if (nvme_ns_head_multipath(ns->head)) {
		blk_mq_freeze_queue(ns->head->disk->queue);
		set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
		nvme_mpath_revalidate_paths(ns);
		blk_stack_limits(&ns->head->disk->queue->limits,
				 &ns->queue->limits, 0);
		ns->head->disk->flags |= GENHD_FL_HIDDEN;
		blk_mq_unfreeze_queue(ns->head->disk->queue);
	}

	/* Hide the block-interface for these devices */
	ns->disk->flags |= GENHD_FL_HIDDEN;
	set_bit(NVME_NS_READY, &ns->flags);

	return 0;
	if (!ret)
		ret = -ENODEV;
	return ret;
}

static int nvme_update_ns_info_block(struct nvme_ns *ns,
		struct nvme_ns_info *info)
{
	bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT;
	struct queue_limits lim;
	struct nvme_id_ns_nvm *nvm = NULL;
	struct nvme_id_ns *id;
	sector_t capacity;
	unsigned lbaf;
	int ret;

@@ -2115,30 +2088,52 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
		/* namespace not allocated or attached */
		info->is_removed = true;
		ret = -ENODEV;
		goto error;
		goto out;
	}

	if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) {
		ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm);
		if (ret < 0)
			goto out;
	}

	blk_mq_freeze_queue(ns->disk->queue);
	lbaf = nvme_lbaf_index(id->flbas);
	ns->head->lba_shift = id->lbaf[lbaf].ds;
	ns->head->nuse = le64_to_cpu(id->nuse);
	nvme_set_queue_limits(ns->ctrl, ns->queue);
	capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));

	ret = nvme_configure_metadata(ns->ctrl, ns->head, id);
	if (ret < 0) {
	lim = queue_limits_start_update(ns->disk->queue);
	nvme_set_ctrl_limits(ns->ctrl, &lim);
	nvme_configure_metadata(ns->ctrl, ns->head, id, nvm);
	nvme_set_chunk_sectors(ns, id, &lim);
	if (!nvme_update_disk_info(ns, id, &lim))
		capacity = 0;
	nvme_config_discard(ns, &lim);
	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
	    ns->head->ids.csi == NVME_CSI_ZNS) {
		ret = nvme_update_zone_info(ns, lbaf, &lim);
		if (ret) {
			blk_mq_unfreeze_queue(ns->disk->queue);
			goto out;
		}
	nvme_set_chunk_sectors(ns, id);
	nvme_update_disk_info(ns->ctrl, ns->disk, ns->head, id);

	if (ns->head->ids.csi == NVME_CSI_ZNS) {
		ret = nvme_update_zone_info(ns, lbaf);
	}
	ret = queue_limits_commit_update(ns->disk->queue, &lim);
	if (ret) {
		blk_mq_unfreeze_queue(ns->disk->queue);
		goto out;
	}
	}

	/*
	 * Register a metadata profile for PI, or the plain non-integrity NVMe
	 * metadata masquerading as Type 0 if supported, otherwise reject block
	 * I/O to namespaces with metadata except when the namespace supports
	 * PI, as it can strip/insert in that case.
	 */
	if (!nvme_init_integrity(ns->disk, ns->head))
		capacity = 0;

	set_capacity_and_notify(ns->disk, capacity);

	/*
	 * Only set the DEAC bit if the device guarantees that reads from
@@ -2149,62 +2144,81 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
	if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
		ns->head->features |= NVME_NS_DEAC;
	set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
	blk_queue_write_cache(ns->disk->queue, vwc, vwc);
	set_bit(NVME_NS_READY, &ns->flags);
	blk_mq_unfreeze_queue(ns->disk->queue);

	if (blk_queue_is_zoned(ns->queue)) {
		ret = nvme_revalidate_zones(ns);
		ret = blk_revalidate_disk_zones(ns->disk, NULL);
		if (ret && !nvme_first_scan(ns->disk))
			goto out;
	}

	if (nvme_ns_head_multipath(ns->head)) {
		blk_mq_freeze_queue(ns->head->disk->queue);
		nvme_update_disk_info(ns->ctrl, ns->head->disk, ns->head, id);
		set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
		nvme_mpath_revalidate_paths(ns);
		blk_stack_limits(&ns->head->disk->queue->limits,
				 &ns->queue->limits, 0);
		disk_update_readahead(ns->head->disk);
		blk_mq_unfreeze_queue(ns->head->disk->queue);
	}

	ret = 0;
out:
	/*
	 * If probing fails due an unsupported feature, hide the block device,
	 * but still allow other access.
	 */
	if (ret == -ENODEV) {
		ns->disk->flags |= GENHD_FL_HIDDEN;
		set_bit(NVME_NS_READY, &ns->flags);
		ret = 0;
	}

error:
	kfree(nvm);
	kfree(id);
	return ret;
}

static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
{
	bool unsupported = false;
	int ret;

	switch (info->ids.csi) {
	case NVME_CSI_ZNS:
		if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
			dev_info(ns->ctrl->device,
	"block device for nsid %u not supported without CONFIG_BLK_DEV_ZONED\n",
				info->nsid);
			return nvme_update_ns_info_generic(ns, info);
			ret = nvme_update_ns_info_generic(ns, info);
			break;
		}
		return nvme_update_ns_info_block(ns, info);
		ret = nvme_update_ns_info_block(ns, info);
		break;
	case NVME_CSI_NVM:
		return nvme_update_ns_info_block(ns, info);
		ret = nvme_update_ns_info_block(ns, info);
		break;
	default:
		dev_info(ns->ctrl->device,
			"block device for nsid %u not supported (csi %u)\n",
			info->nsid, info->ids.csi);
		return nvme_update_ns_info_generic(ns, info);
		ret = nvme_update_ns_info_generic(ns, info);
		break;
	}

	/*
	 * If probing fails due an unsupported feature, hide the block device,
	 * but still allow other access.
	 */
	if (ret == -ENODEV) {
		ns->disk->flags |= GENHD_FL_HIDDEN;
		set_bit(NVME_NS_READY, &ns->flags);
		unsupported = true;
		ret = 0;
	}

	if (!ret && nvme_ns_head_multipath(ns->head)) {
		struct queue_limits lim;

		blk_mq_freeze_queue(ns->head->disk->queue);
		if (unsupported)
			ns->head->disk->flags |= GENHD_FL_HIDDEN;
		else
			nvme_init_integrity(ns->head->disk, ns->head);
		set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
		set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
		nvme_mpath_revalidate_paths(ns);

		lim = queue_limits_start_update(ns->head->disk->queue);
		queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
					ns->head->disk->disk_name);
		ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
		blk_mq_unfreeze_queue(ns->head->disk->queue);
	}

	return ret;
}

#ifdef CONFIG_BLK_SED_OPAL
@@ -2879,7 +2893,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
	subsys->awupf = le16_to_cpu(id->awupf);
	nvme_mpath_default_iopolicy(subsys);

	subsys->dev.class = nvme_subsys_class;
	subsys->dev.class = &nvme_subsys_class;
	subsys->dev.release = nvme_release_subsystem;
	subsys->dev.groups = nvme_subsys_attrs_groups;
	dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance);
@@ -3119,11 +3133,17 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct
		return -EINVAL;
	}

	if (!ctrl->maxcmd) {
		dev_err(ctrl->device, "Maximum outstanding commands is 0\n");
		return -EINVAL;
	}

	return 0;
}

static int nvme_init_identify(struct nvme_ctrl *ctrl)
{
	struct queue_limits lim;
	struct nvme_id_ctrl *id;
	u32 max_hw_sectors;
	bool prev_apst_enabled;
@@ -3190,7 +3210,12 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
	ctrl->max_hw_sectors =
		min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);

	nvme_set_queue_limits(ctrl, ctrl->admin_q);
	lim = queue_limits_start_update(ctrl->admin_q);
	nvme_set_ctrl_limits(ctrl, &lim);
	ret = queue_limits_commit_update(ctrl->admin_q, &lim);
	if (ret)
		goto out_free;

	ctrl->sgls = le32_to_cpu(id->sgls);
	ctrl->kas = le16_to_cpu(id->kas);
	ctrl->max_namespaces = le32_to_cpu(id->mnan);
@@ -3422,7 +3447,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
	if (minor < 0)
		return minor;
	cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
	cdev_device->class = nvme_ns_chr_class;
	cdev_device->class = &nvme_ns_chr_class;
	cdev_device->release = nvme_cdev_rel;
	device_initialize(cdev_device);
	cdev_init(cdev, fops);
@@ -4353,6 +4378,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
		const struct blk_mq_ops *ops, unsigned int cmd_size)
{
	struct queue_limits lim = {};
	int ret;

	memset(set, 0, sizeof(*set));
@@ -4372,7 +4398,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
	if (ret)
		return ret;

	ctrl->admin_q = blk_mq_alloc_queue(set, NULL, NULL);
	ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL);
	if (IS_ERR(ctrl->admin_q)) {
		ret = PTR_ERR(ctrl->admin_q);
		goto out_free_tagset;
@@ -4613,7 +4639,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
	ctrl->device = &ctrl->ctrl_device;
	ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
			ctrl->instance);
	ctrl->device->class = nvme_class;
	ctrl->device->class = &nvme_class;
	ctrl->device->parent = ctrl->dev;
	if (ops->dev_attr_groups)
		ctrl->device->groups = ops->dev_attr_groups;
@@ -4846,42 +4872,36 @@ static int __init nvme_core_init(void)
	if (result < 0)
		goto destroy_delete_wq;

	nvme_class = class_create("nvme");
	if (IS_ERR(nvme_class)) {
		result = PTR_ERR(nvme_class);
	result = class_register(&nvme_class);
	if (result)
		goto unregister_chrdev;
	}
	nvme_class->dev_uevent = nvme_class_uevent;

	nvme_subsys_class = class_create("nvme-subsystem");
	if (IS_ERR(nvme_subsys_class)) {
		result = PTR_ERR(nvme_subsys_class);
	result = class_register(&nvme_subsys_class);
	if (result)
		goto destroy_class;
	}

	result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS,
				     "nvme-generic");
	if (result < 0)
		goto destroy_subsys_class;

	nvme_ns_chr_class = class_create("nvme-generic");
	if (IS_ERR(nvme_ns_chr_class)) {
		result = PTR_ERR(nvme_ns_chr_class);
	result = class_register(&nvme_ns_chr_class);
	if (result)
		goto unregister_generic_ns;
	}

	result = nvme_init_auth();
	if (result)
		goto destroy_ns_chr;
	return 0;

destroy_ns_chr:
	class_destroy(nvme_ns_chr_class);
	class_unregister(&nvme_ns_chr_class);
unregister_generic_ns:
	unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
destroy_subsys_class:
	class_destroy(nvme_subsys_class);
	class_unregister(&nvme_subsys_class);
destroy_class:
	class_destroy(nvme_class);
	class_unregister(&nvme_class);
unregister_chrdev:
	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
destroy_delete_wq:
@@ -4897,9 +4917,9 @@ static int __init nvme_core_init(void)
static void __exit nvme_core_exit(void)
{
	nvme_exit_auth();
	class_destroy(nvme_ns_chr_class);
	class_destroy(nvme_subsys_class);
	class_destroy(nvme_class);
	class_unregister(&nvme_ns_chr_class);
	class_unregister(&nvme_subsys_class);
	class_unregister(&nvme_class);
	unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
	destroy_workqueue(nvme_delete_wq);
+12 −10
Original line number Diff line number Diff line
@@ -637,7 +637,7 @@ static struct key *nvmf_parse_key(int key_id)
	}

	key = key_lookup(key_id);
	if (!IS_ERR(key))
	if (IS_ERR(key))
		pr_err("key id %08x not found\n", key_id);
	else
		pr_debug("Using key id %08x\n", key_id);
@@ -1318,7 +1318,10 @@ nvmf_create_ctrl(struct device *dev, const char *buf)
	return ERR_PTR(ret);
}

static struct class *nvmf_class;
static const struct class nvmf_class = {
	.name = "nvme-fabrics",
};

static struct device *nvmf_device;
static DEFINE_MUTEX(nvmf_dev_mutex);

@@ -1438,15 +1441,14 @@ static int __init nvmf_init(void)
	if (!nvmf_default_host)
		return -ENOMEM;

	nvmf_class = class_create("nvme-fabrics");
	if (IS_ERR(nvmf_class)) {
	ret = class_register(&nvmf_class);
	if (ret) {
		pr_err("couldn't register class nvme-fabrics\n");
		ret = PTR_ERR(nvmf_class);
		goto out_free_host;
	}

	nvmf_device =
		device_create(nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
		device_create(&nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
	if (IS_ERR(nvmf_device)) {
		pr_err("couldn't create nvme-fabrics device!\n");
		ret = PTR_ERR(nvmf_device);
@@ -1462,9 +1464,9 @@ static int __init nvmf_init(void)
	return 0;

out_destroy_device:
	device_destroy(nvmf_class, MKDEV(0, 0));
	device_destroy(&nvmf_class, MKDEV(0, 0));
out_destroy_class:
	class_destroy(nvmf_class);
	class_unregister(&nvmf_class);
out_free_host:
	nvmf_host_put(nvmf_default_host);
	return ret;
@@ -1473,8 +1475,8 @@ static int __init nvmf_init(void)
static void __exit nvmf_exit(void)
{
	misc_deregister(&nvmf_misc);
	device_destroy(nvmf_class, MKDEV(0, 0));
	class_destroy(nvmf_class);
	device_destroy(&nvmf_class, MKDEV(0, 0));
	class_unregister(&nvmf_class);
	nvmf_host_put(nvmf_default_host);

	BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
+7 −6
Original line number Diff line number Diff line
@@ -516,6 +516,7 @@ static void nvme_requeue_work(struct work_struct *work)

int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
	struct queue_limits lim;
	bool vwc = false;

	mutex_init(&head->lock);
@@ -532,7 +533,12 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
	    !nvme_is_unique_nsid(ctrl, head) || !multipath)
		return 0;

	head->disk = blk_alloc_disk(NULL, ctrl->numa_node);
	blk_set_stacking_limits(&lim);
	lim.dma_alignment = 3;
	if (head->ids.csi != NVME_CSI_ZNS)
		lim.max_zone_append_sectors = 0;

	head->disk = blk_alloc_disk(&lim, ctrl->numa_node);
	if (IS_ERR(head->disk))
		return PTR_ERR(head->disk);
	head->disk->fops = &nvme_ns_head_ops;
@@ -553,11 +559,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
	    ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues)
		blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue);

	/* set to a default value of 512 until the disk is validated */
	blk_queue_logical_block_size(head->disk->queue, 512);
	blk_set_stacking_limits(&head->disk->queue->limits);
	blk_queue_dma_alignment(head->disk->queue, 3);

	/* we need to propagate up the VMC settings */
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		vwc = true;
+2 −9
Original line number Diff line number Diff line
@@ -1036,11 +1036,11 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
}
#endif /* CONFIG_NVME_MULTIPATH */

int nvme_revalidate_zones(struct nvme_ns *ns);
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
		unsigned int nr_zones, report_zones_cb cb, void *data);
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
		struct queue_limits *lim);
#ifdef CONFIG_BLK_DEV_ZONED
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf);
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
				       struct nvme_command *cmnd,
				       enum nvme_zone_mgmt_action action);
@@ -1051,13 +1051,6 @@ static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
{
	return BLK_STS_NOTSUPP;
}

static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
{
	dev_warn(ns->ctrl->device,
		 "Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n");
	return -EPROTONOSUPPORT;
}
#endif

static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
+10 −4

File changed.

Preview size limit exceeded, changes collapsed.

Loading