Commit f4a1e8e3 authored by Linus Torvalds
Browse files

Merge tag 'block-6.12-20241101' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - Fixup for a recent blk_rq_map_user_bvec() patch

 - NVMe pull request via Keith:
     - Spec compliant identification fix (Keith)
     - Module parameter to enable backward compatibility on unusual
       namespace formats (Keith)
     - Target double free fix when using keys (Vitaliy)
     - Passthrough command error handling fix (Keith)

* tag 'block-6.12-20241101' of git://git.kernel.dk/linux:
  nvme: re-fix error-handling for io_uring nvme-passthrough
  nvmet-auth: assign dh_key to NULL after kfree_sensitive
  nvme: module parameter to disable pi with offsets
  block: fix queue limits checks in blk_rq_map_user_bvec for real
  nvme: enhance cns version checking
parents f0d3699a d0c6cc6c
Loading
Loading
Loading
Loading
+17 −39
Original line number Diff line number Diff line
@@ -561,55 +561,33 @@ EXPORT_SYMBOL(blk_rq_append_bio);
/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
	struct request_queue *q = rq->q;
	size_t nr_iter = iov_iter_count(iter);
	size_t nr_segs = iter->nr_segs;
	struct bio_vec *bvecs, *bvprvp = NULL;
	const struct queue_limits *lim = &q->limits;
	unsigned int nsegs = 0, bytes = 0;
	const struct queue_limits *lim = &rq->q->limits;
	unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
	unsigned int nsegs;
	struct bio *bio;
	size_t i;
	int ret;

	if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
		return -EINVAL;
	if (nr_segs > queue_max_segments(q))
	if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
		return -EINVAL;

	/* no iovecs to alloc, as we already have a BVEC iterator */
	/* reuse the bvecs from the iterator instead of allocating new ones */
	bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
	if (bio == NULL)
	if (!bio)
		return -ENOMEM;

	bio_iov_bvec_set(bio, (struct iov_iter *)iter);
	blk_rq_bio_prep(rq, bio, nr_segs);

	/* loop to perform a bunch of sanity checks */
	bvecs = (struct bio_vec *)iter->bvec;
	for (i = 0; i < nr_segs; i++) {
		struct bio_vec *bv = &bvecs[i];

		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, fallback to copy.
		 */
		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
	/* check that the data layout matches the hardware restrictions */
	ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
	if (ret) {
		/* if we would have to split the bio, copy instead */
		if (ret > 0)
			ret = -EREMOTEIO;
		blk_mq_map_bio_put(bio);
			return -EREMOTEIO;
		return ret;
	}
		/* check full condition */
		if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
			goto put_bio;
		if (bytes + bv->bv_len > nr_iter)
			break;

		nsegs++;
		bytes += bv->bv_len;
		bvprvp = bv;
	}
	blk_rq_bio_prep(rq, bio, nsegs);
	return 0;
put_bio:
	blk_mq_map_bio_put(bio);
	return -EINVAL;
}

/**
+42 −14
Original line number Diff line number Diff line
@@ -91,6 +91,17 @@ module_param(apst_secondary_latency_tol_us, ulong, 0644);
MODULE_PARM_DESC(apst_secondary_latency_tol_us,
	"secondary APST latency tolerance in us");

/*
 * Older kernels did not enable protection information (PI) when it was
 * located at an offset. Newer kernels do, which breaks reads after an upgrade
 * if such formats were used with a prior kernel, because the metadata written
 * back then did not contain a valid checksum.
 *
 * Setting this parameter disables PI for formats that place it at an offset.
 */
static bool disable_pi_offsets = false;
/* Permission 0444: the parameter is read-only for all users. */
module_param(disable_pi_offsets, bool, 0444);
MODULE_PARM_DESC(disable_pi_offsets,
	"disable protection information if it has an offset");

/*
 * nvme_wq - hosts nvme related works that are not reset or delete
 * nvme_reset_wq - hosts nvme reset works
@@ -1390,17 +1401,30 @@ static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
	nvme_start_keep_alive(ctrl);
}

static bool nvme_id_cns_ok(struct nvme_ctrl *ctrl, u8 cns)
{
	/*
 * In NVMe 1.0 the CNS field was just a binary controller or namespace
 * flag, thus sending any new CNS opcodes has a big chance of not working.
 * Qemu unfortunately had that bug after reporting a 1.1 version compliance
 * (but not for any later version).
	 * The CNS field occupies a full byte starting with NVMe 1.2
	 */
static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl)
{
	if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)
		return ctrl->vs < NVME_VS(1, 2, 0);
	return ctrl->vs < NVME_VS(1, 1, 0);
	if (ctrl->vs >= NVME_VS(1, 2, 0))
		return true;

	/*
	 * NVMe 1.1 expanded the CNS value to two bits, which means values
	 * larger than that could get truncated and treated as an incorrect
	 * value.
	 *
	 * Qemu implemented 1.0 behavior for controllers claiming 1.1
	 * compliance, so they need to be quirked here.
	 */
	if (ctrl->vs >= NVME_VS(1, 1, 0) &&
	    !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS))
		return cns <= 3;

	/*
	 * NVMe 1.0 used a single bit for the CNS value.
	 */
	return cns <= 1;
}

static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
@@ -1913,8 +1937,12 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,

	if (head->pi_size && head->ms >= head->pi_size)
		head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
	if (!(id->dps & NVME_NS_DPS_PI_FIRST))
	if (!(id->dps & NVME_NS_DPS_PI_FIRST)) {
		if (disable_pi_offsets)
			head->pi_type = 0;
		else
			info->pi_offset = head->ms - head->pi_size;
	}

	if (ctrl->ops->flags & NVME_F_FABRICS) {
		/*
@@ -3104,7 +3132,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
		ctrl->max_zeroes_sectors = 0;

	if (ctrl->subsys->subtype != NVME_NQN_NVME ||
	    nvme_ctrl_limited_cns(ctrl) ||
	    !nvme_id_cns_ok(ctrl, NVME_ID_CNS_CS_CTRL) ||
	    test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags))
		return 0;

@@ -4200,7 +4228,7 @@ static void nvme_scan_work(struct work_struct *work)
	}

	mutex_lock(&ctrl->scan_lock);
	if (nvme_ctrl_limited_cns(ctrl)) {
	if (!nvme_id_cns_ok(ctrl, NVME_ID_CNS_NS_ACTIVE_LIST)) {
		nvme_scan_ns_sequential(ctrl);
	} else {
		/*
+5 −2
Original line number Diff line number Diff line
@@ -421,10 +421,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
	struct io_uring_cmd *ioucmd = req->end_io_data;
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
	if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
		pdu->status = -EINTR;
	else
	} else {
		pdu->status = nvme_req(req)->status;
		if (!pdu->status)
			pdu->status = blk_status_to_errno(err);
	}
	pdu->result = le64_to_cpu(nvme_req(req)->result.u64);

	/*
+1 −0
Original line number Diff line number Diff line
@@ -115,6 +115,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
			pr_debug("%s: ctrl %d failed to generate private key, err %d\n",
				 __func__, ctrl->cntlid, ret);
			kfree_sensitive(ctrl->dh_key);
			ctrl->dh_key = NULL;
			return ret;
		}
		ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm);