Commit ee0c8a9b authored by Linus Torvalds
Browse files

Merge tag 'block-6.7-2023-12-01' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
     - Invalid namespace identification error handling (Maurizio, Ewan,
       Keith)
     - Fabrics keep-alive tuning (Mark)

 - Fix for a bad error check regression in bcache (Markus)

 - Fix for a performance regression with O_DIRECT (Ming)

 - Fix for a flush related deadlock (Ming)

 - Make the read-only warning per-partition (Yu)

* tag 'block-6.7-2023-12-01' of git://git.kernel.dk/linux:
  nvme-core: check for too small lba shift
  blk-mq: don't count completed flush data request as inflight in case of quiesce
  block: Document the role of the two attribute groups
  block: warn once for each partition in bio_check_ro()
  block: move .bd_inode into 1st cacheline of block_device
  nvme: check for valid nvme_identify_ns() before using it
  nvme-core: fix a memory leak in nvme_ns_info_from_identify()
  nvme: fine-tune sending of first keep-alive
  bcache: revert replacing IS_ERR_OR_NULL with IS_ERR
parents abd792f3 8ad3ac92
Loading
Loading
Loading
Loading
+11 −3
Original line number Diff line number Diff line
@@ -501,9 +501,17 @@ static inline void bio_check_ro(struct bio *bio)
	if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
			return;
		pr_warn_ratelimited("Trying to write to read-only block-device %pg\n",

		if (bio->bi_bdev->bd_ro_warned)
			return;

		bio->bi_bdev->bd_ro_warned = true;
		/*
		 * Use ioctl to set underlying disk of raid/dm to read-only
		 * will trigger this.
		 */
		pr_warn("Trying to write to read-only block-device %pg\n",
			bio->bi_bdev);
		/* Older lvm-tools actually trigger this */
	}
}

+13 −1
Original line number Diff line number Diff line
@@ -1512,14 +1512,26 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);

static bool blk_is_flush_data_rq(struct request *rq)
{
	return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq);
}

static bool blk_mq_rq_inflight(struct request *rq, void *priv)
{
	/*
	 * If we find a request that isn't idle we know the queue is busy
	 * as it's checked in the iter.
	 * Return false to stop the iteration.
	 */
	if (blk_mq_request_started(rq)) {
	 *
	 * In case of queue quiesce, if one flush data request is completed,
	 * don't count it as inflight given the flush sequence is suspended,
	 * and the original flush data request is invisible to driver, just
	 * like other pending requests because of quiesce
	 */
	if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) &&
				blk_is_flush_data_rq(rq) &&
				blk_mq_request_completed(rq))) {
		bool *busy = priv;

		*busy = true;
+2 −0
Original line number Diff line number Diff line
@@ -615,6 +615,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
#endif

/* Common attributes for bio-based and request-based queues. */
static struct attribute *queue_attrs[] = {
	&queue_ra_entry.attr,
	&queue_max_hw_sectors_entry.attr,
@@ -659,6 +660,7 @@ static struct attribute *queue_attrs[] = {
	NULL,
};

/* Request-based queue attributes that are not relevant for bio-based queues. */
static struct attribute *blk_mq_queue_attrs[] = {
	&queue_requests_entry.attr,
	&elv_iosched_entry.attr,
+1 −1
Original line number Diff line number Diff line
@@ -1522,7 +1522,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
	bch_keylist_free(&keylist);

	for (i = 0; i < nodes; i++)
		if (!IS_ERR(new_nodes[i])) {
		if (!IS_ERR_OR_NULL(new_nodes[i])) {
			btree_node_free(new_nodes[i]);
			rw_unlock(true, new_nodes[i]);
		}
+28 −6
Original line number Diff line number Diff line
@@ -1192,8 +1192,16 @@ static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)

/* Queue the controller's keep-alive work item (ctrl->ka_work) on nvme_wq. */
static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
{
	queue_delayed_work(nvme_wq, &ctrl->ka_work,
			   nvme_keep_alive_work_period(ctrl));
	unsigned long now = jiffies;
	unsigned long delay = nvme_keep_alive_work_period(ctrl);
	/* The next check is due one work period after the last recorded check. */
	unsigned long ka_next_check_tm = ctrl->ka_last_check_time + delay;

	/*
	 * If that point in time has already passed, queue the work with no
	 * delay; otherwise wait only for the time that still remains.
	 */
	if (time_after(now, ka_next_check_tm))
		delay = 0;
	else
		delay = ka_next_check_tm - now;

	queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
}

static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
@@ -1479,7 +1487,8 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
	if (id->ncap == 0) {
		/* namespace not allocated or attached */
		info->is_removed = true;
		return -ENODEV;
		ret = -ENODEV;
		goto error;
	}

	info->anagrpid = id->anagrpid;
@@ -1497,8 +1506,10 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
		    !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
			memcpy(ids->nguid, id->nguid, sizeof(ids->nguid));
	}

error:
	kfree(id);
	return 0;
	return ret;
}

static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
@@ -1890,9 +1901,10 @@ static void nvme_update_disk_info(struct gendisk *disk,

	/*
	 * The block layer can't support LBA sizes larger than the page size
	 * yet, so catch this early and don't allow block I/O.
	 * or smaller than a sector size yet, so catch this early and don't
	 * allow block I/O.
	 */
	if (ns->lba_shift > PAGE_SHIFT) {
	if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
		capacity = 0;
		bs = (1 << 9);
	}
@@ -2029,6 +2041,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
	if (ret)
		return ret;

	if (id->ncap == 0) {
		/* namespace not allocated or attached */
		info->is_removed = true;
		ret = -ENODEV;
		goto error;
	}

	blk_mq_freeze_queue(ns->disk->queue);
	lbaf = nvme_lbaf_index(id->flbas);
	ns->lba_shift = id->lbaf[lbaf].ds;
@@ -2090,6 +2109,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
		set_bit(NVME_NS_READY, &ns->flags);
		ret = 0;
	}

error:
	kfree(id);
	return ret;
}
@@ -4471,6 +4492,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
	ctrl->ka_last_check_time = jiffies;

	BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
			PAGE_SIZE);
Loading