Commit cfd47302 authored by Linus Torvalds
Browse files

Merge tag 'block-6.13-20242901' of git://git.kernel.dk/linux

Pull more block updates from Jens Axboe:

 - NVMe pull request via Keith:
      - Use correct srcu list traversal (Breno)
      - Scatter-gather support for metadata (Keith)
      - Fabrics shutdown race condition fix (Nilay)
      - Persistent reservations updates (Guixin)

 - Add the required bits for MD atomic write support for raid0/1/10

 - Correct return value for unknown opcode in ublk

 - Fix deadlock with zone revalidation

 - Fix for the io priority request vs bio cleanups

 - Use the correct unsigned int type for various limit helpers

 - Fix for a race in loop

 - Clean up blk_rq_prep_clone() to prevent uninit-value warning and make
   it easier for actual humans to read

 - Fix potential UAF when iterating tags

 - A few fixes for bfq-iosched UAF issues

 - Fix for brd discard not decrementing the allocated page count

 - Various little fixes and cleanups

* tag 'block-6.13-20242901' of git://git.kernel.dk/linux: (36 commits)
  brd: decrease the number of allocated pages which discarded
  block, bfq: fix bfqq uaf in bfq_limit_depth()
  block: Don't allow an atomic write be truncated in blkdev_write_iter()
  mq-deadline: don't call req_get_ioprio from the I/O completion handler
  block: Prevent potential deadlock in blk_revalidate_disk_zones()
  block: Remove extra part pointer NULLify in blk_rq_init()
  nvme: tuning pr code by using defined structs and macros
  nvme: introduce change ptpl and iekey definition
  block: return bool from get_disk_ro and bdev_read_only
  block: remove a duplicate definition for bdev_read_only
  block: return bool from blk_rq_aligned
  block: return unsigned int from blk_lim_dma_alignment_and_pad
  block: return unsigned int from queue_dma_alignment
  block: return unsigned int from bdev_io_opt
  block: req->bio is always set in the merge code
  block: don't bother checking the data direction for merges
  block: blk-mq: fix uninit-value in blk_rq_prep_clone and refactor
  Revert "block, bfq: merge bfq_release_process_ref() into bfq_put_cooperator()"
  md/raid10: Atomic write support
  md/raid1: Atomic write support
  ...
parents dd54fcce 82734209
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -736,6 +736,7 @@ static void bfq_sync_bfqq_move(struct bfq_data *bfqd,
		 */
		bfq_put_cooperator(sync_bfqq);
		bic_set_bfqq(bic, NULL, true, act_idx);
		bfq_release_process_ref(bfqd, sync_bfqq);
	}
}

+28 −15
Original line number Diff line number Diff line
@@ -582,23 +582,31 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
#define BFQ_LIMIT_INLINE_DEPTH 16

#ifdef CONFIG_BFQ_GROUP_IOSCHED
static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
static bool bfqq_request_over_limit(struct bfq_data *bfqd,
				    struct bfq_io_cq *bic, blk_opf_t opf,
				    unsigned int act_idx, int limit)
{
	struct bfq_data *bfqd = bfqq->bfqd;
	struct bfq_entity *entity = &bfqq->entity;
	struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH];
	struct bfq_entity **entities = inline_entities;
	int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
	int class_idx = bfqq->ioprio_class - 1;
	int alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
	struct bfq_sched_data *sched_data;
	struct bfq_entity *entity;
	struct bfq_queue *bfqq;
	unsigned long wsum;
	bool ret = false;

	if (!entity->on_st_or_in_serv)
		return false;
	int depth;
	int level;

retry:
	spin_lock_irq(&bfqd->lock);
	bfqq = bic_to_bfqq(bic, op_is_sync(opf), act_idx);
	if (!bfqq)
		goto out;

	entity = &bfqq->entity;
	if (!entity->on_st_or_in_serv)
		goto out;

	/* +1 for bfqq entity, root cgroup not included */
	depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1;
	if (depth > alloc_depth) {
@@ -643,7 +651,7 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
			 * class.
			 */
			wsum = 0;
			for (i = 0; i <= class_idx; i++) {
			for (i = 0; i <= bfqq->ioprio_class - 1; i++) {
				wsum = wsum * IOPRIO_BE_NR +
					sched_data->service_tree[i].wsum;
			}
@@ -666,7 +674,9 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
	return ret;
}
#else
static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
static bool bfqq_request_over_limit(struct bfq_data *bfqd,
				    struct bfq_io_cq *bic, blk_opf_t opf,
				    unsigned int act_idx, int limit)
{
	return false;
}
@@ -704,8 +714,9 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
	}

	for (act_idx = 0; bic && act_idx < bfqd->num_actuators; act_idx++) {
		struct bfq_queue *bfqq =
			bic_to_bfqq(bic, op_is_sync(opf), act_idx);
		/* Fast path to check if bfqq is already allocated. */
		if (!bic_to_bfqq(bic, op_is_sync(opf), act_idx))
			continue;

		/*
		 * Does queue (or any parent entity) exceed number of
@@ -713,7 +724,7 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
		 * limit depth so that it cannot consume more
		 * available requests and thus starve other entities.
		 */
		if (bfqq && bfqq_request_over_limit(bfqq, limit)) {
		if (bfqq_request_over_limit(bfqd, bic, opf, act_idx, limit)) {
			depth = 1;
			break;
		}
@@ -5434,8 +5445,6 @@ void bfq_put_cooperator(struct bfq_queue *bfqq)
		bfq_put_queue(__bfqq);
		__bfqq = next;
	}

	bfq_release_process_ref(bfqq->bfqd, bfqq);
}

static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
@@ -5448,6 +5457,8 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
	bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref);

	bfq_put_cooperator(bfqq);

	bfq_release_process_ref(bfqd, bfqq);
}

static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync,
@@ -6734,6 +6745,8 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
	bic_set_bfqq(bic, NULL, true, bfqq->actuator_idx);

	bfq_put_cooperator(bfqq);

	bfq_release_process_ref(bfqq->bfqd, bfqq);
	return NULL;
}

+7 −28
Original line number Diff line number Diff line
@@ -864,17 +864,10 @@ static struct request *attempt_merge(struct request_queue *q,
	if (req_op(req) != req_op(next))
		return NULL;

	if (rq_data_dir(req) != rq_data_dir(next))
		return NULL;

	if (req->bio && next->bio) {
		/* Don't merge requests with different write hints. */
	if (req->bio->bi_write_hint != next->bio->bi_write_hint)
		return NULL;
	if (req->bio->bi_ioprio != next->bio->bi_ioprio)
		return NULL;
	}

	if (!blk_atomic_write_mergeable_rqs(req, next))
		return NULL;

@@ -986,30 +979,16 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
	if (req_op(rq) != bio_op(bio))
		return false;

	/* different data direction or already started, don't merge */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return false;

	/* don't merge across cgroup boundaries */
	if (!blk_cgroup_mergeable(rq, bio))
		return false;

	/* only merge integrity protected bio into ditto rq */
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
		return false;

	/* Only merge if the crypt contexts are compatible */
	if (!bio_crypt_rq_ctx_compatible(rq, bio))
		return false;

	if (rq->bio) {
		/* Don't merge requests with different write hints. */
	if (rq->bio->bi_write_hint != bio->bi_write_hint)
		return false;
	if (rq->bio->bi_ioprio != bio->bi_ioprio)
		return false;
	}

	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
		return false;

+6 −8
Original line number Diff line number Diff line
@@ -388,7 +388,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
	rq->tag = BLK_MQ_NO_TAG;
	rq->internal_tag = BLK_MQ_NO_TAG;
	rq->start_time_ns = blk_time_get_ns();
	rq->part = NULL;
	blk_crypto_rq_set_defaults(rq);
}
EXPORT_SYMBOL(blk_rq_init);
@@ -3273,19 +3272,21 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
		      int (*bio_ctr)(struct bio *, struct bio *, void *),
		      void *data)
{
	struct bio *bio, *bio_src;
	struct bio *bio_src;

	if (!bs)
		bs = &fs_bio_set;

	__rq_for_each_bio(bio_src, rq_src) {
		bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask,
				      bs);
		struct bio *bio	 = bio_alloc_clone(rq->q->disk->part0, bio_src,
					gfp_mask, bs);
		if (!bio)
			goto free_and_out;

		if (bio_ctr && bio_ctr(bio, bio_src, data))
		if (bio_ctr && bio_ctr(bio, bio_src, data)) {
			bio_put(bio);
			goto free_and_out;
		}

		if (rq->bio) {
			rq->biotail->bi_next = bio;
@@ -3293,7 +3294,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
		} else {
			rq->bio = rq->biotail = bio;
		}
		bio = NULL;
	}

	/* Copy attributes of the original request to the clone request. */
@@ -3311,8 +3311,6 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
	return 0;

free_and_out:
	if (bio)
		bio_put(bio);
	blk_rq_unprep_clone(rq);

	return -ENOMEM;
+139 −2
Original line number Diff line number Diff line
@@ -178,9 +178,26 @@ static void blk_validate_atomic_write_limits(struct queue_limits *lim)
	if (!lim->atomic_write_hw_max)
		goto unsupported;

	if (WARN_ON_ONCE(!is_power_of_2(lim->atomic_write_hw_unit_min)))
		goto unsupported;

	if (WARN_ON_ONCE(!is_power_of_2(lim->atomic_write_hw_unit_max)))
		goto unsupported;

	if (WARN_ON_ONCE(lim->atomic_write_hw_unit_min >
			 lim->atomic_write_hw_unit_max))
		goto unsupported;

	if (WARN_ON_ONCE(lim->atomic_write_hw_unit_max >
			 lim->atomic_write_hw_max))
		goto unsupported;

	boundary_sectors = lim->atomic_write_hw_boundary >> SECTOR_SHIFT;

	if (boundary_sectors) {
		if (WARN_ON_ONCE(lim->atomic_write_hw_max >
				 lim->atomic_write_hw_boundary))
			goto unsupported;
		/*
		 * A feature of boundary support is that it disallows bios to
		 * be merged which would result in a merged request which
@@ -248,6 +265,13 @@ int blk_validate_limits(struct queue_limits *lim)
	if (lim->io_min < lim->physical_block_size)
		lim->io_min = lim->physical_block_size;

	/*
	 * The optimal I/O size may not be aligned to physical block size
	 * (because it may be limited by dma engines which have no clue about
	 * block size of the disks attached to them), so we round it down here.
	 */
	lim->io_opt = round_down(lim->io_opt, lim->physical_block_size);

	/*
	 * max_hw_sectors has a somewhat weird default for historical reason,
	 * but driver really should set their own instead of relying on this
@@ -458,8 +482,6 @@ static unsigned int queue_limit_discard_alignment(
	/* Why are these in bytes, not sectors? */
	alignment = lim->discard_alignment >> SECTOR_SHIFT;
	granularity = lim->discard_granularity >> SECTOR_SHIFT;
	if (!granularity)
		return 0;

	/* Offset of the partition start in 'granularity' sectors */
	offset = sector_div(sector, granularity);
@@ -479,6 +501,119 @@ static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lb
	return sectors;
}

/*
 * Fold the atomic write limits of a second or later bottom device @b into
 * the top device limits @t, which already hold the result of earlier
 * stacking.
 *
 * Returns false, without modifying @t, when @b is not compliant with what
 * has been stacked so far. Returns true after narrowing @t otherwise.
 */
static bool blk_stack_atomic_writes_tail(struct queue_limits *t,
				struct queue_limits *b)
{
	/*
	 * Not compliant when the boundaries differ (different boundary sizes
	 * are not supported yet), or when the unit_min..unit_max ranges of
	 * the two devices do not overlap.
	 */
	if (t->atomic_write_hw_boundary != b->atomic_write_hw_boundary ||
	    t->atomic_write_hw_unit_min > b->atomic_write_hw_unit_max ||
	    t->atomic_write_hw_unit_max < b->atomic_write_hw_unit_min)
		return false;

	/* Keep the intersection: lowest maxima, highest minimum. */
	if (b->atomic_write_hw_max < t->atomic_write_hw_max)
		t->atomic_write_hw_max = b->atomic_write_hw_max;
	if (b->atomic_write_hw_unit_min > t->atomic_write_hw_unit_min)
		t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min;
	if (b->atomic_write_hw_unit_max < t->atomic_write_hw_unit_max)
		t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;

	return true;
}

/*
 * Check the atomic write boundary of the first bottom device @b against the
 * top device @t and, when it is usable, copy it into @t.
 *
 * Stacked devices store chunk sectors in t->io_min, so the boundary and
 * t->io_min must be aligned with each other: whichever of the two is larger
 * has to be a multiple of the smaller one. Equal values are always accepted.
 *
 * Returns true when the boundary was accepted, false otherwise (in which
 * case @t is left unchanged).
 */
static bool blk_stack_atomic_writes_boundary_head(struct queue_limits *t,
				struct queue_limits *b)
{
	if (b->atomic_write_hw_boundary > t->io_min) {
		/* Boundary is the larger value: must be a whole number of chunks */
		if (b->atomic_write_hw_boundary % t->io_min)
			return false;
	} else if (t->io_min > b->atomic_write_hw_boundary) {
		/* Chunk is the larger value: must be a whole number of boundaries */
		if (t->io_min % b->atomic_write_hw_boundary)
			return false;
	}

	t->atomic_write_hw_boundary = b->atomic_write_hw_boundary;
	return true;
}


/*
 * Stack the atomic write limits of the first bottom device @b onto the top
 * device @t.
 *
 * Returns false when @b has an atomic write boundary that does not pass
 * blk_stack_atomic_writes_boundary_head(); returns true once the
 * atomic_write_hw_{unit_min,unit_max,max} limits of @t have been set.
 */
static bool blk_stack_atomic_writes_head(struct queue_limits *t,
				struct queue_limits *b)
{
	if (b->atomic_write_hw_boundary &&
	    !blk_stack_atomic_writes_boundary_head(t, b))
		return false;

	/* Both paths below start from the bottom device's unit_max. */
	t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;

	if (t->io_min <= SECTOR_SIZE) {
		/* No chunk sectors, so take the remaining values as they are */
		t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min;
		t->atomic_write_hw_max = b->atomic_write_hw_max;
		return true;
	}

	/*
	 * A chunk size (t->io_min) is in effect and it is not restricted to
	 * a power-of-2, so the bottom device's unit limits may not be
	 * aligned with it. Halve unit_max until the chunk size is a multiple
	 * of it, i.e. pick the highest power-of-2 that works for the chunk
	 * size. Example: b->unit_max = 16K with t->io_min = 24K ends up with
	 * t->unit_max = 8K.
	 */
	for (; t->io_min % t->atomic_write_hw_unit_max;
	     t->atomic_write_hw_unit_max /= 2)
		;

	/* unit_min must not exceed the (possibly reduced) unit_max */
	t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min;
	if (t->atomic_write_hw_unit_min > t->atomic_write_hw_unit_max)
		t->atomic_write_hw_unit_min = t->atomic_write_hw_unit_max;

	/* The overall maximum is capped at the chunk size */
	t->atomic_write_hw_max = b->atomic_write_hw_max;
	if (t->atomic_write_hw_max > t->io_min)
		t->atomic_write_hw_max = t->io_min;

	return true;
}

/*
 * Stack the atomic write limits of bottom device @b onto top device @t.
 *
 * Atomic writes stay enabled on @t only if @t has
 * BLK_FEAT_ATOMIC_WRITES_STACKED set, @b reports atomic write support, and
 * the limits of @b can be combined with whatever has been stacked so far.
 * In every other case all atomic_write_hw_* limits of @t are zeroed and
 * BLK_FEAT_ATOMIC_WRITES_STACKED is cleared from @t.
 */
static void blk_stack_atomic_writes_limits(struct queue_limits *t,
				struct queue_limits *b)
{
	if (!(t->features & BLK_FEAT_ATOMIC_WRITES_STACKED))
		goto unsupported;

	/*
	 * Test the hardware limit, like every other access in the stacking
	 * helpers, rather than the derived atomic_write_unit_min.
	 * NOTE(review): the derived limit is presumably only computed when a
	 * limits structure is validated, so it may still be zero on limits
	 * that are merely being stacked -- confirm against the callers.
	 */
	if (!b->atomic_write_hw_unit_min)
		goto unsupported;

	/*
	 * If atomic_write_hw_max is set, we have already stacked 1x bottom
	 * device, so check for compliance.
	 */
	if (t->atomic_write_hw_max) {
		if (!blk_stack_atomic_writes_tail(t, b))
			goto unsupported;
		return;
	}

	/* First bottom device: seed the top device limits from it */
	if (!blk_stack_atomic_writes_head(t, b))
		goto unsupported;
	return;

unsupported:
	t->atomic_write_hw_max = 0;
	t->atomic_write_hw_unit_max = 0;
	t->atomic_write_hw_unit_min = 0;
	t->atomic_write_hw_boundary = 0;
	t->features &= ~BLK_FEAT_ATOMIC_WRITES_STACKED;
}

/**
 * blk_stack_limits - adjust queue_limits for stacked devices
 * @t:	the stacking driver limits (top device)
@@ -639,6 +774,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
		t->zone_write_granularity = 0;
		t->max_zone_append_sectors = 0;
	}
	blk_stack_atomic_writes_limits(t, b);

	return ret;
}
EXPORT_SYMBOL(blk_stack_limits);
Loading