Commit 0f9a7517 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'block-6.10-20240530' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe fixes via Keith:
      - Removing unused fields (Kanchan)
      - Large folio offsets support (Kundan)
      - Multipath NUMA node initialization fix (Nilay)
      - Multipath IO stats accounting fixes (Keith)
      - Circular lockdep fix (Keith)
      - Target race condition fix (Sagi)
      - Target memory leak fix (Sagi)

 - bcache fixes

 - null_blk fixes (Damien)

 - Fix regression in io.max due to throttle low removal (Waiman)

 - DM limit table fixes (Christoph)

 - SCSI and block limit fixes (Christoph)

 - zone fixes (Damien)

 - Misc fixes (Christoph, Hannes, hexue)

* tag 'block-6.10-20240530' of git://git.kernel.dk/linux: (25 commits)
  blk-throttle: Fix incorrect display of io.max
  block: Fix zone write plugging handling of devices with a runt zone
  block: Fix validation of zoned device with a runt zone
  null_blk: Do not allow runt zone with zone capacity smaller then zone size
  nvmet: fix a possible leak when destroy a ctrl during qp establishment
  nvme: use srcu for iterating namespace list
  bcache: code cleanup in __bch_bucket_alloc_set()
  bcache: call force_wake_up_gc() if necessary in check_should_bypass()
  bcache: allow allocator to invalidate bucket in gc
  block: check for max_hw_sectors underflow
  block: stack max_user_sectors
  sd: also set max_user_sectors when setting max_sectors
  null_blk: Print correct max open zones limit in null_init_zoned_dev()
  block: delete redundant function declaration
  null_blk: Fix return value of nullb_device_power_store()
  dm: make dm_set_zones_restrictions work on the queue limits
  dm: remove dm_check_zoned
  dm: move setting zoned_enabled to dm_table_set_restrictions
  block: remove blk_queue_max_integrity_segments
  nvme: adjust multiples of NVME_CTRL_PAGE_SIZE in offset
  ...
parents 6d541d66 0a751df4
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -104,6 +104,7 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
static int blk_validate_limits(struct queue_limits *lim)
{
	unsigned int max_hw_sectors;
	unsigned int logical_block_sectors;

	/*
	 * Unless otherwise specified, default to 512 byte logical blocks and a
@@ -134,8 +135,11 @@ static int blk_validate_limits(struct queue_limits *lim)
		lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
	if (WARN_ON_ONCE(lim->max_hw_sectors < PAGE_SECTORS))
		return -EINVAL;
	logical_block_sectors = lim->logical_block_size >> SECTOR_SHIFT;
	if (WARN_ON_ONCE(logical_block_sectors > lim->max_hw_sectors))
		return -EINVAL;
	lim->max_hw_sectors = round_down(lim->max_hw_sectors,
			lim->logical_block_size >> SECTOR_SHIFT);
			logical_block_sectors);

	/*
	 * The actual max_sectors value is a complex beast and also takes the
@@ -153,7 +157,7 @@ static int blk_validate_limits(struct queue_limits *lim)
		lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP);
	}
	lim->max_sectors = round_down(lim->max_sectors,
			lim->logical_block_size >> SECTOR_SHIFT);
			logical_block_sectors);

	/*
	 * Random default for the maximum number of segments.  Driver should not
@@ -611,6 +615,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
	unsigned int top, bottom, alignment, ret = 0;

	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
	t->max_user_sectors = min_not_zero(t->max_user_sectors,
			b->max_user_sectors);
	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
	t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
+0 −1
Original line number Diff line number Diff line
@@ -64,7 +64,6 @@ struct blk_stat_callback {

struct blk_queue_stats *blk_alloc_queue_stats(void);
void blk_free_queue_stats(struct blk_queue_stats *);
bool blk_stats_alloc_enable(struct request_queue *q);

void blk_stat_add(struct request *rq, u64 now);

+12 −12
Original line number Diff line number Diff line
@@ -1399,32 +1399,32 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
	bps_dft = U64_MAX;
	iops_dft = UINT_MAX;

	if (tg->bps_conf[READ] == bps_dft &&
	    tg->bps_conf[WRITE] == bps_dft &&
	    tg->iops_conf[READ] == iops_dft &&
	    tg->iops_conf[WRITE] == iops_dft)
	if (tg->bps[READ] == bps_dft &&
	    tg->bps[WRITE] == bps_dft &&
	    tg->iops[READ] == iops_dft &&
	    tg->iops[WRITE] == iops_dft)
		return 0;

	seq_printf(sf, "%s", dname);
	if (tg->bps_conf[READ] == U64_MAX)
	if (tg->bps[READ] == U64_MAX)
		seq_printf(sf, " rbps=max");
	else
		seq_printf(sf, " rbps=%llu", tg->bps_conf[READ]);
		seq_printf(sf, " rbps=%llu", tg->bps[READ]);

	if (tg->bps_conf[WRITE] == U64_MAX)
	if (tg->bps[WRITE] == U64_MAX)
		seq_printf(sf, " wbps=max");
	else
		seq_printf(sf, " wbps=%llu", tg->bps_conf[WRITE]);
		seq_printf(sf, " wbps=%llu", tg->bps[WRITE]);

	if (tg->iops_conf[READ] == UINT_MAX)
	if (tg->iops[READ] == UINT_MAX)
		seq_printf(sf, " riops=max");
	else
		seq_printf(sf, " riops=%u", tg->iops_conf[READ]);
		seq_printf(sf, " riops=%u", tg->iops[READ]);

	if (tg->iops_conf[WRITE] == UINT_MAX)
	if (tg->iops[WRITE] == UINT_MAX)
		seq_printf(sf, " wiops=max");
	else
		seq_printf(sf, " wiops=%u", tg->iops_conf[WRITE]);
		seq_printf(sf, " wiops=%u", tg->iops[WRITE]);

	seq_printf(sf, "\n");
	return 0;
+2 −6
Original line number Diff line number Diff line
@@ -95,15 +95,11 @@ struct throtl_grp {
	bool has_rules_bps[2];
	bool has_rules_iops[2];

	/* internally used bytes per second rate limits */
	/* bytes per second rate limits */
	uint64_t bps[2];
	/* user configured bps limits */
	uint64_t bps_conf[2];

	/* internally used IOPS limits */
	/* IOPS limits */
	unsigned int iops[2];
	/* user configured IOPS limits */
	unsigned int iops_conf[2];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];
+36 −11
Original line number Diff line number Diff line
@@ -450,6 +450,25 @@ static inline bool disk_zone_is_conv(struct gendisk *disk, sector_t sector)
	return test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
}

/*
 * Tell whether @zone is the last zone of @disk: true when the end of the zone
 * (zone->start + zone->len) is at or past the value reported by
 * get_capacity() for the disk, false otherwise.
 */
static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
{
	/* The zone ends before the disk capacity: more zones follow. */
	if (zone->start + zone->len < get_capacity(disk))
		return false;

	return true;
}

/*
 * Tell whether the zone with index @zno is full given the position
 * @offset_in_zone within that zone.
 *
 * Zones with an index below disk->nr_zones - 1 are compared against
 * disk->zone_capacity; any other index is compared against
 * disk->last_zone_capacity.
 */
static bool disk_zone_is_full(struct gendisk *disk,
			      unsigned int zno, unsigned int offset_in_zone)
{
	bool uses_last_zone_capacity = zno >= disk->nr_zones - 1;

	if (uses_last_zone_capacity)
		return offset_in_zone >= disk->last_zone_capacity;

	return offset_in_zone >= disk->zone_capacity;
}

/*
 * Tell whether the zone tracked by @zwplug is full, by handing the plug's
 * zone number and current write pointer offset to disk_zone_is_full().
 */
static bool disk_zone_wplug_is_full(struct gendisk *disk,
				    struct blk_zone_wplug *zwplug)
{
	unsigned int zno = zwplug->zone_no;
	unsigned int wp_offset = zwplug->wp_offset;

	return disk_zone_is_full(disk, zno, wp_offset);
}

static bool disk_insert_zone_wplug(struct gendisk *disk,
				   struct blk_zone_wplug *zwplug)
{
@@ -543,7 +562,7 @@ static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
		return false;

	/* We can remove zone write plugs for zones that are empty or full. */
	return !zwplug->wp_offset || zwplug->wp_offset >= disk->zone_capacity;
	return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug);
}

static void disk_remove_zone_wplug(struct gendisk *disk,
@@ -664,13 +683,12 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
					    struct blk_zone_wplug *zwplug)
{
	unsigned int zone_capacity = disk->zone_capacity;
	unsigned int wp_offset = zwplug->wp_offset;
	struct bio_list bl = BIO_EMPTY_LIST;
	struct bio *bio;

	while ((bio = bio_list_pop(&zwplug->bio_list))) {
		if (wp_offset >= zone_capacity ||
		if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) ||
		    (bio_op(bio) != REQ_OP_ZONE_APPEND &&
		     bio_offset_from_zone_start(bio) != wp_offset)) {
			blk_zone_wplug_bio_io_error(zwplug, bio);
@@ -909,7 +927,6 @@ void blk_zone_write_plug_init_request(struct request *req)
	sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req);
	struct request_queue *q = req->q;
	struct gendisk *disk = q->disk;
	unsigned int zone_capacity = disk->zone_capacity;
	struct blk_zone_wplug *zwplug =
		disk_get_zone_wplug(disk, blk_rq_pos(req));
	unsigned long flags;
@@ -933,7 +950,7 @@ void blk_zone_write_plug_init_request(struct request *req)
	 * into the back of the request.
	 */
	spin_lock_irqsave(&zwplug->lock, flags);
	while (zwplug->wp_offset < zone_capacity) {
	while (!disk_zone_wplug_is_full(disk, zwplug)) {
		bio = bio_list_peek(&zwplug->bio_list);
		if (!bio)
			break;
@@ -979,7 +996,7 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
	 * We know such BIO will fail, and that would potentially overflow our
	 * write pointer offset beyond the end of the zone.
	 */
	if (zwplug->wp_offset >= disk->zone_capacity)
	if (disk_zone_wplug_is_full(disk, zwplug))
		goto err;

	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
@@ -1556,6 +1573,7 @@ void disk_free_zone_resources(struct gendisk *disk)
	kfree(disk->conv_zones_bitmap);
	disk->conv_zones_bitmap = NULL;
	disk->zone_capacity = 0;
	disk->last_zone_capacity = 0;
	disk->nr_zones = 0;
}

@@ -1600,6 +1618,7 @@ struct blk_revalidate_zone_args {
	unsigned long	*conv_zones_bitmap;
	unsigned int	nr_zones;
	unsigned int	zone_capacity;
	unsigned int	last_zone_capacity;
	sector_t	sector;
};

@@ -1617,6 +1636,7 @@ static int disk_update_zone_resources(struct gendisk *disk,

	disk->nr_zones = args->nr_zones;
	disk->zone_capacity = args->zone_capacity;
	disk->last_zone_capacity = args->last_zone_capacity;
	swap(disk->conv_zones_bitmap, args->conv_zones_bitmap);
	if (disk->conv_zones_bitmap)
		nr_conv_zones = bitmap_weight(disk->conv_zones_bitmap,
@@ -1668,6 +1688,9 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
		return -ENODEV;
	}

	if (disk_zone_is_last(disk, zone))
		args->last_zone_capacity = zone->capacity;

	if (!disk_need_zone_resources(disk))
		return 0;

@@ -1693,11 +1716,14 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,

	/*
	 * Remember the capacity of the first sequential zone and check
	 * if it is constant for all zones.
	 * if it is constant for all zones, ignoring the last zone as it can be
	 * smaller.
	 */
	if (!args->zone_capacity)
		args->zone_capacity = zone->capacity;
	if (zone->capacity != args->zone_capacity) {
	if (disk_zone_is_last(disk, zone)) {
		args->last_zone_capacity = zone->capacity;
	} else if (zone->capacity != args->zone_capacity) {
		pr_warn("%s: Invalid variable zone capacity\n",
			disk->disk_name);
		return -ENODEV;
@@ -1732,7 +1758,6 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
{
	struct blk_revalidate_zone_args *args = data;
	struct gendisk *disk = args->disk;
	sector_t capacity = get_capacity(disk);
	sector_t zone_sectors = disk->queue->limits.chunk_sectors;
	int ret;

@@ -1743,7 +1768,7 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
		return -ENODEV;
	}

	if (zone->start >= capacity || !zone->len) {
	if (zone->start >= get_capacity(disk) || !zone->len) {
		pr_warn("%s: Invalid zone start %llu, length %llu\n",
			disk->disk_name, zone->start, zone->len);
		return -ENODEV;
@@ -1753,7 +1778,7 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
	 * All zones must have the same size, with the exception on an eventual
	 * smaller last zone.
	 */
	if (zone->start + zone->len < capacity) {
	if (!disk_zone_is_last(disk, zone)) {
		if (zone->len != zone_sectors) {
			pr_warn("%s: Invalid zoned device with non constant zone size\n",
				disk->disk_name);
Loading