Commit 276f98ef authored by Linus Torvalds
Browse files

Merge tag 'block-6.14-20250228' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - Fix plugging for native zone writes

 - Fix segment limit settings for != 4K page size archs

 - Fix for slab names overflowing

* tag 'block-6.14-20250228' of git://git.kernel.dk/linux:
  block: fix 'kmem_cache of name 'bio-108' already exists'
  block: Remove zone write plugs when handling native zone append writes
  block: make segment size limit workable for > 4K PAGE_SIZE
parents 3e5d15dd b654f7a5
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -77,7 +77,7 @@ struct bio_slab {
	struct kmem_cache *slab;
	unsigned int slab_ref;
	unsigned int slab_size;
	char name[8];
	char name[12];
};
static DEFINE_MUTEX(bio_slab_lock);
static DEFINE_XARRAY(bio_slabs);
+1 −1
Original line number Diff line number Diff line
@@ -329,7 +329,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,

		if (nsegs < lim->max_segments &&
		    bytes + bv.bv_len <= max_bytes &&
		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
		    bv.bv_offset + bv.bv_len <= lim->min_segment_size) {
			nsegs++;
			bytes += bv.bv_len;
		} else {
+11 −3
Original line number Diff line number Diff line
@@ -246,6 +246,7 @@ int blk_validate_limits(struct queue_limits *lim)
{
	unsigned int max_hw_sectors;
	unsigned int logical_block_sectors;
	unsigned long seg_size;
	int err;

	/*
@@ -303,7 +304,7 @@ int blk_validate_limits(struct queue_limits *lim)
	max_hw_sectors = min_not_zero(lim->max_hw_sectors,
				lim->max_dev_sectors);
	if (lim->max_user_sectors) {
		if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
		if (lim->max_user_sectors < BLK_MIN_SEGMENT_SIZE / SECTOR_SIZE)
			return -EINVAL;
		lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
	} else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
@@ -341,7 +342,7 @@ int blk_validate_limits(struct queue_limits *lim)
	 */
	if (!lim->seg_boundary_mask)
		lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
	if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1))
	if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1))
		return -EINVAL;

	/*
@@ -362,10 +363,17 @@ int blk_validate_limits(struct queue_limits *lim)
		 */
		if (!lim->max_segment_size)
			lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
		if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
		if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE))
			return -EINVAL;
	}

	/* setup min segment size for building new segment in fast path */
	if (lim->seg_boundary_mask > lim->max_segment_size - 1)
		seg_size = lim->max_segment_size;
	else
		seg_size = lim->seg_boundary_mask + 1;
	lim->min_segment_size = min_t(unsigned int, seg_size, PAGE_SIZE);

	/*
	 * We require drivers to at least do logical block aligned I/O, but
	 * historically could not check for that due to the separate calls
+69 −7
Original line number Diff line number Diff line
@@ -410,12 +410,13 @@ static bool disk_insert_zone_wplug(struct gendisk *disk,
		}
	}
	hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
	atomic_inc(&disk->nr_zone_wplugs);
	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);

	return true;
}

static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk,
							 sector_t sector)
{
	unsigned int zno = disk_zone_no(disk, sector);
@@ -437,6 +438,15 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
	return NULL;
}

/*
 * Find the zone write plug for the zone containing @sector on @disk.
 *
 * If the disk-wide nr_zone_wplugs counter reads zero, NULL is returned
 * immediately and disk_get_hashed_zone_wplug() is not called. Otherwise the
 * lookup is delegated to disk_get_hashed_zone_wplug() and its result is
 * returned unchanged.
 */
static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
							 sector_t sector)
{
	if (atomic_read(&disk->nr_zone_wplugs))
		return disk_get_hashed_zone_wplug(disk, sector);

	return NULL;
}

static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head)
{
	struct blk_zone_wplug *zwplug =
@@ -503,6 +513,7 @@ static void disk_remove_zone_wplug(struct gendisk *disk,
	zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED;
	spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
	hlist_del_init_rcu(&zwplug->node);
	atomic_dec(&disk->nr_zone_wplugs);
	spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
	disk_put_zone_wplug(zwplug);
}
@@ -593,6 +604,11 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
{
	struct bio *bio;

	if (bio_list_empty(&zwplug->bio_list))
		return;

	pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n",
			    zwplug->disk->disk_name, zwplug->zone_no);
	while ((bio = bio_list_pop(&zwplug->bio_list)))
		blk_zone_wplug_bio_io_error(zwplug, bio);
}
@@ -1040,6 +1056,47 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
	return true;
}

static void blk_zone_wplug_handle_native_zone_append(struct bio *bio)
{
	struct gendisk *disk = bio->bi_bdev->bd_disk;
	struct blk_zone_wplug *zwplug;
	unsigned long flags;

	/*
	 * We have native support for zone append operations, so we are not
	 * going to handle @bio through plugging. However, we may already have a
	 * zone write plug for the target zone if that zone was previously
	 * partially written using regular writes. In such case, we risk leaving
	 * the plug in the disk hash table if the zone is fully written using
	 * zone append operations. Avoid this by removing the zone write plug.
	 */
	zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
	if (likely(!zwplug))
		return;

	spin_lock_irqsave(&zwplug->lock, flags);

	/*
	 * We are about to remove the zone write plug. But if the user
	 * (mistakenly) has issued regular writes together with native zone
	 * append, we must aborts the writes as otherwise the plugged BIOs would
	 * not be executed by the plug BIO work as disk_get_zone_wplug() will
	 * return NULL after the plug is removed. Aborting the plugged write
	 * BIOs is consistent with the fact that these writes will most likely
	 * fail anyway as there is no ordering guarantees between zone append
	 * operations and regular write operations.
	 */
	if (!bio_list_empty(&zwplug->bio_list)) {
		pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n",
				    disk->disk_name, zwplug->zone_no);
		disk_zone_wplug_abort(zwplug);
	}
	disk_remove_zone_wplug(disk, zwplug);
	spin_unlock_irqrestore(&zwplug->lock, flags);

	disk_put_zone_wplug(zwplug);
}

/**
 * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging
 * @bio: The BIO being submitted
@@ -1096,8 +1153,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
	 */
	switch (bio_op(bio)) {
	case REQ_OP_ZONE_APPEND:
		if (!bdev_emulates_zone_append(bdev))
		if (!bdev_emulates_zone_append(bdev)) {
			blk_zone_wplug_handle_native_zone_append(bio);
			return false;
		}
		fallthrough;
	case REQ_OP_WRITE:
	case REQ_OP_WRITE_ZEROES:
@@ -1284,6 +1343,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
{
	unsigned int i;

	atomic_set(&disk->nr_zone_wplugs, 0);
	disk->zone_wplugs_hash_bits =
		min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS);

@@ -1338,6 +1398,7 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk)
		}
	}

	WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs));
	kfree(disk->zone_wplugs_hash);
	disk->zone_wplugs_hash = NULL;
	disk->zone_wplugs_hash_bits = 0;
@@ -1550,11 +1611,12 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
	}

	/*
	 * We need to track the write pointer of all zones that are not
	 * empty nor full. So make sure we have a zone write plug for
	 * such zone if the device has a zone write plug hash table.
	 * If the device needs zone append emulation, we need to track the
	 * write pointer of all zones that are not empty nor full. So make sure
	 * we have a zone write plug for such zone if the device has a zone
	 * write plug hash table.
	 */
	if (!disk->zone_wplugs_hash)
	if (!queue_emulates_zone_append(disk->queue) || !disk->zone_wplugs_hash)
		return 0;

	disk_zone_wplug_sync_wp_offset(disk, zone);
+7 −2
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
struct elevator_type;

#define	BLK_DEV_MAX_SECTORS	(LLONG_MAX >> 9)
/*
 * Lower bound, in bytes, used by queue limit validation: max_segment_size
 * below this value is rejected, seg_boundary_mask must be at least this
 * value minus one, and a non-zero max_user_sectors must be at least this
 * value divided by SECTOR_SIZE.
 */
#define	BLK_MIN_SEGMENT_SIZE	4096

/* Max future timer expiry for timeouts */
#define BLK_MAX_TIMEOUT		(5 * HZ)
@@ -358,8 +359,12 @@ struct bio *bio_split_zone_append(struct bio *bio,
static inline bool bio_may_need_split(struct bio *bio,
		const struct queue_limits *lim)
{
	return lim->chunk_sectors || bio->bi_vcnt != 1 ||
		bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
	if (lim->chunk_sectors)
		return true;
	if (bio->bi_vcnt != 1)
		return true;
	return bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset >
		lim->min_segment_size;
}

/**
Loading