Commit 01d550f0 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-6.8/block-2024-01-08' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:
 "Pretty quiet round this time around. This contains:

   - NVMe updates via Keith:
        - nvme fabrics spec updates (Guixin, Max)
        - nvme target udpates (Guixin, Evan)
        - nvme attribute refactoring (Daniel)
        - nvme-fc numa fix (Keith)

   - MD updates via Song:
        - Fix/Cleanup RCU usage from conf->disks[i].rdev (Yu Kuai)
        - Fix raid5 hang issue (Junxiao Bi)
        - Add Yu Kuai as Reviewer of the md subsystem
        - Remove deprecated flavors (Song Liu)
        - raid1 read error check support (Li Nan)
        - Better handle events off-by-1 case (Alex Lyakas)

   - Efficiency improvements for passthrough (Kundan)

   - Support for mapping integrity data directly (Keith)

   - Zoned write fix (Damien)

   - rnbd fixes (Kees, Santosh, Supriti)

   - Default to a sane discard size granularity (Christoph)

   - Make the default max transfer size naming less confusing
     (Christoph)

   - Remove support for deprecated host aware zoned model (Christoph)

   - Misc fixes (me, Li, Matthew, Min, Ming, Randy, liyouhong, Daniel,
     Bart, Christoph)"

* tag 'for-6.8/block-2024-01-08' of git://git.kernel.dk/linux: (78 commits)
  block: Treat sequential write preferred zone type as invalid
  block: remove disk_clear_zoned
  sd: remove the !ZBC && blk_queue_is_zoned case in sd_read_block_characteristics
  drivers/block/xen-blkback/common.h: Fix spelling typo in comment
  blk-cgroup: fix rcu lockdep warning in blkg_lookup()
  blk-cgroup: don't use removal safe list iterators
  block: floor the discard granularity to the physical block size
  mtd_blkdevs: use the default discard granularity
  bcache: use the default discard granularity
  zram: use the default discard granularity
  null_blk: use the default discard granularity
  nbd: use the default discard granularity
  ubd: use the default discard granularity
  block: default the discard granularity to sector size
  bcache: discard_granularity should not be smaller than a sector
  block: remove two comments in bio_split_discard
  block: rename and document BLK_DEF_MAX_SECTORS
  loop: don't abuse BLK_DEF_MAX_SECTORS
  aoe: don't abuse BLK_DEF_MAX_SECTORS
  null_blk: don't cap max_hw_sectors to BLK_DEF_MAX_SECTORS
  ...
parents d05e6266 587371ed
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -20079,6 +20079,7 @@ F: include/linux/property.h
SOFTWARE RAID (Multiple Disks) SUPPORT
M:	Song Liu <song@kernel.org>
R:	Yu Kuai <yukuai3@huawei.com>
L:	linux-raid@vger.kernel.org
S:	Supported
Q:	https://patchwork.kernel.org/project/linux-raid/list/
+0 −1
Original line number Diff line number Diff line
@@ -798,7 +798,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
		ubd_dev->cow.fd = err;
	}
	if (ubd_dev->no_trim == 0) {
		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
		blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
	}
+216 −2
Original line number Diff line number Diff line
@@ -69,15 +69,15 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,

	memset(bip, 0, sizeof(*bip));

	if (nr_vecs > inline_vecs) {
	/* always report as many vecs as asked explicitly, not inline vecs */
	bip->bip_max_vcnt = nr_vecs;
	if (nr_vecs > inline_vecs) {
		bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
					  &bip->bip_max_vcnt, gfp_mask);
		if (!bip->bip_vec)
			goto err;
	} else {
		bip->bip_vec = bip->bip_inline_vecs;
		bip->bip_max_vcnt = inline_vecs;
	}

	bip->bip_bio = bio;
@@ -91,6 +91,47 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
}
EXPORT_SYMBOL(bio_integrity_alloc);

static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
				     bool dirty)
{
	int i;

	for (i = 0; i < nr_vecs; i++) {
		if (dirty && !PageCompound(bv[i].bv_page))
			set_page_dirty_lock(bv[i].bv_page);
		unpin_user_page(bv[i].bv_page);
	}
}

static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
{
	unsigned short nr_vecs = bip->bip_max_vcnt - 1;
	struct bio_vec *copy = &bip->bip_vec[1];
	size_t bytes = bip->bip_iter.bi_size;
	struct iov_iter iter;
	int ret;

	iov_iter_bvec(&iter, ITER_DEST, copy, nr_vecs, bytes);
	ret = copy_to_iter(bvec_virt(bip->bip_vec), bytes, &iter);
	WARN_ON_ONCE(ret != bytes);

	bio_integrity_unpin_bvec(copy, nr_vecs, true);
}

static void bio_integrity_unmap_user(struct bio_integrity_payload *bip)
{
	bool dirty = bio_data_dir(bip->bip_bio) == READ;

	if (bip->bip_flags & BIP_COPY_USER) {
		if (dirty)
			bio_integrity_uncopy_user(bip);
		kfree(bvec_virt(bip->bip_vec));
		return;
	}

	bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt, dirty);
}

/**
 * bio_integrity_free - Free bio integrity payload
 * @bio:	bio containing bip to be freed
@@ -105,6 +146,8 @@ void bio_integrity_free(struct bio *bio)

	if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
		kfree(bvec_virt(bip->bip_vec));
	else if (bip->bip_flags & BIP_INTEGRITY_USER)
		bio_integrity_unmap_user(bip);

	__bio_integrity_free(bs, bip);
	bio->bi_integrity = NULL;
@@ -160,6 +203,177 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_integrity_add_page);

static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
				   int nr_vecs, unsigned int len,
				   unsigned int direction, u32 seed)
{
	bool write = direction == ITER_SOURCE;
	struct bio_integrity_payload *bip;
	struct iov_iter iter;
	void *buf;
	int ret;

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (write) {
		iov_iter_bvec(&iter, direction, bvec, nr_vecs, len);
		if (!copy_from_iter_full(buf, len, &iter)) {
			ret = -EFAULT;
			goto free_buf;
		}

		bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
	} else {
		memset(buf, 0, len);

		/*
		 * We need to preserve the original bvec and the number of vecs
		 * in it for completion handling
		 */
		bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs + 1);
	}

	if (IS_ERR(bip)) {
		ret = PTR_ERR(bip);
		goto free_buf;
	}

	if (write)
		bio_integrity_unpin_bvec(bvec, nr_vecs, false);
	else
		memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));

	ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
				     offset_in_page(buf));
	if (ret != len) {
		ret = -ENOMEM;
		goto free_bip;
	}

	bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
	bip->bip_iter.bi_sector = seed;
	return 0;
free_bip:
	bio_integrity_free(bio);
free_buf:
	kfree(buf);
	return ret;
}

static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
				   int nr_vecs, unsigned int len, u32 seed)
{
	struct bio_integrity_payload *bip;

	bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs);
	if (IS_ERR(bip))
		return PTR_ERR(bip);

	memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
	bip->bip_flags |= BIP_INTEGRITY_USER;
	bip->bip_iter.bi_sector = seed;
	bip->bip_iter.bi_size = len;
	return 0;
}

static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
				    int nr_vecs, ssize_t bytes, ssize_t offset)
{
	unsigned int nr_bvecs = 0;
	int i, j;

	for (i = 0; i < nr_vecs; i = j) {
		size_t size = min_t(size_t, bytes, PAGE_SIZE - offset);
		struct folio *folio = page_folio(pages[i]);

		bytes -= size;
		for (j = i + 1; j < nr_vecs; j++) {
			size_t next = min_t(size_t, PAGE_SIZE, bytes);

			if (page_folio(pages[j]) != folio ||
			    pages[j] != pages[j - 1] + 1)
				break;
			unpin_user_page(pages[j]);
			size += next;
			bytes -= next;
		}

		bvec_set_page(&bvec[nr_bvecs], pages[i], size, offset);
		offset = 0;
		nr_bvecs++;
	}

	return nr_bvecs;
}

int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
			   u32 seed)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned int align = q->dma_pad_mask | queue_dma_alignment(q);
	struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
	struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
	unsigned int direction, nr_bvecs;
	struct iov_iter iter;
	int ret, nr_vecs;
	size_t offset;
	bool copy;

	if (bio_integrity(bio))
		return -EINVAL;
	if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q))
		return -E2BIG;

	if (bio_data_dir(bio) == READ)
		direction = ITER_DEST;
	else
		direction = ITER_SOURCE;

	iov_iter_ubuf(&iter, direction, ubuf, bytes);
	nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
	if (nr_vecs > BIO_MAX_VECS)
		return -E2BIG;
	if (nr_vecs > UIO_FASTIOV) {
		bvec = kcalloc(sizeof(*bvec), nr_vecs, GFP_KERNEL);
		if (!bvec)
			return -ENOMEM;
		pages = NULL;
	}

	copy = !iov_iter_is_aligned(&iter, align, align);
	ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
	if (unlikely(ret < 0))
		goto free_bvec;

	nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset);
	if (pages != stack_pages)
		kvfree(pages);
	if (nr_bvecs > queue_max_integrity_segments(q))
		copy = true;

	if (copy)
		ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
					      direction, seed);
	else
		ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes, seed);
	if (ret)
		goto release_pages;
	if (bvec != stack_vec)
		kfree(bvec);

	return 0;

release_pages:
	bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
free_bvec:
	if (bvec != stack_vec)
		kfree(bvec);
	return ret;
}
EXPORT_SYMBOL_GPL(bio_integrity_map_user);

/**
 * bio_integrity_process - Process integrity metadata for a bio
 * @bio:	bio to generate/verify integrity metadata for
+29 −24
Original line number Diff line number Diff line
@@ -944,7 +944,7 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,

	if ((addr1 | mask) != (addr2 | mask))
		return false;
	if (bv->bv_len + len > queue_max_segment_size(q))
	if (len > queue_max_segment_size(q) - bv->bv_len)
		return false;
	return bvec_try_merge_page(bv, page, len, offset, same_page);
}
@@ -966,10 +966,13 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
		struct page *page, unsigned int len, unsigned int offset,
		unsigned int max_sectors, bool *same_page)
{
	unsigned int max_size = max_sectors << SECTOR_SHIFT;

	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
		return 0;

	if (((bio->bi_iter.bi_size + len) >> SECTOR_SHIFT) > max_sectors)
	len = min3(len, max_size, queue_max_segment_size(q));
	if (len > max_size - bio->bi_iter.bi_size)
		return 0;

	if (bio->bi_vcnt > 0) {
@@ -1145,13 +1148,22 @@ EXPORT_SYMBOL(bio_add_folio);

void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
	struct bvec_iter_all iter_all;
	struct bio_vec *bvec;
	struct folio_iter fi;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		if (mark_dirty && !PageCompound(bvec->bv_page))
			set_page_dirty_lock(bvec->bv_page);
		bio_release_page(bio, bvec->bv_page);
	bio_for_each_folio_all(fi, bio) {
		struct page *page;
		size_t done = 0;

		if (mark_dirty) {
			folio_lock(fi.folio);
			folio_mark_dirty(fi.folio);
			folio_unlock(fi.folio);
		}
		page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
		do {
			bio_release_page(bio, page++);
			done += PAGE_SIZE;
		} while (done < fi.length);
	}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1439,18 +1451,12 @@ EXPORT_SYMBOL(bio_free_pages);
 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
 * for performing direct-IO in BIOs.
 *
 * The problem is that we cannot run set_page_dirty() from interrupt context
 * The problem is that we cannot run folio_mark_dirty() from interrupt context
 * because the required locks are not interrupt-safe.  So what we can do is to
 * mark the pages dirty _before_ performing IO.  And in interrupt context,
 * check that the pages are still dirty.   If so, fine.  If not, redirty them
 * in process context.
 *
 * We special-case compound pages here: normally this means reads into hugetlb
 * pages.  The logic in here doesn't really work right for compound pages
 * because the VM does not uniformly chase down the head page in all cases.
 * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
 * handle them at all.  So we skip compound pages here at an early stage.
 *
 * Note that this code is very hard to test under normal circumstances because
 * direct-io pins the pages with get_user_pages().  This makes
 * is_page_cache_freeable return false, and the VM will not clean the pages.
@@ -1466,12 +1472,12 @@ EXPORT_SYMBOL(bio_free_pages);
 */
void bio_set_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;
	struct folio_iter fi;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		if (!PageCompound(bvec->bv_page))
			set_page_dirty_lock(bvec->bv_page);
	bio_for_each_folio_all(fi, bio) {
		folio_lock(fi.folio);
		folio_mark_dirty(fi.folio);
		folio_unlock(fi.folio);
	}
}
EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
@@ -1515,12 +1521,11 @@ static void bio_dirty_fn(struct work_struct *work)

void bio_check_pages_dirty(struct bio *bio)
{
	struct bio_vec *bvec;
	struct folio_iter fi;
	unsigned long flags;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
	bio_for_each_folio_all(fi, bio) {
		if (!folio_test_dirty(fi.folio))
			goto defer;
	}

+5 −2
Original line number Diff line number Diff line
@@ -575,13 +575,13 @@ static void blkg_destroy(struct blkcg_gq *blkg)
static void blkg_destroy_all(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	struct blkcg_gq *blkg, *n;
	struct blkcg_gq *blkg;
	int count = BLKG_DESTROY_BATCH_SIZE;
	int i;

restart:
	spin_lock_irq(&q->queue_lock);
	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
	list_for_each_entry(blkg, &q->blkg_list, q_node) {
		struct blkcg *blkcg = blkg->blkcg;

		if (hlist_unhashed(&blkg->blkcg_node))
@@ -2064,6 +2064,9 @@ void bio_associate_blkg(struct bio *bio)
{
	struct cgroup_subsys_state *css;

	if (blk_op_is_passthrough(bio->bi_opf))
		return;

	rcu_read_lock();

	if (bio->bi_blkg)
Loading