Commit d458a240 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull block fixes from Jens Axboe:

 - NVMe merge request via Keith:
     - Fix memory leak on a passthrough integrity mapping failure (Keith)
     - Hide secrets behind debug option (Hannes)
     - Fix pci use-after-free for host memory buffer (Chia-Lin Kao)
     - Fix tcp taregt use-after-free for data digest (Sagi)
     - Revert a mistaken quirk (Alan Cui)
     - Fix uevent and controller state race condition (Maurizio)
     - Fix apple submission queue re-initialization (Nick Chan)

 - Three fixes for blk-integrity, fixing an issue with the user data
   mapping and two problems with recomputing number of segments

 - Two fixes for the iov_iter bounce buffering

 - Fix for the handling of dead zoned write plugs

 - ublk max_sectors validation fix, with associated selftest addition

* tag 'block-7.1-20260515' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  nvme-apple: Reset q->sq_tail during queue init
  block: align down bounces bios
  block: pass a minsize argument to bio_iov_iter_bounce
  selftests: ublk: cap nthreads to kernel's actual nr_hw_queues
  block: fix handling of dead zone write plugs
  block: bio-integrity: Fix null-ptr-deref in bio_integrity_map_user()
  block: recompute nr_integrity_segments in blk_insert_cloned_request
  block: don't overwrite bip_vcnt in bio_integrity_copy_user()
  nvme: fix race condition between connected uevent and STARTED_ONCE flag
  Revert "nvme: add quirk NVME_QUIRK_IGNORE_DEV_SUBNQN for 144d:a808"
  nvmet-tcp: Fix potential UAF when ddgst mismatch
  nvme-pci: fix use-after-free in nvme_free_host_mem()
  nvmet-auth: Do not print DH-HMAC-CHAP secrets
  nvme: fix bio leak on mapping failure
  nvme: make prp passthrough usage less scary
  ublk: reject max_sectors smaller than PAGE_SECTORS in parameter validation
parents ee7226b2 4141f46d
Loading
Loading
Loading
Loading
+18 −1
Original line number Diff line number Diff line
@@ -308,7 +308,6 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
	}

	bip->bip_flags |= BIP_COPY_USER;
	bip->bip_vcnt = nr_vecs;
	return 0;
free_bip:
	bio_integrity_free(bio);
@@ -403,6 +402,24 @@ int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter)
	if (unlikely(ret < 0))
		goto free_bvec;

	/*
	 * Handle partial pinning. This can happen when pin_user_pages_fast()
	 * returns fewer pages than requested.
	 */
	if (user_backed_iter(iter) && unlikely(ret != bytes)) {
		if (ret > 0) {
			int npinned = DIV_ROUND_UP(offset + ret, PAGE_SIZE);
			int i;

			for (i = 0; i < npinned; i++)
				unpin_user_page(pages[i]);
		}
		if (pages != stack_pages)
			kvfree(pages);
		ret = -EFAULT;
		goto free_bvec;
	}

	nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset,
				   &is_p2p);
	if (pages != stack_pages)
+15 −12
Original line number Diff line number Diff line
@@ -1279,11 +1279,12 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
	return bio_iov_iter_align_down(bio, iter, len_align_mask);
}

static struct folio *folio_alloc_greedy(gfp_t gfp, size_t *size)
static struct folio *folio_alloc_greedy(gfp_t gfp, size_t *size,
		size_t minsize)
{
	struct folio *folio;

	while (*size > PAGE_SIZE) {
	while (*size > minsize) {
		folio = folio_alloc(gfp | __GFP_NORETRY, get_order(*size));
		if (folio)
			return folio;
@@ -1307,7 +1308,7 @@ static void bio_free_folios(struct bio *bio)
}

static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter,
		size_t maxlen)
		size_t maxlen, size_t minsize)
{
	size_t total_len = min(maxlen, iov_iter_count(iter));

@@ -1322,13 +1323,13 @@ static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter,
		size_t this_len = min(total_len, SZ_1M);
		struct folio *folio;

		if (this_len > PAGE_SIZE * 2)
		if (this_len > minsize * 2)
			this_len = rounddown_pow_of_two(this_len);

		if (bio->bi_iter.bi_size > BIO_MAX_SIZE - this_len)
			break;

		folio = folio_alloc_greedy(GFP_KERNEL, &this_len);
		folio = folio_alloc_greedy(GFP_KERNEL, &this_len, minsize);
		if (!folio)
			break;
		bio_add_folio_nofail(bio, folio, this_len, 0);
@@ -1344,16 +1345,16 @@ static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter,

	if (!bio->bi_iter.bi_size)
		return -ENOMEM;
	return 0;
	return bio_iov_iter_align_down(bio, iter, minsize - 1);
}

static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
		size_t maxlen)
		size_t maxlen, size_t minsize)
{
	size_t len = min3(iov_iter_count(iter), maxlen, SZ_1M);
	struct folio *folio;

	folio = folio_alloc_greedy(GFP_KERNEL, &len);
	folio = folio_alloc_greedy(GFP_KERNEL, &len, minsize);
	if (!folio)
		return -ENOMEM;

@@ -1382,7 +1383,7 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
	bvec_set_folio(&bio->bi_io_vec[0], folio, bio->bi_iter.bi_size, 0);
	if (iov_iter_extract_will_pin(iter))
		bio_set_flag(bio, BIO_PAGE_PINNED);
	return 0;
	return bio_iov_iter_align_down(bio, iter, minsize - 1);
}

/**
@@ -1390,6 +1391,7 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
 * @bio:	bio to send
 * @iter:	iter to read from / write into
 * @maxlen:	maximum size to bounce
 * @minsize:	minimum folio allocation size
 *
 * Helper for direct I/O implementations that need to bounce buffer because
 * we need to checksum the data or perform other operations that require
@@ -1397,11 +1399,12 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
 * copies the data into it.  Needs to be paired with bio_iov_iter_unbounce()
 * called on completion.
 */
int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen)
int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen,
			size_t minsize)
{
	if (op_is_write(bio_op(bio)))
		return bio_iov_iter_bounce_write(bio, iter, maxlen);
	return bio_iov_iter_bounce_read(bio, iter, maxlen);
		return bio_iov_iter_bounce_write(bio, iter, maxlen, minsize);
	return bio_iov_iter_bounce_read(bio, iter, maxlen, minsize);
}

static void bvec_unpin(struct bio_vec *bv, bool mark_dirty)
+19 −0
Original line number Diff line number Diff line
@@ -3307,6 +3307,25 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
		return BLK_STS_IOERR;
	}

	/*
	 * Integrity segment counting depends on the same queue limits
	 * (virt_boundary_mask, seg_boundary_mask, max_segment_size) that
	 * vary across stacked queues, so recompute against the bottom
	 * queue just like nr_phys_segments above.
	 */
	if (blk_integrity_rq(rq) && rq->bio) {
		unsigned short max_int_segs = queue_max_integrity_segments(q);

		rq->nr_integrity_segments =
			blk_rq_count_integrity_sg(rq->q, rq->bio);
		if (rq->nr_integrity_segments > max_int_segs) {
			printk(KERN_ERR "%s: over max integrity segments limit. (%u > %u)\n",
				__func__, rq->nr_integrity_segments,
				max_int_segs);
			return BLK_STS_IOERR;
		}
	}

	if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))
		return BLK_STS_IOERR;

+27 −5
Original line number Diff line number Diff line
@@ -623,6 +623,28 @@ static void disk_mark_zone_wplug_dead(struct blk_zone_wplug *zwplug)
	}
}

static inline bool disk_check_zone_wplug_dead(struct blk_zone_wplug *zwplug)
{
	if (!(zwplug->flags & BLK_ZONE_WPLUG_DEAD))
		return false;

	/*
	 * If a new write is received right after a zone reset completes and
	 * while the disk_zone_wplugs_worker() thread has not yet released the
	 * reference on the zone write plug after processing the last write to
	 * the zone, then the new write BIO will see the zone write plug marked
	 * as dead. This case is however a false positive and a perfectly valid
	 * pattern. In such case, restore the zone write plug to a live one.
	 */
	if (!zwplug->wp_offset && bio_list_empty(&zwplug->bio_list)) {
		zwplug->flags &= ~BLK_ZONE_WPLUG_DEAD;
		refcount_inc(&zwplug->ref);
		return false;
	}

	return true;
}

static bool disk_zone_wplug_submit_bio(struct gendisk *disk,
				       struct blk_zone_wplug *zwplug);

@@ -1444,12 +1466,12 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
	spin_lock_irqsave(&zwplug->lock, flags);

	/*
	 * If we got a zone write plug marked as dead, then the user is issuing
	 * writes to a full zone, or without synchronizing with zone reset or
	 * zone finish operations. In such case, fail the BIO to signal this
	 * invalid usage.
	 * Check if we got a zone write plug marked as dead. If yes, then the
	 * user is likely issuing writes to a full zone, or without
	 * synchronizing with zone reset or zone finish operations. In such
	 * case, fail the BIO to signal this invalid usage.
	 */
	if (zwplug->flags & BLK_ZONE_WPLUG_DEAD) {
	if (disk_check_zone_wplug_dead(zwplug)) {
		spin_unlock_irqrestore(&zwplug->lock, flags);
		disk_put_zone_wplug(zwplug);
		bio_io_error(bio);
+3 −0
Original line number Diff line number Diff line
@@ -920,6 +920,9 @@ static int ublk_validate_params(const struct ublk_device *ub)
		if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9))
			return -EINVAL;

		if (p->max_sectors < PAGE_SECTORS)
			return -EINVAL;

		if (ublk_dev_is_zoned(ub) && !p->chunk_sectors)
			return -EINVAL;
	} else
Loading