Commit cf724e5e authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'md-6.16-20250513' of...

Merge tag 'md-6.16-20250513' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux into for-6.16/block

Pull MD changes from Yu Kuai:

- Fix normal IO being starved by sync IO, found by running mkfs on a
  newly created large raid5 array, along with some cleanup patches for
  the bdev inflight counters.

* tag 'md-6.16-20250513' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux:
  md: clean up accounting for issued sync IO
  md: fix is_mddev_idle()
  md: add a new api sync_io_depth
  md: record dm-raid gendisk in mddev
  block: export API to get the number of bdev inflight IO
  block: clean up blk_mq_in_flight_rw()
  block: WARN if bdev inflight counter is negative
  block: reuse part_in_flight_rw for part_in_flight
  blk-mq: remove blk_mq_in_flight()
parents 2d8951ae 752d0464
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1018,7 +1018,7 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
	stamp = READ_ONCE(part->bd_stamp);
	if (unlikely(time_after(now, stamp)) &&
	    likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) &&
	    (end || part_in_flight(part)))
	    (end || bdev_count_inflight(part)))
		__part_stat_add(part, io_ticks, now - stamp);

	if (bdev_is_partition(part)) {
+6 −16
Original line number Diff line number Diff line
@@ -89,7 +89,7 @@ struct mq_inflight {
	unsigned int inflight[2];
};

static bool blk_mq_check_inflight(struct request *rq, void *priv)
static bool blk_mq_check_in_driver(struct request *rq, void *priv)
{
	struct mq_inflight *mi = priv;

@@ -101,24 +101,14 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv)
	return true;
}

unsigned int blk_mq_in_flight(struct request_queue *q,
		struct block_device *part)
void blk_mq_in_driver_rw(struct block_device *part, unsigned int inflight[2])
{
	struct mq_inflight mi = { .part = part };

	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);

	return mi.inflight[0] + mi.inflight[1];
}

void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
		unsigned int inflight[2])
{
	struct mq_inflight mi = { .part = part };

	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
	inflight[0] = mi.inflight[0];
	inflight[1] = mi.inflight[1];
	blk_mq_queue_tag_busy_iter(bdev_get_queue(part), blk_mq_check_in_driver,
				   &mi);
	inflight[READ] = mi.inflight[READ];
	inflight[WRITE] = mi.inflight[WRITE];
}

#ifdef CONFIG_LOCKDEP
+1 −4
Original line number Diff line number Diff line
@@ -246,10 +246,7 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
	return hctx->nr_ctx && hctx->tags;
}

unsigned int blk_mq_in_flight(struct request_queue *q,
		struct block_device *part);
void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
		unsigned int inflight[2]);
void blk_mq_in_driver_rw(struct block_device *part, unsigned int inflight[2]);

static inline void blk_mq_put_dispatch_budget(struct request_queue *q,
					      int budget_token)
+0 −1
Original line number Diff line number Diff line
@@ -419,7 +419,6 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi,
int blk_dev_init(void);

void update_io_ticks(struct block_device *part, unsigned long now, bool end);
unsigned int part_in_flight(struct block_device *part);

static inline void req_set_nomerge(struct request_queue *q, struct request *req)
{
+40 −29
Original line number Diff line number Diff line
@@ -125,37 +125,46 @@ static void part_stat_read_all(struct block_device *part,
	}
}

unsigned int part_in_flight(struct block_device *part)
static void bdev_count_inflight_rw(struct block_device *part,
		unsigned int inflight[2], bool mq_driver)
{
	unsigned int inflight = 0;
	int cpu;

	if (mq_driver) {
		blk_mq_in_driver_rw(part, inflight);
	} else {
		for_each_possible_cpu(cpu) {
		inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
			    part_stat_local_read_cpu(part, in_flight[1], cpu);
			inflight[READ] += part_stat_local_read_cpu(
						part, in_flight[READ], cpu);
			inflight[WRITE] += part_stat_local_read_cpu(
						part, in_flight[WRITE], cpu);
		}
	}
	if ((int)inflight < 0)
		inflight = 0;

	return inflight;
	if (WARN_ON_ONCE((int)inflight[READ] < 0))
		inflight[READ] = 0;
	if (WARN_ON_ONCE((int)inflight[WRITE] < 0))
		inflight[WRITE] = 0;
}

static void part_in_flight_rw(struct block_device *part,
		unsigned int inflight[2])
/**
 * bdev_count_inflight - get the number of inflight IOs for a block device.
 *
 * @part: the block device.
 *
 * Inflight here means IO for which accounting has started: from
 * bdev_start_io_acct() for bio-based block devices, and from
 * blk_account_io_start() for rq-based block devices.
 */
unsigned int bdev_count_inflight(struct block_device *part)
{
	int cpu;
	unsigned int inflight[2] = {0};

	inflight[0] = 0;
	inflight[1] = 0;
	for_each_possible_cpu(cpu) {
		inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
		inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
	}
	if ((int)inflight[0] < 0)
		inflight[0] = 0;
	if ((int)inflight[1] < 0)
		inflight[1] = 0;
	bdev_count_inflight_rw(part, inflight, false);

	return inflight[READ] + inflight[WRITE];
}
EXPORT_SYMBOL_GPL(bdev_count_inflight);

/*
 * Can be deleted altogether. Later.
@@ -1053,7 +1062,7 @@ ssize_t part_stat_show(struct device *dev,
	struct disk_stats stat;
	unsigned int inflight;

	inflight = part_in_flight(bdev);
	inflight = bdev_count_inflight(bdev);
	if (inflight) {
		part_stat_lock();
		update_io_ticks(bdev, jiffies, true);
@@ -1090,19 +1099,21 @@ ssize_t part_stat_show(struct device *dev,
		(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
}

/*
 * Show the number of IOs issued to the driver.
 * For bio-based devices, counting starts at bdev_start_io_acct();
 * for rq-based devices, counting starts at blk_mq_start_request().
 */
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int inflight[2];
	unsigned int inflight[2] = {0};

	if (queue_is_mq(q))
		blk_mq_in_flight_rw(q, bdev, inflight);
	else
		part_in_flight_rw(bdev, inflight);
	bdev_count_inflight_rw(bdev, inflight, queue_is_mq(q));

	return sysfs_emit(buf, "%8u %8u\n", inflight[0], inflight[1]);
	return sysfs_emit(buf, "%8u %8u\n", inflight[READ], inflight[WRITE]);
}

static ssize_t disk_capability_show(struct device *dev,
@@ -1355,7 +1366,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
		if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
			continue;

		inflight = part_in_flight(hd);
		inflight = bdev_count_inflight(hd);
		if (inflight) {
			part_stat_lock();
			update_io_ticks(hd, jiffies, true);
Loading