Commit b69ffeaa authored by Long Li, committed by Martin K. Petersen
Browse files

scsi: storvsc: Prefer returning channel with the same CPU as the I/O issuing CPU



When selecting an outgoing channel for I/O, storvsc tries to select a
channel with a returning CPU that is not the same as the issuing CPU. This
worked well in the past; however, it doesn't work well when Hyper-V
exposes a large number of channels (up to the number of all CPUs). Using a
different CPU for the returning channel is not efficient on Hyper-V.

Change this behavior by preferring the channel with the same CPU as
the current I/O issuing CPU whenever possible.

Tests have shown improvements in newer Hyper-V/Azure environments, and no
regressions with older Hyper-V/Azure environments.

Tested-by: Raheel Abdul Faizy <rabdulfaizy@microsoft.com>
Signed-off-by: Long Li <longli@microsoft.com>
Message-Id: <1759381530-7414-1-git-send-email-longli@linux.microsoft.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 558ae457
Loading
Loading
Loading
Loading
+45 −51
Original line number Diff line number Diff line
@@ -1406,14 +1406,19 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
	}

	/*
	 * Our channel array is sparsley populated and we
	 * Our channel array could be sparsely populated and we
	 * initiated I/O on a processor/hw-q that does not
	 * currently have a designated channel. Fix this.
	 * The strategy is simple:
	 * I. Ensure NUMA locality
	 * II. Distribute evenly (best effort)
	 * I. Prefer the channel associated with the current CPU
	 * II. Ensure NUMA locality
	 * III. Distribute evenly (best effort)
	 */

	/* Prefer the channel on the I/O issuing processor/hw-q */
	if (cpumask_test_cpu(q_num, &stor_device->alloced_cpus))
		return stor_device->stor_chns[q_num];

	node_mask = cpumask_of_node(cpu_to_node(q_num));

	num_channels = 0;
@@ -1469,59 +1474,48 @@ static int storvsc_do_io(struct hv_device *device,
	/* See storvsc_change_target_cpu(). */
	outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
	if (outgoing_channel != NULL) {
		if (outgoing_channel->target_cpu == q_num) {
		if (hv_get_avail_to_write_percent(&outgoing_channel->outbound)
				> ring_avail_percent_lowater)
			goto found_channel;

		/*
			 * Ideally, we want to pick a different channel if
			 * available on the same NUMA node.
		 * Channel is busy, try to find a channel on the same NUMA node
		 */
		node_mask = cpumask_of_node(cpu_to_node(q_num));
			for_each_cpu_wrap(tgt_cpu,
				 &stor_device->alloced_cpus, q_num + 1) {
		for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
				  q_num + 1) {
			if (!cpumask_test_cpu(tgt_cpu, node_mask))
				continue;
				if (tgt_cpu == q_num)
			channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
			if (!channel)
				continue;
				channel = READ_ONCE(
					stor_device->stor_chns[tgt_cpu]);
				if (channel == NULL)
					continue;
				if (hv_get_avail_to_write_percent(
							&channel->outbound)
			if (hv_get_avail_to_write_percent(&channel->outbound)
					> ring_avail_percent_lowater) {
				outgoing_channel = channel;
				goto found_channel;
			}
		}

			/*
			 * All the other channels on the same NUMA node are
			 * busy. Try to use the channel on the current CPU
			 */
			if (hv_get_avail_to_write_percent(
						&outgoing_channel->outbound)
					> ring_avail_percent_lowater)
				goto found_channel;

		/*
		 * If we reach here, all the channels on the current
		 * NUMA node are busy. Try to find a channel in
			 * other NUMA nodes
		 * all NUMA nodes
		 */
			for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
				if (cpumask_test_cpu(tgt_cpu, node_mask))
		for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
				  q_num + 1) {
			channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
			if (!channel)
				continue;
				channel = READ_ONCE(
					stor_device->stor_chns[tgt_cpu]);
				if (channel == NULL)
					continue;
				if (hv_get_avail_to_write_percent(
							&channel->outbound)
			if (hv_get_avail_to_write_percent(&channel->outbound)
					> ring_avail_percent_lowater) {
				outgoing_channel = channel;
				goto found_channel;
			}
		}
		}
		/*
		 * If we reach here, all the channels are busy. Use the
		 * original channel found.
		 */
	} else {
		spin_lock_irqsave(&stor_device->lock, flags);
		outgoing_channel = stor_device->stor_chns[q_num];