Commit 9cf2317b authored by Mike Christie's avatar Mike Christie Committed by Martin K. Petersen
Browse files

scsi: target: Move I/O path stats to per CPU



The atomic use in the main I/O path is causing perf issues when using
higher performance backend devices and multiple queues. This moves the
stats to per CPU. Combined with the next patch that moves the
non_ordered/delayed_cmd_count to per CPU, IOPS by up to 33% for 8K IOS
when using 4 or more queues.

Signed-off-by: default avatarMike Christie <michael.christie@oracle.com>
Link: https://lore.kernel.org/r/20250424032741.16216-2-michael.christie@oracle.com


Reviewed-by: default avatarHannes Reinecke <hare@suse.de>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent 0af2f6be
Loading
Loading
Loading
Loading
+50 −19
Original line number Diff line number Diff line
@@ -55,14 +55,14 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd)
	rcu_read_lock();
	deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun);
	if (deve) {
		atomic_long_inc(&deve->total_cmds);
		this_cpu_inc(deve->stats->total_cmds);

		if (se_cmd->data_direction == DMA_TO_DEVICE)
			atomic_long_add(se_cmd->data_length,
					&deve->write_bytes);
			this_cpu_add(deve->stats->write_bytes,
				     se_cmd->data_length);
		else if (se_cmd->data_direction == DMA_FROM_DEVICE)
			atomic_long_add(se_cmd->data_length,
					&deve->read_bytes);
			this_cpu_add(deve->stats->read_bytes,
				     se_cmd->data_length);

		if ((se_cmd->data_direction == DMA_TO_DEVICE) &&
		    deve->lun_access_ro) {
@@ -126,14 +126,14 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd)
	 * target_core_fabric_configfs.c:target_fabric_port_release
	 */
	se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev);
	atomic_long_inc(&se_cmd->se_dev->num_cmds);
	this_cpu_inc(se_cmd->se_dev->stats->total_cmds);

	if (se_cmd->data_direction == DMA_TO_DEVICE)
		atomic_long_add(se_cmd->data_length,
				&se_cmd->se_dev->write_bytes);
		this_cpu_add(se_cmd->se_dev->stats->write_bytes,
			     se_cmd->data_length);
	else if (se_cmd->data_direction == DMA_FROM_DEVICE)
		atomic_long_add(se_cmd->data_length,
				&se_cmd->se_dev->read_bytes);
		this_cpu_add(se_cmd->se_dev->stats->read_bytes,
			     se_cmd->data_length);

	return ret;
}
@@ -322,6 +322,7 @@ int core_enable_device_list_for_node(
	struct se_portal_group *tpg)
{
	struct se_dev_entry *orig, *new;
	int ret = 0;

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new) {
@@ -329,6 +330,12 @@ int core_enable_device_list_for_node(
		return -ENOMEM;
	}

	new->stats = alloc_percpu(struct se_dev_entry_io_stats);
	if (!new->stats) {
		ret = -ENOMEM;
		goto free_deve;
	}

	spin_lock_init(&new->ua_lock);
	INIT_LIST_HEAD(&new->ua_list);
	INIT_LIST_HEAD(&new->lun_link);
@@ -351,8 +358,8 @@ int core_enable_device_list_for_node(
			       " for dynamic -> explicit NodeACL conversion:"
				" %s\n", nacl->initiatorname);
			mutex_unlock(&nacl->lun_entry_mutex);
			kfree(new);
			return -EINVAL;
			ret = -EINVAL;
			goto free_stats;
		}
		if (orig->se_lun_acl != NULL) {
			pr_warn_ratelimited("Detected existing explicit"
@@ -360,8 +367,8 @@ int core_enable_device_list_for_node(
				" mapped_lun: %llu, failing\n",
				 nacl->initiatorname, mapped_lun);
			mutex_unlock(&nacl->lun_entry_mutex);
			kfree(new);
			return -EINVAL;
			ret = -EINVAL;
			goto free_stats;
		}

		new->se_lun = lun;
@@ -394,6 +401,20 @@ int core_enable_device_list_for_node(

	target_luns_data_has_changed(nacl, new, true);
	return 0;

free_stats:
	free_percpu(new->stats);
free_deve:
	kfree(new);
	return ret;
}

static void target_free_dev_entry(struct rcu_head *head)
{
	struct se_dev_entry *deve = container_of(head, struct se_dev_entry,
						 rcu_head);
	free_percpu(deve->stats);
	kfree(deve);
}

void core_disable_device_list_for_node(
@@ -443,7 +464,7 @@ void core_disable_device_list_for_node(
	kref_put(&orig->pr_kref, target_pr_kref_release);
	wait_for_completion(&orig->pr_comp);

	kfree_rcu(orig, rcu_head);
	call_rcu(&orig->rcu_head, target_free_dev_entry);

	core_scsi3_free_pr_reg_from_nacl(dev, nacl);
	target_luns_data_has_changed(nacl, NULL, false);
@@ -689,11 +710,13 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
	if (!dev)
		return NULL;

	dev->stats = alloc_percpu(struct se_dev_io_stats);
	if (!dev->stats)
		goto free_device;

	dev->queues = kcalloc(nr_cpu_ids, sizeof(*dev->queues), GFP_KERNEL);
	if (!dev->queues) {
		hba->backend->ops->free_device(dev);
		return NULL;
	}
	if (!dev->queues)
		goto free_stats;

	dev->queue_cnt = nr_cpu_ids;
	for (i = 0; i < dev->queue_cnt; i++) {
@@ -707,6 +730,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
		INIT_WORK(&q->sq.work, target_queued_submit_work);
	}


	dev->se_hba = hba;
	dev->transport = hba->backend->ops;
	dev->transport_flags = dev->transport->transport_flags_default;
@@ -791,6 +815,12 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
		sizeof(dev->t10_wwn.revision));

	return dev;

free_stats:
	free_percpu(dev->stats);
free_device:
	hba->backend->ops->free_device(dev);
	return NULL;
}

/*
@@ -1001,6 +1031,7 @@ void target_free_device(struct se_device *dev)
		dev->transport->free_prot(dev);

	kfree(dev->queues);
	free_percpu(dev->stats);
	dev->transport->free_device(dev);
}

+57 −12
Original line number Diff line number Diff line
@@ -280,30 +280,51 @@ static ssize_t target_stat_lu_num_cmds_show(struct config_item *item,
		char *page)
{
	struct se_device *dev = to_stat_lu_dev(item);
	struct se_dev_io_stats *stats;
	unsigned int cpu;
	u32 cmds = 0;

	for_each_possible_cpu(cpu) {
		stats = per_cpu_ptr(dev->stats, cpu);
		cmds += stats->total_cmds;
	}

	/* scsiLuNumCommands */
	return snprintf(page, PAGE_SIZE, "%lu\n",
			atomic_long_read(&dev->num_cmds));
	return snprintf(page, PAGE_SIZE, "%u\n", cmds);
}

static ssize_t target_stat_lu_read_mbytes_show(struct config_item *item,
		char *page)
{
	struct se_device *dev = to_stat_lu_dev(item);
	struct se_dev_io_stats *stats;
	unsigned int cpu;
	u32 bytes = 0;

	for_each_possible_cpu(cpu) {
		stats = per_cpu_ptr(dev->stats, cpu);
		bytes += stats->read_bytes;
	}

	/* scsiLuReadMegaBytes */
	return snprintf(page, PAGE_SIZE, "%lu\n",
			atomic_long_read(&dev->read_bytes) >> 20);
	return snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20);
}

static ssize_t target_stat_lu_write_mbytes_show(struct config_item *item,
		char *page)
{
	struct se_device *dev = to_stat_lu_dev(item);
	struct se_dev_io_stats *stats;
	unsigned int cpu;
	u32 bytes = 0;

	for_each_possible_cpu(cpu) {
		stats = per_cpu_ptr(dev->stats, cpu);
		bytes += stats->write_bytes;
	}

	/* scsiLuWrittenMegaBytes */
	return snprintf(page, PAGE_SIZE, "%lu\n",
			atomic_long_read(&dev->write_bytes) >> 20);
	return snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20);
}

static ssize_t target_stat_lu_resets_show(struct config_item *item, char *page)
@@ -1019,8 +1040,11 @@ static ssize_t target_stat_auth_num_cmds_show(struct config_item *item,
{
	struct se_lun_acl *lacl = auth_to_lacl(item);
	struct se_node_acl *nacl = lacl->se_lun_nacl;
	struct se_dev_entry_io_stats *stats;
	struct se_dev_entry *deve;
	unsigned int cpu;
	ssize_t ret;
	u32 cmds = 0;

	rcu_read_lock();
	deve = target_nacl_find_deve(nacl, lacl->mapped_lun);
@@ -1028,9 +1052,14 @@ static ssize_t target_stat_auth_num_cmds_show(struct config_item *item,
		rcu_read_unlock();
		return -ENODEV;
	}

	for_each_possible_cpu(cpu) {
		stats = per_cpu_ptr(deve->stats, cpu);
		cmds += stats->total_cmds;
	}

	/* scsiAuthIntrOutCommands */
	ret = snprintf(page, PAGE_SIZE, "%lu\n",
		       atomic_long_read(&deve->total_cmds));
	ret = snprintf(page, PAGE_SIZE, "%u\n", cmds);
	rcu_read_unlock();
	return ret;
}
@@ -1040,8 +1069,11 @@ static ssize_t target_stat_auth_read_mbytes_show(struct config_item *item,
{
	struct se_lun_acl *lacl = auth_to_lacl(item);
	struct se_node_acl *nacl = lacl->se_lun_nacl;
	struct se_dev_entry_io_stats *stats;
	struct se_dev_entry *deve;
	unsigned int cpu;
	ssize_t ret;
	u32 bytes = 0;

	rcu_read_lock();
	deve = target_nacl_find_deve(nacl, lacl->mapped_lun);
@@ -1049,9 +1081,14 @@ static ssize_t target_stat_auth_read_mbytes_show(struct config_item *item,
		rcu_read_unlock();
		return -ENODEV;
	}

	for_each_possible_cpu(cpu) {
		stats = per_cpu_ptr(deve->stats, cpu);
		bytes += stats->read_bytes;
	}

	/* scsiAuthIntrReadMegaBytes */
	ret = snprintf(page, PAGE_SIZE, "%u\n",
		      (u32)(atomic_long_read(&deve->read_bytes) >> 20));
	ret = snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20);
	rcu_read_unlock();
	return ret;
}
@@ -1061,8 +1098,11 @@ static ssize_t target_stat_auth_write_mbytes_show(struct config_item *item,
{
	struct se_lun_acl *lacl = auth_to_lacl(item);
	struct se_node_acl *nacl = lacl->se_lun_nacl;
	struct se_dev_entry_io_stats *stats;
	struct se_dev_entry *deve;
	unsigned int cpu;
	ssize_t ret;
	u32 bytes = 0;

	rcu_read_lock();
	deve = target_nacl_find_deve(nacl, lacl->mapped_lun);
@@ -1070,9 +1110,14 @@ static ssize_t target_stat_auth_write_mbytes_show(struct config_item *item,
		rcu_read_unlock();
		return -ENODEV;
	}

	for_each_possible_cpu(cpu) {
		stats = per_cpu_ptr(deve->stats, cpu);
		bytes += stats->write_bytes;
	}

	/* scsiAuthIntrWrittenMegaBytes */
	ret = snprintf(page, PAGE_SIZE, "%u\n",
		      (u32)(atomic_long_read(&deve->write_bytes) >> 20));
	ret = snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20);
	rcu_read_unlock();
	return ret;
}
+14 −6
Original line number Diff line number Diff line
@@ -669,15 +669,19 @@ struct se_lun_acl {
	struct se_ml_stat_grps	ml_stat_grps;
};

struct se_dev_entry_io_stats {
	u32			total_cmds;
	u32			read_bytes;
	u32			write_bytes;
};

struct se_dev_entry {
	u64			mapped_lun;
	u64			pr_res_key;
	u64			creation_time;
	bool			lun_access_ro;
	u32			attach_count;
	atomic_long_t		total_cmds;
	atomic_long_t		read_bytes;
	atomic_long_t		write_bytes;
	struct se_dev_entry_io_stats __percpu	*stats;
	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
	struct kref		pr_kref;
	struct completion	pr_comp;
@@ -800,6 +804,12 @@ struct se_device_queue {
	struct se_cmd_queue	sq;
};

struct se_dev_io_stats {
	u32			total_cmds;
	u32			read_bytes;
	u32			write_bytes;
};

struct se_device {
	/* Used for SAM Task Attribute ordering */
	u32			dev_cur_ordered_id;
@@ -821,9 +831,7 @@ struct se_device {
	atomic_long_t		num_resets;
	atomic_long_t		aborts_complete;
	atomic_long_t		aborts_no_task;
	atomic_long_t		num_cmds;
	atomic_long_t		read_bytes;
	atomic_long_t		write_bytes;
	struct se_dev_io_stats __percpu	*stats;
	/* Active commands on this virtual SE device */
	atomic_t		non_ordered;
	bool			ordered_sync_in_progress;