drm/amdkfd: allow users to target recommended SDMA engines

Certain GPUs have better copy performance over xGMI on specific
SDMA engines depending on the source and destination GPU.
Allow users to create SDMA queues on these recommended engines.
An overall performance improvement of close to 2x has been observed
with this optimization.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Jonathan Kim
2024-05-21 13:22:15 -04:00
committed by Alex Deucher
parent 60c30ba7ba
commit e06b71b231
7 changed files with 116 additions and 3 deletions

View File

@@ -255,6 +255,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
args->ctx_save_restore_address;
q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
q_properties->ctl_stack_size = args->ctl_stack_size;
q_properties->sdma_engine_id = args->sdma_engine_id;
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -262,6 +263,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->type = KFD_QUEUE_TYPE_SDMA;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)
q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID;
else
return -ENOTSUPP;
@@ -333,6 +336,18 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_bind_process;
}
if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +
kfd_get_num_xgmi_sdma_engines(dev) - 1;
if (q_properties.sdma_engine_id > max_sdma_eng_id) {
err = -EINVAL;
pr_err("sdma_engine_id %i exceeds maximum id of %i\n",
q_properties.sdma_engine_id, max_sdma_eng_id);
goto err_sdma_engine_id;
}
}
if (!pdd->qpd.proc_doorbells) {
err = kfd_alloc_process_doorbells(dev->kfd, pdd);
if (err) {
@@ -387,6 +402,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
err_create_queue:
kfd_queue_release_buffers(pdd, &q_properties);
err_acquire_queue_buf:
err_sdma_engine_id:
err_bind_process:
err_pdd:
mutex_unlock(&p->mutex);