drm/amdkfd: Queue interrupt work to different CPU

In CPX mode, each KFD node has an interrupt worker that processes its
ih_fifo and sends events to user space. Currently all interrupt workers
of the same adev are queued to the same CPU, so their execution is
effectively serialized; when CPU usage is high, this causes KFD ih_fifo
overflows.

Use a per-GPU unbound high-priority workqueue whose number of active
workers equals the number of partitions, and let queue_work select the
next CPU round robin among the CPUs local to the same NUMA node.
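
A minimal sketch of the workqueue allocation this describes, assuming
alloc_workqueue() with WQ_HIGHPRI | WQ_UNBOUND and max_active sized to
the partition count (the queue name, field names, and error handling
here are illustrative, not part of this diff):

	/* One shared queue per adev. WQ_UNBOUND lets the workqueue core
	 * choose the CPU for each work item, and max_active permits one
	 * concurrent worker per partition (num_nodes assumed to be the
	 * partition count).
	 */
	kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND,
				     kfd->num_nodes);
	if (!kfd->ih_wq)
		return -ENOMEM;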

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -649,6 +649,14 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
 	struct kfd_node *knode;
 	unsigned int i;
 
+	/*
+	 * flush_work ensures that there are no outstanding
+	 * work-queue items that will access interrupt_ring. New work items
+	 * can't be created because we stopped interrupt handling above.
+	 */
+	flush_workqueue(kfd->ih_wq);
+	destroy_workqueue(kfd->ih_wq);
+
 	for (i = 0; i < num_nodes; i++) {
 		knode = kfd->nodes[i];
 		device_queue_manager_uninit(knode->dqm);
@@ -1066,21 +1074,6 @@ static int kfd_resume(struct kfd_node *node)
 	return err;
 }
 
-static inline void kfd_queue_work(struct workqueue_struct *wq,
-				  struct work_struct *work)
-{
-	int cpu, new_cpu;
-
-	cpu = new_cpu = smp_processor_id();
-	do {
-		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
-		if (cpu_to_node(new_cpu) == numa_node_id())
-			break;
-	} while (cpu != new_cpu);
-
-	queue_work_on(new_cpu, wq, work);
-}
-
 /* This is called directly from KGD at ISR. */
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
@@ -1106,7 +1099,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(node,
is_patched ? patched_ihre : ih_ring_entry)) {
kfd_queue_work(node->ih_wq, &node->interrupt_work);
queue_work(node->kfd->ih_wq, &node->interrupt_work);
spin_unlock_irqrestore(&node->interrupt_lock, flags);
return;
}
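
With the manual round-robin helper removed, the ISR-side hot path
reduces to a plain queue_work() on the shared per-GPU queue; a
condensed, illustrative view of the path above (locking and IH-entry
patching elided):

	/* Illustrative only: the workqueue core now selects the CPU for
	 * the per-node interrupt work instead of the removed NUMA
	 * round-robin loop.
	 */
	if (enqueue_ih_ring_entry(node, ih_ring_entry))
		queue_work(node->kfd->ih_wq, &node->interrupt_work);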