drm/amdkfd: Ignore bogus signals from MEC efficiently

MEC firmware sometimes sends signal interrupts without a valid context ID
on end-of-pipe events that are not intended to signal any HSA signals.
This triggers the slow path in kfd_signal_event_interrupt(), which scans the
entire event page for signaled events. Detect these signals in the top-half
interrupt handler to stop processing them as early as possible.

Because we now always treat event ID 0 as invalid, reserve that ID during
process initialization.

v2: Update firmware version checks to support more GPUs

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Felix Kuehling
2022-04-07 18:53:56 -04:00
committed by Alex Deucher
parent b3ef3205bc
commit c3eb12dff0
4 changed files with 60 additions and 7 deletions

View File

@@ -141,6 +141,25 @@ static void event_interrupt_poison_consumption(struct kfd_dev *dev,
}
}
/* Tell whether a legitimate event on this device is expected to carry a
 * valid (non-zero) context ID.
 *
 * The answer depends on the GC IP version and, for the versions listed
 * explicitly below, on a minimum MEC firmware version.
 *
 * Returns true if a context ID is expected, false otherwise.
 */
static bool context_id_expected(struct kfd_dev *dev)
{
	bool expected;

	switch (KFD_GC_VERSION(dev)) {
	case IP_VERSION(9, 0, 1):
		/* This GC version needs MEC firmware 0x817a or newer */
		expected = dev->mec_fw_version >= 0x817a;
		break;
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 3, 0):
	case IP_VERSION(9, 4, 0):
		/* These GC versions need MEC firmware 0x17a or newer */
		expected = dev->mec_fw_version >= 0x17a;
		break;
	default:
		/* Any other GFXv9 or later GPU always sends a valid context
		 * ID on legitimate events; GC versions below 9.4.1 that are
		 * not listed above are not covered.
		 */
		expected = KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 1);
		break;
	}

	return expected;
}
static bool event_interrupt_isr_v9(struct kfd_dev *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
@@ -206,6 +225,20 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
return false;
/* Workaround CP firmware sending bogus signals with 0 context_id.
* Those can be safely ignored on hardware and firmware versions that
* include a valid context_id on legitimate signals. This avoids the
* slow path in kfd_signal_event_interrupt that scans all event slots
* for signaled events.
*/
if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) {
uint32_t context_id =
SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
if (context_id == 0 && context_id_expected(dev))
return false;
}
/* Interrupt types we care about: various signals and faults.
* They will be forwarded to a work queue (see below).
*/