drm/amdkfd: Add user queue eviction restore SMI event

Output user queue eviction and restore event. User queue eviction may be
triggered by svm or userptr MMU notifier, TTM eviction, device suspend
and CRIU checkpoint and restore.

User queue restore may be rescheduled if eviction happens again while
restore.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Philip Yang
2022-01-13 21:24:20 -05:00
committed by Alex Deucher
parent acac270d09
commit c7f21978fa
9 changed files with 69 additions and 15 deletions

View File

@@ -32,6 +32,7 @@
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
#include "kfd_smi_events.h"
/* Userptr restore delay, just long enough to allow consecutive VM
* changes to accumulate
@@ -2346,7 +2347,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
if (evicted_bos == 1) {
/* First eviction, stop the queues */
r = kgd2kfd_quiesce_mm(mm);
r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
if (r)
pr_err("Failed to quiesce KFD\n");
schedule_delayed_work(&process_info->restore_userptr_work,
@@ -2620,13 +2621,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
unlock_out:
mutex_unlock(&process_info->lock);
mmput(mm);
put_task_struct(usertask);
/* If validation failed, reschedule another attempt */
if (evicted_bos)
if (evicted_bos) {
schedule_delayed_work(&process_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
kfd_smi_event_queue_restore_rescheduled(mm);
}
mmput(mm);
put_task_struct(usertask);
}
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given