Commit 2d274bf7 authored by Shane Xiao, committed by Alex Deucher
Browse files

amd/amdkfd: Trigger segfault for early userptr unmapping



If an application unmaps the memory before destroying the userptr, the
driver needs to trigger a segfault in VM debug mode to notify user space
so that it can correct its free sequence.

v2: Send gpu access fault to user space
v3: Report gpu address to user space, remove unnecessary params
v4: update pr_err into one line, remove userptr log info

Signed-off-by: Shane Xiao <shane.xiao@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 8e320f67
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -2559,6 +2559,18 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
			if (ret != -EFAULT)
				return ret;

			/* If applications unmap memory before destroying the userptr
			 * from the KFD, trigger a segmentation fault in VM debug mode.
			 */
			if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
				pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
								pid_nr(process_info->pid), mem->va);

				// Send GPU VM fault to user space
				kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
								mem->va);
			}

			ret = 0;
		}

+19 −0
Original line number Diff line number Diff line
@@ -1177,6 +1177,25 @@ void kfd_signal_hw_exception_event(u32 pasid)
	kfd_unref_process(p);
}

/**
 * kfd_signal_vm_fault_event_with_userptr - report a userptr VM fault to user space
 * @p: KFD process whose devices should receive the fault; NULL is tolerated
 * @gpu_va: GPU virtual address to report in the memory exception data
 *
 * Builds a "not present" memory exception for @gpu_va and, for every process
 * device of @p, evicts that device and then signals a VM fault event on it.
 *
 * This function does not release any reference on @p. A caller that obtained
 * @p through a reference-taking lookup remains responsible for dropping it.
 */
void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va)
{
	struct kfd_process_device *pdd;
	struct kfd_hsa_memory_exception_data exception_data;
	int i;

	/*
	 * Callers pass the result of a process lookup straight in; bail out
	 * instead of dereferencing a NULL process if that lookup failed.
	 */
	if (!p)
		return;

	/* Zero the whole structure so no stale stack data is reported. */
	memset(&exception_data, 0, sizeof(exception_data));
	exception_data.va = gpu_va;
	exception_data.failure.NotPresent = 1;

	/* Send the VM fault to every device attached to this process. */
	for (i = 0; i < p->n_pdds; i++) {
		pdd = p->pdds[i];
		/* The event carries the GPU id as seen by user space. */
		exception_data.gpu_id = pdd->user_gpu_id;
		kfd_evict_process_device(pdd);
		kfd_signal_vm_fault_event(pdd, NULL, &exception_data);
	}
}

void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
				struct kfd_vm_fault_info *info,
				struct kfd_hsa_memory_exception_data *data)
+2 −0
Original line number Diff line number Diff line
@@ -1507,6 +1507,8 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);

/*
 * Report a "not present" VM fault at @gpu_va to user space on every process
 * device of @p; each device is evicted before its fault event is signalled.
 */
void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va);

void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
				struct kfd_vm_fault_info *info,
				struct kfd_hsa_memory_exception_data *data);