Commit 8544374c authored by Xiaogang Chen, committed by Alex Deucher
Browse files

drm/amdkfd: Have kfd driver use same PASID values from graphic driver



The current kfd driver allocates its own PASID value for each kfd process and
uses it to locate the vm in the interrupt handler and to map between a kfd
process and its vm. That design does not work when a physical gpu device has
multiple spatial partitions, e.g. an adev in CPX mode. This patch makes the kfd
driver use the same pasid values that the graphics driver generates, which are
allocated per vm (one pasid for each vm).

These pasid values are passed to fw/hardware. We do not need to change the
interrupt handler even though more pasid values are used. Also, pasid values in
log messages are replaced by the user process pid; pasid values are not exposed
to users. Users see their process pids, which have meaning in user space.

Signed-off-by: Xiaogang Chen <xiaogang.chen@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent ca449221
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@ enum TLB_FLUSH_TYPE {
};

struct amdgpu_device;
struct kfd_process_device;
struct amdgpu_reset_context;

enum kfd_mem_attachment_type {
@@ -299,8 +300,6 @@ bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id);
	(&((struct amdgpu_fpriv *)					\
		((struct drm_file *)(drm_priv))->driver_priv)->vm)

int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
				     struct amdgpu_vm *avm, u32 pasid);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
					struct amdgpu_vm *avm,
					void **process_info,
+0 −21
Original line number Diff line number Diff line
@@ -1529,27 +1529,6 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
	amdgpu_bo_unreserve(bo);
}

/**
 * amdgpu_amdkfd_gpuvm_set_vm_pasid - replace the pasid attached to a vm
 * @adev: device passed through to amdgpu_vm_set_pasid()
 * @avm: vm whose pasid is replaced
 * @pasid: new pasid to set on @avm
 *
 * If @avm already carries a non-zero pasid, that pasid is released with
 * amdgpu_pasid_free() and the vm's pasid is reset to 0 before @pasid is
 * installed.
 *
 * Return: 0 on success, otherwise the non-zero value returned by the final
 * amdgpu_vm_set_pasid() call.
 */
int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
				     struct amdgpu_vm *avm, u32 pasid)
{
	if (avm->pasid) {
		/*
		 * Drop the pasid that amdgpu allocated for this vm; the
		 * kfd allocated pasid set below takes its place.
		 */
		amdgpu_pasid_free(avm->pasid);
		amdgpu_vm_set_pasid(adev, avm, 0);
	}

	/* The result of installing the new pasid is the overall result. */
	return amdgpu_vm_set_pasid(adev, avm, pasid);
}

int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
					   struct amdgpu_vm *avm,
					   void **process_info,
+14 −4
Original line number Diff line number Diff line
@@ -107,20 +107,30 @@ static void cik_event_interrupt_wq(struct kfd_node *dev,
		kfd_signal_hw_exception_event(pasid);
	else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
		struct kfd_process_device *pdd = NULL;
		struct kfd_vm_fault_info info;
		struct kfd_process *p;

		kfd_smi_event_update_vmfault(dev, pasid);
		kfd_dqm_evict_pasid(dev->dqm, pasid);
		p = kfd_lookup_process_by_pasid(pasid, &pdd);
		if (!pdd)
			return;

		kfd_evict_process_device(pdd);

		memset(&info, 0, sizeof(info));
		amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
		if (!info.page_addr && !info.status)
		if (!info.page_addr && !info.status) {
			kfd_unref_process(p);
			return;
		}

		if (info.vmid == vmid)
			kfd_signal_vm_fault_event(dev, pasid, &info, NULL);
			kfd_signal_vm_fault_event(pdd, &info, NULL);
		else
			kfd_signal_vm_fault_event(dev, pasid, NULL, NULL);
			kfd_signal_vm_fault_event(pdd, &info, NULL);

		kfd_unref_process(p);
	}
}

+13 −12
Original line number Diff line number Diff line
@@ -155,8 +155,8 @@ static int kfd_open(struct inode *inode, struct file *filep)
	/* filep now owns the reference returned by kfd_create_process */
	filep->private_data = process;

	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
		process->pasid, process->is_32bit_user_mode);
	dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",
		process->lead_thread->pid, process->is_32bit_user_mode);

	return 0;
}
@@ -361,8 +361,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
		goto err_acquire_queue_buf;
	}

	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
			p->pasid,
	pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
			p->lead_thread->pid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,
@@ -415,9 +415,9 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid 0x%x\n",
	pr_debug("Destroying queue id %d for process pid %d\n",
				args->queue_id,
				p->pasid);
				p->lead_thread->pid);

	mutex_lock(&p->mutex);

@@ -468,8 +468,8 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
	properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid 0x%x\n",
			args->queue_id, p->pasid);
	pr_debug("Updating queue id %d for process pid %d\n",
			args->queue_id, p->lead_thread->pid);

	mutex_lock(&p->mutex);

@@ -695,7 +695,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
	struct kfd_process_device_apertures *pAperture;
	int i;

	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
	dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);

	args->num_of_nodes = 0;

@@ -747,7 +747,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
	int ret;
	int i;

	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
	dev_dbg(kfd_device, "get apertures for process pid %d",
			p->lead_thread->pid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can alloacate
@@ -3365,12 +3366,12 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	pr_debug("pasid 0x%x mapping mmio page\n"
	pr_debug("process pid %d mapping mmio page\n"
		 "     target user address == 0x%08llX\n"
		 "     physical address    == 0x%08llX\n"
		 "     vm_flags            == 0x%04lX\n"
		 "     size                == 0x%04lX\n",
		 process->pasid, (unsigned long long) vma->vm_start,
		 process->lead_thread->pid, (unsigned long long) vma->vm_start,
		 address, vma->vm_flags, PAGE_SIZE);

	return io_remap_pfn_range(vma,
+7 −7
Original line number Diff line number Diff line
@@ -204,11 +204,12 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
				   size_t exception_data_size)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;
	bool signaled_to_debugger_or_runtime = false;

	p = kfd_lookup_process_by_pasid(pasid);
	p = kfd_lookup_process_by_pasid(pasid, &pdd);

	if (!p)
	if (!pdd)
		return false;

	if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
@@ -238,9 +239,8 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,

			mutex_unlock(&p->mutex);
		} else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
			kfd_dqm_evict_pasid(dev->dqm, p->pasid);
			kfd_signal_vm_fault_event(dev, p->pasid, NULL,
							exception_data);
			kfd_evict_process_device(pdd);
			kfd_signal_vm_fault_event(pdd, NULL, exception_data);

			signaled_to_debugger_or_runtime = true;
		}
@@ -276,8 +276,8 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
		data = (struct kfd_hsa_memory_exception_data *)
						pdd->vm_fault_exc_data;

		kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
		kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data);
		kfd_evict_process_device(pdd);
		kfd_signal_vm_fault_event(pdd, NULL, data);
		error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
	}

Loading