Commit e36ba5ab authored by Nicolin Chen, committed by Jason Gunthorpe
Browse files

iommufd: Add IOMMUFD_OBJ_VEVENTQ and IOMMUFD_CMD_VEVENTQ_ALLOC

Introduce a new IOMMUFD_OBJ_VEVENTQ object for vIOMMU Event Queue that
provides user space (VMM) another FD to read the vIOMMU Events.

Allow a vIOMMU object to allocate vEVENTQs, with a condition that each
vIOMMU can only have one single vEVENTQ per type.

Add iommufd_veventq_alloc() with iommufd_veventq_ops for the new ioctl.

Link: https://patch.msgid.link/r/21acf0751dd5c93846935ee06f93b9c65eff5e04.1741719725.git.nicolinc@nvidia.com


Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 0507f337
Loading
Loading
Loading
Loading
+208 −1
Original line number Diff line number Diff line
@@ -262,13 +262,148 @@ static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *b
	return done == 0 ? rc : done;
}

/* IOMMUFD_OBJ_VEVENTQ Functions */

/*
 * Tear down a vEVENTQ: free every vEVENT still sitting on its deliver list,
 * drop the reference taken on the parent vIOMMU's object and unlink the queue
 * from the vIOMMU's veventqs list.
 *
 * The caller must hold viommu->veventqs_rwsem for write.
 */
void iommufd_veventq_abort(struct iommufd_object *obj)
{
	struct iommufd_veventq *veventq = eventq_to_veventq(
		container_of(obj, struct iommufd_eventq, obj));
	struct list_head *pending = &veventq->common.deliver;
	struct iommufd_viommu *viommu = veventq->viommu;
	struct iommufd_vevent *vevent, *tmp;

	lockdep_assert_held_write(&viommu->veventqs_rwsem);

	list_for_each_entry_safe(vevent, tmp, pending, node) {
		list_del(&vevent->node);
		/* lost_events_header is embedded in veventq: never kfree it */
		if (vevent == &veventq->lost_events_header)
			continue;
		kfree(vevent);
	}

	refcount_dec(&viommu->obj.users);
	list_del(&veventq->node);
}

/*
 * Destroy callback of a vEVENTQ object: runs iommufd_veventq_abort() with the
 * parent vIOMMU's veventqs_rwsem held for write.
 */
void iommufd_veventq_destroy(struct iommufd_object *obj)
{
	struct iommufd_eventq *eventq =
		container_of(obj, struct iommufd_eventq, obj);
	struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
	struct rw_semaphore *rwsem = &veventq->viommu->veventqs_rwsem;

	down_write(rwsem);
	iommufd_veventq_abort(obj);
	up_write(rwsem);
}

/*
 * Take the first vEVENT off the deliver list, under the eventq lock.
 *
 * A regular vEVENT is unlinked and handed out as-is. When the first entry is
 * the veventq's embedded lost_events_header, it is unlinked and a GFP_ATOMIC
 * copy of it is handed out instead; if that allocation fails the header stays
 * on the list.
 *
 * Return: the fetched vEVENT, or NULL when the list is empty or the copy could
 * not be allocated.
 */
static struct iommufd_vevent *
iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq)
{
	struct iommufd_eventq *eventq = &veventq->common;
	struct iommufd_vevent *fetched = NULL;
	struct iommufd_vevent *head, *copy;

	spin_lock(&eventq->lock);
	if (list_empty(&eventq->deliver))
		goto out_unlock;

	head = list_first_entry(&eventq->deliver, struct iommufd_vevent, node);
	if (head != &veventq->lost_events_header) {
		list_del(&head->node);
		fetched = head;
		goto out_unlock;
	}

	/* Make a copy of the lost_events_header for copy_to_user */
	copy = kzalloc(sizeof(*copy), GFP_ATOMIC);
	if (!copy)
		goto out_unlock;
	list_del(&head->node);
	memcpy(copy, head, sizeof(*copy));
	fetched = copy;

out_unlock:
	spin_unlock(&eventq->lock);
	return fetched;
}

/*
 * Give back a vEVENT that was fetched but could not be delivered.
 *
 * A regular vEVENT is re-inserted at the head of the deliver list. A copy of
 * the lost_events_header is freed; the embedded lost_events_header itself is
 * put back at the head only when the deliver list is empty.
 */
static void iommufd_veventq_deliver_restore(struct iommufd_veventq *veventq,
					    struct iommufd_vevent *vevent)
{
	struct iommufd_eventq *eventq = &veventq->common;
	struct iommufd_vevent *requeue = vevent;

	spin_lock(&eventq->lock);
	if (vevent_for_lost_events_header(vevent)) {
		/* An empty list needs the lost_events_header back */
		requeue = list_empty(&eventq->deliver) ?
				  &veventq->lost_events_header : NULL;
		/* Remove the copy of the lost_events_header */
		kfree(vevent);
	}
	if (requeue)
		list_add(&requeue->node, &eventq->deliver);
	spin_unlock(&eventq->lock);
}

/*
 * read() handler of a vEVENTQ FD.
 *
 * Fetches vEVENTs from the deliver list one at a time and copies each to user
 * space as its header followed by data_len bytes of event_data. Delivery stops
 * at the first vEVENT that does not fit in the remaining space or that cannot
 * be copied; that vEVENT is handed back via iommufd_veventq_deliver_restore().
 *
 * @done only advances once a vEVENT has been copied completely, so a failed
 * payload copy never leaves a header without its data counted in the return
 * value while the same vEVENT is still queued for the next read().
 *
 * Return: the number of bytes of fully delivered vEVENTs; if that is zero,
 * 0 when nothing was delivered, or -EFAULT when a copy to user space failed.
 * -ESPIPE when *ppos is non-zero.
 */
static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf,
					 size_t count, loff_t *ppos)
{
	struct iommufd_eventq *eventq = filep->private_data;
	struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
	struct iommufd_vevent_header *hdr;
	struct iommufd_vevent *cur;
	size_t done = 0;
	int rc = 0;

	if (*ppos)
		return -ESPIPE;

	while ((cur = iommufd_veventq_deliver_fetch(veventq))) {
		/* Validate the remaining bytes against the header size */
		if (done >= count || sizeof(*hdr) > count - done) {
			iommufd_veventq_deliver_restore(veventq, cur);
			break;
		}
		hdr = &cur->header;

		/*
		 * If being a normal vEVENT, validate against the full size.
		 * Use the size of the header itself, not of the pointer to it.
		 */
		if (!vevent_for_lost_events_header(cur) &&
		    sizeof(*hdr) + cur->data_len > count - done) {
			iommufd_veventq_deliver_restore(veventq, cur);
			break;
		}

		if (copy_to_user(buf + done, hdr, sizeof(*hdr))) {
			iommufd_veventq_deliver_restore(veventq, cur);
			rc = -EFAULT;
			break;
		}

		/* The payload goes right behind the header just written */
		if (cur->data_len &&
		    copy_to_user(buf + done + sizeof(*hdr), cur->event_data,
				 cur->data_len)) {
			iommufd_veventq_deliver_restore(veventq, cur);
			rc = -EFAULT;
			break;
		}
		spin_lock(&eventq->lock);
		veventq->num_events--;
		spin_unlock(&eventq->lock);
		/* Account for the vEVENT only now that it is fully delivered */
		done += sizeof(*hdr) + cur->data_len;
		kfree(cur);
	}

	return done == 0 ? rc : done;
}

/* Common Event Queue Functions */

static __poll_t iommufd_eventq_fops_poll(struct file *filep,
					 struct poll_table_struct *wait)
{
	struct iommufd_eventq *eventq = filep->private_data;
	__poll_t pollflags = EPOLLOUT;
	__poll_t pollflags = 0;

	if (eventq->obj.type == IOMMUFD_OBJ_FAULT)
		pollflags |= EPOLLOUT;

	poll_wait(filep, &eventq->wait_queue, wait);
	spin_lock(&eventq->lock);
@@ -388,3 +523,75 @@ int iommufd_fault_iopf_handler(struct iopf_group *group)

	return 0;
}

/*
 * File operations backing the vEVENTQ FD. Only a read handler is supplied;
 * the second INIT_EVENTQ_FOPS() argument is NULL (presumably the write
 * handler slot -- confirm against the macro definition).
 */
static const struct file_operations iommufd_veventq_fops =
	INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL);

/*
 * Handler for the IOMMU_VEVENTQ_ALLOC ioctl: allocate a vEVENTQ object on the
 * vIOMMU named by cmd->viommu_id and return its object ID and a new FD to user
 * space through cmd->out_veventq_id / cmd->out_veventq_fd.
 *
 * Return: 0 on success, otherwise a negative errno:
 *   -EOPNOTSUPP  cmd->flags or cmd->__reserved is set, or cmd->type is
 *                IOMMU_VEVENTQ_TYPE_DEFAULT
 *   -EINVAL      cmd->veventq_depth is zero
 *   -EEXIST      the vIOMMU already has a vEVENTQ of cmd->type
 *   or the error reported by the vIOMMU lookup, the object allocation,
 *   iommufd_eventq_init() or iommufd_ucmd_respond().
 */
int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd)
{
	struct iommu_veventq_alloc *cmd = ucmd->cmd;
	struct iommufd_veventq *veventq;
	struct iommufd_viommu *viommu;
	int fdno;
	int rc;

	if (cmd->flags || cmd->__reserved ||
	    cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT)
		return -EOPNOTSUPP;
	if (!cmd->veventq_depth)
		return -EINVAL;

	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
	if (IS_ERR(viommu))
		return PTR_ERR(viommu);

	/* Held for write across the duplicate check and the list insertion */
	down_write(&viommu->veventqs_rwsem);

	/* Each vIOMMU can have only one vEVENTQ per type */
	if (iommufd_viommu_find_veventq(viommu, cmd->type)) {
		rc = -EEXIST;
		goto out_unlock_veventqs;
	}

	veventq = __iommufd_object_alloc(ucmd->ictx, veventq,
					 IOMMUFD_OBJ_VEVENTQ, common.obj);
	if (IS_ERR(veventq)) {
		rc = PTR_ERR(veventq);
		goto out_unlock_veventqs;
	}

	veventq->type = cmd->type;
	veventq->viommu = viommu;
	/* Dropped again by iommufd_veventq_abort() */
	refcount_inc(&viommu->obj.users);
	veventq->depth = cmd->veventq_depth;
	list_add_tail(&veventq->node, &viommu->veventqs);
	/* This flag is what vevent_for_lost_events_header() tests for */
	veventq->lost_events_header.header.flags =
		IOMMU_VEVENTQ_FLAG_LOST_EVENTS;

	fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]",
				   ucmd->ictx, &iommufd_veventq_fops);
	if (fdno < 0) {
		rc = fdno;
		goto out_abort;
	}

	cmd->out_veventq_id = veventq->common.obj.id;
	cmd->out_veventq_fd = fdno;

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	if (rc)
		goto out_put_fdno;

	/* The FD is installed only after the response has succeeded */
	iommufd_object_finalize(ucmd->ictx, &veventq->common.obj);
	fd_install(fdno, veventq->common.filep);
	/* Success: skip the error unwinding, rc is 0 here */
	goto out_unlock_veventqs;

out_put_fdno:
	put_unused_fd(fdno);
	fput(veventq->common.filep);
out_abort:
	iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj);
out_unlock_veventqs:
	up_write(&viommu->veventqs_rwsem);
	iommufd_put_object(ucmd->ictx, &viommu->obj);
	return rc;
}
+82 −0
Original line number Diff line number Diff line
@@ -507,6 +507,74 @@ void iommufd_fault_iopf_disable(struct iommufd_device *idev);
void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
				  struct iommufd_attach_handle *handle);

/*
 * An iommufd_vevent represents a vIOMMU event in an iommufd_veventq
 *
 * @header: event header; its flags mark a lost-events header (see
 *          vevent_for_lost_events_header()) and its sequence is stamped by
 *          iommufd_vevent_handler()
 * @node: link in the owning eventq's deliver list
 * @data_len: length of @event_data
 * @event_data: event payload following the header
 *
 * NOTE(review): the vEVENTQ read() handler passes data_len to copy_to_user()
 * as a byte count, whereas __counted_by() counts u64 elements -- confirm the
 * intended unit of data_len.
 */
struct iommufd_vevent {
	struct iommufd_vevent_header header;
	struct list_head node; /* for iommufd_eventq::deliver */
	ssize_t data_len;
	u64 event_data[] __counted_by(data_len);
};

/*
 * Evaluates to non-zero when @vevent (a pointer to a struct iommufd_vevent)
 * has IOMMU_VEVENTQ_FLAG_LOST_EVENTS set in its header flags. The argument is
 * parenthesized so that any pointer expression (e.g. "p + 1") can be passed.
 */
#define vevent_for_lost_events_header(vevent) \
	((vevent)->header.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)

/*
 * An iommufd_veventq object represents an interface to deliver vIOMMU events to
 * the user space. It is created/destroyed by the user space and associated with
 * a vIOMMU object during the allocations.
 *
 * @common: embedded generic event queue (object, lock, deliver list, wait
 *          queue, file); eventq_to_veventq() converts back from it
 * @viommu: parent vIOMMU; a reference on its object is taken at allocation and
 *          dropped in iommufd_veventq_abort()
 * @node: link in the parent vIOMMU's veventqs list
 * @lost_events_header: embedded vEVENT whose header carries
 *                      IOMMU_VEVENTQ_FLAG_LOST_EVENTS; never kfree()d
 * @type: event queue type, matched by iommufd_viommu_find_veventq()
 * @depth: queue depth requested by user space at allocation
 * @num_events: decremented by the read() handler for each vEVENT delivered
 * @sequence: next sequence number, handed out by iommufd_vevent_handler()
 */
struct iommufd_veventq {
	struct iommufd_eventq common;
	struct iommufd_viommu *viommu;
	struct list_head node; /* for iommufd_viommu::veventqs */
	struct iommufd_vevent lost_events_header;

	unsigned int type;
	unsigned int depth;

	/* Use common.lock for protection */
	u32 num_events;
	u32 sequence;
};

/* Map an embedded iommufd_eventq back to the iommufd_veventq containing it */
static inline struct iommufd_veventq *
eventq_to_veventq(struct iommufd_eventq *eventq)
{
	struct iommufd_veventq *veventq =
		container_of(eventq, struct iommufd_veventq, common);

	return veventq;
}

/*
 * Look up object @id of type IOMMUFD_OBJ_VEVENTQ in the ucmd's context and
 * convert the result to its containing iommufd_veventq. The value returned by
 * iommufd_get_object() goes through container_of() without being checked
 * here, so callers are responsible for validating the result.
 */
static inline struct iommufd_veventq *
iommufd_get_veventq(struct iommufd_ucmd *ucmd, u32 id)
{
	struct iommufd_object *obj =
		iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_VEVENTQ);

	return container_of(obj, struct iommufd_veventq, common.obj);
}

/* Handler registered for the IOMMU_VEVENTQ_ALLOC ioctl */
int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd);
/* .destroy and .abort object ops registered for IOMMUFD_OBJ_VEVENTQ */
void iommufd_veventq_destroy(struct iommufd_object *obj);
void iommufd_veventq_abort(struct iommufd_object *obj);

/*
 * Queue @vevent at the tail of the veventq's deliver list, stamp it with the
 * next sequence number and wake up waiters on the eventq's wait queue.
 *
 * The caller must hold the eventq lock.
 */
static inline void iommufd_vevent_handler(struct iommufd_veventq *veventq,
					  struct iommufd_vevent *vevent)
{
	struct iommufd_eventq *eventq = &veventq->common;
	struct list_head *queue = &eventq->deliver;
	struct list_head *lost_node = &veventq->lost_events_header.node;

	lockdep_assert_held(&eventq->lock);

	/*
	 * A lost_events_header sitting at the tail is taken off before the new
	 * node is appended. The new node may itself be the lost_events_header,
	 * which then simply gets re-queued with an updated sequence.
	 */
	if (list_is_last(lost_node, queue))
		list_del(lost_node);
	list_add_tail(&vevent->node, queue);

	/* Sequence numbers wrap within the range [0, INT_MAX] */
	vevent->header.sequence = veventq->sequence;
	veventq->sequence = (veventq->sequence + 1) & INT_MAX;

	wake_up_interruptible(&eventq->wait_queue);
}

static inline struct iommufd_viommu *
iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
{
@@ -515,6 +583,20 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
			    struct iommufd_viommu, obj);
}

/*
 * Find the vEVENTQ of the given @type on @viommu's veventqs list.
 *
 * The caller must hold viommu->veventqs_rwsem. Nothing is removed from the
 * list during the walk, so the plain (non-_safe) iterator is used.
 *
 * Return: the matching vEVENTQ, or NULL if the vIOMMU has none of that type.
 */
static inline struct iommufd_veventq *
iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, u32 type)
{
	struct iommufd_veventq *veventq;

	lockdep_assert_held(&viommu->veventqs_rwsem);

	list_for_each_entry(veventq, &viommu->veventqs, node) {
		if (veventq->type == type)
			return veventq;
	}
	return NULL;
}

int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_viommu_destroy(struct iommufd_object *obj);
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
+7 −0
Original line number Diff line number Diff line
@@ -317,6 +317,7 @@ union ucmd_buffer {
	struct iommu_ioas_unmap unmap;
	struct iommu_option option;
	struct iommu_vdevice_alloc vdev;
	struct iommu_veventq_alloc veventq;
	struct iommu_vfio_ioas vfio_ioas;
	struct iommu_viommu_alloc viommu;
#ifdef CONFIG_IOMMUFD_TEST
@@ -372,6 +373,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
	IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64),
	IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl,
		 struct iommu_vdevice_alloc, virt_id),
	IOCTL_OP(IOMMU_VEVENTQ_ALLOC, iommufd_veventq_alloc,
		 struct iommu_veventq_alloc, out_veventq_fd),
	IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
		 __reserved),
	IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
@@ -514,6 +517,10 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
	[IOMMUFD_OBJ_VDEVICE] = {
		.destroy = iommufd_vdevice_destroy,
	},
	[IOMMUFD_OBJ_VEVENTQ] = {
		.destroy = iommufd_veventq_destroy,
		.abort = iommufd_veventq_abort,
	},
	[IOMMUFD_OBJ_VIOMMU] = {
		.destroy = iommufd_viommu_destroy,
	},
+2 −0
Original line number Diff line number Diff line
@@ -59,6 +59,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
	viommu->ictx = ucmd->ictx;
	viommu->hwpt = hwpt_paging;
	refcount_inc(&viommu->hwpt->common.obj.users);
	INIT_LIST_HEAD(&viommu->veventqs);
	init_rwsem(&viommu->veventqs_rwsem);
	/*
	 * It is the most likely case that a physical IOMMU is unpluggable. A
	 * pluggable IOMMU instance (if exists) is responsible for refcounting
+3 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ enum iommufd_object_type {
	IOMMUFD_OBJ_FAULT,
	IOMMUFD_OBJ_VIOMMU,
	IOMMUFD_OBJ_VDEVICE,
	IOMMUFD_OBJ_VEVENTQ,
#ifdef CONFIG_IOMMUFD_TEST
	IOMMUFD_OBJ_SELFTEST,
#endif
@@ -93,6 +94,8 @@ struct iommufd_viommu {
	const struct iommufd_viommu_ops *ops;

	struct xarray vdevs;
	struct list_head veventqs;
	struct rw_semaphore veventqs_rwsem;

	unsigned int type;
};
Loading