Commit cdbb9978 authored by Elizabeth Figura's avatar Elizabeth Figura Committed by Greg Kroah-Hartman
Browse files

ntsync: Introduce NTSYNC_IOC_WAIT_ALL.



This is similar to NTSYNC_IOC_WAIT_ANY, but waits until all of the objects are
simultaneously signaled, and then acquires all of them as a single atomic
operation.

Because acquisition of multiple objects is atomic, some complex locking is
required. We cannot simply spin-lock multiple objects simultaneously, as that
may disable preëmption for a problematically long time.

Instead, modifying any object which may be involved in a wait-all operation takes
a device-wide sleeping mutex, "wait_all_lock", instead of the normal object
spinlock.

Because wait-for-all is a rare operation, in order to optimize wait-for-any,
this lock is only taken when necessary. "all_hint" is used to mark objects which
are involved in a wait-for-all operation, and if an object is not, only its
spinlock is taken.

The locking scheme used here was written by Peter Zijlstra.

Signed-off-by: default avatarElizabeth Figura <zfigura@codeweavers.com>
Link: https://lore.kernel.org/r/20241213193511.457338-5-zfigura@codeweavers.com


Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent b4a7b5fe
Loading
Loading
Loading
Loading
+322 −14
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include <linux/ktime.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/overflow.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
@@ -41,6 +42,7 @@ enum ntsync_type {

struct ntsync_obj {
	spinlock_t lock;
	int dev_locked;

	enum ntsync_type type;

@@ -55,7 +57,30 @@ struct ntsync_obj {
		} sem;
	} u;

	/*
	 * any_waiters is protected by the object lock, but all_waiters is
	 * protected by the device wait_all_lock.
	 */
	struct list_head any_waiters;
	struct list_head all_waiters;

	/*
	 * Hint describing how many tasks are queued on this object in a
	 * wait-all operation.
	 *
	 * Any time we do a wake, we may need to wake "all" waiters as well as
	 * "any" waiters. In order to atomically wake "all" waiters, we must
	 * lock all of the objects, and that means grabbing the wait_all_lock
	 * below (and, due to lock ordering rules, before locking this object).
	 * However, wait-all is a rare operation, and grabbing the wait-all
	 * lock for every wake would create unnecessary contention.
	 * Therefore we first check whether all_hint is zero, and, if it is,
	 * we skip trying to wake "all" waiters.
	 *
	 * Since wait requests must originate from user-space threads, we're
	 * limited here by PID_MAX_LIMIT, so there's no risk of overflow.
	 */
	atomic_t all_hint;
};

struct ntsync_q_entry {
@@ -75,19 +100,198 @@ struct ntsync_q {
	 */
	atomic_t signaled;

	bool all;
	__u32 count;
	struct ntsync_q_entry entries[];
};

struct ntsync_device {
	/*
	 * Wait-all operations must atomically grab all objects, and be totally
	 * ordered with respect to each other and wait-any operations.
	 * If one thread is trying to acquire several objects, another thread
	 * cannot touch the object at the same time.
	 *
	 * This device-wide lock is used to serialize wait-for-all
	 * operations, and operations on an object that is involved in a
	 * wait-for-all.
	 */
	struct mutex wait_all_lock;

	struct file *file;
};

/*
 * Single objects are locked using obj->lock.
 *
 * Multiple objects are 'locked' while holding dev->wait_all_lock.
 * In this case however, individual objects are not locked by holding
 * obj->lock, but by setting obj->dev_locked.
 *
 * This means that in order to lock a single object, the sequence is slightly
 * more complicated than usual. Specifically it needs to check obj->dev_locked
 * after acquiring obj->lock, if set, it needs to drop the lock and acquire
 * dev->wait_all_lock in order to serialize against the multi-object operation.
 */

static void dev_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
{
	lockdep_assert_held(&dev->wait_all_lock);
	lockdep_assert(obj->dev == dev);
	spin_lock(&obj->lock);
	/*
	 * By setting obj->dev_locked inside obj->lock, it is ensured that
	 * anyone holding obj->lock must see the value.
	 */
	obj->dev_locked = 1;
	spin_unlock(&obj->lock);
}

static void dev_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
{
	lockdep_assert_held(&dev->wait_all_lock);
	lockdep_assert(obj->dev == dev);
	spin_lock(&obj->lock);
	obj->dev_locked = 0;
	spin_unlock(&obj->lock);
}

static void obj_lock(struct ntsync_obj *obj)
{
	struct ntsync_device *dev = obj->dev;

	for (;;) {
		spin_lock(&obj->lock);
		if (likely(!obj->dev_locked))
			break;

		spin_unlock(&obj->lock);
		mutex_lock(&dev->wait_all_lock);
		spin_lock(&obj->lock);
		/*
		 * obj->dev_locked should be set and released under the same
		 * wait_all_lock section, since we now own this lock, it should
		 * be clear.
		 */
		lockdep_assert(!obj->dev_locked);
		spin_unlock(&obj->lock);
		mutex_unlock(&dev->wait_all_lock);
	}
}

static void obj_unlock(struct ntsync_obj *obj)
{
	spin_unlock(&obj->lock);
}

static bool ntsync_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
{
	bool all;

	obj_lock(obj);
	all = atomic_read(&obj->all_hint);
	if (unlikely(all)) {
		obj_unlock(obj);
		mutex_lock(&dev->wait_all_lock);
		dev_lock_obj(dev, obj);
	}

	return all;
}

static void ntsync_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj, bool all)
{
	if (all) {
		dev_unlock_obj(dev, obj);
		mutex_unlock(&dev->wait_all_lock);
	} else {
		obj_unlock(obj);
	}
}

#define ntsync_assert_held(obj) \
	lockdep_assert((lockdep_is_held(&(obj)->lock) != LOCK_STATE_NOT_HELD) || \
		       ((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \
			(obj)->dev_locked))

static bool is_signaled(struct ntsync_obj *obj)
{
	ntsync_assert_held(obj);

	switch (obj->type) {
	case NTSYNC_TYPE_SEM:
		return !!obj->u.sem.count;
	}

	WARN(1, "bad object type %#x\n", obj->type);
	return false;
}

/*
 * "locked_obj" is an optional pointer to an object which is already locked and
 * should not be locked again. This is necessary so that changing an object's
 * state and waking it can be a single atomic operation.
 */
static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q,
			 struct ntsync_obj *locked_obj)
{
	__u32 count = q->count;
	bool can_wake = true;
	int signaled = -1;
	__u32 i;

	lockdep_assert_held(&dev->wait_all_lock);
	if (locked_obj)
		lockdep_assert(locked_obj->dev_locked);

	for (i = 0; i < count; i++) {
		if (q->entries[i].obj != locked_obj)
			dev_lock_obj(dev, q->entries[i].obj);
	}

	for (i = 0; i < count; i++) {
		if (!is_signaled(q->entries[i].obj)) {
			can_wake = false;
			break;
		}
	}

	if (can_wake && atomic_try_cmpxchg(&q->signaled, &signaled, 0)) {
		for (i = 0; i < count; i++) {
			struct ntsync_obj *obj = q->entries[i].obj;

			switch (obj->type) {
			case NTSYNC_TYPE_SEM:
				obj->u.sem.count--;
				break;
			}
		}
		wake_up_process(q->task);
	}

	for (i = 0; i < count; i++) {
		if (q->entries[i].obj != locked_obj)
			dev_unlock_obj(dev, q->entries[i].obj);
	}
}

static void try_wake_all_obj(struct ntsync_device *dev, struct ntsync_obj *obj)
{
	struct ntsync_q_entry *entry;

	lockdep_assert_held(&dev->wait_all_lock);
	lockdep_assert(obj->dev_locked);

	list_for_each_entry(entry, &obj->all_waiters, node)
		try_wake_all(dev, entry->q, obj);
}

static void try_wake_any_sem(struct ntsync_obj *sem)
{
	struct ntsync_q_entry *entry;

	lockdep_assert_held(&sem->lock);
	ntsync_assert_held(sem);
	lockdep_assert(sem->type == NTSYNC_TYPE_SEM);

	list_for_each_entry(entry, &sem->any_waiters, node) {
		struct ntsync_q *q = entry->q;
@@ -111,7 +315,7 @@ static int release_sem_state(struct ntsync_obj *sem, __u32 count)
{
	__u32 sum;

	lockdep_assert_held(&sem->lock);
	ntsync_assert_held(sem);

	if (check_add_overflow(sem->u.sem.count, count, &sum) ||
	    sum > sem->u.sem.max)
@@ -123,9 +327,11 @@ static int release_sem_state(struct ntsync_obj *sem, __u32 count)

static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp)
{
	struct ntsync_device *dev = sem->dev;
	__u32 __user *user_args = argp;
	__u32 prev_count;
	__u32 args;
	bool all;
	int ret;

	if (copy_from_user(&args, argp, sizeof(args)))
@@ -134,14 +340,17 @@ static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp)
	if (sem->type != NTSYNC_TYPE_SEM)
		return -EINVAL;

	spin_lock(&sem->lock);
	all = ntsync_lock_obj(dev, sem);

	prev_count = sem->u.sem.count;
	ret = post_sem_state(sem, args);
	if (!ret)
	ret = release_sem_state(sem, args);
	if (!ret) {
		if (all)
			try_wake_all_obj(dev, sem);
		try_wake_any_sem(sem);
	}

	spin_unlock(&sem->lock);
	ntsync_unlock_obj(dev, sem, all);

	if (!ret && put_user(prev_count, user_args))
		ret = -EFAULT;
@@ -193,6 +402,8 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev,
	get_file(dev->file);
	spin_lock_init(&obj->lock);
	INIT_LIST_HEAD(&obj->any_waiters);
	INIT_LIST_HEAD(&obj->all_waiters);
	atomic_set(&obj->all_hint, 0);

	return obj;
}
@@ -301,7 +512,7 @@ static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_ar
 * Allocate and initialize the ntsync_q structure, but do not queue us yet.
 */
static int setup_wait(struct ntsync_device *dev,
		      const struct ntsync_wait_args *args,
		      const struct ntsync_wait_args *args, bool all,
		      struct ntsync_q **ret_q)
{
	const __u32 count = args->count;
@@ -324,6 +535,7 @@ static int setup_wait(struct ntsync_device *dev,
		return -ENOMEM;
	q->task = current;
	atomic_set(&q->signaled, -1);
	q->all = all;
	q->count = count;

	for (i = 0; i < count; i++) {
@@ -333,6 +545,16 @@ static int setup_wait(struct ntsync_device *dev,
		if (!obj)
			goto err;

		if (all) {
			/* Check that the objects are all distinct. */
			for (j = 0; j < i; j++) {
				if (obj == q->entries[j].obj) {
					put_obj(obj);
					goto err;
				}
			}
		}

		entry->obj = obj;
		entry->q = q;
		entry->index = i;
@@ -362,13 +584,14 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
	struct ntsync_wait_args args;
	struct ntsync_q *q;
	int signaled;
	bool all;
	__u32 i;
	int ret;

	if (copy_from_user(&args, argp, sizeof(args)))
		return -EFAULT;

	ret = setup_wait(dev, &args, &q);
	ret = setup_wait(dev, &args, false, &q);
	if (ret < 0)
		return ret;

@@ -378,9 +601,9 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
		struct ntsync_q_entry *entry = &q->entries[i];
		struct ntsync_obj *obj = entry->obj;

		spin_lock(&obj->lock);
		all = ntsync_lock_obj(dev, obj);
		list_add_tail(&entry->node, &obj->any_waiters);
		spin_unlock(&obj->lock);
		ntsync_unlock_obj(dev, obj, all);
	}

	/* check if we are already signaled */
@@ -391,9 +614,9 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
		if (atomic_read(&q->signaled) != -1)
			break;

		spin_lock(&obj->lock);
		all = ntsync_lock_obj(dev, obj);
		try_wake_any_obj(obj);
		spin_unlock(&obj->lock);
		ntsync_unlock_obj(dev, obj, all);
	}

	/* sleep */
@@ -406,13 +629,94 @@ static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp)
		struct ntsync_q_entry *entry = &q->entries[i];
		struct ntsync_obj *obj = entry->obj;

		spin_lock(&obj->lock);
		all = ntsync_lock_obj(dev, obj);
		list_del(&entry->node);
		spin_unlock(&obj->lock);
		ntsync_unlock_obj(dev, obj, all);

		put_obj(obj);
	}

	signaled = atomic_read(&q->signaled);
	if (signaled != -1) {
		struct ntsync_wait_args __user *user_args = argp;

		/* even if we caught a signal, we need to communicate success */
		ret = 0;

		if (put_user(signaled, &user_args->index))
			ret = -EFAULT;
	} else if (!ret) {
		ret = -ETIMEDOUT;
	}

	kfree(q);
	return ret;
}

static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp)
{
	struct ntsync_wait_args args;
	struct ntsync_q *q;
	int signaled;
	__u32 i;
	int ret;

	if (copy_from_user(&args, argp, sizeof(args)))
		return -EFAULT;

	ret = setup_wait(dev, &args, true, &q);
	if (ret < 0)
		return ret;

	/* queue ourselves */

	mutex_lock(&dev->wait_all_lock);

	for (i = 0; i < args.count; i++) {
		struct ntsync_q_entry *entry = &q->entries[i];
		struct ntsync_obj *obj = entry->obj;

		atomic_inc(&obj->all_hint);

		/*
		 * obj->all_waiters is protected by dev->wait_all_lock rather
		 * than obj->lock, so there is no need to acquire obj->lock
		 * here.
		 */
		list_add_tail(&entry->node, &obj->all_waiters);
	}

	/* check if we are already signaled */

	try_wake_all(dev, q, NULL);

	mutex_unlock(&dev->wait_all_lock);

	/* sleep */

	ret = ntsync_schedule(q, &args);

	/* and finally, unqueue */

	mutex_lock(&dev->wait_all_lock);

	for (i = 0; i < args.count; i++) {
		struct ntsync_q_entry *entry = &q->entries[i];
		struct ntsync_obj *obj = entry->obj;

		/*
		 * obj->all_waiters is protected by dev->wait_all_lock rather
		 * than obj->lock, so there is no need to acquire it here.
		 */
		list_del(&entry->node);

		atomic_dec(&obj->all_hint);

		put_obj(obj);
	}

	mutex_unlock(&dev->wait_all_lock);

	signaled = atomic_read(&q->signaled);
	if (signaled != -1) {
		struct ntsync_wait_args __user *user_args = argp;
@@ -438,6 +742,8 @@ static int ntsync_char_open(struct inode *inode, struct file *file)
	if (!dev)
		return -ENOMEM;

	mutex_init(&dev->wait_all_lock);

	file->private_data = dev;
	dev->file = file;
	return nonseekable_open(inode, file);
@@ -461,6 +767,8 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd,
	switch (cmd) {
	case NTSYNC_IOC_CREATE_SEM:
		return ntsync_create_sem(dev, argp);
	case NTSYNC_IOC_WAIT_ALL:
		return ntsync_wait_all(dev, argp);
	case NTSYNC_IOC_WAIT_ANY:
		return ntsync_wait_any(dev, argp);
	default:
+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ struct ntsync_wait_args {

#define NTSYNC_IOC_CREATE_SEM		_IOW ('N', 0x80, struct ntsync_sem_args)
#define NTSYNC_IOC_WAIT_ANY		_IOWR('N', 0x82, struct ntsync_wait_args)
#define NTSYNC_IOC_WAIT_ALL		_IOWR('N', 0x83, struct ntsync_wait_args)

#define NTSYNC_IOC_SEM_RELEASE		_IOWR('N', 0x81, __u32)