Commit 16cec0d2 authored by Pasha Tatashin, committed by Andrew Morton
Browse files

liveupdate: luo_session: add ioctls for file preservation

Introduce the userspace interface and internal logic required to manage
the lifecycle of file descriptors within a session.  Previously, a session
was merely a container; this change makes it a functional management unit.

The following capabilities are added:

A new set of ioctl commands is added; they operate on the file
descriptor returned by CREATE_SESSION. This allows userspace to:
- LIVEUPDATE_SESSION_PRESERVE_FD: Add a file descriptor to a session
  to be preserved across the live update.
- LIVEUPDATE_SESSION_RETRIEVE_FD: Retrieve a preserved file in the
  new kernel using its unique token.
- LIVEUPDATE_SESSION_FINISH: Signal that restoration of the session is
  complete so the kernel can release the preserved resources.

The session's .release handler is enhanced to be state-aware.  When a
session's file descriptor is closed, it correctly unpreserves the session
based on its current state before freeing all associated file resources.

Link: https://lkml.kernel.org/r/20251125165850.3389713-8-pasha.tatashin@soleen.com


Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Tested-by: David Matlack <dmatlack@google.com>
Cc: Aleksander Lobakin <aleksander.lobakin@intel.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andriy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: anish kumar <yesanishhere@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chanwoo Choi <cw00.choi@samsung.com>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Daniel Wagner <wagi@kernel.org>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Jeffery <djeffery@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guixin Liu <kanie@linux.alibaba.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Matthew Maurer <mmaurer@google.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
Cc: Parav Pandit <parav@nvidia.com>
Cc: Pratyush Yadav <ptyadav@amazon.de>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Saeed Mahameed <saeedm@nvidia.com>
Cc: Samiullah Khawaja <skhawaja@google.com>
Cc: Song Liu <song@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Stuart Hayes <stuart.w.hayes@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: William Tu <witu@nvidia.com>
Cc: Yoann Congal <yoann.congal@smile.fr>
Cc: Zhu Yanjun <yanjun.zhu@linux.dev>
Cc: Zijun Hu <quic_zijuhu@quicinc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 7c722a7f
Loading
Loading
Loading
Loading
+103 −0
Original line number Diff line number Diff line
@@ -53,6 +53,14 @@ enum {
	LIVEUPDATE_CMD_RETRIEVE_SESSION = 0x01,
};

/*
 * Command numbers for ioctls issued on a session file descriptor.
 * They are allocated consecutively, starting at LIVEUPDATE_CMD_SESSION_BASE.
 */
enum {
	LIVEUPDATE_CMD_SESSION_BASE		= 0x40,
	LIVEUPDATE_CMD_SESSION_PRESERVE_FD	= LIVEUPDATE_CMD_SESSION_BASE + 0,
	LIVEUPDATE_CMD_SESSION_RETRIEVE_FD	= LIVEUPDATE_CMD_SESSION_BASE + 1,
	LIVEUPDATE_CMD_SESSION_FINISH		= LIVEUPDATE_CMD_SESSION_BASE + 2,
};

/**
 * struct liveupdate_ioctl_create_session - ioctl(LIVEUPDATE_IOCTL_CREATE_SESSION)
 * @size:	Input; sizeof(struct liveupdate_ioctl_create_session)
@@ -110,4 +118,99 @@ struct liveupdate_ioctl_retrieve_session {
#define LIVEUPDATE_IOCTL_RETRIEVE_SESSION \
	_IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_RETRIEVE_SESSION)

/* Session specific IOCTLs */

/**
 * struct liveupdate_session_preserve_fd - ioctl(LIVEUPDATE_SESSION_PRESERVE_FD)
 * @size:  Input; sizeof(struct liveupdate_session_preserve_fd)
 * @fd:    Input; The user-space file descriptor to be preserved.
 * @token: Input; An opaque, unique token chosen by the user to identify the
 *         preserved resource.
 *
 * Holds parameters for preserving a file descriptor.
 *
 * User sets the @fd field identifying the file descriptor to preserve
 * (e.g., memfd, kvm, iommufd, VFIO). The kernel validates if this FD type
 * and its dependencies are supported for preservation. If validation passes,
 * the kernel marks the FD internally and *initiates the process* of preparing
 * its state for saving. The actual snapshotting of the state typically occurs
 * during the subsequent %LIVEUPDATE_IOCTL_PREPARE execution phase, though
 * some finalization might occur during freeze.
 * On successful validation and initiation, the resource being preserved is
 * identified by the user-supplied @token. The same token value is required
 * for the subsequent %LIVEUPDATE_SESSION_RETRIEVE_FD call after the live
 * update.
 *
 * Return: 0 on success (validation passed, preservation initiated), negative
 * error code on failure (e.g., unsupported FD type, dependency issue,
 * validation failed).
 */
struct liveupdate_session_preserve_fd {
	__u32		size;
	__s32		fd;
	__aligned_u64	token;
};

/* Session ioctl; the argument is a struct liveupdate_session_preserve_fd. */
#define LIVEUPDATE_SESSION_PRESERVE_FD					\
	_IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_PRESERVE_FD)

/**
 * struct liveupdate_session_retrieve_fd - ioctl(LIVEUPDATE_SESSION_RETRIEVE_FD)
 * @size:  Input; sizeof(struct liveupdate_session_retrieve_fd)
 * @fd:    Output; The new file descriptor representing the fully restored
 *         kernel resource.
 * @token: Input; The opaque token that was used to preserve the resource.
 *
 * Retrieve a previously preserved file descriptor.
 *
 * User sets the @token field to the value that was passed to a successful
 * %LIVEUPDATE_SESSION_PRESERVE_FD call before the live update. On success,
 * the kernel restores the state (saved during the PREPARE/FREEZE phases)
 * associated with the token and populates the @fd field with a new file
 * descriptor referencing the restored resource in the current (new) kernel.
 * This operation must be performed *before* signaling completion via
 * %LIVEUPDATE_SESSION_FINISH.
 *
 * Return: 0 on success, negative error code on failure (e.g., invalid token).
 */
struct liveupdate_session_retrieve_fd {
	__u32		size;
	__s32		fd;
	__aligned_u64	token;
};

/* Session ioctl; the argument is a struct liveupdate_session_retrieve_fd. */
#define LIVEUPDATE_SESSION_RETRIEVE_FD					\
	_IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_RETRIEVE_FD)

/**
 * struct liveupdate_session_finish - ioctl(LIVEUPDATE_SESSION_FINISH)
 * @size:     Input; sizeof(struct liveupdate_session_finish)
 * @reserved: Input; Must be zero. Reserved for future use.
 *
 * Signals the completion of the restoration process for a retrieved session.
 * This is the final operation that should be performed on a session file
 * descriptor after a live update.
 *
 * This ioctl must be called once all required file descriptors for the session
 * have been successfully retrieved (using %LIVEUPDATE_SESSION_RETRIEVE_FD) and
 * are fully restored from the userspace and kernel perspective.
 *
 * Upon success, the kernel releases its ownership of the preserved resources
 * associated with this session. This allows internal resources to be freed,
 * typically by decrementing reference counts on the underlying preserved
 * objects.
 *
 * If this operation fails, the resources remain preserved in memory. Userspace
 * may attempt to call finish again. The resources will otherwise be reset
 * during the next live update cycle.
 *
 * Return: 0 on success, negative error code on failure.
 */
struct liveupdate_session_finish {
	__u32		size;
	__u32		reserved;
};

/* Session ioctl; the argument is a struct liveupdate_session_finish. */
#define LIVEUPDATE_SESSION_FINISH					\
	_IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_FINISH)

#endif /* _UAPI_LIVEUPDATE_H */
+185 −2
Original line number Diff line number Diff line
@@ -125,6 +125,8 @@ static struct luo_session *luo_session_alloc(const char *name)
		return ERR_PTR(-ENOMEM);

	strscpy(session->name, name, sizeof(session->name));
	INIT_LIST_HEAD(&session->file_set.files_list);
	luo_file_set_init(&session->file_set);
	INIT_LIST_HEAD(&session->list);
	mutex_init(&session->mutex);

@@ -133,6 +135,7 @@ static struct luo_session *luo_session_alloc(const char *name)

/*
 * Tear down a session object: destroy its file set and its mutex, then free
 * the session memory.
 *
 * This does not unlink the session from any list.
 * NOTE(review): luo_session_release() calls luo_session_remove() before
 * calling this; presumably other callers must also ensure the session is not
 * on a list — confirm.
 */
static void luo_session_free(struct luo_session *session)
{
	luo_file_set_destroy(&session->file_set);
	mutex_destroy(&session->mutex);
	kfree(session);
}
@@ -177,16 +180,46 @@ static void luo_session_remove(struct luo_session_header *sh,
	sh->count--;
}

/*
 * Finish the session's file set with session->mutex held for the duration of
 * the call.
 *
 * Return: whatever luo_file_finish() returns.
 */
static int luo_session_finish_one(struct luo_session *session)
{
	/* Scope-based lock: released automatically when the function returns. */
	guard(mutex)(&session->mutex);
	return luo_file_finish(&session->file_set);
}

/*
 * Unfreeze the session's file set against its serialized counterpart
 * (@ser->file_set_ser) with session->mutex held for the duration of the call.
 * Used by luo_session_serialize() to roll back sessions that were already
 * frozen when a later freeze fails.
 */
static void luo_session_unfreeze_one(struct luo_session *session,
				     struct luo_session_ser *ser)
{
	/* Scope-based lock: released automatically when the function returns. */
	guard(mutex)(&session->mutex);
	luo_file_unfreeze(&session->file_set, &ser->file_set_ser);
}

/*
 * Freeze the session's file set into its serialized counterpart
 * (@ser->file_set_ser) with session->mutex held for the duration of the call.
 *
 * Return: whatever luo_file_freeze() returns.
 */
static int luo_session_freeze_one(struct luo_session *session,
				  struct luo_session_ser *ser)
{
	/* Scope-based lock: released automatically when the function returns. */
	guard(mutex)(&session->mutex);
	return luo_file_freeze(&session->file_set, &ser->file_set_ser);
}

/*
 * ->release() handler for a session file.
 *
 * A retrieved session is finished via luo_session_finish_one(); if that
 * fails, a warning is logged and the error is returned immediately. Any
 * other session has its files unpreserved under session->mutex. The session
 * is then removed from the matching header (incoming for retrieved sessions,
 * outgoing otherwise) and freed.
 *
 * NOTE(review): on the finish-failure path the function returns before
 * luo_session_remove()/luo_session_free(), so the session stays on the
 * incoming list and is not freed here — TODO confirm that something else
 * reclaims it.
 *
 * Return: 0 on success, or the error from luo_session_finish_one().
 */
static int luo_session_release(struct inode *inodep, struct file *filep)
{
	struct luo_session *session = filep->private_data;
	struct luo_session_header *sh;

	/* If retrieved is set, it means this session is from incoming list */
	if (session->retrieved)
	if (session->retrieved) {
		int err = luo_session_finish_one(session);

		if (err) {
			pr_warn("Unable to finish session [%s] on release\n",
				session->name);
			return err;
		}
		sh = &luo_session_global.incoming;
	else
	} else {
		scoped_guard(mutex, &session->mutex)
			luo_file_unpreserve_files(&session->file_set);
		sh = &luo_session_global.outgoing;
	}

	luo_session_remove(sh, session);
	luo_session_free(session);
@@ -194,9 +227,140 @@ static int luo_session_release(struct inode *inodep, struct file *filep)
	return 0;
}

/*
 * Handler for LIVEUPDATE_SESSION_PRESERVE_FD.
 *
 * With session->mutex held, preserves the file behind the user-supplied fd
 * in the session's file set under the user-supplied token, then responds to
 * the caller through luo_ucmd_respond().
 *
 * A failed response does not undo the preservation; it is logged and the
 * error is returned.
 *
 * Return: 0 on success, otherwise the error from luo_preserve_file() or
 * luo_ucmd_respond().
 */
static int luo_session_preserve_fd(struct luo_session *session,
				   struct luo_ucmd *ucmd)
{
	struct liveupdate_session_preserve_fd *args = ucmd->cmd;
	int ret;

	guard(mutex)(&session->mutex);

	ret = luo_preserve_file(&session->file_set, args->token, args->fd);
	if (!ret) {
		ret = luo_ucmd_respond(ucmd, sizeof(*args));
		if (ret)
			pr_warn("The file was successfully preserved, but response to user failed\n");
	}

	return ret;
}

/*
 * Handler for LIVEUPDATE_SESSION_RETRIEVE_FD.
 *
 * Reserves a new O_CLOEXEC descriptor number, looks up the file for the
 * user-supplied token in the session's file set (under session->mutex),
 * responds to the caller, and only then installs the file into the reserved
 * descriptor. Every failure path gives the reserved descriptor back; a
 * failed response also drops the file reference obtained from
 * luo_retrieve_file().
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int luo_session_retrieve_fd(struct luo_session *session,
				   struct luo_ucmd *ucmd)
{
	struct liveupdate_session_retrieve_fd *args = ucmd->cmd;
	struct file *restored;
	int ret;

	/* Reserve the descriptor number up front; it is published last. */
	args->fd = get_unused_fd_flags(O_CLOEXEC);
	if (args->fd < 0)
		return args->fd;

	guard(mutex)(&session->mutex);

	ret = luo_retrieve_file(&session->file_set, args->token, &restored);
	if (ret < 0) {
		put_unused_fd(args->fd);
		return ret;
	}

	ret = luo_ucmd_respond(ucmd, sizeof(*args));
	if (ret) {
		/* Caller never learned the fd: drop the file and the slot. */
		fput(restored);
		put_unused_fd(args->fd);
		return ret;
	}

	fd_install(args->fd, restored);

	return 0;
}

/*
 * Handler for LIVEUPDATE_SESSION_FINISH.
 *
 * Finishes the session via luo_session_finish_one() and, only if that
 * succeeds, responds to the caller through luo_ucmd_respond().
 *
 * Return: 0 on success, otherwise the error from the step that failed.
 */
static int luo_session_finish(struct luo_session *session,
			      struct luo_ucmd *ucmd)
{
	struct liveupdate_session_finish *args = ucmd->cmd;
	int ret;

	ret = luo_session_finish_one(session);
	if (!ret)
		ret = luo_ucmd_respond(ucmd, sizeof(*args));

	return ret;
}

/*
 * Kernel-side buffer able to hold the argument struct of any session ioctl.
 * IOCTL_OP() contains a build-time check that each registered struct is not
 * larger than this union.
 */
union ucmd_buffer {
	struct liveupdate_session_finish finish;
	struct liveupdate_session_preserve_fd preserve;
	struct liveupdate_session_retrieve_fd retrieve;
};

/**
 * struct luo_ioctl_op - dispatch table entry for one session ioctl
 * @size:      sizeof() the kernel's argument struct; used as the kernel size
 *             when copying the argument from userspace.
 * @min_size:  Smallest user-supplied size accepted: the offset of the end of
 *             the last mandatory field. Smaller sizes are rejected with
 *             -EINVAL by luo_session_ioctl().
 * @ioctl_num: Full ioctl command value; must match the cmd passed to
 *             luo_session_ioctl() exactly.
 * @execute:   Handler invoked with the session and the copied-in command.
 */
struct luo_ioctl_op {
	unsigned int size;
	unsigned int min_size;
	unsigned int ioctl_num;
	int (*execute)(struct luo_session *session, struct luo_ucmd *ucmd);
};

/*
 * Build one struct luo_ioctl_op array entry.
 *
 * The entry is placed at index (_IOC_NR(_ioctl) - LIVEUPDATE_CMD_SESSION_BASE).
 * @_struct is the ioctl's argument type and @_last is its last mandatory
 * field, from which .min_size is derived. BUILD_BUG_ON_ZERO() contributes 0
 * to .size and breaks the build if @_struct would not fit in
 * union ucmd_buffer.
 */
#define IOCTL_OP(_ioctl, _fn, _struct, _last)                                  \
	[_IOC_NR(_ioctl) - LIVEUPDATE_CMD_SESSION_BASE] = {                    \
		.size = sizeof(_struct) +                                      \
			BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <          \
					  sizeof(_struct)),                    \
		.min_size = offsetofend(_struct, _last),                       \
		.ioctl_num = _ioctl,                                           \
		.execute = _fn,                                                \
	}

/*
 * Session ioctl dispatch table. IOCTL_OP() uses designated initializers, so
 * each entry lands at (command number - LIVEUPDATE_CMD_SESSION_BASE)
 * regardless of the order in which it is listed; entries are listed here in
 * command-number order.
 */
static const struct luo_ioctl_op luo_session_ioctl_ops[] = {
	IOCTL_OP(LIVEUPDATE_SESSION_PRESERVE_FD, luo_session_preserve_fd,
		 struct liveupdate_session_preserve_fd, token),
	IOCTL_OP(LIVEUPDATE_SESSION_RETRIEVE_FD, luo_session_retrieve_fd,
		 struct liveupdate_session_retrieve_fd, token),
	IOCTL_OP(LIVEUPDATE_SESSION_FINISH, luo_session_finish,
		 struct liveupdate_session_finish, reserved),
};

static long luo_session_ioctl(struct file *filep, unsigned int cmd,
			      unsigned long arg)
{
	struct luo_session *session = filep->private_data;
	const struct luo_ioctl_op *op;
	struct luo_ucmd ucmd = {};
	union ucmd_buffer buf;
	unsigned int nr;
	int ret;

	nr = _IOC_NR(cmd);
	if (nr < LIVEUPDATE_CMD_SESSION_BASE || (nr - LIVEUPDATE_CMD_SESSION_BASE) >=
	    ARRAY_SIZE(luo_session_ioctl_ops)) {
		return -EINVAL;
	}

	ucmd.ubuffer = (void __user *)arg;
	ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
	if (ret)
		return ret;

	op = &luo_session_ioctl_ops[nr - LIVEUPDATE_CMD_SESSION_BASE];
	if (op->ioctl_num != cmd)
		return -ENOIOCTLCMD;
	if (ucmd.user_size < op->min_size)
		return -EINVAL;

	ucmd.cmd = &buf;
	ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
				    ucmd.user_size);
	if (ret)
		return ret;

	return op->execute(session, &ucmd);
}

/*
 * File operations for session files. Both handlers read the session from
 * filep->private_data.
 */
static const struct file_operations luo_session_fops = {
	.owner = THIS_MODULE,
	.release = luo_session_release,
	.unlocked_ioctl = luo_session_ioctl,
};

/* Create a "struct file" for session */
@@ -392,6 +556,11 @@ int luo_session_deserialize(void)
			luo_session_free(session);
			return err;
		}

		scoped_guard(mutex, &session->mutex) {
			luo_file_deserialize(&session->file_set,
					     &sh->ser[i].file_set_ser);
		}
	}

	kho_restore_free(sh->header_ser);
@@ -406,9 +575,14 @@ int luo_session_serialize(void)
	struct luo_session_header *sh = &luo_session_global.outgoing;
	struct luo_session *session;
	int i = 0;
	int err;

	guard(rwsem_write)(&sh->rwsem);
	list_for_each_entry(session, &sh->list, list) {
		err = luo_session_freeze_one(session, &sh->ser[i]);
		if (err)
			goto err_undo;

		strscpy(sh->ser[i].name, session->name,
			sizeof(sh->ser[i].name));
		i++;
@@ -416,6 +590,15 @@ int luo_session_serialize(void)
	sh->header_ser->count = sh->count;

	return 0;

err_undo:
	list_for_each_entry_continue_reverse(session, &sh->list, list) {
		i--;
		luo_session_unfreeze_one(session, &sh->ser[i]);
		memset(sh->ser[i].name, 0, sizeof(sh->ser[i].name));
	}

	return err;
}

/**