Commit 7063c229 authored by Christian Brauner
Browse files

Merge patch series "fhandle: expose u64 mount id to name_to_handle_at(2)"

Aleksa Sarai <cyphar@cyphar.com> says:

Now that we provide a unique 64-bit mount ID interface in statx(2), we
can now provide a race-free way for name_to_handle_at(2) to provide a
file handle and corresponding mount without needing to worry about
racing with /proc/mountinfo parsing or having to open a file just to do
statx(2).

While this is not necessary if you are using AT_EMPTY_PATH and don't
care about an extra statx(2) call, users that pass full paths into
name_to_handle_at(2) need to know which mount the file handle comes from
(to make sure they don't try to open_by_handle_at a file handle from a
different filesystem) and switching to AT_EMPTY_PATH would require
allocating a file for every name_to_handle_at(2) call, turning

  err = name_to_handle_at(-EBADF, "/foo/bar/baz", &handle, &mntid,
                          AT_HANDLE_MNT_ID_UNIQUE);

into

  int fd = openat(-EBADF, "/foo/bar/baz", O_PATH | O_CLOEXEC);
  err1 = name_to_handle_at(fd, "", &handle, &unused_mntid, AT_EMPTY_PATH);
  err2 = statx(fd, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &statxbuf);
  mntid = statxbuf.stx_mnt_id;
  close(fd);

Also, this series adds a patch to clarify how AT_* flag allocation
should work going forwards.

* patches from https://lore.kernel.org/r/20240828-exportfs-u64-mount-id-v3-0-10c2c4c16708@cyphar.com:
  fhandle: expose u64 mount id to name_to_handle_at(2)
  uapi: explain how per-syscall AT_* flags should be allocated

Link: https://lore.kernel.org/r/20240828-exportfs-u64-mount-id-v3-0-10c2c4c16708@cyphar.com


Signed-off-by: Christian Brauner <brauner@kernel.org>
parents 5c40e050 4356d575
Loading
Loading
Loading
Loading
+22 −7
Original line number Diff line number Diff line
@@ -16,7 +16,8 @@

static long do_sys_name_to_handle(const struct path *path,
				  struct file_handle __user *ufh,
				  int __user *mnt_id, int fh_flags)
				  void __user *mnt_id, bool unique_mntid,
				  int fh_flags)
{
	long retval;
	struct file_handle f_handle;
@@ -69,7 +70,17 @@ static long do_sys_name_to_handle(const struct path *path,
	} else
		retval = 0;
	/* copy the mount id */
	if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
	if (unique_mntid) {
		if (put_user(real_mount(path->mnt)->mnt_id_unique,
			     (u64 __user *) mnt_id))
			retval = -EFAULT;
	} else {
		if (put_user(real_mount(path->mnt)->mnt_id,
			     (int __user *) mnt_id))
			retval = -EFAULT;
	}
	/* copy the handle */
	if (retval != -EFAULT &&
		copy_to_user(ufh, handle,
			     struct_size(handle, f_handle, handle_bytes)))
		retval = -EFAULT;
@@ -83,6 +94,7 @@ static long do_sys_name_to_handle(const struct path *path,
 * @name: name that should be converted to handle.
 * @handle: resulting file handle
 * @mnt_id: mount id of the file system containing the file
 *          (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int)
 * @flag: flag value to indicate whether to follow symlink or not
 *        and whether a decodable file handle is required.
 *
@@ -92,7 +104,7 @@ static long do_sys_name_to_handle(const struct path *path,
 * value required.
 */
SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
		struct file_handle __user *, handle, int __user *, mnt_id,
		struct file_handle __user *, handle, void __user *, mnt_id,
		int, flag)
{
	struct path path;
@@ -100,7 +112,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
	int fh_flags;
	int err;

	if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID))
	if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
		     AT_HANDLE_MNT_ID_UNIQUE))
		return -EINVAL;

	lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
@@ -109,7 +122,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
		lookup_flags |= LOOKUP_EMPTY;
	err = user_path_at(dfd, name, lookup_flags, &path);
	if (!err) {
		err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags);
		err = do_sys_name_to_handle(&path, handle, mnt_id,
					    flag & AT_HANDLE_MNT_ID_UNIQUE,
					    fh_flags);
		path_put(&path);
	}
	return err;
+1 −1
Original line number Diff line number Diff line
@@ -870,7 +870,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
#endif
asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
				      struct file_handle __user *handle,
				      int __user *mnt_id, int flag);
				      void __user *mnt_id, int flag);
asmlinkage long sys_open_by_handle_at(int mountdirfd,
				      struct file_handle __user *handle,
				      int flags);
+57 −24
Original line number Diff line number Diff line
@@ -90,26 +90,27 @@
#define DN_ATTRIB	0x00000020	/* File changed attributes */
#define DN_MULTISHOT	0x80000000	/* Don't remove notifier */

#define AT_FDCWD		-100    /* Special value for dirfd used to
					   indicate openat should use the
					   current working directory. */


/* Generic flags for the *at(2) family of syscalls. */

/* Reserved for per-syscall flags	0xff. */
#define AT_SYMLINK_NOFOLLOW		0x100   /* Do not follow symbolic
						   links. */
/* Reserved for per-syscall flags	0x200 */
#define AT_SYMLINK_FOLLOW		0x400   /* Follow symbolic links. */
#define AT_NO_AUTOMOUNT			0x800	/* Suppress terminal automount
						   traversal. */
#define AT_EMPTY_PATH			0x1000	/* Allow empty relative
						   pathname to operate on dirfd
						   directly. */
/*
 * The constants AT_REMOVEDIR and AT_EACCESS have the same value.  AT_EACCESS is
 * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
 * unlinkat.  The two functions do completely different things and therefore,
 * the flags can be allowed to overlap.  For example, passing AT_REMOVEDIR to
 * faccessat would be undefined behavior and thus treating it equivalent to
 * AT_EACCESS is valid undefined behavior.
 * These flags are currently statx(2)-specific, but they could be made generic
 * in the future and so they should not be used for other per-syscall flags.
 */
#define AT_FDCWD		-100    /* Special value used to indicate
                                           openat should use the current
                                           working directory. */
#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
#define AT_EACCESS		0x200	/* Test access permitted for
                                           effective IDs, not real IDs.  */
#define AT_REMOVEDIR		0x200   /* Remove directory instead of
                                           unlinking file.  */
#define AT_SYMLINK_FOLLOW	0x400   /* Follow symbolic links.  */
#define AT_NO_AUTOMOUNT		0x800	/* Suppress terminal automount traversal */
#define AT_EMPTY_PATH		0x1000	/* Allow empty relative pathname */

#define AT_STATX_SYNC_TYPE		0x6000	/* Type of synchronisation required from statx() */
#define AT_STATX_SYNC_AS_STAT		0x0000	/* - Do whatever stat() does */
#define AT_STATX_FORCE_SYNC		0x2000	/* - Force the attributes to be sync'd with the server */
@@ -117,10 +118,42 @@

#define AT_RECURSIVE			0x8000	/* Apply to the entire subtree */

/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
#define AT_HANDLE_FID		AT_REMOVEDIR	/* file handle is needed to
					compare object identity and may not
					be usable to open_by_handle_at(2) */
/*
 * Per-syscall flags for the *at(2) family of syscalls.
 *
 * These are flags that are so syscall-specific that a user passing these flags
 * to the wrong syscall is so "clearly wrong" that we can safely call such
 * usage "undefined behaviour".
 *
 * For example, the constants AT_REMOVEDIR and AT_EACCESS have the same value.
 * AT_EACCESS is meaningful only to faccessat, while AT_REMOVEDIR is meaningful
 * only to unlinkat. The two functions do completely different things and
 * therefore, the flags can be allowed to overlap. For example, passing
 * AT_REMOVEDIR to faccessat would be undefined behavior and thus treating it
 * equivalent to AT_EACCESS is valid undefined behavior.
 *
 * Note for implementers: When picking a new per-syscall AT_* flag, try to
 * reuse already existing flags first. This leaves us with as many unused bits
 * as possible, so we can use them for generic bits in the future if necessary.
 */

/* Flags for renameat2(2) (must match legacy RENAME_* flags). */
#define AT_RENAME_NOREPLACE	0x0001
#define AT_RENAME_EXCHANGE	0x0002
#define AT_RENAME_WHITEOUT	0x0004

/* Flag for faccessat(2). */
#define AT_EACCESS		0x200	/* Test access permitted for
                                           effective IDs, not real IDs.  */
/* Flag for unlinkat(2). */
#define AT_REMOVEDIR		0x200   /* Remove directory instead of
                                           unlinking file.  */
/* Flags for name_to_handle_at(2). */
#define AT_HANDLE_FID		0x200	/* File handle is needed to compare
					   object identity and may not be
					   usable with open_by_handle_at(2). */
#define AT_HANDLE_MNT_ID_UNIQUE	0x001	/* Return the u64 unique mount ID. */

#if defined(__KERNEL__)
#define AT_GETATTR_NOSEC	0x80000000
#endif