Unverified Commit 0f46d81f authored by Miklos Szeredi's avatar Miklos Szeredi Committed by Christian Brauner
Browse files

fanotify: notify on mount attach and detach



Add notifications for attaching and detaching mounts.  The following new
event masks are added:

  FAN_MNT_ATTACH  - Mount was attached
  FAN_MNT_DETACH  - Mount was detached

If a mount is moved, then the event is reported with (FAN_MNT_ATTACH |
FAN_MNT_DETACH).

These events add an info record of type FAN_EVENT_INFO_TYPE_MNT containing
these fields identifying the affected mounts:

  __u64 mnt_id    - the ID of the mount (see statmount(2))

FAN_REPORT_MNT must be supplied to fanotify_init() to receive these events
and no other type of event can be received with this report type.

Marks are added with FAN_MARK_MNTNS, which records the mount namespace from
an nsfs file (e.g. /proc/self/ns/mnt).

Signed-off-by: default avatarMiklos Szeredi <mszeredi@redhat.com>
Link: https://lore.kernel.org/r/20250129165803.72138-3-mszeredi@redhat.com


Signed-off-by: default avatarChristian Brauner <brauner@kernel.org>
parent b944249b
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -181,3 +181,5 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
	return container_of(ns, struct mnt_namespace, ns);
}

struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
+11 −3
Original line number Diff line number Diff line
@@ -2145,16 +2145,24 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
	}
}

struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry)
{
	if (!is_mnt_ns_file(dentry))
		return NULL;

	return to_mnt_ns(get_proc_ns(dentry->d_inode));
}

static bool mnt_ns_loop(struct dentry *dentry)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct mnt_namespace *mnt_ns;
	if (!is_mnt_ns_file(dentry))
	struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry);

	if (!mnt_ns)
		return false;

	mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

+35 −3
Original line number Diff line number Diff line
@@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old,
	case FANOTIFY_EVENT_TYPE_FS_ERROR:
		return fanotify_error_event_equal(FANOTIFY_EE(old),
						  FANOTIFY_EE(new));
	case FANOTIFY_EVENT_TYPE_MNT:
		return false;
	default:
		WARN_ON_ONCE(1);
	}
@@ -312,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
	pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
		 __func__, iter_info->report_mask, event_mask, data, data_type);

	if (!fid_mode) {
	if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
		if (data_type != FSNOTIFY_EVENT_MNT)
			return 0;
	} else if (!fid_mode) {
		/* Do we have path to open a file descriptor? */
		if (!path)
			return 0;
@@ -557,6 +562,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
	return &pevent->fae;
}

static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp)
{
	struct fanotify_mnt_event *pevent;

	pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp);
	if (!pevent)
		return NULL;

	pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT;
	pevent->mnt_id = mnt_id;

	return &pevent->fae;
}

static struct fanotify_event *fanotify_alloc_perm_event(const void *data,
							int data_type,
							gfp_t gfp)
@@ -731,6 +750,7 @@ static struct fanotify_event *fanotify_alloc_event(
					      fid_mode);
	struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
	const struct path *path = fsnotify_data_path(data, data_type);
	u64 mnt_id = fsnotify_data_mnt_id(data, data_type);
	struct mem_cgroup *old_memcg;
	struct dentry *moved = NULL;
	struct inode *child = NULL;
@@ -826,8 +846,12 @@ static struct fanotify_event *fanotify_alloc_event(
						  moved, &hash, gfp);
	} else if (fid_mode) {
		event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
	} else {
	} else if (path) {
		event = fanotify_alloc_path_event(path, &hash, gfp);
	} else if (mnt_id) {
		event = fanotify_alloc_mnt_event(mnt_id, gfp);
	} else {
		WARN_ON_ONCE(1);
	}

	if (!event)
@@ -927,7 +951,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
	BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
	BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);

	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24);

	mask = fanotify_group_event_mask(group, iter_info, &match_mask,
					 mask, data, data_type, dir);
@@ -1028,6 +1052,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group,
	mempool_free(fee, &group->fanotify_data.error_events_pool);
}

static void fanotify_free_mnt_event(struct fanotify_event *event)
{
	kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event));
}

static void fanotify_free_event(struct fsnotify_group *group,
				struct fsnotify_event *fsn_event)
{
@@ -1054,6 +1083,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
	case FANOTIFY_EVENT_TYPE_FS_ERROR:
		fanotify_free_error_event(group, event);
		break;
	case FANOTIFY_EVENT_TYPE_MNT:
		fanotify_free_mnt_event(event);
		break;
	default:
		WARN_ON_ONCE(1);
	}
+18 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_fid_event_cachep;
extern struct kmem_cache *fanotify_path_event_cachep;
extern struct kmem_cache *fanotify_perm_event_cachep;
extern struct kmem_cache *fanotify_mnt_event_cachep;

/* Possible states of the permission event */
enum {
@@ -244,6 +245,7 @@ enum fanotify_event_type {
	FANOTIFY_EVENT_TYPE_PATH_PERM,
	FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
	FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
	FANOTIFY_EVENT_TYPE_MNT,
	__FANOTIFY_EVENT_TYPE_NUM
};

@@ -409,12 +411,23 @@ struct fanotify_path_event {
	struct path path;
};

struct fanotify_mnt_event {
	struct fanotify_event fae;
	u64 mnt_id;
};

static inline struct fanotify_path_event *
FANOTIFY_PE(struct fanotify_event *event)
{
	return container_of(event, struct fanotify_path_event, fae);
}

static inline struct fanotify_mnt_event *
FANOTIFY_ME(struct fanotify_event *event)
{
	return container_of(event, struct fanotify_mnt_event, fae);
}

/*
 * Structure for permission fanotify events. It gets allocated and freed in
 * fanotify_handle_event() since we wait there for user response. When the
@@ -466,6 +479,11 @@ static inline bool fanotify_is_error_event(u32 mask)
	return mask & FAN_FS_ERROR;
}

static inline bool fanotify_is_mnt_event(u32 mask)
{
	return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
}

static inline const struct path *fanotify_event_path(struct fanotify_event *event)
{
	if (event->type == FANOTIFY_EVENT_TYPE_PATH)
+75 −14
Original line number Diff line number Diff line
@@ -113,6 +113,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init;
struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init;

#define FANOTIFY_EVENT_ALIGN 4
#define FANOTIFY_FID_INFO_HDR_LEN \
@@ -123,6 +124,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
	(sizeof(struct fanotify_event_info_error))
#define FANOTIFY_RANGE_INFO_LEN \
	(sizeof(struct fanotify_event_info_range))
#define FANOTIFY_MNT_INFO_LEN \
	(sizeof(struct fanotify_event_info_mnt))

static int fanotify_fid_info_len(int fh_len, int name_len)
{
@@ -178,6 +181,8 @@ static size_t fanotify_event_len(unsigned int info_mode,
		fh_len = fanotify_event_object_fh_len(event);
		event_len += fanotify_fid_info_len(fh_len, dot_len);
	}
	if (fanotify_is_mnt_event(event->mask))
		event_len += FANOTIFY_MNT_INFO_LEN;

	if (info_mode & FAN_REPORT_PIDFD)
		event_len += FANOTIFY_PIDFD_INFO_LEN;
@@ -405,6 +410,25 @@ static int process_access_response(struct fsnotify_group *group,
	return -ENOENT;
}

static size_t copy_mnt_info_to_user(struct fanotify_event *event,
				    char __user *buf, int count)
{
	struct fanotify_event_info_mnt info = { };

	info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT;
	info.hdr.len = FANOTIFY_MNT_INFO_LEN;

	if (WARN_ON(count < info.hdr.len))
		return -EFAULT;

	info.mnt_id = FANOTIFY_ME(event)->mnt_id;

	if (copy_to_user(buf, &info, sizeof(info)))
		return -EFAULT;

	return info.hdr.len;
}

static size_t copy_error_info_to_user(struct fanotify_event *event,
				      char __user *buf, int count)
{
@@ -700,6 +724,15 @@ static int copy_info_records_to_user(struct fanotify_event *event,
		total_bytes += ret;
	}

	if (fanotify_is_mnt_event(event->mask)) {
		ret = copy_mnt_info_to_user(event, buf, count);
		if (ret < 0)
			return ret;
		buf += ret;
		count -= ret;
		total_bytes += ret;
	}

	return total_bytes;
}

@@ -1508,6 +1541,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
	if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
		return -EINVAL;

	/* Don't allow mixing mnt events with inode events for now */
	if (flags & FAN_REPORT_MNT) {
		if (class != FAN_CLASS_NOTIF)
			return -EINVAL;
		if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR))
			return -EINVAL;
	}

	if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
		return -EINVAL;

@@ -1767,7 +1808,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
			    int dfd, const char  __user *pathname)
{
	struct inode *inode = NULL;
	struct vfsmount *mnt = NULL;
	struct fsnotify_group *group;
	struct path path;
	struct fan_fsid __fsid, *fsid = NULL;
@@ -1776,7 +1816,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
	unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
	unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
	unsigned int obj_type, fid_mode;
	void *obj;
	void *obj = NULL;
	u32 umask = 0;
	int ret;

@@ -1800,6 +1840,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
	case FAN_MARK_FILESYSTEM:
		obj_type = FSNOTIFY_OBJ_TYPE_SB;
		break;
	case FAN_MARK_MNTNS:
		obj_type = FSNOTIFY_OBJ_TYPE_MNTNS;
		break;
	default:
		return -EINVAL;
	}
@@ -1847,6 +1890,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
		return -EINVAL;
	group = fd_file(f)->private_data;

	/* Only report mount events on mnt namespace */
	if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
		if (mask & ~FANOTIFY_MOUNT_EVENTS)
			return -EINVAL;
		if (mark_type != FAN_MARK_MNTNS)
			return -EINVAL;
	} else {
		if (mask & FANOTIFY_MOUNT_EVENTS)
			return -EINVAL;
		if (mark_type == FAN_MARK_MNTNS)
			return -EINVAL;
	}

	/*
	 * An unprivileged user is not allowed to setup mount nor filesystem
	 * marks.  This also includes setting up such marks by a group that
@@ -1888,7 +1944,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
	 * point.
	 */
	fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
	if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
	if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) &&
	    (!fid_mode || mark_type == FAN_MARK_MOUNT))
		return -EINVAL;

@@ -1938,17 +1994,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
	}

	/* inode held in place by reference to path; group by fget on fd */
	if (mark_type == FAN_MARK_INODE) {
	if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
		inode = path.dentry->d_inode;
		obj = inode;
	} else {
		mnt = path.mnt;
		if (mark_type == FAN_MARK_MOUNT)
			obj = mnt;
		else
			obj = mnt->mnt_sb;
	} else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
		obj = path.mnt;
	} else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
		obj = path.mnt->mnt_sb;
	} else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
		obj = mnt_ns_from_dentry(path.dentry);
	}

	ret = -EINVAL;
	if (!obj)
		goto path_put_and_out;

	/*
	 * If some other task has this inode open for write we should not add
	 * an ignore mask, unless that ignore mask is supposed to survive
@@ -1956,10 +2016,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
	 */
	if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
		ret = mnt ? -EINVAL : -EISDIR;
		ret = !inode ? -EINVAL : -EISDIR;
		/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
		if (ignore == FAN_MARK_IGNORE &&
		    (mnt || S_ISDIR(inode->i_mode)))
		    (!inode || S_ISDIR(inode->i_mode)))
			goto path_put_and_out;

		ret = 0;
@@ -1968,7 +2028,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
	}

	/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
	if (mnt || !S_ISDIR(inode->i_mode)) {
	if (!inode || !S_ISDIR(inode->i_mode)) {
		mask &= ~FAN_EVENT_ON_CHILD;
		umask = FAN_EVENT_ON_CHILD;
		/*
@@ -2042,7 +2102,7 @@ static int __init fanotify_user_setup(void)
				     FANOTIFY_DEFAULT_MAX_USER_MARKS);

	BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13);
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14);
	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);

	fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
@@ -2055,6 +2115,7 @@ static int __init fanotify_user_setup(void)
		fanotify_perm_event_cachep =
			KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
	}
	fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC);

	fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
	init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
Loading