Merge patch series "mount notification" (2cc0b7fd) · Commits · git / linux-net

fs/mount.h

+26 −0

Original line number	Diff line number	Diff line
		@@ -5,6 +5,8 @@
		#include <linux/ns_common.h>
		#include <linux/fs_pin.h>

		extern struct list_head notify_list;

		struct mnt_namespace {
		struct ns_common ns;
		struct mount * root;
		@@ -21,6 +23,10 @@ struct mnt_namespace {
		struct rcu_head mnt_ns_rcu;
		};
		u64 event;
		#ifdef CONFIG_FSNOTIFY
		__u32 n_fsnotify_mask;
		struct fsnotify_mark_connector __rcu *n_fsnotify_marks;
		#endif
		unsigned int nr_mounts; /* # of mounts in the namespace */
		unsigned int pending_mounts;
		struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
		@@ -76,6 +82,8 @@ struct mount {
		#ifdef CONFIG_FSNOTIFY
		struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
		__u32 mnt_fsnotify_mask;
		struct list_head to_notify; /* need to queue notification */
		struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */
		#endif
		int mnt_id; /* mount identifier, reused */
		u64 mnt_id_unique; /* mount ID unique until reboot */
		@@ -177,3 +185,21 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
		{
		return container_of(ns, struct mnt_namespace, ns);
		}

		#ifdef CONFIG_FSNOTIFY
		/*
		 * mnt_notify_add - note that a mount's namespace membership may have changed
		 * @m: mount to consider for notification
		 *
		 * Appends @m to notify_list when either its current namespace (m->mnt_ns)
		 * or its previous namespace (m->prev_ns) has fsnotify marks attached.
		 * When neither is being watched nothing is queued and m->prev_ns is
		 * brought up to date with m->mnt_ns right away.
		 */
		static inline void mnt_notify_add(struct mount *m)
		{
			/* Optimize the case where there are no watches */
			if ((m->mnt_ns && m->mnt_ns->n_fsnotify_marks) ||
			    (m->prev_ns && m->prev_ns->n_fsnotify_marks))
				list_add_tail(&m->to_notify, &notify_list);
			else
				m->prev_ns = m->mnt_ns;
		}
		#else
		/* Without CONFIG_FSNOTIFY there is nothing to queue. */
		static inline void mnt_notify_add(struct mount *m)
		{
		}
		#endif

		/* Returns the mount namespace behind @dentry, or NULL if it is not a mount namespace file. */
		struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);

fs/namespace.c

+89 −4

Original line number	Diff line number	Diff line
		@@ -81,6 +81,9 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
		static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
		static DEFINE_SEQLOCK(mnt_ns_tree_lock);

		#ifdef CONFIG_FSNOTIFY
		LIST_HEAD(notify_list); /* protected by namespace_sem */
		#endif
		static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
		static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */

		@@ -163,6 +166,7 @@ static void mnt_ns_release(struct mnt_namespace *ns)
		{
		/* keep alive for {list,stat}mount() */
		if (refcount_dec_and_test(&ns->passive)) {
		fsnotify_mntns_delete(ns);
		put_user_ns(ns->user_ns);
		kfree(ns);
		}
		@@ -1176,6 +1180,8 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
		ns->mnt_first_node = &mnt->mnt_node;
		rb_link_node(&mnt->mnt_node, parent, link);
		rb_insert_color(&mnt->mnt_node, &ns->mounts);

		mnt_notify_add(mnt);
		}

		/*
		@@ -1723,6 +1729,50 @@ int may_umount(struct vfsmount *mnt)

		EXPORT_SYMBOL(may_umount);

		#ifdef CONFIG_FSNOTIFY
		/*
		 * Emit the fsnotify event(s) describing how @p's namespace changed since
		 * it was last recorded in p->prev_ns, then record the current namespace.
		 *
		 *   prev_ns == mnt_ns       -> move within that namespace
		 *   prev_ns set and differs -> detach from prev_ns
		 *   mnt_ns set and differs  -> attach to mnt_ns
		 *
		 * When both namespaces are set and differ, the detach is sent before the
		 * attach.
		 */
		static void mnt_notify(struct mount *p)
		{
			struct mnt_namespace *old_ns = p->prev_ns;
			struct mnt_namespace *new_ns = p->mnt_ns;

			if (old_ns == new_ns) {
				fsnotify_mnt_move(new_ns, &p->mnt);
			} else {
				if (old_ns)
					fsnotify_mnt_detach(old_ns, &p->mnt);
				if (new_ns)
					fsnotify_mnt_attach(new_ns, &p->mnt);
			}

			p->prev_ns = new_ns;
		}

		/*
		 * Drain notify_list: every queued mount is passed to mnt_notify() and
		 * then unlinked from the list.
		 */
		static void notify_mnt_list(void)
		{
			/* The _safe iterator needs two cursors because entries are unlinked. */
			struct mount *m, *tmp;
			/*
			 * Notify about mounts that were added/reparented/detached/remain
			 * connected after unmount.
			 */
			list_for_each_entry_safe(m, tmp, &notify_list, to_notify) {
				mnt_notify(m);
				list_del_init(&m->to_notify);
			}
		}

		static bool need_notify_mnt_list(void)
		{
		return !list_empty(&notify_list);
		}
		#else
		/* !CONFIG_FSNOTIFY stub: there is no notification list to drain. */
		static void notify_mnt_list(void)
		{
		}

		/* !CONFIG_FSNOTIFY stub: never reports pending notifications. */
		static bool need_notify_mnt_list(void)
		{
		return false;
		}
		#endif

		static void namespace_unlock(void)
		{
		struct hlist_head head;
		@@ -1733,7 +1783,18 @@ static void namespace_unlock(void)
		hlist_move_list(&unmounted, &head);
		list_splice_init(&ex_mountpoints, &list);

		if (need_notify_mnt_list()) {
		/*
		* No point blocking out concurrent readers while notifications
		* are sent. This will also allow statmount()/listmount() to run
		* concurrently.
		*/
		downgrade_write(&namespace_sem);
		notify_mnt_list();
		up_read(&namespace_sem);
		} else {
		up_write(&namespace_sem);
		}

		shrink_dentry_list(&list);

		@@ -1846,6 +1907,19 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
		change_mnt_propagation(p, MS_PRIVATE);
		if (disconnect)
		hlist_add_head(&p->mnt_umount, &unmounted);

		/*
		* At this point p->mnt_ns is NULL, notification will be queued
		* only if
		*
		* - p->prev_ns is non-NULL and
		* - p->prev_ns->n_fsnotify_marks is non-NULL
		*
		* This will preclude queuing the mount if this is a cleanup
		* after a failed copy_tree() or destruction of an anonymous
		* namespace, etc.
		*/
		mnt_notify_add(p);
		}
		}

		@@ -2145,16 +2219,24 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
		}
		}

		struct mnt_namespace mnt_ns_from_dentry(struct dentry dentry)
		{
		if (!is_mnt_ns_file(dentry))
		return NULL;

		return to_mnt_ns(get_proc_ns(dentry->d_inode));
		}

		/*
		 * mnt_ns_loop - check whether @dentry refers to a mount namespace that
		 * must not be bind mounted from the current one
		 * @dentry: dentry about to be bind mounted
		 *
		 * Returns false when @dentry is not a mount namespace file. Otherwise
		 * returns true when the current task's mount namespace has a sequence
		 * number greater than or equal to that of the namespace behind @dentry.
		 */
		static bool mnt_ns_loop(struct dentry *dentry)
		{
			/* Could bind mounting the mount namespace inode cause a
			 * mount namespace loop?
			 */
			struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry);

			/* mnt_ns_from_dentry() already performs the is_mnt_ns_file() check. */
			if (!mnt_ns)
				return false;

			return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
		}

		@@ -2547,6 +2629,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
		dest_mp = smp;
		unhash_mnt(source_mnt);
		attach_mnt(source_mnt, top_mnt, dest_mp, beneath);
		mnt_notify_add(source_mnt);
		touch_mnt_namespace(source_mnt->mnt_ns);
		} else {
		if (source_mnt->mnt_ns) {
		@@ -4468,6 +4551,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		list_del_init(&new_mnt->mnt_expire);
		put_mountpoint(root_mp);
		unlock_mount_hash();
		mnt_notify_add(root_mnt);
		mnt_notify_add(new_mnt);
		chroot_fs_refs(&root, &new);
		error = 0;
		out4:

fs/notify/fanotify/fanotify.c

+35 −3

Original line number	Diff line number	Diff line
		@@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old,
		case FANOTIFY_EVENT_TYPE_FS_ERROR:
		return fanotify_error_event_equal(FANOTIFY_EE(old),
		FANOTIFY_EE(new));
		case FANOTIFY_EVENT_TYPE_MNT:
		return false;
		default:
		WARN_ON_ONCE(1);
		}
		@@ -312,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
		pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
		__func__, iter_info->report_mask, event_mask, data, data_type);

		if (!fid_mode) {
		if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
		if (data_type != FSNOTIFY_EVENT_MNT)
		return 0;
		} else if (!fid_mode) {
		/* Do we have path to open a file descriptor? */
		if (!path)
		return 0;
		@@ -557,6 +562,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
		return &pevent->fae;
		}

		/*
		 * Allocate a mount event from fanotify_mnt_event_cachep and fill in its
		 * type and mount ID. Returns the embedded fanotify_event, or NULL when the
		 * allocation fails.
		 */
		static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp)
		{
			struct fanotify_mnt_event *mevent =
				kmem_cache_alloc(fanotify_mnt_event_cachep, gfp);

			if (mevent) {
				mevent->fae.type = FANOTIFY_EVENT_TYPE_MNT;
				mevent->mnt_id = mnt_id;
				return &mevent->fae;
			}

			return NULL;
		}

		static struct fanotify_event *fanotify_alloc_perm_event(const void *data,
		int data_type,
		gfp_t gfp)
		@@ -731,6 +750,7 @@ static struct fanotify_event *fanotify_alloc_event(
		fid_mode);
		struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
		const struct path *path = fsnotify_data_path(data, data_type);
		u64 mnt_id = fsnotify_data_mnt_id(data, data_type);
		struct mem_cgroup *old_memcg;
		struct dentry *moved = NULL;
		struct inode *child = NULL;
		@@ -826,8 +846,12 @@ static struct fanotify_event *fanotify_alloc_event(
		moved, &hash, gfp);
		} else if (fid_mode) {
		event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
		} else {
		} else if (path) {
		event = fanotify_alloc_path_event(path, &hash, gfp);
		} else if (mnt_id) {
		event = fanotify_alloc_mnt_event(mnt_id, gfp);
		} else {
		WARN_ON_ONCE(1);
		}

		if (!event)
		@@ -927,7 +951,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
		BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
		BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);

		BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
		BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24);

		mask = fanotify_group_event_mask(group, iter_info, &match_mask,
		mask, data, data_type, dir);
		@@ -1028,6 +1052,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group,
		mempool_free(fee, &group->fanotify_data.error_events_pool);
		}

		static void fanotify_free_mnt_event(struct fanotify_event *event)
		{
		kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event));
		}

		static void fanotify_free_event(struct fsnotify_group *group,
		struct fsnotify_event *fsn_event)
		{
		@@ -1054,6 +1083,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
		case FANOTIFY_EVENT_TYPE_FS_ERROR:
		fanotify_free_error_event(group, event);
		break;
		case FANOTIFY_EVENT_TYPE_MNT:
		fanotify_free_mnt_event(event);
		break;
		default:
		WARN_ON_ONCE(1);
		}

fs/notify/fanotify/fanotify.h

+18 −0

Original line number	Diff line number	Diff line
		@@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache;
		extern struct kmem_cache *fanotify_fid_event_cachep;
		extern struct kmem_cache *fanotify_path_event_cachep;
		extern struct kmem_cache *fanotify_perm_event_cachep;
		extern struct kmem_cache *fanotify_mnt_event_cachep;

		/* Possible states of the permission event */
		enum {
		@@ -244,6 +245,7 @@ enum fanotify_event_type {
		FANOTIFY_EVENT_TYPE_PATH_PERM,
		FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
		FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
		FANOTIFY_EVENT_TYPE_MNT,
		__FANOTIFY_EVENT_TYPE_NUM
		};

		@@ -409,12 +411,23 @@ struct fanotify_path_event {
		struct path path;
		};

		/* Mount event: the common fanotify event plus the ID of the affected mount. */
		struct fanotify_mnt_event {
		struct fanotify_event fae; /* embedded base event; FANOTIFY_ME() maps it back */
		u64 mnt_id; /* mount ID carried by the event */
		};

		/* Map an embedded fanotify_event back to its enclosing path event. */
		static inline struct fanotify_path_event *
		FANOTIFY_PE(struct fanotify_event *event)
		{
			struct fanotify_path_event *pevent;

			pevent = container_of(event, struct fanotify_path_event, fae);
			return pevent;
		}

		/* Map an embedded fanotify_event back to its enclosing mount event. */
		static inline struct fanotify_mnt_event *
		FANOTIFY_ME(struct fanotify_event *event)
		{
			struct fanotify_mnt_event *mevent;

			mevent = container_of(event, struct fanotify_mnt_event, fae);
			return mevent;
		}

		/*
		* Structure for permission fanotify events. It gets allocated and freed in
		* fanotify_handle_event() since we wait there for user response. When the
		@@ -466,6 +479,11 @@ static inline bool fanotify_is_error_event(u32 mask)
		return mask & FAN_FS_ERROR;
		}

		/* True when @mask contains FAN_MNT_ATTACH or FAN_MNT_DETACH. */
		static inline bool fanotify_is_mnt_event(u32 mask)
		{
			return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH);
		}

		static inline const struct path *fanotify_event_path(struct fanotify_event *event)
		{
		if (event->type == FANOTIFY_EVENT_TYPE_PATH)

fs/notify/fanotify/fanotify_user.c

+75 −14

Original line number	Diff line number	Diff line
		@@ -113,6 +113,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init;
		struct kmem_cache *fanotify_fid_event_cachep __ro_after_init;
		struct kmem_cache *fanotify_path_event_cachep __ro_after_init;
		struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
		struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init;

		#define FANOTIFY_EVENT_ALIGN 4
		#define FANOTIFY_FID_INFO_HDR_LEN \
		@@ -123,6 +124,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
		(sizeof(struct fanotify_event_info_error))
		#define FANOTIFY_RANGE_INFO_LEN \
		(sizeof(struct fanotify_event_info_range))
		#define FANOTIFY_MNT_INFO_LEN \
		(sizeof(struct fanotify_event_info_mnt))

		static int fanotify_fid_info_len(int fh_len, int name_len)
		{
		@@ -178,6 +181,8 @@ static size_t fanotify_event_len(unsigned int info_mode,
		fh_len = fanotify_event_object_fh_len(event);
		event_len += fanotify_fid_info_len(fh_len, dot_len);
		}
		if (fanotify_is_mnt_event(event->mask))
		event_len += FANOTIFY_MNT_INFO_LEN;

		if (info_mode & FAN_REPORT_PIDFD)
		event_len += FANOTIFY_PIDFD_INFO_LEN;
		@@ -405,6 +410,25 @@ static int process_access_response(struct fsnotify_group *group,
		return -ENOENT;
		}

		/*
		 * Build a FAN_EVENT_INFO_TYPE_MNT info record for @event and copy it to
		 * the user buffer @buf, which has @count bytes available.
		 *
		 * Returns the record length on success. Returns -EFAULT when @count is
		 * smaller than the record (after a WARN_ON) or when copy_to_user() fails.
		 */
		static size_t copy_mnt_info_to_user(struct fanotify_event *event,
		char __user *buf, int count)
		{
			/* Start from an empty initialiser before filling in the fields. */
			struct fanotify_event_info_mnt rec = { };

			rec.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT;
			rec.hdr.len = FANOTIFY_MNT_INFO_LEN;
			rec.mnt_id = FANOTIFY_ME(event)->mnt_id;

			/* Compare against hdr.len so the check stays a signed comparison. */
			if (WARN_ON(count < rec.hdr.len))
				return -EFAULT;

			return copy_to_user(buf, &rec, sizeof(rec)) ? -EFAULT : rec.hdr.len;
		}

		static size_t copy_error_info_to_user(struct fanotify_event *event,
		char __user *buf, int count)
		{
		@@ -700,6 +724,15 @@ static int copy_info_records_to_user(struct fanotify_event *event,
		total_bytes += ret;
		}

		if (fanotify_is_mnt_event(event->mask)) {
		ret = copy_mnt_info_to_user(event, buf, count);
		if (ret < 0)
		return ret;
		buf += ret;
		count -= ret;
		total_bytes += ret;
		}

		return total_bytes;
		}

		@@ -1508,6 +1541,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
		if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID))
		return -EINVAL;

		/* Don't allow mixing mnt events with inode events for now */
		if (flags & FAN_REPORT_MNT) {
		if (class != FAN_CLASS_NOTIF)
		return -EINVAL;
		if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR))
		return -EINVAL;
		}

		if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
		return -EINVAL;

		@@ -1767,7 +1808,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
		int dfd, const char __user *pathname)
		{
		struct inode *inode = NULL;
		struct vfsmount *mnt = NULL;
		struct fsnotify_group *group;
		struct path path;
		struct fan_fsid __fsid, *fsid = NULL;
		@@ -1776,7 +1816,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
		unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
		unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
		unsigned int obj_type, fid_mode;
		void *obj;
		void *obj = NULL;
		u32 umask = 0;
		int ret;

		@@ -1800,6 +1840,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
		case FAN_MARK_FILESYSTEM:
		obj_type = FSNOTIFY_OBJ_TYPE_SB;
		break;
		case FAN_MARK_MNTNS:
		obj_type = FSNOTIFY_OBJ_TYPE_MNTNS;
		break;
		default:
		return -EINVAL;
		}
		@@ -1847,6 +1890,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
		return -EINVAL;
		group = fd_file(f)->private_data;

		/* Only report mount events on mnt namespace */
		if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
		if (mask & ~FANOTIFY_MOUNT_EVENTS)
		return -EINVAL;
		if (mark_type != FAN_MARK_MNTNS)
		return -EINVAL;
		} else {
		if (mask & FANOTIFY_MOUNT_EVENTS)
		return -EINVAL;
		if (mark_type == FAN_MARK_MNTNS)
		return -EINVAL;
		}

		/*
		* An unprivileged user is not allowed to setup mount nor filesystem
		* marks. This also includes setting up such marks by a group that
		@@ -1888,7 +1944,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
		* point.
		*/
		fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
		if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
		if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) &&
		(!fid_mode || mark_type == FAN_MARK_MOUNT))
		return -EINVAL;

		@@ -1938,17 +1994,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
		}

		/* inode held in place by reference to path; group by fget on fd */
		if (mark_type == FAN_MARK_INODE) {
		if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) {
		inode = path.dentry->d_inode;
		obj = inode;
		} else {
		mnt = path.mnt;
		if (mark_type == FAN_MARK_MOUNT)
		obj = mnt;
		else
		obj = mnt->mnt_sb;
		} else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
		obj = path.mnt;
		} else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) {
		obj = path.mnt->mnt_sb;
		} else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) {
		obj = mnt_ns_from_dentry(path.dentry);
		}

		ret = -EINVAL;
		if (!obj)
		goto path_put_and_out;

		/*
		* If some other task has this inode open for write we should not add
		* an ignore mask, unless that ignore mask is supposed to survive
		@@ -1956,10 +2016,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
		*/
		if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) &&
		!(flags & FAN_MARK_IGNORED_SURV_MODIFY)) {
		ret = mnt ? -EINVAL : -EISDIR;
		ret = !inode ? -EINVAL : -EISDIR;
		/* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
		if (ignore == FAN_MARK_IGNORE &&
		(mnt || S_ISDIR(inode->i_mode)))
		(!inode || S_ISDIR(inode->i_mode)))
		goto path_put_and_out;

		ret = 0;
		@@ -1968,7 +2028,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
		}

		/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
		if (mnt || !S_ISDIR(inode->i_mode)) {
		if (!inode || !S_ISDIR(inode->i_mode)) {
		mask &= ~FAN_EVENT_ON_CHILD;
		umask = FAN_EVENT_ON_CHILD;
		/*
		@@ -2042,7 +2102,7 @@ static int __init fanotify_user_setup(void)
		FANOTIFY_DEFAULT_MAX_USER_MARKS);

		BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
		BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13);
		BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14);
		BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);

		fanotify_mark_cache = KMEM_CACHE(fanotify_mark,
		@@ -2055,6 +2115,7 @@ static int __init fanotify_user_setup(void)
		fanotify_perm_event_cachep =
		KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
		}
		fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC);

		fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS;
		init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =