Loading Documentation/ioctl/ioctl-number.txt +1 −0 Original line number Diff line number Diff line Loading @@ -79,6 +79,7 @@ Code Seq#(hex) Include File Comments 0x1b all InfiniBand Subsystem <http://infiniband.sourceforge.net/> 0x20 all drivers/cdrom/cm206.h 0x22 all scsi/sg.h '!' 00-1F uapi/linux/seccomp.h '#' 00-3F IEEE 1394 Subsystem Block for the entire subsystem '$' 00-0F linux/perf_counter.h, linux/perf_event.h '%' 00-0F include/uapi/linux/stm.h Loading Documentation/userspace-api/seccomp_filter.rst +84 −0 Original line number Diff line number Diff line Loading @@ -122,6 +122,11 @@ In precedence order, they are: Results in the lower 16-bits of the return value being passed to userland as the errno without executing the system call. ``SECCOMP_RET_USER_NOTIF``: Results in a ``struct seccomp_notif`` message sent on the userspace notification fd, if it is attached, or ``-ENOSYS`` if it is not. See below on discussion of how to handle user notifications. ``SECCOMP_RET_TRACE``: When returned, this value will cause the kernel to attempt to notify a ``ptrace()``-based tracer prior to executing the system Loading Loading @@ -183,6 +188,85 @@ The ``samples/seccomp/`` directory contains both an x86-specific example and a more generic example of a higher level macro interface for BPF program generation. Userspace Notification ====================== The ``SECCOMP_RET_USER_NOTIF`` return code lets seccomp filters pass a particular syscall to userspace to be handled. This may be useful for applications like container managers, which wish to intercept particular syscalls (``mount()``, ``finit_module()``, etc.) and change their behavior. To acquire a notification FD, use the ``SECCOMP_FILTER_FLAG_NEW_LISTENER`` argument to the ``seccomp()`` syscall: .. code-block:: c fd = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_NEW_LISTENER, &prog); which (on success) will return a listener fd for the filter, which can then be passed around via ``SCM_RIGHTS`` or similar. 
Note that filter fds correspond to a particular filter, and not a particular task. So if this task then forks, notifications from both tasks will appear on the same filter fd. Reads and writes to/from a filter fd are also synchronized, so a filter fd can safely have many readers. The interface for a seccomp notification fd consists of three structures: .. code-block:: c struct seccomp_notif_sizes { __u16 seccomp_notif; __u16 seccomp_notif_resp; __u16 seccomp_data; }; struct seccomp_notif { __u64 id; __u32 pid; __u32 flags; struct seccomp_data data; }; struct seccomp_notif_resp { __u64 id; __s64 val; __s32 error; __u32 flags; }; The ``struct seccomp_notif_sizes`` structure can be used to determine the size of the various structures used in seccomp notifications. The size of ``struct seccomp_data`` may change in the future, so code should use: .. code-block:: c struct seccomp_notif_sizes sizes; seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes); to determine the size of the various structures to allocate. See samples/seccomp/user-trap.c for an example. Users can read via ``ioctl(SECCOMP_IOCTL_NOTIF_RECV)`` (or ``poll()``) on a seccomp notification fd to receive a ``struct seccomp_notif``, which contains four members: a unique-per-filter ``id``, the ``pid`` of the task which triggered this request (which may be 0 if the task is in a pid ns not visible from the listener's pid namespace), a ``flags`` member which for now only has ``SECCOMP_NOTIF_FLAG_SIGNALED``, representing whether or not the notification is a result of a non-fatal signal, and the ``data`` passed to seccomp. Userspace can then make a decision based on this information about what to do, and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should be the same ``id`` as in ``struct seccomp_notif``. 
It is worth noting that ``struct seccomp_data`` contains the values of register arguments to the syscall, but does not contain pointers to memory. The task's memory is accessible to suitably privileged tracers via ``ptrace()`` or ``/proc/pid/mem``. However, care should be taken to avoid the TOCTOU mentioned above in this document: all arguments being read from the tracee's memory should be read into the tracer's memory before any policy decisions are made. This allows for an atomic decision on syscall arguments. Sysctls ======= Loading include/linux/seccomp.h +5 −4 Original line number Diff line number Diff line Loading @@ -6,7 +6,8 @@ #define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ SECCOMP_FILTER_FLAG_LOG | \ SECCOMP_FILTER_FLAG_SPEC_ALLOW) SECCOMP_FILTER_FLAG_SPEC_ALLOW | \ SECCOMP_FILTER_FLAG_NEW_LISTENER) #ifdef CONFIG_SECCOMP Loading Loading @@ -43,7 +44,7 @@ extern void secure_computing_strict(int this_syscall); #endif extern long prctl_get_seccomp(void); extern long prctl_set_seccomp(unsigned long, char __user *); extern long prctl_set_seccomp(unsigned long, void __user *); static inline int seccomp_mode(struct seccomp *s) { Loading include/linux/syscalls.h +1 −1 Original line number Diff line number Diff line Loading @@ -879,7 +879,7 @@ asmlinkage long sys_renameat2(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, unsigned int flags); asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, const char __user *uargs); void __user *uargs); asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); Loading include/uapi/linux/seccomp.h +37 −3 Original line number Diff line number Diff line Loading @@ -15,11 +15,13 @@ #define SECCOMP_SET_MODE_STRICT 0 #define SECCOMP_SET_MODE_FILTER 1 #define SECCOMP_GET_ACTION_AVAIL 2 #define SECCOMP_GET_NOTIF_SIZES 3 /* Valid flags for SECCOMP_SET_MODE_FILTER */ 
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) /* * All BPF programs must return a 32-bit value. Loading @@ -35,6 +37,7 @@ #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ #define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ Loading @@ -60,4 +63,35 @@ struct seccomp_data { __u64 args[6]; }; struct seccomp_notif_sizes { __u16 seccomp_notif; __u16 seccomp_notif_resp; __u16 seccomp_data; }; struct seccomp_notif { __u64 id; __u32 pid; __u32 flags; struct seccomp_data data; }; struct seccomp_notif_resp { __u64 id; __s64 val; __s32 error; __u32 flags; }; #define SECCOMP_IOC_MAGIC '!' #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) /* Flags for seccomp notification fd ioctl. */ #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ struct seccomp_notif_resp) #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) #endif /* _UAPI_LINUX_SECCOMP_H */ Loading
Documentation/ioctl/ioctl-number.txt +1 −0 Original line number Diff line number Diff line Loading @@ -79,6 +79,7 @@ Code Seq#(hex) Include File Comments 0x1b all InfiniBand Subsystem <http://infiniband.sourceforge.net/> 0x20 all drivers/cdrom/cm206.h 0x22 all scsi/sg.h '!' 00-1F uapi/linux/seccomp.h '#' 00-3F IEEE 1394 Subsystem Block for the entire subsystem '$' 00-0F linux/perf_counter.h, linux/perf_event.h '%' 00-0F include/uapi/linux/stm.h Loading
Documentation/userspace-api/seccomp_filter.rst +84 −0 Original line number Diff line number Diff line Loading @@ -122,6 +122,11 @@ In precedence order, they are: Results in the lower 16-bits of the return value being passed to userland as the errno without executing the system call. ``SECCOMP_RET_USER_NOTIF``: Results in a ``struct seccomp_notif`` message sent on the userspace notification fd, if it is attached, or ``-ENOSYS`` if it is not. See below on discussion of how to handle user notifications. ``SECCOMP_RET_TRACE``: When returned, this value will cause the kernel to attempt to notify a ``ptrace()``-based tracer prior to executing the system Loading Loading @@ -183,6 +188,85 @@ The ``samples/seccomp/`` directory contains both an x86-specific example and a more generic example of a higher level macro interface for BPF program generation. Userspace Notification ====================== The ``SECCOMP_RET_USER_NOTIF`` return code lets seccomp filters pass a particular syscall to userspace to be handled. This may be useful for applications like container managers, which wish to intercept particular syscalls (``mount()``, ``finit_module()``, etc.) and change their behavior. To acquire a notification FD, use the ``SECCOMP_FILTER_FLAG_NEW_LISTENER`` argument to the ``seccomp()`` syscall: .. code-block:: c fd = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_NEW_LISTENER, &prog); which (on success) will return a listener fd for the filter, which can then be passed around via ``SCM_RIGHTS`` or similar. Note that filter fds correspond to a particular filter, and not a particular task. So if this task then forks, notifications from both tasks will appear on the same filter fd. Reads and writes to/from a filter fd are also synchronized, so a filter fd can safely have many readers. The interface for a seccomp notification fd consists of three structures: .. 
code-block:: c struct seccomp_notif_sizes { __u16 seccomp_notif; __u16 seccomp_notif_resp; __u16 seccomp_data; }; struct seccomp_notif { __u64 id; __u32 pid; __u32 flags; struct seccomp_data data; }; struct seccomp_notif_resp { __u64 id; __s64 val; __s32 error; __u32 flags; }; The ``struct seccomp_notif_sizes`` structure can be used to determine the size of the various structures used in seccomp notifications. The size of ``struct seccomp_data`` may change in the future, so code should use: .. code-block:: c struct seccomp_notif_sizes sizes; seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes); to determine the size of the various structures to allocate. See samples/seccomp/user-trap.c for an example. Users can read via ``ioctl(SECCOMP_IOCTL_NOTIF_RECV)`` (or ``poll()``) on a seccomp notification fd to receive a ``struct seccomp_notif``, which contains four members: a unique-per-filter ``id``, the ``pid`` of the task which triggered this request (which may be 0 if the task is in a pid ns not visible from the listener's pid namespace), a ``flags`` member which for now only has ``SECCOMP_NOTIF_FLAG_SIGNALED``, representing whether or not the notification is a result of a non-fatal signal, and the ``data`` passed to seccomp. Userspace can then make a decision based on this information about what to do, and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should be the same ``id`` as in ``struct seccomp_notif``. It is worth noting that ``struct seccomp_data`` contains the values of register arguments to the syscall, but does not contain pointers to memory. The task's memory is accessible to suitably privileged tracers via ``ptrace()`` or ``/proc/pid/mem``. 
However, care should be taken to avoid the TOCTOU mentioned above in this document: all arguments being read from the tracee's memory should be read into the tracer's memory before any policy decisions are made. This allows for an atomic decision on syscall arguments. Sysctls ======= Loading
include/linux/seccomp.h +5 −4 Original line number Diff line number Diff line Loading @@ -6,7 +6,8 @@ #define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ SECCOMP_FILTER_FLAG_LOG | \ SECCOMP_FILTER_FLAG_SPEC_ALLOW) SECCOMP_FILTER_FLAG_SPEC_ALLOW | \ SECCOMP_FILTER_FLAG_NEW_LISTENER) #ifdef CONFIG_SECCOMP Loading Loading @@ -43,7 +44,7 @@ extern void secure_computing_strict(int this_syscall); #endif extern long prctl_get_seccomp(void); extern long prctl_set_seccomp(unsigned long, char __user *); extern long prctl_set_seccomp(unsigned long, void __user *); static inline int seccomp_mode(struct seccomp *s) { Loading
include/linux/syscalls.h +1 −1 Original line number Diff line number Diff line Loading @@ -879,7 +879,7 @@ asmlinkage long sys_renameat2(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, unsigned int flags); asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, const char __user *uargs); void __user *uargs); asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); Loading
include/uapi/linux/seccomp.h +37 −3 Original line number Diff line number Diff line Loading @@ -15,11 +15,13 @@ #define SECCOMP_SET_MODE_STRICT 0 #define SECCOMP_SET_MODE_FILTER 1 #define SECCOMP_GET_ACTION_AVAIL 2 #define SECCOMP_GET_NOTIF_SIZES 3 /* Valid flags for SECCOMP_SET_MODE_FILTER */ #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) /* * All BPF programs must return a 32-bit value. Loading @@ -35,6 +37,7 @@ #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ #define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ Loading @@ -60,4 +63,35 @@ struct seccomp_data { __u64 args[6]; }; struct seccomp_notif_sizes { __u16 seccomp_notif; __u16 seccomp_notif_resp; __u16 seccomp_data; }; struct seccomp_notif { __u64 id; __u32 pid; __u32 flags; struct seccomp_data data; }; struct seccomp_notif_resp { __u64 id; __s64 val; __s32 error; __u32 flags; }; #define SECCOMP_IOC_MAGIC '!' #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) /* Flags for seccomp notification fd ioctl. */ #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ struct seccomp_notif_resp) #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) #endif /* _UAPI_LINUX_SECCOMP_H */