Commit 2e652049 authored by Jakub Kicinski
Browse files

Merge branch 'net-move-getsockopt-away-from-__user-buffers'

Breno Leitao says:

====================
net: move .getsockopt away from __user buffers

Currently, the .getsockopt callback requires __user pointers:

  int (*getsockopt)(struct socket *sock, int level,
                    int optname, char __user *optval, int __user *optlen);

This prevents kernel callers (io_uring, BPF) from using getsockopt on
levels other than SOL_SOCKET, since they pass kernel pointers.

Following Linus' suggestion [0], this series introduces sockopt_t, a
type-safe wrapper around iov_iter, and a getsockopt_iter callback that
works with both user and kernel buffers. AF_PACKET and CAN raw are
converted as initial users, with selftests covering the trickiest
conversion patterns.

[0] https://lore.kernel.org/all/CAHk-=whmzrO-BMU=uSVXbuoLi-3tJsO=0kHj1BCPBE3F2kVhTA@mail.gmail.com/
====================

Link: https://patch.msgid.link/20260408-getsockopt-v3-0-061bb9cb355d@debian.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 1b9707e6 5b75e7d6
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -23,9 +23,30 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sockptr.h>
#include <linux/uio.h>

#include <uapi/linux/net.h>

/**
 * struct sockopt - socket option value container
 * @iter_in: iov_iter for reading the optval content supplied by the caller.
 *	     Use copy_from_iter(), since this iterator's direction is
 *	     ITER_SOURCE.
 * @iter_out: iov_iter for protocols to write optval data back to the
 *	      caller's buffer. Use copy_to_iter(), since this iterator's
 *	      direction is ITER_DEST.
 * @optlen: serves as both input (buffer size) and output (returned data size).
 *
 * Type-safe wrapper for socket option data that works with both
 * user and kernel buffers.
 *
 * The optlen field allows callbacks to return a specific length value
 * independent of the bytes written via copy_to_iter().
 */
typedef struct sockopt {
	struct iov_iter iter_in;
	struct iov_iter iter_out;
	int optlen;
} sockopt_t;

struct poll_table_struct;
struct pipe_inode_info;
struct inode;
@@ -192,6 +213,8 @@ struct proto_ops {
				      unsigned int optlen);
	int		(*getsockopt)(struct socket *sock, int level,
				      int optname, char __user *optval, int __user *optlen);
	int		(*getsockopt_iter)(struct socket *sock, int level,
					   int optname, sockopt_t *opt);
	void		(*show_fdinfo)(struct seq_file *m, struct socket *sock);
	int		(*sendmsg)   (struct socket *sock, struct msghdr *m,
				      size_t total_len);
+13 −15
Original line number Diff line number Diff line
@@ -761,7 +761,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
}

static int raw_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
			  sockopt_t *opt)
{
	struct sock *sk = sock->sk;
	struct raw_sock *ro = raw_sk(sk);
@@ -771,8 +771,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,

	if (level != SOL_CAN_RAW)
		return -EINVAL;
	if (get_user(len, optlen))
		return -EFAULT;
	len = opt->optlen;
	if (len < 0)
		return -EINVAL;

@@ -788,12 +787,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
			if (len < fsize) {
				/* return -ERANGE and needed space in optlen */
				err = -ERANGE;
				if (put_user(fsize, optlen))
					err = -EFAULT;
				opt->optlen = fsize;
			} else {
				if (len > fsize)
					len = fsize;
				if (copy_to_user(optval, ro->filter, len))
				if (copy_to_iter(ro->filter, len,
						 &opt->iter_out) != len)
					err = -EFAULT;
			}
		} else {
@@ -802,7 +801,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
		release_sock(sk);

		if (!err)
			err = put_user(len, optlen);
			opt->optlen = len;
		return err;
	}
	case CAN_RAW_ERR_FILTER:
@@ -846,16 +845,16 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
		if (len < sizeof(ro->raw_vcid_opts)) {
			/* return -ERANGE and needed space in optlen */
			err = -ERANGE;
			if (put_user(sizeof(ro->raw_vcid_opts), optlen))
				err = -EFAULT;
			opt->optlen = sizeof(ro->raw_vcid_opts);
		} else {
			if (len > sizeof(ro->raw_vcid_opts))
				len = sizeof(ro->raw_vcid_opts);
			if (copy_to_user(optval, &ro->raw_vcid_opts, len))
			if (copy_to_iter(&ro->raw_vcid_opts, len,
					 &opt->iter_out) != len)
				err = -EFAULT;
		}
		if (!err)
			err = put_user(len, optlen);
			opt->optlen = len;
		return err;
	}
	case CAN_RAW_JOIN_FILTERS:
@@ -869,9 +868,8 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, val, len))
	opt->optlen = len;
	if (copy_to_iter(val, len, &opt->iter_out) != len)
		return -EFAULT;
	return 0;
}
@@ -1078,7 +1076,7 @@ static const struct proto_ops raw_ops = {
	.listen        = sock_no_listen,
	.shutdown      = sock_no_shutdown,
	.setsockopt    = raw_setsockopt,
	.getsockopt    = raw_getsockopt,
	.getsockopt_iter = raw_getsockopt,
	.sendmsg       = raw_sendmsg,
	.recvmsg       = raw_recvmsg,
	.mmap          = sock_no_mmap,
+7 −8
Original line number Diff line number Diff line
@@ -49,6 +49,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/ethtool.h>
#include <linux/uio.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -4051,7 +4052,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
}

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
			     sockopt_t *opt)
{
	int len;
	int val, lv = sizeof(val);
@@ -4065,8 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	len = opt->optlen;

	if (len < 0)
		return -EINVAL;
@@ -4115,7 +4115,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
			len = sizeof(int);
		if (len < sizeof(int))
			return -EINVAL;
		if (copy_from_user(&val, optval, len))
		if (copy_from_iter(&val, len, &opt->iter_in) != len)
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
@@ -4171,9 +4171,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,

	if (len > lv)
		len = lv;
	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
	opt->optlen = len;
	if (copy_to_iter(data, len, &opt->iter_out) != len)
		return -EFAULT;
	return 0;
}
@@ -4672,7 +4671,7 @@ static const struct proto_ops packet_ops = {
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.getsockopt_iter =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
+51 −3
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/security.h>
#include <linux/uio.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/kmod.h>
@@ -2349,11 +2350,45 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
							 int optname));

/*
 * Populate a sockopt_t from a sockptr_t optval/optlen pair.
 *
 * The option length is read from @optlen and stored in opt->optlen. Two
 * iterators are then set up over the very same optval buffer:
 * opt->iter_in (ITER_SOURCE), which protocol callbacks may only read from,
 * and opt->iter_out (ITER_DEST), which they may only write to.
 *
 * @kvec is used only when @optval is a kernel pointer; both iterators are
 * built on top of it in that case.
 *
 * Returns 0 on success, -EFAULT if the length cannot be read from @optlen,
 * or -EINVAL if that length is negative.
 */
static int sockptr_to_sockopt(sockopt_t *opt, sockptr_t optval,
			      sockptr_t optlen, struct kvec *kvec)
{
	int len;

	if (copy_from_sockptr(&len, optlen, sizeof(len)))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	opt->optlen = len;

	/* User buffer: no backing kvec required. */
	if (!optval.is_kernel) {
		iov_iter_ubuf(&opt->iter_in, ITER_SOURCE, optval.user, len);
		iov_iter_ubuf(&opt->iter_out, ITER_DEST, optval.user, len);
		return 0;
	}

	/* Kernel buffer: describe it once, share it between both iterators. */
	kvec->iov_base = optval.kernel;
	kvec->iov_len = len;
	iov_iter_kvec(&opt->iter_in, ITER_SOURCE, kvec, 1, len);
	iov_iter_kvec(&opt->iter_out, ITER_DEST, kvec, 1, len);

	return 0;
}

int do_sock_getsockopt(struct socket *sock, bool compat, int level,
		       int optname, sockptr_t optval, sockptr_t optlen)
{
	int max_optlen __maybe_unused = 0;
	const struct proto_ops *ops;
	struct kvec kvec;
	sockopt_t opt;
	int err;

	err = security_socket_getsockopt(sock, level, optname);
@@ -2366,15 +2401,28 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
	ops = READ_ONCE(sock->ops);
	if (level == SOL_SOCKET) {
		err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
	} else if (unlikely(!ops->getsockopt)) {
		err = -EOPNOTSUPP;
	} else {
	} else if (ops->getsockopt_iter) {
		err = sockptr_to_sockopt(&opt, optval, optlen, &kvec);
		if (err)
			return err;

		err = ops->getsockopt_iter(sock, level, optname, &opt);

		/* Always write back optlen, even on failure. Some protocols
		 * (e.g. CAN raw) return -ERANGE and set optlen to the
		 * required buffer size so userspace can discover it.
		 */
		if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int)))
			return -EFAULT;
	} else if (ops->getsockopt) {
		if (WARN_ONCE(optval.is_kernel || optlen.is_kernel,
			      "Invalid argument type"))
			return -EOPNOTSUPP;

		err = ops->getsockopt(sock, level, optname, optval.user,
				      optlen.user);
	} else {
		err = -EOPNOTSUPP;
	}

	if (!compat)