Commit 8c73b263 authored by Dmitry Safonov, committed by David S. Miller
Browse files

net/tcp: Prepare tcp_md5sig_pool for TCP-AO

TCP-AO, similarly to TCP-MD5, needs to allocate tfms on the slow path
(setsockopt()) and to use crypto ahash requests on the fast paths
(RX/TX softirqs). It also needs a temporary/scratch buffer for
preparing the hash.

Rework tcp_md5sig_pool in order to support other hashing algorithms
than MD5. It will make it possible to share pre-allocated crypto_ahash
descriptors and scratch area between all TCP hash users.

Internally tcp_sigpool calls the crypto_clone_ahash() API on a pre-allocated
crypto ahash tfm. Kudos to Herbert, who provided this new crypto API[1].

I was a little concerned over GFP_ATOMIC allocations of ahash and
crypto_request in RX/TX (see tcp_sigpool_start()), so I benchmarked both
"backends" with different algorithms, using a patched version of iperf3[2].
On my laptop with an i7-7600U @ 2.80GHz:

                         clone-tfm                per-CPU-requests
TCP-MD5                  2.25 Gbits/sec           2.30 Gbits/sec
TCP-AO(hmac(sha1))       2.53 Gbits/sec           2.54 Gbits/sec
TCP-AO(hmac(sha512))     1.67 Gbits/sec           1.64 Gbits/sec
TCP-AO(hmac(sha384))     1.77 Gbits/sec           1.80 Gbits/sec
TCP-AO(hmac(sha224))     1.29 Gbits/sec           1.30 Gbits/sec
TCP-AO(hmac(sha3-512))    481 Mbits/sec            480 Mbits/sec
TCP-AO(hmac(md5))        2.07 Gbits/sec           2.12 Gbits/sec
TCP-AO(hmac(rmd160))     1.01 Gbits/sec            995 Mbits/sec
TCP-AO(cmac(aes128))     [not supported yet]      2.11 Gbits/sec

So, it seems that my concerns don't have strong grounds and per-CPU
crypto_request allocation can be dropped/removed from tcp_sigpool once
ciphers get crypto_clone_ahash() support.

[1]: https://lore.kernel.org/all/ZDefxOq6Ax0JeTRH@gondor.apana.org.au/T/#u
[2]: https://github.com/0x7f454c46/iperf/tree/tcp-md5-ao


Signed-off-by: Dmitry Safonov <dima@arista.com>
Reviewed-by: Steen Hegelund <Steen.Hegelund@microchip.com>
Acked-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent cc54d2e2
Loading
Loading
Loading
Loading
+36 −14
Original line number Diff line number Diff line
@@ -1737,12 +1737,39 @@ union tcp_md5sum_block {
#endif
};

/* - pool: digest algorithm, hash description and scratch buffer */
struct tcp_md5sig_pool {
	struct ahash_request	*md5_req;
/**
 * struct tcp_sigpool - per-CPU pool of ahash_requests
 * @scratch: per-CPU temporary area that can be used between
 *	     tcp_sigpool_start() and tcp_sigpool_end() while performing
 *	     a crypto request
 * @req: pre-allocated ahash request
 *
 * An instance is filled in by tcp_sigpool_start() and handed back to
 * tcp_sigpool_end() once hashing is done.
 */
struct tcp_sigpool {
	void *scratch;
	struct ahash_request *req;
};

int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size);
void tcp_sigpool_get(unsigned int id);
void tcp_sigpool_release(unsigned int id);
int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp,
			      const struct sk_buff *skb,
			      unsigned int header_len);

/**
 * tcp_sigpool_start - disable bh and start using tcp_sigpool_ahash
 * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
 * @c: returned tcp_sigpool for usage (uninitialized on failure)
 *
 * Returns 0 on success, error otherwise.
 */
int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c);
/**
 * tcp_sigpool_end - enable bh and stop using tcp_sigpool
 * @c: tcp_sigpool context that was returned by tcp_sigpool_start()
 */
void tcp_sigpool_end(struct tcp_sigpool *c);
size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len);
/* - functions */
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk, const struct sk_buff *skb);
@@ -1798,17 +1825,12 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
#define tcp_twsk_md5_key(twsk)	NULL
#endif

bool tcp_alloc_md5sig_pool(void);

struct tcp_md5sig_pool *tcp_get_md5sig_pool(void);
static inline void tcp_put_md5sig_pool(void)
{
	local_bh_enable();
}
int tcp_md5_alloc_sigpool(void);
void tcp_md5_release_sigpool(void);
void tcp_md5_add_sigpool(void);
extern int tcp_md5_sigpool_id;

int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
			  unsigned int header_len);
int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
int tcp_md5_hash_key(struct tcp_sigpool *hp,
		     const struct tcp_md5sig_key *key);

/* From tcp_fastopen.c */
+4 −0
Original line number Diff line number Diff line
@@ -741,10 +741,14 @@ config DEFAULT_TCP_CONG
	default "bbr" if DEFAULT_BBR
	default "cubic"

config TCP_SIGPOOL
	tristate

config TCP_MD5SIG
	bool "TCP: MD5 Signature Option support (RFC2385)"
	select CRYPTO
	select CRYPTO_MD5
	select TCP_SIGPOOL
	help
	  RFC2385 specifies a method of giving MD5 protection to TCP sessions.
	  Its main (only?) use is to protect BGP sessions between core routers
+1 −0
Original line number Diff line number Diff line
@@ -62,6 +62,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_TCP_SIGPOOL) += tcp_sigpool.o
obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+28 −117
Original line number Diff line number Diff line
@@ -4305,141 +4305,52 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
EXPORT_SYMBOL(tcp_getsockopt);

#ifdef CONFIG_TCP_MD5SIG
static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
static bool tcp_md5sig_pool_populated = false;
int tcp_md5_sigpool_id = -1;
EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id);

static void __tcp_alloc_md5sig_pool(void)
int tcp_md5_alloc_sigpool(void)
{
	struct crypto_ahash *hash;
	int cpu;

	hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(hash))
		return;

	for_each_possible_cpu(cpu) {
		void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
		struct ahash_request *req;

		if (!scratch) {
			scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
					       sizeof(struct tcphdr),
					       GFP_KERNEL,
					       cpu_to_node(cpu));
			if (!scratch)
				return;
			per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
		}
		if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
			continue;

		req = ahash_request_alloc(hash, GFP_KERNEL);
		if (!req)
			return;

		ahash_request_set_callback(req, 0, NULL, NULL);
	size_t scratch_size;
	int ret;

		per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
	}
	/* before setting tcp_md5sig_pool_populated, we must commit all writes
	 * to memory. See smp_rmb() in tcp_get_md5sig_pool()
	scratch_size = sizeof(union tcp_md5sum_block) + sizeof(struct tcphdr);
	ret = tcp_sigpool_alloc_ahash("md5", scratch_size);
	if (ret >= 0) {
		/* As long as any md5 sigpool was allocated, the return
		 * id would stay the same. Re-write the id only for the case
		 * when previously all MD5 keys were deleted and this call
		 * allocates the first MD5 key, which may return a different
		 * sigpool id than was used previously.
		 */
	smp_wmb();
	/* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
	 * and tcp_get_md5sig_pool().
	*/
	WRITE_ONCE(tcp_md5sig_pool_populated, true);
}

bool tcp_alloc_md5sig_pool(void)
{
	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
	if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
		mutex_lock(&tcp_md5sig_mutex);

		if (!tcp_md5sig_pool_populated)
			__tcp_alloc_md5sig_pool();

		mutex_unlock(&tcp_md5sig_mutex);
		WRITE_ONCE(tcp_md5_sigpool_id, ret); /* Avoids the compiler potentially being smart here */
		return 0;
	}
	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
	return READ_ONCE(tcp_md5sig_pool_populated);
	return ret;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);


/**
 *	tcp_get_md5sig_pool - get md5sig_pool for this user
 *
 *	We use percpu structure, so if we succeed, we exit with preemption
 *	and BH disabled, to make sure another thread or softirq handling
 *	wont try to get same context.
 */
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
void tcp_md5_release_sigpool(void)
{
	local_bh_disable();

	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
	if (READ_ONCE(tcp_md5sig_pool_populated)) {
		/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
		smp_rmb();
		return this_cpu_ptr(&tcp_md5sig_pool);
	tcp_sigpool_release(READ_ONCE(tcp_md5_sigpool_id));
}
	local_bh_enable();
	return NULL;
}
EXPORT_SYMBOL(tcp_get_md5sig_pool);

int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
			  const struct sk_buff *skb, unsigned int header_len)
void tcp_md5_add_sigpool(void)
{
	struct scatterlist sg;
	const struct tcphdr *tp = tcp_hdr(skb);
	struct ahash_request *req = hp->md5_req;
	unsigned int i;
	const unsigned int head_data_len = skb_headlen(skb) > header_len ?
					   skb_headlen(skb) - header_len : 0;
	const struct skb_shared_info *shi = skb_shinfo(skb);
	struct sk_buff *frag_iter;

	sg_init_table(&sg, 1);

	sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
	ahash_request_set_crypt(req, &sg, NULL, head_data_len);
	if (crypto_ahash_update(req))
		return 1;

	for (i = 0; i < shi->nr_frags; ++i) {
		const skb_frag_t *f = &shi->frags[i];
		unsigned int offset = skb_frag_off(f);
		struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);

		sg_set_page(&sg, page, skb_frag_size(f),
			    offset_in_page(offset));
		ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
		if (crypto_ahash_update(req))
			return 1;
	tcp_sigpool_get(READ_ONCE(tcp_md5_sigpool_id));
}

	skb_walk_frags(skb, frag_iter)
		if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
			return 1;

	return 0;
}
EXPORT_SYMBOL(tcp_md5_hash_skb_data);

int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
int tcp_md5_hash_key(struct tcp_sigpool *hp,
		     const struct tcp_md5sig_key *key)
{
	u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */
	struct scatterlist sg;

	sg_init_one(&sg, key->key, keylen);
	ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen);
	ahash_request_set_crypt(hp->req, &sg, NULL, keylen);

	/* We use data_race() because tcp_md5_do_add() might change key->key under us */
	return data_race(crypto_ahash_update(hp->md5_req));
	/* We use data_race() because tcp_md5_do_add() might change
	 * key->key under us
	 */
	return data_race(crypto_ahash_update(hp->req));
}
EXPORT_SYMBOL(tcp_md5_hash_key);

+56 −41
Original line number Diff line number Diff line
@@ -1221,10 +1221,6 @@ static int __tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
	key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
@@ -1246,15 +1242,21 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
	struct tcp_sock *tp = tcp_sk(sk);

	if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) {
		if (tcp_md5sig_info_add(sk, GFP_KERNEL))
		if (tcp_md5_alloc_sigpool())
			return -ENOMEM;

		if (tcp_md5sig_info_add(sk, GFP_KERNEL)) {
			tcp_md5_release_sigpool();
			return -ENOMEM;
		}

		if (!static_branch_inc(&tcp_md5_needed.key)) {
			struct tcp_md5sig_info *md5sig;

			md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk));
			rcu_assign_pointer(tp->md5sig_info, NULL);
			kfree_rcu(md5sig, rcu);
			tcp_md5_release_sigpool();
			return -EUSERS;
		}
	}
@@ -1271,8 +1273,12 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
	struct tcp_sock *tp = tcp_sk(sk);

	if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) {
		if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC)))
		tcp_md5_add_sigpool();

		if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) {
			tcp_md5_release_sigpool();
			return -ENOMEM;
		}

		if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) {
			struct tcp_md5sig_info *md5sig;
@@ -1281,6 +1287,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
			net_warn_ratelimited("Too many TCP-MD5 keys in the system\n");
			rcu_assign_pointer(tp->md5sig_info, NULL);
			kfree_rcu(md5sig, rcu);
			tcp_md5_release_sigpool();
			return -EUSERS;
		}
	}
@@ -1380,7 +1387,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
			      cmd.tcpm_key, cmd.tcpm_keylen);
}

static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
static int tcp_v4_md5_hash_headers(struct tcp_sigpool *hp,
				   __be32 daddr, __be32 saddr,
				   const struct tcphdr *th, int nbytes)
{
@@ -1400,38 +1407,35 @@ static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
	ahash_request_set_crypt(hp->req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
	return crypto_ahash_update(hp->req);
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	struct tcp_sigpool hp;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;
	if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
		goto clear_hash_nostart;

	if (crypto_ahash_init(req))
	if (crypto_ahash_init(hp.req))
		goto clear_hash;
	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
	if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
	if (tcp_md5_hash_key(&hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
	ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
	if (crypto_ahash_final(hp.req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	tcp_sigpool_end(&hp);
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	tcp_sigpool_end(&hp);
clear_hash_nostart:
	memset(md5_hash, 0, 16);
	return 1;
}
@@ -1440,9 +1444,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcp_sigpool hp;
	__be32 saddr, daddr;

	if (sk) { /* valid for establish/request sockets */
@@ -1454,30 +1457,28 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;
	if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
		goto clear_hash_nostart;

	if (crypto_ahash_init(req))
	if (crypto_ahash_init(hp.req))
		goto clear_hash;

	if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
	if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
	if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
	if (tcp_md5_hash_key(&hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
	ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
	if (crypto_ahash_final(hp.req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	tcp_sigpool_end(&hp);
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	tcp_sigpool_end(&hp);
clear_hash_nostart:
	memset(md5_hash, 0, 16);
	return 1;
}
@@ -2296,6 +2297,18 @@ static int tcp_v4_init_sock(struct sock *sk)
	return 0;
}

#ifdef CONFIG_TCP_MD5SIG
/* RCU callback (queued with call_rcu() from tcp_v4_destroy_sock()):
 * frees the tcp_md5sig_info that embeds @head, then calls
 * static_branch_slow_dec_deferred() on tcp_md5_needed and
 * tcp_md5_release_sigpool().
 */
static void tcp_md5sig_info_free_rcu(struct rcu_head *head)
{
	struct tcp_md5sig_info *info =
		container_of(head, struct tcp_md5sig_info, rcu);

	kfree(info);
	static_branch_slow_dec_deferred(&tcp_md5_needed);
	tcp_md5_release_sigpool();
}
#endif

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
@@ -2320,10 +2333,12 @@ void tcp_v4_destroy_sock(struct sock *sk)
#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		struct tcp_md5sig_info *md5sig;

		md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
		tcp_clear_md5_list(sk);
		kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu);
		tp->md5sig_info = NULL;
		static_branch_slow_dec_deferred(&tcp_md5_needed);
		call_rcu(&md5sig->rcu, tcp_md5sig_info_free_rcu);
		rcu_assign_pointer(tp->md5sig_info, NULL);
	}
#endif

Loading