Commit c6634c98 authored by Jakub Kicinski
Browse files

Merge branch 'net-faster-and-simpler-crc32c-computation'

Eric Biggers says:

====================
net: faster and simpler CRC32C computation

Update networking code that computes the CRC32C of packets to just call
crc32c() without unnecessary abstraction layers.  The result is faster
and simpler code.

Patches 1-7 add skb_crc32c() and remove the overly-abstracted and
inefficient __skb_checksum().

Patches 8-10 replace skb_copy_and_hash_datagram_iter() with
skb_copy_and_crc32c_datagram_iter(), eliminating the unnecessary use of
the crypto layer.  This unblocks the conversion of nvme-tcp to call
crc32c() directly instead of using the crypto layer, which patch 9 does.

v1: https://lore.kernel.org/20250511004110.145171-1-ebiggers@kernel.org
====================

Link: https://patch.msgid.link/20250519175012.36581-1-ebiggers@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 51ebe6b1 c93f75b2
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ config RDMA_SIW
	depends on INET && INFINIBAND
	depends on INFINIBAND_VIRT_DMA
	select CRC32
	select NET_CRC32C
	help
	This driver implements the iWARP RDMA transport over
	the Linux TCP/IP network stack. It enables a system with a
+1 −21
Original line number Diff line number Diff line
@@ -693,29 +693,9 @@ static inline void siw_crc_oneshot(const void *data, size_t len, u8 out[4])
	return siw_crc_final(&crc, out);
}

/*
 * siw_csum_update - CRC32C-backed "update" callback for skb_checksum_ops.
 *
 * Passes @len bytes starting at @buff to crc32c(), using @sum as the running
 * value, and returns the result.  The casts convert the value between __wsum
 * and __u32 on the way in and out.
 */
static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum)
{
	return (__force __wsum)crc32c((__force __u32)sum, buff, len);
}

/*
 * siw_csum_combine - CRC32C-backed "combine" callback for skb_checksum_ops.
 *
 * Returns crc32c_combine(@csum, @csum2, @len), converted to __wsum.
 * @offset is part of the signature but is not referenced in the body.
 */
static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset,
				      int len)
{
	return (__force __wsum)crc32c_combine((__force __u32)csum,
					      (__force __u32)csum2, len);
}

static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
{
	const struct skb_checksum_ops siw_cs_ops = {
		.update = siw_csum_update,
		.combine = siw_csum_combine,
	};
	__wsum crc = (__force __wsum)srx->mpa_crc;

	crc = __skb_checksum(srx->skb, srx->skb_offset, len, crc,
			     &siw_cs_ops);
	srx->mpa_crc = (__force u32)crc;
	srx->mpa_crc = skb_crc32c(srx->skb, srx->skb_offset, len, srx->mpa_crc);
}

#define siw_dbg(ibdev, fmt, ...)                                               \
+2 −2
Original line number Diff line number Diff line
@@ -84,9 +84,9 @@ config NVME_TCP
	tristate "NVM Express over Fabrics TCP host driver"
	depends on INET
	depends on BLOCK
	select CRC32
	select NET_CRC32C
	select NVME_FABRICS
	select CRYPTO
	select CRYPTO_CRC32C
	help
	  This provides support for the NVMe over Fabrics protocol using
	  the TCP transport.  This allows you to use remote block devices
+40 −84
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/crc32.h>
#include <linux/nvme-tcp.h>
#include <linux/nvme-keyring.h>
#include <net/sock.h>
@@ -16,7 +17,6 @@
#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/blk-mq.h>
#include <crypto/hash.h>
#include <net/busy_poll.h>
#include <trace/events/sock.h>

@@ -168,8 +168,8 @@ struct nvme_tcp_queue {
	bool			hdr_digest;
	bool			data_digest;
	bool			tls_enabled;
	struct ahash_request	*rcv_hash;
	struct ahash_request	*snd_hash;
	u32			rcv_crc;
	u32			snd_crc;
	__le32			exp_ddgst;
	__le32			recv_ddgst;
	struct completion       tls_complete;
@@ -456,32 +456,38 @@ nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
	return req;
}

static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
		__le32 *dgst)
#define NVME_TCP_CRC_SEED (~0)

static inline void nvme_tcp_ddgst_update(u32 *crcp,
		struct page *page, size_t off, size_t len)
{
	ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
	crypto_ahash_final(hash);
	page += off / PAGE_SIZE;
	off %= PAGE_SIZE;
	while (len) {
		const void *vaddr = kmap_local_page(page);
		size_t n = min(len, (size_t)PAGE_SIZE - off);

		*crcp = crc32c(*crcp, vaddr + off, n);
		kunmap_local(vaddr);
		page++;
		off = 0;
		len -= n;
	}
}

static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
		struct page *page, off_t off, size_t len)
static inline __le32 nvme_tcp_ddgst_final(u32 crc)
{
	struct scatterlist sg;

	sg_init_table(&sg, 1);
	sg_set_page(&sg, page, len, off);
	ahash_request_set_crypt(hash, &sg, NULL, len);
	crypto_ahash_update(hash);
	return cpu_to_le32(~crc);
}

static inline void nvme_tcp_hdgst(struct ahash_request *hash,
		void *pdu, size_t len)
static inline __le32 nvme_tcp_hdgst(const void *pdu, size_t len)
{
	struct scatterlist sg;
	return cpu_to_le32(~crc32c(NVME_TCP_CRC_SEED, pdu, len));
}

	sg_init_one(&sg, pdu, len);
	ahash_request_set_crypt(hash, &sg, pdu + len, len);
	crypto_ahash_digest(hash);
/*
 * Compute the header digest of the first @len bytes of @pdu with
 * nvme_tcp_hdgst() and store the resulting __le32 at byte offset @len,
 * i.e. directly after the bytes that were digested.
 *
 * NOTE(review): presumably every caller provides room for the __le32 digest
 * after the header and @pdu + @len is suitably aligned -- confirm.
 */
static inline void nvme_tcp_set_hdgst(void *pdu, size_t len)
{
	*(__le32 *)(pdu + len) = nvme_tcp_hdgst(pdu, len);
}

static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
@@ -499,8 +505,7 @@ static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
	}

	recv_digest = *(__le32 *)(pdu + hdr->hlen);
	nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
	exp_digest = *(__le32 *)(pdu + hdr->hlen);
	exp_digest = nvme_tcp_hdgst(pdu, pdu_len);
	if (recv_digest != exp_digest) {
		dev_err(queue->ctrl->ctrl.device,
			"header digest error: recv %#x expected %#x\n",
@@ -526,7 +531,7 @@ static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
		nvme_tcp_queue_id(queue));
		return -EPROTO;
	}
	crypto_ahash_init(queue->rcv_hash);
	queue->rcv_crc = NVME_TCP_CRC_SEED;

	return 0;
}
@@ -926,8 +931,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
				iov_iter_count(&req->iter));

		if (queue->data_digest)
			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
				&req->iter, recv_len, queue->rcv_hash);
			ret = skb_copy_and_crc32c_datagram_iter(skb, *offset,
				&req->iter, recv_len, &queue->rcv_crc);
		else
			ret = skb_copy_datagram_iter(skb, *offset,
					&req->iter, recv_len);
@@ -945,7 +950,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,

	if (!queue->data_remaining) {
		if (queue->data_digest) {
			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
			queue->exp_ddgst = nvme_tcp_ddgst_final(queue->rcv_crc);
			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
		} else {
			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
@@ -1147,7 +1152,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
			return ret;

		if (queue->data_digest)
			nvme_tcp_ddgst_update(queue->snd_hash, page,
			nvme_tcp_ddgst_update(&queue->snd_crc, page,
					offset, ret);

		/*
@@ -1161,8 +1166,8 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
		/* fully successful last send in current PDU */
		if (last && ret == len) {
			if (queue->data_digest) {
				nvme_tcp_ddgst_final(queue->snd_hash,
					&req->ddgst);
				req->ddgst =
					nvme_tcp_ddgst_final(queue->snd_crc);
				req->state = NVME_TCP_SEND_DDGST;
				req->offset = 0;
			} else {
@@ -1194,7 +1199,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
		msg.msg_flags |= MSG_EOR;

	if (queue->hdr_digest && !req->offset)
		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
		nvme_tcp_set_hdgst(pdu, sizeof(*pdu));

	bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
@@ -1207,7 +1212,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
		if (inline_data) {
			req->state = NVME_TCP_SEND_DATA;
			if (queue->data_digest)
				crypto_ahash_init(queue->snd_hash);
				queue->snd_crc = NVME_TCP_CRC_SEED;
		} else {
			nvme_tcp_done_send_req(queue);
		}
@@ -1229,7 +1234,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
	int ret;

	if (queue->hdr_digest && !req->offset)
		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
		nvme_tcp_set_hdgst(pdu, sizeof(*pdu));

	if (!req->h2cdata_left)
		msg.msg_flags |= MSG_SPLICE_PAGES;
@@ -1244,7 +1249,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
	if (!len) {
		req->state = NVME_TCP_SEND_DATA;
		if (queue->data_digest)
			crypto_ahash_init(queue->snd_hash);
			queue->snd_crc = NVME_TCP_CRC_SEED;
		return 1;
	}
	req->offset += ret;
@@ -1384,41 +1389,6 @@ static void nvme_tcp_io_work(struct work_struct *w)
	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}

/*
 * Release the hashing state of @queue: both ahash requests (rcv_hash and
 * snd_hash) and the transform obtained from rcv_hash.
 */
static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
{
	/* Look up the transform while queue->rcv_hash is still allocated. */
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);

	ahash_request_free(queue->rcv_hash);
	ahash_request_free(queue->snd_hash);
	crypto_free_ahash(tfm);
}

/*
 * Allocate the hashing state of @queue: one "crc32c" ahash transform and two
 * requests on that transform, stored in queue->snd_hash and queue->rcv_hash.
 *
 * Return: 0 on success; PTR_ERR() of the transform if it cannot be allocated;
 * -ENOMEM if either request allocation fails.  On failure, everything this
 * function allocated so far is freed before returning.
 */
static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
{
	struct crypto_ahash *tfm;

	tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!queue->snd_hash)
		goto free_tfm;
	/* Flags 0 with a NULL callback and NULL callback data. */
	ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);

	queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!queue->rcv_hash)
		goto free_snd_hash;
	ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);

	return 0;
	/*
	 * Error unwinding, in reverse order of allocation.
	 * NOTE(review): queue->snd_hash is not reset after being freed below,
	 * so it still holds the stale pointer when -ENOMEM is returned.
	 */
free_snd_hash:
	ahash_request_free(queue->snd_hash);
free_tfm:
	crypto_free_ahash(tfm);
	return -ENOMEM;
}

static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
{
	struct nvme_tcp_request *async = &ctrl->async_req;
@@ -1451,9 +1421,6 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
	if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
		return;

	if (queue->hdr_digest || queue->data_digest)
		nvme_tcp_free_crypto(queue);

	page_frag_cache_drain(&queue->pf_cache);

	noreclaim_flag = memalloc_noreclaim_save();
@@ -1867,21 +1834,13 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,

	queue->hdr_digest = nctrl->opts->hdr_digest;
	queue->data_digest = nctrl->opts->data_digest;
	if (queue->hdr_digest || queue->data_digest) {
		ret = nvme_tcp_alloc_crypto(queue);
		if (ret) {
			dev_err(nctrl->device,
				"failed to allocate queue %d crypto\n", qid);
			goto err_sock;
		}
	}

	rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
			nvme_tcp_hdgst_len(queue);
	queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
	if (!queue->pdu) {
		ret = -ENOMEM;
		goto err_crypto;
		goto err_sock;
	}

	dev_dbg(nctrl->device, "connecting queue %d\n",
@@ -1914,9 +1873,6 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
err_rcv_pdu:
	kfree(queue->pdu);
err_crypto:
	if (queue->hdr_digest || queue->data_digest)
		nvme_tcp_free_crypto(queue);
err_sock:
	/* ->sock will be released by fput() */
	fput(queue->sock->file);
+0 −23
Original line number Diff line number Diff line
@@ -76,29 +76,6 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
	return crc32_le_shift(crc1, len2) ^ crc2;
}

/* Prototype only; crc32c_combine() below is the caller visible here. */
u32 crc32c_shift(u32 crc, size_t len);

/**
 * crc32c_combine - Combine two crc32c check values into one.
 * @crc1: crc32c of the first block
 * @crc2: crc32c of the second block
 * @len2: length of the second block
 *
 * For two sequences of bytes, seq1 and seq2 with lengths len1 and len2,
 * crc32c() check values were calculated for each, crc1 and crc2.
 *
 * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring
 *	   only crc1, crc2, and len2. Note: If seq_full denotes the concatenated
 *	   memory area of seq1 with seq2, and crc_full the crc32c() value of
 *	   seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when
 *	   crc_full was seeded with the same initializer as crc1, and crc2 seed
 *	   was 0. See also crc_combine_test().
 */
static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2)
{
	/* crc1 is passed through crc32c_shift() with len2, then XORed with crc2. */
	return crc32c_shift(crc1, len2) ^ crc2;
}

#define crc32(seed, data, length)  crc32_le(seed, (unsigned char const *)(data), length)

/*
Loading