s390/checksum: provide csum_partial_copy_nocheck() (dcd3e1de) · Commits · git / linux-nf

arch/s390/include/asm/checksum.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -32,6 +32,9 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum)

		__wsum csum_partial(const void *buff, int len, __wsum sum);

		#define _HAVE_ARCH_CSUM_AND_COPY
		/*
		 * Copy len bytes from src to dst and return the checksum of the data.
		 * Both buffers are passed by pointer; note the (src, dst) argument order.
		 */
		__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);

		/*
		* Fold a partial checksum without adding pseudo headers.
		*/

arch/s390/include/asm/fpu-insn-asm.h

+10 −0

Original line number	Diff line number	Diff line
		@@ -531,6 +531,16 @@
		MRXBOPC 0, 0x37, v1
		.endm

		/*
		 * VECTOR STORE WITH LENGTH
		 *
		 * Operands:
		 *   v    - vector register, translated to its number by VX_NUM
		 *   gr   - general register operand, translated by GR_NUM (placed in
		 *          the low four bits of the first halfword)
		 *   disp - displacement, placed in the low bits of the second halfword
		 *   base - base general register, placed in the top four bits of the
		 *          second halfword
		 *
		 * The instruction is emitted by hand as two .word directives followed by
		 * MRXBOPC, which is defined elsewhere in this file (presumably it emits
		 * the remaining halfword for v1 with opcode byte 0x3f - confirm there).
		 */
		.macro	VSTL	v, gr, disp, base
			VX_NUM	v1, \v
			GR_NUM	b2, \base
			GR_NUM	r3, \gr
			.word	0xE700 | ((v1&15) << 4) | r3
			.word	(b2 << 12) | (\disp)
			MRXBOPC	0, 0x3f, v1
		.endm

		/* Vector integer instructions */

		/* VECTOR AND */

arch/s390/include/asm/fpu-insn.h

+58 −0

Original line number	Diff line number	Diff line
		@@ -241,6 +241,64 @@ static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index)

		#ifdef CONFIG_CC_IS_CLANG

		/*
		 * Store vector register v1 to the memory at vxr (clang variant).
		 *
		 * The target address is first loaded into general purpose register 1
		 * with "la" and then handed to VST as an explicit base register with a
		 * zero displacement; this is why register 1 is listed as clobbered.
		 *
		 * v1 is an "I" (immediate) operand, so it must be a compile-time
		 * constant. vxr is const-qualified in the signature, but the storage it
		 * points to is written: it is the asm output operand and is reported to
		 * the instrumentation as a write of sizeof(__vector128) bytes.
		 */
		static __always_inline void fpu_vst(u8 v1, const void *vxr)
		{
			instrument_write(vxr, sizeof(__vector128));
			asm volatile("\n"
				" la 1,%[vxr]\n"
				" VST %[v1],0,,1\n"
				: [vxr] "=R" (*(__vector128 *)vxr)
				: [v1] "I" (v1)
				: "memory", "1");
		}

		#else /* CONFIG_CC_IS_CLANG */

		/*
		 * Store vector register v1 to the memory at vxr (non-clang variant).
		 *
		 * The memory operand is passed directly to VST: %O[vxr] expands to the
		 * displacement and %R[vxr] to the base register of the "Q" memory
		 * operand, so no scratch register is needed.
		 *
		 * v1 is an "I" (immediate) operand, so it must be a compile-time
		 * constant. vxr is const-qualified in the signature, but the storage it
		 * points to is written: it is the asm output operand and is reported to
		 * the instrumentation as a write of sizeof(__vector128) bytes.
		 */
		static __always_inline void fpu_vst(u8 v1, const void *vxr)
		{
			instrument_write(vxr, sizeof(__vector128));
			asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n"
				     : [vxr] "=Q" (*(__vector128 *)vxr)
				     : [v1] "I" (v1)
				     : "memory");
		}

		#endif /* CONFIG_CC_IS_CLANG */

		#ifdef CONFIG_CC_IS_CLANG

		/*
		 * VECTOR STORE WITH LENGTH of vector register v1 to vxr (clang variant).
		 *
		 * index is passed to the instruction in a general register ("d"
		 * constraint). The write reported to the instrumentation covers
		 * index + 1 bytes, capped at sizeof(__vector128).
		 *
		 * The target address is first loaded into general purpose register 1
		 * with "la" and handed to VSTL as an explicit base register with a zero
		 * displacement; this is why register 1 is listed as clobbered.
		 *
		 * v1 is an "I" (immediate) operand, so it must be a compile-time
		 * constant. vxr is const-qualified in the signature, but the storage it
		 * points to is written (it is the asm output operand).
		 */
		static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
		{
			unsigned int size;

			size = min(index + 1, sizeof(__vector128));
			instrument_write(vxr, size);
			asm volatile("\n"
				" la 1,%[vxr]\n"
				" VSTL %[v1],%[index],0,1\n"
				: [vxr] "=R" (*(u8 *)vxr)
				: [index] "d" (index), [v1] "I" (v1)
				: "memory", "1");
		}

		#else /* CONFIG_CC_IS_CLANG */

		/*
		 * VECTOR STORE WITH LENGTH of vector register v1 to vxr (non-clang
		 * variant).
		 *
		 * index is passed to the instruction in a general register ("d"
		 * constraint). The write reported to the instrumentation covers
		 * index + 1 bytes, capped at sizeof(__vector128).
		 *
		 * The memory operand is passed directly to VSTL: %O[vxr] expands to the
		 * displacement and %R[vxr] to the base register of the "Q" memory
		 * operand, so no scratch register is needed.
		 *
		 * v1 is an "I" (immediate) operand, so it must be a compile-time
		 * constant. vxr is const-qualified in the signature, but the storage it
		 * points to is written (it is the asm output operand).
		 */
		static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
		{
			unsigned int size;

			size = min(index + 1, sizeof(__vector128));
			instrument_write(vxr, size);
			asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n"
				     : [vxr] "=Q" (*(u8 *)vxr)
				     : [index] "d" (index), [v1] "I" (v1)
				     : "memory");
		}

		#endif /* CONFIG_CC_IS_CLANG */

		#ifdef CONFIG_CC_IS_CLANG

		#define fpu_vstm(_v1, _v3, _vxrs) \
		({ \
		unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128); \

arch/s390/lib/csum-partial.c

+41 −13

Original line number	Diff line number	Diff line
		@@ -5,8 +5,8 @@
		#include <asm/fpu.h>

		/*
		* Computes the checksum of a memory block at buff, length len,
		* and adds in "sum" (32-bit).
		* Computes the checksum of a memory block at src, length len,
		* and adds in "sum" (32-bit). If copy is true copies to dst.
		*
		* Returns a 32-bit number suitable for feeding into itself
		* or csum_tcpudp_magic.
		@@ -14,43 +14,60 @@
		* This function must be called with even lengths, except
		* for the last fragment, which may be odd.
		*
		* It's best to have buff aligned on a 64-bit boundary.
		* It's best to have src and dst aligned on a 64-bit boundary.
		*/
		__wsum csum_partial(const void *buff, int len, __wsum sum)
		static __always_inline __wsum csum_copy(void dst, const void src, int len, __wsum sum, bool copy)
		{
		DECLARE_KERNEL_FPU_ONSTACK8(vxstate);

		if (!cpu_has_vx())
		return cksm(buff, len, sum);
		if (!cpu_has_vx()) {
			/*
			 * cpu_has_vx() is false, so the vector-register path below
			 * cannot be used: fall back to cksm().
			 *
			 * Checksum the destination only when a copy was actually made.
			 * In the non-copy case dst is not a valid buffer (csum_partial()
			 * passes dst == NULL), so the checksum must be taken from src.
			 */
			if (copy)
				memcpy(dst, src, len);
			return cksm(copy ? dst : src, len, sum);
		}
		kernel_fpu_begin(&vxstate, KERNEL_VXR_V16V23);
		fpu_vlvgf(16, (__force u32)sum, 1);
		fpu_vzero(17);
		fpu_vzero(18);
		fpu_vzero(19);
		while (len >= 64) {
		fpu_vlm(20, 23, buff);
		fpu_vlm(20, 23, src);
		if (copy) {
		fpu_vstm(20, 23, dst);
		dst += 64;
		}
		fpu_vcksm(16, 20, 16);
		fpu_vcksm(17, 21, 17);
		fpu_vcksm(18, 22, 18);
		fpu_vcksm(19, 23, 19);
		buff += 64;
		src += 64;
		len -= 64;
		}
		while (len >= 32) {
		fpu_vlm(20, 21, buff);
		fpu_vlm(20, 21, src);
		if (copy) {
		fpu_vstm(20, 21, dst);
		dst += 32;
		}
		fpu_vcksm(16, 20, 16);
		fpu_vcksm(17, 21, 17);
		buff += 32;
		src += 32;
		len -= 32;
		}
		while (len >= 16) {
		fpu_vl(20, buff);
		fpu_vl(20, src);
		if (copy) {
		fpu_vst(20, dst);
		dst += 16;
		}
		fpu_vcksm(16, 20, 16);
		buff += 16;
		src += 16;
		len -= 16;
		}
		if (len) {
		fpu_vll(20, len - 1, buff);
		fpu_vll(20, len - 1, src);
		if (copy)
		fpu_vstl(20, len - 1, dst);
		fpu_vcksm(16, 20, 16);
		}
		fpu_vcksm(18, 19, 18);
		@@ -60,4 +77,15 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
		kernel_fpu_end(&vxstate, KERNEL_VXR_V16V23);
		return sum;
		}

		/*
		 * Checksum-only entry point: computes the checksum of the len bytes at
		 * buff and adds in sum, without copying the data anywhere.
		 */
		__wsum csum_partial(const void *buff, int len, __wsum sum)
		{
			__wsum result;

			/* copy == false: no destination buffer is supplied. */
			result = csum_copy(NULL, buff, len, sum, false);
			return result;
		}
		EXPORT_SYMBOL(csum_partial);

		/*
		 * Copy len bytes from src to dst and return the checksum of the data,
		 * starting from an initial sum of 0.
		 *
		 * Note the argument order: this function takes (src, dst), while
		 * csum_copy() takes (dst, src).
		 */
		__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
		{
			return csum_copy(dst, src, len, 0, true);
		}
		EXPORT_SYMBOL(csum_partial_copy_nocheck);