Commit cb2a1dd5 authored by Heiko Carstens
Browse files

s390/checksum: provide vector register variant of csum_partial()



Provide a faster variant of csum_partial() which uses vector registers
instead of the cksm instruction.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
parent 3a74f44d
Loading
Loading
Loading
Loading
+1 −16
Original line number Diff line number Diff line
@@ -30,22 +30,7 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum)
	return sum;
}

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * Returns a 32-bit number suitable for feeding into itself
 * or csum_tcpudp_magic.
 *
 * This function must be called with even lengths, except
 * for the last fragment, which may be odd.
 *
 * It's best to have buff aligned on a 32-bit boundary.
 *
 * @buff: start of the memory block to checksum
 * @len:  length of the memory block
 * @sum:  checksum value that the result is added into
 *
 * The inline definition forwards all three arguments unchanged to cksm()
 * and returns its result.
 */
static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
{
	return cksm(buff, len, sum);
}
/* Prototype with the same signature as the inline definition above. */
__wsum csum_partial(const void *buff, int len, __wsum sum);

/*
 * Fold a partial checksum without adding pseudo headers.
+19 −0
Original line number Diff line number Diff line
@@ -521,6 +521,15 @@
	VMRL	\vr1, \vr2, \vr3, 3
.endm

/*
 * VECTOR LOAD WITH LENGTH
 *
 * Emits the instruction as hand-encoded halfwords.
 *
 *   v    - vector register operand; converted to a number with VX_NUM and
 *          its low four bits placed in bits 4-7 of the first halfword
 *   gr   - general register operand; converted with GR_NUM and placed in
 *          the low four bits of the first halfword
 *   disp - displacement; OR'd unmasked into the second halfword, so it has
 *          to stay below (1 << 12) to avoid overlapping the base field
 *   base - base general register; converted with GR_NUM and placed in
 *          bits 12-15 of the second halfword
 *
 * The closing MRXBOPC invocation receives 0, 0x37 and v1.
 * NOTE(review): MRXBOPC is defined outside this view; presumably it emits
 * the last halfword (mask, register extension bits, opcode byte) - confirm.
 */
.macro VLL	v, gr, disp, base
	VX_NUM	v1, \v
	GR_NUM	b2, \base
	GR_NUM	r3, \gr
	.word	0xE700 | ((v1&15) << 4) | r3
	.word	(b2 << 12) | (\disp)
	MRXBOPC 0, 0x37, v1
.endm

/* Vector integer instructions */

@@ -534,6 +543,16 @@
	MRXBOPC	0, 0x68, v1, v2, v3
.endm

/*
 * VECTOR CHECKSUM
 *
 * Emits the instruction as hand-encoded halfwords.
 *
 *   vr1 - first vector register operand; low four bits of its number go
 *         into bits 4-7 of the first halfword
 *   vr2 - second vector register operand; low four bits of its number go
 *         into bits 0-3 of the first halfword
 *   vr3 - third vector register operand; low four bits of its number go
 *         into bits 12-15 of the second halfword
 *
 * All three names are converted to register numbers with VX_NUM. The
 * closing MRXBOPC invocation receives 0, 0x66 and all three numbers.
 * NOTE(review): MRXBOPC is defined outside this view; presumably it emits
 * the last halfword (mask, register extension bits, opcode byte) - confirm.
 */
.macro VCKSM	vr1, vr2, vr3
	VX_NUM	v1, \vr1
	VX_NUM	v2, \vr2
	VX_NUM	v3, \vr3
	.word	0xE700 | ((v1&15) << 4) | (v2&15)
	.word	((v3&15) << 12)
	MRXBOPC 0, 0x66, v1, v2, v3
.endm

/* VECTOR EXCLUSIVE OR */
.macro	VX	vr1, vr2, vr3
	VX_NUM	v1, \vr1
+99 −0
Original line number Diff line number Diff line
@@ -108,6 +108,89 @@ static __always_inline void fpu_stfpc(unsigned int *fpc)
		     : "memory");
}

/*
 * fpu_vcksm - issue the VCKSM (vector checksum) instruction
 * @v1: number of the first vector register operand
 * @v2: number of the second vector register operand
 * @v3: number of the third vector register operand
 *
 * The register numbers are handed to the assembler as immediates ("I"
 * constraint), so callers have to pass compile-time constants. The vector
 * registers themselves are not expressed as asm operands.
 * NOTE(review): the "memory" clobber presumably keeps the compiler from
 * reordering this statement against neighbouring fpu_*() helpers - confirm.
 */
static __always_inline void fpu_vcksm(u8 v1, u8 v2, u8 v3)
{
	asm volatile("VCKSM	%[v1],%[v2],%[v3]"
		     :
		     : [v1] "I" (v1), [v2] "I" (v2), [v3] "I" (v3)
		     : "memory");
}

#ifdef CONFIG_CC_IS_CLANG

/*
 * fpu_vl - issue the VL (vector load) instruction, clang variant
 * @v1:  number of the vector register to load; passed as an immediate
 *       ("I" constraint), so it has to be a compile-time constant
 * @vxr: address of the data to load, accessed as a __vector128
 *
 * Reports a read of sizeof(__vector128) bytes at @vxr to instrument_read()
 * before the load. The address is then placed into general register 1 with
 * "la" and VL is issued with displacement 0, an empty index field and base
 * register 1; register 1 is therefore listed as clobbered.
 */
static __always_inline void fpu_vl(u8 v1, const void *vxr)
{
	instrument_read(vxr, sizeof(__vector128));
	asm volatile("\n"
		"	la	1,%[vxr]\n"
		"	VL	%[v1],0,,1\n"
		:
		: [vxr] "R" (*(__vector128 *)vxr),
		  [v1] "I" (v1)
		: "memory", "1");
}

#else /* CONFIG_CC_IS_CLANG */

/*
 * fpu_vl - issue the VL (vector load) instruction, non-clang variant
 * @v1:  number of the vector register to load; passed as an immediate
 *       ("I" constraint), so it has to be a compile-time constant
 * @vxr: address of the data to load, accessed as a __vector128
 *
 * Reports a read of sizeof(__vector128) bytes at @vxr to instrument_read()
 * before the load. The memory operand uses the "Q" constraint and is split
 * in the template with the %O and %R operand modifiers (displacement and
 * base register of the memory reference), leaving the index field empty.
 * No scratch register is clobbered in this variant.
 */
static __always_inline void fpu_vl(u8 v1, const void *vxr)
{
	instrument_read(vxr, sizeof(__vector128));
	asm volatile("VL	%[v1],%O[vxr],,%R[vxr]\n"
		     :
		     : [vxr] "Q" (*(__vector128 *)vxr),
		       [v1] "I" (v1)
		     : "memory");
}

#endif /* CONFIG_CC_IS_CLANG */

/*
 * fpu_vlgvf - issue VLGVF and return the general register it writes
 * @v:     number of the source vector register; passed as an immediate
 *         ("I" constraint), so it has to be a compile-time constant
 * @index: element index; passed as a constant via the "L" constraint
 *
 * Return: the 64-bit value of the data register used as the asm output.
 * NOTE(review): the F suffix suggests a 32-bit element that the instruction
 * zero-extends - confirm against the instruction definition.
 */
static __always_inline u64 fpu_vlgvf(u8 v, u16 index)
{
	u64 val;

	asm volatile("VLGVF	%[val],%[v],%[index]"
		     : [val] "=d" (val)
		     : [v] "I" (v), [index] "L" (index)
		     : "memory");
	return val;
}

#ifdef CONFIG_CC_IS_CLANG

/*
 * fpu_vll - issue VLL (vector load with length), clang variant
 * @v1:    number of the vector register to load; passed as an immediate
 *         ("I" constraint), so it has to be a compile-time constant
 * @index: zero-based index of the last byte to load, passed in a data
 *         register
 * @vxr:   address of the first byte to load
 *
 * Reports the access to instrument_read() before the load. The reported
 * size is index + 1 bytes, capped at sizeof(__vector128). The address is
 * placed into general register 1 with "la" and VLL is issued with
 * displacement 0 and base register 1; register 1 is listed as clobbered.
 */
static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
{
	unsigned int size;

	/*
	 * Clamp before adding one: "index + 1" is evaluated in u32 and would
	 * wrap to 0 for index == U32_MAX, under-reporting the access.
	 */
	size = min(index, sizeof(__vector128) - 1) + 1;
	instrument_read(vxr, size);
	asm volatile("\n"
		"	la	1,%[vxr]\n"
		"	VLL	%[v1],%[index],0,1\n"
		:
		: [vxr] "R" (*(u8 *)vxr),
		  [index] "d" (index),
		  [v1] "I" (v1)
		: "memory", "1");
}

#else /* CONFIG_CC_IS_CLANG */

/*
 * fpu_vll - issue VLL (vector load with length), non-clang variant
 * @v1:    number of the vector register to load; passed as an immediate
 *         ("I" constraint), so it has to be a compile-time constant
 * @index: zero-based index of the last byte to load, passed in a data
 *         register
 * @vxr:   address of the first byte to load
 *
 * Reports the access to instrument_read() before the load. The reported
 * size is index + 1 bytes, capped at sizeof(__vector128). The memory
 * operand uses the "Q" constraint and is split in the template with the
 * %O and %R operand modifiers (displacement and base register).
 */
static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr)
{
	unsigned int size;

	/*
	 * Clamp before adding one: "index + 1" is evaluated in u32 and would
	 * wrap to 0 for index == U32_MAX, under-reporting the access.
	 */
	size = min(index, sizeof(__vector128) - 1) + 1;
	instrument_read(vxr, size);
	asm volatile("VLL	%[v1],%[index],%O[vxr],%R[vxr]\n"
		     :
		     : [vxr] "Q" (*(u8 *)vxr),
		       [index] "d" (index),
		       [v1] "I" (v1)
		     : "memory");
}

#endif /* CONFIG_CC_IS_CLANG */

#ifdef CONFIG_CC_IS_CLANG

#define fpu_vlm(_v1, _v3, _vxrs)					\
@@ -148,6 +231,14 @@ static __always_inline void fpu_stfpc(unsigned int *fpc)

#endif /* CONFIG_CC_IS_CLANG */

/*
 * fpu_vlvgf - issue the VLVGF instruction
 * @v:     number of the target vector register; passed as an immediate
 *         ("I" constraint), so it has to be a compile-time constant
 * @val:   value handed to the instruction in a data register
 * @index: element index; passed as a constant via the "L" constraint
 *
 * The function has no asm outputs; the vector register is not expressed as
 * an operand.
 * NOTE(review): by its name this is the counterpart of fpu_vlgvf(), storing
 * @val into element @index of the vector register - confirm against the
 * instruction definition.
 */
static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index)
{
	asm volatile("VLVGF	%[v],%[val],%[index]"
		     :
		     : [v] "I" (v), [val] "d" (val), [index] "L" (index)
		     : "memory");
}

#ifdef CONFIG_CC_IS_CLANG

#define fpu_vstm(_v1, _v3, _vxrs)					\
@@ -186,5 +277,13 @@ static __always_inline void fpu_stfpc(unsigned int *fpc)

#endif /* CONFIG_CC_IS_CLANG */

/*
 * fpu_vzero - issue VZERO for one vector register
 * @v: number of the vector register; passed as an immediate ("I"
 *     constraint), so it has to be a compile-time constant
 *
 * NOTE(review): VZERO is defined outside this view; by its name it clears
 * the whole register - confirm against its definition.
 */
static __always_inline void fpu_vzero(u8 v)
{
	asm volatile("VZERO	%[v]"
		     :
		     : [v] "I" (v)
		     : "memory");
}

#endif /* __ASSEMBLY__ */
#endif	/* __ASM_S390_FPU_INSN_H */
+4 −0
Original line number Diff line number Diff line
@@ -32,12 +32,16 @@ struct kernel_fpu_##vxr_size { \
	__vector128 vxrs[vxr_size] __aligned(8);		\
}

/*
 * Instantiate struct kernel_fpu_8, struct kernel_fpu_16 and
 * struct kernel_fpu_32; their vxrs[] arrays hold 8, 16 and 32
 * __vector128 entries respectively.
 */
KERNEL_FPU_STRUCT(8);
KERNEL_FPU_STRUCT(16);
KERNEL_FPU_STRUCT(32);

/*
 * Declare a variable called @name of type struct kernel_fpu_<vxr_size>,
 * marked __uninitialized. @vxr_size must match one of the
 * KERNEL_FPU_STRUCT() instantiations.
 */
#define DECLARE_KERNEL_FPU_ONSTACK(vxr_size, name)		\
	struct kernel_fpu_##vxr_size name __uninitialized

/* Shorthand for DECLARE_KERNEL_FPU_ONSTACK() with a vxr_size of 8. */
#define DECLARE_KERNEL_FPU_ONSTACK8(name)			\
	DECLARE_KERNEL_FPU_ONSTACK(8, name)

/* Shorthand for DECLARE_KERNEL_FPU_ONSTACK() with a vxr_size of 16. */
#define DECLARE_KERNEL_FPU_ONSTACK16(name)			\
	DECLARE_KERNEL_FPU_ONSTACK(16, name)

+1 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
#

lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
lib-y += csum-partial.o
obj-y += mem.o xor.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
Loading