Commit e3f4591f authored by Lisa Robinson, committed by Huacai Chen
Browse files

LoongArch: Align FPU register state to 32 bytes



Move fpr to the beginning of struct loongarch_fpu so it is naturally
aligned to FPU_ALIGN (32 bytes), improving 256-bit SIMD (LASX) context
switch performance.

Also adjust process.c and fpu.S to work well with the new loongarch_fpu
layout.

Signed-off-by: Lisa Robinson <lisa@bytefly.space>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
parent 1829419b
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -80,10 +80,10 @@ BUILD_FPR_ACCESS(32)
BUILD_FPR_ACCESS(64)

/*
 * FPU register state.
 *
 * NOTE: this listing is a diff view whose +/- markers were lost, so fpr
 * shows up twice.  Per the commit message, fpr is moved to the beginning
 * of the struct so that it is naturally aligned to FPU_ALIGN (32 bytes);
 * the trailing fpr line is the pre-change position.
 */
struct loongarch_fpu {
	union fpureg	fpr[NUM_FPU_REGS];	/* new position: first member */
	uint64_t	fcc;	/* 8x8 */
	uint32_t	fcsr;
	uint32_t	ftop;
	union fpureg	fpr[NUM_FPU_REGS];	/* old position (line removed by this commit) */
};

struct loongarch_lbt {
+6 −6
Original line number Diff line number Diff line
@@ -97,7 +97,7 @@
	.endm

#ifdef CONFIG_32BIT
	.macro sc_save_fcc thread tmp0 tmp1
	.macro sc_save_fcc base tmp0 tmp1
	movcf2gr	\tmp0, $fcc0
	move		\tmp1, \tmp0
	movcf2gr	\tmp0, $fcc1
@@ -106,7 +106,7 @@
	bstrins.w	\tmp1, \tmp0, 23, 16
	movcf2gr	\tmp0, $fcc3
	bstrins.w	\tmp1, \tmp0, 31, 24
	EX	st.w	\tmp1, \thread, THREAD_FCC
	EX	st.w	\tmp1, \base, 0
	movcf2gr	\tmp0, $fcc4
	move		\tmp1, \tmp0
	movcf2gr	\tmp0, $fcc5
@@ -115,11 +115,11 @@
	bstrins.w	\tmp1, \tmp0, 23, 16
	movcf2gr	\tmp0, $fcc7
	bstrins.w	\tmp1, \tmp0, 31, 24
	EX	st.w	\tmp1, \thread, (THREAD_FCC + 4)
	EX	st.w	\tmp1, \base, 4
	.endm

	.macro sc_restore_fcc thread tmp0 tmp1
	EX	ld.w	\tmp0, \thread, THREAD_FCC
	.macro sc_restore_fcc base tmp0 tmp1
	EX	ld.w	\tmp0, \base, 0
	bstrpick.w	\tmp1, \tmp0, 7, 0
	movgr2cf	$fcc0, \tmp1
	bstrpick.w	\tmp1, \tmp0, 15, 8
@@ -128,7 +128,7 @@
	movgr2cf	$fcc2, \tmp1
	bstrpick.w	\tmp1, \tmp0, 31, 24
	movgr2cf	$fcc3, \tmp1
	EX	ld.w	\tmp0, \thread, (THREAD_FCC + 4)
	EX	ld.w	\tmp0, \base, 4
	bstrpick.w	\tmp1, \tmp0, 7, 0
	movgr2cf	$fcc4, \tmp1
	bstrpick.w	\tmp1, \tmp0, 15, 8
+2 −0
Original line number Diff line number Diff line
@@ -135,6 +135,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
		return 0;
	}

	dst->thread.fpu.fcsr =  src->thread.fpu.fcsr;

	if (!used_math())
		memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
	else