mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
synced 2026-04-25 00:52:45 -04:00
When any of the copy functions in arch/x86/lib/copy_user_64.S take a
fault, the fixup code copies the remaining byte count from %ecx to %edx
and unconditionally jumps to .Lcopy_user_handle_tail to continue the
copy in case any more bytes can be copied.
If the fault was #PF this may copy more bytes (because the page fault
handler might have fixed the fault). But when the fault is a machine
check the original copy code will have copied all the way to the poisoned
cache line. So .Lcopy_user_handle_tail will just take another machine
check for no good reason.
Every code path to .Lcopy_user_handle_tail comes from an exception fixup
path, so add a check there to check the trap type (in %eax) and simply
return the count of remaining bytes if the trap was a machine check.
Doing this reduces the number of machine checks taken during synthetic
tests from four to three.
As well as reducing the number of machine checks, this also allows
Skylake generation Xeons to recover some cases that currently fail. The
is because REP; MOVSB is only recoverable when source and destination
are well aligned and the byte count is large. That useless call to
.Lcopy_user_handle_tail may violate one or more of these conditions and
generate a fatal machine check.
[ Tony: Add more details to commit message. ]
[ bp: Fixup comment.
Also, another tip patchset which is adding straight-line speculation
mitigation changes the "ret" instruction to an all-caps macro "RET".
But, since gas is case-insensitive, use "RET" in the newly added asm block
already in order to simplify tip branch merging on its way upstream.
]
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/YcTW5dh8yTGucDd+@agluck-desk2.amr.corp.intel.com
404 lines
9.0 KiB
ArmAsm
404 lines
9.0 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
|
|
* Copyright 2002 Andi Kleen, SuSE Labs.
|
|
*
|
|
* Functions to copy from and to user space.
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/current.h>
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/cpufeatures.h>
|
|
#include <asm/alternative.h>
|
|
#include <asm/asm.h>
|
|
#include <asm/smap.h>
|
|
#include <asm/export.h>
|
|
#include <asm/trapnr.h>
|
|
|
|
.macro ALIGN_DESTINATION
|
|
/* check for bad alignment of destination */
|
|
movl %edi,%ecx
|
|
andl $7,%ecx
|
|
jz 102f /* already aligned */
|
|
subl $8,%ecx
|
|
negl %ecx
|
|
subl %ecx,%edx
|
|
100: movb (%rsi),%al
|
|
101: movb %al,(%rdi)
|
|
incq %rsi
|
|
incq %rdi
|
|
decl %ecx
|
|
jnz 100b
|
|
102:
|
|
.section .fixup,"ax"
|
|
103: addl %ecx,%edx /* ecx is zerorest also */
|
|
jmp .Lcopy_user_handle_tail
|
|
.previous
|
|
|
|
_ASM_EXTABLE_CPY(100b, 103b)
|
|
_ASM_EXTABLE_CPY(101b, 103b)
|
|
.endm
|
|
|
|
/*
|
|
* copy_user_generic_unrolled - memory copy with exception handling.
|
|
* This version is for CPUs like P4 that don't have efficient micro
|
|
* code for rep movsq
|
|
*
|
|
* Input:
|
|
* rdi destination
|
|
* rsi source
|
|
* rdx count
|
|
*
|
|
* Output:
|
|
* eax uncopied bytes or 0 if successful.
|
|
*/
|
|
SYM_FUNC_START(copy_user_generic_unrolled)
|
|
ASM_STAC
|
|
cmpl $8,%edx
|
|
jb 20f /* less then 8 bytes, go to byte copy loop */
|
|
ALIGN_DESTINATION
|
|
movl %edx,%ecx
|
|
andl $63,%edx
|
|
shrl $6,%ecx
|
|
jz .L_copy_short_string
|
|
1: movq (%rsi),%r8
|
|
2: movq 1*8(%rsi),%r9
|
|
3: movq 2*8(%rsi),%r10
|
|
4: movq 3*8(%rsi),%r11
|
|
5: movq %r8,(%rdi)
|
|
6: movq %r9,1*8(%rdi)
|
|
7: movq %r10,2*8(%rdi)
|
|
8: movq %r11,3*8(%rdi)
|
|
9: movq 4*8(%rsi),%r8
|
|
10: movq 5*8(%rsi),%r9
|
|
11: movq 6*8(%rsi),%r10
|
|
12: movq 7*8(%rsi),%r11
|
|
13: movq %r8,4*8(%rdi)
|
|
14: movq %r9,5*8(%rdi)
|
|
15: movq %r10,6*8(%rdi)
|
|
16: movq %r11,7*8(%rdi)
|
|
leaq 64(%rsi),%rsi
|
|
leaq 64(%rdi),%rdi
|
|
decl %ecx
|
|
jnz 1b
|
|
.L_copy_short_string:
|
|
movl %edx,%ecx
|
|
andl $7,%edx
|
|
shrl $3,%ecx
|
|
jz 20f
|
|
18: movq (%rsi),%r8
|
|
19: movq %r8,(%rdi)
|
|
leaq 8(%rsi),%rsi
|
|
leaq 8(%rdi),%rdi
|
|
decl %ecx
|
|
jnz 18b
|
|
20: andl %edx,%edx
|
|
jz 23f
|
|
movl %edx,%ecx
|
|
21: movb (%rsi),%al
|
|
22: movb %al,(%rdi)
|
|
incq %rsi
|
|
incq %rdi
|
|
decl %ecx
|
|
jnz 21b
|
|
23: xor %eax,%eax
|
|
ASM_CLAC
|
|
ret
|
|
|
|
.section .fixup,"ax"
|
|
30: shll $6,%ecx
|
|
addl %ecx,%edx
|
|
jmp 60f
|
|
40: leal (%rdx,%rcx,8),%edx
|
|
jmp 60f
|
|
50: movl %ecx,%edx
|
|
60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
|
|
.previous
|
|
|
|
_ASM_EXTABLE_CPY(1b, 30b)
|
|
_ASM_EXTABLE_CPY(2b, 30b)
|
|
_ASM_EXTABLE_CPY(3b, 30b)
|
|
_ASM_EXTABLE_CPY(4b, 30b)
|
|
_ASM_EXTABLE_CPY(5b, 30b)
|
|
_ASM_EXTABLE_CPY(6b, 30b)
|
|
_ASM_EXTABLE_CPY(7b, 30b)
|
|
_ASM_EXTABLE_CPY(8b, 30b)
|
|
_ASM_EXTABLE_CPY(9b, 30b)
|
|
_ASM_EXTABLE_CPY(10b, 30b)
|
|
_ASM_EXTABLE_CPY(11b, 30b)
|
|
_ASM_EXTABLE_CPY(12b, 30b)
|
|
_ASM_EXTABLE_CPY(13b, 30b)
|
|
_ASM_EXTABLE_CPY(14b, 30b)
|
|
_ASM_EXTABLE_CPY(15b, 30b)
|
|
_ASM_EXTABLE_CPY(16b, 30b)
|
|
_ASM_EXTABLE_CPY(18b, 40b)
|
|
_ASM_EXTABLE_CPY(19b, 40b)
|
|
_ASM_EXTABLE_CPY(21b, 50b)
|
|
_ASM_EXTABLE_CPY(22b, 50b)
|
|
SYM_FUNC_END(copy_user_generic_unrolled)
|
|
EXPORT_SYMBOL(copy_user_generic_unrolled)
|
|
|
|
/* Some CPUs run faster using the string copy instructions.
|
|
* This is also a lot simpler. Use them when possible.
|
|
*
|
|
* Only 4GB of copy is supported. This shouldn't be a problem
|
|
* because the kernel normally only writes from/to page sized chunks
|
|
* even if user space passed a longer buffer.
|
|
* And more would be dangerous because both Intel and AMD have
|
|
* errata with rep movsq > 4GB. If someone feels the need to fix
|
|
* this please consider this.
|
|
*
|
|
* Input:
|
|
* rdi destination
|
|
* rsi source
|
|
* rdx count
|
|
*
|
|
* Output:
|
|
* eax uncopied bytes or 0 if successful.
|
|
*/
|
|
SYM_FUNC_START(copy_user_generic_string)
|
|
ASM_STAC
|
|
cmpl $8,%edx
|
|
jb 2f /* less than 8 bytes, go to byte copy loop */
|
|
ALIGN_DESTINATION
|
|
movl %edx,%ecx
|
|
shrl $3,%ecx
|
|
andl $7,%edx
|
|
1: rep
|
|
movsq
|
|
2: movl %edx,%ecx
|
|
3: rep
|
|
movsb
|
|
xorl %eax,%eax
|
|
ASM_CLAC
|
|
ret
|
|
|
|
.section .fixup,"ax"
|
|
11: leal (%rdx,%rcx,8),%ecx
|
|
12: movl %ecx,%edx /* ecx is zerorest also */
|
|
jmp .Lcopy_user_handle_tail
|
|
.previous
|
|
|
|
_ASM_EXTABLE_CPY(1b, 11b)
|
|
_ASM_EXTABLE_CPY(3b, 12b)
|
|
SYM_FUNC_END(copy_user_generic_string)
|
|
EXPORT_SYMBOL(copy_user_generic_string)
|
|
|
|
/*
|
|
* Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
|
|
* It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
|
|
*
|
|
* Input:
|
|
* rdi destination
|
|
* rsi source
|
|
* rdx count
|
|
*
|
|
* Output:
|
|
* eax uncopied bytes or 0 if successful.
|
|
*/
|
|
SYM_FUNC_START(copy_user_enhanced_fast_string)
|
|
ASM_STAC
|
|
cmpl $64,%edx
|
|
jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */
|
|
movl %edx,%ecx
|
|
1: rep
|
|
movsb
|
|
xorl %eax,%eax
|
|
ASM_CLAC
|
|
ret
|
|
|
|
.section .fixup,"ax"
|
|
12: movl %ecx,%edx /* ecx is zerorest also */
|
|
jmp .Lcopy_user_handle_tail
|
|
.previous
|
|
|
|
_ASM_EXTABLE_CPY(1b, 12b)
|
|
SYM_FUNC_END(copy_user_enhanced_fast_string)
|
|
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
|
|
|
|
/*
|
|
* Try to copy last bytes and clear the rest if needed.
|
|
* Since protection fault in copy_from/to_user is not a normal situation,
|
|
* it is not necessary to optimize tail handling.
|
|
* Don't try to copy the tail if machine check happened
|
|
*
|
|
* Input:
|
|
* eax trap number written by ex_handler_copy()
|
|
* rdi destination
|
|
* rsi source
|
|
* rdx count
|
|
*
|
|
* Output:
|
|
* eax uncopied bytes or 0 if successful.
|
|
*/
|
|
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
|
|
cmp $X86_TRAP_MC,%eax
|
|
je 3f
|
|
|
|
movl %edx,%ecx
|
|
1: rep movsb
|
|
2: mov %ecx,%eax
|
|
ASM_CLAC
|
|
ret
|
|
|
|
3:
|
|
movl %edx,%eax
|
|
ASM_CLAC
|
|
RET
|
|
|
|
_ASM_EXTABLE_CPY(1b, 2b)
|
|
SYM_CODE_END(.Lcopy_user_handle_tail)
|
|
|
|
/*
|
|
* copy_user_nocache - Uncached memory copy with exception handling
|
|
* This will force destination out of cache for more performance.
|
|
*
|
|
* Note: Cached memory copy is used when destination or size is not
|
|
* naturally aligned. That is:
|
|
* - Require 8-byte alignment when size is 8 bytes or larger.
|
|
* - Require 4-byte alignment when size is 4 bytes.
|
|
*/
|
|
SYM_FUNC_START(__copy_user_nocache)
|
|
ASM_STAC
|
|
|
|
/* If size is less than 8 bytes, go to 4-byte copy */
|
|
cmpl $8,%edx
|
|
jb .L_4b_nocache_copy_entry
|
|
|
|
/* If destination is not 8-byte aligned, "cache" copy to align it */
|
|
ALIGN_DESTINATION
|
|
|
|
/* Set 4x8-byte copy count and remainder */
|
|
movl %edx,%ecx
|
|
andl $63,%edx
|
|
shrl $6,%ecx
|
|
jz .L_8b_nocache_copy_entry /* jump if count is 0 */
|
|
|
|
/* Perform 4x8-byte nocache loop-copy */
|
|
.L_4x8b_nocache_copy_loop:
|
|
1: movq (%rsi),%r8
|
|
2: movq 1*8(%rsi),%r9
|
|
3: movq 2*8(%rsi),%r10
|
|
4: movq 3*8(%rsi),%r11
|
|
5: movnti %r8,(%rdi)
|
|
6: movnti %r9,1*8(%rdi)
|
|
7: movnti %r10,2*8(%rdi)
|
|
8: movnti %r11,3*8(%rdi)
|
|
9: movq 4*8(%rsi),%r8
|
|
10: movq 5*8(%rsi),%r9
|
|
11: movq 6*8(%rsi),%r10
|
|
12: movq 7*8(%rsi),%r11
|
|
13: movnti %r8,4*8(%rdi)
|
|
14: movnti %r9,5*8(%rdi)
|
|
15: movnti %r10,6*8(%rdi)
|
|
16: movnti %r11,7*8(%rdi)
|
|
leaq 64(%rsi),%rsi
|
|
leaq 64(%rdi),%rdi
|
|
decl %ecx
|
|
jnz .L_4x8b_nocache_copy_loop
|
|
|
|
/* Set 8-byte copy count and remainder */
|
|
.L_8b_nocache_copy_entry:
|
|
movl %edx,%ecx
|
|
andl $7,%edx
|
|
shrl $3,%ecx
|
|
jz .L_4b_nocache_copy_entry /* jump if count is 0 */
|
|
|
|
/* Perform 8-byte nocache loop-copy */
|
|
.L_8b_nocache_copy_loop:
|
|
20: movq (%rsi),%r8
|
|
21: movnti %r8,(%rdi)
|
|
leaq 8(%rsi),%rsi
|
|
leaq 8(%rdi),%rdi
|
|
decl %ecx
|
|
jnz .L_8b_nocache_copy_loop
|
|
|
|
/* If no byte left, we're done */
|
|
.L_4b_nocache_copy_entry:
|
|
andl %edx,%edx
|
|
jz .L_finish_copy
|
|
|
|
/* If destination is not 4-byte aligned, go to byte copy: */
|
|
movl %edi,%ecx
|
|
andl $3,%ecx
|
|
jnz .L_1b_cache_copy_entry
|
|
|
|
/* Set 4-byte copy count (1 or 0) and remainder */
|
|
movl %edx,%ecx
|
|
andl $3,%edx
|
|
shrl $2,%ecx
|
|
jz .L_1b_cache_copy_entry /* jump if count is 0 */
|
|
|
|
/* Perform 4-byte nocache copy: */
|
|
30: movl (%rsi),%r8d
|
|
31: movnti %r8d,(%rdi)
|
|
leaq 4(%rsi),%rsi
|
|
leaq 4(%rdi),%rdi
|
|
|
|
/* If no bytes left, we're done: */
|
|
andl %edx,%edx
|
|
jz .L_finish_copy
|
|
|
|
/* Perform byte "cache" loop-copy for the remainder */
|
|
.L_1b_cache_copy_entry:
|
|
movl %edx,%ecx
|
|
.L_1b_cache_copy_loop:
|
|
40: movb (%rsi),%al
|
|
41: movb %al,(%rdi)
|
|
incq %rsi
|
|
incq %rdi
|
|
decl %ecx
|
|
jnz .L_1b_cache_copy_loop
|
|
|
|
/* Finished copying; fence the prior stores */
|
|
.L_finish_copy:
|
|
xorl %eax,%eax
|
|
ASM_CLAC
|
|
sfence
|
|
ret
|
|
|
|
.section .fixup,"ax"
|
|
.L_fixup_4x8b_copy:
|
|
shll $6,%ecx
|
|
addl %ecx,%edx
|
|
jmp .L_fixup_handle_tail
|
|
.L_fixup_8b_copy:
|
|
lea (%rdx,%rcx,8),%rdx
|
|
jmp .L_fixup_handle_tail
|
|
.L_fixup_4b_copy:
|
|
lea (%rdx,%rcx,4),%rdx
|
|
jmp .L_fixup_handle_tail
|
|
.L_fixup_1b_copy:
|
|
movl %ecx,%edx
|
|
.L_fixup_handle_tail:
|
|
sfence
|
|
jmp .Lcopy_user_handle_tail
|
|
.previous
|
|
|
|
_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
|
|
_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
|
|
_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
|
|
_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
|
|
_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
|
|
_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
|
|
_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
|
|
SYM_FUNC_END(__copy_user_nocache)
|
|
EXPORT_SYMBOL(__copy_user_nocache)
|