Files
linux-net/arch/x86/lib/copy_user_64.S
Linus Torvalds adfcf4231b x86: don't use REP_GOOD or ERMS for user memory copies
The modern target to use is FSRM (Fast Short REP MOVS), and the other
cases should only be used for bigger areas (ie mainly things like page
clearing).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2023-04-18 17:05:28 -07:00

374 lines
8.4 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
* Copyright 2002 Andi Kleen, SuSE Labs.
*
* Functions to copy from and to user space.
*/
#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
jz 102f /* already aligned */
subl $8,%ecx
negl %ecx
subl %ecx,%edx
100: movb (%rsi),%al
101: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 100b
102:
_ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
_ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
.endm
/*
* copy_user_generic_unrolled - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro
* code for rep movsq
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
SYM_FUNC_START(copy_user_generic_unrolled)
ASM_STAC
cmpl $8,%edx
jb .Lcopy_user_short_string_bytes
ALIGN_DESTINATION
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
jz copy_user_short_string
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
4: movq 3*8(%rsi),%r11
5: movq %r8,(%rdi)
6: movq %r9,1*8(%rdi)
7: movq %r10,2*8(%rdi)
8: movq %r11,3*8(%rdi)
9: movq 4*8(%rsi),%r8
10: movq 5*8(%rsi),%r9
11: movq 6*8(%rsi),%r10
12: movq 7*8(%rsi),%r11
13: movq %r8,4*8(%rdi)
14: movq %r9,5*8(%rdi)
15: movq %r10,6*8(%rdi)
16: movq %r11,7*8(%rdi)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
jmp copy_user_short_string
30: shll $6,%ecx
addl %ecx,%edx
jmp .Lcopy_user_handle_tail
_ASM_EXTABLE_CPY(1b, 30b)
_ASM_EXTABLE_CPY(2b, 30b)
_ASM_EXTABLE_CPY(3b, 30b)
_ASM_EXTABLE_CPY(4b, 30b)
_ASM_EXTABLE_CPY(5b, 30b)
_ASM_EXTABLE_CPY(6b, 30b)
_ASM_EXTABLE_CPY(7b, 30b)
_ASM_EXTABLE_CPY(8b, 30b)
_ASM_EXTABLE_CPY(9b, 30b)
_ASM_EXTABLE_CPY(10b, 30b)
_ASM_EXTABLE_CPY(11b, 30b)
_ASM_EXTABLE_CPY(12b, 30b)
_ASM_EXTABLE_CPY(13b, 30b)
_ASM_EXTABLE_CPY(14b, 30b)
_ASM_EXTABLE_CPY(15b, 30b)
_ASM_EXTABLE_CPY(16b, 30b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
/*
* Some CPUs support FSRM for Fast Short REP MOVS.
*
* Only 4GB of copy is supported. This shouldn't be a problem
* because the kernel normally only writes from/to page sized chunks
* even if user space passed a longer buffer.
* And more would be dangerous because both Intel and AMD have
* errata with rep movsq > 4GB. If someone feels the need to fix
* this please consider this.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
SYM_FUNC_START(copy_user_fast_string)
ASM_STAC
movl %edx,%ecx
1: rep movsb
xorl %eax,%eax
ASM_CLAC
RET
12: movl %ecx,%eax /* ecx is zerorest also */
ASM_CLAC
RET
_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_fast_string)
EXPORT_SYMBOL(copy_user_fast_string)
/*
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
* Don't try to copy the tail if machine check happened
*
* Input:
* eax trap number written by ex_handler_copy()
* rdi destination
* rsi source
* rdx count
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
cmp $X86_TRAP_MC,%eax
je 3f
movl %edx,%ecx
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
RET
3:
movl %edx,%eax
ASM_CLAC
RET
_ASM_EXTABLE_CPY(1b, 2b)
.Lcopy_user_handle_align:
addl %ecx,%edx /* ecx is zerorest also */
jmp .Lcopy_user_handle_tail
SYM_CODE_END(.Lcopy_user_handle_tail)
/*
* Finish memcpy of less than 64 bytes. #AC should already be set.
*
* Input:
* rdi destination
* rsi source
* rdx count (< 64)
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
SYM_CODE_START_LOCAL(copy_user_short_string)
movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz .Lcopy_user_short_string_bytes
18: movq (%rsi),%r8
19: movq %r8,(%rdi)
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
decl %ecx
jnz 18b
.Lcopy_user_short_string_bytes:
andl %edx,%edx
jz 23f
movl %edx,%ecx
21: movb (%rsi),%al
22: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 21b
23: xor %eax,%eax
ASM_CLAC
RET
40: leal (%rdx,%rcx,8),%edx
jmp 60f
50: movl %ecx,%edx /* ecx is zerorest also */
60: jmp .Lcopy_user_handle_tail
_ASM_EXTABLE_CPY(18b, 40b)
_ASM_EXTABLE_CPY(19b, 40b)
_ASM_EXTABLE_CPY(21b, 50b)
_ASM_EXTABLE_CPY(22b, 50b)
SYM_CODE_END(copy_user_short_string)
/*
* copy_user_nocache - Uncached memory copy with exception handling
* This will force destination out of cache for more performance.
*
* Note: Cached memory copy is used when destination or size is not
* naturally aligned. That is:
* - Require 8-byte alignment when size is 8 bytes or larger.
* - Require 4-byte alignment when size is 4 bytes.
*/
SYM_FUNC_START(__copy_user_nocache)
ASM_STAC
/* If size is less than 8 bytes, go to 4-byte copy */
cmpl $8,%edx
jb .L_4b_nocache_copy_entry
/* If destination is not 8-byte aligned, "cache" copy to align it */
ALIGN_DESTINATION
/* Set 4x8-byte copy count and remainder */
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
jz .L_8b_nocache_copy_entry /* jump if count is 0 */
/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
4: movq 3*8(%rsi),%r11
5: movnti %r8,(%rdi)
6: movnti %r9,1*8(%rdi)
7: movnti %r10,2*8(%rdi)
8: movnti %r11,3*8(%rdi)
9: movq 4*8(%rsi),%r8
10: movq 5*8(%rsi),%r9
11: movq 6*8(%rsi),%r10
12: movq 7*8(%rsi),%r11
13: movnti %r8,4*8(%rdi)
14: movnti %r9,5*8(%rdi)
15: movnti %r10,6*8(%rdi)
16: movnti %r11,7*8(%rdi)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
decl %ecx
jnz .L_4x8b_nocache_copy_loop
/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz .L_4b_nocache_copy_entry /* jump if count is 0 */
/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20: movq (%rsi),%r8
21: movnti %r8,(%rdi)
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
decl %ecx
jnz .L_8b_nocache_copy_loop
/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
andl %edx,%edx
jz .L_finish_copy
/* If destination is not 4-byte aligned, go to byte copy: */
movl %edi,%ecx
andl $3,%ecx
jnz .L_1b_cache_copy_entry
/* Set 4-byte copy count (1 or 0) and remainder */
movl %edx,%ecx
andl $3,%edx
shrl $2,%ecx
jz .L_1b_cache_copy_entry /* jump if count is 0 */
/* Perform 4-byte nocache copy: */
30: movl (%rsi),%r8d
31: movnti %r8d,(%rdi)
leaq 4(%rsi),%rsi
leaq 4(%rdi),%rdi
/* If no bytes left, we're done: */
andl %edx,%edx
jz .L_finish_copy
/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
movl %edx,%ecx
.L_1b_cache_copy_loop:
40: movb (%rsi),%al
41: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_1b_cache_copy_loop
/* Finished copying; fence the prior stores */
.L_finish_copy:
xorl %eax,%eax
ASM_CLAC
sfence
RET
.L_fixup_4x8b_copy:
shll $6,%ecx
addl %ecx,%edx
jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
lea (%rdx,%rcx,8),%rdx
jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
lea (%rdx,%rcx,4),%rdx
jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
movl %ecx,%edx
.L_fixup_handle_tail:
sfence
jmp .Lcopy_user_handle_tail
_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)