Commit d2c95f9d authored by Linus Torvalds's avatar Linus Torvalds
Browse files

x86: don't use REP_GOOD or ERMS for user memory clearing



The modern target to use is FSRS (Fast Short REP STOS), and the other
cases should only be used for bigger areas (ie mainly things like page
clearing).

Note! This changes the conditional for the inlining from FSRM ("fast
short rep movs") to FSRS ("fast short rep stos").

We'll have a separate fixup for AMD microarchitectures that have a good
'rep stosb' yet do not set the new Intel-specific FSRS bit (because FSRM
was there first).

Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent adfcf423
Loading
Loading
Loading
Loading
+3 −11
Original line number Diff line number Diff line
@@ -81,10 +81,6 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)

__must_check unsigned long
clear_user_original(void __user *addr, unsigned long len);
__must_check unsigned long
clear_user_rep_good(void __user *addr, unsigned long len);
__must_check unsigned long
clear_user_erms(void __user *addr, unsigned long len);

static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
{
@@ -97,16 +93,12 @@ static __always_inline __must_check unsigned long __clear_user(void __user *addr
	 */
	asm volatile(
		"1:\n\t"
		ALTERNATIVE_3("rep stosb",
			      "call clear_user_erms",	  ALT_NOT(X86_FEATURE_FSRM),
			      "call clear_user_rep_good", ALT_NOT(X86_FEATURE_ERMS),
			      "call clear_user_original", ALT_NOT(X86_FEATURE_REP_GOOD))
		ALTERNATIVE("rep stosb",
			    "call clear_user_original", ALT_NOT(X86_FEATURE_FSRS))
		"2:\n"
	       _ASM_EXTABLE_UA(1b, 2b)
	       : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
	       : "a" (0)
		/* rep_good clobbers %rdx */
	       : "rdx");
	       : "a" (0));

	clac();

+0 −75
Original line number Diff line number Diff line
@@ -113,78 +113,3 @@ SYM_FUNC_START(clear_user_original)
        _ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception)
SYM_FUNC_END(clear_user_original)
EXPORT_SYMBOL(clear_user_original)

/*
 * Alternative clear user-space when CPU feature X86_FEATURE_REP_GOOD is
 * present.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
SYM_FUNC_START(clear_user_rep_good)
	# call the original thing for less than a cacheline
	cmp $64, %rcx
	jb clear_user_original

.Lprep:
	# copy lower 32-bits for rest bytes
	mov %ecx, %edx
	shr $3, %rcx
	jz .Lrep_good_rest_bytes

.Lrep_good_qwords:
	rep stosq

.Lrep_good_rest_bytes:
	and $7, %edx
	jz .Lrep_good_exit

.Lrep_good_bytes:
	mov %edx, %ecx
	rep stosb

.Lrep_good_exit:
	# see .Lexit comment above
	xor %eax, %eax
	RET

.Lrep_good_qwords_exception:
	# convert remaining qwords back into bytes to return to caller
	shl $3, %rcx
	and $7, %edx
	add %rdx, %rcx
	jmp .Lrep_good_exit

	_ASM_EXTABLE_UA(.Lrep_good_qwords, .Lrep_good_qwords_exception)
	_ASM_EXTABLE_UA(.Lrep_good_bytes, .Lrep_good_exit)
SYM_FUNC_END(clear_user_rep_good)
EXPORT_SYMBOL(clear_user_rep_good)

/*
 * Alternative clear user-space when CPU feature X86_FEATURE_ERMS is present.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 *
 */
SYM_FUNC_START(clear_user_erms)
	# call the original thing for less than a cacheline
	cmp $64, %rcx
	jb clear_user_original

.Lerms_bytes:
	rep stosb

.Lerms_exit:
	xorl %eax,%eax
	RET

	_ASM_EXTABLE_UA(.Lerms_bytes, .Lerms_exit)
SYM_FUNC_END(clear_user_erms)
EXPORT_SYMBOL(clear_user_erms)
+0 −2
Original line number Diff line number Diff line
@@ -1284,8 +1284,6 @@ static const char *uaccess_safe_builtin[] = {
	"copy_mc_fragile_handle_tail",
	"copy_mc_enhanced_fast_string",
	"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
	"clear_user_erms",
	"clear_user_rep_good",
	"clear_user_original",
	NULL
};