Commit cdc20228 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'core-entry-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull generic syscall updates from Ingo Molnar:
 "Move various entry functions from kernel/entry/common.c to a header
  file, and always-inline them, to improve syscall entry performance
  on s390 by ~11%"

* tag 'core-entry-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  entry: Move syscall_enter_from_user_mode() to header file
  entry: Move enter_from_user_mode() to header file
  entry: Move exit to usermode functions to header file
parents ab9517fa 221a1640
Loading
Loading
Loading
Loading
+91 −4
Original line number Diff line number Diff line
@@ -7,6 +7,11 @@
#include <linux/syscalls.h>
#include <linux/seccomp.h>
#include <linux/sched.h>
#include <linux/context_tracking.h>
#include <linux/livepatch.h>
#include <linux/resume_user_mode.h>
#include <linux/tick.h>
#include <linux/kmsan.h>

#include <asm/entry-common.h>

@@ -98,7 +103,19 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
 * done between establishing state and enabling interrupts. The caller must
 * enable interrupts before invoking syscall_enter_from_user_mode_work().
 */
void enter_from_user_mode(struct pt_regs *regs);
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
{
	arch_enter_from_user_mode(regs);
	lockdep_hardirqs_off(CALLER_ADDR0);

	CT_WARN_ON(__ct_state() != CONTEXT_USER);
	user_exit_irqoff();

	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	trace_hardirqs_off_finish();
	instrumentation_end();
}

/**
 * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
@@ -117,6 +134,9 @@ void enter_from_user_mode(struct pt_regs *regs);
 */
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);

long syscall_trace_enter(struct pt_regs *regs, long syscall,
			 unsigned long work);

/**
 * syscall_enter_from_user_mode_work - Check and handle work before invoking
 *				       a syscall
@@ -140,7 +160,15 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
 *     ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
 *  2) Invocation of audit_syscall_entry()
 */
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);

	if (work & SYSCALL_WORK_ENTER)
		syscall = syscall_trace_enter(regs, syscall, work);

	return syscall;
}

/**
 * syscall_enter_from_user_mode - Establish state and check and handle work
@@ -159,7 +187,19 @@ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
 * Returns: The original or a modified syscall number. See
 * syscall_enter_from_user_mode_work() for further explanation.
 */
long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
	long ret;

	enter_from_user_mode(regs);

	instrumentation_begin();
	local_irq_enable();
	ret = syscall_enter_from_user_mode_work(regs, syscall);
	instrumentation_end();

	return ret;
}

/**
 * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
@@ -258,6 +298,43 @@ static __always_inline void arch_exit_to_user_mode(void) { }
 */
void arch_do_signal_or_restart(struct pt_regs *regs);

/**
 * exit_to_user_mode_loop - do any pending work before leaving to user space
 */
unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
				     unsigned long ti_work);

/**
 * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
 * @regs:	Pointer to pt_regs on entry stack
 *
 * 1) check that interrupts are disabled
 * 2) call tick_nohz_user_enter_prepare()
 * 3) call exit_to_user_mode_loop() if any flags from
 *    EXIT_TO_USER_MODE_WORK are set
 * 4) check that interrupts are still disabled
 */
static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work;

	lockdep_assert_irqs_disabled();

	/* Flush pending rcuog wakeup before the last need_resched() check */
	tick_nohz_user_enter_prepare();

	ti_work = read_thread_flags();
	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

	arch_exit_to_user_mode_prepare(regs, ti_work);

	/* Ensure that kernel state is sane for a return to userspace */
	kmap_assert_nomap();
	lockdep_assert_irqs_disabled();
	lockdep_sys_exit();
}

/**
 * exit_to_user_mode - Fixup state when exiting to user mode
 *
@@ -276,7 +353,17 @@ void arch_do_signal_or_restart(struct pt_regs *regs);
 * non-instrumentable.
 * The caller has to invoke syscall_exit_to_user_mode_work() before this.
 */
void exit_to_user_mode(void);
static __always_inline void exit_to_user_mode(void)
{
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare();
	instrumentation_end();

	user_enter_irqoff();
	arch_exit_to_user_mode();
	lockdep_hardirqs_on(CALLER_ADDR0);
}

/**
 * syscall_exit_to_user_mode_work - Handle work before returning to user mode
+12 −96
Original line number Diff line number Diff line
@@ -15,26 +15,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

/* See comment for enter_from_user_mode() in entry-common.h */
static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
{
	arch_enter_from_user_mode(regs);
	lockdep_hardirqs_off(CALLER_ADDR0);

	CT_WARN_ON(__ct_state() != CONTEXT_USER);
	user_exit_irqoff();

	instrumentation_begin();
	kmsan_unpoison_entry_regs(regs);
	trace_hardirqs_off_finish();
	instrumentation_end();
}

void noinstr enter_from_user_mode(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
}

static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
{
	if (unlikely(audit_context())) {
@@ -45,7 +25,7 @@ static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
	}
}

static long syscall_trace_enter(struct pt_regs *regs, long syscall,
long syscall_trace_enter(struct pt_regs *regs, long syscall,
				unsigned long work)
{
	long ret = 0;
@@ -85,66 +65,23 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
	return ret ? : syscall;
}

static __always_inline long
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
{
	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);

	if (work & SYSCALL_WORK_ENTER)
		syscall = syscall_trace_enter(regs, syscall, work);

	return syscall;
}

long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
{
	return __syscall_enter_from_user_work(regs, syscall);
}

noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
{
	long ret;

	__enter_from_user_mode(regs);

	instrumentation_begin();
	local_irq_enable();
	ret = __syscall_enter_from_user_work(regs, syscall);
	instrumentation_end();

	return ret;
}

noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
	enter_from_user_mode(regs);
	instrumentation_begin();
	local_irq_enable();
	instrumentation_end();
}

/* See comment for exit_to_user_mode() in entry-common.h */
static __always_inline void __exit_to_user_mode(void)
{
	instrumentation_begin();
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare();
	instrumentation_end();

	user_enter_irqoff();
	arch_exit_to_user_mode();
	lockdep_hardirqs_on(CALLER_ADDR0);
}

void noinstr exit_to_user_mode(void)
{
	__exit_to_user_mode();
}

/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }

static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
/**
 * exit_to_user_mode_loop - do any pending work before leaving to user space
 * @regs:	Pointer to pt_regs on entry stack
 * @ti_work:	TIF work flags as read by the caller
 */
__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
						     unsigned long ti_work)
{
	/*
@@ -190,27 +127,6 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
	return ti_work;
}

static void exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work;

	lockdep_assert_irqs_disabled();

	/* Flush pending rcuog wakeup before the last need_resched() check */
	tick_nohz_user_enter_prepare();

	ti_work = read_thread_flags();
	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

	arch_exit_to_user_mode_prepare(regs, ti_work);

	/* Ensure that kernel state is sane for a return to userspace */
	kmap_assert_nomap();
	lockdep_assert_irqs_disabled();
	lockdep_sys_exit();
}

/*
 * If SYSCALL_EMU is set, then the only reason to report is when
 * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP).  This syscall
@@ -295,12 +211,12 @@ __visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
	instrumentation_begin();
	__syscall_exit_to_user_mode_work(regs);
	instrumentation_end();
	__exit_to_user_mode();
	exit_to_user_mode();
}

noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	__enter_from_user_mode(regs);
	enter_from_user_mode(regs);
}

noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
@@ -308,7 +224,7 @@ noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
	instrumentation_begin();
	exit_to_user_mode_prepare(regs);
	instrumentation_end();
	__exit_to_user_mode();
	exit_to_user_mode();
}

noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)