Commit 5c4378b7 authored by Thomas Gleixner's avatar Thomas Gleixner
Browse files

Merge branch 'core/entry' into sched/core



Pull the entry update to avoid merge conflicts with the time slice
extension changes.

Signed-off-by: default avatarThomas Gleixner <tglx@kernel.org>
parents 377521af 31c9387d
Loading
Loading
Loading
Loading
+148 −17
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
#ifndef __LINUX_ENTRYCOMMON_H
#define __LINUX_ENTRYCOMMON_H

#include <linux/audit.h>
#include <linux/irq-entry-common.h>
#include <linux/livepatch.h>
#include <linux/ptrace.h>
@@ -45,7 +46,84 @@
				 SYSCALL_WORK_SYSCALL_EXIT_TRAP	|	\
				 ARCH_SYSCALL_WORK_EXIT)

long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work);
/**
 * arch_ptrace_report_syscall_entry - Architecture specific ptrace_report_syscall_entry() wrapper
 *
 * Invoked from syscall_trace_enter() to wrap ptrace_report_syscall_entry().
 *
 * This allows architecture specific ptrace_report_syscall_entry()
 * implementations. If not defined by the architecture this falls back to
 * to ptrace_report_syscall_entry().
 */
static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs);

#ifndef arch_ptrace_report_syscall_entry
static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs)
{
	return ptrace_report_syscall_entry(regs);
}
#endif

bool syscall_user_dispatch(struct pt_regs *regs);
long trace_syscall_enter(struct pt_regs *regs, long syscall);
void trace_syscall_exit(struct pt_regs *regs, long ret);

static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
{
	if (unlikely(audit_context())) {
		unsigned long args[6];

		syscall_get_arguments(current, regs, args);
		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
	}
}

static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work)
{
	long syscall, ret = 0;

	/*
	 * Handle Syscall User Dispatch.  This must comes first, since
	 * the ABI here can be something that doesn't make sense for
	 * other syscall_work features.
	 */
	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
		if (syscall_user_dispatch(regs))
			return -1L;
	}

	/*
	 * User space got a time slice extension granted and relinquishes
	 * the CPU. The work stops the slice timer to avoid an extra round
	 * through hrtimer_interrupt().
	 */
	if (work & SYSCALL_WORK_SYSCALL_RSEQ_SLICE)
		rseq_syscall_enter_work(syscall_get_nr(current, regs));

	/* Handle ptrace */
	if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
		ret = arch_ptrace_report_syscall_entry(regs);
		if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
			return -1L;
	}

	/* Do seccomp after ptrace, to catch any tracer changes. */
	if (work & SYSCALL_WORK_SECCOMP) {
		ret = __secure_computing();
		if (ret == -1L)
			return ret;
	}

	/* Either of the above might have changed the syscall number */
	syscall = syscall_get_nr(current, regs);

	if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
		syscall = trace_syscall_enter(regs, syscall);

	syscall_enter_audit(regs, syscall);

	return ret ? : syscall;
}

/**
 * syscall_enter_from_user_mode_work - Check and handle work before invoking
@@ -75,7 +153,7 @@ static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *re
	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);

	if (work & SYSCALL_WORK_ENTER)
		syscall = syscall_trace_enter(regs, syscall, work);
		syscall = syscall_trace_enter(regs, work);

	return syscall;
}
@@ -112,6 +190,37 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
	return ret;
}

/*
 * If SYSCALL_EMU is set, then the only reason to report is when
 * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP).  This syscall
 * instruction has been already reported in syscall_enter_from_user_mode().
 */
static __always_inline bool report_single_step(unsigned long work)
{
	if (work & SYSCALL_WORK_SYSCALL_EMU)
		return false;

	return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
}

/**
 * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit()
 *
 * This allows architecture specific ptrace_report_syscall_exit()
 * implementations. If not defined by the architecture this falls back to
 * to ptrace_report_syscall_exit().
 */
static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
							    int step);

#ifndef arch_ptrace_report_syscall_exit
static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
							    int step)
{
	ptrace_report_syscall_exit(regs, step);
}
#endif

/**
 * syscall_exit_work - Handle work before returning to user mode
 * @regs:	Pointer to current pt_regs
@@ -119,20 +228,40 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
 *
 * Do one-time syscall specific work.
 */
void syscall_exit_work(struct pt_regs *regs, unsigned long work);
static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
	bool step;

	/*
	 * If the syscall was rolled back due to syscall user dispatching,
	 * then the tracers below are not invoked for the same reason as
	 * the entry side was not invoked in syscall_trace_enter(): The ABI
	 * of these syscalls is unknown.
	 */
	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
		if (unlikely(current->syscall_dispatch.on_dispatch)) {
			current->syscall_dispatch.on_dispatch = false;
			return;
		}
	}

	audit_syscall_exit(regs);

	if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
		trace_syscall_exit(regs, syscall_get_return_value(current, regs));

	step = report_single_step(work);
	if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
		arch_ptrace_report_syscall_exit(regs, step);
}

/**
 * syscall_exit_to_user_mode_work - Handle work before returning to user mode
 * syscall_exit_to_user_mode_work - Handle one time work before returning to user mode
 * @regs:	Pointer to currents pt_regs
 *
 * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
 * exit_to_user_mode() to perform the final transition to user mode.
 * Step 1 of syscall_exit_to_user_mode() with the same calling convention.
 *
 * Calling convention is the same as for syscall_exit_to_user_mode() and it
 * returns with all work handled and interrupts disabled. The caller must
 * invoke exit_to_user_mode() before actually switching to user mode to
 * make the final state transitions. Interrupts must stay disabled between
 * return from this function and the invocation of exit_to_user_mode().
 * The caller must invoke steps 2-3 of syscall_exit_to_user_mode() afterwards.
 */
static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
{
@@ -155,15 +284,13 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
	 */
	if (unlikely(work & SYSCALL_WORK_EXIT))
		syscall_exit_work(regs, work);
	local_irq_disable_exit_to_user();
	syscall_exit_to_user_mode_prepare(regs);
}

/**
 * syscall_exit_to_user_mode - Handle work before returning to user mode
 * @regs:	Pointer to currents pt_regs
 *
 * Invoked with interrupts enabled and fully valid regs. Returns with all
 * Invoked with interrupts enabled and fully valid @regs. Returns with all
 * work handled, interrupts disabled such that the caller can immediately
 * switch to user mode. Called from architecture specific syscall and ret
 * from fork code.
@@ -176,6 +303,7 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
 *	- ptrace (single stepping)
 *
 *  2) Preparatory work
 *	- Disable interrupts
 *	- Exit to user mode loop (common TIF handling). Invokes
 *	  arch_exit_to_user_mode_work() for architecture specific TIF work
 *	- Architecture specific one time work arch_exit_to_user_mode_prepare()
@@ -184,14 +312,17 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
 *  3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
 *     functionality in exit_to_user_mode().
 *
 * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
 * exit_to_user_mode(). This function is preferred unless there is a
 * compelling architectural reason to use the separate functions.
 * This is a combination of syscall_exit_to_user_mode_work() (1), disabling
 * interrupts followed by syscall_exit_to_user_mode_prepare() (2) and
 * exit_to_user_mode() (3). This function is preferred unless there is a
 * compelling architectural reason to invoke the functions separately.
 */
static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	syscall_exit_to_user_mode_work(regs);
	local_irq_disable_exit_to_user();
	syscall_exit_to_user_mode_prepare(regs);
	instrumentation_end();
	exit_to_user_mode();
}

kernel/entry/common.h

deleted100644 → 0
+0 −7
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _COMMON_H
#define _COMMON_H

bool syscall_user_dispatch(struct pt_regs *regs);

#endif
+8 −96
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#include <linux/audit.h>
#include <linux/entry-common.h>
#include "common.h"

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
{
	if (unlikely(audit_context())) {
		unsigned long args[6];
/* Out of line to prevent tracepoint code duplication */

		syscall_get_arguments(current, regs, args);
		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
	}
}

long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work)
long trace_syscall_enter(struct pt_regs *regs, long syscall)
{
	long ret = 0;

	/*
	 * Handle Syscall User Dispatch.  This must comes first, since
	 * the ABI here can be something that doesn't make sense for
	 * other syscall_work features.
	 */
	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
		if (syscall_user_dispatch(regs))
			return -1L;
	}

	/*
	 * User space got a time slice extension granted and relinquishes
	 * the CPU. The work stops the slice timer to avoid an extra round
	 * through hrtimer_interrupt().
	 */
	if (work & SYSCALL_WORK_SYSCALL_RSEQ_SLICE)
		rseq_syscall_enter_work(syscall);

	/* Handle ptrace */
	if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
		ret = ptrace_report_syscall_entry(regs);
		if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
			return -1L;
	}

	/* Do seccomp after ptrace, to catch any tracer changes. */
	if (work & SYSCALL_WORK_SECCOMP) {
		ret = __secure_computing();
		if (ret == -1L)
			return ret;
	}

	/* Either of the above might have changed the syscall number */
	syscall = syscall_get_nr(current, regs);

	if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) {
	trace_sys_enter(regs, syscall);
	/*
	 * Probes or BPF hooks in the tracepoint may have changed the
		 * system call number as well.
	 * system call number. Reread it.
	 */
		syscall = syscall_get_nr(current, regs);
	return syscall_get_nr(current, regs);
}

	syscall_enter_audit(regs, syscall);

	return ret ? : syscall;
}

/*
 * If SYSCALL_EMU is set, then the only reason to report is when
 * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP).  This syscall
 * instruction has been already reported in syscall_enter_from_user_mode().
 */
static inline bool report_single_step(unsigned long work)
{
	if (work & SYSCALL_WORK_SYSCALL_EMU)
		return false;

	return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
}

void syscall_exit_work(struct pt_regs *regs, unsigned long work)
void trace_syscall_exit(struct pt_regs *regs, long ret)
{
	bool step;

	/*
	 * If the syscall was rolled back due to syscall user dispatching,
	 * then the tracers below are not invoked for the same reason as
	 * the entry side was not invoked in syscall_trace_enter(): The ABI
	 * of these syscalls is unknown.
	 */
	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
		if (unlikely(current->syscall_dispatch.on_dispatch)) {
			current->syscall_dispatch.on_dispatch = false;
			return;
		}
	}

	audit_syscall_exit(regs);

	if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
		trace_sys_exit(regs, syscall_get_return_value(current, regs));

	step = report_single_step(work);
	if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
		ptrace_report_syscall_exit(regs, step);
	trace_sys_exit(regs, ret);
}
+2 −2
Original line number Diff line number Diff line
@@ -2,6 +2,8 @@
/*
 * Copyright (C) 2020 Collabora Ltd.
 */

#include <linux/entry-common.h>
#include <linux/sched.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
@@ -15,8 +17,6 @@

#include <asm/syscall.h>

#include "common.h"

static void trigger_sigsys(struct pt_regs *regs)
{
	struct kernel_siginfo info;