Commit d923739e, authored by Thomas Gleixner, committed by Ingo Molnar
Browse files

rseq: Simplify the event notification



Since commit 0190e419 ("rseq: Deprecate RSEQ_CS_FLAG_NO_RESTART_ON_*
flags") the bits in task::rseq_event_mask are meaningless, and setting
them individually is just extra work.

Aside from that, the only relevant point where an event has to be raised is
context switch. Neither the CPU nor the MM CID can change without going
through a context switch.

Collapse them all into a single boolean, which simplifies the code a lot,
and remove the pointless invocations which have been sprinkled all over the
place for no value.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20251027084306.336978188@linutronix.de
parent 067b3b41
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1775,7 +1775,7 @@ static int bprm_execve(struct linux_binprm *bprm)
		force_fatal_sig(SIGSEGV);

	sched_mm_cid_after_execve(current);
	rseq_set_notify_resume(current);
	rseq_sched_switch_event(current);
	current->in_execve = 0;

	return retval;
+13 −53
Original line number Diff line number Diff line
@@ -3,38 +3,8 @@
#define _LINUX_RSEQ_H

#ifdef CONFIG_RSEQ

#include <linux/preempt.h>
#include <linux/sched.h>

#ifdef CONFIG_MEMBARRIER
# define RSEQ_EVENT_GUARD	irq
#else
# define RSEQ_EVENT_GUARD	preempt
#endif

/*
 * Map the event mask on the user-space ABI enum rseq_cs_flags
 * for direct mask checks.
 */
enum rseq_event_mask_bits {
	RSEQ_EVENT_PREEMPT_BIT	= RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
	RSEQ_EVENT_SIGNAL_BIT	= RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
	RSEQ_EVENT_MIGRATE_BIT	= RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
};

enum rseq_event_mask {
	RSEQ_EVENT_PREEMPT	= (1U << RSEQ_EVENT_PREEMPT_BIT),
	RSEQ_EVENT_SIGNAL	= (1U << RSEQ_EVENT_SIGNAL_BIT),
	RSEQ_EVENT_MIGRATE	= (1U << RSEQ_EVENT_MIGRATE_BIT),
};

static inline void rseq_set_notify_resume(struct task_struct *t)
{
	if (t->rseq)
		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
}

void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);

static inline void rseq_handle_notify_resume(struct pt_regs *regs)
@@ -43,35 +13,27 @@ static inline void rseq_handle_notify_resume(struct pt_regs *regs)
		__rseq_handle_notify_resume(NULL, regs);
}

static inline void rseq_signal_deliver(struct ksignal *ksig,
				       struct pt_regs *regs)
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
	if (current->rseq) {
		scoped_guard(RSEQ_EVENT_GUARD)
			__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
		current->rseq_event_pending = true;
		__rseq_handle_notify_resume(ksig, regs);
	}
}

/* rseq_preempt() requires preemption to be disabled. */
static inline void rseq_preempt(struct task_struct *t)
static inline void rseq_sched_switch_event(struct task_struct *t)
{
	__set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
	rseq_set_notify_resume(t);
	if (t->rseq) {
		t->rseq_event_pending = true;
		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
	}

/* rseq_migrate() requires preemption to be disabled. */
static inline void rseq_migrate(struct task_struct *t)
{
	__set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
	rseq_set_notify_resume(t);
}

static __always_inline void rseq_exit_to_user_mode(void)
{
	if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
		if (WARN_ON_ONCE(current->rseq && current->rseq_event_mask))
			current->rseq_event_mask = 0;
		if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
			current->rseq_event_pending = false;
	}
}

@@ -85,12 +47,12 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
		t->rseq = NULL;
		t->rseq_len = 0;
		t->rseq_sig = 0;
		t->rseq_event_mask = 0;
		t->rseq_event_pending = false;
	} else {
		t->rseq = current->rseq;
		t->rseq_len = current->rseq_len;
		t->rseq_sig = current->rseq_sig;
		t->rseq_event_mask = current->rseq_event_mask;
		t->rseq_event_pending = current->rseq_event_pending;
	}
}

@@ -99,15 +61,13 @@ static inline void rseq_execve(struct task_struct *t)
	t->rseq = NULL;
	t->rseq_len = 0;
	t->rseq_sig = 0;
	t->rseq_event_mask = 0;
	t->rseq_event_pending = false;
}

#else /* CONFIG_RSEQ */
static inline void rseq_set_notify_resume(struct task_struct *t) { }
static inline void rseq_handle_notify_resume(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_preempt(struct task_struct *t) { }
static inline void rseq_migrate(struct task_struct *t) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
static inline void rseq_exit_to_user_mode(void) { }
+5 −5
Original line number Diff line number Diff line
@@ -1411,10 +1411,10 @@ struct task_struct {
	u32				rseq_len;
	u32				rseq_sig;
	/*
	 * RmW on rseq_event_mask must be performed atomically
	 * RmW on rseq_event_pending must be performed atomically
	 * with respect to preemption.
	 */
	unsigned long rseq_event_mask;
	bool				rseq_event_pending;
# ifdef CONFIG_DEBUG_RSEQ
	/*
	 * This is a place holder to save a copy of the rseq fields for
+7 −14
Original line number Diff line number Diff line
@@ -114,20 +114,13 @@ struct rseq {
	/*
	 * Restartable sequences flags field.
	 *
	 * This field should only be updated by the thread which
	 * registered this data structure. Read by the kernel.
	 * Mainly used for single-stepping through rseq critical sections
	 * with debuggers.
	 *
	 * This field was initially intended to allow event masking for
	 * single-stepping through rseq critical sections with debuggers.
	 * The kernel does not support this anymore and the relevant bits
	 * are checked for being always false:
	 *	- RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
	 *     Inhibit instruction sequence block restart on preemption
	 *     for this thread.
	 *	- RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
	 *     Inhibit instruction sequence block restart on signal
	 *     delivery for this thread.
	 *	- RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
	 *     Inhibit instruction sequence block restart on migration for
	 *     this thread.
	 */
	__u32 flags;

+17 −11
Original line number Diff line number Diff line
@@ -78,6 +78,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/rseq.h>

#ifdef CONFIG_MEMBARRIER
# define RSEQ_EVENT_GUARD	irq
#else
# define RSEQ_EVENT_GUARD	preempt
#endif

/* The original rseq structure size (including padding) is 32 bytes. */
#define ORIG_RSEQ_SIZE		32

@@ -430,11 +436,11 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
	 */
	if (regs) {
		/*
		 * Read and clear the event mask first. If the task was not
		 * preempted or migrated or a signal is on the way, there
		 * is no point in doing any of the heavy lifting here on
		 * production kernels. In that case TIF_NOTIFY_RESUME was
		 * raised by some other functionality.
		 * Read and clear the event pending bit first. If the task
		 * was not preempted or migrated or a signal is on the way,
		 * there is no point in doing any of the heavy lifting here
		 * on production kernels. In that case TIF_NOTIFY_RESUME
		 * was raised by some other functionality.
		 *
		 * This is correct because the read/clear operation is
		 * guarded against scheduler preemption, which makes it CPU
@@ -447,15 +453,15 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
		 * with the result handed in to allow the detection of
		 * inconsistencies.
		 */
		u32 event_mask;
		bool event;

		scoped_guard(RSEQ_EVENT_GUARD) {
			event_mask = t->rseq_event_mask;
			t->rseq_event_mask = 0;
			event = t->rseq_event_pending;
			t->rseq_event_pending = false;
		}

		if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event_mask) {
			ret = rseq_ip_fixup(regs, !!event_mask);
		if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
			ret = rseq_ip_fixup(regs, event);
			if (unlikely(ret < 0))
				goto error;
		}
@@ -584,7 +590,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32
	 * registered, ensure the cpu_id_start and cpu_id fields
	 * are updated before returning to user-space.
	 */
	rseq_set_notify_resume(current);
	rseq_sched_switch_event(current);

	return 0;
}
Loading