Commit 2b09f480 authored by Linus Torvalds
Browse files

Merge tag 'core-rseq-2025-11-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull rseq updates from Thomas Gleixner:
 "A large overhaul of the restartable sequences and CID management:

  The recent enablement of RSEQ in glibc resulted in regressions which
  are caused by the related overhead. It turned out that the decision to
  invoke the exit to user work was not really a decision. More or less
  each context switch caused that. There is a long list of small issues
  which sums up nicely and results in a 3-4% regression in I/O
  benchmarks.

  The other detail which caused issues due to extra work in context
  switch and task migration is the CID (memory context ID) management.
  It also requires the use of task work to consolidate the CID space,
  which is executed in the context of an arbitrary task and results in
  sporadic uncontrolled exit latencies.

  The rewrite addresses this by:

   - Removing deprecated and long unsupported functionality

   - Moving the related data into dedicated data structures which are
     optimized for fast path processing.

   - Caching values so actual decisions can be made

   - Replacing the current implementation with an optimized inlined
     variant.

   - Separating fast and slow path for architectures which use the
     generic entry code, so that only fault and error handling goes into
     the TIF_NOTIFY_RESUME handler.

   - Rewriting the CID management so that it becomes mostly invisible in
     the context switch path. That moves the work of switching modes
     into the fork/exit path, which is a reasonable tradeoff. That work
     is only required when a process creates more threads than the
     cpuset it is allowed to run on or when enough threads exit after
     that. An artificial thread pool benchmark which triggers this did
     not degrade; it actually improved significantly.

     The main effect in migration heavy scenarios is that runqueue lock
     held time and therefore contention goes down significantly"

* tag 'core-rseq-2025-11-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (54 commits)
  sched/mmcid: Switch over to the new mechanism
  sched/mmcid: Implement deferred mode change
  irqwork: Move data struct to a types header
  sched/mmcid: Provide CID ownership mode fixup functions
  sched/mmcid: Provide new scheduler CID mechanism
  sched/mmcid: Introduce per task/CPU ownership infrastructure
  sched/mmcid: Serialize sched_mm_cid_fork()/exit() with a mutex
  sched/mmcid: Provide precomputed maximal value
  sched/mmcid: Move initialization out of line
  signal: Move MMCID exit out of sighand lock
  sched/mmcid: Convert mm CID mask to a bitmap
  cpumask: Cache num_possible_cpus()
  sched/mmcid: Use cpumask_weighted_or()
  cpumask: Introduce cpumask_weighted_or()
  sched/mmcid: Prevent pointless work in mm_update_cpus_allowed()
  sched/mmcid: Move scheduler code out of global header
  sched: Fixup whitespace damage
  sched/mmcid: Cacheline align MM CID storage
  sched/mmcid: Use proper data structures
  sched/mmcid: Revert the complex CID management
  ...
parents 1dce5069 653fda7a
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -6500,6 +6500,10 @@
			Memory area to be used by remote processor image,
			managed by CMA.

	rseq_debug=	[KNL] Enable or disable restartable sequence
			debug mode. Defaults to CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE.
			Format: <bool>

	rt_group_sched=	[KNL] Enable or disable SCHED_RR/FIFO group scheduling
			when CONFIG_RT_GROUP_SCHED=y. Defaults to
			!CONFIG_RT_GROUP_SCHED_DEFAULT_DISABLED.
+1 −1
Original line number Diff line number Diff line
@@ -100,7 +100,7 @@ static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs)
static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
{
	local_irq_disable();
	exit_to_user_mode_prepare(regs);
	exit_to_user_mode_prepare_legacy(regs);
	local_daif_mask();
	mte_check_tfsr_exit();
	exit_to_user_mode();
+2 −1
Original line number Diff line number Diff line
@@ -274,9 +274,10 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
	 * fetch EBP before invoking any of the syscall entry work
	 * functions.
	 */
	syscall_enter_from_user_mode_prepare(regs);
	enter_from_user_mode(regs);

	instrumentation_begin();
	local_irq_enable();
	/* Fetch EBP from where the vDSO stashed it. */
	if (IS_ENABLED(CONFIG_X86_64)) {
		/*
+10 −10
Original line number Diff line number Diff line
@@ -187,12 +187,12 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code);


static inline unsigned long regs_return_value(struct pt_regs *regs)
static __always_inline unsigned long regs_return_value(struct pt_regs *regs)
{
	return regs->ax;
}

static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
static __always_inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
{
	regs->ax = rc;
}
@@ -277,34 +277,34 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
}
#endif

static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
static __always_inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
	return regs->sp;
}

static inline unsigned long instruction_pointer(struct pt_regs *regs)
static __always_inline unsigned long instruction_pointer(struct pt_regs *regs)
{
	return regs->ip;
}

static inline void instruction_pointer_set(struct pt_regs *regs,
		unsigned long val)
static __always_inline
void instruction_pointer_set(struct pt_regs *regs, unsigned long val)
{
	regs->ip = val;
}

static inline unsigned long frame_pointer(struct pt_regs *regs)
static __always_inline unsigned long frame_pointer(struct pt_regs *regs)
{
	return regs->bp;
}

static inline unsigned long user_stack_pointer(struct pt_regs *regs)
static __always_inline unsigned long user_stack_pointer(struct pt_regs *regs)
{
	return regs->sp;
}

static inline void user_stack_pointer_set(struct pt_regs *regs,
		unsigned long val)
static __always_inline
void user_stack_pointer_set(struct pt_regs *regs, unsigned long val)
{
	regs->sp = val;
}
+3 −0
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@
#include <linux/crash_dump.h>
#include <linux/panic_notifier.h>
#include <linux/vmalloc.h>
#include <linux/rseq.h>

#include "mshv_eventfd.h"
#include "mshv.h"
@@ -560,6 +561,8 @@ static long mshv_run_vp_with_root_scheduler(struct mshv_vp *vp)
		}
	} while (!vp->run.flags.intercept_suspend);

	rseq_virt_userspace_exit();

	return ret;
}

Loading