Commit d5048d11 authored by Linus Torvalds
Browse files

Merge tag 'timers-core-2025-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer core updates from Thomas Gleixner:

 - Fix a memory ordering issue in posix-timers

   Posix-timer lookup is lockless and reevaluates the timer validity
   under the timer lock, but the update which validates the timer is not
   protected by the timer lock. That allows the store to be reordered
   against the initialization stores, so that the lookup side can
   observe a partially initialized timer. That's mostly a theoretical
   problem, but incorrect nevertheless.

 - Fix a long standing inconsistency of the coarse time getters

   The coarse time getters read the base time of the current update
   cycle without reading the actual hardware clock. NTP frequency
   adjustment can set the base time backwards. The fine grained
   interfaces compensate for this by reading the clock and applying the
   new conversion factor, but the coarse grained time getters use the
   base time directly. That allows the user to observe time going
   backwards.

   Cure it by always forwarding base time, when NTP changes the
   frequency with an immediate step.

 - Rework of posix-timer hashing

   The posix-timer hash is not scalable and, due to the CRIU timer
   restore mechanism, prone to massive contention on the global hash
   bucket lock.

   Replace the global hash lock with a fine grained per bucket locking
   scheme to address that.

 - Rework the /proc/$PID/timers interface.

   /proc/$PID/timers is provided for CRIU to be able to restore a timer.
   The printout happens with sighand lock held and interrupts disabled.
   That's not required as this can be done with RCU protection as well.

 - Provide a sane mechanism for CRIU to restore a timer ID

   CRIU restores timers by creating and deleting them until the kernel
   internal per process ID counter reaches the requested ID. That's
   horribly slow for sparse timer IDs.

   Provide a prctl() which allows CRIU to restore a timer with a given
   ID. When enabled the ID pointer is used as input pointer to read the
   requested ID from user space. When disabled, the normal allocation
   scheme (next ID) is active as before. This is backwards compatible
   for both kernel and user space.

 - Make hrtimer_update_function() less expensive.

   The sanity checks are valuable, but expensive for high frequency
   usage in io_uring. Make the debug checks conditional and enable them
   only when lockdep is enabled.

 - Small updates, cleanups and improvements

* tag 'timers-core-2025-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  selftests/timers: Improve skew_consistency by testing with other clockids
  timekeeping: Fix possible inconsistencies in _COARSE clockids
  posix-timers: Drop redundant memset() invocation
  selftests/timers/posix-timers: Add a test for exact allocation mode
  posix-timers: Provide a mechanism to allocate a given timer ID
  posix-timers: Dont iterate /proc/$PID/timers with sighand:: Siglock held
  posix-timers: Make per process list RCU safe
  posix-timers: Avoid false cacheline sharing
  posix-timers: Switch to jhash32()
  posix-timers: Improve hash table performance
  posix-timers: Make signal_struct:: Next_posix_timer_id an atomic_t
  posix-timers: Make lock_timer() use guard()
  posix-timers: Rework timer removal
  posix-timers: Simplify lock/unlock_timer()
  posix-timers: Use guards in a few places
  posix-timers: Remove SLAB_PANIC from kmem cache
  posix-timers: Remove a few paranoid warnings
  posix-timers: Cleanup includes
  posix-timers: Add cond_resched() to posix_timer_add() search loop
  posix-timers: Initialise timer before adding it to the hash table
  ...
parents 0ae2062e e40d3709
Loading
Loading
Loading
Loading
+20 −28
Original line number Diff line number Diff line
@@ -2496,9 +2496,7 @@ static const struct file_operations proc_map_files_operations = {
struct timers_private {
	struct pid		*pid;
	struct task_struct	*task;
	struct sighand_struct *sighand;
	struct pid_namespace	*ns;
	unsigned long flags;
};

static void *timers_start(struct seq_file *m, loff_t *pos)
@@ -2509,54 +2507,48 @@ static void *timers_start(struct seq_file *m, loff_t *pos)
	if (!tp->task)
		return ERR_PTR(-ESRCH);

	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
	if (!tp->sighand)
		return ERR_PTR(-ESRCH);

	return seq_hlist_start(&tp->task->signal->posix_timers, *pos);
	rcu_read_lock();
	return seq_hlist_start_rcu(&tp->task->signal->posix_timers, *pos);
}

static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct timers_private *tp = m->private;
	return seq_hlist_next(v, &tp->task->signal->posix_timers, pos);

	return seq_hlist_next_rcu(v, &tp->task->signal->posix_timers, pos);
}

static void timers_stop(struct seq_file *m, void *v)
{
	struct timers_private *tp = m->private;

	if (tp->sighand) {
		unlock_task_sighand(tp->task, &tp->flags);
		tp->sighand = NULL;
	}

	if (tp->task) {
		put_task_struct(tp->task);
		tp->task = NULL;
		rcu_read_unlock();
	}
}

static int show_timer(struct seq_file *m, void *v)
{
	struct k_itimer *timer;
	struct timers_private *tp = m->private;
	int notify;
	static const char * const nstr[] = {
		[SIGEV_SIGNAL]	= "signal",
		[SIGEV_NONE]	= "none",
		[SIGEV_THREAD]	= "thread",
	};

	timer = hlist_entry((struct hlist_node *)v, struct k_itimer, list);
	notify = timer->it_sigev_notify;
	struct k_itimer *timer = hlist_entry((struct hlist_node *)v, struct k_itimer, list);
	struct timers_private *tp = m->private;
	int notify = timer->it_sigev_notify;

	guard(spinlock_irq)(&timer->it_lock);
	if (!posixtimer_valid(timer))
		return 0;

	seq_printf(m, "ID: %d\n", timer->it_id);
	seq_printf(m, "signal: %d/%px\n",
		   timer->sigq.info.si_signo,
	seq_printf(m, "signal: %d/%px\n", timer->sigq.info.si_signo,
		   timer->sigq.info.si_value.sival_ptr);
	seq_printf(m, "notify: %s/%s.%d\n",
		   nstr[notify & ~SIGEV_THREAD_ID],
	seq_printf(m, "notify: %s/%s.%d\n", nstr[notify & ~SIGEV_THREAD_ID],
		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
		   pid_nr_ns(timer->it_pid, tp->ns));
	seq_printf(m, "ClockID: %d\n", timer->it_clock);
+14 −8
Original line number Diff line number Diff line
@@ -308,11 +308,21 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond)	\
static __maybe_unused const bool class_##_name##_is_conditional = _is_cond

#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \
	static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
	{ return (void *)(__force unsigned long)*(_exp); }

/*
 * Mark class _name as an unconditional guard: defines the
 * class_<name>_is_conditional constant as false and a
 * class_<name>_lock_ptr() helper which dereferences the class instance
 * pointer _T.
 */
#define DEFINE_CLASS_IS_GUARD(_name) \
	__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
	__DEFINE_GUARD_LOCK_PTR(_name, _T)

/*
 * Mark class _name as a conditional guard: defines the
 * class_<name>_is_conditional constant as true and a
 * class_<name>_lock_ptr() helper which dereferences the class instance
 * pointer _T.
 */
#define DEFINE_CLASS_IS_COND_GUARD(_name) \
	__DEFINE_CLASS_IS_CONDITIONAL(_name, true); \
	__DEFINE_GUARD_LOCK_PTR(_name, _T)

#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
	DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
	static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
	{ return (void *)(__force unsigned long)*_T; }
	DEFINE_CLASS_IS_GUARD(_name)

#define DEFINE_GUARD_COND(_name, _ext, _condlock) \
	__DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \
@@ -392,11 +402,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \
	if (_T->lock) { _unlock; }					\
}									\
									\
static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T)	\
{									\
	return (void *)(__force unsigned long)_T->lock;			\
}

__DEFINE_GUARD_LOCK_PTR(_name, &_T->lock)

#define __DEFINE_LOCK_GUARD_1(_name, _type, _lock)			\
static inline class_##_name##_t class_##_name##_constructor(_type *l)	\
+2 −1
Original line number Diff line number Diff line
@@ -333,6 +333,7 @@ static inline int hrtimer_callback_running(struct hrtimer *timer)
static inline void hrtimer_update_function(struct hrtimer *timer,
					   enum hrtimer_restart (*function)(struct hrtimer *))
{
#ifdef CONFIG_PROVE_LOCKING
	guard(raw_spinlock_irqsave)(&timer->base->cpu_base->lock);

	if (WARN_ON_ONCE(hrtimer_is_queued(timer)))
@@ -340,7 +341,7 @@ static inline void hrtimer_update_function(struct hrtimer *timer,

	if (WARN_ON_ONCE(!function))
		return;

#endif
	timer->function = function;
}

+21 −9
Original line number Diff line number Diff line
@@ -114,6 +114,7 @@ bool posixtimer_init_sigqueue(struct sigqueue *q);
void posixtimer_send_sigqueue(struct k_itimer *tmr);
bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq);
void posixtimer_free_timer(struct k_itimer *timer);
long posixtimer_create_prctl(unsigned long ctrl);

/* Init task static initializer */
#define INIT_CPU_TIMERBASE(b) {						\
@@ -140,6 +141,7 @@ static inline void posixtimer_rearm_itimer(struct task_struct *p) { }
static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info,
					     struct sigqueue *timer_sigq) { return false; }
static inline void posixtimer_free_timer(struct k_itimer *timer) { }
static inline long posixtimer_create_prctl(unsigned long ctrl) { return -EINVAL; }
#endif

#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
@@ -177,23 +179,26 @@ static inline void posix_cputimers_init_work(void) { }
 * @rcu:		RCU head for freeing the timer.
 */
struct k_itimer {
	struct hlist_node	list;
	struct hlist_node	ignored_list;
	/* 1st cacheline contains read-mostly fields */
	struct hlist_node	t_hash;
	spinlock_t		it_lock;
	const struct k_clock	*kclock;
	clockid_t		it_clock;
	struct hlist_node	list;
	timer_t			it_id;
	clockid_t		it_clock;
	int			it_sigev_notify;
	enum pid_type		it_pid_type;
	struct signal_struct	*it_signal;
	const struct k_clock	*kclock;

	/* 2nd cacheline and above contain fields which are modified regularly */
	spinlock_t		it_lock;
	int			it_status;
	bool			it_sig_periodic;
	s64			it_overrun;
	s64			it_overrun_last;
	unsigned int		it_signal_seq;
	unsigned int		it_sigqueue_seq;
	int			it_sigev_notify;
	enum pid_type		it_pid_type;
	ktime_t			it_interval;
	struct signal_struct	*it_signal;
	struct hlist_node	ignored_list;
	union {
		struct pid		*it_pid;
		struct task_struct	*it_process;
@@ -210,7 +215,7 @@ struct k_itimer {
		} alarm;
	} it;
	struct rcu_head		rcu;
};
} ____cacheline_aligned_in_smp;

void run_posix_cpu_timers(void);
void posix_cpu_timers_exit(struct task_struct *task);
@@ -240,6 +245,13 @@ static inline void posixtimer_sigqueue_putref(struct sigqueue *q)

	posixtimer_putref(tmr);
}

/*
 * posixtimer_valid - Check the validity marker of a posix timer
 * @timer:	The timer to inspect
 *
 * A timer counts as valid when bit 0 of its it_signal pointer value is
 * clear.
 *
 * NOTE(review): presumably the low bit is set while the timer is not yet
 * (or no longer) usable - confirm against the code which writes it_signal.
 *
 * Return: true if bit 0 of timer->it_signal is clear, false otherwise.
 */
static inline bool posixtimer_valid(const struct k_itimer *timer)
{
	return ((unsigned long)timer->it_signal & 0x1UL) == 0;
}
#else  /* CONFIG_POSIX_TIMERS */
static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { }
static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { }
+2 −1
Original line number Diff line number Diff line
@@ -136,7 +136,8 @@ struct signal_struct {
#ifdef CONFIG_POSIX_TIMERS

	/* POSIX.1b Interval Timers */
	unsigned int		next_posix_timer_id;
	unsigned int		timer_create_restore_ids:1;
	atomic_t		next_posix_timer_id;
	struct hlist_head	posix_timers;
	struct hlist_head	ignored_posix_timers;

Loading