Merge tag 'timers_urgent_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (25144ea3) · Commits · git / linux-net

include/linux/hrtimer.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -386,6 +386,7 @@ extern void __init hrtimers_init(void);
		extern void sysrq_timer_list_show(void);

		int hrtimers_prepare_cpu(unsigned int cpu);
		int hrtimers_cpu_starting(unsigned int cpu);
		#ifdef CONFIG_HOTPLUG_CPU
		int hrtimers_cpu_dying(unsigned int cpu);
		#else

kernel/cpu.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -2179,7 +2179,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
		},
		[CPUHP_AP_HRTIMERS_DYING] = {
		.name = "hrtimers:dying",
		.startup.single = NULL,
		.startup.single = hrtimers_cpu_starting,
		.teardown.single = hrtimers_cpu_dying,
		},
		[CPUHP_AP_TICK_DYING] = {

kernel/signal.c

+28 −9

Original line number	Diff line number	Diff line
		@@ -2007,11 +2007,22 @@ void posixtimer_send_sigqueue(struct k_itimer *tmr)

		if (!list_empty(&q->list)) {
		/*
		* If task group is exiting with the signal already pending,
		* wait for __exit_signal() to do its job. Otherwise if
		* ignored, it's not supposed to be queued. Try to survive.
		* The signal was ignored and blocked. The timer
		* expiry queued it because blocked signals are
		* queued independent of the ignored state.
		*
		* The unblocking set SIGPENDING, but the signal
		* was not yet dequeued from the pending list.
		* So prepare_signal() sees unblocked and ignored,
		* which ends up here. Leave it queued like a
		* regular signal.
		*
		* The same happens when the task group is exiting
		* and the signal is already queued.
		* prepare_signal() treats SIGNAL_GROUP_EXIT as
		* ignored independent of its queued state. This
		* gets cleaned up in __exit_signal().
		*/
		WARN_ON_ONCE(!(t->signal->flags & SIGNAL_GROUP_EXIT));
		goto out;
		}

		@@ -2046,17 +2057,25 @@ void posixtimer_send_sigqueue(struct k_itimer *tmr)
		goto out;
		}

		/* This should never happen and leaks a reference count */
		if (WARN_ON_ONCE(!hlist_unhashed(&tmr->ignored_list)))
		hlist_del_init(&tmr->ignored_list);

		if (unlikely(!list_empty(&q->list))) {
		/* This holds a reference count already */
		result = TRACE_SIGNAL_ALREADY_PENDING;
		goto out;
		}

		/*
		* If the signal is on the ignore list, it got blocked after it was
		* ignored earlier. But nothing lifted the ignore. Move it back to
		* the pending list to be consistent with the regular signal
		* handling. This already holds a reference count.
		*
		* If it's not on the ignore list acquire a reference count.
		*/
		if (likely(hlist_unhashed(&tmr->ignored_list)))
		posixtimer_sigqueue_getref(q);
		else
		hlist_del_init(&tmr->ignored_list);

		posixtimer_queue_sigqueue(q, t, tmr->it_pid_type);
		result = TRACE_SIGNAL_DELIVERED;
		out:

kernel/time/hrtimer.c

+10 −1

Original line number	Diff line number	Diff line
		@@ -2202,6 +2202,15 @@ int hrtimers_prepare_cpu(unsigned int cpu)
		}

		cpu_base->cpu = cpu;
		hrtimer_cpu_base_init_expiry_lock(cpu_base);
		return 0;
		}

		int hrtimers_cpu_starting(unsigned int cpu)
		{
		struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);

		/* Clear out any left over state from a CPU down operation */
		cpu_base->active_bases = 0;
		cpu_base->hres_active = 0;
		cpu_base->hang_detected = 0;
		@@ -2210,7 +2219,6 @@ int hrtimers_prepare_cpu(unsigned int cpu)
		cpu_base->expires_next = KTIME_MAX;
		cpu_base->softirq_expires_next = KTIME_MAX;
		cpu_base->online = 1;
		hrtimer_cpu_base_init_expiry_lock(cpu_base);
		return 0;
		}

		@@ -2286,5 +2294,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
		void __init hrtimers_init(void)
		{
		hrtimers_prepare_cpu(smp_processor_id());
		hrtimers_cpu_starting(smp_processor_id());
		open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
		}

kernel/time/timer_migration.c

+55 −9

Original line number	Diff line number	Diff line
		@@ -534,8 +534,13 @@ static void __walk_groups(up_f up, struct tmigr_walk *data,
		break;

		child = group;
		group = group->parent;
		/*
		* Pairs with the store release on group connection
		* to make sure group initialization is visible.
		*/
		group = READ_ONCE(group->parent);
		data->childmask = child->groupmask;
		WARN_ON_ONCE(!data->childmask);
		} while (group);
		}

		@@ -564,7 +569,7 @@ static struct tmigr_event tmigr_next_groupevt(struct tmigr_group group)
		while ((node = timerqueue_getnext(&group->events))) {
		evt = container_of(node, struct tmigr_event, nextevt);

		if (!evt->ignore) {
		if (!READ_ONCE(evt->ignore)) {
		WRITE_ONCE(group->next_expiry, evt->nextevt.expires);
		return evt;
		}
		@@ -660,7 +665,7 @@ static bool tmigr_active_up(struct tmigr_group *group,
		* lock is held while updating the ignore flag in idle path. So this
		* state change will not be lost.
		*/
		group->groupevt.ignore = true;
		WRITE_ONCE(group->groupevt.ignore, true);

		return walk_done;
		}
		@@ -721,6 +726,7 @@ bool tmigr_update_events(struct tmigr_group group, struct tmigr_group child,
		union tmigr_state childstate, groupstate;
		bool remote = data->remote;
		bool walk_done = false;
		bool ignore;
		u64 nextexp;

		if (child) {
		@@ -739,11 +745,19 @@ bool tmigr_update_events(struct tmigr_group group, struct tmigr_group child,
		nextexp = child->next_expiry;
		evt = &child->groupevt;

		evt->ignore = (nextexp == KTIME_MAX) ? true : false;
		/*
		* This can race with concurrent idle exit (activate).
		* If the current writer wins, a useless remote expiration may
		* be scheduled. If the activate wins, the event is properly
		* ignored.
		*/
		ignore = (nextexp == KTIME_MAX) ? true : false;
		WRITE_ONCE(evt->ignore, ignore);
		} else {
		nextexp = data->nextexp;

		first_childevt = evt = data->evt;
		ignore = evt->ignore;

		/*
		* Walking the hierarchy is required in any case when a
		@@ -769,7 +783,7 @@ bool tmigr_update_events(struct tmigr_group group, struct tmigr_group child,
		* first event information of the group is updated properly and
		* also handled properly, so skip this fast return path.
		*/
		if (evt->ignore && !remote && group->parent)
		if (ignore && !remote && group->parent)
		return true;

		raw_spin_lock(&group->lock);
		@@ -783,7 +797,7 @@ bool tmigr_update_events(struct tmigr_group group, struct tmigr_group child,
		* queue when the expiry time changed only or when it could be ignored.
		*/
		if (timerqueue_node_queued(&evt->nextevt)) {
		if ((evt->nextevt.expires == nextexp) && !evt->ignore) {
		if ((evt->nextevt.expires == nextexp) && !ignore) {
		/* Make sure not to miss a new CPU event with the same expiry */
		evt->cpu = first_childevt->cpu;
		goto check_toplvl;
		@@ -793,7 +807,7 @@ bool tmigr_update_events(struct tmigr_group group, struct tmigr_group child,
		WRITE_ONCE(group->next_expiry, KTIME_MAX);
		}

		if (evt->ignore) {
		if (ignore) {
		/*
		* When the next child event could be ignored (nextexp is
		* KTIME_MAX) and there was no remote timer handling before or
		@@ -1487,6 +1501,21 @@ static void tmigr_init_group(struct tmigr_group *group, unsigned int lvl,
		s.seq = 0;
		atomic_set(&group->migr_state, s.state);

		/*
		* If this is a new top-level, prepare its groupmask in advance.
		* This avoids accidents where yet another new top-level is
		* created in the future and made visible before the current groupmask.
		*/
		if (list_empty(&tmigr_level_list[lvl])) {
		group->groupmask = BIT(0);
		/*
		* The previous top level has prepared its groupmask already,
		* simply account it as the first child.
		*/
		if (lvl > 0)
		group->num_children = 1;
		}

		timerqueue_init_head(&group->events);
		timerqueue_init(&group->groupevt.nextevt);
		group->groupevt.nextevt.expires = KTIME_MAX;
		@@ -1550,8 +1579,25 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
		raw_spin_lock_irq(&child->lock);
		raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);

		child->parent = parent;
		if (activate) {
		/*
		* @child is the old top and @parent the new one. In this
		* case groupmask is pre-initialized and @child already
		* accounted, along with its new sibling corresponding to the
		* CPU going up.
		*/
		WARN_ON_ONCE(child->groupmask != BIT(0) \|\| parent->num_children != 2);
		} else {
		/* Adding @child for the CPU going up to @parent. */
		child->groupmask = BIT(parent->num_children++);
		}

		/*
		* Make sure parent initialization is visible before publishing it to a
		* racing CPU entering/exiting idle. This RELEASE barrier enforces an
		* address dependency that pairs with the READ_ONCE() in __walk_groups().
		*/
		smp_store_release(&child->parent, parent);

		raw_spin_unlock(&parent->lock);
		raw_spin_unlock_irq(&child->lock);