Merge tag 'kernel-6.15-rc1.tasklist_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull tasklist_lock optimizations from Christian Brauner:
 "According to the performance testbots this brings a 23% performance
  increase when creating new processes:

   - Reduce tasklist_lock hold time on exit:
       - Perform add_device_randomness() without tasklist_lock
       - Perform free_pid() calls outside of tasklist_lock

   - Drop irq disablement around pidmap_lock

   - Add some tasklist_lock asserts

   - Call flush_sigqueue() lockless by changing release_task()

   - Don't pointlessly clear TIF_SIGPENDING in __exit_signal() ->
     clear_tsk_thread_flag()"

* tag 'kernel-6.15-rc1.tasklist_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  pid: drop irq disablement around pidmap_lock
  pid: perform free_pid() calls outside of tasklist_lock
  pid: sprinkle tasklist_lock asserts
  exit: hoist get_pid() in release_task() outside of tasklist_lock
  exit: perform add_device_randomness() without tasklist_lock
  exit: kill the pointless __exit_signal()->clear_tsk_thread_flag(TIF_SIGPENDING)
  exit: change the release_task() paths to call flush_sigqueue() lockless
This commit is contained in:
Linus Torvalds
2025-03-24 13:39:27 -07:00
4 changed files with 93 additions and 66 deletions

View File

@@ -123,14 +123,22 @@ static __init int kernel_exit_sysfs_init(void)
late_initcall(kernel_exit_sysfs_init);
#endif
static void __unhash_process(struct task_struct *p, bool group_dead)
/*
* For things release_task() would like to do *after* tasklist_lock is released.
*/
struct release_task_post {
struct pid *pids[PIDTYPE_MAX];
};
static void __unhash_process(struct release_task_post *post, struct task_struct *p,
bool group_dead)
{
nr_threads--;
detach_pid(p, PIDTYPE_PID);
detach_pid(post->pids, p, PIDTYPE_PID);
if (group_dead) {
detach_pid(p, PIDTYPE_TGID);
detach_pid(p, PIDTYPE_PGID);
detach_pid(p, PIDTYPE_SID);
detach_pid(post->pids, p, PIDTYPE_TGID);
detach_pid(post->pids, p, PIDTYPE_PGID);
detach_pid(post->pids, p, PIDTYPE_SID);
list_del_rcu(&p->tasks);
list_del_init(&p->sibling);
@@ -142,7 +150,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
/*
* This function expects the tasklist_lock write-locked.
*/
static void __exit_signal(struct task_struct *tsk)
static void __exit_signal(struct release_task_post *post, struct task_struct *tsk)
{
struct signal_struct *sig = tsk->signal;
bool group_dead = thread_group_leader(tsk);
@@ -175,9 +183,6 @@ static void __exit_signal(struct task_struct *tsk)
sig->curr_target = next_thread(tsk);
}
add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
sizeof(unsigned long long));
/*
* Accumulate here the counters for all threads as they die. We could
* skip the group leader because it is the last user of signal_struct,
@@ -198,23 +203,15 @@ static void __exit_signal(struct task_struct *tsk)
task_io_accounting_add(&sig->ioac, &tsk->ioac);
sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig->nr_threads--;
__unhash_process(tsk, group_dead);
__unhash_process(post, tsk, group_dead);
write_sequnlock(&sig->stats_lock);
/*
* Do this under ->siglock, we can race with another thread
* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
*/
flush_sigqueue(&tsk->pending);
tsk->sighand = NULL;
spin_unlock(&sighand->siglock);
__cleanup_sighand(sighand);
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
if (group_dead) {
flush_sigqueue(&sig->shared_pending);
if (group_dead)
tty_kref_put(tty);
}
}
static void delayed_put_task_struct(struct rcu_head *rhp)
@@ -240,10 +237,13 @@ void __weak release_thread(struct task_struct *dead_task)
void release_task(struct task_struct *p)
{
struct release_task_post post;
struct task_struct *leader;
struct pid *thread_pid;
int zap_leader;
repeat:
memset(&post, 0, sizeof(post));
/* don't need to get the RCU readlock here - the process is dead and
* can't be modifying its own credentials. But shut RCU-lockdep up */
rcu_read_lock();
@@ -253,10 +253,11 @@ repeat:
pidfs_exit(p);
cgroup_release(p);
thread_pid = get_pid(p->thread_pid);
write_lock_irq(&tasklist_lock);
ptrace_release_task(p);
thread_pid = get_pid(p->thread_pid);
__exit_signal(p);
__exit_signal(&post, p);
/*
* If we are the last non-leader member of the thread
@@ -280,7 +281,20 @@ repeat:
write_unlock_irq(&tasklist_lock);
proc_flush_pid(thread_pid);
put_pid(thread_pid);
add_device_randomness(&p->se.sum_exec_runtime,
sizeof(p->se.sum_exec_runtime));
free_pids(post.pids);
release_thread(p);
/*
* This task was already removed from the process/thread/pid lists
* and lock_task_sighand(p) can't succeed. Nobody else can touch
* ->pending or, if group dead, signal->shared_pending. We can call
* flush_sigqueue() lockless.
*/
flush_sigqueue(&p->pending);
if (thread_group_leader(p))
flush_sigqueue(&p->signal->shared_pending);
put_task_struct_rcu_user(p);
p = leader;