fs: WQ_PERCPU added to alloc_workqueue users

Currently, if a user enqueues a work item using schedule_delayed_work(), the
wq used is "system_wq" (a per-cpu wq), while queue_delayed_work() uses
WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to
schedule_work(), which uses system_wq, and queue_work(), which again makes
use of WORK_CPU_UNBOUND.
This lack of consistency cannot be addressed without refactoring the API.

alloc_workqueue() treats all queues as per-CPU by default, while unbound
workqueues must opt-in via WQ_UNBOUND.

This default is suboptimal: most workloads benefit from unbound queues,
allowing the scheduler to place worker threads where they’re needed and
reducing noise when CPUs are isolated.

This patch adds a new WQ_PERCPU flag to all the fs subsystem users to
explicitly request the use of the per-CPU behavior. Both flags coexist
for one release cycle to allow callers to transition their calls.

Once migration is complete, WQ_UNBOUND can be removed and unbound will
become the implicit default.

With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND),
any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND
must now use WQ_PERCPU.

All existing users have been updated accordingly.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
Link: https://lore.kernel.org/20250916082906.77439-4-marco.crivellari@suse.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Marco Crivellari 2025-09-16 10:29:06 +02:00 committed by Christian Brauner
parent 4ef64db060
commit 69635d7f4b
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
19 changed files with 51 additions and 37 deletions

View File

@ -169,13 +169,13 @@ static int __init afs_init(void)
printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
afs_wq = alloc_workqueue("afs", 0, 0);
afs_wq = alloc_workqueue("afs", WQ_PERCPU, 0);
if (!afs_wq)
goto error_afs_wq;
afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (!afs_async_calls)
goto error_async;
afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!afs_lock_manager)
goto error_lockmgr;

View File

@ -801,13 +801,13 @@ int bch2_fs_init_rw(struct bch_fs *c)
if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
!(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_PERCPU, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_PERCPU, 1)) ||
!(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_PERCPU, 1)) ||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
WQ_FREEZABLE, 0)))
WQ_FREEZABLE|WQ_PERCPU, 0)))
return bch_err_throw(c, ENOMEM_fs_other_alloc);
int ret = bch2_fs_btree_interior_update_init(c) ?:
@ -975,7 +975,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts,
sizeof(struct sort_iter_set);
if (!(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete",
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) ||
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_PERCPU, 512)) ||
enumerated_ref_init(&c->writes, BCH_WRITE_REF_NR,
bch2_writes_disabled) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||

View File

@ -1958,7 +1958,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
{
u32 max_active = fs_info->thread_pool_size;
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE;
unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU;
fs_info->workers =
btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16);

View File

@ -862,7 +862,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
if (!fsc->inode_wq)
goto fail_client;
fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
fsc->cap_wq = alloc_workqueue("ceph-cap", WQ_PERCPU, 1);
if (!fsc->cap_wq)
goto fail_inode_wq;

View File

@ -1703,7 +1703,7 @@ static int work_start(void)
return -ENOMEM;
}
process_workqueue = alloc_workqueue("dlm_process", WQ_HIGHPRI | WQ_BH, 0);
process_workqueue = alloc_workqueue("dlm_process", WQ_HIGHPRI | WQ_BH | WQ_PERCPU, 0);
if (!process_workqueue) {
log_print("can't start dlm_process");
destroy_workqueue(io_workqueue);

View File

@ -52,7 +52,7 @@ static int __init init_dlm(void)
if (error)
goto out_user;
dlm_wq = alloc_workqueue("dlm_wq", 0, 0);
dlm_wq = alloc_workqueue("dlm_wq", WQ_PERCPU, 0);
if (!dlm_wq) {
error = -ENOMEM;
goto out_plock;

View File

@ -1180,7 +1180,7 @@ void cgroup_writeback_umount(struct super_block *sb)
static int __init cgroup_writeback_init(void)
{
isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
isw_wq = alloc_workqueue("inode_switch_wbs", WQ_PERCPU, 0);
if (!isw_wq)
return -ENOMEM;
return 0;

View File

@ -151,7 +151,8 @@ static int __init init_gfs2_fs(void)
error = -ENOMEM;
gfs2_recovery_wq = alloc_workqueue("gfs2_recovery",
WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU,
0);
if (!gfs2_recovery_wq)
goto fail_wq1;
@ -160,7 +161,7 @@ static int __init init_gfs2_fs(void)
if (!gfs2_control_wq)
goto fail_wq2;
gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", 0, 0);
gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", WQ_PERCPU, 0);
if (!gfs2_freeze_wq)
goto fail_wq3;

View File

@ -1193,13 +1193,15 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
error = -ENOMEM;
sdp->sd_glock_wq = alloc_workqueue("gfs2-glock/%s",
WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0,
WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE | WQ_PERCPU,
0,
sdp->sd_fsname);
if (!sdp->sd_glock_wq)
goto fail_iput;
sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s",
WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, sdp->sd_fsname);
WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU, 0,
sdp->sd_fsname);
if (!sdp->sd_delete_wq)
goto fail_glock_wq;

View File

@ -1876,7 +1876,8 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
dlm_debug_init(dlm);
snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name);
dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0);
dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM | WQ_PERCPU,
0);
if (!dlm->dlm_worker) {
status = -ENOMEM;
mlog_errno(status);

View File

@ -595,7 +595,8 @@ static int __init init_dlmfs_fs(void)
}
cleanup_inode = 1;
user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM, 0);
user_dlm_worker = alloc_workqueue("user_dlm",
WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!user_dlm_worker) {
status = -ENOMEM;
goto bail;

View File

@ -1881,7 +1881,9 @@ init_cifs(void)
cifs_dbg(VFS, "dir_cache_timeout set to max of 65000 seconds\n");
}
cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
cifsiod_wq = alloc_workqueue("cifsiod",
WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
0);
if (!cifsiod_wq) {
rc = -ENOMEM;
goto out_clean_proc;
@ -1909,28 +1911,32 @@ init_cifs(void)
}
cifsoplockd_wq = alloc_workqueue("cifsoplockd",
WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
0);
if (!cifsoplockd_wq) {
rc = -ENOMEM;
goto out_destroy_fileinfo_put_wq;
}
deferredclose_wq = alloc_workqueue("deferredclose",
WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
0);
if (!deferredclose_wq) {
rc = -ENOMEM;
goto out_destroy_cifsoplockd_wq;
}
serverclose_wq = alloc_workqueue("serverclose",
WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
0);
if (!serverclose_wq) {
rc = -ENOMEM;
goto out_destroy_deferredclose_wq;
}
cfid_put_wq = alloc_workqueue("cfid_put_wq",
WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU,
0);
if (!cfid_put_wq) {
rc = -ENOMEM;
goto out_destroy_serverclose_wq;

View File

@ -78,7 +78,7 @@ int ksmbd_work_pool_init(void)
int ksmbd_workqueue_init(void)
{
ksmbd_wq = alloc_workqueue("ksmbd-io", 0, 0);
ksmbd_wq = alloc_workqueue("ksmbd-io", WQ_PERCPU, 0);
if (!ksmbd_wq)
return -ENOMEM;
return 0;

View File

@ -2177,7 +2177,8 @@ int ksmbd_rdma_init(void)
* for lack of credits
*/
smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_PERCPU,
0);
if (!smb_direct_wq)
return -ENOMEM;

View File

@ -2314,7 +2314,8 @@ int sb_init_dio_done_wq(struct super_block *sb)
{
struct workqueue_struct *old;
struct workqueue_struct *wq = alloc_workqueue("dio/%s",
WQ_MEM_RECLAIM, 0,
WQ_MEM_RECLAIM | WQ_PERCPU,
0,
sb->s_id);
if (!wq)
return -ENOMEM;

View File

@ -355,7 +355,7 @@ void __init fsverity_init_workqueue(void)
* latency on ARM64.
*/
fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
WQ_HIGHPRI,
WQ_HIGHPRI | WQ_PERCPU,
num_online_cpus());
if (!fsverity_read_workqueue)
panic("failed to allocate fsverity_read_queue");

View File

@ -1489,8 +1489,7 @@ xlog_alloc_log(
log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM |
WQ_HIGHPRI),
XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_PERCPU),
0, mp->m_super->s_id);
if (!log->l_ioend_workqueue)
goto out_free_iclog;

View File

@ -293,7 +293,8 @@ int
xfs_mru_cache_init(void)
{
xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache",
XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 1);
XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU),
1);
if (!xfs_mru_reap_wq)
return -ENOMEM;
return 0;

View File

@ -578,19 +578,19 @@ xfs_init_mount_workqueues(
struct xfs_mount *mp)
{
mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
1, mp->m_super->s_id);
if (!mp->m_buf_workqueue)
goto out;
mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
0, mp->m_super->s_id);
if (!mp->m_unwritten_workqueue)
goto out_destroy_buf;
mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
0, mp->m_super->s_id);
if (!mp->m_reclaim_workqueue)
goto out_destroy_unwritten;
@ -602,13 +602,14 @@ xfs_init_mount_workqueues(
goto out_destroy_reclaim;
mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
1, mp->m_super->s_id);
if (!mp->m_inodegc_wq)
goto out_destroy_blockgc;
mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
XFS_WQFLAGS(WQ_FREEZABLE | WQ_PERCPU), 0,
mp->m_super->s_id);
if (!mp->m_sync_workqueue)
goto out_destroy_inodegc;
@ -2596,8 +2597,8 @@ xfs_init_workqueues(void)
* AGs in all the filesystems mounted. Hence use the default large
* max_active value for this workqueue.
*/
xfs_alloc_wq = alloc_workqueue("xfsalloc",
XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
xfs_alloc_wq = alloc_workqueue("xfsalloc", XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU),
0);
if (!xfs_alloc_wq)
return -ENOMEM;