Commit 7de6b4a2 authored by Linus Torvalds
Browse files
Pull workqueue updates from Tejun Heo:

 - New default WQ_AFFN_CACHE_SHARD affinity scope subdivides LLCs into
   smaller shards to improve scalability on machines with many CPUs per
   LLC

 - Misc:
    - system_dfl_long_wq for long unbound works
    - devm_alloc_workqueue() for device-managed allocation
    - sysfs exposure for ordered workqueues and the EFI workqueue
    - removal of HK_TYPE_WQ from wq_unbound_cpumask
    - various small fixes

* tag 'wq-for-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq: (21 commits)
  workqueue: validate cpumask_first() result in llc_populate_cpu_shard_id()
  workqueue: use NR_STD_WORKER_POOLS instead of hardcoded value
  workqueue: avoid unguarded 64-bit division
  docs: workqueue: document WQ_AFFN_CACHE_SHARD affinity scope
  workqueue: add test_workqueue benchmark module
  tools/workqueue: add CACHE_SHARD support to wq_dump.py
  workqueue: set WQ_AFFN_CACHE_SHARD as the default affinity scope
  workqueue: add WQ_AFFN_CACHE_SHARD affinity scope
  workqueue: fix typo in WQ_AFFN_SMT comment
  workqueue: Remove HK_TYPE_WQ from affecting wq_unbound_cpumask
  workqueue: unlink pwqs from wq->pwqs list in alloc_and_link_pwqs() error path
  workqueue: Remove NULL wq WARN in __queue_delayed_work()
  workqueue: fix parse_affn_scope() prefix matching bug
  workqueue: devres: Add device-managed allocate workqueue
  workqueue: Add system_dfl_long_wq for long unbound works
  tools/workqueue/wq_dump.py: add NODE prefix to all node columns
  tools/workqueue/wq_dump.py: fix column alignment in node_nr/max_active section
  tools/workqueue/wq_dump.py: remove backslash separator from node_nr/max_active header
  efi: Allow to expose the workqueue via sysfs
  workqueue: Allow to expose ordered workqueues via sysfs
  ...
parents b71f0be2 76af5464
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -8543,7 +8543,8 @@ Kernel parameters
        workqueue.default_affinity_scope=
			Select the default affinity scope to use for unbound
			workqueues. Can be one of "cpu", "smt", "cache",
			"numa" and "system". Default is "cache". For more
			"cache_shard", "numa" and "system". Default is
			"cache_shard". For more
			information, see the Affinity Scopes section in
			Documentation/core-api/workqueue.rst.

+10 −4
Original line number Diff line number Diff line
@@ -378,9 +378,9 @@ Affinity Scopes

An unbound workqueue groups CPUs according to its affinity scope to improve
cache locality. For example, if a workqueue is using the default affinity
scope of "cache", it will group CPUs according to last level cache
boundaries. A work item queued on the workqueue will be assigned to a worker
on one of the CPUs which share the last level cache with the issuing CPU.
scope of "cache_shard", it will group CPUs into sub-LLC shards. A work item
queued on the workqueue will be assigned to a worker on one of the CPUs
within the same shard as the issuing CPU.
Once started, the worker may or may not be allowed to move outside the scope
depending on the ``affinity_strict`` setting of the scope.

@@ -402,7 +402,13 @@ Workqueue currently supports the following affinity scopes.
``cache``
  CPUs are grouped according to cache boundaries. Which specific cache
  boundary is used is determined by the arch code. L3 is used in a lot of
  cases. This is the default affinity scope.
  cases.

``cache_shard``
  CPUs are grouped into sub-LLC shards of at most ``wq_cache_shard_size``
  cores (default 8, tunable via the ``workqueue.cache_shard_size`` boot
  parameter). Shards are always split on core (SMT group) boundaries.
  This is the default affinity scope.

``numa``
  CPUs are grouped according to NUMA boundaries.
+4 −0
Original line number Diff line number Diff line
@@ -464,3 +464,7 @@ SPI

WATCHDOG
  devm_watchdog_register_device()

WORKQUEUE
  devm_alloc_workqueue()
  devm_alloc_ordered_workqueue()
+1 −1
Original line number Diff line number Diff line
@@ -423,7 +423,7 @@ static int __init efisubsys_init(void)
		 * ordered workqueue (which creates only one execution context)
		 * should suffice for all our needs.
		 */
		efi_rts_wq = alloc_ordered_workqueue("efi_rts_wq", 0);
		efi_rts_wq = alloc_ordered_workqueue("efi_runtime", WQ_SYSFS);
		if (!efi_rts_wq) {
			pr_err("Creating efi_rts_wq failed, EFI runtime services disabled.\n");
			clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+38 −9
Original line number Diff line number Diff line
@@ -131,8 +131,9 @@ struct rcu_work {
enum wq_affn_scope {
	WQ_AFFN_DFL,			/* use system default */
	WQ_AFFN_CPU,			/* one pod per CPU */
	WQ_AFFN_SMT,			/* one pod poer SMT */
	WQ_AFFN_SMT,			/* one pod per SMT */
	WQ_AFFN_CACHE,			/* one pod per LLC */
	WQ_AFFN_CACHE_SHARD,		/* synthetic sub-LLC shards */
	WQ_AFFN_NUMA,			/* one pod per NUMA node */
	WQ_AFFN_SYSTEM,			/* one pod across the whole system */

@@ -440,6 +441,9 @@ enum wq_consts {
 * system_long_wq is similar to system_percpu_wq but may host long running
 * works.  Queue flushing might take relatively long.
 *
 * system_dfl_long_wq is similar to system_dfl_wq but it may host long running
 * works.
 *
 * system_dfl_wq is unbound workqueue.  Workers are not bound to
 * any specific CPU, not concurrency managed, and all queued works are
 * executed immediately as long as max_active limit is not reached and
@@ -468,6 +472,7 @@ extern struct workqueue_struct *system_power_efficient_wq;
extern struct workqueue_struct *system_freezable_power_efficient_wq;
extern struct workqueue_struct *system_bh_wq;
extern struct workqueue_struct *system_bh_highpri_wq;
extern struct workqueue_struct *system_dfl_long_wq;

void workqueue_softirq_action(bool highpri);
void workqueue_softirq_dead(unsigned int cpu);
@@ -512,6 +517,26 @@ __printf(1, 4) struct workqueue_struct *
alloc_workqueue_noprof(const char *fmt, unsigned int flags, int max_active, ...);
#define alloc_workqueue(...)	alloc_hooks(alloc_workqueue_noprof(__VA_ARGS__))

/**
 * devm_alloc_workqueue - Resource-managed allocate a workqueue
 * @dev: Device to allocate workqueue for
 * @fmt: printf format for the name of the workqueue
 * @flags: WQ_* flags
 * @max_active: max in-flight work items, 0 for default
 * @...: args for @fmt
 *
 * Resource managed workqueue, see alloc_workqueue() for details.
 *
 * The workqueue will be automatically destroyed on driver detach.  Typically
 * this should be used in drivers already relying on devm interfaces.
 *
 * RETURNS:
 * Pointer to the allocated workqueue on success, %NULL on failure.
 */
__printf(2, 5) struct workqueue_struct *
devm_alloc_workqueue(struct device *dev, const char *fmt, unsigned int flags,
		     int max_active, ...);

#ifdef CONFIG_LOCKDEP
/**
 * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map
@@ -568,6 +593,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active,
 */
#define alloc_ordered_workqueue(fmt, flags, args...)			\
	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
/*
 * Device-managed counterpart of alloc_ordered_workqueue(): forwards to
 * devm_alloc_workqueue() with WQ_UNBOUND | __WQ_ORDERED OR'ed into @flags
 * and max_active fixed at 1.
 */
#define devm_alloc_ordered_workqueue(dev, fmt, flags, args...)		\
	devm_alloc_workqueue(dev, fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)

#define create_workqueue(name)						\
	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM | WQ_PERCPU, 1, (name))
@@ -712,14 +739,14 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work)
}

/**
 * schedule_work - put work task in global workqueue
 * schedule_work - put work task in per-CPU workqueue
 * @work: job to be done
 *
 * Returns %false if @work was already on the kernel-global workqueue and
 * Returns %false if @work was already on the system per-CPU workqueue and
 * %true otherwise.
 *
 * This puts a job in the kernel-global workqueue if it was not already
 * queued and leaves it in the same position on the kernel-global
 * This puts a job in the system per-CPU workqueue if it was not already
 * queued and leaves it in the same position on the system per-CPU
 * workqueue otherwise.
 *
 * Shares the same memory-ordering properties of queue_work(), cf. the
@@ -783,6 +810,8 @@ extern void __warn_flushing_systemwide_wq(void)
	     _wq == system_highpri_wq) ||				\
	    (__builtin_constant_p(_wq == system_long_wq) &&		\
	     _wq == system_long_wq) ||					\
	    (__builtin_constant_p(_wq == system_dfl_long_wq) &&		\
	     _wq == system_dfl_long_wq) ||					\
	    (__builtin_constant_p(_wq == system_dfl_wq) &&		\
	     _wq == system_dfl_wq) ||				\
	    (__builtin_constant_p(_wq == system_freezable_wq) &&	\
@@ -796,12 +825,12 @@ extern void __warn_flushing_systemwide_wq(void)
})

/**
 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
 * schedule_delayed_work_on - queue work in per-CPU workqueue on CPU after delay
 * @cpu: cpu to use
 * @dwork: job to be done
 * @delay: number of jiffies to wait
 *
 * After waiting for a given time this puts a job in the kernel-global
 * After waiting for a given time this puts a job in the system per-CPU
 * workqueue on the specified CPU.
 */
static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
@@ -811,11 +840,11 @@ static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
}

/**
 * schedule_delayed_work - put work task in global workqueue after delay
 * schedule_delayed_work - put work task in per-CPU workqueue after delay
 * @dwork: job to be done
 * @delay: number of jiffies to wait or 0 for immediate execution
 *
 * After waiting for a given time this puts a job in the kernel-global
 * After waiting for a given time this puts a job in the system per-CPU
 * workqueue.
 */
static inline bool schedule_delayed_work(struct delayed_work *dwork,
Loading