Commit 45141eea authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue updates from Tejun Heo:
 "Workqueue now prints debug information at the end of sysrq-t which
  should be helpful when tracking down suspected workqueue stalls.  It
  only prints out the ones with something currently going on so it
  shouldn't add much output in most cases"

* 'for-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Reorder sysfs code
  percpu: Fix trivial typos in comments
  workqueue: dump workqueues on sysrq-t
  workqueue: keep track of the flushing task and pool manager
  workqueue: make the workqueues list RCU walkable
parents 8954672d 6ba94429
......@@ -275,6 +275,7 @@ static struct sysrq_key_op sysrq_showregs_op = {
static void sysrq_handle_showstate(int key)
{
show_state();
show_workqueue_state();
}
static struct sysrq_key_op sysrq_showstate_op = {
.handler = sysrq_handle_showstate,
......
......@@ -454,6 +454,7 @@ extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
extern unsigned int work_busy(struct work_struct *work);
extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
extern void print_worker_info(const char *log_lvl, struct task_struct *task);
extern void show_workqueue_state(void);
/**
* queue_work - queue work on a workqueue
......
......@@ -159,6 +159,7 @@ struct worker_pool {
/* see manage_workers() for details on the two manager mutexes */
struct mutex manager_arb; /* manager arbitration */
struct worker *manager; /* L: purely informational */
struct mutex attach_mutex; /* attach/detach exclusion */
struct list_head workers; /* A: attached workers */
struct completion *detach_completion; /* all workers detached */
......@@ -230,7 +231,7 @@ struct wq_device;
*/
struct workqueue_struct {
struct list_head pwqs; /* WR: all pwqs of this wq */
struct list_head list; /* PL: list of all workqueues */
struct list_head list; /* PR: list of all workqueues */
struct mutex mutex; /* protects this wq */
int work_color; /* WQ: current work color */
......@@ -257,6 +258,13 @@ struct workqueue_struct {
#endif
char name[WQ_NAME_LEN]; /* I: workqueue name */
/*
* Destruction of workqueue_struct is sched-RCU protected to allow
* walking the workqueues list without grabbing wq_pool_mutex.
* This is used to dump all workqueues from sysrq.
*/
struct rcu_head rcu;
/* hot fields used during command issue, aligned to cacheline */
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
......@@ -288,7 +296,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
static LIST_HEAD(workqueues); /* PL: list of all workqueues */
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
static bool workqueue_freezing; /* PL: have wqs started freezing? */
/* the per-cpu worker pools */
......@@ -324,6 +332,7 @@ EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
static int worker_thread(void *__worker);
static void copy_workqueue_attrs(struct workqueue_attrs *to,
const struct workqueue_attrs *from);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
......@@ -1911,9 +1920,11 @@ static bool manage_workers(struct worker *worker)
*/
if (!mutex_trylock(&pool->manager_arb))
return false;
pool->manager = worker;
maybe_create_worker(pool);
pool->manager = NULL;
mutex_unlock(&pool->manager_arb);
return true;
}
......@@ -2303,6 +2314,7 @@ static int rescuer_thread(void *__rescuer)
struct wq_barrier {
struct work_struct work;
struct completion done;
struct task_struct *task; /* purely informational */
};
static void wq_barrier_func(struct work_struct *work)
......@@ -2351,6 +2363,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
init_completion(&barr->done);
barr->task = current;
/*
* If @target is currently being executed, schedule the
......@@ -2989,624 +3002,319 @@ int execute_in_process_context(work_func_t fn, struct execute_work *ew)
}
EXPORT_SYMBOL_GPL(execute_in_process_context);
#ifdef CONFIG_SYSFS
/*
* Workqueues with WQ_SYSFS flag set is visible to userland via
* /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the
* following attributes.
*
* per_cpu RO bool : whether the workqueue is per-cpu or unbound
* max_active RW int : maximum number of in-flight work items
*
* Unbound workqueues have the following extra attributes.
/**
* free_workqueue_attrs - free a workqueue_attrs
* @attrs: workqueue_attrs to free
*
* id RO int : the associated pool ID
* nice RW int : nice value of the workers
* cpumask RW mask : bitmask of allowed CPUs for the workers
* Undo alloc_workqueue_attrs().
*/
struct wq_device {
struct workqueue_struct *wq;
struct device dev;
};
static struct workqueue_struct *dev_to_wq(struct device *dev)
void free_workqueue_attrs(struct workqueue_attrs *attrs)
{
struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
return wq_dev->wq;
if (attrs) {
free_cpumask_var(attrs->cpumask);
kfree(attrs);
}
}
static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
char *buf)
/**
* alloc_workqueue_attrs - allocate a workqueue_attrs
* @gfp_mask: allocation mask to use
*
* Allocate a new workqueue_attrs, initialize with default settings and
* return it.
*
* Return: The allocated new workqueue_attr on success. %NULL on failure.
*/
struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
{
struct workqueue_struct *wq = dev_to_wq(dev);
struct workqueue_attrs *attrs;
return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
attrs = kzalloc(sizeof(*attrs), gfp_mask);
if (!attrs)
goto fail;
if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
goto fail;
cpumask_copy(attrs->cpumask, cpu_possible_mask);
return attrs;
fail:
free_workqueue_attrs(attrs);
return NULL;
}
static DEVICE_ATTR_RO(per_cpu);
static ssize_t max_active_show(struct device *dev,
struct device_attribute *attr, char *buf)
static void copy_workqueue_attrs(struct workqueue_attrs *to,
const struct workqueue_attrs *from)
{
struct workqueue_struct *wq = dev_to_wq(dev);
return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
to->nice = from->nice;
cpumask_copy(to->cpumask, from->cpumask);
/*
* Unlike hash and equality test, this function doesn't ignore
* ->no_numa as it is used for both pool and wq attrs. Instead,
* get_unbound_pool() explicitly clears ->no_numa after copying.
*/
to->no_numa = from->no_numa;
}
static ssize_t max_active_store(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
/* hash value of the content of @attr */
static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
{
struct workqueue_struct *wq = dev_to_wq(dev);
int val;
if (sscanf(buf, "%d", &val) != 1 || val <= 0)
return -EINVAL;
u32 hash = 0;
workqueue_set_max_active(wq, val);
return count;
hash = jhash_1word(attrs->nice, hash);
hash = jhash(cpumask_bits(attrs->cpumask),
BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
return hash;
}
static DEVICE_ATTR_RW(max_active);
static struct attribute *wq_sysfs_attrs[] = {
&dev_attr_per_cpu.attr,
&dev_attr_max_active.attr,
NULL,
};
ATTRIBUTE_GROUPS(wq_sysfs);
static ssize_t wq_pool_ids_show(struct device *dev,
struct device_attribute *attr, char *buf)
/* content equality test */
static bool wqattrs_equal(const struct workqueue_attrs *a,
const struct workqueue_attrs *b)
{
struct workqueue_struct *wq = dev_to_wq(dev);
const char *delim = "";
int node, written = 0;
rcu_read_lock_sched();
for_each_node(node) {
written += scnprintf(buf + written, PAGE_SIZE - written,
"%s%d:%d", delim, node,
unbound_pwq_by_node(wq, node)->pool->id);
delim = " ";
}
written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
rcu_read_unlock_sched();
return written;
if (a->nice != b->nice)
return false;
if (!cpumask_equal(a->cpumask, b->cpumask))
return false;
return true;
}
static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
char *buf)
/**
* init_worker_pool - initialize a newly zalloc'd worker_pool
* @pool: worker_pool to initialize
*
* Initiailize a newly zalloc'd @pool. It also allocates @pool->attrs.
*
* Return: 0 on success, -errno on failure. Even on failure, all fields
* inside @pool proper are initialized and put_unbound_pool() can be called
* on @pool safely to release it.
*/
static int init_worker_pool(struct worker_pool *pool)
{
struct workqueue_struct *wq = dev_to_wq(dev);
int written;
spin_lock_init(&pool->lock);
pool->id = -1;
pool->cpu = -1;
pool->node = NUMA_NO_NODE;
pool->flags |= POOL_DISASSOCIATED;
INIT_LIST_HEAD(&pool->worklist);
INIT_LIST_HEAD(&pool->idle_list);
hash_init(pool->busy_hash);
mutex_lock(&wq->mutex);
written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
mutex_unlock(&wq->mutex);
init_timer_deferrable(&pool->idle_timer);
pool->idle_timer.function = idle_worker_timeout;
pool->idle_timer.data = (unsigned long)pool;
return written;
}
setup_timer(&pool->mayday_timer, pool_mayday_timeout,
(unsigned long)pool);
/* prepare workqueue_attrs for sysfs store operations */
static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
{
struct workqueue_attrs *attrs;
mutex_init(&pool->manager_arb);
mutex_init(&pool->attach_mutex);
INIT_LIST_HEAD(&pool->workers);
attrs = alloc_workqueue_attrs(GFP_KERNEL);
if (!attrs)
return NULL;
ida_init(&pool->worker_ida);
INIT_HLIST_NODE(&pool->hash_node);
pool->refcnt = 1;
mutex_lock(&wq->mutex);
copy_workqueue_attrs(attrs, wq->unbound_attrs);
mutex_unlock(&wq->mutex);
return attrs;
/* shouldn't fail above this point */
pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
if (!pool->attrs)
return -ENOMEM;
return 0;
}
static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
static void rcu_free_wq(struct rcu_head *rcu)
{
struct workqueue_struct *wq = dev_to_wq(dev);
struct workqueue_attrs *attrs;
int ret;
attrs = wq_sysfs_prep_attrs(wq);
if (!attrs)
return -ENOMEM;
struct workqueue_struct *wq =
container_of(rcu, struct workqueue_struct, rcu);
if (sscanf(buf, "%d", &attrs->nice) == 1 &&
attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
ret = apply_workqueue_attrs(wq, attrs);
if (!(wq->flags & WQ_UNBOUND))
free_percpu(wq->cpu_pwqs);
else
ret = -EINVAL;
free_workqueue_attrs(wq->unbound_attrs);
free_workqueue_attrs(attrs);
return ret ?: count;
kfree(wq->rescuer);
kfree(wq);
}
static ssize_t wq_cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf)
static void rcu_free_pool(struct rcu_head *rcu)
{
struct workqueue_struct *wq = dev_to_wq(dev);
int written;
struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
mutex_lock(&wq->mutex);
written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
cpumask_pr_args(wq->unbound_attrs->cpumask));
mutex_unlock(&wq->mutex);
return written;
ida_destroy(&pool->worker_ida);
free_workqueue_attrs(pool->attrs);
kfree(pool);
}
static ssize_t wq_cpumask_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
/**
* put_unbound_pool - put a worker_pool
* @pool: worker_pool to put
*
* Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
* safe manner. get_unbound_pool() calls this function on its failure path
* and this function should be able to release pools which went through,
* successfully or not, init_worker_pool().
*
* Should be called with wq_pool_mutex held.
*/
static void put_unbound_pool(struct worker_pool *pool)
{
struct workqueue_struct *wq = dev_to_wq(dev);
struct workqueue_attrs *attrs;
int ret;
DECLARE_COMPLETION_ONSTACK(detach_completion);
struct worker *worker;
attrs = wq_sysfs_prep_attrs(wq);
if (!attrs)
return -ENOMEM;
lockdep_assert_held(&wq_pool_mutex);
ret = cpumask_parse(buf, attrs->cpumask);
if (!ret)
ret = apply_workqueue_attrs(wq, attrs);
if (--pool->refcnt)
return;
free_workqueue_attrs(attrs);
return ret ?: count;
}
/* sanity checks */
if (WARN_ON(!(pool->cpu < 0)) ||
WARN_ON(!list_empty(&pool->worklist)))
return;
static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct workqueue_struct *wq = dev_to_wq(dev);
int written;
mutex_lock(&wq->mutex);
written = scnprintf(buf, PAGE_SIZE, "%d\n",
!wq->unbound_attrs->no_numa);
mutex_unlock(&wq->mutex);
return written;
}
static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct workqueue_struct *wq = dev_to_wq(dev);
struct workqueue_attrs *attrs;
int v, ret;
attrs = wq_sysfs_prep_attrs(wq);
if (!attrs)
return -ENOMEM;
/* release id and unhash */
if (pool->id >= 0)
idr_remove(&worker_pool_idr, pool->id);
hash_del(&pool->hash_node);
ret = -EINVAL;
if (sscanf(buf, "%d", &v) == 1) {
attrs->no_numa = !v;
ret = apply_workqueue_attrs(wq, attrs);
}
/*
* Become the manager and destroy all workers. Grabbing
* manager_arb prevents @pool's workers from blocking on
* attach_mutex.
*/
mutex_lock(&pool->manager_arb);
free_workqueue_attrs(attrs);
return ret ?: count;
}
spin_lock_irq(&pool->lock);
while ((worker = first_idle_worker(pool)))
destroy_worker(worker);
WARN_ON(pool->nr_workers || pool->nr_idle);
spin_unlock_irq(&pool->lock);
static struct device_attribute wq_sysfs_unbound_attrs[] = {
__ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
__ATTR(nice, 0644, wq_nice_show, wq_nice_store),
__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
__ATTR(numa, 0644, wq_numa_show, wq_numa_store),
__ATTR_NULL,
};
mutex_lock(&pool->attach_mutex);
if (!list_empty(&pool->workers))
pool->detach_completion = &detach_completion;
mutex_unlock(&pool->attach_mutex);
static struct bus_type wq_subsys = {
.name = "workqueue",
.dev_groups = wq_sysfs_groups,
};
if (pool->detach_completion)
wait_for_completion(pool->detach_completion);
static int __init wq_sysfs_init(void)
{
return subsys_virtual_register(&wq_subsys, NULL);
}
core_initcall(wq_sysfs_init);
mutex_unlock(&pool->manager_arb);
static void wq_device_release(struct device *dev)
{
struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
/* shut down the timers */
del_timer_sync(&pool->idle_timer);
del_timer_sync(&pool->mayday_timer);
kfree(wq_dev);
/* sched-RCU protected to allow dereferences from get_work_pool() */
call_rcu_sched(&pool->rcu, rcu_free_pool);
}
/**
* workqueue_sysfs_register - make a workqueue visible in sysfs
* @wq: the workqueue to register
* get_unbound_pool - get a worker_pool with the specified attributes
* @attrs: the attributes of the worker_pool to get
*
* Expose @wq in sysfs under /sys/bus/workqueue/devices.
* alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
* which is the preferred method.
* Obtain a worker_pool which has the same attributes as @attrs, bump the
* reference count and return it. If there already is a matching
* worker_pool, it will be used; otherwise, this function attempts to
* create a new one.
*
* Workqueue user should use this function directly iff it wants to apply
* workqueue_attrs before making the workqueue visible in sysfs; otherwise,
* apply_workqueue_attrs() may race against userland updating the
* attributes.
* Should be called with wq_pool_mutex held.
*
* Return: 0 on success, -errno on failure.
* Return: On success, a worker_pool with the same attributes as @attrs.
* On failure, %NULL.
*/
int workqueue_sysfs_register(struct workqueue_struct *wq)
static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
{
struct wq_device *wq_dev;
int ret;
u32 hash = wqattrs_hash(attrs);
struct worker_pool *pool;
int node;
/*
* Adjusting max_active or creating new pwqs by applyting
* attributes breaks ordering guarantee. Disallow exposing ordered
* workqueues.
*/
if (WARN_ON(wq->flags & __WQ_ORDERED))
return -EINVAL;
lockdep_assert_held(&wq_pool_mutex);
wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
if (!wq_dev)
return -ENOMEM;
/* do we already have a matching pool? */
hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
if (wqattrs_equal(pool->attrs, attrs)) {
pool->refcnt++;
return pool;
}
}
wq_dev->wq = wq;
wq_dev->dev.bus = &wq_subsys;
wq_dev->dev.init_name = wq->name;
wq_dev->dev.release = wq_device_release;
/* nope, create a new one */
pool = kzalloc(sizeof(*pool), GFP_KERNEL);
if (!pool || init_worker_pool(pool) < 0)
goto fail;
lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
copy_workqueue_attrs(pool->attrs, attrs);
/*
* unbound_attrs are created separately. Suppress uevent until
* everything is ready.
* no_numa isn't a worker_pool attribute, always clear it. See
* 'struct workqueue_attrs' comments for detail.
*/
dev_set_uevent_suppress(&wq_dev->dev, true);
ret = device_register(&wq_dev->dev);
if (ret) {
kfree(wq_dev);
wq->wq_dev = NULL;
return ret;
}
if (wq->flags & WQ_UNBOUND) {
struct device_attribute *attr;
pool->attrs->no_numa = false;
for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
ret = device_create_file(&wq_dev->dev, attr);
if (ret) {
device_unregister(&wq_dev->dev);
wq->wq_dev = NULL;
return ret;
/* if cpumask is contained inside a NUMA node, we belong to that node */
if (wq_numa_enabled) {
for_each_node(node) {
if (cpumask_subset(pool->attrs->cpumask,
wq_numa_possible_cpumask[node])) {
pool->node = node;
break;
}
}
}
dev_set_uevent_suppress(&wq_dev->dev, false);
kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
return 0;
}
if (worker_pool_assign_id(pool) < 0)
goto fail;
/**
* workqueue_sysfs_unregister - undo workqueue_sysfs_register()
* @wq: the workqueue to unregister
*
* If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
*/
static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
{
struct wq_device *wq_dev = wq->wq_dev;
/* create and start the initial worker */
if (!create_worker(pool))
goto fail;
if (!wq->wq_dev)
return;
/* install */
hash_add(unbound_pool_hash, &pool->hash_node, hash);
wq->wq_dev = NULL;
device_unregister(&wq_dev->dev);
return pool;
fail:
if (pool)
put_unbound_pool(pool);
return NULL;
}
#else /* CONFIG_SYSFS */
static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
#endif /* CONFIG_SYSFS */
/**