This source file includes the following definitions:
- work_debug_hint
- work_is_static_object
- work_fixup_init
- work_fixup_free
- debug_work_activate
- debug_work_deactivate
- __init_work
- destroy_work_on_stack
- destroy_delayed_work_on_stack
- debug_work_activate
- debug_work_deactivate
- worker_pool_assign_id
- unbound_pwq_by_node
- work_color_to_flags
- get_work_color
- work_next_color
- set_work_data
- set_work_pwq
- set_work_pool_and_keep_pending
- set_work_pool_and_clear_pending
- clear_work_data
- get_work_pwq
- get_work_pool
- get_work_pool_id
- mark_work_canceling
- work_is_canceling
- __need_more_worker
- need_more_worker
- may_start_working
- keep_working
- need_to_create_worker
- too_many_workers
- first_idle_worker
- wake_up_worker
- wq_worker_running
- wq_worker_sleeping
- wq_worker_last_func
- worker_set_flags
- worker_clr_flags
- find_worker_executing_work
- move_linked_works
- get_pwq
- put_pwq
- put_pwq_unlocked
- pwq_activate_delayed_work
- pwq_activate_first_delayed
- pwq_dec_nr_in_flight
- try_to_grab_pending
- insert_work
- is_chained_work
- wq_select_unbound_cpu
- __queue_work
- queue_work_on
- workqueue_select_cpu_near
- queue_work_node
- delayed_work_timer_fn
- __queue_delayed_work
- queue_delayed_work_on
- mod_delayed_work_on
- rcu_work_rcufn
- queue_rcu_work
- worker_enter_idle
- worker_leave_idle
- alloc_worker
- worker_attach_to_pool
- worker_detach_from_pool
- create_worker
- destroy_worker
- idle_worker_timeout
- send_mayday
- pool_mayday_timeout
- maybe_create_worker
- manage_workers
- process_one_work
- process_scheduled_works
- set_pf_worker
- worker_thread
- rescuer_thread
- check_flush_dependency
- wq_barrier_func
- insert_wq_barrier
- flush_workqueue_prep_pwqs
- flush_workqueue
- drain_workqueue
- start_flush_work
- __flush_work
- flush_work
- cwt_wakefn
- __cancel_work_timer
- cancel_work_sync
- flush_delayed_work
- flush_rcu_work
- __cancel_work
- cancel_delayed_work
- cancel_delayed_work_sync
- schedule_on_each_cpu
- execute_in_process_context
- free_workqueue_attrs
- alloc_workqueue_attrs
- copy_workqueue_attrs
- wqattrs_hash
- wqattrs_equal
- init_worker_pool
- wq_init_lockdep
- wq_unregister_lockdep
- wq_free_lockdep
- wq_init_lockdep
- wq_unregister_lockdep
- wq_free_lockdep
- rcu_free_wq
- rcu_free_pool
- put_unbound_pool
- get_unbound_pool
- rcu_free_pwq
- pwq_unbound_release_workfn
- pwq_adjust_max_active
- init_pwq
- link_pwq
- alloc_unbound_pwq
- wq_calc_node_cpumask
- numa_pwq_tbl_install
- apply_wqattrs_cleanup
- apply_wqattrs_prepare
- apply_wqattrs_commit
- apply_wqattrs_lock
- apply_wqattrs_unlock
- apply_workqueue_attrs_locked
- apply_workqueue_attrs
- wq_update_unbound_numa
- alloc_and_link_pwqs
- wq_clamp_max_active
- init_rescuer
- alloc_workqueue
- destroy_workqueue
- workqueue_set_max_active
- current_work
- current_is_workqueue_rescuer
- workqueue_congested
- work_busy
- set_worker_desc
- print_worker_info
- pr_cont_pool_info
- pr_cont_work
- show_pwq
- show_workqueue_state
- wq_worker_comm
- unbind_workers
- rebind_workers
- restore_unbound_workers_cpumask
- workqueue_prepare_cpu
- workqueue_online_cpu
- workqueue_offline_cpu
- work_for_cpu_fn
- work_on_cpu
- work_on_cpu_safe
- freeze_workqueues_begin
- freeze_workqueues_busy
- thaw_workqueues
- workqueue_apply_unbound_cpumask
- workqueue_set_unbound_cpumask
- dev_to_wq
- per_cpu_show
- max_active_show
- max_active_store
- wq_pool_ids_show
- wq_nice_show
- wq_sysfs_prep_attrs
- wq_nice_store
- wq_cpumask_show
- wq_cpumask_store
- wq_numa_show
- wq_numa_store
- wq_unbound_cpumask_show
- wq_unbound_cpumask_store
- wq_sysfs_init
- wq_device_release
- workqueue_sysfs_register
- workqueue_sysfs_unregister
- workqueue_sysfs_unregister
- wq_watchdog_reset_touched
- wq_watchdog_timer_fn
- wq_watchdog_touch
- wq_watchdog_set_thresh
- wq_watchdog_param_set_thresh
- wq_watchdog_init
- wq_watchdog_init
- wq_numa_init
- workqueue_init_early
- workqueue_init
28 #include <linux/export.h>
29 #include <linux/kernel.h>
30 #include <linux/sched.h>
31 #include <linux/init.h>
32 #include <linux/signal.h>
33 #include <linux/completion.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <linux/cpu.h>
37 #include <linux/notifier.h>
38 #include <linux/kthread.h>
39 #include <linux/hardirq.h>
40 #include <linux/mempolicy.h>
41 #include <linux/freezer.h>
42 #include <linux/debug_locks.h>
43 #include <linux/lockdep.h>
44 #include <linux/idr.h>
45 #include <linux/jhash.h>
46 #include <linux/hashtable.h>
47 #include <linux/rculist.h>
48 #include <linux/nodemask.h>
49 #include <linux/moduleparam.h>
50 #include <linux/uaccess.h>
51 #include <linux/sched/isolation.h>
52 #include <linux/nmi.h>
53
54 #include "workqueue_internal.h"
55
56 enum {
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73 POOL_MANAGER_ACTIVE = 1 << 0,
74 POOL_DISASSOCIATED = 1 << 2,
75
76
77 WORKER_DIE = 1 << 1,
78 WORKER_IDLE = 1 << 2,
79 WORKER_PREP = 1 << 3,
80 WORKER_CPU_INTENSIVE = 1 << 6,
81 WORKER_UNBOUND = 1 << 7,
82 WORKER_REBOUND = 1 << 8,
83
84 WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE |
85 WORKER_UNBOUND | WORKER_REBOUND,
86
87 NR_STD_WORKER_POOLS = 2,
88
89 UNBOUND_POOL_HASH_ORDER = 6,
90 BUSY_WORKER_HASH_ORDER = 6,
91
92 MAX_IDLE_WORKERS_RATIO = 4,
93 IDLE_WORKER_TIMEOUT = 300 * HZ,
94
95 MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2,
96
97
98 MAYDAY_INTERVAL = HZ / 10,
99 CREATE_COOLDOWN = HZ,
100
101
102
103
104
105 RESCUER_NICE_LEVEL = MIN_NICE,
106 HIGHPRI_NICE_LEVEL = MIN_NICE,
107
108 WQ_NAME_LEN = 24,
109 };
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147 struct worker_pool {
148 spinlock_t lock;
149 int cpu;
150 int node;
151 int id;
152 unsigned int flags;
153
154 unsigned long watchdog_ts;
155
156 struct list_head worklist;
157
158 int nr_workers;
159 int nr_idle;
160
161 struct list_head idle_list;
162 struct timer_list idle_timer;
163 struct timer_list mayday_timer;
164
165
166 DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
167
168
169 struct worker *manager;
170 struct list_head workers;
171 struct completion *detach_completion;
172
173 struct ida worker_ida;
174
175 struct workqueue_attrs *attrs;
176 struct hlist_node hash_node;
177 int refcnt;
178
179
180
181
182
183
184 atomic_t nr_running ____cacheline_aligned_in_smp;
185
186
187
188
189
190 struct rcu_head rcu;
191 } ____cacheline_aligned_in_smp;
192
193
194
195
196
197
198
199 struct pool_workqueue {
200 struct worker_pool *pool;
201 struct workqueue_struct *wq;
202 int work_color;
203 int flush_color;
204 int refcnt;
205 int nr_in_flight[WORK_NR_COLORS];
206
207 int nr_active;
208 int max_active;
209 struct list_head delayed_works;
210 struct list_head pwqs_node;
211 struct list_head mayday_node;
212
213
214
215
216
217
218
219 struct work_struct unbound_release_work;
220 struct rcu_head rcu;
221 } __aligned(1 << WORK_STRUCT_FLAG_BITS);
222
223
224
225
226 struct wq_flusher {
227 struct list_head list;
228 int flush_color;
229 struct completion done;
230 };
231
232 struct wq_device;
233
234
235
236
237
238 struct workqueue_struct {
239 struct list_head pwqs;
240 struct list_head list;
241
242 struct mutex mutex;
243 int work_color;
244 int flush_color;
245 atomic_t nr_pwqs_to_flush;
246 struct wq_flusher *first_flusher;
247 struct list_head flusher_queue;
248 struct list_head flusher_overflow;
249
250 struct list_head maydays;
251 struct worker *rescuer;
252
253 int nr_drainers;
254 int saved_max_active;
255
256 struct workqueue_attrs *unbound_attrs;
257 struct pool_workqueue *dfl_pwq;
258
259 #ifdef CONFIG_SYSFS
260 struct wq_device *wq_dev;
261 #endif
262 #ifdef CONFIG_LOCKDEP
263 char *lock_name;
264 struct lock_class_key key;
265 struct lockdep_map lockdep_map;
266 #endif
267 char name[WQ_NAME_LEN];
268
269
270
271
272
273
274 struct rcu_head rcu;
275
276
277 unsigned int flags ____cacheline_aligned;
278 struct pool_workqueue __percpu *cpu_pwqs;
279 struct pool_workqueue __rcu *numa_pwq_tbl[];
280 };
281
282 static struct kmem_cache *pwq_cache;
283
284 static cpumask_var_t *wq_numa_possible_cpumask;
285
286
287 static bool wq_disable_numa;
288 module_param_named(disable_numa, wq_disable_numa, bool, 0444);
289
290
291 static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
292 module_param_named(power_efficient, wq_power_efficient, bool, 0444);
293
294 static bool wq_online;
295
296 static bool wq_numa_enabled;
297
298
299 static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
300
301 static DEFINE_MUTEX(wq_pool_mutex);
302 static DEFINE_MUTEX(wq_pool_attach_mutex);
303 static DEFINE_SPINLOCK(wq_mayday_lock);
304 static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait);
305
306 static LIST_HEAD(workqueues);
307 static bool workqueue_freezing;
308
309
310 static cpumask_var_t wq_unbound_cpumask;
311
312
313 static DEFINE_PER_CPU(int, wq_rr_cpu_last);
314
315
316
317
318
319
320 #ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
321 static bool wq_debug_force_rr_cpu = true;
322 #else
323 static bool wq_debug_force_rr_cpu = false;
324 #endif
325 module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
326
327
328 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
329
330 static DEFINE_IDR(worker_pool_idr);
331
332
333 static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
334
335
336 static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
337
338
339 static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
340
341 struct workqueue_struct *system_wq __read_mostly;
342 EXPORT_SYMBOL(system_wq);
343 struct workqueue_struct *system_highpri_wq __read_mostly;
344 EXPORT_SYMBOL_GPL(system_highpri_wq);
345 struct workqueue_struct *system_long_wq __read_mostly;
346 EXPORT_SYMBOL_GPL(system_long_wq);
347 struct workqueue_struct *system_unbound_wq __read_mostly;
348 EXPORT_SYMBOL_GPL(system_unbound_wq);
349 struct workqueue_struct *system_freezable_wq __read_mostly;
350 EXPORT_SYMBOL_GPL(system_freezable_wq);
351 struct workqueue_struct *system_power_efficient_wq __read_mostly;
352 EXPORT_SYMBOL_GPL(system_power_efficient_wq);
353 struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
354 EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
355
356 static int worker_thread(void *__worker);
357 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
358
359 #define CREATE_TRACE_POINTS
360 #include <trace/events/workqueue.h>
361
362 #define assert_rcu_or_pool_mutex() \
363 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
364 !lockdep_is_held(&wq_pool_mutex), \
365 "RCU or wq_pool_mutex should be held")
366
367 #define assert_rcu_or_wq_mutex(wq) \
368 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
369 !lockdep_is_held(&wq->mutex), \
370 "RCU or wq->mutex should be held")
371
372 #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
373 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
374 !lockdep_is_held(&wq->mutex) && \
375 !lockdep_is_held(&wq_pool_mutex), \
376 "RCU, wq->mutex or wq_pool_mutex should be held")
377
378 #define for_each_cpu_worker_pool(pool, cpu) \
379 for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
380 (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
381 (pool)++)
382
383
384
385
386
387
388
389
390
391
392
393
394
395 #define for_each_pool(pool, pi) \
396 idr_for_each_entry(&worker_pool_idr, pool, pi) \
397 if (({ assert_rcu_or_pool_mutex(); false; })) { } \
398 else
399
400
401
402
403
404
405
406
407
408
409
410 #define for_each_pool_worker(worker, pool) \
411 list_for_each_entry((worker), &(pool)->workers, node) \
412 if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
413 else
414
415
416
417
418
419
420
421
422
423
424
425
426
427 #define for_each_pwq(pwq, wq) \
428 list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
429 lockdep_is_held(&wq->mutex)) \
430 if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
431 else
432
433 #ifdef CONFIG_DEBUG_OBJECTS_WORK
434
435 static struct debug_obj_descr work_debug_descr;
436
437 static void *work_debug_hint(void *addr)
438 {
439 return ((struct work_struct *) addr)->func;
440 }
441
442 static bool work_is_static_object(void *addr)
443 {
444 struct work_struct *work = addr;
445
446 return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
447 }
448
449
450
451
452
453 static bool work_fixup_init(void *addr, enum debug_obj_state state)
454 {
455 struct work_struct *work = addr;
456
457 switch (state) {
458 case ODEBUG_STATE_ACTIVE:
459 cancel_work_sync(work);
460 debug_object_init(work, &work_debug_descr);
461 return true;
462 default:
463 return false;
464 }
465 }
466
467
468
469
470
471 static bool work_fixup_free(void *addr, enum debug_obj_state state)
472 {
473 struct work_struct *work = addr;
474
475 switch (state) {
476 case ODEBUG_STATE_ACTIVE:
477 cancel_work_sync(work);
478 debug_object_free(work, &work_debug_descr);
479 return true;
480 default:
481 return false;
482 }
483 }
484
485 static struct debug_obj_descr work_debug_descr = {
486 .name = "work_struct",
487 .debug_hint = work_debug_hint,
488 .is_static_object = work_is_static_object,
489 .fixup_init = work_fixup_init,
490 .fixup_free = work_fixup_free,
491 };
492
493 static inline void debug_work_activate(struct work_struct *work)
494 {
495 debug_object_activate(work, &work_debug_descr);
496 }
497
498 static inline void debug_work_deactivate(struct work_struct *work)
499 {
500 debug_object_deactivate(work, &work_debug_descr);
501 }
502
503 void __init_work(struct work_struct *work, int onstack)
504 {
505 if (onstack)
506 debug_object_init_on_stack(work, &work_debug_descr);
507 else
508 debug_object_init(work, &work_debug_descr);
509 }
510 EXPORT_SYMBOL_GPL(__init_work);
511
512 void destroy_work_on_stack(struct work_struct *work)
513 {
514 debug_object_free(work, &work_debug_descr);
515 }
516 EXPORT_SYMBOL_GPL(destroy_work_on_stack);
517
518 void destroy_delayed_work_on_stack(struct delayed_work *work)
519 {
520 destroy_timer_on_stack(&work->timer);
521 debug_object_free(&work->work, &work_debug_descr);
522 }
523 EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
524
525 #else
526 static inline void debug_work_activate(struct work_struct *work) { }
527 static inline void debug_work_deactivate(struct work_struct *work) { }
528 #endif
529
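
The on-stack helpers above pair with INIT_WORK_ONSTACK(). A minimal usage sketch follows; it is not part of workqueue.c, and my_onstack_fn()/run_onstack_work() are invented names:

#include <linux/printk.h>
#include <linux/workqueue.h>

static void my_onstack_fn(struct work_struct *work)
{
        pr_info("on-stack work executed\n");
}

static void run_onstack_work(void)
{
        struct work_struct work;

        INIT_WORK_ONSTACK(&work, my_onstack_fn);  /* registers the debug object as on-stack */
        schedule_work(&work);
        flush_work(&work);                        /* must finish before the stack frame goes away */
        destroy_work_on_stack(&work);             /* releases the debug object */
}
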
530
531
532
533
534
535
536
537 static int worker_pool_assign_id(struct worker_pool *pool)
538 {
539 int ret;
540
541 lockdep_assert_held(&wq_pool_mutex);
542
543 ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
544 GFP_KERNEL);
545 if (ret >= 0) {
546 pool->id = ret;
547 return 0;
548 }
549 return ret;
550 }
551
552
553
554
555
556
557
558
559
560
561
562
563
564 static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
565 int node)
566 {
567 assert_rcu_or_wq_mutex_or_pool_mutex(wq);
568
569
570
571
572
573
574
575 if (unlikely(node == NUMA_NO_NODE))
576 return wq->dfl_pwq;
577
578 return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
579 }
580
581 static unsigned int work_color_to_flags(int color)
582 {
583 return color << WORK_STRUCT_COLOR_SHIFT;
584 }
585
586 static int get_work_color(struct work_struct *work)
587 {
588 return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
589 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
590 }
591
592 static int work_next_color(int color)
593 {
594 return (color + 1) % WORK_NR_COLORS;
595 }
596
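
The three helpers above pack a small color counter into the flag word of work->data and step it modulo WORK_NR_COLORS. A user-space sketch of the same packing, using stand-in constants rather than the real WORK_STRUCT_* values:

#include <stdio.h>

/* Stand-in constants; the real ones live in include/linux/workqueue.h. */
#define COLOR_SHIFT 4
#define COLOR_BITS  4
#define NR_COLORS   ((1 << COLOR_BITS) - 1)   /* one value is reserved as "no color" */

static unsigned long color_to_flags(int color) { return (unsigned long)color << COLOR_SHIFT; }
static int flags_to_color(unsigned long data)  { return (data >> COLOR_SHIFT) & ((1 << COLOR_BITS) - 1); }
static int next_color(int color)               { return (color + 1) % NR_COLORS; }

int main(void)
{
        unsigned long data = color_to_flags(5);   /* color rides alongside the status bits */

        printf("stored=%d next=%d\n", flags_to_color(data), next_color(flags_to_color(data)));
        return 0;
}
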
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617 static inline void set_work_data(struct work_struct *work, unsigned long data,
618 unsigned long flags)
619 {
620 WARN_ON_ONCE(!work_pending(work));
621 atomic_long_set(&work->data, data | flags | work_static(work));
622 }
623
624 static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
625 unsigned long extra_flags)
626 {
627 set_work_data(work, (unsigned long)pwq,
628 WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
629 }
630
631 static void set_work_pool_and_keep_pending(struct work_struct *work,
632 int pool_id)
633 {
634 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
635 WORK_STRUCT_PENDING);
636 }
637
638 static void set_work_pool_and_clear_pending(struct work_struct *work,
639 int pool_id)
640 {
641
642
643
644
645
646
647 smp_wmb();
648 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677 smp_mb();
678 }
679
680 static void clear_work_data(struct work_struct *work)
681 {
682 smp_wmb();
683 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
684 }
685
686 static struct pool_workqueue *get_work_pwq(struct work_struct *work)
687 {
688 unsigned long data = atomic_long_read(&work->data);
689
690 if (data & WORK_STRUCT_PWQ)
691 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
692 else
693 return NULL;
694 }
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711 static struct worker_pool *get_work_pool(struct work_struct *work)
712 {
713 unsigned long data = atomic_long_read(&work->data);
714 int pool_id;
715
716 assert_rcu_or_pool_mutex();
717
718 if (data & WORK_STRUCT_PWQ)
719 return ((struct pool_workqueue *)
720 (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
721
722 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
723 if (pool_id == WORK_OFFQ_POOL_NONE)
724 return NULL;
725
726 return idr_find(&worker_pool_idr, pool_id);
727 }
728
729
730
731
732
733
734
735
736 static int get_work_pool_id(struct work_struct *work)
737 {
738 unsigned long data = atomic_long_read(&work->data);
739
740 if (data & WORK_STRUCT_PWQ)
741 return ((struct pool_workqueue *)
742 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
743
744 return data >> WORK_OFFQ_POOL_SHIFT;
745 }
746
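
set_work_data() and the get_work_*() readers above multiplex one atomic_long: when WORK_STRUCT_PWQ is set, the upper bits are a pool_workqueue pointer (valid because pwqs are aligned to 1 << WORK_STRUCT_FLAG_BITS); otherwise they carry an off-queue pool id. A small user-space sketch of that decode, with invented constants standing in for the WORK_STRUCT_*/WORK_OFFQ_* values:

#include <stdio.h>

#define F_PWQ           (1UL << 2)          /* stand-in for WORK_STRUCT_PWQ */
#define FLAG_MASK       ((1UL << 8) - 1)    /* low bits hold pending/linked/color flags */
#define OFFQ_POOL_SHIFT 5                   /* stand-in for WORK_OFFQ_POOL_SHIFT */

static void decode(unsigned long data)
{
        if (data & F_PWQ)
                printf("pool_workqueue pointer: %#lx\n", data & ~FLAG_MASK);
        else
                printf("off-queue pool id: %lu\n", data >> OFFQ_POOL_SHIFT);
}

int main(void)
{
        decode(0x12345600UL | F_PWQ);           /* queued: aligned pwq address plus flags */
        decode(7UL << OFFQ_POOL_SHIFT);         /* off queue: remembers pool id 7 */
        return 0;
}
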
747 static void mark_work_canceling(struct work_struct *work)
748 {
749 unsigned long pool_id = get_work_pool_id(work);
750
751 pool_id <<= WORK_OFFQ_POOL_SHIFT;
752 set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
753 }
754
755 static bool work_is_canceling(struct work_struct *work)
756 {
757 unsigned long data = atomic_long_read(&work->data);
758
759 return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
760 }
761
762
763
764
765
766
767
768 static bool __need_more_worker(struct worker_pool *pool)
769 {
770 return !atomic_read(&pool->nr_running);
771 }
772
773
774
775
776
777
778
779
780
781 static bool need_more_worker(struct worker_pool *pool)
782 {
783 return !list_empty(&pool->worklist) && __need_more_worker(pool);
784 }
785
786
787 static bool may_start_working(struct worker_pool *pool)
788 {
789 return pool->nr_idle;
790 }
791
792
793 static bool keep_working(struct worker_pool *pool)
794 {
795 return !list_empty(&pool->worklist) &&
796 atomic_read(&pool->nr_running) <= 1;
797 }
798
799
800 static bool need_to_create_worker(struct worker_pool *pool)
801 {
802 return need_more_worker(pool) && !may_start_working(pool);
803 }
804
805
806 static bool too_many_workers(struct worker_pool *pool)
807 {
808 bool managing = pool->flags & POOL_MANAGER_ACTIVE;
809 int nr_idle = pool->nr_idle + managing;
810 int nr_busy = pool->nr_workers - nr_idle;
811
812 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
813 }
814
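
Reading the check above with concrete numbers: two idle workers are always tolerated, and each further idle worker must be matched by at least MAX_IDLE_WORKERS_RATIO (4) busy ones. With nr_idle = 3 and nr_busy = 4, (3 - 2) * 4 >= 4 holds, so the pool is considered over-provisioned and the idle timer may start reaping; with nr_busy = 5 the same third idle worker is still kept.
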
815
816
817
818
819
820 static struct worker *first_idle_worker(struct worker_pool *pool)
821 {
822 if (unlikely(list_empty(&pool->idle_list)))
823 return NULL;
824
825 return list_first_entry(&pool->idle_list, struct worker, entry);
826 }
827
828
829
830
831
832
833
834
835
836
837 static void wake_up_worker(struct worker_pool *pool)
838 {
839 struct worker *worker = first_idle_worker(pool);
840
841 if (likely(worker))
842 wake_up_process(worker->task);
843 }
844
845
846
847
848
849
850
851 void wq_worker_running(struct task_struct *task)
852 {
853 struct worker *worker = kthread_data(task);
854
855 if (!worker->sleeping)
856 return;
857 if (!(worker->flags & WORKER_NOT_RUNNING))
858 atomic_inc(&worker->pool->nr_running);
859 worker->sleeping = 0;
860 }
861
862
863
864
865
866
867
868
869 void wq_worker_sleeping(struct task_struct *task)
870 {
871 struct worker *next, *worker = kthread_data(task);
872 struct worker_pool *pool;
873
874
875
876
877
878
879 if (worker->flags & WORKER_NOT_RUNNING)
880 return;
881
882 pool = worker->pool;
883
884 if (WARN_ON_ONCE(worker->sleeping))
885 return;
886
887 worker->sleeping = 1;
888 spin_lock_irq(&pool->lock);
889
890
891
892
893
894
895
896
897
898
899
900
901 if (atomic_dec_and_test(&pool->nr_running) &&
902 !list_empty(&pool->worklist)) {
903 next = first_idle_worker(pool);
904 if (next)
905 wake_up_process(next->task);
906 }
907 spin_unlock_irq(&pool->lock);
908 }
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934 work_func_t wq_worker_last_func(struct task_struct *task)
935 {
936 struct worker *worker = kthread_data(task);
937
938 return worker->last_func;
939 }
940
941
942
943
944
945
946
947
948
949
950
951 static inline void worker_set_flags(struct worker *worker, unsigned int flags)
952 {
953 struct worker_pool *pool = worker->pool;
954
955 WARN_ON_ONCE(worker->task != current);
956
957
958 if ((flags & WORKER_NOT_RUNNING) &&
959 !(worker->flags & WORKER_NOT_RUNNING)) {
960 atomic_dec(&pool->nr_running);
961 }
962
963 worker->flags |= flags;
964 }
965
966
967
968
969
970
971
972
973
974
975
976 static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
977 {
978 struct worker_pool *pool = worker->pool;
979 unsigned int oflags = worker->flags;
980
981 WARN_ON_ONCE(worker->task != current);
982
983 worker->flags &= ~flags;
984
985
986
987
988
989
990 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
991 if (!(worker->flags & WORKER_NOT_RUNNING))
992 atomic_inc(&pool->nr_running);
993 }
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028 static struct worker *find_worker_executing_work(struct worker_pool *pool,
1029 struct work_struct *work)
1030 {
1031 struct worker *worker;
1032
1033 hash_for_each_possible(pool->busy_hash, worker, hentry,
1034 (unsigned long)work)
1035 if (worker->current_work == work &&
1036 worker->current_func == work->func)
1037 return worker;
1038
1039 return NULL;
1040 }
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059 static void move_linked_works(struct work_struct *work, struct list_head *head,
1060 struct work_struct **nextp)
1061 {
1062 struct work_struct *n;
1063
1064
1065
1066
1067
1068 list_for_each_entry_safe_from(work, n, NULL, entry) {
1069 list_move_tail(&work->entry, head);
1070 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1071 break;
1072 }
1073
1074
1075
1076
1077
1078
1079 if (nextp)
1080 *nextp = n;
1081 }
1082
1083
1084
1085
1086
1087
1088
1089
1090 static void get_pwq(struct pool_workqueue *pwq)
1091 {
1092 lockdep_assert_held(&pwq->pool->lock);
1093 WARN_ON_ONCE(pwq->refcnt <= 0);
1094 pwq->refcnt++;
1095 }
1096
1097
1098
1099
1100
1101
1102
1103
1104 static void put_pwq(struct pool_workqueue *pwq)
1105 {
1106 lockdep_assert_held(&pwq->pool->lock);
1107 if (likely(--pwq->refcnt))
1108 return;
1109 if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
1110 return;
1111
1112
1113
1114
1115
1116
1117
1118
1119 schedule_work(&pwq->unbound_release_work);
1120 }
1121
1122
1123
1124
1125
1126
1127
1128 static void put_pwq_unlocked(struct pool_workqueue *pwq)
1129 {
1130 if (pwq) {
1131
1132
1133
1134
1135 spin_lock_irq(&pwq->pool->lock);
1136 put_pwq(pwq);
1137 spin_unlock_irq(&pwq->pool->lock);
1138 }
1139 }
1140
1141 static void pwq_activate_delayed_work(struct work_struct *work)
1142 {
1143 struct pool_workqueue *pwq = get_work_pwq(work);
1144
1145 trace_workqueue_activate_work(work);
1146 if (list_empty(&pwq->pool->worklist))
1147 pwq->pool->watchdog_ts = jiffies;
1148 move_linked_works(work, &pwq->pool->worklist, NULL);
1149 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
1150 pwq->nr_active++;
1151 }
1152
1153 static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
1154 {
1155 struct work_struct *work = list_first_entry(&pwq->delayed_works,
1156 struct work_struct, entry);
1157
1158 pwq_activate_delayed_work(work);
1159 }
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172 static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
1173 {
1174
1175 if (color == WORK_NO_COLOR)
1176 goto out_put;
1177
1178 pwq->nr_in_flight[color]--;
1179
1180 pwq->nr_active--;
1181 if (!list_empty(&pwq->delayed_works)) {
1182
1183 if (pwq->nr_active < pwq->max_active)
1184 pwq_activate_first_delayed(pwq);
1185 }
1186
1187
1188 if (likely(pwq->flush_color != color))
1189 goto out_put;
1190
1191
1192 if (pwq->nr_in_flight[color])
1193 goto out_put;
1194
1195
1196 pwq->flush_color = -1;
1197
1198
1199
1200
1201
1202 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1203 complete(&pwq->wq->first_flusher->done);
1204 out_put:
1205 put_pwq(pwq);
1206 }
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235 static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1236 unsigned long *flags)
1237 {
1238 struct worker_pool *pool;
1239 struct pool_workqueue *pwq;
1240
1241 local_irq_save(*flags);
1242
1243
1244 if (is_dwork) {
1245 struct delayed_work *dwork = to_delayed_work(work);
1246
1247
1248
1249
1250
1251
1252 if (likely(del_timer(&dwork->timer)))
1253 return 1;
1254 }
1255
1256
1257 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1258 return 0;
1259
1260 rcu_read_lock();
1261
1262
1263
1264
1265 pool = get_work_pool(work);
1266 if (!pool)
1267 goto fail;
1268
1269 spin_lock(&pool->lock);
1270
1271
1272
1273
1274
1275
1276
1277
1278 pwq = get_work_pwq(work);
1279 if (pwq && pwq->pool == pool) {
1280 debug_work_deactivate(work);
1281
1282
1283
1284
1285
1286
1287
1288
1289 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
1290 pwq_activate_delayed_work(work);
1291
1292 list_del_init(&work->entry);
1293 pwq_dec_nr_in_flight(pwq, get_work_color(work));
1294
1295
1296 set_work_pool_and_keep_pending(work, pool->id);
1297
1298 spin_unlock(&pool->lock);
1299 rcu_read_unlock();
1300 return 1;
1301 }
1302 spin_unlock(&pool->lock);
1303 fail:
1304 rcu_read_unlock();
1305 local_irq_restore(*flags);
1306 if (work_is_canceling(work))
1307 return -ENOENT;
1308 cpu_relax();
1309 return -EAGAIN;
1310 }
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325 static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1326 struct list_head *head, unsigned int extra_flags)
1327 {
1328 struct worker_pool *pool = pwq->pool;
1329
1330
1331 set_work_pwq(work, pwq, extra_flags);
1332 list_add_tail(&work->entry, head);
1333 get_pwq(pwq);
1334
1335
1336
1337
1338
1339
1340 smp_mb();
1341
1342 if (__need_more_worker(pool))
1343 wake_up_worker(pool);
1344 }
1345
1346
1347
1348
1349
1350 static bool is_chained_work(struct workqueue_struct *wq)
1351 {
1352 struct worker *worker;
1353
1354 worker = current_wq_worker();
1355
1356
1357
1358
1359 return worker && worker->current_pwq->wq == wq;
1360 }
1361
1362
1363
1364
1365
1366
1367 static int wq_select_unbound_cpu(int cpu)
1368 {
1369 static bool printed_dbg_warning;
1370 int new_cpu;
1371
1372 if (likely(!wq_debug_force_rr_cpu)) {
1373 if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
1374 return cpu;
1375 } else if (!printed_dbg_warning) {
1376 pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
1377 printed_dbg_warning = true;
1378 }
1379
1380 if (cpumask_empty(wq_unbound_cpumask))
1381 return cpu;
1382
1383 new_cpu = __this_cpu_read(wq_rr_cpu_last);
1384 new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
1385 if (unlikely(new_cpu >= nr_cpu_ids)) {
1386 new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
1387 if (unlikely(new_cpu >= nr_cpu_ids))
1388 return cpu;
1389 }
1390 __this_cpu_write(wq_rr_cpu_last, new_cpu);
1391
1392 return new_cpu;
1393 }
1394
1395 static void __queue_work(int cpu, struct workqueue_struct *wq,
1396 struct work_struct *work)
1397 {
1398 struct pool_workqueue *pwq;
1399 struct worker_pool *last_pool;
1400 struct list_head *worklist;
1401 unsigned int work_flags;
1402 unsigned int req_cpu = cpu;
1403
1404
1405
1406
1407
1408
1409
1410 lockdep_assert_irqs_disabled();
1411
1412 debug_work_activate(work);
1413
1414
1415 if (unlikely(wq->flags & __WQ_DRAINING) &&
1416 WARN_ON_ONCE(!is_chained_work(wq)))
1417 return;
1418 rcu_read_lock();
1419 retry:
1420
1421 if (wq->flags & WQ_UNBOUND) {
1422 if (req_cpu == WORK_CPU_UNBOUND)
1423 cpu = wq_select_unbound_cpu(raw_smp_processor_id());
1424 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
1425 } else {
1426 if (req_cpu == WORK_CPU_UNBOUND)
1427 cpu = raw_smp_processor_id();
1428 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
1429 }
1430
1431
1432
1433
1434
1435
1436 last_pool = get_work_pool(work);
1437 if (last_pool && last_pool != pwq->pool) {
1438 struct worker *worker;
1439
1440 spin_lock(&last_pool->lock);
1441
1442 worker = find_worker_executing_work(last_pool, work);
1443
1444 if (worker && worker->current_pwq->wq == wq) {
1445 pwq = worker->current_pwq;
1446 } else {
1447
1448 spin_unlock(&last_pool->lock);
1449 spin_lock(&pwq->pool->lock);
1450 }
1451 } else {
1452 spin_lock(&pwq->pool->lock);
1453 }
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463 if (unlikely(!pwq->refcnt)) {
1464 if (wq->flags & WQ_UNBOUND) {
1465 spin_unlock(&pwq->pool->lock);
1466 cpu_relax();
1467 goto retry;
1468 }
1469
1470 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
1471 wq->name, cpu);
1472 }
1473
1474
1475 trace_workqueue_queue_work(req_cpu, pwq, work);
1476
1477 if (WARN_ON(!list_empty(&work->entry)))
1478 goto out;
1479
1480 pwq->nr_in_flight[pwq->work_color]++;
1481 work_flags = work_color_to_flags(pwq->work_color);
1482
1483 if (likely(pwq->nr_active < pwq->max_active)) {
1484 trace_workqueue_activate_work(work);
1485 pwq->nr_active++;
1486 worklist = &pwq->pool->worklist;
1487 if (list_empty(worklist))
1488 pwq->pool->watchdog_ts = jiffies;
1489 } else {
1490 work_flags |= WORK_STRUCT_DELAYED;
1491 worklist = &pwq->delayed_works;
1492 }
1493
1494 insert_work(pwq, work, worklist, work_flags);
1495
1496 out:
1497 spin_unlock(&pwq->pool->lock);
1498 rcu_read_unlock();
1499 }
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512 bool queue_work_on(int cpu, struct workqueue_struct *wq,
1513 struct work_struct *work)
1514 {
1515 bool ret = false;
1516 unsigned long flags;
1517
1518 local_irq_save(flags);
1519
1520 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1521 __queue_work(cpu, wq, work);
1522 ret = true;
1523 }
1524
1525 local_irq_restore(flags);
1526 return ret;
1527 }
1528 EXPORT_SYMBOL(queue_work_on);
1529
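
Caller-side sketch for queue_work_on(); the work item, handler and CPU choice below are invented for illustration and are not part of this file:

#include <linux/printk.h>
#include <linux/workqueue.h>

static void my_work_fn(struct work_struct *work)
{
        pr_info("my_work ran\n");
}

static DECLARE_WORK(my_work, my_work_fn);

static void kick_my_work(void)
{
        /* Pin execution to CPU 1; the caller is responsible for that CPU making sense. */
        if (!queue_work_on(1, system_wq, &my_work))
                pr_info("my_work was already pending\n");

        /* queue_work(wq, work) is the common wrapper that passes WORK_CPU_UNBOUND. */
}
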
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539 static int workqueue_select_cpu_near(int node)
1540 {
1541 int cpu;
1542
1543
1544 if (!wq_numa_enabled)
1545 return WORK_CPU_UNBOUND;
1546
1547
1548 if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
1549 return WORK_CPU_UNBOUND;
1550
1551
1552 cpu = raw_smp_processor_id();
1553 if (node == cpu_to_node(cpu))
1554 return cpu;
1555
1556
1557 cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
1558
1559
1560 return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
1561 }
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583 bool queue_work_node(int node, struct workqueue_struct *wq,
1584 struct work_struct *work)
1585 {
1586 unsigned long flags;
1587 bool ret = false;
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598 WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
1599
1600 local_irq_save(flags);
1601
1602 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1603 int cpu = workqueue_select_cpu_near(node);
1604
1605 __queue_work(cpu, wq, work);
1606 ret = true;
1607 }
1608
1609 local_irq_restore(flags);
1610 return ret;
1611 }
1612 EXPORT_SYMBOL_GPL(queue_work_node);
1613
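
A sketch of queue_work_node() from a driver that wants execution near its device's memory. node_work, node_work_fn and the dev_node parameter are invented, and the workqueue must be unbound; system_unbound_wq is used here as a stand-in for a driver-owned WQ_UNBOUND queue:

#include <linux/workqueue.h>

static void node_work_fn(struct work_struct *work) { }
static DECLARE_WORK(node_work, node_work_fn);

static void queue_near_node(int dev_node)          /* e.g. from dev_to_node() */
{
        queue_work_node(dev_node, system_unbound_wq, &node_work);
}
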
1614 void delayed_work_timer_fn(struct timer_list *t)
1615 {
1616 struct delayed_work *dwork = from_timer(dwork, t, timer);
1617
1618
1619 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
1620 }
1621 EXPORT_SYMBOL(delayed_work_timer_fn);
1622
1623 static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1624 struct delayed_work *dwork, unsigned long delay)
1625 {
1626 struct timer_list *timer = &dwork->timer;
1627 struct work_struct *work = &dwork->work;
1628
1629 WARN_ON_ONCE(!wq);
1630 WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
1631 WARN_ON_ONCE(timer_pending(timer));
1632 WARN_ON_ONCE(!list_empty(&work->entry));
1633
1634
1635
1636
1637
1638
1639
1640 if (!delay) {
1641 __queue_work(cpu, wq, &dwork->work);
1642 return;
1643 }
1644
1645 dwork->wq = wq;
1646 dwork->cpu = cpu;
1647 timer->expires = jiffies + delay;
1648
1649 if (unlikely(cpu != WORK_CPU_UNBOUND))
1650 add_timer_on(timer, cpu);
1651 else
1652 add_timer(timer);
1653 }
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666 bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1667 struct delayed_work *dwork, unsigned long delay)
1668 {
1669 struct work_struct *work = &dwork->work;
1670 bool ret = false;
1671 unsigned long flags;
1672
1673
1674 local_irq_save(flags);
1675
1676 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1677 __queue_delayed_work(cpu, wq, dwork, delay);
1678 ret = true;
1679 }
1680
1681 local_irq_restore(flags);
1682 return ret;
1683 }
1684 EXPORT_SYMBOL(queue_delayed_work_on);
1685
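
Typical use goes through the queue_delayed_work() wrapper, which passes WORK_CPU_UNBOUND to the function above. A minimal sketch with invented names:

#include <linux/jiffies.h>
#include <linux/workqueue.h>

static void poll_fn(struct work_struct *work) { }
static DECLARE_DELAYED_WORK(poll_work, poll_fn);

static void start_polling(void)
{
        /* Run poll_fn() roughly 100ms from now, on whichever CPU is convenient. */
        queue_delayed_work(system_wq, &poll_work, msecs_to_jiffies(100));
}
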
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704 bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
1705 struct delayed_work *dwork, unsigned long delay)
1706 {
1707 unsigned long flags;
1708 int ret;
1709
1710 do {
1711 ret = try_to_grab_pending(&dwork->work, true, &flags);
1712 } while (unlikely(ret == -EAGAIN));
1713
1714 if (likely(ret >= 0)) {
1715 __queue_delayed_work(cpu, wq, dwork, delay);
1716 local_irq_restore(flags);
1717 }
1718
1719
1720 return ret;
1721 }
1722 EXPORT_SYMBOL_GPL(mod_delayed_work_on);
1723
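
mod_delayed_work(), the WORK_CPU_UNBOUND wrapper for the function above, is handy for debouncing: every call re-arms the timer, so the handler fires only after a quiet period. A sketch with invented names:

#include <linux/jiffies.h>
#include <linux/workqueue.h>

static void writeback_fn(struct work_struct *work) { }
static DECLARE_DELAYED_WORK(writeback_work, writeback_fn);

static void note_dirty(void)
{
        /* Push the deadline out again; writeback_fn() runs one second after the last call. */
        mod_delayed_work(system_wq, &writeback_work, HZ);
}
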
1724 static void rcu_work_rcufn(struct rcu_head *rcu)
1725 {
1726 struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);
1727
1728
1729 local_irq_disable();
1730 __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work);
1731 local_irq_enable();
1732 }
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744 bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
1745 {
1746 struct work_struct *work = &rwork->work;
1747
1748 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1749 rwork->wq = wq;
1750 call_rcu(&rwork->rcu, rcu_work_rcufn);
1751 return true;
1752 }
1753
1754 return false;
1755 }
1756 EXPORT_SYMBOL(queue_rcu_work);
1757
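
A sketch of the rcu_work pattern: free an object from process context only after an RCU grace period. struct my_obj and its helpers are invented; to_rcu_work() and INIT_RCU_WORK() are the accessors from linux/workqueue.h:

#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_obj {
        struct rcu_work rwork;
        /* ... payload that RCU readers may still be traversing ... */
};

static void my_obj_free_fn(struct work_struct *work)
{
        struct my_obj *obj = container_of(to_rcu_work(work), struct my_obj, rwork);

        kfree(obj);
}

static void my_obj_release(struct my_obj *obj)
{
        INIT_RCU_WORK(&obj->rwork, my_obj_free_fn);
        queue_rcu_work(system_wq, &obj->rwork);   /* runs only after a grace period elapses */
}
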
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768 static void worker_enter_idle(struct worker *worker)
1769 {
1770 struct worker_pool *pool = worker->pool;
1771
1772 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
1773 WARN_ON_ONCE(!list_empty(&worker->entry) &&
1774 (worker->hentry.next || worker->hentry.pprev)))
1775 return;
1776
1777
1778 worker->flags |= WORKER_IDLE;
1779 pool->nr_idle++;
1780 worker->last_active = jiffies;
1781
1782
1783 list_add(&worker->entry, &pool->idle_list);
1784
1785 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
1786 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1787
1788
1789
1790
1791
1792
1793
1794 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
1795 pool->nr_workers == pool->nr_idle &&
1796 atomic_read(&pool->nr_running));
1797 }
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808 static void worker_leave_idle(struct worker *worker)
1809 {
1810 struct worker_pool *pool = worker->pool;
1811
1812 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
1813 return;
1814 worker_clr_flags(worker, WORKER_IDLE);
1815 pool->nr_idle--;
1816 list_del_init(&worker->entry);
1817 }
1818
1819 static struct worker *alloc_worker(int node)
1820 {
1821 struct worker *worker;
1822
1823 worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
1824 if (worker) {
1825 INIT_LIST_HEAD(&worker->entry);
1826 INIT_LIST_HEAD(&worker->scheduled);
1827 INIT_LIST_HEAD(&worker->node);
1828
1829 worker->flags = WORKER_PREP;
1830 }
1831 return worker;
1832 }
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843 static void worker_attach_to_pool(struct worker *worker,
1844 struct worker_pool *pool)
1845 {
1846 mutex_lock(&wq_pool_attach_mutex);
1847
1848
1849
1850
1851
1852 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1853
1854
1855
1856
1857
1858
1859 if (pool->flags & POOL_DISASSOCIATED)
1860 worker->flags |= WORKER_UNBOUND;
1861
1862 list_add_tail(&worker->node, &pool->workers);
1863 worker->pool = pool;
1864
1865 mutex_unlock(&wq_pool_attach_mutex);
1866 }
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876 static void worker_detach_from_pool(struct worker *worker)
1877 {
1878 struct worker_pool *pool = worker->pool;
1879 struct completion *detach_completion = NULL;
1880
1881 mutex_lock(&wq_pool_attach_mutex);
1882
1883 list_del(&worker->node);
1884 worker->pool = NULL;
1885
1886 if (list_empty(&pool->workers))
1887 detach_completion = pool->detach_completion;
1888 mutex_unlock(&wq_pool_attach_mutex);
1889
1890
1891 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
1892
1893 if (detach_completion)
1894 complete(detach_completion);
1895 }
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909 static struct worker *create_worker(struct worker_pool *pool)
1910 {
1911 struct worker *worker = NULL;
1912 int id = -1;
1913 char id_buf[16];
1914
1915
1916 id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
1917 if (id < 0)
1918 goto fail;
1919
1920 worker = alloc_worker(pool->node);
1921 if (!worker)
1922 goto fail;
1923
1924 worker->id = id;
1925
1926 if (pool->cpu >= 0)
1927 snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
1928 pool->attrs->nice < 0 ? "H" : "");
1929 else
1930 snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
1931
1932 worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
1933 "kworker/%s", id_buf);
1934 if (IS_ERR(worker->task))
1935 goto fail;
1936
1937 set_user_nice(worker->task, pool->attrs->nice);
1938 kthread_bind_mask(worker->task, pool->attrs->cpumask);
1939
1940
1941 worker_attach_to_pool(worker, pool);
1942
1943
1944 spin_lock_irq(&pool->lock);
1945 worker->pool->nr_workers++;
1946 worker_enter_idle(worker);
1947 wake_up_process(worker->task);
1948 spin_unlock_irq(&pool->lock);
1949
1950 return worker;
1951
1952 fail:
1953 if (id >= 0)
1954 ida_simple_remove(&pool->worker_ida, id);
1955 kfree(worker);
1956 return NULL;
1957 }
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969 static void destroy_worker(struct worker *worker)
1970 {
1971 struct worker_pool *pool = worker->pool;
1972
1973 lockdep_assert_held(&pool->lock);
1974
1975
1976 if (WARN_ON(worker->current_work) ||
1977 WARN_ON(!list_empty(&worker->scheduled)) ||
1978 WARN_ON(!(worker->flags & WORKER_IDLE)))
1979 return;
1980
1981 pool->nr_workers--;
1982 pool->nr_idle--;
1983
1984 list_del_init(&worker->entry);
1985 worker->flags |= WORKER_DIE;
1986 wake_up_process(worker->task);
1987 }
1988
1989 static void idle_worker_timeout(struct timer_list *t)
1990 {
1991 struct worker_pool *pool = from_timer(pool, t, idle_timer);
1992
1993 spin_lock_irq(&pool->lock);
1994
1995 while (too_many_workers(pool)) {
1996 struct worker *worker;
1997 unsigned long expires;
1998
1999
2000 worker = list_entry(pool->idle_list.prev, struct worker, entry);
2001 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
2002
2003 if (time_before(jiffies, expires)) {
2004 mod_timer(&pool->idle_timer, expires);
2005 break;
2006 }
2007
2008 destroy_worker(worker);
2009 }
2010
2011 spin_unlock_irq(&pool->lock);
2012 }
2013
2014 static void send_mayday(struct work_struct *work)
2015 {
2016 struct pool_workqueue *pwq = get_work_pwq(work);
2017 struct workqueue_struct *wq = pwq->wq;
2018
2019 lockdep_assert_held(&wq_mayday_lock);
2020
2021 if (!wq->rescuer)
2022 return;
2023
2024
2025 if (list_empty(&pwq->mayday_node)) {
2026
2027
2028
2029
2030
2031 get_pwq(pwq);
2032 list_add_tail(&pwq->mayday_node, &wq->maydays);
2033 wake_up_process(wq->rescuer->task);
2034 }
2035 }
2036
2037 static void pool_mayday_timeout(struct timer_list *t)
2038 {
2039 struct worker_pool *pool = from_timer(pool, t, mayday_timer);
2040 struct work_struct *work;
2041
2042 spin_lock_irq(&pool->lock);
2043 spin_lock(&wq_mayday_lock);
2044
2045 if (need_to_create_worker(pool)) {
2046
2047
2048
2049
2050
2051
2052 list_for_each_entry(work, &pool->worklist, entry)
2053 send_mayday(work);
2054 }
2055
2056 spin_unlock(&wq_mayday_lock);
2057 spin_unlock_irq(&pool->lock);
2058
2059 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
2060 }
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080 static void maybe_create_worker(struct worker_pool *pool)
2081 __releases(&pool->lock)
2082 __acquires(&pool->lock)
2083 {
2084 restart:
2085 spin_unlock_irq(&pool->lock);
2086
2087
2088 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
2089
2090 while (true) {
2091 if (create_worker(pool) || !need_to_create_worker(pool))
2092 break;
2093
2094 schedule_timeout_interruptible(CREATE_COOLDOWN);
2095
2096 if (!need_to_create_worker(pool))
2097 break;
2098 }
2099
2100 del_timer_sync(&pool->mayday_timer);
2101 spin_lock_irq(&pool->lock);
2102
2103
2104
2105
2106
2107 if (need_to_create_worker(pool))
2108 goto restart;
2109 }
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133 static bool manage_workers(struct worker *worker)
2134 {
2135 struct worker_pool *pool = worker->pool;
2136
2137 if (pool->flags & POOL_MANAGER_ACTIVE)
2138 return false;
2139
2140 pool->flags |= POOL_MANAGER_ACTIVE;
2141 pool->manager = worker;
2142
2143 maybe_create_worker(pool);
2144
2145 pool->manager = NULL;
2146 pool->flags &= ~POOL_MANAGER_ACTIVE;
2147 wake_up(&wq_manager_wait);
2148 return true;
2149 }
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165 static void process_one_work(struct worker *worker, struct work_struct *work)
2166 __releases(&pool->lock)
2167 __acquires(&pool->lock)
2168 {
2169 struct pool_workqueue *pwq = get_work_pwq(work);
2170 struct worker_pool *pool = worker->pool;
2171 bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
2172 int work_color;
2173 struct worker *collision;
2174 #ifdef CONFIG_LOCKDEP
2175
2176
2177
2178
2179
2180
2181
2182 struct lockdep_map lockdep_map;
2183
2184 lockdep_copy_map(&lockdep_map, &work->lockdep_map);
2185 #endif
2186
2187 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
2188 raw_smp_processor_id() != pool->cpu);
2189
2190
2191
2192
2193
2194
2195
2196 collision = find_worker_executing_work(pool, work);
2197 if (unlikely(collision)) {
2198 move_linked_works(work, &collision->scheduled, NULL);
2199 return;
2200 }
2201
2202
2203 debug_work_deactivate(work);
2204 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
2205 worker->current_work = work;
2206 worker->current_func = work->func;
2207 worker->current_pwq = pwq;
2208 work_color = get_work_color(work);
2209
2210
2211
2212
2213
2214 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
2215
2216 list_del_init(&work->entry);
2217
2218
2219
2220
2221
2222
2223
2224 if (unlikely(cpu_intensive))
2225 worker_set_flags(worker, WORKER_CPU_INTENSIVE);
2226
2227
2228
2229
2230
2231
2232
2233
2234 if (need_more_worker(pool))
2235 wake_up_worker(pool);
2236
2237
2238
2239
2240
2241
2242
2243 set_work_pool_and_clear_pending(work, pool->id);
2244
2245 spin_unlock_irq(&pool->lock);
2246
2247 lock_map_acquire(&pwq->wq->lockdep_map);
2248 lock_map_acquire(&lockdep_map);
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270 lockdep_invariant_state(true);
2271 trace_workqueue_execute_start(work);
2272 worker->current_func(work);
2273
2274
2275
2276
2277 trace_workqueue_execute_end(work);
2278 lock_map_release(&lockdep_map);
2279 lock_map_release(&pwq->wq->lockdep_map);
2280
2281 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
2282 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2283 " last function: %ps\n",
2284 current->comm, preempt_count(), task_pid_nr(current),
2285 worker->current_func);
2286 debug_show_held_locks(current);
2287 dump_stack();
2288 }
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298 cond_resched();
2299
2300 spin_lock_irq(&pool->lock);
2301
2302
2303 if (unlikely(cpu_intensive))
2304 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2305
2306
2307 worker->last_func = worker->current_func;
2308
2309
2310 hash_del(&worker->hentry);
2311 worker->current_work = NULL;
2312 worker->current_func = NULL;
2313 worker->current_pwq = NULL;
2314 pwq_dec_nr_in_flight(pwq, work_color);
2315 }
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329 static void process_scheduled_works(struct worker *worker)
2330 {
2331 while (!list_empty(&worker->scheduled)) {
2332 struct work_struct *work = list_first_entry(&worker->scheduled,
2333 struct work_struct, entry);
2334 process_one_work(worker, work);
2335 }
2336 }
2337
2338 static void set_pf_worker(bool val)
2339 {
2340 mutex_lock(&wq_pool_attach_mutex);
2341 if (val)
2342 current->flags |= PF_WQ_WORKER;
2343 else
2344 current->flags &= ~PF_WQ_WORKER;
2345 mutex_unlock(&wq_pool_attach_mutex);
2346 }
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360 static int worker_thread(void *__worker)
2361 {
2362 struct worker *worker = __worker;
2363 struct worker_pool *pool = worker->pool;
2364
2365
2366 set_pf_worker(true);
2367 woke_up:
2368 spin_lock_irq(&pool->lock);
2369
2370
2371 if (unlikely(worker->flags & WORKER_DIE)) {
2372 spin_unlock_irq(&pool->lock);
2373 WARN_ON_ONCE(!list_empty(&worker->entry));
2374 set_pf_worker(false);
2375
2376 set_task_comm(worker->task, "kworker/dying");
2377 ida_simple_remove(&pool->worker_ida, worker->id);
2378 worker_detach_from_pool(worker);
2379 kfree(worker);
2380 return 0;
2381 }
2382
2383 worker_leave_idle(worker);
2384 recheck:
2385
2386 if (!need_more_worker(pool))
2387 goto sleep;
2388
2389
2390 if (unlikely(!may_start_working(pool)) && manage_workers(worker))
2391 goto recheck;
2392
2393
2394
2395
2396
2397
2398 WARN_ON_ONCE(!list_empty(&worker->scheduled));
2399
2400
2401
2402
2403
2404
2405
2406
2407 worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
2408
2409 do {
2410 struct work_struct *work =
2411 list_first_entry(&pool->worklist,
2412 struct work_struct, entry);
2413
2414 pool->watchdog_ts = jiffies;
2415
2416 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2417
2418 process_one_work(worker, work);
2419 if (unlikely(!list_empty(&worker->scheduled)))
2420 process_scheduled_works(worker);
2421 } else {
2422 move_linked_works(work, &worker->scheduled, NULL);
2423 process_scheduled_works(worker);
2424 }
2425 } while (keep_working(pool));
2426
2427 worker_set_flags(worker, WORKER_PREP);
2428 sleep:
2429
2430
2431
2432
2433
2434
2435
2436 worker_enter_idle(worker);
2437 __set_current_state(TASK_IDLE);
2438 spin_unlock_irq(&pool->lock);
2439 schedule();
2440 goto woke_up;
2441 }
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464 static int rescuer_thread(void *__rescuer)
2465 {
2466 struct worker *rescuer = __rescuer;
2467 struct workqueue_struct *wq = rescuer->rescue_wq;
2468 struct list_head *scheduled = &rescuer->scheduled;
2469 bool should_stop;
2470
2471 set_user_nice(current, RESCUER_NICE_LEVEL);
2472
2473
2474
2475
2476
2477 set_pf_worker(true);
2478 repeat:
2479 set_current_state(TASK_IDLE);
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489 should_stop = kthread_should_stop();
2490
2491
2492 spin_lock_irq(&wq_mayday_lock);
2493
2494 while (!list_empty(&wq->maydays)) {
2495 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
2496 struct pool_workqueue, mayday_node);
2497 struct worker_pool *pool = pwq->pool;
2498 struct work_struct *work, *n;
2499 bool first = true;
2500
2501 __set_current_state(TASK_RUNNING);
2502 list_del_init(&pwq->mayday_node);
2503
2504 spin_unlock_irq(&wq_mayday_lock);
2505
2506 worker_attach_to_pool(rescuer, pool);
2507
2508 spin_lock_irq(&pool->lock);
2509
2510
2511
2512
2513
2514 WARN_ON_ONCE(!list_empty(scheduled));
2515 list_for_each_entry_safe(work, n, &pool->worklist, entry) {
2516 if (get_work_pwq(work) == pwq) {
2517 if (first)
2518 pool->watchdog_ts = jiffies;
2519 move_linked_works(work, scheduled, &n);
2520 }
2521 first = false;
2522 }
2523
2524 if (!list_empty(scheduled)) {
2525 process_scheduled_works(rescuer);
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536 if (need_to_create_worker(pool)) {
2537 spin_lock(&wq_mayday_lock);
2538
2539
2540
2541
2542 if (wq->rescuer && list_empty(&pwq->mayday_node)) {
2543 get_pwq(pwq);
2544 list_add_tail(&pwq->mayday_node, &wq->maydays);
2545 }
2546 spin_unlock(&wq_mayday_lock);
2547 }
2548 }
2549
2550
2551
2552
2553
2554 put_pwq(pwq);
2555
2556
2557
2558
2559
2560
2561 if (need_more_worker(pool))
2562 wake_up_worker(pool);
2563
2564 spin_unlock_irq(&pool->lock);
2565
2566 worker_detach_from_pool(rescuer);
2567
2568 spin_lock_irq(&wq_mayday_lock);
2569 }
2570
2571 spin_unlock_irq(&wq_mayday_lock);
2572
2573 if (should_stop) {
2574 __set_current_state(TASK_RUNNING);
2575 set_pf_worker(false);
2576 return 0;
2577 }
2578
2579
2580 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2581 schedule();
2582 goto repeat;
2583 }
2584
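
A rescuer like the one above only exists for workqueues created with WQ_MEM_RECLAIM. A minimal allocation sketch; the queue name and init function are invented:

#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_io_wq;

static int my_driver_init(void)
{
        /* WQ_MEM_RECLAIM attaches a rescuer so queued work keeps making forward
         * progress even when new kworkers cannot be created under memory pressure;
         * max_active of 0 picks the default. */
        my_io_wq = alloc_workqueue("my_io_wq", WQ_MEM_RECLAIM, 0);
        if (!my_io_wq)
                return -ENOMEM;
        return 0;
}
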
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596 static void check_flush_dependency(struct workqueue_struct *target_wq,
2597 struct work_struct *target_work)
2598 {
2599 work_func_t target_func = target_work ? target_work->func : NULL;
2600 struct worker *worker;
2601
2602 if (target_wq->flags & WQ_MEM_RECLAIM)
2603 return;
2604
2605 worker = current_wq_worker();
2606
2607 WARN_ONCE(current->flags & PF_MEMALLOC,
2608 "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
2609 current->pid, current->comm, target_wq->name, target_func);
2610 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
2611 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
2612 "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
2613 worker->current_pwq->wq->name, worker->current_func,
2614 target_wq->name, target_func);
2615 }
2616
2617 struct wq_barrier {
2618 struct work_struct work;
2619 struct completion done;
2620 struct task_struct *task;
2621 };
2622
2623 static void wq_barrier_func(struct work_struct *work)
2624 {
2625 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2626 complete(&barr->done);
2627 }
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653 static void insert_wq_barrier(struct pool_workqueue *pwq,
2654 struct wq_barrier *barr,
2655 struct work_struct *target, struct worker *worker)
2656 {
2657 struct list_head *head;
2658 unsigned int linked = 0;
2659
2660
2661
2662
2663
2664
2665
2666 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2667 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
2668
2669 init_completion_map(&barr->done, &target->lockdep_map);
2670
2671 barr->task = current;
2672
2673
2674
2675
2676
2677 if (worker)
2678 head = worker->scheduled.next;
2679 else {
2680 unsigned long *bits = work_data_bits(target);
2681
2682 head = target->entry.next;
2683
2684 linked = *bits & WORK_STRUCT_LINKED;
2685 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
2686 }
2687
2688 debug_work_activate(&barr->work);
2689 insert_work(pwq, &barr->work, head,
2690 work_color_to_flags(WORK_NO_COLOR) | linked);
2691 }
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724 static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
2725 int flush_color, int work_color)
2726 {
2727 bool wait = false;
2728 struct pool_workqueue *pwq;
2729
2730 if (flush_color >= 0) {
2731 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
2732 atomic_set(&wq->nr_pwqs_to_flush, 1);
2733 }
2734
2735 for_each_pwq(pwq, wq) {
2736 struct worker_pool *pool = pwq->pool;
2737
2738 spin_lock_irq(&pool->lock);
2739
2740 if (flush_color >= 0) {
2741 WARN_ON_ONCE(pwq->flush_color != -1);
2742
2743 if (pwq->nr_in_flight[flush_color]) {
2744 pwq->flush_color = flush_color;
2745 atomic_inc(&wq->nr_pwqs_to_flush);
2746 wait = true;
2747 }
2748 }
2749
2750 if (work_color >= 0) {
2751 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
2752 pwq->work_color = work_color;
2753 }
2754
2755 spin_unlock_irq(&pool->lock);
2756 }
2757
2758 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
2759 complete(&wq->first_flusher->done);
2760
2761 return wait;
2762 }
2763
2764
2765
2766
2767
2768
2769
2770
2771 void flush_workqueue(struct workqueue_struct *wq)
2772 {
2773 struct wq_flusher this_flusher = {
2774 .list = LIST_HEAD_INIT(this_flusher.list),
2775 .flush_color = -1,
2776 .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
2777 };
2778 int next_color;
2779
2780 if (WARN_ON(!wq_online))
2781 return;
2782
2783 lock_map_acquire(&wq->lockdep_map);
2784 lock_map_release(&wq->lockdep_map);
2785
2786 mutex_lock(&wq->mutex);
2787
2788
2789
2790
2791 next_color = work_next_color(wq->work_color);
2792
2793 if (next_color != wq->flush_color) {
2794
2795
2796
2797
2798
2799 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
2800 this_flusher.flush_color = wq->work_color;
2801 wq->work_color = next_color;
2802
2803 if (!wq->first_flusher) {
2804
2805 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2806
2807 wq->first_flusher = &this_flusher;
2808
2809 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
2810 wq->work_color)) {
2811
2812 wq->flush_color = next_color;
2813 wq->first_flusher = NULL;
2814 goto out_unlock;
2815 }
2816 } else {
2817
2818 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
2819 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2820 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2821 }
2822 } else {
2823
2824
2825
2826
2827
2828 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2829 }
2830
2831 check_flush_dependency(wq, NULL);
2832
2833 mutex_unlock(&wq->mutex);
2834
2835 wait_for_completion(&this_flusher.done);
2836
2837
2838
2839
2840
2841
2842
2843 if (wq->first_flusher != &this_flusher)
2844 return;
2845
2846 mutex_lock(&wq->mutex);
2847
2848
2849 if (wq->first_flusher != &this_flusher)
2850 goto out_unlock;
2851
2852 wq->first_flusher = NULL;
2853
2854 WARN_ON_ONCE(!list_empty(&this_flusher.list));
2855 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2856
2857 while (true) {
2858 struct wq_flusher *next, *tmp;
2859
2860
2861 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2862 if (next->flush_color != wq->flush_color)
2863 break;
2864 list_del_init(&next->list);
2865 complete(&next->done);
2866 }
2867
2868 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
2869 wq->flush_color != work_next_color(wq->work_color));
2870
2871
2872 wq->flush_color = work_next_color(wq->flush_color);
2873
2874
2875 if (!list_empty(&wq->flusher_overflow)) {
2876
2877
2878
2879
2880
2881
2882 list_for_each_entry(tmp, &wq->flusher_overflow, list)
2883 tmp->flush_color = wq->work_color;
2884
2885 wq->work_color = work_next_color(wq->work_color);
2886
2887 list_splice_tail_init(&wq->flusher_overflow,
2888 &wq->flusher_queue);
2889 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2890 }
2891
2892 if (list_empty(&wq->flusher_queue)) {
2893 WARN_ON_ONCE(wq->flush_color != wq->work_color);
2894 break;
2895 }
2896
2897
2898
2899
2900
2901 WARN_ON_ONCE(wq->flush_color == wq->work_color);
2902 WARN_ON_ONCE(wq->flush_color != next->flush_color);
2903
2904 list_del_init(&next->list);
2905 wq->first_flusher = next;
2906
2907 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
2908 break;
2909
2910
2911
2912
2913
2914 wq->first_flusher = NULL;
2915 }
2916
2917 out_unlock:
2918 mutex_unlock(&wq->mutex);
2919 }
2920 EXPORT_SYMBOL(flush_workqueue);
2921
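
Caller-side sketch of flush_workqueue(); my_wq and my_quiesce() are invented, and the queue is assumed to have been allocated elsewhere:

#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;   /* assumed allocated with alloc_workqueue() */

static void my_quiesce(void)
{
        /* Waits for every item queued on my_wq before this call; items queued
         * afterwards, including by the flushed works themselves, may still run. */
        flush_workqueue(my_wq);
}
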
2922 /**
2923  * drain_workqueue - drain a workqueue
2924  * @wq: workqueue to drain
2925  *
2926  * Wait until the workqueue becomes empty.  While draining is in progress,
2927  * only chain queueing is allowed.  IOW, only currently pending or running
2928  * work items on @wq can queue further work items on it.  @wq is flushed
2929  * repeatedly until it becomes empty.  The number of flushes is determined
2930  * by the depth of chaining and should be relatively short.  Whine if it
2931  * takes too long.
2932  */
2933 void drain_workqueue(struct workqueue_struct *wq)
2934 {
2935 unsigned int flush_cnt = 0;
2936 struct pool_workqueue *pwq;
2937
2938
2939
2940
2941
2942
2943 mutex_lock(&wq->mutex);
2944 if (!wq->nr_drainers++)
2945 wq->flags |= __WQ_DRAINING;
2946 mutex_unlock(&wq->mutex);
2947 reflush:
2948 flush_workqueue(wq);
2949
2950 mutex_lock(&wq->mutex);
2951
2952 for_each_pwq(pwq, wq) {
2953 bool drained;
2954
2955 spin_lock_irq(&pwq->pool->lock);
2956 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
2957 spin_unlock_irq(&pwq->pool->lock);
2958
2959 if (drained)
2960 continue;
2961
2962 if (++flush_cnt == 10 ||
2963 (flush_cnt % 100 == 0 && flush_cnt <= 1000))
2964 pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n",
2965 wq->name, flush_cnt);
2966
2967 mutex_unlock(&wq->mutex);
2968 goto reflush;
2969 }
2970
2971 if (!--wq->nr_drainers)
2972 wq->flags &= ~__WQ_DRAINING;
2973 mutex_unlock(&wq->mutex);
2974 }
2975 EXPORT_SYMBOL_GPL(drain_workqueue);
2976
2977 static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
2978 bool from_cancel)
2979 {
2980 struct worker *worker = NULL;
2981 struct worker_pool *pool;
2982 struct pool_workqueue *pwq;
2983
2984 might_sleep();
2985
2986 rcu_read_lock();
2987 pool = get_work_pool(work);
2988 if (!pool) {
2989 rcu_read_unlock();
2990 return false;
2991 }
2992
2993 spin_lock_irq(&pool->lock);
2994
2995 pwq = get_work_pwq(work);
2996 if (pwq) {
2997 if (unlikely(pwq->pool != pool))
2998 goto already_gone;
2999 } else {
3000 worker = find_worker_executing_work(pool, work);
3001 if (!worker)
3002 goto already_gone;
3003 pwq = worker->current_pwq;
3004 }
3005
3006 check_flush_dependency(pwq->wq, work);
3007
3008 insert_wq_barrier(pwq, barr, work, worker);
3009 spin_unlock_irq(&pool->lock);
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020 if (!from_cancel &&
3021 (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
3022 lock_map_acquire(&pwq->wq->lockdep_map);
3023 lock_map_release(&pwq->wq->lockdep_map);
3024 }
3025 rcu_read_unlock();
3026 return true;
3027 already_gone:
3028 spin_unlock_irq(&pool->lock);
3029 rcu_read_unlock();
3030 return false;
3031 }
3032
3033 static bool __flush_work(struct work_struct *work, bool from_cancel)
3034 {
3035 struct wq_barrier barr;
3036
3037 if (WARN_ON(!wq_online))
3038 return false;
3039
3040 if (WARN_ON(!work->func))
3041 return false;
3042
3043 if (!from_cancel) {
3044 lock_map_acquire(&work->lockdep_map);
3045 lock_map_release(&work->lockdep_map);
3046 }
3047
3048 if (start_flush_work(work, &barr, from_cancel)) {
3049 wait_for_completion(&barr.done);
3050 destroy_work_on_stack(&barr.work);
3051 return true;
3052 } else {
3053 return false;
3054 }
3055 }
3056
3057 /**
3058  * flush_work - wait for a work to finish executing the last queueing instance
3059  * @work: the work to flush
3060  *
3061  * Wait until @work has finished execution.  @work is guaranteed to be idle
3062  * on return if it hasn't been requeued since flush started.
3063  *
3064  * Return:
3065  * %true if flush_work() waited for the work to finish execution,
3066  * %false if it was already idle.
3067  */
3068 bool flush_work(struct work_struct *work)
3069 {
3070 return __flush_work(work, false);
3071 }
3072 EXPORT_SYMBOL_GPL(flush_work);
3073
3074 struct cwt_wait {
3075 wait_queue_entry_t wait;
3076 struct work_struct *work;
3077 };
3078
3079 static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
3080 {
3081 struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
3082
3083 if (cwait->work != key)
3084 return 0;
3085 return autoremove_wake_function(wait, mode, sync, key);
3086 }
3087
3088 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
3089 {
3090 static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
3091 unsigned long flags;
3092 int ret;
3093
3094 do {
3095 ret = try_to_grab_pending(work, is_dwork, &flags);
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112 if (unlikely(ret == -ENOENT)) {
3113 struct cwt_wait cwait;
3114
3115 init_wait(&cwait.wait);
3116 cwait.wait.func = cwt_wakefn;
3117 cwait.work = work;
3118
3119 prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
3120 TASK_UNINTERRUPTIBLE);
3121 if (work_is_canceling(work))
3122 schedule();
3123 finish_wait(&cancel_waitq, &cwait.wait);
3124 }
3125 } while (unlikely(ret < 0));
3126
3127
3128 mark_work_canceling(work);
3129 local_irq_restore(flags);
3130
3131
3132
3133
3134
3135 if (wq_online)
3136 __flush_work(work, true);
3137
3138 clear_work_data(work);
3139
3140
3141
3142
3143
3144
3145 smp_mb();
3146 if (waitqueue_active(&cancel_waitq))
3147 __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
3148
3149 return ret;
3150 }
3151
3152 /**
3153  * cancel_work_sync - cancel a work and wait for it to finish
3154  * @work: the work to cancel
3155  *
3156  * Cancel @work and wait for its execution to finish.  This function
3157  * can be used even if the work re-queues itself or migrates to
3158  * another workqueue.  On return from this function, @work is
3159  * guaranteed to be not pending or executing on any CPU.
3160  *
3161  * cancel_work_sync(&delayed_work->work) must not be used for
3162  * delayed_work's.  Use cancel_delayed_work_sync() instead.
3163  *
3164  * The caller must ensure that the workqueue on which @work was last
3165  * queued can't be destroyed before this function returns.
3166  *
3167  * Return:
3168  * %true if @work was pending, %false otherwise.
3169  */
3170 bool cancel_work_sync(struct work_struct *work)
3171 {
3172 return __cancel_work_timer(work, false);
3173 }
3174 EXPORT_SYMBOL_GPL(cancel_work_sync);
3175
3176 /**
3177  * flush_delayed_work - wait for a dwork to finish executing the last queueing
3178  * @dwork: the delayed work to flush
3179  *
3180  * The delayed timer is cancelled and the pending work is queued for
3181  * immediate execution.  Like flush_work(), this function only
3182  * considers the last queueing instance of @dwork.
3183  *
3184  * Return:
3185  * %true if flush_work() waited for the work to finish execution,
3186  * %false if it was already idle.
3187  */
3188 bool flush_delayed_work(struct delayed_work *dwork)
3189 {
3190 local_irq_disable();
3191 if (del_timer_sync(&dwork->timer))
3192 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
3193 local_irq_enable();
3194 return flush_work(&dwork->work);
3195 }
3196 EXPORT_SYMBOL(flush_delayed_work);
3197
3198 /**
3199  * flush_rcu_work - wait for a rwork to finish executing the last queueing
3200  * @rwork: the rcu work to flush
3201  *
3202  * Return:
3203  * %true if flush_rcu_work() waited for the work to finish execution,
3204  * %false if it was already idle.
3205  */
3206 bool flush_rcu_work(struct rcu_work *rwork)
3207 {
3208 if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
3209 rcu_barrier();
3210 flush_work(&rwork->work);
3211 return true;
3212 } else {
3213 return flush_work(&rwork->work);
3214 }
3215 }
3216 EXPORT_SYMBOL(flush_rcu_work);
3217
3218 static bool __cancel_work(struct work_struct *work, bool is_dwork)
3219 {
3220 unsigned long flags;
3221 int ret;
3222
3223 do {
3224 ret = try_to_grab_pending(work, is_dwork, &flags);
3225 } while (unlikely(ret == -EAGAIN));
3226
3227 if (unlikely(ret < 0))
3228 return false;
3229
3230 set_work_pool_and_clear_pending(work, get_work_pool_id(work));
3231 local_irq_restore(flags);
3232 return ret;
3233 }
3234
3235 /**
3236  * cancel_delayed_work - cancel a delayed work
3237  * @dwork: delayed_work to cancel
3238  *
3239  * Kill off a pending delayed_work.
3240  *
3241  * Return: %true if @dwork was pending and canceled; %false if it wasn't
3242  * pending.
3243  *
3244  * Note:
3245  * The work callback function may still be running on return, unless
3246  * it returns %true and the work doesn't re-arm itself.  Explicitly flush or
3247  * use cancel_delayed_work_sync() to wait on it.
3248  *
3249  * This function is safe to call from any context including IRQ handler.
3250  */
3251 bool cancel_delayed_work(struct delayed_work *dwork)
3252 {
3253 return __cancel_work(&dwork->work, true);
3254 }
3255 EXPORT_SYMBOL(cancel_delayed_work);
3256
3257 /**
3258  * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
3259  * @dwork: the delayed work to cancel
3260  *
3261  * This is cancel_work_sync() for delayed works.
3262  *
3263  * Return:
3264  * %true if @dwork was pending, %false otherwise.
3265  */
3266 bool cancel_delayed_work_sync(struct delayed_work *dwork)
3267 {
3268 return __cancel_work_timer(&dwork->work, true);
3269 }
3270 EXPORT_SYMBOL(cancel_delayed_work_sync);
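/*
 * Illustrative teardown sketch (hypothetical caller code, not part of this
 * file).  Even if the handler re-arms the delayed work, the _sync variant
 * guarantees nothing is pending or running afterwards:
 *
 *	INIT_DELAYED_WORK(&dev->poll_work, poll_fn);
 *	queue_delayed_work(system_wq, &dev->poll_work, HZ);
 *	...
 *	cancel_delayed_work_sync(&dev->poll_work);
 */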
3271
3272 /**
3273  * schedule_on_each_cpu - execute a function synchronously on each online CPU
3274  * @func: the function to call
3275  *
3276  * schedule_on_each_cpu() executes @func on each online CPU using the
3277  * system workqueue and blocks until all CPUs have completed.
3278  * schedule_on_each_cpu() is very slow.
3279  *
3280  * Return:
3281  * 0 on success, -errno on failure.
3282  */
3283 int schedule_on_each_cpu(work_func_t func)
3284 {
3285 int cpu;
3286 struct work_struct __percpu *works;
3287
3288 works = alloc_percpu(struct work_struct);
3289 if (!works)
3290 return -ENOMEM;
3291
3292 get_online_cpus();
3293
3294 for_each_online_cpu(cpu) {
3295 struct work_struct *work = per_cpu_ptr(works, cpu);
3296
3297 INIT_WORK(work, func);
3298 schedule_work_on(cpu, work);
3299 }
3300
3301 for_each_online_cpu(cpu)
3302 flush_work(per_cpu_ptr(works, cpu));
3303
3304 put_online_cpus();
3305 free_percpu(works);
3306 return 0;
3307 }
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321 int execute_in_process_context(work_func_t fn, struct execute_work *ew)
3322 {
3323 if (!in_interrupt()) {
3324 fn(&ew->work);
3325 return 0;
3326 }
3327
3328 INIT_WORK(&ew->work, fn);
3329 schedule_work(&ew->work);
3330
3331 return 1;
3332 }
3333 EXPORT_SYMBOL_GPL(execute_in_process_context);
3334
3335 /**
3336  * free_workqueue_attrs - free a workqueue_attrs
3337  * @attrs: workqueue_attrs to free
3338  *
3339  * Undo alloc_workqueue_attrs().
3340  */
3341 void free_workqueue_attrs(struct workqueue_attrs *attrs)
3342 {
3343 if (attrs) {
3344 free_cpumask_var(attrs->cpumask);
3345 kfree(attrs);
3346 }
3347 }
3348
3349 /**
3350  * alloc_workqueue_attrs - allocate a workqueue_attrs
3351  *
3352  * Allocate a new workqueue_attrs, initialize it with default settings and
3353  * return it.
3354  *
3355  * Return: the allocated new workqueue_attrs on success, %NULL on failure.
3356  */
3357 struct workqueue_attrs *alloc_workqueue_attrs(void)
3358 {
3359 struct workqueue_attrs *attrs;
3360
3361 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
3362 if (!attrs)
3363 goto fail;
3364 if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
3365 goto fail;
3366
3367 cpumask_copy(attrs->cpumask, cpu_possible_mask);
3368 return attrs;
3369 fail:
3370 free_workqueue_attrs(attrs);
3371 return NULL;
3372 }
3373
3374 static void copy_workqueue_attrs(struct workqueue_attrs *to,
3375 const struct workqueue_attrs *from)
3376 {
3377 to->nice = from->nice;
3378 cpumask_copy(to->cpumask, from->cpumask);
3379
3380
3381
3382
3383
3384 to->no_numa = from->no_numa;
3385 }
3386
3387
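/* hash value of the content of @attrs */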
3388 static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3389 {
3390 u32 hash = 0;
3391
3392 hash = jhash_1word(attrs->nice, hash);
3393 hash = jhash(cpumask_bits(attrs->cpumask),
3394 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3395 return hash;
3396 }
3397
3398
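/* content equality test */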
3399 static bool wqattrs_equal(const struct workqueue_attrs *a,
3400 const struct workqueue_attrs *b)
3401 {
3402 if (a->nice != b->nice)
3403 return false;
3404 if (!cpumask_equal(a->cpumask, b->cpumask))
3405 return false;
3406 return true;
3407 }
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419 static int init_worker_pool(struct worker_pool *pool)
3420 {
3421 spin_lock_init(&pool->lock);
3422 pool->id = -1;
3423 pool->cpu = -1;
3424 pool->node = NUMA_NO_NODE;
3425 pool->flags |= POOL_DISASSOCIATED;
3426 pool->watchdog_ts = jiffies;
3427 INIT_LIST_HEAD(&pool->worklist);
3428 INIT_LIST_HEAD(&pool->idle_list);
3429 hash_init(pool->busy_hash);
3430
3431 timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);
3432
3433 timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
3434
3435 INIT_LIST_HEAD(&pool->workers);
3436
3437 ida_init(&pool->worker_ida);
3438 INIT_HLIST_NODE(&pool->hash_node);
3439 pool->refcnt = 1;
3440
3441
3442 pool->attrs = alloc_workqueue_attrs();
3443 if (!pool->attrs)
3444 return -ENOMEM;
3445 return 0;
3446 }
3447
3448 #ifdef CONFIG_LOCKDEP
3449 static void wq_init_lockdep(struct workqueue_struct *wq)
3450 {
3451 char *lock_name;
3452
3453 lockdep_register_key(&wq->key);
3454 lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
3455 if (!lock_name)
3456 lock_name = wq->name;
3457
3458 wq->lock_name = lock_name;
3459 lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
3460 }
3461
3462 static void wq_unregister_lockdep(struct workqueue_struct *wq)
3463 {
3464 lockdep_unregister_key(&wq->key);
3465 }
3466
3467 static void wq_free_lockdep(struct workqueue_struct *wq)
3468 {
3469 if (wq->lock_name != wq->name)
3470 kfree(wq->lock_name);
3471 }
3472 #else
3473 static void wq_init_lockdep(struct workqueue_struct *wq)
3474 {
3475 }
3476
3477 static void wq_unregister_lockdep(struct workqueue_struct *wq)
3478 {
3479 }
3480
3481 static void wq_free_lockdep(struct workqueue_struct *wq)
3482 {
3483 }
3484 #endif
3485
3486 static void rcu_free_wq(struct rcu_head *rcu)
3487 {
3488 struct workqueue_struct *wq =
3489 container_of(rcu, struct workqueue_struct, rcu);
3490
3491 wq_free_lockdep(wq);
3492
3493 if (!(wq->flags & WQ_UNBOUND))
3494 free_percpu(wq->cpu_pwqs);
3495 else
3496 free_workqueue_attrs(wq->unbound_attrs);
3497
3498 kfree(wq->rescuer);
3499 kfree(wq);
3500 }
3501
3502 static void rcu_free_pool(struct rcu_head *rcu)
3503 {
3504 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3505
3506 ida_destroy(&pool->worker_ida);
3507 free_workqueue_attrs(pool->attrs);
3508 kfree(pool);
3509 }
3510
3511 /**
3512  * put_unbound_pool - put a worker_pool
3513  * @pool: worker_pool to put
3514  *
3515  * Put @pool.  If its refcnt reaches zero, it gets destroyed in an RCU
3516  * safe manner.  get_unbound_pool() calls this function on its failure path
3517  * and this function should be able to release pools which went through,
3518  * partially, any of get_unbound_pool()'s steps.
3519  *
3520  * Should be called with wq_pool_mutex held.
3521  */
3522 static void put_unbound_pool(struct worker_pool *pool)
3523 {
3524 DECLARE_COMPLETION_ONSTACK(detach_completion);
3525 struct worker *worker;
3526
3527 lockdep_assert_held(&wq_pool_mutex);
3528
3529 if (--pool->refcnt)
3530 return;
3531
3532
3533 if (WARN_ON(!(pool->cpu < 0)) ||
3534 WARN_ON(!list_empty(&pool->worklist)))
3535 return;
3536
3537
3538 if (pool->id >= 0)
3539 idr_remove(&worker_pool_idr, pool->id);
3540 hash_del(&pool->hash_node);
3541
3542
3543
3544
3545
3546
3547 spin_lock_irq(&pool->lock);
3548 wait_event_lock_irq(wq_manager_wait,
3549 !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
3550 pool->flags |= POOL_MANAGER_ACTIVE;
3551
3552 while ((worker = first_idle_worker(pool)))
3553 destroy_worker(worker);
3554 WARN_ON(pool->nr_workers || pool->nr_idle);
3555 spin_unlock_irq(&pool->lock);
3556
3557 mutex_lock(&wq_pool_attach_mutex);
3558 if (!list_empty(&pool->workers))
3559 pool->detach_completion = &detach_completion;
3560 mutex_unlock(&wq_pool_attach_mutex);
3561
3562 if (pool->detach_completion)
3563 wait_for_completion(pool->detach_completion);
3564
3565
3566 del_timer_sync(&pool->idle_timer);
3567 del_timer_sync(&pool->mayday_timer);
3568
3569
3570 call_rcu(&pool->rcu, rcu_free_pool);
3571 }
3572
3573 /**
3574  * get_unbound_pool - get a worker_pool with the specified attributes
3575  * @attrs: the attributes of the worker_pool to get
3576  *
3577  * Obtain a worker_pool which has the same attributes as @attrs, bump the
3578  * reference count and return it.  If there already is a matching
3579  * worker_pool, it will be used; otherwise, this function attempts to
3580  * create a new one.
3581  *
3582  * Should be called with wq_pool_mutex held.
3583  *
3584  * Return: On success, a worker_pool with the same attributes as @attrs.
3585  * On failure, %NULL.
3586  */
3587 static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3588 {
3589 u32 hash = wqattrs_hash(attrs);
3590 struct worker_pool *pool;
3591 int node;
3592 int target_node = NUMA_NO_NODE;
3593
3594 lockdep_assert_held(&wq_pool_mutex);
3595
3596
3597 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3598 if (wqattrs_equal(pool->attrs, attrs)) {
3599 pool->refcnt++;
3600 return pool;
3601 }
3602 }
3603
3604
3605 if (wq_numa_enabled) {
3606 for_each_node(node) {
3607 if (cpumask_subset(attrs->cpumask,
3608 wq_numa_possible_cpumask[node])) {
3609 target_node = node;
3610 break;
3611 }
3612 }
3613 }
3614
3615
3616 pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
3617 if (!pool || init_worker_pool(pool) < 0)
3618 goto fail;
3619
3620 lockdep_set_subclass(&pool->lock, 1);
3621 copy_workqueue_attrs(pool->attrs, attrs);
3622 pool->node = target_node;
3623
3624
3625
3626
3627
3628 pool->attrs->no_numa = false;
3629
3630 if (worker_pool_assign_id(pool) < 0)
3631 goto fail;
3632
3633
3634 if (wq_online && !create_worker(pool))
3635 goto fail;
3636
3637
3638 hash_add(unbound_pool_hash, &pool->hash_node, hash);
3639
3640 return pool;
3641 fail:
3642 if (pool)
3643 put_unbound_pool(pool);
3644 return NULL;
3645 }
3646
3647 static void rcu_free_pwq(struct rcu_head *rcu)
3648 {
3649 kmem_cache_free(pwq_cache,
3650 container_of(rcu, struct pool_workqueue, rcu));
3651 }
3652
3653 /*
3654  * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
3655  * and needs to be destroyed.
3656  */
3657 static void pwq_unbound_release_workfn(struct work_struct *work)
3658 {
3659 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3660 unbound_release_work);
3661 struct workqueue_struct *wq = pwq->wq;
3662 struct worker_pool *pool = pwq->pool;
3663 bool is_last;
3664
3665 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3666 return;
3667
3668 mutex_lock(&wq->mutex);
3669 list_del_rcu(&pwq->pwqs_node);
3670 is_last = list_empty(&wq->pwqs);
3671 mutex_unlock(&wq->mutex);
3672
3673 mutex_lock(&wq_pool_mutex);
3674 put_unbound_pool(pool);
3675 mutex_unlock(&wq_pool_mutex);
3676
3677 call_rcu(&pwq->rcu, rcu_free_pwq);
3678
3679
3680
3681
3682
3683 if (is_last) {
3684 wq_unregister_lockdep(wq);
3685 call_rcu(&wq->rcu, rcu_free_wq);
3686 }
3687 }
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697 static void pwq_adjust_max_active(struct pool_workqueue *pwq)
3698 {
3699 struct workqueue_struct *wq = pwq->wq;
3700 bool freezable = wq->flags & WQ_FREEZABLE;
3701 unsigned long flags;
3702
3703
3704 lockdep_assert_held(&wq->mutex);
3705
3706
3707 if (!freezable && pwq->max_active == wq->saved_max_active)
3708 return;
3709
3710
3711 spin_lock_irqsave(&pwq->pool->lock, flags);
3712
3713
3714
3715
3716
3717
3718 if (!freezable || !workqueue_freezing) {
3719 pwq->max_active = wq->saved_max_active;
3720
3721 while (!list_empty(&pwq->delayed_works) &&
3722 pwq->nr_active < pwq->max_active)
3723 pwq_activate_first_delayed(pwq);
3724
3725
3726
3727
3728
3729 wake_up_worker(pwq->pool);
3730 } else {
3731 pwq->max_active = 0;
3732 }
3733
3734 spin_unlock_irqrestore(&pwq->pool->lock, flags);
3735 }
3736
3737
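/* initialize newly allocated @pwq which is associated with @wq and @pool */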
3738 static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3739 struct worker_pool *pool)
3740 {
3741 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3742
3743 memset(pwq, 0, sizeof(*pwq));
3744
3745 pwq->pool = pool;
3746 pwq->wq = wq;
3747 pwq->flush_color = -1;
3748 pwq->refcnt = 1;
3749 INIT_LIST_HEAD(&pwq->delayed_works);
3750 INIT_LIST_HEAD(&pwq->pwqs_node);
3751 INIT_LIST_HEAD(&pwq->mayday_node);
3752 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
3753 }
3754
3755
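/* sync @pwq with the current state of its associated wq and link it in */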
3756 static void link_pwq(struct pool_workqueue *pwq)
3757 {
3758 struct workqueue_struct *wq = pwq->wq;
3759
3760 lockdep_assert_held(&wq->mutex);
3761
3762
3763 if (!list_empty(&pwq->pwqs_node))
3764 return;
3765
3766
3767 pwq->work_color = wq->work_color;
3768
3769
3770 pwq_adjust_max_active(pwq);
3771
3772
3773 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
3774 }
3775
3776
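/* obtain a pool matching @attrs and create a pwq associating the pool and @wq */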
3777 static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3778 const struct workqueue_attrs *attrs)
3779 {
3780 struct worker_pool *pool;
3781 struct pool_workqueue *pwq;
3782
3783 lockdep_assert_held(&wq_pool_mutex);
3784
3785 pool = get_unbound_pool(attrs);
3786 if (!pool)
3787 return NULL;
3788
3789 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3790 if (!pwq) {
3791 put_unbound_pool(pool);
3792 return NULL;
3793 }
3794
3795 init_pwq(pwq, wq, pool);
3796 return pwq;
3797 }
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821 static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3822 int cpu_going_down, cpumask_t *cpumask)
3823 {
3824 if (!wq_numa_enabled || attrs->no_numa)
3825 goto use_dfl;
3826
3827
3828 cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
3829 if (cpu_going_down >= 0)
3830 cpumask_clear_cpu(cpu_going_down, cpumask);
3831
3832 if (cpumask_empty(cpumask))
3833 goto use_dfl;
3834
3835
3836 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
3837
3838 if (cpumask_empty(cpumask)) {
3839 pr_warn_once("WARNING: workqueue cpumask: online intersect > "
3840 "possible intersect\n");
3841 return false;
3842 }
3843
3844 return !cpumask_equal(cpumask, attrs->cpumask);
3845
3846 use_dfl:
3847 cpumask_copy(cpumask, attrs->cpumask);
3848 return false;
3849 }
3850
3851
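/* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */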
3852 static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3853 int node,
3854 struct pool_workqueue *pwq)
3855 {
3856 struct pool_workqueue *old_pwq;
3857
3858 lockdep_assert_held(&wq_pool_mutex);
3859 lockdep_assert_held(&wq->mutex);
3860
3861
3862 link_pwq(pwq);
3863
3864 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3865 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3866 return old_pwq;
3867 }
3868
3869
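/* context to store the prepared attrs & pwqs before installing them */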
3870 struct apply_wqattrs_ctx {
3871 struct workqueue_struct *wq;
3872 struct workqueue_attrs *attrs;
3873 struct list_head list;
3874 struct pool_workqueue *dfl_pwq;
3875 struct pool_workqueue *pwq_tbl[];
3876 };
3877
3878
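/* free the resources after success or abort */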
3879 static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3880 {
3881 if (ctx) {
3882 int node;
3883
3884 for_each_node(node)
3885 put_pwq_unlocked(ctx->pwq_tbl[node]);
3886 put_pwq_unlocked(ctx->dfl_pwq);
3887
3888 free_workqueue_attrs(ctx->attrs);
3889
3890 kfree(ctx);
3891 }
3892 }
3893
3894
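/* allocate the attrs and pwqs for later installation */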
3895 static struct apply_wqattrs_ctx *
3896 apply_wqattrs_prepare(struct workqueue_struct *wq,
3897 const struct workqueue_attrs *attrs)
3898 {
3899 struct apply_wqattrs_ctx *ctx;
3900 struct workqueue_attrs *new_attrs, *tmp_attrs;
3901 int node;
3902
3903 lockdep_assert_held(&wq_pool_mutex);
3904
3905 ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
3906
3907 new_attrs = alloc_workqueue_attrs();
3908 tmp_attrs = alloc_workqueue_attrs();
3909 if (!ctx || !new_attrs || !tmp_attrs)
3910 goto out_free;
3911
3912
3913
3914
3915
3916
3917 copy_workqueue_attrs(new_attrs, attrs);
3918 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
3919 if (unlikely(cpumask_empty(new_attrs->cpumask)))
3920 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
3921
3922
3923
3924
3925
3926
3927 copy_workqueue_attrs(tmp_attrs, new_attrs);
3928
3929
3930
3931
3932
3933
3934 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3935 if (!ctx->dfl_pwq)
3936 goto out_free;
3937
3938 for_each_node(node) {
3939 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
3940 ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3941 if (!ctx->pwq_tbl[node])
3942 goto out_free;
3943 } else {
3944 ctx->dfl_pwq->refcnt++;
3945 ctx->pwq_tbl[node] = ctx->dfl_pwq;
3946 }
3947 }
3948
3949
3950 copy_workqueue_attrs(new_attrs, attrs);
3951 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
3952 ctx->attrs = new_attrs;
3953
3954 ctx->wq = wq;
3955 free_workqueue_attrs(tmp_attrs);
3956 return ctx;
3957
3958 out_free:
3959 free_workqueue_attrs(tmp_attrs);
3960 free_workqueue_attrs(new_attrs);
3961 apply_wqattrs_cleanup(ctx);
3962 return NULL;
3963 }
3964
3965
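/* set attrs and install prepared pwqs; @ctx points to old pwqs on return */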
3966 static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
3967 {
3968 int node;
3969
3970
3971 mutex_lock(&ctx->wq->mutex);
3972
3973 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
3974
3975
3976 for_each_node(node)
3977 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
3978 ctx->pwq_tbl[node]);
3979
3980
3981 link_pwq(ctx->dfl_pwq);
3982 swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
3983
3984 mutex_unlock(&ctx->wq->mutex);
3985 }
3986
3987 static void apply_wqattrs_lock(void)
3988 {
3989
3990 get_online_cpus();
3991 mutex_lock(&wq_pool_mutex);
3992 }
3993
3994 static void apply_wqattrs_unlock(void)
3995 {
3996 mutex_unlock(&wq_pool_mutex);
3997 put_online_cpus();
3998 }
3999
4000 static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
4001 const struct workqueue_attrs *attrs)
4002 {
4003 struct apply_wqattrs_ctx *ctx;
4004
4005
4006 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
4007 return -EINVAL;
4008
4009
4010 if (!list_empty(&wq->pwqs)) {
4011 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4012 return -EINVAL;
4013
4014 wq->flags &= ~__WQ_ORDERED;
4015 }
4016
4017 ctx = apply_wqattrs_prepare(wq, attrs);
4018 if (!ctx)
4019 return -ENOMEM;
4020
4021
4022 apply_wqattrs_commit(ctx);
4023 apply_wqattrs_cleanup(ctx);
4024
4025 return 0;
4026 }
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046 int apply_workqueue_attrs(struct workqueue_struct *wq,
4047 const struct workqueue_attrs *attrs)
4048 {
4049 int ret;
4050
4051 lockdep_assert_cpus_held();
4052
4053 mutex_lock(&wq_pool_mutex);
4054 ret = apply_workqueue_attrs_locked(wq, attrs);
4055 mutex_unlock(&wq_pool_mutex);
4056
4057 return ret;
4058 }
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082 static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
4083 bool online)
4084 {
4085 int node = cpu_to_node(cpu);
4086 int cpu_off = online ? -1 : cpu;
4087 struct pool_workqueue *old_pwq = NULL, *pwq;
4088 struct workqueue_attrs *target_attrs;
4089 cpumask_t *cpumask;
4090
4091 lockdep_assert_held(&wq_pool_mutex);
4092
4093 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
4094 wq->unbound_attrs->no_numa)
4095 return;
4096
4097
4098
4099
4100
4101
4102 target_attrs = wq_update_unbound_numa_attrs_buf;
4103 cpumask = target_attrs->cpumask;
4104
4105 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
4106 pwq = unbound_pwq_by_node(wq, node);
4107
4108
4109
4110
4111
4112
4113
4114 if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
4115 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
4116 return;
4117 } else {
4118 goto use_dfl_pwq;
4119 }
4120
4121
4122 pwq = alloc_unbound_pwq(wq, target_attrs);
4123 if (!pwq) {
4124 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
4125 wq->name);
4126 goto use_dfl_pwq;
4127 }
4128
4129
4130 mutex_lock(&wq->mutex);
4131 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
4132 goto out_unlock;
4133
4134 use_dfl_pwq:
4135 mutex_lock(&wq->mutex);
4136 spin_lock_irq(&wq->dfl_pwq->pool->lock);
4137 get_pwq(wq->dfl_pwq);
4138 spin_unlock_irq(&wq->dfl_pwq->pool->lock);
4139 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
4140 out_unlock:
4141 mutex_unlock(&wq->mutex);
4142 put_pwq_unlocked(old_pwq);
4143 }
4144
4145 static int alloc_and_link_pwqs(struct workqueue_struct *wq)
4146 {
4147 bool highpri = wq->flags & WQ_HIGHPRI;
4148 int cpu, ret;
4149
4150 if (!(wq->flags & WQ_UNBOUND)) {
4151 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
4152 if (!wq->cpu_pwqs)
4153 return -ENOMEM;
4154
4155 for_each_possible_cpu(cpu) {
4156 struct pool_workqueue *pwq =
4157 per_cpu_ptr(wq->cpu_pwqs, cpu);
4158 struct worker_pool *cpu_pools =
4159 per_cpu(cpu_worker_pools, cpu);
4160
4161 init_pwq(pwq, wq, &cpu_pools[highpri]);
4162
4163 mutex_lock(&wq->mutex);
4164 link_pwq(pwq);
4165 mutex_unlock(&wq->mutex);
4166 }
4167 return 0;
4168 }
4169
4170 get_online_cpus();
4171 if (wq->flags & __WQ_ORDERED) {
4172 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
4173
4174 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
4175 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
4176 "ordering guarantee broken for workqueue %s\n", wq->name);
4177 } else {
4178 ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
4179 }
4180 put_online_cpus();
4181
4182 return ret;
4183 }
4184
4185 static int wq_clamp_max_active(int max_active, unsigned int flags,
4186 const char *name)
4187 {
4188 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
4189
4190 if (max_active < 1 || max_active > lim)
4191 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
4192 max_active, name, 1, lim);
4193
4194 return clamp_val(max_active, 1, lim);
4195 }
4196
4197 /*
4198  * Workqueues which may be used during memory reclaim should have a rescuer
4199  * to guarantee forward progress.  init_rescuer() creates and starts it.
4200  */
4201 static int init_rescuer(struct workqueue_struct *wq)
4202 {
4203 struct worker *rescuer;
4204 int ret;
4205
4206 if (!(wq->flags & WQ_MEM_RECLAIM))
4207 return 0;
4208
4209 rescuer = alloc_worker(NUMA_NO_NODE);
4210 if (!rescuer)
4211 return -ENOMEM;
4212
4213 rescuer->rescue_wq = wq;
4214 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
4215 ret = PTR_ERR_OR_ZERO(rescuer->task);
4216 if (ret) {
4217 kfree(rescuer);
4218 return ret;
4219 }
4220
4221 wq->rescuer = rescuer;
4222 kthread_bind_mask(rescuer->task, cpu_possible_mask);
4223 wake_up_process(rescuer->task);
4224
4225 return 0;
4226 }
4227
4228 __printf(1, 4)
4229 struct workqueue_struct *alloc_workqueue(const char *fmt,
4230 unsigned int flags,
4231 int max_active, ...)
4232 {
4233 size_t tbl_size = 0;
4234 va_list args;
4235 struct workqueue_struct *wq;
4236 struct pool_workqueue *pwq;
4237
4238
4239
4240
4241
4242
4243
4244
4245 if ((flags & WQ_UNBOUND) && max_active == 1)
4246 flags |= __WQ_ORDERED;
4247
4248
4249 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
4250 flags |= WQ_UNBOUND;
4251
4252
4253 if (flags & WQ_UNBOUND)
4254 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
4255
4256 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
4257 if (!wq)
4258 return NULL;
4259
4260 if (flags & WQ_UNBOUND) {
4261 wq->unbound_attrs = alloc_workqueue_attrs();
4262 if (!wq->unbound_attrs)
4263 goto err_free_wq;
4264 }
4265
4266 va_start(args, max_active);
4267 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
4268 va_end(args);
4269
4270 max_active = max_active ?: WQ_DFL_ACTIVE;
4271 max_active = wq_clamp_max_active(max_active, flags, wq->name);
4272
4273
4274 wq->flags = flags;
4275 wq->saved_max_active = max_active;
4276 mutex_init(&wq->mutex);
4277 atomic_set(&wq->nr_pwqs_to_flush, 0);
4278 INIT_LIST_HEAD(&wq->pwqs);
4279 INIT_LIST_HEAD(&wq->flusher_queue);
4280 INIT_LIST_HEAD(&wq->flusher_overflow);
4281 INIT_LIST_HEAD(&wq->maydays);
4282
4283 wq_init_lockdep(wq);
4284 INIT_LIST_HEAD(&wq->list);
4285
4286 if (alloc_and_link_pwqs(wq) < 0)
4287 goto err_unreg_lockdep;
4288
4289 if (wq_online && init_rescuer(wq) < 0)
4290 goto err_destroy;
4291
4292 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4293 goto err_destroy;
4294
4295
4296
4297
4298
4299
4300 mutex_lock(&wq_pool_mutex);
4301
4302 mutex_lock(&wq->mutex);
4303 for_each_pwq(pwq, wq)
4304 pwq_adjust_max_active(pwq);
4305 mutex_unlock(&wq->mutex);
4306
4307 list_add_tail_rcu(&wq->list, &workqueues);
4308
4309 mutex_unlock(&wq_pool_mutex);
4310
4311 return wq;
4312
4313 err_unreg_lockdep:
4314 wq_unregister_lockdep(wq);
4315 wq_free_lockdep(wq);
4316 err_free_wq:
4317 free_workqueue_attrs(wq->unbound_attrs);
4318 kfree(wq);
4319 return NULL;
4320 err_destroy:
4321 destroy_workqueue(wq);
4322 return NULL;
4323 }
4324 EXPORT_SYMBOL_GPL(alloc_workqueue);
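/*
 * Illustrative usage sketch (hypothetical caller code, not part of this
 * file):
 *
 *	struct workqueue_struct *wq;
 *
 *	wq = alloc_workqueue("my_driver_wq", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
 *	if (!wq)
 *		return -ENOMEM;
 *	queue_work(wq, &my_work);
 *	...
 *	destroy_workqueue(wq);
 *
 * Passing 0 as max_active selects the default (WQ_DFL_ACTIVE).
 */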
4325
4326 /**
4327  * destroy_workqueue - safely terminate a workqueue
4328  * @wq: target workqueue
4329  *
4330  * Safely destroy a workqueue.  All work currently pending will be done first.
4331  */
4332 void destroy_workqueue(struct workqueue_struct *wq)
4333 {
4334 struct pool_workqueue *pwq;
4335 int node;
4336
4337
4338
4339
4340
4341 workqueue_sysfs_unregister(wq);
4342
4343
4344 drain_workqueue(wq);
4345
4346
4347 if (wq->rescuer) {
4348 struct worker *rescuer = wq->rescuer;
4349
4350
4351 spin_lock_irq(&wq_mayday_lock);
4352 wq->rescuer = NULL;
4353 spin_unlock_irq(&wq_mayday_lock);
4354
4355
4356 kthread_stop(rescuer->task);
4357 kfree(rescuer);
4358 }
4359
4360
4361 mutex_lock(&wq->mutex);
4362 for_each_pwq(pwq, wq) {
4363 int i;
4364
4365 for (i = 0; i < WORK_NR_COLORS; i++) {
4366 if (WARN_ON(pwq->nr_in_flight[i])) {
4367 mutex_unlock(&wq->mutex);
4368 show_workqueue_state();
4369 return;
4370 }
4371 }
4372
4373 if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
4374 WARN_ON(pwq->nr_active) ||
4375 WARN_ON(!list_empty(&pwq->delayed_works))) {
4376 mutex_unlock(&wq->mutex);
4377 show_workqueue_state();
4378 return;
4379 }
4380 }
4381 mutex_unlock(&wq->mutex);
4382
4383
4384
4385
4386
4387 mutex_lock(&wq_pool_mutex);
4388 list_del_rcu(&wq->list);
4389 mutex_unlock(&wq_pool_mutex);
4390
4391 if (!(wq->flags & WQ_UNBOUND)) {
4392 wq_unregister_lockdep(wq);
4393
4394
4395
4396
4397 call_rcu(&wq->rcu, rcu_free_wq);
4398 } else {
4399
4400
4401
4402
4403
4404 for_each_node(node) {
4405 pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4406 RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4407 put_pwq_unlocked(pwq);
4408 }
4409
4410
4411
4412
4413
4414 pwq = wq->dfl_pwq;
4415 wq->dfl_pwq = NULL;
4416 put_pwq_unlocked(pwq);
4417 }
4418 }
4419 EXPORT_SYMBOL_GPL(destroy_workqueue);
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431 void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4432 {
4433 struct pool_workqueue *pwq;
4434
4435
4436 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4437 return;
4438
4439 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4440
4441 mutex_lock(&wq->mutex);
4442
4443 wq->flags &= ~__WQ_ORDERED;
4444 wq->saved_max_active = max_active;
4445
4446 for_each_pwq(pwq, wq)
4447 pwq_adjust_max_active(pwq);
4448
4449 mutex_unlock(&wq->mutex);
4450 }
4451 EXPORT_SYMBOL_GPL(workqueue_set_max_active);
4452
4453 /**
4454  * current_work - retrieve %current task's work struct
4455  *
4456  * Determine if %current task is a workqueue worker and what it's working on.
4457  * Useful to find out the context that the %current task is running in.
4458  *
4459  * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
4460  */
4461 struct work_struct *current_work(void)
4462 {
4463 struct worker *worker = current_wq_worker();
4464
4465 return worker ? worker->current_work : NULL;
4466 }
4467 EXPORT_SYMBOL(current_work);
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477 bool current_is_workqueue_rescuer(void)
4478 {
4479 struct worker *worker = current_wq_worker();
4480
4481 return worker && worker->rescue_wq;
4482 }
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502 bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4503 {
4504 struct pool_workqueue *pwq;
4505 bool ret;
4506
4507 rcu_read_lock();
4508 preempt_disable();
4509
4510 if (cpu == WORK_CPU_UNBOUND)
4511 cpu = smp_processor_id();
4512
4513 if (!(wq->flags & WQ_UNBOUND))
4514 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4515 else
4516 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
4517
4518 ret = !list_empty(&pwq->delayed_works);
4519 preempt_enable();
4520 rcu_read_unlock();
4521
4522 return ret;
4523 }
4524 EXPORT_SYMBOL_GPL(workqueue_congested);
4525
4526 /**
4527  * work_busy - test whether a work is currently pending or running
4528  * @work: the work to be tested
4529  *
4530  * Test whether @work is currently pending or running.  There is no
4531  * synchronization around this function and the test result is
4532  * unreliable and only useful as advisory hints or for debugging.
4533  *
4534  * Return:
4535  * OR'd bitmask of WORK_BUSY_* bits.
4536  */
4537 unsigned int work_busy(struct work_struct *work)
4538 {
4539 struct worker_pool *pool;
4540 unsigned long flags;
4541 unsigned int ret = 0;
4542
4543 if (work_pending(work))
4544 ret |= WORK_BUSY_PENDING;
4545
4546 rcu_read_lock();
4547 pool = get_work_pool(work);
4548 if (pool) {
4549 spin_lock_irqsave(&pool->lock, flags);
4550 if (find_worker_executing_work(pool, work))
4551 ret |= WORK_BUSY_RUNNING;
4552 spin_unlock_irqrestore(&pool->lock, flags);
4553 }
4554 rcu_read_unlock();
4555
4556 return ret;
4557 }
4558 EXPORT_SYMBOL_GPL(work_busy);
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570 void set_worker_desc(const char *fmt, ...)
4571 {
4572 struct worker *worker = current_wq_worker();
4573 va_list args;
4574
4575 if (worker) {
4576 va_start(args, fmt);
4577 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4578 va_end(args);
4579 }
4580 }
4581 EXPORT_SYMBOL_GPL(set_worker_desc);
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596 void print_worker_info(const char *log_lvl, struct task_struct *task)
4597 {
4598 work_func_t *fn = NULL;
4599 char name[WQ_NAME_LEN] = { };
4600 char desc[WORKER_DESC_LEN] = { };
4601 struct pool_workqueue *pwq = NULL;
4602 struct workqueue_struct *wq = NULL;
4603 struct worker *worker;
4604
4605 if (!(task->flags & PF_WQ_WORKER))
4606 return;
4607
4608
4609
4610
4611
4612 worker = kthread_probe_data(task);
4613
4614
4615
4616
4617
4618 probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4619 probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4620 probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4621 probe_kernel_read(name, wq->name, sizeof(name) - 1);
4622 probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4623
4624 if (fn || name[0] || desc[0]) {
4625 printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
4626 if (strcmp(name, desc))
4627 pr_cont(" (%s)", desc);
4628 pr_cont("\n");
4629 }
4630 }
4631
4632 static void pr_cont_pool_info(struct worker_pool *pool)
4633 {
4634 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
4635 if (pool->node != NUMA_NO_NODE)
4636 pr_cont(" node=%d", pool->node);
4637 pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
4638 }
4639
4640 static void pr_cont_work(bool comma, struct work_struct *work)
4641 {
4642 if (work->func == wq_barrier_func) {
4643 struct wq_barrier *barr;
4644
4645 barr = container_of(work, struct wq_barrier, work);
4646
4647 pr_cont("%s BAR(%d)", comma ? "," : "",
4648 task_pid_nr(barr->task));
4649 } else {
4650 pr_cont("%s %ps", comma ? "," : "", work->func);
4651 }
4652 }
4653
4654 static void show_pwq(struct pool_workqueue *pwq)
4655 {
4656 struct worker_pool *pool = pwq->pool;
4657 struct work_struct *work;
4658 struct worker *worker;
4659 bool has_in_flight = false, has_pending = false;
4660 int bkt;
4661
4662 pr_info(" pwq %d:", pool->id);
4663 pr_cont_pool_info(pool);
4664
4665 pr_cont(" active=%d/%d refcnt=%d%s\n",
4666 pwq->nr_active, pwq->max_active, pwq->refcnt,
4667 !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
4668
4669 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4670 if (worker->current_pwq == pwq) {
4671 has_in_flight = true;
4672 break;
4673 }
4674 }
4675 if (has_in_flight) {
4676 bool comma = false;
4677
4678 pr_info(" in-flight:");
4679 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4680 if (worker->current_pwq != pwq)
4681 continue;
4682
4683 pr_cont("%s %d%s:%ps", comma ? "," : "",
4684 task_pid_nr(worker->task),
4685 worker == pwq->wq->rescuer ? "(RESCUER)" : "",
4686 worker->current_func);
4687 list_for_each_entry(work, &worker->scheduled, entry)
4688 pr_cont_work(false, work);
4689 comma = true;
4690 }
4691 pr_cont("\n");
4692 }
4693
4694 list_for_each_entry(work, &pool->worklist, entry) {
4695 if (get_work_pwq(work) == pwq) {
4696 has_pending = true;
4697 break;
4698 }
4699 }
4700 if (has_pending) {
4701 bool comma = false;
4702
4703 pr_info(" pending:");
4704 list_for_each_entry(work, &pool->worklist, entry) {
4705 if (get_work_pwq(work) != pwq)
4706 continue;
4707
4708 pr_cont_work(comma, work);
4709 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4710 }
4711 pr_cont("\n");
4712 }
4713
4714 if (!list_empty(&pwq->delayed_works)) {
4715 bool comma = false;
4716
4717 pr_info(" delayed:");
4718 list_for_each_entry(work, &pwq->delayed_works, entry) {
4719 pr_cont_work(comma, work);
4720 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4721 }
4722 pr_cont("\n");
4723 }
4724 }
4725
4726 /**
4727  * show_workqueue_state - dump workqueue state
4728  *
4729  * Print the busy state of all workqueues and worker pools, e.g. from the
4730  * workqueue watchdog or when destroy_workqueue() finds leftover work.
4731  */
4732 void show_workqueue_state(void)
4733 {
4734 struct workqueue_struct *wq;
4735 struct worker_pool *pool;
4736 unsigned long flags;
4737 int pi;
4738
4739 rcu_read_lock();
4740
4741 pr_info("Showing busy workqueues and worker pools:\n");
4742
4743 list_for_each_entry_rcu(wq, &workqueues, list) {
4744 struct pool_workqueue *pwq;
4745 bool idle = true;
4746
4747 for_each_pwq(pwq, wq) {
4748 if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
4749 idle = false;
4750 break;
4751 }
4752 }
4753 if (idle)
4754 continue;
4755
4756 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
4757
4758 for_each_pwq(pwq, wq) {
4759 spin_lock_irqsave(&pwq->pool->lock, flags);
4760 if (pwq->nr_active || !list_empty(&pwq->delayed_works))
4761 show_pwq(pwq);
4762 spin_unlock_irqrestore(&pwq->pool->lock, flags);
4763
4764
4765
4766
4767
4768 touch_nmi_watchdog();
4769 }
4770 }
4771
4772 for_each_pool(pool, pi) {
4773 struct worker *worker;
4774 bool first = true;
4775
4776 spin_lock_irqsave(&pool->lock, flags);
4777 if (pool->nr_workers == pool->nr_idle)
4778 goto next_pool;
4779
4780 pr_info("pool %d:", pool->id);
4781 pr_cont_pool_info(pool);
4782 pr_cont(" hung=%us workers=%d",
4783 jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
4784 pool->nr_workers);
4785 if (pool->manager)
4786 pr_cont(" manager: %d",
4787 task_pid_nr(pool->manager->task));
4788 list_for_each_entry(worker, &pool->idle_list, entry) {
4789 pr_cont(" %s%d", first ? "idle: " : "",
4790 task_pid_nr(worker->task));
4791 first = false;
4792 }
4793 pr_cont("\n");
4794 next_pool:
4795 spin_unlock_irqrestore(&pool->lock, flags);
4796
4797
4798
4799
4800
4801 touch_nmi_watchdog();
4802 }
4803
4804 rcu_read_unlock();
4805 }
4806
4807
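/* used to show workqueue information through /proc/PID/{comm,stat,status} */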
4808 void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
4809 {
4810 int off;
4811
4812
4813 off = strscpy(buf, task->comm, size);
4814 if (off < 0)
4815 return;
4816
4817
4818 mutex_lock(&wq_pool_attach_mutex);
4819
4820 if (task->flags & PF_WQ_WORKER) {
4821 struct worker *worker = kthread_data(task);
4822 struct worker_pool *pool = worker->pool;
4823
4824 if (pool) {
4825 spin_lock_irq(&pool->lock);
4826
4827
4828
4829
4830
4831 if (worker->desc[0] != '\0') {
4832 if (worker->current_work)
4833 scnprintf(buf + off, size - off, "+%s",
4834 worker->desc);
4835 else
4836 scnprintf(buf + off, size - off, "-%s",
4837 worker->desc);
4838 }
4839 spin_unlock_irq(&pool->lock);
4840 }
4841 }
4842
4843 mutex_unlock(&wq_pool_attach_mutex);
4844 }
4845
4846 #ifdef CONFIG_SMP
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863 static void unbind_workers(int cpu)
4864 {
4865 struct worker_pool *pool;
4866 struct worker *worker;
4867
4868 for_each_cpu_worker_pool(pool, cpu) {
4869 mutex_lock(&wq_pool_attach_mutex);
4870 spin_lock_irq(&pool->lock);
4871
4872
4873
4874
4875
4876
4877
4878
4879 for_each_pool_worker(worker, pool)
4880 worker->flags |= WORKER_UNBOUND;
4881
4882 pool->flags |= POOL_DISASSOCIATED;
4883
4884 spin_unlock_irq(&pool->lock);
4885 mutex_unlock(&wq_pool_attach_mutex);
4886
4887
4888
4889
4890
4891
4892
4893 schedule();
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903 atomic_set(&pool->nr_running, 0);
4904
4905
4906
4907
4908
4909
4910 spin_lock_irq(&pool->lock);
4911 wake_up_worker(pool);
4912 spin_unlock_irq(&pool->lock);
4913 }
4914 }
4915
4916
4917
4918
4919
4920
4921
4922 static void rebind_workers(struct worker_pool *pool)
4923 {
4924 struct worker *worker;
4925
4926 lockdep_assert_held(&wq_pool_attach_mutex);
4927
4928
4929
4930
4931
4932
4933
4934
4935 for_each_pool_worker(worker, pool)
4936 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4937 pool->attrs->cpumask) < 0);
4938
4939 spin_lock_irq(&pool->lock);
4940
4941 pool->flags &= ~POOL_DISASSOCIATED;
4942
4943 for_each_pool_worker(worker, pool) {
4944 unsigned int worker_flags = worker->flags;
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954 if (worker_flags & WORKER_IDLE)
4955 wake_up_process(worker->task);
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
4973 worker_flags |= WORKER_REBOUND;
4974 worker_flags &= ~WORKER_UNBOUND;
4975 WRITE_ONCE(worker->flags, worker_flags);
4976 }
4977
4978 spin_unlock_irq(&pool->lock);
4979 }
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991 static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
4992 {
4993 static cpumask_t cpumask;
4994 struct worker *worker;
4995
4996 lockdep_assert_held(&wq_pool_attach_mutex);
4997
4998
4999 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
5000 return;
5001
5002 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
5003
5004
5005 for_each_pool_worker(worker, pool)
5006 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
5007 }
5008
5009 int workqueue_prepare_cpu(unsigned int cpu)
5010 {
5011 struct worker_pool *pool;
5012
5013 for_each_cpu_worker_pool(pool, cpu) {
5014 if (pool->nr_workers)
5015 continue;
5016 if (!create_worker(pool))
5017 return -ENOMEM;
5018 }
5019 return 0;
5020 }
5021
5022 int workqueue_online_cpu(unsigned int cpu)
5023 {
5024 struct worker_pool *pool;
5025 struct workqueue_struct *wq;
5026 int pi;
5027
5028 mutex_lock(&wq_pool_mutex);
5029
5030 for_each_pool(pool, pi) {
5031 mutex_lock(&wq_pool_attach_mutex);
5032
5033 if (pool->cpu == cpu)
5034 rebind_workers(pool);
5035 else if (pool->cpu < 0)
5036 restore_unbound_workers_cpumask(pool, cpu);
5037
5038 mutex_unlock(&wq_pool_attach_mutex);
5039 }
5040
5041
5042 list_for_each_entry(wq, &workqueues, list)
5043 wq_update_unbound_numa(wq, cpu, true);
5044
5045 mutex_unlock(&wq_pool_mutex);
5046 return 0;
5047 }
5048
5049 int workqueue_offline_cpu(unsigned int cpu)
5050 {
5051 struct workqueue_struct *wq;
5052
5053
5054 if (WARN_ON(cpu != smp_processor_id()))
5055 return -1;
5056
5057 unbind_workers(cpu);
5058
5059
5060 mutex_lock(&wq_pool_mutex);
5061 list_for_each_entry(wq, &workqueues, list)
5062 wq_update_unbound_numa(wq, cpu, false);
5063 mutex_unlock(&wq_pool_mutex);
5064
5065 return 0;
5066 }
5067
5068 struct work_for_cpu {
5069 struct work_struct work;
5070 long (*fn)(void *);
5071 void *arg;
5072 long ret;
5073 };
5074
5075 static void work_for_cpu_fn(struct work_struct *work)
5076 {
5077 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
5078
5079 wfc->ret = wfc->fn(wfc->arg);
5080 }
5081
5082 /**
5083  * work_on_cpu - run a function in thread context on a particular cpu
5084  * @cpu: the cpu to run on
5085  * @fn: the function to run
5086  * @arg: the function arg
5087  *
5088  * It is up to the caller to ensure that the cpu doesn't go offline.
5089  * The caller must not hold any locks which would prevent @fn from completing.
5090  *
5091  * Return: The value @fn returns.
5092  */
5093 long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
5094 {
5095 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
5096
5097 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
5098 schedule_work_on(cpu, &wfc.work);
5099 flush_work(&wfc.work);
5100 destroy_work_on_stack(&wfc.work);
5101 return wfc.ret;
5102 }
5103 EXPORT_SYMBOL_GPL(work_on_cpu);
5104
5105 /**
5106  * work_on_cpu_safe - run a function in thread context on a particular cpu
5107  * @cpu: the cpu to run on
5108  * @fn:  the function to run
5109  * @arg: the function argument
5110  *
5111  * Disables CPU hotplug and calls work_on_cpu().  The caller must not hold
5112  * any locks which would prevent @fn from completing.
5113  *
5114  * Return: The value @fn returns.
5115  */
5116 long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
5117 {
5118 long ret = -ENODEV;
5119
5120 get_online_cpus();
5121 if (cpu_online(cpu))
5122 ret = work_on_cpu(cpu, fn, arg);
5123 put_online_cpus();
5124 return ret;
5125 }
5126 EXPORT_SYMBOL_GPL(work_on_cpu_safe);
5127 #endif
5128
5129 #ifdef CONFIG_FREEZER
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141 void freeze_workqueues_begin(void)
5142 {
5143 struct workqueue_struct *wq;
5144 struct pool_workqueue *pwq;
5145
5146 mutex_lock(&wq_pool_mutex);
5147
5148 WARN_ON_ONCE(workqueue_freezing);
5149 workqueue_freezing = true;
5150
5151 list_for_each_entry(wq, &workqueues, list) {
5152 mutex_lock(&wq->mutex);
5153 for_each_pwq(pwq, wq)
5154 pwq_adjust_max_active(pwq);
5155 mutex_unlock(&wq->mutex);
5156 }
5157
5158 mutex_unlock(&wq_pool_mutex);
5159 }
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174 bool freeze_workqueues_busy(void)
5175 {
5176 bool busy = false;
5177 struct workqueue_struct *wq;
5178 struct pool_workqueue *pwq;
5179
5180 mutex_lock(&wq_pool_mutex);
5181
5182 WARN_ON_ONCE(!workqueue_freezing);
5183
5184 list_for_each_entry(wq, &workqueues, list) {
5185 if (!(wq->flags & WQ_FREEZABLE))
5186 continue;
5187
5188
5189
5190
5191 rcu_read_lock();
5192 for_each_pwq(pwq, wq) {
5193 WARN_ON_ONCE(pwq->nr_active < 0);
5194 if (pwq->nr_active) {
5195 busy = true;
5196 rcu_read_unlock();
5197 goto out_unlock;
5198 }
5199 }
5200 rcu_read_unlock();
5201 }
5202 out_unlock:
5203 mutex_unlock(&wq_pool_mutex);
5204 return busy;
5205 }
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216 void thaw_workqueues(void)
5217 {
5218 struct workqueue_struct *wq;
5219 struct pool_workqueue *pwq;
5220
5221 mutex_lock(&wq_pool_mutex);
5222
5223 if (!workqueue_freezing)
5224 goto out_unlock;
5225
5226 workqueue_freezing = false;
5227
5228
5229 list_for_each_entry(wq, &workqueues, list) {
5230 mutex_lock(&wq->mutex);
5231 for_each_pwq(pwq, wq)
5232 pwq_adjust_max_active(pwq);
5233 mutex_unlock(&wq->mutex);
5234 }
5235
5236 out_unlock:
5237 mutex_unlock(&wq_pool_mutex);
5238 }
5239 #endif
5240
5241 static int workqueue_apply_unbound_cpumask(void)
5242 {
5243 LIST_HEAD(ctxs);
5244 int ret = 0;
5245 struct workqueue_struct *wq;
5246 struct apply_wqattrs_ctx *ctx, *n;
5247
5248 lockdep_assert_held(&wq_pool_mutex);
5249
5250 list_for_each_entry(wq, &workqueues, list) {
5251 if (!(wq->flags & WQ_UNBOUND))
5252 continue;
5253
5254 if (wq->flags & __WQ_ORDERED)
5255 continue;
5256
5257 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
5258 if (!ctx) {
5259 ret = -ENOMEM;
5260 break;
5261 }
5262
5263 list_add_tail(&ctx->list, &ctxs);
5264 }
5265
5266 list_for_each_entry_safe(ctx, n, &ctxs, list) {
5267 if (!ret)
5268 apply_wqattrs_commit(ctx);
5269 apply_wqattrs_cleanup(ctx);
5270 }
5271
5272 return ret;
5273 }
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287 int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
5288 {
5289 int ret = -EINVAL;
5290 cpumask_var_t saved_cpumask;
5291
5292 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
5293 return -ENOMEM;
5294
5295
5296
5297
5298
5299 cpumask_and(cpumask, cpumask, cpu_possible_mask);
5300 if (!cpumask_empty(cpumask)) {
5301 apply_wqattrs_lock();
5302
5303
5304 cpumask_copy(saved_cpumask, wq_unbound_cpumask);
5305
5306
5307 cpumask_copy(wq_unbound_cpumask, cpumask);
5308 ret = workqueue_apply_unbound_cpumask();
5309
5310
5311 if (ret < 0)
5312 cpumask_copy(wq_unbound_cpumask, saved_cpumask);
5313
5314 apply_wqattrs_unlock();
5315 }
5316
5317 free_cpumask_var(saved_cpumask);
5318 return ret;
5319 }
5320
5321 #ifdef CONFIG_SYSFS
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334
5335
5336
5337 struct wq_device {
5338 struct workqueue_struct *wq;
5339 struct device dev;
5340 };
5341
5342 static struct workqueue_struct *dev_to_wq(struct device *dev)
5343 {
5344 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5345
5346 return wq_dev->wq;
5347 }
5348
5349 static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
5350 char *buf)
5351 {
5352 struct workqueue_struct *wq = dev_to_wq(dev);
5353
5354 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
5355 }
5356 static DEVICE_ATTR_RO(per_cpu);
5357
5358 static ssize_t max_active_show(struct device *dev,
5359 struct device_attribute *attr, char *buf)
5360 {
5361 struct workqueue_struct *wq = dev_to_wq(dev);
5362
5363 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
5364 }
5365
5366 static ssize_t max_active_store(struct device *dev,
5367 struct device_attribute *attr, const char *buf,
5368 size_t count)
5369 {
5370 struct workqueue_struct *wq = dev_to_wq(dev);
5371 int val;
5372
5373 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
5374 return -EINVAL;
5375
5376 workqueue_set_max_active(wq, val);
5377 return count;
5378 }
5379 static DEVICE_ATTR_RW(max_active);
5380
5381 static struct attribute *wq_sysfs_attrs[] = {
5382 &dev_attr_per_cpu.attr,
5383 &dev_attr_max_active.attr,
5384 NULL,
5385 };
5386 ATTRIBUTE_GROUPS(wq_sysfs);
5387
5388 static ssize_t wq_pool_ids_show(struct device *dev,
5389 struct device_attribute *attr, char *buf)
5390 {
5391 struct workqueue_struct *wq = dev_to_wq(dev);
5392 const char *delim = "";
5393 int node, written = 0;
5394
5395 get_online_cpus();
5396 rcu_read_lock();
5397 for_each_node(node) {
5398 written += scnprintf(buf + written, PAGE_SIZE - written,
5399 "%s%d:%d", delim, node,
5400 unbound_pwq_by_node(wq, node)->pool->id);
5401 delim = " ";
5402 }
5403 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
5404 rcu_read_unlock();
5405 put_online_cpus();
5406
5407 return written;
5408 }
5409
5410 static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
5411 char *buf)
5412 {
5413 struct workqueue_struct *wq = dev_to_wq(dev);
5414 int written;
5415
5416 mutex_lock(&wq->mutex);
5417 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
5418 mutex_unlock(&wq->mutex);
5419
5420 return written;
5421 }
5422
5423
5424 static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
5425 {
5426 struct workqueue_attrs *attrs;
5427
5428 lockdep_assert_held(&wq_pool_mutex);
5429
5430 attrs = alloc_workqueue_attrs();
5431 if (!attrs)
5432 return NULL;
5433
5434 copy_workqueue_attrs(attrs, wq->unbound_attrs);
5435 return attrs;
5436 }
5437
5438 static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
5439 const char *buf, size_t count)
5440 {
5441 struct workqueue_struct *wq = dev_to_wq(dev);
5442 struct workqueue_attrs *attrs;
5443 int ret = -ENOMEM;
5444
5445 apply_wqattrs_lock();
5446
5447 attrs = wq_sysfs_prep_attrs(wq);
5448 if (!attrs)
5449 goto out_unlock;
5450
5451 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
5452 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
5453 ret = apply_workqueue_attrs_locked(wq, attrs);
5454 else
5455 ret = -EINVAL;
5456
5457 out_unlock:
5458 apply_wqattrs_unlock();
5459 free_workqueue_attrs(attrs);
5460 return ret ?: count;
5461 }
5462
5463 static ssize_t wq_cpumask_show(struct device *dev,
5464 struct device_attribute *attr, char *buf)
5465 {
5466 struct workqueue_struct *wq = dev_to_wq(dev);
5467 int written;
5468
5469 mutex_lock(&wq->mutex);
5470 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5471 cpumask_pr_args(wq->unbound_attrs->cpumask));
5472 mutex_unlock(&wq->mutex);
5473 return written;
5474 }
5475
5476 static ssize_t wq_cpumask_store(struct device *dev,
5477 struct device_attribute *attr,
5478 const char *buf, size_t count)
5479 {
5480 struct workqueue_struct *wq = dev_to_wq(dev);
5481 struct workqueue_attrs *attrs;
5482 int ret = -ENOMEM;
5483
5484 apply_wqattrs_lock();
5485
5486 attrs = wq_sysfs_prep_attrs(wq);
5487 if (!attrs)
5488 goto out_unlock;
5489
5490 ret = cpumask_parse(buf, attrs->cpumask);
5491 if (!ret)
5492 ret = apply_workqueue_attrs_locked(wq, attrs);
5493
5494 out_unlock:
5495 apply_wqattrs_unlock();
5496 free_workqueue_attrs(attrs);
5497 return ret ?: count;
5498 }
5499
5500 static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
5501 char *buf)
5502 {
5503 struct workqueue_struct *wq = dev_to_wq(dev);
5504 int written;
5505
5506 mutex_lock(&wq->mutex);
5507 written = scnprintf(buf, PAGE_SIZE, "%d\n",
5508 !wq->unbound_attrs->no_numa);
5509 mutex_unlock(&wq->mutex);
5510
5511 return written;
5512 }
5513
5514 static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
5515 const char *buf, size_t count)
5516 {
5517 struct workqueue_struct *wq = dev_to_wq(dev);
5518 struct workqueue_attrs *attrs;
5519 int v, ret = -ENOMEM;
5520
5521 apply_wqattrs_lock();
5522
5523 attrs = wq_sysfs_prep_attrs(wq);
5524 if (!attrs)
5525 goto out_unlock;
5526
5527 ret = -EINVAL;
5528 if (sscanf(buf, "%d", &v) == 1) {
5529 attrs->no_numa = !v;
5530 ret = apply_workqueue_attrs_locked(wq, attrs);
5531 }
5532
5533 out_unlock:
5534 apply_wqattrs_unlock();
5535 free_workqueue_attrs(attrs);
5536 return ret ?: count;
5537 }
5538
5539 static struct device_attribute wq_sysfs_unbound_attrs[] = {
5540 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
5541 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
5542 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
5543 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
5544 __ATTR_NULL,
5545 };
5546
5547 static struct bus_type wq_subsys = {
5548 .name = "workqueue",
5549 .dev_groups = wq_sysfs_groups,
5550 };
5551
5552 static ssize_t wq_unbound_cpumask_show(struct device *dev,
5553 struct device_attribute *attr, char *buf)
5554 {
5555 int written;
5556
5557 mutex_lock(&wq_pool_mutex);
5558 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5559 cpumask_pr_args(wq_unbound_cpumask));
5560 mutex_unlock(&wq_pool_mutex);
5561
5562 return written;
5563 }
5564
5565 static ssize_t wq_unbound_cpumask_store(struct device *dev,
5566 struct device_attribute *attr, const char *buf, size_t count)
5567 {
5568 cpumask_var_t cpumask;
5569 int ret;
5570
5571 if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
5572 return -ENOMEM;
5573
5574 ret = cpumask_parse(buf, cpumask);
5575 if (!ret)
5576 ret = workqueue_set_unbound_cpumask(cpumask);
5577
5578 free_cpumask_var(cpumask);
5579 return ret ? ret : count;
5580 }
5581
5582 static struct device_attribute wq_sysfs_cpumask_attr =
5583 __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
5584 wq_unbound_cpumask_store);
5585
5586 static int __init wq_sysfs_init(void)
5587 {
5588 int err;
5589
5590 err = subsys_virtual_register(&wq_subsys, NULL);
5591 if (err)
5592 return err;
5593
5594 return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
5595 }
5596 core_initcall(wq_sysfs_init);
5597
5598 static void wq_device_release(struct device *dev)
5599 {
5600 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5601
5602 kfree(wq_dev);
5603 }
5604
5605
5606
5607
5608
5609
5610
5611
5612
5613
5614
5615
5616
5617
5618
5619
5620 int workqueue_sysfs_register(struct workqueue_struct *wq)
5621 {
5622 struct wq_device *wq_dev;
5623 int ret;
5624
5625
5626
5627
5628
5629
5630 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
5631 return -EINVAL;
5632
5633 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
5634 if (!wq_dev)
5635 return -ENOMEM;
5636
5637 wq_dev->wq = wq;
5638 wq_dev->dev.bus = &wq_subsys;
5639 wq_dev->dev.release = wq_device_release;
5640 dev_set_name(&wq_dev->dev, "%s", wq->name);
5641
5642
5643
5644
5645
5646 dev_set_uevent_suppress(&wq_dev->dev, true);
5647
5648 ret = device_register(&wq_dev->dev);
5649 if (ret) {
5650 put_device(&wq_dev->dev);
5651 wq->wq_dev = NULL;
5652 return ret;
5653 }
5654
5655 if (wq->flags & WQ_UNBOUND) {
5656 struct device_attribute *attr;
5657
5658 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
5659 ret = device_create_file(&wq_dev->dev, attr);
5660 if (ret) {
5661 device_unregister(&wq_dev->dev);
5662 wq->wq_dev = NULL;
5663 return ret;
5664 }
5665 }
5666 }
5667
5668 dev_set_uevent_suppress(&wq_dev->dev, false);
5669 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
5670 return 0;
5671 }
5672
5673
5674
5675
5676
5677
5678
5679 static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
5680 {
5681 struct wq_device *wq_dev = wq->wq_dev;
5682
5683 if (!wq->wq_dev)
5684 return;
5685
5686 wq->wq_dev = NULL;
5687 device_unregister(&wq_dev->dev);
5688 }
5689 #else
5690 static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
5691 #endif
5692
5693
5694
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709
5710 #ifdef CONFIG_WQ_WATCHDOG
5711
5712 static unsigned long wq_watchdog_thresh = 30;
5713 static struct timer_list wq_watchdog_timer;
5714
5715 static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
5716 static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
5717
5718 static void wq_watchdog_reset_touched(void)
5719 {
5720 int cpu;
5721
5722 wq_watchdog_touched = jiffies;
5723 for_each_possible_cpu(cpu)
5724 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5725 }
5726
5727 static void wq_watchdog_timer_fn(struct timer_list *unused)
5728 {
5729 unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
5730 bool lockup_detected = false;
5731 struct worker_pool *pool;
5732 int pi;
5733
5734 if (!thresh)
5735 return;
5736
5737 rcu_read_lock();
5738
5739 for_each_pool(pool, pi) {
5740 unsigned long pool_ts, touched, ts;
5741
5742 if (list_empty(&pool->worklist))
5743 continue;
5744
5745
5746 pool_ts = READ_ONCE(pool->watchdog_ts);
5747 touched = READ_ONCE(wq_watchdog_touched);
5748
5749 if (time_after(pool_ts, touched))
5750 ts = pool_ts;
5751 else
5752 ts = touched;
5753
5754 if (pool->cpu >= 0) {
5755 unsigned long cpu_touched =
5756 READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
5757 pool->cpu));
5758 if (time_after(cpu_touched, ts))
5759 ts = cpu_touched;
5760 }
5761
5762 /* did we stall? */
5763 if (time_after(jiffies, ts + thresh)) {
5764 lockup_detected = true;
5765 pr_emerg("BUG: workqueue lockup - pool");
5766 pr_cont_pool_info(pool);
5767 pr_cont(" stuck for %us!\n",
5768 jiffies_to_msecs(jiffies - pool_ts) / 1000);
5769 }
5770 }
5771
5772 rcu_read_unlock();
5773
5774 if (lockup_detected)
5775 show_workqueue_state();
5776
5777 wq_watchdog_reset_touched();
5778 mod_timer(&wq_watchdog_timer, jiffies + thresh);
5779 }
5780
5781 notrace void wq_watchdog_touch(int cpu)
5782 {
5783 if (cpu >= 0)
5784 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5785 else
5786 wq_watchdog_touched = jiffies;
5787 }
5788
5789 static void wq_watchdog_set_thresh(unsigned long thresh)
5790 {
5791 wq_watchdog_thresh = 0;
5792 del_timer_sync(&wq_watchdog_timer);
5793
5794 if (thresh) {
5795 wq_watchdog_thresh = thresh;
5796 wq_watchdog_reset_touched();
5797 mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
5798 }
5799 }
5800
5801 static int wq_watchdog_param_set_thresh(const char *val,
5802 const struct kernel_param *kp)
5803 {
5804 unsigned long thresh;
5805 int ret;
5806
5807 ret = kstrtoul(val, 0, &thresh);
5808 if (ret)
5809 return ret;
5810
5811 if (system_wq)
5812 wq_watchdog_set_thresh(thresh);
5813 else
5814 wq_watchdog_thresh = thresh;
5815
5816 return 0;
5817 }
5818
5819 static const struct kernel_param_ops wq_watchdog_thresh_ops = {
5820 .set = wq_watchdog_param_set_thresh,
5821 .get = param_get_ulong,
5822 };
5823
5824 module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
5825 0644);
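
The custom kernel_param_ops above lets the watchdog apply a new threshold (rearming the timer) rather than just storing the value. Below is a hedged sketch of the same module_param_cb() pattern for a hypothetical "threshold" parameter; none of the "my_*" names exist in the kernel.

static unsigned long my_threshold = 10;

static int my_param_set_threshold(const char *val, const struct kernel_param *kp)
{
	unsigned long thresh;
	int ret;

	ret = kstrtoul(val, 0, &thresh);
	if (ret)
		return ret;
	if (thresh > 3600)			/* arbitrary upper bound */
		return -EINVAL;

	my_threshold = thresh;			/* apply-side work would go here */
	return 0;
}

static const struct kernel_param_ops my_threshold_ops = {
	.set	= my_param_set_threshold,
	.get	= param_get_ulong,
};

module_param_cb(threshold, &my_threshold_ops, &my_threshold, 0644);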
5826
5827 static void wq_watchdog_init(void)
5828 {
5829 timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
5830 wq_watchdog_set_thresh(wq_watchdog_thresh);
5831 }
5832
5833 #else
5834
5835 static inline void wq_watchdog_init(void) { }
5836
5837 #endif
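
For illustration, the kind of bug this watchdog is designed to report: a work item that spins without ever sleeping keeps its concurrency-managed per-cpu worker RUNNING, so nothing else on that pool's worklist makes progress, and once watchdog_thresh seconds pass wq_watchdog_timer_fn() prints the lockup warning above. The "buggy_*" names below are invented; this is a sketch of a bug, not of anything in this file.

static void buggy_work_fn(struct work_struct *work)
{
	/* never sleeps and never returns - the pool's worklist stalls */
	for (;;)
		cpu_relax();
}

static DECLARE_WORK(buggy_work, buggy_work_fn);

static void trigger_example_stall(void)
{
	schedule_work(&buggy_work);	/* queued on a per-cpu system_wq pool */
}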
5838
5839 static void __init wq_numa_init(void)
5840 {
5841 cpumask_var_t *tbl;
5842 int node, cpu;
5843
5844 if (num_possible_nodes() <= 1)
5845 return;
5846
5847 if (wq_disable_numa) {
5848 pr_info("workqueue: NUMA affinity support disabled\n");
5849 return;
5850 }
5851
5852 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
5853 BUG_ON(!wq_update_unbound_numa_attrs_buf);
5854
5855 /*
5856  * We want masks of possible CPUs of each node which isn't readily
5857  * available.  Build one from cpu_to_node() which should have been
5858  * initialized by now.
5859  */
5860 tbl = kcalloc(nr_node_ids, sizeof(tbl[0]), GFP_KERNEL);
5861 BUG_ON(!tbl);
5862
5863 for_each_node(node)
5864 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
5865 node_online(node) ? node : NUMA_NO_NODE));
5866
5867 for_each_possible_cpu(cpu) {
5868 node = cpu_to_node(cpu);
5869 if (WARN_ON(node == NUMA_NO_NODE)) {
5870 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5871 /* happens only if the arch's CPU to node mapping is broken; bail out */
5872 return;
5873 }
5874 cpumask_set_cpu(cpu, tbl[node]);
5875 }
5876
5877 wq_numa_possible_cpumask = tbl;
5878 wq_numa_enabled = true;
5879 }
5880
5881 /**
5882  * workqueue_init_early - early init for workqueue subsystem
5883  *
5884  * This is the first half of two-staged workqueue subsystem initialization
5885  * and invoked as soon as the bare basics - memory allocation, cpumasks and
5886  * idr are up.  It sets up all the data structures and system workqueues
5887  * and allows early boot code to create workqueues and queue/cancel work
5888  * items.  Actual work item execution starts only after kthreads can be
5889  * created and scheduled right before early initcalls.
5890  */
5891 int __init workqueue_init_early(void)
5892 {
5893 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
5894 int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
5895 int i, cpu;
5896
5897 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5898
5899 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
5900 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
5901
5902 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
5903
5904 /* initialize CPU pools */
5905 for_each_possible_cpu(cpu) {
5906 struct worker_pool *pool;
5907
5908 i = 0;
5909 for_each_cpu_worker_pool(pool, cpu) {
5910 BUG_ON(init_worker_pool(pool));
5911 pool->cpu = cpu;
5912 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
5913 pool->attrs->nice = std_nice[i++];
5914 pool->node = cpu_to_node(cpu);
5915
5916 /* alloc pool ID */
5917 mutex_lock(&wq_pool_mutex);
5918 BUG_ON(worker_pool_assign_id(pool));
5919 mutex_unlock(&wq_pool_mutex);
5920 }
5921 }
5922
5923 /* create default unbound and ordered wq attrs */
5924 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
5925 struct workqueue_attrs *attrs;
5926
5927 BUG_ON(!(attrs = alloc_workqueue_attrs()));
5928 attrs->nice = std_nice[i];
5929 unbound_std_wq_attrs[i] = attrs;
5930
5931 /*
5932  * An ordered wq should have only one pwq as ordering is
5933  * guaranteed by max_active which is enforced by pwqs.
5934  * Turn off NUMA so that dfl_pwq is used for all nodes.
5935  */
5936 BUG_ON(!(attrs = alloc_workqueue_attrs()));
5937 attrs->nice = std_nice[i];
5938 attrs->no_numa = true;
5939 ordered_wq_attrs[i] = attrs;
5940 }
5941
5942 system_wq = alloc_workqueue("events", 0, 0);
5943 system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
5944 system_long_wq = alloc_workqueue("events_long", 0, 0);
5945 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
5946 WQ_UNBOUND_MAX_ACTIVE);
5947 system_freezable_wq = alloc_workqueue("events_freezable",
5948 WQ_FREEZABLE, 0);
5949 system_power_efficient_wq = alloc_workqueue("events_power_efficient",
5950 WQ_POWER_EFFICIENT, 0);
5951 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
5952 WQ_FREEZABLE | WQ_POWER_EFFICIENT,
5953 0);
5954 BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
5955 !system_unbound_wq || !system_freezable_wq ||
5956 !system_power_efficient_wq ||
5957 !system_freezable_power_efficient_wq);
5958
5959 return 0;
5960 }
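
Since the system workqueues allocated here exist from early boot onward, ordinary kernel code only needs to define a work item and queue it; execution begins once workqueue_init() below has created the workers. A minimal sketch, with invented "my_*" names:

static void my_work_fn(struct work_struct *work)
{
	pr_info("my work item ran\n");
}

static DECLARE_WORK(my_work, my_work_fn);
static DECLARE_DELAYED_WORK(my_dwork, my_work_fn);

static void my_queue_things(void)
{
	schedule_work(&my_work);		/* runs on system_wq */
	schedule_delayed_work(&my_dwork, HZ);	/* system_wq, ~1 second later */
}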
5961
5962 /**
5963  * workqueue_init - bring workqueue subsystem fully online
5964  *
5965  * This is the latter half of two-staged workqueue subsystem initialization
5966  * and invoked as soon as kthreads can be created and scheduled.
5967  * Workqueues have been created and work items queued on them, but there
5968  * are no kworkers executing the work items yet.  Populate the worker pools
5969  * with the initial workers and enable future kworker creations.
5970  */
5971 int __init workqueue_init(void)
5972 {
5973 struct workqueue_struct *wq;
5974 struct worker_pool *pool;
5975 int cpu, bkt;
5976
5977 /*
5978  * It'd be simpler to initialize NUMA in workqueue_init_early() but
5979  * CPU to node mapping may not be available that early on some
5980  * archs such as power and arm64.  As per-cpu pools created
5981  * previously could be missing node hint and unbound pools NUMA
5982  * affinity, fix them up.
5983  *
5984  * Also, while iterating workqueues, create rescuers if requested.
5985  */
5986 wq_numa_init();
5987
5988 mutex_lock(&wq_pool_mutex);
5989
5990 for_each_possible_cpu(cpu) {
5991 for_each_cpu_worker_pool(pool, cpu) {
5992 pool->node = cpu_to_node(cpu);
5993 }
5994 }
5995
5996 list_for_each_entry(wq, &workqueues, list) {
5997 wq_update_unbound_numa(wq, smp_processor_id(), true);
5998 WARN(init_rescuer(wq),
5999 "workqueue: failed to create early rescuer for %s",
6000 wq->name);
6001 }
6002
6003 mutex_unlock(&wq_pool_mutex);
6004
6005 /* create the initial workers */
6006 for_each_online_cpu(cpu) {
6007 for_each_cpu_worker_pool(pool, cpu) {
6008 pool->flags &= ~POOL_DISASSOCIATED;
6009 BUG_ON(!create_worker(pool));
6010 }
6011 }
6012
6013 hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
6014 BUG_ON(!create_worker(pool));
6015
6016 wq_online = true;
6017 wq_watchdog_init();
6018
6019 return 0;
6020 }
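
Once workqueue_init() has run, drivers can also allocate dedicated workqueues; WQ_MEM_RECLAIM gives the queue a rescuer thread of the kind init_rescuer() creates for the early workqueues above. A hedged sketch with invented "demo_*" names:

static struct workqueue_struct *demo_wq;

static void demo_work_fn(struct work_struct *work)
{
	/* work body */
}

static DECLARE_WORK(demo_work, demo_work_fn);

static int demo_run_once(void)
{
	demo_wq = alloc_workqueue("demo", WQ_MEM_RECLAIM, 0);
	if (!demo_wq)
		return -ENOMEM;

	queue_work(demo_wq, &demo_work);
	flush_workqueue(demo_wq);	/* wait until demo_work has finished */
	destroy_workqueue(demo_wq);
	return 0;
}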