This source file includes the following definitions:
- cgroup_ssid_enabled
- cgroup_on_dfl
- cgroup_idr_alloc
- cgroup_idr_replace
- cgroup_idr_remove
- cgroup_has_tasks
- cgroup_is_threaded
- cgroup_is_mixable
- cgroup_can_be_thread_root
- cgroup_is_thread_root
- cgroup_is_valid_domain
- cgroup_control
- cgroup_ss_mask
- cgroup_css
- cgroup_tryget_css
- cgroup_e_css_by_mask
- cgroup_e_css
- cgroup_get_e_css
- cgroup_get_live
- __cgroup_task_count
- cgroup_task_count
- of_css
- css_set_threaded
- css_set_populated
- cgroup_update_populated
- css_set_update_populated
- css_set_skip_task_iters
- css_set_move_task
- css_set_hash
- put_css_set_locked
- compare_css_sets
- find_existing_css_set
- free_cgrp_cset_links
- allocate_cgrp_cset_links
- link_css_set
- find_css_set
- cgroup_root_from_kf
- cgroup_init_root_id
- cgroup_exit_root_id
- cgroup_free_root
- cgroup_destroy_root
- current_cgns_cgroup_from_root
- cset_cgroup_from_root
- task_cgroup_from_root
- cgroup_file_name
- cgroup_file_mode
- cgroup_calc_subtree_ss_mask
- cgroup_kn_unlock
- cgroup_kn_lock_live
- cgroup_rm_file
- css_clear_dir
- css_populate_dir
- rebind_subsystems
- cgroup_show_path
- cgroup2_parse_param
- apply_cgroup_root_flags
- cgroup_show_options
- cgroup_reconfigure
- cgroup_enable_task_cg_lists
- init_cgroup_housekeeping
- init_cgroup_root
- cgroup_setup_root
- cgroup_do_get_tree
- cgroup_fs_context_free
- cgroup_get_tree
- cgroup_init_fs_context
- cgroup_kill_sb
- cpuset_init_fs_context
- cgroup_path_ns_locked
- cgroup_path_ns
- task_cgroup_path
- cgroup_migrate_add_task
- cgroup_taskset_first
- cgroup_taskset_next
- cgroup_migrate_execute
- cgroup_migrate_vet_dst
- cgroup_migrate_finish
- cgroup_migrate_add_src
- cgroup_migrate_prepare_dst
- cgroup_migrate
- cgroup_attach_task
- cgroup_procs_write_start
- cgroup_procs_write_finish
- cgroup_print_ss_mask
- cgroup_controllers_show
- cgroup_subtree_control_show
- cgroup_update_dfl_csses
- cgroup_lock_and_drain_offline
- cgroup_save_control
- cgroup_propagate_control
- cgroup_restore_control
- css_visible
- cgroup_apply_control_enable
- cgroup_apply_control_disable
- cgroup_apply_control
- cgroup_finalize_control
- cgroup_vet_subtree_control_enable
- cgroup_subtree_control_write
- cgroup_enable_threaded
- cgroup_type_show
- cgroup_type_write
- cgroup_max_descendants_show
- cgroup_max_descendants_write
- cgroup_max_depth_show
- cgroup_max_depth_write
- cgroup_events_show
- cgroup_stat_show
- cgroup_extra_stat_show
- cpu_stat_show
- cgroup_io_pressure_show
- cgroup_memory_pressure_show
- cgroup_cpu_pressure_show
- cgroup_pressure_write
- cgroup_io_pressure_write
- cgroup_memory_pressure_write
- cgroup_cpu_pressure_write
- cgroup_pressure_poll
- cgroup_pressure_release
- cgroup_freeze_show
- cgroup_freeze_write
- cgroup_file_open
- cgroup_file_release
- cgroup_file_write
- cgroup_file_poll
- cgroup_seqfile_start
- cgroup_seqfile_next
- cgroup_seqfile_stop
- cgroup_seqfile_show
- cgroup_kn_set_ugid
- cgroup_file_notify_timer
- cgroup_add_file
- cgroup_addrm_files
- cgroup_apply_cftypes
- cgroup_exit_cftypes
- cgroup_init_cftypes
- cgroup_rm_cftypes_locked
- cgroup_rm_cftypes
- cgroup_add_cftypes
- cgroup_add_dfl_cftypes
- cgroup_add_legacy_cftypes
- cgroup_file_notify
- css_next_child
- css_next_descendant_pre
- css_rightmost_descendant
- css_leftmost_descendant
- css_next_descendant_post
- css_has_online_children
- css_task_iter_next_css_set
- css_task_iter_advance_css_set
- css_task_iter_skip
- css_task_iter_advance
- css_task_iter_start
- css_task_iter_next
- css_task_iter_end
- cgroup_procs_release
- cgroup_procs_next
- __cgroup_procs_start
- cgroup_procs_start
- cgroup_procs_show
- cgroup_procs_write_permission
- cgroup_procs_write
- cgroup_threads_start
- cgroup_threads_write
- css_free_rwork_fn
- css_release_work_fn
- css_release
- init_and_link_css
- online_css
- offline_css
- css_create
- cgroup_create
- cgroup_check_hierarchy_limits
- cgroup_mkdir
- css_killed_work_fn
- css_killed_ref_fn
- kill_css
- cgroup_destroy_locked
- cgroup_rmdir
- cgroup_init_subsys
- cgroup_init_early
- cgroup_init
- cgroup_wq_init
- cgroup_path_from_kernfs_id
- proc_cgroup_show
- cgroup_fork
- cgroup_can_fork
- cgroup_cancel_fork
- cgroup_post_fork
- cgroup_exit
- cgroup_release
- cgroup_free
- cgroup_disable
- enable_debug_cgroup
- enable_cgroup_debug
- css_tryget_online_from_dir
- css_from_id
- cgroup_get_from_path
- cgroup_get_from_fd
- power_of_ten
- cgroup_parse_float
- cgroup_sk_alloc_disable
- cgroup_sk_alloc
- cgroup_sk_free
- cgroup_bpf_attach
- cgroup_bpf_detach
- cgroup_bpf_query
- show_delegatable_files
- delegate_show
- features_show
- cgroup_sysfs_init
29 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30
31 #include "cgroup-internal.h"
32
33 #include <linux/cred.h>
34 #include <linux/errno.h>
35 #include <linux/init_task.h>
36 #include <linux/kernel.h>
37 #include <linux/magic.h>
38 #include <linux/mutex.h>
39 #include <linux/mount.h>
40 #include <linux/pagemap.h>
41 #include <linux/proc_fs.h>
42 #include <linux/rcupdate.h>
43 #include <linux/sched.h>
44 #include <linux/sched/task.h>
45 #include <linux/slab.h>
46 #include <linux/spinlock.h>
47 #include <linux/percpu-rwsem.h>
48 #include <linux/string.h>
49 #include <linux/hashtable.h>
50 #include <linux/idr.h>
51 #include <linux/kthread.h>
52 #include <linux/atomic.h>
53 #include <linux/cpuset.h>
54 #include <linux/proc_ns.h>
55 #include <linux/nsproxy.h>
56 #include <linux/file.h>
57 #include <linux/fs_parser.h>
58 #include <linux/sched/cputime.h>
59 #include <linux/psi.h>
60 #include <net/sock.h>
61
62 #define CREATE_TRACE_POINTS
63 #include <trace/events/cgroup.h>
64
65 #define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \
66 MAX_CFTYPE_NAME + 2)
67
68 #define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
69
70 /*
71  * cgroup_mutex is the master lock.  Any modification to cgroup or its
72  * hierarchy must be performed while holding it.
73  *
74  * css_set_lock protects task->cgroups pointer, the list of css_set
75  * objects, and the chain of tasks off each css_set.
76  *
77  * Both locks are exported when CONFIG_PROVE_RCU is set so that accessors
78  * in cgroup.h can use them for lockdep annotations.
79  */
80 DEFINE_MUTEX(cgroup_mutex);
81 DEFINE_SPINLOCK(css_set_lock);
82
83 #ifdef CONFIG_PROVE_RCU
84 EXPORT_SYMBOL_GPL(cgroup_mutex);
85 EXPORT_SYMBOL_GPL(css_set_lock);
86 #endif
87
88 DEFINE_SPINLOCK(trace_cgroup_path_lock);
89 char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
90 bool cgroup_debug __read_mostly;
91
92 /*
93  * Protects cgroup_idr and css_idr so that IDs can be released without
94  * grabbing cgroup_mutex.
95  */
96 static DEFINE_SPINLOCK(cgroup_idr_lock);
97
98 /*
99  * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
100  * against file removal/re-creation across css hiding.
101  */
102 static DEFINE_SPINLOCK(cgroup_file_kn_lock);
103
104 DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
105
106 #define cgroup_assert_mutex_or_rcu_locked() \
107 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
108 !lockdep_is_held(&cgroup_mutex), \
109 "cgroup_mutex or RCU read lock required");
110
111 /*
112  * cgroup destruction makes heavy use of work items and there can be a lot
113  * of concurrent destructions.  Use a separate workqueue so that cgroup
114  * destruction work items don't end up filling up max_active of system_wq
115  * which may lead to deadlock.
116  */
117 static struct workqueue_struct *cgroup_destroy_wq;
118
119 /* generate an array of cgroup subsystem pointers */
120 #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
121 struct cgroup_subsys *cgroup_subsys[] = {
122 #include <linux/cgroup_subsys.h>
123 };
124 #undef SUBSYS
125
126 /* array of cgroup subsystem names */
127 #define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
128 static const char *cgroup_subsys_name[] = {
129 #include <linux/cgroup_subsys.h>
130 };
131 #undef SUBSYS
132
133 /* array of static keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */
134 #define SUBSYS(_x) \
135 DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key); \
136 DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key); \
137 EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key); \
138 EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
139 #include <linux/cgroup_subsys.h>
140 #undef SUBSYS
141
142 #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
143 static struct static_key_true *cgroup_subsys_enabled_key[] = {
144 #include <linux/cgroup_subsys.h>
145 };
146 #undef SUBSYS
147
148 #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
149 static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
150 #include <linux/cgroup_subsys.h>
151 };
152 #undef SUBSYS
153
154 static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
155
156 /*
157  * The default hierarchy, reserved for the subsystems that are otherwise
158  * unattached - it never has more than a single cgroup, and all tasks are
159  * part of that cgroup.
160  */
161 struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
162 EXPORT_SYMBOL_GPL(cgrp_dfl_root);
163
164 /*
165  * The default hierarchy always exists but is hidden until mounted for the
166  * first time.  This is for backward compatibility.
167  */
168 static bool cgrp_dfl_visible;
169
170 /* some controllers are not supported on the default hierarchy */
171 static u16 cgrp_dfl_inhibit_ss_mask;
172
173 /* some controllers are implicitly enabled on the default hierarchy */
174 static u16 cgrp_dfl_implicit_ss_mask;
175
176 /* some controllers can be threaded on the default hierarchy */
177 static u16 cgrp_dfl_threaded_ss_mask;
178
179 /* The list of hierarchy roots */
180 LIST_HEAD(cgroup_roots);
181 static int cgroup_root_count;
182
183 /* hierarchy ID allocation and mapping, protected by cgroup_mutex */
184 static DEFINE_IDR(cgroup_hierarchy_idr);
185
186 /*
187  * Assign a monotonically increasing serial number to csses.  It guarantees
188  * cgroups with bigger numbers are newer than those with smaller numbers.
189  * Also, as csses are always appended to the parent's ->children list, it
190  * guarantees that sibling csses are always sorted in the ascending serial
191  * number order on the list.  Protected by cgroup_mutex.
192  */
193 static u64 css_serial_nr_next = 1;
194
195 /*
196  * These bitmasks identify the subsystems that provide the corresponding
197  * callback, so the fork/exit/release paths only call into those.
198  */
199 static u16 have_fork_callback __read_mostly;
200 static u16 have_exit_callback __read_mostly;
201 static u16 have_release_callback __read_mostly;
202 static u16 have_canfork_callback __read_mostly;
203
204 /* cgroup namespace for init task */
205 struct cgroup_namespace init_cgroup_ns = {
206 .count = REFCOUNT_INIT(2),
207 .user_ns = &init_user_ns,
208 .ns.ops = &cgroupns_operations,
209 .ns.inum = PROC_CGROUP_INIT_INO,
210 .root_cset = &init_css_set,
211 };
212
213 static struct file_system_type cgroup2_fs_type;
214 static struct cftype cgroup_base_files[];
215
216 static int cgroup_apply_control(struct cgroup *cgrp);
217 static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
218 static void css_task_iter_skip(struct css_task_iter *it,
219 struct task_struct *task);
220 static int cgroup_destroy_locked(struct cgroup *cgrp);
221 static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
222 struct cgroup_subsys *ss);
223 static void css_release(struct percpu_ref *ref);
224 static void kill_css(struct cgroup_subsys_state *css);
225 static int cgroup_addrm_files(struct cgroup_subsys_state *css,
226 struct cgroup *cgrp, struct cftype cfts[],
227 bool is_add);
228
229 /**
230  * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
231  * @ssid: subsys ID of interest
232  *
233  * Returns %true if the subsystem identified by @ssid is built in and has
234  * not been disabled with "cgroup_disable=" on the kernel command line.
235  */
236
237 bool cgroup_ssid_enabled(int ssid)
238 {
239 if (CGROUP_SUBSYS_COUNT == 0)
240 return false;
241
242 return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
243 }
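/*
 * Illustrative sketch, not part of the original file: enumerating which
 * controllers survived "cgroup_disable=" boot-time processing.
 * example_list_enabled_subsystems() is a hypothetical helper.
 */
static void __init example_list_enabled_subsystems(void)
{
	struct cgroup_subsys *ss;
	int ssid;

	for_each_subsys(ss, ssid)
		if (cgroup_ssid_enabled(ssid))
			pr_info("controller %s is enabled\n", ss->name);
}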
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284 /**
285  * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
286  * @cgrp: the cgroup of interest
287  *
288  * The default hierarchy is the v2 interface of cgroup.  This function
289  * can be used to test whether a cgroup is on the default hierarchy for
290  * cases where a subsystem should behave differently depending on the
291  * interface version.
292  *
293  * Several v1 behaviors change on the default hierarchy: for example,
294  * "tasks" and "release_agent" are gone, "cgroup.procs" is not sorted,
295  * mount options such as "noprefix" and "name" are disallowed, and
296  * rename(2) is not supported.
297  */
298 bool cgroup_on_dfl(const struct cgroup *cgrp)
299 {
300 return cgrp->root == &cgrp_dfl_root;
301 }
302
303
304 static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
305 gfp_t gfp_mask)
306 {
307 int ret;
308
309 idr_preload(gfp_mask);
310 spin_lock_bh(&cgroup_idr_lock);
311 ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
312 spin_unlock_bh(&cgroup_idr_lock);
313 idr_preload_end();
314 return ret;
315 }
316
317 static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
318 {
319 void *ret;
320
321 spin_lock_bh(&cgroup_idr_lock);
322 ret = idr_replace(idr, ptr, id);
323 spin_unlock_bh(&cgroup_idr_lock);
324 return ret;
325 }
326
327 static void cgroup_idr_remove(struct idr *idr, int id)
328 {
329 spin_lock_bh(&cgroup_idr_lock);
330 idr_remove(idr, id);
331 spin_unlock_bh(&cgroup_idr_lock);
332 }
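/*
 * Illustrative note, not part of the original file: cgroup_idr_alloc()
 * follows the standard idr_preload() pattern - memory is preallocated
 * outside the spinlock with the caller's gfp mask, and the allocation
 * under the lock masks out __GFP_DIRECT_RECLAIM so it cannot sleep.
 * A hypothetical caller:
 */
static inline int example_assign_id(struct idr *idr, void *obj)
{
	return cgroup_idr_alloc(idr, obj, 1, 0, GFP_KERNEL);	/* any ID >= 1 */
}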
333
334 static bool cgroup_has_tasks(struct cgroup *cgrp)
335 {
336 return cgrp->nr_populated_csets;
337 }
338
339 bool cgroup_is_threaded(struct cgroup *cgrp)
340 {
341 return cgrp->dom_cgrp != cgrp;
342 }
343
344
345 static bool cgroup_is_mixable(struct cgroup *cgrp)
346 {
347
348
349
350
351
352 return !cgroup_parent(cgrp);
353 }
354
355
356 static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
357 {
358
359 if (cgroup_is_mixable(cgrp))
360 return true;
361
362
363 if (cgroup_is_threaded(cgrp))
364 return false;
365
366
367 if (cgrp->nr_populated_domain_children)
368 return false;
369
370
371 if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
372 return false;
373
374 return true;
375 }
376
377
378 bool cgroup_is_thread_root(struct cgroup *cgrp)
379 {
380
381 if (cgroup_is_threaded(cgrp))
382 return false;
383
384
385 if (cgrp->nr_threaded_children)
386 return true;
387
388
389
390
391
392 if (cgroup_has_tasks(cgrp) &&
393 (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
394 return true;
395
396 return false;
397 }
398
399
400 static bool cgroup_is_valid_domain(struct cgroup *cgrp)
401 {
402
403 if (cgroup_is_threaded(cgrp))
404 return false;
405
406
407 while ((cgrp = cgroup_parent(cgrp))) {
408 if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
409 return false;
410 if (cgroup_is_threaded(cgrp))
411 return false;
412 }
413
414 return true;
415 }
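/*
 * Illustrative example, not part of the original file.  Given:
 *
 *	root (mixable)
 *	`- A (domain)
 *	   `- B (thread root: nr_threaded_children > 0)
 *	      |- C (threaded: C->dom_cgrp == B)
 *	      `- D (domain child of a thread root)
 *
 * root, A and B are valid domains.  C fails cgroup_is_valid_domain()
 * because it is threaded, and D fails because an ancestor (B) is a
 * non-mixable thread root.
 */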
416
417
418 static u16 cgroup_control(struct cgroup *cgrp)
419 {
420 struct cgroup *parent = cgroup_parent(cgrp);
421 u16 root_ss_mask = cgrp->root->subsys_mask;
422
423 if (parent) {
424 u16 ss_mask = parent->subtree_control;
425
426
427 if (cgroup_is_threaded(cgrp))
428 ss_mask &= cgrp_dfl_threaded_ss_mask;
429 return ss_mask;
430 }
431
432 if (cgroup_on_dfl(cgrp))
433 root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
434 cgrp_dfl_implicit_ss_mask);
435 return root_ss_mask;
436 }
437
438
439 static u16 cgroup_ss_mask(struct cgroup *cgrp)
440 {
441 struct cgroup *parent = cgroup_parent(cgrp);
442
443 if (parent) {
444 u16 ss_mask = parent->subtree_ss_mask;
445
446
447 if (cgroup_is_threaded(cgrp))
448 ss_mask &= cgrp_dfl_threaded_ss_mask;
449 return ss_mask;
450 }
451
452 return cgrp->root->subsys_mask;
453 }
454
455 /**
456  * cgroup_css - obtain a cgroup's css for the specified subsystem
457  * @cgrp: the cgroup of interest
458  * @ss: the subsystem of interest (%NULL returns @cgrp->self)
459  *
460  * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
461  * function must be called either under cgroup_mutex or rcu_read_lock() and
462  * the caller is responsible for pinning the returned css if it wants to
463  * keep accessing it outside the said locks.  This function may return
464  * %NULL if @cgrp doesn't have @ss enabled.
465  */
466 static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
467 struct cgroup_subsys *ss)
468 {
469 if (ss)
470 return rcu_dereference_check(cgrp->subsys[ss->id],
471 lockdep_is_held(&cgroup_mutex));
472 else
473 return &cgrp->self;
474 }
475
476 /**
477  * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
478  * @cgrp: the cgroup of interest
479  * @ss: the subsystem of interest
480  *
481  * Find and get @cgrp's css associated with @ss.  If the css doesn't exist
482  * or is offline, %NULL is returned.
483  */
484 static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
485 struct cgroup_subsys *ss)
486 {
487 struct cgroup_subsys_state *css;
488
489 rcu_read_lock();
490 css = cgroup_css(cgrp, ss);
491 if (css && !css_tryget_online(css))
492 css = NULL;
493 rcu_read_unlock();
494
495 return css;
496 }
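/*
 * Illustrative usage, not part of the original file: pin a live css across
 * a sleepable section and drop the reference afterwards.  example_with_css()
 * is a hypothetical caller.
 */
static void example_with_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css = cgroup_tryget_css(cgrp, ss);

	if (!css)
		return;		/* css doesn't exist or went offline */
	/* ... use css, may sleep ... */
	css_put(css);
}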
497
498 /**
499  * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
500  * @cgrp: the cgroup of interest
501  * @ss: the subsystem of interest (%NULL returns @cgrp->self)
502  *
503  * Similar to cgroup_css() but returns the effective css, which is defined
504  * as the matching css of the nearest ancestor including self which has @ss
505  * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
506  * function is guaranteed to return non-NULL css.
507  */
508 static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
509 struct cgroup_subsys *ss)
510 {
511 lockdep_assert_held(&cgroup_mutex);
512
513 if (!ss)
514 return &cgrp->self;
515
516
517
518
519
520 while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
521 cgrp = cgroup_parent(cgrp);
522 if (!cgrp)
523 return NULL;
524 }
525
526 return cgroup_css(cgrp, ss);
527 }
528
529
530 /**
531  * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
532  * @cgrp: the cgroup of interest
533  * @ss: the subsystem of interest
534  *
535  * Find and return the effective css of @cgrp for @ss.  The effective css
536  * is defined as the matching css of the nearest ancestor including self
537  * which has @ss enabled.  If @ss is enabled nowhere on the path, the css
538  * of the init_css_set is returned.  The returned css is not pinned; the
539  * caller must ensure it stays accessible, e.g. by holding rcu_read_lock().
540  */
541
542 struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
543 struct cgroup_subsys *ss)
544 {
545 struct cgroup_subsys_state *css;
546
547 do {
548 css = cgroup_css(cgrp, ss);
549
550 if (css)
551 return css;
552 cgrp = cgroup_parent(cgrp);
553 } while (cgrp);
554
555 return init_css_set.subsys[ss->id];
556 }
557
558 /**
559  * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
560  * @cgrp: the cgroup of interest
561  * @ss: the subsystem of interest
562  *
563  * Find and get the effective css of @cgrp for @ss.  The effective css is
564  * defined as the matching css of the nearest ancestor including self
565  * which has @ss enabled.  A reference is taken on the returned css, and
566  * the caller is responsible for putting it with css_put().
567  */
568
569 struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
570 struct cgroup_subsys *ss)
571 {
572 struct cgroup_subsys_state *css;
573
574 rcu_read_lock();
575
576 do {
577 css = cgroup_css(cgrp, ss);
578
579 if (css && css_tryget_online(css))
580 goto out_unlock;
581 cgrp = cgroup_parent(cgrp);
582 } while (cgrp);
583
584 css = init_css_set.subsys[ss->id];
585 css_get(css);
586 out_unlock:
587 rcu_read_unlock();
588 return css;
589 }
590
591 static void cgroup_get_live(struct cgroup *cgrp)
592 {
593 WARN_ON_ONCE(cgroup_is_dead(cgrp));
594 css_get(&cgrp->self);
595 }
596
597 /**
598  * __cgroup_task_count - count the number of tasks in a cgroup. The caller
599  * has to hold the css_set_lock.
600  * @cgrp: the cgroup in question
601  */
602 int __cgroup_task_count(const struct cgroup *cgrp)
603 {
604 int count = 0;
605 struct cgrp_cset_link *link;
606
607 lockdep_assert_held(&css_set_lock);
608
609 list_for_each_entry(link, &cgrp->cset_links, cset_link)
610 count += link->cset->nr_tasks;
611
612 return count;
613 }
614
615 /**
616  * cgroup_task_count - count the number of tasks in a cgroup.
617  * @cgrp: the cgroup in question
618  */
619 int cgroup_task_count(const struct cgroup *cgrp)
620 {
621 int count;
622
623 spin_lock_irq(&css_set_lock);
624 count = __cgroup_task_count(cgrp);
625 spin_unlock_irq(&css_set_lock);
626
627 return count;
628 }
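/*
 * Illustrative note, not part of the original file: the count sums
 * cset->nr_tasks over every css_set linked to @cgrp, so it reflects
 * individual threads rather than thread-group leaders.
 */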
629
630 struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
631 {
632 struct cgroup *cgrp = of->kn->parent->priv;
633 struct cftype *cft = of_cft(of);
634
635
636
637
638
639
640
641
642
643 if (cft->ss)
644 return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
645 else
646 return &cgrp->self;
647 }
648 EXPORT_SYMBOL_GPL(of_css);
649
650
651
652
653
654
655
656
657
658 #define for_each_css(css, ssid, cgrp) \
659 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
660 if (!((css) = rcu_dereference_check( \
661 (cgrp)->subsys[(ssid)], \
662 lockdep_is_held(&cgroup_mutex)))) { } \
663 else
664
665
666
667
668
669
670
671
672
673 #define for_each_e_css(css, ssid, cgrp) \
674 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
675 if (!((css) = cgroup_e_css_by_mask(cgrp, \
676 cgroup_subsys[(ssid)]))) \
677 ; \
678 else
679
680
681
682
683
684
685
686
687
688
689 #define do_each_subsys_mask(ss, ssid, ss_mask) do { \
690 unsigned long __ss_mask = (ss_mask); \
691 if (!CGROUP_SUBSYS_COUNT) { \
692 (ssid) = 0; \
693 break; \
694 } \
695 for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) { \
696 (ss) = cgroup_subsys[ssid]; \
697 {
698
699 #define while_each_subsys_mask() \
700 } \
701 } \
702 } while (false)
703
704
705 #define cgroup_for_each_live_child(child, cgrp) \
706 list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
707 if (({ lockdep_assert_held(&cgroup_mutex); \
708 cgroup_is_dead(child); })) \
709 ; \
710 else
711
712
713 #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \
714 css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \
715 if (({ lockdep_assert_held(&cgroup_mutex); \
716 (dsct) = (d_css)->cgroup; \
717 cgroup_is_dead(dsct); })) \
718 ; \
719 else
720
721
722 #define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \
723 css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
724 if (({ lockdep_assert_held(&cgroup_mutex); \
725 (dsct) = (d_css)->cgroup; \
726 cgroup_is_dead(dsct); })) \
727 ; \
728 else
729
730
731
732
733
734
735
736
737 struct css_set init_css_set = {
738 .refcount = REFCOUNT_INIT(1),
739 .dom_cset = &init_css_set,
740 .tasks = LIST_HEAD_INIT(init_css_set.tasks),
741 .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
742 .dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks),
743 .task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
744 .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
745 .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
746 .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node),
747 .mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
748
749
750
751
752
753
754
755 .dfl_cgrp = &cgrp_dfl_root.cgrp,
756 };
757
758 static int css_set_count = 1;
759
760 static bool css_set_threaded(struct css_set *cset)
761 {
762 return cset->dom_cset != cset;
763 }
764
765 /**
766  * css_set_populated - does a css_set contain any tasks?
767  * @cset: target css_set
768  *
769  * css_set_populated() should match !!cset->nr_tasks at steady state, but
770  * it can also be called while a task is being moved between the task
771  * lists, before nr_tasks has been updated, which is why it tests the
772  * task lists directly.
773  */
774 static bool css_set_populated(struct css_set *cset)
775 {
776 lockdep_assert_held(&css_set_lock);
777
778 return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
779 }
780
781 /**
782  * cgroup_update_populated - update the populated count of a cgroup
783  * @cgrp: the target cgroup
784  * @populated: inc or dec populated count
785  *
786  * One of the css_sets associated with @cgrp is either getting its first
787  * task or losing the last.  Update @cgrp->nr_populated_* accordingly.  The
788  * count is propagated towards root so that a given cgroup's
789  * nr_populated_children is zero iff none of its descendants contain any
790  * tasks.
791  *
792  * @cgrp's interface file "cgroup.events" is notified when the "populated"
793  * state changes, i.e. when @cgrp->nr_populated_csets and the two
794  * nr_populated_*_children counters collectively go from zero to non-zero
795  * or back.  This lets userland detect when @cgrp or its subtree becomes
796  * populated or empty.
797  */
798 static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
799 {
800 struct cgroup *child = NULL;
801 int adj = populated ? 1 : -1;
802
803 lockdep_assert_held(&css_set_lock);
804
805 do {
806 bool was_populated = cgroup_is_populated(cgrp);
807
808 if (!child) {
809 cgrp->nr_populated_csets += adj;
810 } else {
811 if (cgroup_is_threaded(child))
812 cgrp->nr_populated_threaded_children += adj;
813 else
814 cgrp->nr_populated_domain_children += adj;
815 }
816
817 if (was_populated == cgroup_is_populated(cgrp))
818 break;
819
820 cgroup1_check_for_release(cgrp);
821 TRACE_CGROUP_PATH(notify_populated, cgrp,
822 cgroup_is_populated(cgrp));
823 cgroup_file_notify(&cgrp->events_file);
824
825 child = cgrp;
826 cgrp = cgroup_parent(cgrp);
827 } while (cgrp);
828 }
829
830
831
832
833
834
835
836
837
838 static void css_set_update_populated(struct css_set *cset, bool populated)
839 {
840 struct cgrp_cset_link *link;
841
842 lockdep_assert_held(&css_set_lock);
843
844 list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
845 cgroup_update_populated(link->cgrp, populated);
846 }
847
848
849
850
851
852
853
854 static void css_set_skip_task_iters(struct css_set *cset,
855 struct task_struct *task)
856 {
857 struct css_task_iter *it, *pos;
858
859 list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
860 css_task_iter_skip(it, task);
861 }
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878 static void css_set_move_task(struct task_struct *task,
879 struct css_set *from_cset, struct css_set *to_cset,
880 bool use_mg_tasks)
881 {
882 lockdep_assert_held(&css_set_lock);
883
884 if (to_cset && !css_set_populated(to_cset))
885 css_set_update_populated(to_cset, true);
886
887 if (from_cset) {
888 WARN_ON_ONCE(list_empty(&task->cg_list));
889
890 css_set_skip_task_iters(from_cset, task);
891 list_del_init(&task->cg_list);
892 if (!css_set_populated(from_cset))
893 css_set_update_populated(from_cset, false);
894 } else {
895 WARN_ON_ONCE(!list_empty(&task->cg_list));
896 }
897
898 if (to_cset) {
899
900
901
902
903
904
905 WARN_ON_ONCE(task->flags & PF_EXITING);
906
907 cgroup_move_task(task, to_cset);
908 list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
909 &to_cset->tasks);
910 }
911 }
912
913
914
915
916
917
918 #define CSS_SET_HASH_BITS 7
919 static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
920
921 static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
922 {
923 unsigned long key = 0UL;
924 struct cgroup_subsys *ss;
925 int i;
926
927 for_each_subsys(ss, i)
928 key += (unsigned long)css[i];
929 key = (key >> 16) ^ key;
930
931 return key;
932 }
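/*
 * Illustrative note, not part of the original file: the key is just the
 * sum of the subsystem state pointers folded once; collisions are fine
 * because compare_css_sets() disambiguates entries on the hash chain.
 */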
933
934 void put_css_set_locked(struct css_set *cset)
935 {
936 struct cgrp_cset_link *link, *tmp_link;
937 struct cgroup_subsys *ss;
938 int ssid;
939
940 lockdep_assert_held(&css_set_lock);
941
942 if (!refcount_dec_and_test(&cset->refcount))
943 return;
944
945 WARN_ON_ONCE(!list_empty(&cset->threaded_csets));
946
947
948 for_each_subsys(ss, ssid) {
949 list_del(&cset->e_cset_node[ssid]);
950 css_put(cset->subsys[ssid]);
951 }
952 hash_del(&cset->hlist);
953 css_set_count--;
954
955 list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
956 list_del(&link->cset_link);
957 list_del(&link->cgrp_link);
958 if (cgroup_parent(link->cgrp))
959 cgroup_put(link->cgrp);
960 kfree(link);
961 }
962
963 if (css_set_threaded(cset)) {
964 list_del(&cset->threaded_csets_node);
965 put_css_set_locked(cset->dom_cset);
966 }
967
968 kfree_rcu(cset, rcu_head);
969 }
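/*
 * Illustrative usage, not part of the original file (example_drop_cset_ref()
 * is hypothetical): callers that don't already hold css_set_lock take it
 * around put_css_set_locked(), which is what the put_css_set() wrapper in
 * cgroup-internal.h does.
 */
static void example_drop_cset_ref(struct css_set *cset)
{
	unsigned long flags;

	spin_lock_irqsave(&css_set_lock, flags);
	put_css_set_locked(cset);
	spin_unlock_irqrestore(&css_set_lock, flags);
}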
970
971
972
973
974
975
976
977
978
979
980
981 static bool compare_css_sets(struct css_set *cset,
982 struct css_set *old_cset,
983 struct cgroup *new_cgrp,
984 struct cgroup_subsys_state *template[])
985 {
986 struct cgroup *new_dfl_cgrp;
987 struct list_head *l1, *l2;
988
989
990
991
992
993
994 if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
995 return false;
996
997
998
999 if (cgroup_on_dfl(new_cgrp))
1000 new_dfl_cgrp = new_cgrp;
1001 else
1002 new_dfl_cgrp = old_cset->dfl_cgrp;
1003
1004 if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
1005 return false;
1006
1007
1008
1009
1010
1011
1012
1013 l1 = &cset->cgrp_links;
1014 l2 = &old_cset->cgrp_links;
1015 while (1) {
1016 struct cgrp_cset_link *link1, *link2;
1017 struct cgroup *cgrp1, *cgrp2;
1018
1019 l1 = l1->next;
1020 l2 = l2->next;
1021
1022 if (l1 == &cset->cgrp_links) {
1023 BUG_ON(l2 != &old_cset->cgrp_links);
1024 break;
1025 } else {
1026 BUG_ON(l2 == &old_cset->cgrp_links);
1027 }
1028
1029 link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
1030 link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
1031 cgrp1 = link1->cgrp;
1032 cgrp2 = link2->cgrp;
1033
1034 BUG_ON(cgrp1->root != cgrp2->root);
1035
1036
1037
1038
1039
1040
1041
1042
1043 if (cgrp1->root == new_cgrp->root) {
1044 if (cgrp1 != new_cgrp)
1045 return false;
1046 } else {
1047 if (cgrp1 != cgrp2)
1048 return false;
1049 }
1050 }
1051 return true;
1052 }
1053
1054
1055
1056
1057
1058
1059
1060 static struct css_set *find_existing_css_set(struct css_set *old_cset,
1061 struct cgroup *cgrp,
1062 struct cgroup_subsys_state *template[])
1063 {
1064 struct cgroup_root *root = cgrp->root;
1065 struct cgroup_subsys *ss;
1066 struct css_set *cset;
1067 unsigned long key;
1068 int i;
1069
1070
1071
1072
1073
1074
1075 for_each_subsys(ss, i) {
1076 if (root->subsys_mask & (1UL << i)) {
1077
1078
1079
1080
1081 template[i] = cgroup_e_css_by_mask(cgrp, ss);
1082 } else {
1083
1084
1085
1086
1087 template[i] = old_cset->subsys[i];
1088 }
1089 }
1090
1091 key = css_set_hash(template);
1092 hash_for_each_possible(css_set_table, cset, hlist, key) {
1093 if (!compare_css_sets(cset, old_cset, cgrp, template))
1094 continue;
1095
1096
1097 return cset;
1098 }
1099
1100
1101 return NULL;
1102 }
1103
1104 static void free_cgrp_cset_links(struct list_head *links_to_free)
1105 {
1106 struct cgrp_cset_link *link, *tmp_link;
1107
1108 list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
1109 list_del(&link->cset_link);
1110 kfree(link);
1111 }
1112 }
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122 static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
1123 {
1124 struct cgrp_cset_link *link;
1125 int i;
1126
1127 INIT_LIST_HEAD(tmp_links);
1128
1129 for (i = 0; i < count; i++) {
1130 link = kzalloc(sizeof(*link), GFP_KERNEL);
1131 if (!link) {
1132 free_cgrp_cset_links(tmp_links);
1133 return -ENOMEM;
1134 }
1135 list_add(&link->cset_link, tmp_links);
1136 }
1137 return 0;
1138 }
1139
1140
1141
1142
1143
1144
1145
1146 static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
1147 struct cgroup *cgrp)
1148 {
1149 struct cgrp_cset_link *link;
1150
1151 BUG_ON(list_empty(tmp_links));
1152
1153 if (cgroup_on_dfl(cgrp))
1154 cset->dfl_cgrp = cgrp;
1155
1156 link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
1157 link->cset = cset;
1158 link->cgrp = cgrp;
1159
1160
1161
1162
1163
1164 list_move_tail(&link->cset_link, &cgrp->cset_links);
1165 list_add_tail(&link->cgrp_link, &cset->cgrp_links);
1166
1167 if (cgroup_parent(cgrp))
1168 cgroup_get_live(cgrp);
1169 }
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179 static struct css_set *find_css_set(struct css_set *old_cset,
1180 struct cgroup *cgrp)
1181 {
1182 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
1183 struct css_set *cset;
1184 struct list_head tmp_links;
1185 struct cgrp_cset_link *link;
1186 struct cgroup_subsys *ss;
1187 unsigned long key;
1188 int ssid;
1189
1190 lockdep_assert_held(&cgroup_mutex);
1191
1192
1193
1194 spin_lock_irq(&css_set_lock);
1195 cset = find_existing_css_set(old_cset, cgrp, template);
1196 if (cset)
1197 get_css_set(cset);
1198 spin_unlock_irq(&css_set_lock);
1199
1200 if (cset)
1201 return cset;
1202
1203 cset = kzalloc(sizeof(*cset), GFP_KERNEL);
1204 if (!cset)
1205 return NULL;
1206
1207
1208 if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
1209 kfree(cset);
1210 return NULL;
1211 }
1212
1213 refcount_set(&cset->refcount, 1);
1214 cset->dom_cset = cset;
1215 INIT_LIST_HEAD(&cset->tasks);
1216 INIT_LIST_HEAD(&cset->mg_tasks);
1217 INIT_LIST_HEAD(&cset->dying_tasks);
1218 INIT_LIST_HEAD(&cset->task_iters);
1219 INIT_LIST_HEAD(&cset->threaded_csets);
1220 INIT_HLIST_NODE(&cset->hlist);
1221 INIT_LIST_HEAD(&cset->cgrp_links);
1222 INIT_LIST_HEAD(&cset->mg_preload_node);
1223 INIT_LIST_HEAD(&cset->mg_node);
1224
1225
1226
1227 memcpy(cset->subsys, template, sizeof(cset->subsys));
1228
1229 spin_lock_irq(&css_set_lock);
1230
1231 list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
1232 struct cgroup *c = link->cgrp;
1233
1234 if (c->root == cgrp->root)
1235 c = cgrp;
1236 link_css_set(&tmp_links, cset, c);
1237 }
1238
1239 BUG_ON(!list_empty(&tmp_links));
1240
1241 css_set_count++;
1242
1243
1244 key = css_set_hash(cset->subsys);
1245 hash_add(css_set_table, &cset->hlist, key);
1246
1247 for_each_subsys(ss, ssid) {
1248 struct cgroup_subsys_state *css = cset->subsys[ssid];
1249
1250 list_add_tail(&cset->e_cset_node[ssid],
1251 &css->cgroup->e_csets[ssid]);
1252 css_get(css);
1253 }
1254
1255 spin_unlock_irq(&css_set_lock);
1256
1257
1258
1259
1260
1261
1262
1263 if (cgroup_is_threaded(cset->dfl_cgrp)) {
1264 struct css_set *dcset;
1265
1266 dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
1267 if (!dcset) {
1268 put_css_set(cset);
1269 return NULL;
1270 }
1271
1272 spin_lock_irq(&css_set_lock);
1273 cset->dom_cset = dcset;
1274 list_add_tail(&cset->threaded_csets_node,
1275 &dcset->threaded_csets);
1276 spin_unlock_irq(&css_set_lock);
1277 }
1278
1279 return cset;
1280 }
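/*
 * Illustrative note, not part of the original file: find_css_set() is a
 * find-or-create operation.  The fast path is a css_set_table lookup under
 * css_set_lock; only on a miss is a new css_set allocated, linked into
 * every hierarchy root and, when the target cgroup is threaded, tied to
 * its domain css_set via the recursive call above.
 */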
1281
1282 struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
1283 {
1284 struct cgroup *root_cgrp = kf_root->kn->priv;
1285
1286 return root_cgrp->root;
1287 }
1288
1289 static int cgroup_init_root_id(struct cgroup_root *root)
1290 {
1291 int id;
1292
1293 lockdep_assert_held(&cgroup_mutex);
1294
1295 id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
1296 if (id < 0)
1297 return id;
1298
1299 root->hierarchy_id = id;
1300 return 0;
1301 }
1302
1303 static void cgroup_exit_root_id(struct cgroup_root *root)
1304 {
1305 lockdep_assert_held(&cgroup_mutex);
1306
1307 idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
1308 }
1309
1310 void cgroup_free_root(struct cgroup_root *root)
1311 {
1312 if (root) {
1313 idr_destroy(&root->cgroup_idr);
1314 kfree(root);
1315 }
1316 }
1317
1318 static void cgroup_destroy_root(struct cgroup_root *root)
1319 {
1320 struct cgroup *cgrp = &root->cgrp;
1321 struct cgrp_cset_link *link, *tmp_link;
1322
1323 trace_cgroup_destroy_root(root);
1324
1325 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1326
1327 BUG_ON(atomic_read(&root->nr_cgrps));
1328 BUG_ON(!list_empty(&cgrp->self.children));
1329
1330
1331 WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));
1332
1333
1334
1335
1336
1337 spin_lock_irq(&css_set_lock);
1338
1339 list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
1340 list_del(&link->cset_link);
1341 list_del(&link->cgrp_link);
1342 kfree(link);
1343 }
1344
1345 spin_unlock_irq(&css_set_lock);
1346
1347 if (!list_empty(&root->root_list)) {
1348 list_del(&root->root_list);
1349 cgroup_root_count--;
1350 }
1351
1352 cgroup_exit_root_id(root);
1353
1354 mutex_unlock(&cgroup_mutex);
1355
1356 kernfs_destroy_root(root->kf_root);
1357 cgroup_free_root(root);
1358 }
1359
1360
1361
1362
1363
1364 static struct cgroup *
1365 current_cgns_cgroup_from_root(struct cgroup_root *root)
1366 {
1367 struct cgroup *res = NULL;
1368 struct css_set *cset;
1369
1370 lockdep_assert_held(&css_set_lock);
1371
1372 rcu_read_lock();
1373
1374 cset = current->nsproxy->cgroup_ns->root_cset;
1375 if (cset == &init_css_set) {
1376 res = &root->cgrp;
1377 } else {
1378 struct cgrp_cset_link *link;
1379
1380 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
1381 struct cgroup *c = link->cgrp;
1382
1383 if (c->root == root) {
1384 res = c;
1385 break;
1386 }
1387 }
1388 }
1389 rcu_read_unlock();
1390
1391 BUG_ON(!res);
1392 return res;
1393 }
1394
1395
1396 static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
1397 struct cgroup_root *root)
1398 {
1399 struct cgroup *res = NULL;
1400
1401 lockdep_assert_held(&cgroup_mutex);
1402 lockdep_assert_held(&css_set_lock);
1403
1404 if (cset == &init_css_set) {
1405 res = &root->cgrp;
1406 } else if (root == &cgrp_dfl_root) {
1407 res = cset->dfl_cgrp;
1408 } else {
1409 struct cgrp_cset_link *link;
1410
1411 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
1412 struct cgroup *c = link->cgrp;
1413
1414 if (c->root == root) {
1415 res = c;
1416 break;
1417 }
1418 }
1419 }
1420
1421 BUG_ON(!res);
1422 return res;
1423 }
1424
1425
1426
1427
1428
1429 struct cgroup *task_cgroup_from_root(struct task_struct *task,
1430 struct cgroup_root *root)
1431 {
1432
1433
1434
1435
1436
1437 return cset_cgroup_from_root(task_css_set(task), root);
1438 }
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466 static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
1467
1468 static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
1469 char *buf)
1470 {
1471 struct cgroup_subsys *ss = cft->ss;
1472
1473 if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
1474 !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
1475 const char *dbg = (cft->flags & CFTYPE_DEBUG) ? ".__DEBUG__." : "";
1476
1477 snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
1478 dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
1479 cft->name);
1480 } else {
1481 strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
1482 }
1483 return buf;
1484 }
1485
1486
1487
1488
1489
1490
1491
1492 static umode_t cgroup_file_mode(const struct cftype *cft)
1493 {
1494 umode_t mode = 0;
1495
1496 if (cft->read_u64 || cft->read_s64 || cft->seq_show)
1497 mode |= S_IRUGO;
1498
1499 if (cft->write_u64 || cft->write_s64 || cft->write) {
1500 if (cft->flags & CFTYPE_WORLD_WRITABLE)
1501 mode |= S_IWUGO;
1502 else
1503 mode |= S_IWUSR;
1504 }
1505
1506 return mode;
1507 }
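/*
 * Illustrative example, not part of the original file: a cftype with both
 * ->seq_show and ->write yields 0644 (S_IRUGO | S_IWUSR), or 0666
 * (S_IRUGO | S_IWUGO) if CFTYPE_WORLD_WRITABLE is set.
 */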
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521 static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
1522 {
1523 u16 cur_ss_mask = subtree_control;
1524 struct cgroup_subsys *ss;
1525 int ssid;
1526
1527 lockdep_assert_held(&cgroup_mutex);
1528
1529 cur_ss_mask |= cgrp_dfl_implicit_ss_mask;
1530
1531 while (true) {
1532 u16 new_ss_mask = cur_ss_mask;
1533
1534 do_each_subsys_mask(ss, ssid, cur_ss_mask) {
1535 new_ss_mask |= ss->depends_on;
1536 } while_each_subsys_mask();
1537
1538
1539
1540
1541
1542
1543 new_ss_mask &= this_ss_mask;
1544
1545 if (new_ss_mask == cur_ss_mask)
1546 break;
1547 cur_ss_mask = new_ss_mask;
1548 }
1549
1550 return cur_ss_mask;
1551 }
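/*
 * Illustrative example, not part of the original file: if controller A has
 * ->depends_on = B and B has ->depends_on = C, enabling A alone in
 * subtree_control converges to {A, B, C} after two passes of the loop
 * above, clipped to whatever this_ss_mask makes available.
 */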
1552
1553
1554
1555 /**
1556  * cgroup_kn_unlock - unlocking helper for cgroup kn methods
1557  * @kn: the kernfs_node being serviced
1558  *
1559  * This helper undoes cgroup_kn_lock_live() and should be invoked before
1560  * the method finishes if locking succeeded.  Note that once this function
1561  * returns the cgroup returned by cgroup_kn_lock_live() may be released.
1562  */
1563 void cgroup_kn_unlock(struct kernfs_node *kn)
1564 {
1565 struct cgroup *cgrp;
1566
1567 if (kernfs_type(kn) == KERNFS_DIR)
1568 cgrp = kn->priv;
1569 else
1570 cgrp = kn->parent->priv;
1571
1572 mutex_unlock(&cgroup_mutex);
1573
1574 kernfs_unbreak_active_protection(kn);
1575 cgroup_put(cgrp);
1576 }
1577
1578 /**
1579  * cgroup_kn_lock_live - locking helper for cgroup kn methods
1580  * @kn: the kernfs_node being serviced
1581  * @drain_offline: perform offline draining on the cgroup
1582  *
1583  * This helper is to be used by a cgroup kernfs method currently servicing
1584  * @kn.  It breaks the active protection, performs cgroup locking and
1585  * verifies that the associated cgroup is alive.  Returns the cgroup if
1586  * alive; otherwise, %NULL.  A successful return should be undone by a
1587  * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
1588  * cgroup is drained of offlining csses before return.
1589  *
1590  * Any cgroup kernfs method implementation which requires locking the
1591  * associated cgroup should use this helper.  It avoids nesting cgroup
1592  * locking under kernfs active protection and allows all kernfs operations
1593  * including self-removal.
1594  */
1595 struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
1596 {
1597 struct cgroup *cgrp;
1598
1599 if (kernfs_type(kn) == KERNFS_DIR)
1600 cgrp = kn->priv;
1601 else
1602 cgrp = kn->parent->priv;
1603
1604
1605
1606
1607
1608
1609
1610 if (!cgroup_tryget(cgrp))
1611 return NULL;
1612 kernfs_break_active_protection(kn);
1613
1614 if (drain_offline)
1615 cgroup_lock_and_drain_offline(cgrp);
1616 else
1617 mutex_lock(&cgroup_mutex);
1618
1619 if (!cgroup_is_dead(cgrp))
1620 return cgrp;
1621
1622 cgroup_kn_unlock(kn);
1623 return NULL;
1624 }
1625
1626 static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
1627 {
1628 char name[CGROUP_FILE_NAME_MAX];
1629
1630 lockdep_assert_held(&cgroup_mutex);
1631
1632 if (cft->file_offset) {
1633 struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
1634 struct cgroup_file *cfile = (void *)css + cft->file_offset;
1635
1636 spin_lock_irq(&cgroup_file_kn_lock);
1637 cfile->kn = NULL;
1638 spin_unlock_irq(&cgroup_file_kn_lock);
1639
1640 del_timer_sync(&cfile->notify_timer);
1641 }
1642
1643 kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
1644 }
1645
1646
1647
1648
1649
1650 static void css_clear_dir(struct cgroup_subsys_state *css)
1651 {
1652 struct cgroup *cgrp = css->cgroup;
1653 struct cftype *cfts;
1654
1655 if (!(css->flags & CSS_VISIBLE))
1656 return;
1657
1658 css->flags &= ~CSS_VISIBLE;
1659
1660 if (!css->ss) {
1661 if (cgroup_on_dfl(cgrp))
1662 cfts = cgroup_base_files;
1663 else
1664 cfts = cgroup1_base_files;
1665
1666 cgroup_addrm_files(css, cgrp, cfts, false);
1667 } else {
1668 list_for_each_entry(cfts, &css->ss->cfts, node)
1669 cgroup_addrm_files(css, cgrp, cfts, false);
1670 }
1671 }
1672
1673
1674
1675
1676
1677
1678
1679 static int css_populate_dir(struct cgroup_subsys_state *css)
1680 {
1681 struct cgroup *cgrp = css->cgroup;
1682 struct cftype *cfts, *failed_cfts;
1683 int ret;
1684
1685 if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
1686 return 0;
1687
1688 if (!css->ss) {
1689 if (cgroup_on_dfl(cgrp))
1690 cfts = cgroup_base_files;
1691 else
1692 cfts = cgroup1_base_files;
1693
1694 ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
1695 if (ret < 0)
1696 return ret;
1697 } else {
1698 list_for_each_entry(cfts, &css->ss->cfts, node) {
1699 ret = cgroup_addrm_files(css, cgrp, cfts, true);
1700 if (ret < 0) {
1701 failed_cfts = cfts;
1702 goto err;
1703 }
1704 }
1705 }
1706
1707 css->flags |= CSS_VISIBLE;
1708
1709 return 0;
1710 err:
1711 list_for_each_entry(cfts, &css->ss->cfts, node) {
1712 if (cfts == failed_cfts)
1713 break;
1714 cgroup_addrm_files(css, cgrp, cfts, false);
1715 }
1716 return ret;
1717 }
1718
1719 int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
1720 {
1721 struct cgroup *dcgrp = &dst_root->cgrp;
1722 struct cgroup_subsys *ss;
1723 int ssid, i, ret;
1724
1725 lockdep_assert_held(&cgroup_mutex);
1726
1727 do_each_subsys_mask(ss, ssid, ss_mask) {
1728
1729
1730
1731
1732
1733 if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
1734 !ss->implicit_on_dfl)
1735 return -EBUSY;
1736
1737
1738 if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
1739 return -EBUSY;
1740 } while_each_subsys_mask();
1741
1742 do_each_subsys_mask(ss, ssid, ss_mask) {
1743 struct cgroup_root *src_root = ss->root;
1744 struct cgroup *scgrp = &src_root->cgrp;
1745 struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
1746 struct css_set *cset;
1747
1748 WARN_ON(!css || cgroup_css(dcgrp, ss));
1749
1750
1751 src_root->subsys_mask &= ~(1 << ssid);
1752 WARN_ON(cgroup_apply_control(scgrp));
1753 cgroup_finalize_control(scgrp, 0);
1754
1755
1756 RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
1757 rcu_assign_pointer(dcgrp->subsys[ssid], css);
1758 ss->root = dst_root;
1759 css->cgroup = dcgrp;
1760
1761 spin_lock_irq(&css_set_lock);
1762 hash_for_each(css_set_table, i, cset, hlist)
1763 list_move_tail(&cset->e_cset_node[ss->id],
1764 &dcgrp->e_csets[ss->id]);
1765 spin_unlock_irq(&css_set_lock);
1766
1767
1768 dst_root->subsys_mask |= 1 << ssid;
1769 if (dst_root == &cgrp_dfl_root) {
1770 static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
1771 } else {
1772 dcgrp->subtree_control |= 1 << ssid;
1773 static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
1774 }
1775
1776 ret = cgroup_apply_control(dcgrp);
1777 if (ret)
1778 pr_warn("partial failure to rebind %s controller (err=%d)\n",
1779 ss->name, ret);
1780
1781 if (ss->bind)
1782 ss->bind(css);
1783 } while_each_subsys_mask();
1784
1785 kernfs_activate(dcgrp->kn);
1786 return 0;
1787 }
1788
1789 int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
1790 struct kernfs_root *kf_root)
1791 {
1792 int len = 0;
1793 char *buf = NULL;
1794 struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
1795 struct cgroup *ns_cgroup;
1796
1797 buf = kmalloc(PATH_MAX, GFP_KERNEL);
1798 if (!buf)
1799 return -ENOMEM;
1800
1801 spin_lock_irq(&css_set_lock);
1802 ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
1803 len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
1804 spin_unlock_irq(&css_set_lock);
1805
1806 if (len >= PATH_MAX)
1807 len = -ERANGE;
1808 else if (len > 0) {
1809 seq_escape(sf, buf, " \t\n\\");
1810 len = 0;
1811 }
1812 kfree(buf);
1813 return len;
1814 }
1815
1816 enum cgroup2_param {
1817 Opt_nsdelegate,
1818 Opt_memory_localevents,
1819 nr__cgroup2_params
1820 };
1821
1822 static const struct fs_parameter_spec cgroup2_param_specs[] = {
1823 fsparam_flag("nsdelegate", Opt_nsdelegate),
1824 fsparam_flag("memory_localevents", Opt_memory_localevents),
1825 {}
1826 };
1827
1828 static const struct fs_parameter_description cgroup2_fs_parameters = {
1829 .name = "cgroup2",
1830 .specs = cgroup2_param_specs,
1831 };
1832
1833 static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
1834 {
1835 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1836 struct fs_parse_result result;
1837 int opt;
1838
1839 opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result);
1840 if (opt < 0)
1841 return opt;
1842
1843 switch (opt) {
1844 case Opt_nsdelegate:
1845 ctx->flags |= CGRP_ROOT_NS_DELEGATE;
1846 return 0;
1847 case Opt_memory_localevents:
1848 ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
1849 return 0;
1850 }
1851 return -EINVAL;
1852 }
1853
1854 static void apply_cgroup_root_flags(unsigned int root_flags)
1855 {
1856 if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
1857 if (root_flags & CGRP_ROOT_NS_DELEGATE)
1858 cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
1859 else
1860 cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
1861
1862 if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
1863 cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
1864 else
1865 cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS;
1866 }
1867 }
1868
1869 static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
1870 {
1871 if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
1872 seq_puts(seq, ",nsdelegate");
1873 if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
1874 seq_puts(seq, ",memory_localevents");
1875 return 0;
1876 }
1877
1878 static int cgroup_reconfigure(struct fs_context *fc)
1879 {
1880 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1881
1882 apply_cgroup_root_flags(ctx->flags);
1883 return 0;
1884 }
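/*
 * Illustrative usage, not part of the original file: the flags parsed
 * above correspond to cgroup2 mount options, e.g.
 *
 *	mount -t cgroup2 -o nsdelegate none /sys/fs/cgroup
 *	mount -o remount,memory_localevents none /sys/fs/cgroup
 *
 * where a remount ends up in cgroup_reconfigure().
 */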
1885
1886
1887
1888
1889
1890
1891
1892 static bool use_task_css_set_links __read_mostly;
1893
1894 void cgroup_enable_task_cg_lists(void)
1895 {
1896 struct task_struct *p, *g;
1897
1898
1899
1900
1901
1902
1903
1904
1905 read_lock(&tasklist_lock);
1906 spin_lock_irq(&css_set_lock);
1907
1908 if (use_task_css_set_links)
1909 goto out_unlock;
1910
1911 use_task_css_set_links = true;
1912
1913 do_each_thread(g, p) {
1914 WARN_ON_ONCE(!list_empty(&p->cg_list) ||
1915 task_css_set(p) != &init_css_set);
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928 spin_lock(&p->sighand->siglock);
1929 if (!(p->flags & PF_EXITING)) {
1930 struct css_set *cset = task_css_set(p);
1931
1932 if (!css_set_populated(cset))
1933 css_set_update_populated(cset, true);
1934 list_add_tail(&p->cg_list, &cset->tasks);
1935 get_css_set(cset);
1936 cset->nr_tasks++;
1937 }
1938 spin_unlock(&p->sighand->siglock);
1939 } while_each_thread(g, p);
1940 out_unlock:
1941 spin_unlock_irq(&css_set_lock);
1942 read_unlock(&tasklist_lock);
1943 }
1944
1945 static void init_cgroup_housekeeping(struct cgroup *cgrp)
1946 {
1947 struct cgroup_subsys *ss;
1948 int ssid;
1949
1950 INIT_LIST_HEAD(&cgrp->self.sibling);
1951 INIT_LIST_HEAD(&cgrp->self.children);
1952 INIT_LIST_HEAD(&cgrp->cset_links);
1953 INIT_LIST_HEAD(&cgrp->pidlists);
1954 mutex_init(&cgrp->pidlist_mutex);
1955 cgrp->self.cgroup = cgrp;
1956 cgrp->self.flags |= CSS_ONLINE;
1957 cgrp->dom_cgrp = cgrp;
1958 cgrp->max_descendants = INT_MAX;
1959 cgrp->max_depth = INT_MAX;
1960 INIT_LIST_HEAD(&cgrp->rstat_css_list);
1961 prev_cputime_init(&cgrp->prev_cputime);
1962
1963 for_each_subsys(ss, ssid)
1964 INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
1965
1966 init_waitqueue_head(&cgrp->offline_waitq);
1967 INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
1968 }
1969
1970 void init_cgroup_root(struct cgroup_fs_context *ctx)
1971 {
1972 struct cgroup_root *root = ctx->root;
1973 struct cgroup *cgrp = &root->cgrp;
1974
1975 INIT_LIST_HEAD(&root->root_list);
1976 atomic_set(&root->nr_cgrps, 1);
1977 cgrp->root = root;
1978 init_cgroup_housekeeping(cgrp);
1979 idr_init(&root->cgroup_idr);
1980
1981 root->flags = ctx->flags;
1982 if (ctx->release_agent)
1983 strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
1984 if (ctx->name)
1985 strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
1986 if (ctx->cpuset_clone_children)
1987 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
1988 }
1989
1990 int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
1991 {
1992 LIST_HEAD(tmp_links);
1993 struct cgroup *root_cgrp = &root->cgrp;
1994 struct kernfs_syscall_ops *kf_sops;
1995 struct css_set *cset;
1996 int i, ret;
1997
1998 lockdep_assert_held(&cgroup_mutex);
1999
2000 ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
2001 if (ret < 0)
2002 goto out;
2003 root_cgrp->id = ret;
2004 root_cgrp->ancestor_ids[0] = ret;
2005
2006 ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
2007 0, GFP_KERNEL);
2008 if (ret)
2009 goto out;
2010
2011
2012
2013
2014
2015
2016
2017
2018 ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
2019 if (ret)
2020 goto cancel_ref;
2021
2022 ret = cgroup_init_root_id(root);
2023 if (ret)
2024 goto cancel_ref;
2025
2026 kf_sops = root == &cgrp_dfl_root ?
2027 &cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;
2028
2029 root->kf_root = kernfs_create_root(kf_sops,
2030 KERNFS_ROOT_CREATE_DEACTIVATED |
2031 KERNFS_ROOT_SUPPORT_EXPORTOP,
2032 root_cgrp);
2033 if (IS_ERR(root->kf_root)) {
2034 ret = PTR_ERR(root->kf_root);
2035 goto exit_root_id;
2036 }
2037 root_cgrp->kn = root->kf_root->kn;
2038
2039 ret = css_populate_dir(&root_cgrp->self);
2040 if (ret)
2041 goto destroy_root;
2042
2043 ret = rebind_subsystems(root, ss_mask);
2044 if (ret)
2045 goto destroy_root;
2046
2047 ret = cgroup_bpf_inherit(root_cgrp);
2048 WARN_ON_ONCE(ret);
2049
2050 trace_cgroup_setup_root(root);
2051
2052
2053
2054
2055
2056
2057 list_add(&root->root_list, &cgroup_roots);
2058 cgroup_root_count++;
2059
2060
2061
2062
2063
2064 spin_lock_irq(&css_set_lock);
2065 hash_for_each(css_set_table, i, cset, hlist) {
2066 link_css_set(&tmp_links, cset, root_cgrp);
2067 if (css_set_populated(cset))
2068 cgroup_update_populated(root_cgrp, true);
2069 }
2070 spin_unlock_irq(&css_set_lock);
2071
2072 BUG_ON(!list_empty(&root_cgrp->self.children));
2073 BUG_ON(atomic_read(&root->nr_cgrps) != 1);
2074
2075 kernfs_activate(root_cgrp->kn);
2076 ret = 0;
2077 goto out;
2078
2079 destroy_root:
2080 kernfs_destroy_root(root->kf_root);
2081 root->kf_root = NULL;
2082 exit_root_id:
2083 cgroup_exit_root_id(root);
2084 cancel_ref:
2085 percpu_ref_exit(&root_cgrp->self.refcnt);
2086 out:
2087 free_cgrp_cset_links(&tmp_links);
2088 return ret;
2089 }
2090
2091 int cgroup_do_get_tree(struct fs_context *fc)
2092 {
2093 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
2094 int ret;
2095
2096 ctx->kfc.root = ctx->root->kf_root;
2097 if (fc->fs_type == &cgroup2_fs_type)
2098 ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
2099 else
2100 ctx->kfc.magic = CGROUP_SUPER_MAGIC;
2101 ret = kernfs_get_tree(fc);
2102
2103
2104
2105
2106
2107 if (!ret && ctx->ns != &init_cgroup_ns) {
2108 struct dentry *nsdentry;
2109 struct super_block *sb = fc->root->d_sb;
2110 struct cgroup *cgrp;
2111
2112 mutex_lock(&cgroup_mutex);
2113 spin_lock_irq(&css_set_lock);
2114
2115 cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);
2116
2117 spin_unlock_irq(&css_set_lock);
2118 mutex_unlock(&cgroup_mutex);
2119
2120 nsdentry = kernfs_node_dentry(cgrp->kn, sb);
2121 dput(fc->root);
2122 if (IS_ERR(nsdentry)) {
2123 deactivate_locked_super(sb);
2124 ret = PTR_ERR(nsdentry);
2125 nsdentry = NULL;
2126 }
2127 fc->root = nsdentry;
2128 }
2129
2130 if (!ctx->kfc.new_sb_created)
2131 cgroup_put(&ctx->root->cgrp);
2132
2133 return ret;
2134 }
2135
2136
2137
2138
2139 static void cgroup_fs_context_free(struct fs_context *fc)
2140 {
2141 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
2142
2143 kfree(ctx->name);
2144 kfree(ctx->release_agent);
2145 put_cgroup_ns(ctx->ns);
2146 kernfs_free_fs_context(fc);
2147 kfree(ctx);
2148 }
2149
2150 static int cgroup_get_tree(struct fs_context *fc)
2151 {
2152 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
2153 int ret;
2154
2155 cgrp_dfl_visible = true;
2156 cgroup_get_live(&cgrp_dfl_root.cgrp);
2157 ctx->root = &cgrp_dfl_root;
2158
2159 ret = cgroup_do_get_tree(fc);
2160 if (!ret)
2161 apply_cgroup_root_flags(ctx->flags);
2162 return ret;
2163 }
2164
2165 static const struct fs_context_operations cgroup_fs_context_ops = {
2166 .free = cgroup_fs_context_free,
2167 .parse_param = cgroup2_parse_param,
2168 .get_tree = cgroup_get_tree,
2169 .reconfigure = cgroup_reconfigure,
2170 };
2171
2172 static const struct fs_context_operations cgroup1_fs_context_ops = {
2173 .free = cgroup_fs_context_free,
2174 .parse_param = cgroup1_parse_param,
2175 .get_tree = cgroup1_get_tree,
2176 .reconfigure = cgroup1_reconfigure,
2177 };
2178
2179
2180
2181
2182
2183 static int cgroup_init_fs_context(struct fs_context *fc)
2184 {
2185 struct cgroup_fs_context *ctx;
2186
2187 ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL);
2188 if (!ctx)
2189 return -ENOMEM;
2190
2191
2192
2193
2194
2195 if (!use_task_css_set_links)
2196 cgroup_enable_task_cg_lists();
2197
2198 ctx->ns = current->nsproxy->cgroup_ns;
2199 get_cgroup_ns(ctx->ns);
2200 fc->fs_private = &ctx->kfc;
2201 if (fc->fs_type == &cgroup2_fs_type)
2202 fc->ops = &cgroup_fs_context_ops;
2203 else
2204 fc->ops = &cgroup1_fs_context_ops;
2205 put_user_ns(fc->user_ns);
2206 fc->user_ns = get_user_ns(ctx->ns->user_ns);
2207 fc->global = true;
2208 return 0;
2209 }
2210
2211 static void cgroup_kill_sb(struct super_block *sb)
2212 {
2213 struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
2214 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
2215
2216
2217
2218
2219
2220
2221
2222
2223 if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
2224 !percpu_ref_is_dying(&root->cgrp.self.refcnt))
2225 percpu_ref_kill(&root->cgrp.self.refcnt);
2226 cgroup_put(&root->cgrp);
2227 kernfs_kill_sb(sb);
2228 }
2229
2230 struct file_system_type cgroup_fs_type = {
2231 .name = "cgroup",
2232 .init_fs_context = cgroup_init_fs_context,
2233 .parameters = &cgroup1_fs_parameters,
2234 .kill_sb = cgroup_kill_sb,
2235 .fs_flags = FS_USERNS_MOUNT,
2236 };
2237
2238 static struct file_system_type cgroup2_fs_type = {
2239 .name = "cgroup2",
2240 .init_fs_context = cgroup_init_fs_context,
2241 .parameters = &cgroup2_fs_parameters,
2242 .kill_sb = cgroup_kill_sb,
2243 .fs_flags = FS_USERNS_MOUNT,
2244 };
2245
2246 #ifdef CONFIG_CPUSETS
2247 static const struct fs_context_operations cpuset_fs_context_ops = {
2248 .get_tree = cgroup1_get_tree,
2249 .free = cgroup_fs_context_free,
2250 };
2251
2252
2253
2254
2255
2256
2257 static int cpuset_init_fs_context(struct fs_context *fc)
2258 {
2259 char *agent = kstrdup("/sbin/cpuset_release_agent", GFP_USER);
2260 struct cgroup_fs_context *ctx;
2261 int err;
2262
2263 err = cgroup_init_fs_context(fc);
2264 if (err) {
2265 kfree(agent);
2266 return err;
2267 }
2268
2269 fc->ops = &cpuset_fs_context_ops;
2270
2271 ctx = cgroup_fc2context(fc);
2272 ctx->subsys_mask = 1 << cpuset_cgrp_id;
2273 ctx->flags |= CGRP_ROOT_NOPREFIX;
2274 ctx->release_agent = agent;
2275
2276 get_filesystem(&cgroup_fs_type);
2277 put_filesystem(fc->fs_type);
2278 fc->fs_type = &cgroup_fs_type;
2279
2280 return 0;
2281 }
2282
2283 static struct file_system_type cpuset_fs_type = {
2284 .name = "cpuset",
2285 .init_fs_context = cpuset_init_fs_context,
2286 .fs_flags = FS_USERNS_MOUNT,
2287 };
2288 #endif
2289
2290 int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
2291 struct cgroup_namespace *ns)
2292 {
2293 struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
2294
2295 return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
2296 }
2297
2298 int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
2299 struct cgroup_namespace *ns)
2300 {
2301 int ret;
2302
2303 mutex_lock(&cgroup_mutex);
2304 spin_lock_irq(&css_set_lock);
2305
2306 ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
2307
2308 spin_unlock_irq(&css_set_lock);
2309 mutex_unlock(&cgroup_mutex);
2310
2311 return ret;
2312 }
2313 EXPORT_SYMBOL_GPL(cgroup_path_ns);
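/*
 * Illustrative usage, not part of the original file: format a cgroup's
 * path relative to the calling task's cgroup namespace.
 * example_print_cgrp_path() is a hypothetical helper.
 */
static int example_print_cgrp_path(struct cgroup *cgrp, char *buf, size_t len)
{
	return cgroup_path_ns(cgrp, buf, len, current->nsproxy->cgroup_ns);
}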
2314
2315 /**
2316  * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
2317  * @task: target task
2318  * @buf: the buffer to write the path into
2319  * @buflen: the length of the buffer
2320  *
2321  * Determine @task's cgroup on the first (the one with the lowest non-zero
2322  * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
2323  * function grabs cgroup_mutex and shouldn't be used inside locks used by
2324  * cgroup controller callbacks.
2325  *
2326  * Return value is the same as kernfs_path().
2327  */
2328 int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
2329 {
2330 struct cgroup_root *root;
2331 struct cgroup *cgrp;
2332 int hierarchy_id = 1;
2333 int ret;
2334
2335 mutex_lock(&cgroup_mutex);
2336 spin_lock_irq(&css_set_lock);
2337
2338 root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
2339
2340 if (root) {
2341 cgrp = task_cgroup_from_root(task, root);
2342 ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
2343 } else {
2344 /* if no hierarchy exists, everyone is in "/" */
2345 ret = strlcpy(buf, "/", buflen);
2346 }
2347
2348 spin_unlock_irq(&css_set_lock);
2349 mutex_unlock(&cgroup_mutex);
2350 return ret;
2351 }
2352 EXPORT_SYMBOL_GPL(task_cgroup_path);
2353
2354 /**
2355  * cgroup_migrate_add_task - add a migration target task to a migration context
2356  * @task: target task
2357  * @mgctx: target migration context
2358  *
2359  * Add @task, which is a migration target, to @mgctx->tset.  This function
2360  * becomes noop if @task doesn't need to be migrated.  @task's css_set
2361  * should have been added as a migration source and @task->cg_list will be
2362  * moved from the css_set's tasks list to mg_tasks one.
2363  */
2364 static void cgroup_migrate_add_task(struct task_struct *task,
2365 struct cgroup_mgctx *mgctx)
2366 {
2367 struct css_set *cset;
2368
2369 lockdep_assert_held(&css_set_lock);
2370
2371 /* @task either already exited or can't exit until the end */
2372 if (task->flags & PF_EXITING)
2373 return;
2374
2375 /* leave @task alone if post_fork() hasn't linked it yet */
2376 if (list_empty(&task->cg_list))
2377 return;
2378
2379 cset = task_css_set(task);
2380 if (!cset->mg_src_cgrp)
2381 return;
2382
2383 mgctx->tset.nr_tasks++;
2384
2385 list_move_tail(&task->cg_list, &cset->mg_tasks);
2386 if (list_empty(&cset->mg_node))
2387 list_add_tail(&cset->mg_node,
2388 &mgctx->tset.src_csets);
2389 if (list_empty(&cset->mg_dst_cset->mg_node))
2390 list_add_tail(&cset->mg_dst_cset->mg_node,
2391 &mgctx->tset.dst_csets);
2392 }
2393
2394
2395
2396
2397
2398
2399
2400
2401 struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
2402 struct cgroup_subsys_state **dst_cssp)
2403 {
2404 tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
2405 tset->cur_task = NULL;
2406
2407 return cgroup_taskset_next(tset, dst_cssp);
2408 }
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418 struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
2419 struct cgroup_subsys_state **dst_cssp)
2420 {
2421 struct css_set *cset = tset->cur_cset;
2422 struct task_struct *task = tset->cur_task;
2423
2424 while (&cset->mg_node != tset->csets) {
2425 if (!task)
2426 task = list_first_entry(&cset->mg_tasks,
2427 struct task_struct, cg_list);
2428 else
2429 task = list_next_entry(task, cg_list);
2430
2431 if (&task->cg_list != &cset->mg_tasks) {
2432 tset->cur_cset = cset;
2433 tset->cur_task = task;
2434
2435
2436
2437
2438
2439
2440
2441 if (cset->mg_dst_cset)
2442 *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
2443 else
2444 *dst_cssp = cset->subsys[tset->ssid];
2445
2446 return task;
2447 }
2448
2449 cset = list_next_entry(cset, mg_node);
2450 task = NULL;
2451 }
2452
2453 return NULL;
2454 }
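/*
 * Illustrative usage, not part of the original file (example_attach() is
 * hypothetical): a controller's ->attach() callback typically walks the
 * taskset with the cgroup_taskset_for_each() macro from
 * include/linux/cgroup.h, built on cgroup_taskset_first()/_next() above.
 */
static void example_attach(struct cgroup_taskset *tset)
{
	struct cgroup_subsys_state *dst_css;
	struct task_struct *task;

	cgroup_taskset_for_each(task, dst_css, tset) {
		/* re-home @task's per-controller state under @dst_css */
	}
}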
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465 static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
2466 {
2467 struct cgroup_taskset *tset = &mgctx->tset;
2468 struct cgroup_subsys *ss;
2469 struct task_struct *task, *tmp_task;
2470 struct css_set *cset, *tmp_cset;
2471 int ssid, failed_ssid, ret;
2472
2473
2474 if (tset->nr_tasks) {
2475 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2476 if (ss->can_attach) {
2477 tset->ssid = ssid;
2478 ret = ss->can_attach(tset);
2479 if (ret) {
2480 failed_ssid = ssid;
2481 goto out_cancel_attach;
2482 }
2483 }
2484 } while_each_subsys_mask();
2485 }
2486
2487
2488
2489
2490
2491
2492 spin_lock_irq(&css_set_lock);
2493 list_for_each_entry(cset, &tset->src_csets, mg_node) {
2494 list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
2495 struct css_set *from_cset = task_css_set(task);
2496 struct css_set *to_cset = cset->mg_dst_cset;
2497
2498 get_css_set(to_cset);
2499 to_cset->nr_tasks++;
2500 css_set_move_task(task, from_cset, to_cset, true);
2501 from_cset->nr_tasks--;
2502
2503
2504
2505
2506 cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
2507 to_cset->dfl_cgrp);
2508 put_css_set_locked(from_cset);
2509
2510 }
2511 }
2512 spin_unlock_irq(&css_set_lock);
2513
2514
2515
2516
2517
2518
2519 tset->csets = &tset->dst_csets;
2520
2521 if (tset->nr_tasks) {
2522 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2523 if (ss->attach) {
2524 tset->ssid = ssid;
2525 ss->attach(tset);
2526 }
2527 } while_each_subsys_mask();
2528 }
2529
2530 ret = 0;
2531 goto out_release_tset;
2532
2533 out_cancel_attach:
2534 if (tset->nr_tasks) {
2535 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2536 if (ssid == failed_ssid)
2537 break;
2538 if (ss->cancel_attach) {
2539 tset->ssid = ssid;
2540 ss->cancel_attach(tset);
2541 }
2542 } while_each_subsys_mask();
2543 }
2544 out_release_tset:
2545 spin_lock_irq(&css_set_lock);
2546 list_splice_init(&tset->dst_csets, &tset->src_csets);
2547 list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
2548 list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
2549 list_del_init(&cset->mg_node);
2550 }
2551 spin_unlock_irq(&css_set_lock);
2552
2553
2554
2555
2556
2557
2558 tset->nr_tasks = 0;
2559 tset->csets = &tset->src_csets;
2560 return ret;
2561 }
2562
2563 /**
2564  * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination
2565  * @dst_cgrp: destination cgroup to test
2566  *
2567  * On the default hierarchy, except for the mixable, (possible) thread root
2568  * and threaded cgroups, subtree_control must be zero for migration
2569  * destination cgroups with tasks so that child cgroups don't compete
2570  * against tasks.
2571  */
2572 int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
2573 {
2574
2575 if (!cgroup_on_dfl(dst_cgrp))
2576 return 0;
2577
2578
2579 if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
2580 return -EOPNOTSUPP;
2581
2582
2583 if (cgroup_is_mixable(dst_cgrp))
2584 return 0;
2585
2586 /*
2587  * If @dst_cgrp is already or can become a thread root or is
2588  * threaded, it doesn't matter.
2589  */
2590 if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
2591 return 0;
2592
2593
2594 if (dst_cgrp->subtree_control)
2595 return -EBUSY;
2596
2597 return 0;
2598 }
2599
2600 /**
2601  * cgroup_migrate_finish - cleanup after attach
2602  * @mgctx: migration context
2603  *
2604  * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst().  See
2605  * those functions for details.
2606  */
2607 void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
2608 {
2609 LIST_HEAD(preloaded);
2610 struct css_set *cset, *tmp_cset;
2611
2612 lockdep_assert_held(&cgroup_mutex);
2613
2614 spin_lock_irq(&css_set_lock);
2615
2616 list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
2617 list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);
2618
2619 list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
2620 cset->mg_src_cgrp = NULL;
2621 cset->mg_dst_cgrp = NULL;
2622 cset->mg_dst_cset = NULL;
2623 list_del_init(&cset->mg_preload_node);
2624 put_css_set_locked(cset);
2625 }
2626
2627 spin_unlock_irq(&css_set_lock);
2628 }
2629
2630 /**
2631  * cgroup_migrate_add_src - add a migration source css_set
2632  * @src_cset: the source css_set to add
2633  * @dst_cgrp: the destination cgroup
2634  * @mgctx: migration context
2635  *
2636  * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp.  Pin
2637  * @src_cset and add it to @mgctx->preloaded_src_csets, which should later
2638  * be cleaned up by cgroup_migrate_finish().
2639  *
2640  * This function may be called without holding cgroup_threadgroup_rwsem
2641  * even if the target is a process.  Threads may be created and destroyed
2642  * but as long as cgroup_mutex is not dropped, no new css_set can be put
2643  * into play and the preloaded css_sets are guaranteed to stay put.  Use
2644  * cgroup_migrate_prepare_dst() to prepare the counterpart.
2645  */
2646 void cgroup_migrate_add_src(struct css_set *src_cset,
2647 struct cgroup *dst_cgrp,
2648 struct cgroup_mgctx *mgctx)
2649 {
2650 struct cgroup *src_cgrp;
2651
2652 lockdep_assert_held(&cgroup_mutex);
2653 lockdep_assert_held(&css_set_lock);
2654
2655 /*
2656  * If ->dead, @src_cset is associated with one or more dead cgroups
2657  * and doesn't contain any migratable tasks.  Ignore it early so
2658  * that the rest of migration path doesn't get confused by it.
2659  */
2660 if (src_cset->dead)
2661 return;
2662
2663 src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
2664
2665 if (!list_empty(&src_cset->mg_preload_node))
2666 return;
2667
2668 WARN_ON(src_cset->mg_src_cgrp);
2669 WARN_ON(src_cset->mg_dst_cgrp);
2670 WARN_ON(!list_empty(&src_cset->mg_tasks));
2671 WARN_ON(!list_empty(&src_cset->mg_node));
2672
2673 src_cset->mg_src_cgrp = src_cgrp;
2674 src_cset->mg_dst_cgrp = dst_cgrp;
2675 get_css_set(src_cset);
2676 list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
2677 }
2678
2679 /**
2680  * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
2681  * @mgctx: migration context
2682  *
2683  * Tasks are about to be moved and all the source css_sets have been
2684  * preloaded to @mgctx->preloaded_src_csets.  This function looks up and
2685  * pins all destination css_sets, links each to its source, and appends
2686  * them to @mgctx->preloaded_dst_csets.
2687  *
2688  * This function must be called after cgroup_migrate_add_src() has been
2689  * called on each migration source css_set.  After migration is performed
2690  * using cgroup_migrate(), cgroup_migrate_finish() must be called on
2691  * @mgctx.
2692  */
2693 int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
2694 {
2695 struct css_set *src_cset, *tmp_cset;
2696
2697 lockdep_assert_held(&cgroup_mutex);
2698
2699
2700 list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
2701 mg_preload_node) {
2702 struct css_set *dst_cset;
2703 struct cgroup_subsys *ss;
2704 int ssid;
2705
2706 dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
2707 if (!dst_cset)
2708 return -ENOMEM;
2709
2710 WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
2711
2712 /*
2713  * If src cset equals dst, it's noop.  Drop the src.
2714  * cgroup_migrate() will skip the cset too.  Note that we
2715  * can't handle src == dst as some nodes are used by both.
2716  */
2717 if (src_cset == dst_cset) {
2718 src_cset->mg_src_cgrp = NULL;
2719 src_cset->mg_dst_cgrp = NULL;
2720 list_del_init(&src_cset->mg_preload_node);
2721 put_css_set(src_cset);
2722 put_css_set(dst_cset);
2723 continue;
2724 }
2725
2726 src_cset->mg_dst_cset = dst_cset;
2727
2728 if (list_empty(&dst_cset->mg_preload_node))
2729 list_add_tail(&dst_cset->mg_preload_node,
2730 &mgctx->preloaded_dst_csets);
2731 else
2732 put_css_set(dst_cset);
2733
2734 for_each_subsys(ss, ssid)
2735 if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
2736 mgctx->ss_mask |= 1 << ssid;
2737 }
2738
2739 return 0;
2740 }
2741
2742 /**
2743  * cgroup_migrate - migrate a process or task to a cgroup
2744  * @leader: the leader of the process or the task to migrate
2745  * @threadgroup: whether @leader points to the whole process or a single task
2746  * @mgctx: migration context
2747  *
2748  * Migrate a process or task denoted by @leader.  If migrating a process,
2749  * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
2750  * responsible for invoking cgroup_migrate_add_src() and
2751  * cgroup_migrate_prepare_dst() on the targets before invoking this
2752  * function and following up with cgroup_migrate_finish().
2753  *
2754  * As long as a controller's ->can_attach() doesn't fail, this function is
2755  * guaranteed to succeed.  This means that, excluding ->can_attach()
2756  * failure, when migrating multiple targets, the success or failure can be
2757  * decided for all targets by invoking cgroup_migrate_prepare_dst() before
2758  * actually starting migrating.
2759  */
2760 int cgroup_migrate(struct task_struct *leader, bool threadgroup,
2761 struct cgroup_mgctx *mgctx)
2762 {
2763 struct task_struct *task;
2764
2765 /*
2766  * Prevent freeing of tasks while we take a snapshot. Tasks that are
2767  * already PF_EXITING could be freed from underneath us unless we
2768  * take an rcu_read_lock.
2769  */
2770 spin_lock_irq(&css_set_lock);
2771 rcu_read_lock();
2772 task = leader;
2773 do {
2774 cgroup_migrate_add_task(task, mgctx);
2775 if (!threadgroup)
2776 break;
2777 } while_each_thread(leader, task);
2778 rcu_read_unlock();
2779 spin_unlock_irq(&css_set_lock);
2780
2781 return cgroup_migrate_execute(mgctx);
2782 }
2783
2784 /**
2785  * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
2786  * @dst_cgrp: the cgroup to attach to
2787  * @leader: the task or the leader of the threadgroup to be attached
2788  * @threadgroup: attach the whole threadgroup?
2789  *
2790  * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
2791  */
2792 int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
2793 bool threadgroup)
2794 {
2795 DEFINE_CGROUP_MGCTX(mgctx);
2796 struct task_struct *task;
2797 int ret;
2798
2799 ret = cgroup_migrate_vet_dst(dst_cgrp);
2800 if (ret)
2801 return ret;
2802
2803
2804 spin_lock_irq(&css_set_lock);
2805 rcu_read_lock();
2806 task = leader;
2807 do {
2808 cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
2809 if (!threadgroup)
2810 break;
2811 } while_each_thread(leader, task);
2812 rcu_read_unlock();
2813 spin_unlock_irq(&css_set_lock);
2814
2815
2816 ret = cgroup_migrate_prepare_dst(&mgctx);
2817 if (!ret)
2818 ret = cgroup_migrate(leader, threadgroup, &mgctx);
2819
2820 cgroup_migrate_finish(&mgctx);
2821
2822 if (!ret)
2823 TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
2824
2825 return ret;
2826 }
2827
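/*
 * Resolve the PID written to cgroup.procs/cgroup.threads to a pinned
 * task_struct (the group leader if @threadgroup) while write-locking
 * cgroup_threadgroup_rwsem.  Returns ERR_PTR on failure.
 */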
2828 struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
2829 __acquires(&cgroup_threadgroup_rwsem)
2830 {
2831 struct task_struct *tsk;
2832 pid_t pid;
2833
2834 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
2835 return ERR_PTR(-EINVAL);
2836
2837 percpu_down_write(&cgroup_threadgroup_rwsem);
2838
2839 rcu_read_lock();
2840 if (pid) {
2841 tsk = find_task_by_vpid(pid);
2842 if (!tsk) {
2843 tsk = ERR_PTR(-ESRCH);
2844 goto out_unlock_threadgroup;
2845 }
2846 } else {
2847 tsk = current;
2848 }
2849
2850 if (threadgroup)
2851 tsk = tsk->group_leader;
2852
2853 /*
2854  * kthreads may acquire PF_NO_SETAFFINITY during initialization.
2855  * If userland migrates such a kthread to a non-root cgroup, it can
2856  * become trapped in a cpuset, or RT kthread may be born in a
2857  * cgroup with no rt_runtime allocated.  Just say no.
2858  */
2859 if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
2860 tsk = ERR_PTR(-EINVAL);
2861 goto out_unlock_threadgroup;
2862 }
2863
2864 get_task_struct(tsk);
2865 goto out_unlock_rcu;
2866
2867 out_unlock_threadgroup:
2868 percpu_up_write(&cgroup_threadgroup_rwsem);
2869 out_unlock_rcu:
2870 rcu_read_unlock();
2871 return tsk;
2872 }
2873
2874 void cgroup_procs_write_finish(struct task_struct *task)
2875 __releases(&cgroup_threadgroup_rwsem)
2876 {
2877 struct cgroup_subsys *ss;
2878 int ssid;
2879
2880
2881 put_task_struct(task);
2882
2883 percpu_up_write(&cgroup_threadgroup_rwsem);
2884 for_each_subsys(ss, ssid)
2885 if (ss->post_attach)
2886 ss->post_attach();
2887 }
2888
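/* print the names of the subsystems enabled in @ss_mask, space separated */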
2889 static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
2890 {
2891 struct cgroup_subsys *ss;
2892 bool printed = false;
2893 int ssid;
2894
2895 do_each_subsys_mask(ss, ssid, ss_mask) {
2896 if (printed)
2897 seq_putc(seq, ' ');
2898 seq_puts(seq, ss->name);
2899 printed = true;
2900 } while_each_subsys_mask();
2901 if (printed)
2902 seq_putc(seq, '\n');
2903 }
2904
2905
2906 static int cgroup_controllers_show(struct seq_file *seq, void *v)
2907 {
2908 struct cgroup *cgrp = seq_css(seq)->cgroup;
2909
2910 cgroup_print_ss_mask(seq, cgroup_control(cgrp));
2911 return 0;
2912 }
2913
2914
2915 static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
2916 {
2917 struct cgroup *cgrp = seq_css(seq)->cgroup;
2918
2919 cgroup_print_ss_mask(seq, cgrp->subtree_control);
2920 return 0;
2921 }
2922
2923 /**
2924  * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
2925  * @cgrp: root of the subtree to update csses for
2926  *
2927  * @cgrp's control masks have changed and its subtree's css associations
2928  * need to be updated accordingly.  This function looks up all css_sets
2929  * which are attached to the subtree, creates the matching updated css_sets
2930  * and migrates the tasks to the new ones.
2931  */
2932 static int cgroup_update_dfl_csses(struct cgroup *cgrp)
2933 {
2934 DEFINE_CGROUP_MGCTX(mgctx);
2935 struct cgroup_subsys_state *d_css;
2936 struct cgroup *dsct;
2937 struct css_set *src_cset;
2938 int ret;
2939
2940 lockdep_assert_held(&cgroup_mutex);
2941
2942 percpu_down_write(&cgroup_threadgroup_rwsem);
2943
2944
2945 spin_lock_irq(&css_set_lock);
2946 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
2947 struct cgrp_cset_link *link;
2948
2949 list_for_each_entry(link, &dsct->cset_links, cset_link)
2950 cgroup_migrate_add_src(link->cset, dsct, &mgctx);
2951 }
2952 spin_unlock_irq(&css_set_lock);
2953
2954
2955 ret = cgroup_migrate_prepare_dst(&mgctx);
2956 if (ret)
2957 goto out_finish;
2958
2959 spin_lock_irq(&css_set_lock);
2960 list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
2961 struct task_struct *task, *ntask;
2962
2963
2964 list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
2965 cgroup_migrate_add_task(task, &mgctx);
2966 }
2967 spin_unlock_irq(&css_set_lock);
2968
2969 ret = cgroup_migrate_execute(&mgctx);
2970 out_finish:
2971 cgroup_migrate_finish(&mgctx);
2972 percpu_up_write(&cgroup_threadgroup_rwsem);
2973 return ret;
2974 }
2975
2976 /**
2977  * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
2978  * @cgrp: root of the target subtree
2979  *
2980  * Because css offlining is asynchronous, userland may try to re-enable a
2981  * controller while the previous css is still around.  This function grabs
2982  * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
2983  */
2984 void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
2985 __acquires(&cgroup_mutex)
2986 {
2987 struct cgroup *dsct;
2988 struct cgroup_subsys_state *d_css;
2989 struct cgroup_subsys *ss;
2990 int ssid;
2991
2992 restart:
2993 mutex_lock(&cgroup_mutex);
2994
2995 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
2996 for_each_subsys(ss, ssid) {
2997 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
2998 DEFINE_WAIT(wait);
2999
3000 if (!css || !percpu_ref_is_dying(&css->refcnt))
3001 continue;
3002
3003 cgroup_get_live(dsct);
3004 prepare_to_wait(&dsct->offline_waitq, &wait,
3005 TASK_UNINTERRUPTIBLE);
3006
3007 mutex_unlock(&cgroup_mutex);
3008 schedule();
3009 finish_wait(&dsct->offline_waitq, &wait);
3010
3011 cgroup_put(dsct);
3012 goto restart;
3013 }
3014 }
3015 }
3016
3017 /**
3018  * cgroup_save_control - save control masks and dom_cgrp of a subtree
3019  * @cgrp: root of the target subtree
3020  *
3021  * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the
3022  * respective old_ prefixed fields for @cgrp's subtree including @cgrp
3023  * itself.
3024  */
3025 static void cgroup_save_control(struct cgroup *cgrp)
3026 {
3027 struct cgroup *dsct;
3028 struct cgroup_subsys_state *d_css;
3029
3030 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3031 dsct->old_subtree_control = dsct->subtree_control;
3032 dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
3033 dsct->old_dom_cgrp = dsct->dom_cgrp;
3034 }
3035 }
3036
3037 /**
3038  * cgroup_propagate_control - refresh control masks of a subtree
3039  * @cgrp: root of the target subtree
3040  *
3041  * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
3042  * ->subtree_control and propagate controller availability down the
3043  * subtree so only available controllers stay enabled.
3044  */
3045 static void cgroup_propagate_control(struct cgroup *cgrp)
3046 {
3047 struct cgroup *dsct;
3048 struct cgroup_subsys_state *d_css;
3049
3050 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3051 dsct->subtree_control &= cgroup_control(dsct);
3052 dsct->subtree_ss_mask =
3053 cgroup_calc_subtree_ss_mask(dsct->subtree_control,
3054 cgroup_ss_mask(dsct));
3055 }
3056 }
3057
3058 /**
3059  * cgroup_restore_control - restore control masks and dom_cgrp of a subtree
3060  * @cgrp: root of the target subtree
3061  *
3062  * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the
3063  * respective old_ prefixed fields for @cgrp's subtree including @cgrp
3064  * itself.
3065  */
3066 static void cgroup_restore_control(struct cgroup *cgrp)
3067 {
3068 struct cgroup *dsct;
3069 struct cgroup_subsys_state *d_css;
3070
3071 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3072 dsct->subtree_control = dsct->old_subtree_control;
3073 dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
3074 dsct->dom_cgrp = dsct->old_dom_cgrp;
3075 }
3076 }
3077
3078 static bool css_visible(struct cgroup_subsys_state *css)
3079 {
3080 struct cgroup_subsys *ss = css->ss;
3081 struct cgroup *cgrp = css->cgroup;
3082
3083 if (cgroup_control(cgrp) & (1 << ss->id))
3084 return true;
3085 if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
3086 return false;
3087 return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
3088 }
3089
3090 /**
3091  * cgroup_apply_control_enable - enable or show csses according to control
3092  * @cgrp: root of the target subtree
3093  *
3094  * Walk @cgrp's subtree and create new csses or make the existing ones
3095  * visible.  A css is created invisible if it's being implicitly enabled
3096  * through dependency.  An invisible css is made visible when the userland
3097  * explicitly enables it.
3098  *
3099  * Returns 0 on success, -errno on failure.  On failure, csses which have
3100  * been processed already aren't cleaned up.  The caller is responsible for
3101  * cleaning up with cgroup_apply_control_disable().
3102  */
3103 static int cgroup_apply_control_enable(struct cgroup *cgrp)
3104 {
3105 struct cgroup *dsct;
3106 struct cgroup_subsys_state *d_css;
3107 struct cgroup_subsys *ss;
3108 int ssid, ret;
3109
3110 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3111 for_each_subsys(ss, ssid) {
3112 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3113
3114 if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
3115 continue;
3116
3117 if (!css) {
3118 css = css_create(dsct, ss);
3119 if (IS_ERR(css))
3120 return PTR_ERR(css);
3121 }
3122
3123 WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
3124
3125 if (css_visible(css)) {
3126 ret = css_populate_dir(css);
3127 if (ret)
3128 return ret;
3129 }
3130 }
3131 }
3132
3133 return 0;
3134 }
3135
3136 /**
3137  * cgroup_apply_control_disable - kill or hide csses according to control
3138  * @cgrp: root of the target subtree
3139  *
3140  * Walk @cgrp's subtree and kill and hide csses so that they match
3141  * cgroup_ss_mask() and cgroup_visible_mask().
3142  *
3143  * A css is hidden when the userland requests it to be disabled while other
3144  * subsystems are still depending on it.  The css must not actively control
3145  * resources and be in the vanilla state if it's made visible again later.
3146  * Controllers which may be depended upon should provide ->css_reset() for
3147  * this purpose.
3148  */
3149 static void cgroup_apply_control_disable(struct cgroup *cgrp)
3150 {
3151 struct cgroup *dsct;
3152 struct cgroup_subsys_state *d_css;
3153 struct cgroup_subsys *ss;
3154 int ssid;
3155
3156 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3157 for_each_subsys(ss, ssid) {
3158 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3159
3160 if (!css)
3161 continue;
3162
3163 WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
3164
3165 if (css->parent &&
3166 !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
3167 kill_css(css);
3168 } else if (!css_visible(css)) {
3169 css_clear_dir(css);
3170 if (ss->css_reset)
3171 ss->css_reset(css);
3172 }
3173 }
3174 }
3175 }
3176
3177 /**
3178  * cgroup_apply_control - apply control mask updates to the subtree
3179  * @cgrp: root of the target subtree
3180  *
3181  * subsystems can be enabled and disabled in a subtree using the following
3182  * steps.
3183  *
3184  * 1. Call cgroup_save_control() to stash the current state.
3185  * 2. Update ->subtree_control masks in the subtree as desired.
3186  * 3. Call cgroup_apply_control() to apply the changes.
3187  * 4. Optionally perform other related operations.
3188  * 5. Call cgroup_finalize_control() to finish up.
3189  *
3190  * This function implements step 3 and propagates the changes to the
3191  * subtree - creating, killing, showing and hiding csses as needed so
3192  * that the subtree matches the new control masks.
3193  */
3194 static int cgroup_apply_control(struct cgroup *cgrp)
3195 {
3196 int ret;
3197
3198 cgroup_propagate_control(cgrp);
3199
3200 ret = cgroup_apply_control_enable(cgrp);
3201 if (ret)
3202 return ret;
3203
3204 /*
3205  * At this point, cgroup_e_css_by_mask() results reflect the new csses
3206  * making the following cgroup_update_dfl_csses() properly update
3207  * css associations of all tasks in the subtree.
3208  */
3209 ret = cgroup_update_dfl_csses(cgrp);
3210 if (ret)
3211 return ret;
3212
3213 return 0;
3214 }
3215
3216 /**
3217  * cgroup_finalize_control - finalize control mask update
3218  * @cgrp: root of the target subtree
3219  * @ret: the result of the update
3220  *
3221  * Finalize control mask update.  See cgroup_apply_control() for more info.
3222  */
3223 static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
3224 {
3225 if (ret) {
3226 cgroup_restore_control(cgrp);
3227 cgroup_propagate_control(cgrp);
3228 }
3229
3230 cgroup_apply_control_disable(cgrp);
3231 }
3232
3233 static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
3234 {
3235 u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;
3236
3237
3238 if (!enable)
3239 return 0;
3240
3241
3242 if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
3243 return -EOPNOTSUPP;
3244
3245
3246 if (cgroup_is_mixable(cgrp))
3247 return 0;
3248
3249 if (domain_enable) {
3250
3251 if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3252 return -EOPNOTSUPP;
3253 } else {
3254 /*
3255  * Threaded controllers can handle internal competitions
3256  * and are always allowed inside a (prospective) thread
3257  * subtree.
3258  */
3259 if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3260 return 0;
3261 }
3262
3263 /*
3264  * Controllers can't be enabled for a cgroup with tasks to avoid
3265  * child cgroups competing against tasks.
3266  */
3267 if (cgroup_has_tasks(cgrp))
3268 return -EBUSY;
3269
3270 return 0;
3271 }
3272
3273
3274 static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
3275 char *buf, size_t nbytes,
3276 loff_t off)
3277 {
3278 u16 enable = 0, disable = 0;
3279 struct cgroup *cgrp, *child;
3280 struct cgroup_subsys *ss;
3281 char *tok;
3282 int ssid, ret;
3283
3284 /*
3285  * Parse input - space separated list of subsystem names prefixed
3286  * with either + or -.
3287  */
3288 buf = strstrip(buf);
3289 while ((tok = strsep(&buf, " "))) {
3290 if (tok[0] == '\0')
3291 continue;
3292 do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
3293 if (!cgroup_ssid_enabled(ssid) ||
3294 strcmp(tok + 1, ss->name))
3295 continue;
3296
3297 if (*tok == '+') {
3298 enable |= 1 << ssid;
3299 disable &= ~(1 << ssid);
3300 } else if (*tok == '-') {
3301 disable |= 1 << ssid;
3302 enable &= ~(1 << ssid);
3303 } else {
3304 return -EINVAL;
3305 }
3306 break;
3307 } while_each_subsys_mask();
3308 if (ssid == CGROUP_SUBSYS_COUNT)
3309 return -EINVAL;
3310 }
3311
3312 cgrp = cgroup_kn_lock_live(of->kn, true);
3313 if (!cgrp)
3314 return -ENODEV;
3315
3316 for_each_subsys(ss, ssid) {
3317 if (enable & (1 << ssid)) {
3318 if (cgrp->subtree_control & (1 << ssid)) {
3319 enable &= ~(1 << ssid);
3320 continue;
3321 }
3322
3323 if (!(cgroup_control(cgrp) & (1 << ssid))) {
3324 ret = -ENOENT;
3325 goto out_unlock;
3326 }
3327 } else if (disable & (1 << ssid)) {
3328 if (!(cgrp->subtree_control & (1 << ssid))) {
3329 disable &= ~(1 << ssid);
3330 continue;
3331 }
3332
3333
3334 cgroup_for_each_live_child(child, cgrp) {
3335 if (child->subtree_control & (1 << ssid)) {
3336 ret = -EBUSY;
3337 goto out_unlock;
3338 }
3339 }
3340 }
3341 }
3342
3343 if (!enable && !disable) {
3344 ret = 0;
3345 goto out_unlock;
3346 }
3347
3348 ret = cgroup_vet_subtree_control_enable(cgrp, enable);
3349 if (ret)
3350 goto out_unlock;
3351
3352
3353 cgroup_save_control(cgrp);
3354
3355 cgrp->subtree_control |= enable;
3356 cgrp->subtree_control &= ~disable;
3357
3358 ret = cgroup_apply_control(cgrp);
3359 cgroup_finalize_control(cgrp, ret);
3360 if (ret)
3361 goto out_unlock;
3362
3363 kernfs_activate(cgrp->kn);
3364 out_unlock:
3365 cgroup_kn_unlock(of->kn);
3366 return ret ?: nbytes;
3367 }
3368
3369 /**
3370  * cgroup_enable_threaded - make @cgrp threaded
3371  * @cgrp: the target cgroup
3372  *
3373  * Called when "threaded" is written to the cgroup.type interface file and
3374  * tries to make @cgrp threaded and join the parent's resource domain.
3375  * This function is never called on the root cgroup as cgroup.type doesn't
3376  * exist on it.
3377  */
3378 static int cgroup_enable_threaded(struct cgroup *cgrp)
3379 {
3380 struct cgroup *parent = cgroup_parent(cgrp);
3381 struct cgroup *dom_cgrp = parent->dom_cgrp;
3382 struct cgroup *dsct;
3383 struct cgroup_subsys_state *d_css;
3384 int ret;
3385
3386 lockdep_assert_held(&cgroup_mutex);
3387
3388
3389 if (cgroup_is_threaded(cgrp))
3390 return 0;
3391
3392 /*
3393  * If @cgroup is populated or has domain controllers enabled, it
3394  * can't be switched.  While the below cgroup_can_be_thread_root()
3395  * test can catch the same conditions, that's only when @parent is
3396  * not mixable, so let's check it explicitly.
3397  */
3398 if (cgroup_is_populated(cgrp) ||
3399 cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
3400 return -EOPNOTSUPP;
3401
3402
3403 if (!cgroup_is_valid_domain(dom_cgrp) ||
3404 !cgroup_can_be_thread_root(dom_cgrp))
3405 return -EOPNOTSUPP;
3406
3407 /*
3408  * The following shouldn't cause actual migrations and should
3409  * always succeed.
3410  */
3411 cgroup_save_control(cgrp);
3412
3413 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)
3414 if (dsct == cgrp || cgroup_is_threaded(dsct))
3415 dsct->dom_cgrp = dom_cgrp;
3416
3417 ret = cgroup_apply_control(cgrp);
3418 if (!ret)
3419 parent->nr_threaded_children++;
3420
3421 cgroup_finalize_control(cgrp, ret);
3422 return ret;
3423 }
3424
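/* "cgroup.type" read handler - report the domain/threaded state of a cgroup */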
3425 static int cgroup_type_show(struct seq_file *seq, void *v)
3426 {
3427 struct cgroup *cgrp = seq_css(seq)->cgroup;
3428
3429 if (cgroup_is_threaded(cgrp))
3430 seq_puts(seq, "threaded\n");
3431 else if (!cgroup_is_valid_domain(cgrp))
3432 seq_puts(seq, "domain invalid\n");
3433 else if (cgroup_is_thread_root(cgrp))
3434 seq_puts(seq, "domain threaded\n");
3435 else
3436 seq_puts(seq, "domain\n");
3437
3438 return 0;
3439 }
3440
3441 static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
3442 size_t nbytes, loff_t off)
3443 {
3444 struct cgroup *cgrp;
3445 int ret;
3446
3447
3448 if (strcmp(strstrip(buf), "threaded"))
3449 return -EINVAL;
3450
3451
3452 cgrp = cgroup_kn_lock_live(of->kn, true);
3453 if (!cgrp)
3454 return -ENOENT;
3455
3456
3457 ret = cgroup_enable_threaded(cgrp);
3458
3459 cgroup_kn_unlock(of->kn);
3460 return ret ?: nbytes;
3461 }
3462
3463 static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
3464 {
3465 struct cgroup *cgrp = seq_css(seq)->cgroup;
3466 int descendants = READ_ONCE(cgrp->max_descendants);
3467
3468 if (descendants == INT_MAX)
3469 seq_puts(seq, "max\n");
3470 else
3471 seq_printf(seq, "%d\n", descendants);
3472
3473 return 0;
3474 }
3475
3476 static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
3477 char *buf, size_t nbytes, loff_t off)
3478 {
3479 struct cgroup *cgrp;
3480 int descendants;
3481 ssize_t ret;
3482
3483 buf = strstrip(buf);
3484 if (!strcmp(buf, "max")) {
3485 descendants = INT_MAX;
3486 } else {
3487 ret = kstrtoint(buf, 0, &descendants);
3488 if (ret)
3489 return ret;
3490 }
3491
3492 if (descendants < 0)
3493 return -ERANGE;
3494
3495 cgrp = cgroup_kn_lock_live(of->kn, false);
3496 if (!cgrp)
3497 return -ENOENT;
3498
3499 cgrp->max_descendants = descendants;
3500
3501 cgroup_kn_unlock(of->kn);
3502
3503 return nbytes;
3504 }
3505
3506 static int cgroup_max_depth_show(struct seq_file *seq, void *v)
3507 {
3508 struct cgroup *cgrp = seq_css(seq)->cgroup;
3509 int depth = READ_ONCE(cgrp->max_depth);
3510
3511 if (depth == INT_MAX)
3512 seq_puts(seq, "max\n");
3513 else
3514 seq_printf(seq, "%d\n", depth);
3515
3516 return 0;
3517 }
3518
3519 static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
3520 char *buf, size_t nbytes, loff_t off)
3521 {
3522 struct cgroup *cgrp;
3523 ssize_t ret;
3524 int depth;
3525
3526 buf = strstrip(buf);
3527 if (!strcmp(buf, "max")) {
3528 depth = INT_MAX;
3529 } else {
3530 ret = kstrtoint(buf, 0, &depth);
3531 if (ret)
3532 return ret;
3533 }
3534
3535 if (depth < 0)
3536 return -ERANGE;
3537
3538 cgrp = cgroup_kn_lock_live(of->kn, false);
3539 if (!cgrp)
3540 return -ENOENT;
3541
3542 cgrp->max_depth = depth;
3543
3544 cgroup_kn_unlock(of->kn);
3545
3546 return nbytes;
3547 }
3548
3549 static int cgroup_events_show(struct seq_file *seq, void *v)
3550 {
3551 struct cgroup *cgrp = seq_css(seq)->cgroup;
3552
3553 seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
3554 seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
3555
3556 return 0;
3557 }
3558
3559 static int cgroup_stat_show(struct seq_file *seq, void *v)
3560 {
3561 struct cgroup *cgroup = seq_css(seq)->cgroup;
3562
3563 seq_printf(seq, "nr_descendants %d\n",
3564 cgroup->nr_descendants);
3565 seq_printf(seq, "nr_dying_descendants %d\n",
3566 cgroup->nr_dying_descendants);
3567
3568 return 0;
3569 }
3570
3571 static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
3572 struct cgroup *cgrp, int ssid)
3573 {
3574 struct cgroup_subsys *ss = cgroup_subsys[ssid];
3575 struct cgroup_subsys_state *css;
3576 int ret;
3577
3578 if (!ss->css_extra_stat_show)
3579 return 0;
3580
3581 css = cgroup_tryget_css(cgrp, ss);
3582 if (!css)
3583 return 0;
3584
3585 ret = ss->css_extra_stat_show(seq, css);
3586 css_put(css);
3587 return ret;
3588 }
3589
3590 static int cpu_stat_show(struct seq_file *seq, void *v)
3591 {
3592 struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
3593 int ret = 0;
3594
3595 cgroup_base_stat_cputime_show(seq);
3596 #ifdef CONFIG_CGROUP_SCHED
3597 ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
3598 #endif
3599 return ret;
3600 }
3601
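/* PSI (pressure stall information) interface files - io/memory/cpu.pressure */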
3602 #ifdef CONFIG_PSI
3603 static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
3604 {
3605 struct cgroup *cgroup = seq_css(seq)->cgroup;
3606 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3607
3608 return psi_show(seq, psi, PSI_IO);
3609 }
3610 static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
3611 {
3612 struct cgroup *cgroup = seq_css(seq)->cgroup;
3613 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3614
3615 return psi_show(seq, psi, PSI_MEM);
3616 }
3617 static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
3618 {
3619 struct cgroup *cgroup = seq_css(seq)->cgroup;
3620 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3621
3622 return psi_show(seq, psi, PSI_CPU);
3623 }
3624
3625 static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
3626 size_t nbytes, enum psi_res res)
3627 {
3628 struct psi_trigger *new;
3629 struct cgroup *cgrp;
3630
3631 cgrp = cgroup_kn_lock_live(of->kn, false);
3632 if (!cgrp)
3633 return -ENODEV;
3634
3635 cgroup_get(cgrp);
3636 cgroup_kn_unlock(of->kn);
3637
3638 new = psi_trigger_create(&cgrp->psi, buf, nbytes, res);
3639 if (IS_ERR(new)) {
3640 cgroup_put(cgrp);
3641 return PTR_ERR(new);
3642 }
3643
3644 psi_trigger_replace(&of->priv, new);
3645
3646 cgroup_put(cgrp);
3647
3648 return nbytes;
3649 }
3650
3651 static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
3652 char *buf, size_t nbytes,
3653 loff_t off)
3654 {
3655 return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
3656 }
3657
3658 static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
3659 char *buf, size_t nbytes,
3660 loff_t off)
3661 {
3662 return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
3663 }
3664
3665 static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
3666 char *buf, size_t nbytes,
3667 loff_t off)
3668 {
3669 return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
3670 }
3671
3672 static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
3673 poll_table *pt)
3674 {
3675 return psi_trigger_poll(&of->priv, of->file, pt);
3676 }
3677
3678 static void cgroup_pressure_release(struct kernfs_open_file *of)
3679 {
3680 psi_trigger_replace(&of->priv, NULL);
3681 }
3682 #endif
3683
3684 static int cgroup_freeze_show(struct seq_file *seq, void *v)
3685 {
3686 struct cgroup *cgrp = seq_css(seq)->cgroup;
3687
3688 seq_printf(seq, "%d\n", cgrp->freezer.freeze);
3689
3690 return 0;
3691 }
3692
3693 static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
3694 char *buf, size_t nbytes, loff_t off)
3695 {
3696 struct cgroup *cgrp;
3697 ssize_t ret;
3698 int freeze;
3699
3700 ret = kstrtoint(strstrip(buf), 0, &freeze);
3701 if (ret)
3702 return ret;
3703
3704 if (freeze < 0 || freeze > 1)
3705 return -ERANGE;
3706
3707 cgrp = cgroup_kn_lock_live(of->kn, false);
3708 if (!cgrp)
3709 return -ENOENT;
3710
3711 cgroup_freeze(cgrp, freeze);
3712
3713 cgroup_kn_unlock(of->kn);
3714
3715 return nbytes;
3716 }
3717
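/* kernfs operation callbacks backing all cgroup interface files */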
3718 static int cgroup_file_open(struct kernfs_open_file *of)
3719 {
3720 struct cftype *cft = of->kn->priv;
3721
3722 if (cft->open)
3723 return cft->open(of);
3724 return 0;
3725 }
3726
3727 static void cgroup_file_release(struct kernfs_open_file *of)
3728 {
3729 struct cftype *cft = of->kn->priv;
3730
3731 if (cft->release)
3732 cft->release(of);
3733 }
3734
3735 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
3736 size_t nbytes, loff_t off)
3737 {
3738 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
3739 struct cgroup *cgrp = of->kn->parent->priv;
3740 struct cftype *cft = of->kn->priv;
3741 struct cgroup_subsys_state *css;
3742 int ret;
3743
3744
3745
3746
3747
3748
3749
3750 if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
3751 !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
3752 ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
3753 return -EPERM;
3754
3755 if (cft->write)
3756 return cft->write(of, buf, nbytes, off);
3757
3758
3759
3760
3761
3762
3763
3764 rcu_read_lock();
3765 css = cgroup_css(cgrp, cft->ss);
3766 rcu_read_unlock();
3767
3768 if (cft->write_u64) {
3769 unsigned long long v;
3770 ret = kstrtoull(buf, 0, &v);
3771 if (!ret)
3772 ret = cft->write_u64(css, cft, v);
3773 } else if (cft->write_s64) {
3774 long long v;
3775 ret = kstrtoll(buf, 0, &v);
3776 if (!ret)
3777 ret = cft->write_s64(css, cft, v);
3778 } else {
3779 ret = -EINVAL;
3780 }
3781
3782 return ret ?: nbytes;
3783 }
3784
3785 static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
3786 {
3787 struct cftype *cft = of->kn->priv;
3788
3789 if (cft->poll)
3790 return cft->poll(of, pt);
3791
3792 return kernfs_generic_poll(of, pt);
3793 }
3794
3795 static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
3796 {
3797 return seq_cft(seq)->seq_start(seq, ppos);
3798 }
3799
3800 static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
3801 {
3802 return seq_cft(seq)->seq_next(seq, v, ppos);
3803 }
3804
3805 static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
3806 {
3807 if (seq_cft(seq)->seq_stop)
3808 seq_cft(seq)->seq_stop(seq, v);
3809 }
3810
3811 static int cgroup_seqfile_show(struct seq_file *m, void *arg)
3812 {
3813 struct cftype *cft = seq_cft(m);
3814 struct cgroup_subsys_state *css = seq_css(m);
3815
3816 if (cft->seq_show)
3817 return cft->seq_show(m, arg);
3818
3819 if (cft->read_u64)
3820 seq_printf(m, "%llu\n", cft->read_u64(css, cft));
3821 else if (cft->read_s64)
3822 seq_printf(m, "%lld\n", cft->read_s64(css, cft));
3823 else
3824 return -EINVAL;
3825 return 0;
3826 }
3827
3828 static struct kernfs_ops cgroup_kf_single_ops = {
3829 .atomic_write_len = PAGE_SIZE,
3830 .open = cgroup_file_open,
3831 .release = cgroup_file_release,
3832 .write = cgroup_file_write,
3833 .poll = cgroup_file_poll,
3834 .seq_show = cgroup_seqfile_show,
3835 };
3836
3837 static struct kernfs_ops cgroup_kf_ops = {
3838 .atomic_write_len = PAGE_SIZE,
3839 .open = cgroup_file_open,
3840 .release = cgroup_file_release,
3841 .write = cgroup_file_write,
3842 .poll = cgroup_file_poll,
3843 .seq_start = cgroup_seqfile_start,
3844 .seq_next = cgroup_seqfile_next,
3845 .seq_stop = cgroup_seqfile_stop,
3846 .seq_show = cgroup_seqfile_show,
3847 };
3848
3849
3850 static int cgroup_kn_set_ugid(struct kernfs_node *kn)
3851 {
3852 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
3853 .ia_uid = current_fsuid(),
3854 .ia_gid = current_fsgid(), };
3855
3856 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
3857 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
3858 return 0;
3859
3860 return kernfs_setattr(kn, &iattr);
3861 }
3862
3863 static void cgroup_file_notify_timer(struct timer_list *timer)
3864 {
3865 cgroup_file_notify(container_of(timer, struct cgroup_file,
3866 notify_timer));
3867 }
3868
3869 static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
3870 struct cftype *cft)
3871 {
3872 char name[CGROUP_FILE_NAME_MAX];
3873 struct kernfs_node *kn;
3874 struct lock_class_key *key = NULL;
3875 int ret;
3876
3877 #ifdef CONFIG_DEBUG_LOCK_ALLOC
3878 key = &cft->lockdep_key;
3879 #endif
3880 kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
3881 cgroup_file_mode(cft),
3882 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
3883 0, cft->kf_ops, cft,
3884 NULL, key);
3885 if (IS_ERR(kn))
3886 return PTR_ERR(kn);
3887
3888 ret = cgroup_kn_set_ugid(kn);
3889 if (ret) {
3890 kernfs_remove(kn);
3891 return ret;
3892 }
3893
3894 if (cft->file_offset) {
3895 struct cgroup_file *cfile = (void *)css + cft->file_offset;
3896
3897 timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
3898
3899 spin_lock_irq(&cgroup_file_kn_lock);
3900 cfile->kn = kn;
3901 spin_unlock_irq(&cgroup_file_kn_lock);
3902 }
3903
3904 return 0;
3905 }
3906
3907 /**
3908  * cgroup_addrm_files - add or remove files to a cgroup directory
3909  * @css: the target css
3910  * @cgrp: the target cgroup (usually css->cgroup)
3911  * @cfts: array of cftypes to be added
3912  * @is_add: whether to add or remove
3913  *
3914  * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
3915  * For removals, this function never fails.
3916  */
3917 static int cgroup_addrm_files(struct cgroup_subsys_state *css,
3918 struct cgroup *cgrp, struct cftype cfts[],
3919 bool is_add)
3920 {
3921 struct cftype *cft, *cft_end = NULL;
3922 int ret = 0;
3923
3924 lockdep_assert_held(&cgroup_mutex);
3925
3926 restart:
3927 for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
3928
3929 if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
3930 continue;
3931 if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
3932 continue;
3933 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
3934 continue;
3935 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
3936 continue;
3937 if ((cft->flags & CFTYPE_DEBUG) && !cgroup_debug)
3938 continue;
3939 if (is_add) {
3940 ret = cgroup_add_file(css, cgrp, cft);
3941 if (ret) {
3942 pr_warn("%s: failed to add %s, err=%d\n",
3943 __func__, cft->name, ret);
3944 cft_end = cft;
3945 is_add = false;
3946 goto restart;
3947 }
3948 } else {
3949 cgroup_rm_file(cgrp, cft);
3950 }
3951 }
3952 return ret;
3953 }
3954
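/* add or remove @cfts on every visible css of @cfts[0].ss's hierarchy */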
3955 static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
3956 {
3957 struct cgroup_subsys *ss = cfts[0].ss;
3958 struct cgroup *root = &ss->root->cgrp;
3959 struct cgroup_subsys_state *css;
3960 int ret = 0;
3961
3962 lockdep_assert_held(&cgroup_mutex);
3963
3964
3965 css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
3966 struct cgroup *cgrp = css->cgroup;
3967
3968 if (!(css->flags & CSS_VISIBLE))
3969 continue;
3970
3971 ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
3972 if (ret)
3973 break;
3974 }
3975
3976 if (is_add && !ret)
3977 kernfs_activate(root->kn);
3978 return ret;
3979 }
3980
3981 static void cgroup_exit_cftypes(struct cftype *cfts)
3982 {
3983 struct cftype *cft;
3984
3985 for (cft = cfts; cft->name[0] != '\0'; cft++) {
3986
3987 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
3988 kfree(cft->kf_ops);
3989 cft->kf_ops = NULL;
3990 cft->ss = NULL;
3991
3992
3993 cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
3994 }
3995 }
3996
3997 static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3998 {
3999 struct cftype *cft;
4000
4001 for (cft = cfts; cft->name[0] != '\0'; cft++) {
4002 struct kernfs_ops *kf_ops;
4003
4004 WARN_ON(cft->ss || cft->kf_ops);
4005
4006 if (cft->seq_start)
4007 kf_ops = &cgroup_kf_ops;
4008 else
4009 kf_ops = &cgroup_kf_single_ops;
4010
4011 /*
4012  * Ugh... if @cft wants a custom max_write_len, we need to
4013  * make a copy of kf_ops to set its atomic_write_len.
4014  */
4015 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
4016 kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
4017 if (!kf_ops) {
4018 cgroup_exit_cftypes(cfts);
4019 return -ENOMEM;
4020 }
4021 kf_ops->atomic_write_len = cft->max_write_len;
4022 }
4023
4024 cft->kf_ops = kf_ops;
4025 cft->ss = ss;
4026 }
4027
4028 return 0;
4029 }
4030
4031 static int cgroup_rm_cftypes_locked(struct cftype *cfts)
4032 {
4033 lockdep_assert_held(&cgroup_mutex);
4034
4035 if (!cfts || !cfts[0].ss)
4036 return -ENOENT;
4037
4038 list_del(&cfts->node);
4039 cgroup_apply_cftypes(cfts, false);
4040 cgroup_exit_cftypes(cfts);
4041 return 0;
4042 }
4043
4044 /**
4045  * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
4046  * @cfts: zero-length name terminated array of cftypes
4047  *
4048  * Unregister @cfts.  Files described by @cfts are removed from all
4049  * existing cgroups and all future cgroups won't have them either.  This
4050  * function can be called anytime whether @cfts' subsys is attached or
4051  * not.
4052  *
4053  * Returns 0 on successful unregistration, -ENOENT if @cfts is not
4054  * registered.
4055  */
4055 int cgroup_rm_cftypes(struct cftype *cfts)
4056 {
4057 int ret;
4058
4059 mutex_lock(&cgroup_mutex);
4060 ret = cgroup_rm_cftypes_locked(cfts);
4061 mutex_unlock(&cgroup_mutex);
4062 return ret;
4063 }
4064
4065 /**
4066  * cgroup_add_cftypes - add an array of cftypes to a subsystem
4067  * @ss: target cgroup subsystem
4068  * @cfts: zero-length name terminated array of cftypes
4069  *
4070  * Register @cfts to @ss.  Files described by @cfts are created for all
4071  * existing cgroups to which @ss is attached and all future cgroups will
4072  * have them too.  This function can be called anytime whether @ss is
4073  * attached or not.
4074  *
4075  * Returns 0 on successful registration, -errno on failure.  Note that this
4076  * function currently returns 0 as long as @cfts registration is successful
4077  * even if some file creation attempts on existing cgroups fail.
4078  */
4079 static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4080 {
4081 int ret;
4082
4083 if (!cgroup_ssid_enabled(ss->id))
4084 return 0;
4085
4086 if (!cfts || cfts[0].name[0] == '\0')
4087 return 0;
4088
4089 ret = cgroup_init_cftypes(ss, cfts);
4090 if (ret)
4091 return ret;
4092
4093 mutex_lock(&cgroup_mutex);
4094
4095 list_add_tail(&cfts->node, &ss->cfts);
4096 ret = cgroup_apply_cftypes(cfts, true);
4097 if (ret)
4098 cgroup_rm_cftypes_locked(cfts);
4099
4100 mutex_unlock(&cgroup_mutex);
4101 return ret;
4102 }
4103
4104 /**
4105  * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
4106  * @ss: target cgroup subsystem
4107  * @cfts: zero-length name terminated array of cftypes
4108  *
4109  * Similar to cgroup_add_cftypes() but the added files are only used for
4110  * the default hierarchy.
4111  */
4112 int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4113 {
4114 struct cftype *cft;
4115
4116 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
4117 cft->flags |= __CFTYPE_ONLY_ON_DFL;
4118 return cgroup_add_cftypes(ss, cfts);
4119 }
4120
4121 /**
4122  * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
4123  * @ss: target cgroup subsystem
4124  * @cfts: zero-length name terminated array of cftypes
4125  *
4126  * Similar to cgroup_add_cftypes() but the added files are only used for
4127  * the legacy hierarchies.
4128  */
4129 int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4130 {
4131 struct cftype *cft;
4132
4133 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
4134 cft->flags |= __CFTYPE_NOT_ON_DFL;
4135 return cgroup_add_cftypes(ss, cfts);
4136 }
4137
4138 /**
4139  * cgroup_file_notify - generate a file modified event for a cgroup_file
4140  * @cfile: target cgroup_file
4141  *
4142  * @cfile must have been obtained by setting cftype->file_offset.
4143  */
4144 void cgroup_file_notify(struct cgroup_file *cfile)
4145 {
4146 unsigned long flags;
4147
4148 spin_lock_irqsave(&cgroup_file_kn_lock, flags);
4149 if (cfile->kn) {
4150 unsigned long last = cfile->notified_at;
4151 unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
4152
4153 if (time_in_range(jiffies, last, next)) {
4154 timer_reduce(&cfile->notify_timer, next);
4155 } else {
4156 kernfs_notify(cfile->kn);
4157 cfile->notified_at = jiffies;
4158 }
4159 }
4160 spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
4161 }
4162
4163 /**
4164  * css_next_child - find the next child of a given css
4165  * @pos: the current position (%NULL to initiate traversal)
4166  * @parent: css whose children to walk
4167  *
4168  * This function returns the next child of @parent and should be called
4169  * under either cgroup_mutex or RCU read lock.  The only requirement is
4170  * that @parent and @pos are accessible.  The next sibling is guaranteed to
4171  * be returned regardless of their states.
4172  *
4173  * If a subsystem synchronizes ->css_online() and the start of iteration, a
4174  * css which finished ->css_online() is guaranteed to be visible in the
4175  * future iterations and will stay visible until the last reference is put.
4176  * A css which hasn't finished ->css_online() or already finished
4177  * ->css_offline() may show up during traversal.  It's each subsystem's
4178  * responsibility to synchronize against on/offlining.
4179  */
4180 struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
4181 struct cgroup_subsys_state *parent)
4182 {
4183 struct cgroup_subsys_state *next;
4184
4185 cgroup_assert_mutex_or_rcu_locked();
4186
4187 /*
4188  * @pos could already have been unlinked from the sibling list.
4189  * Once a cgroup is removed, its ->sibling.next is no longer
4190  * updated when its next sibling changes.  CSS_RELEASED is set when
4191  * @pos is taken off list, at which time its next pointer is valid,
4192  * and, as releases are serialized, the one pointed to by the next
4193  * pointer is guaranteed to not have started release yet.  This
4194  * implies that if we observe !CSS_RELEASED on @pos in this RCU
4195  * critical section, the one pointed to by its next pointer is
4196  * guaranteed to not have finished its RCU grace period even if we
4197  * have dropped rcu_read_lock() in-between iterations.
4198  *
4199  * If @pos has CSS_RELEASED set, its next pointer can't be
4200  * dereferenced; however, as each css is given a monotonically
4201  * increasing unique serial number and always appended to the
4202  * sibling list, the next one can be found by walking the parent's
4203  * children until the first css with higher serial number than
4204  * @pos's.  While this path can be slower, it happens iff iteration
4205  * races against release and the race window is very small.
4206  */
4207 if (!pos) {
4208 next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
4209 } else if (likely(!(pos->flags & CSS_RELEASED))) {
4210 next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
4211 } else {
4212 list_for_each_entry_rcu(next, &parent->children, sibling)
4213 if (next->serial_nr > pos->serial_nr)
4214 break;
4215 }
4216
4217 /*
4218  * @next, if not pointing to the head, can be dereferenced and is
4219  * the next sibling.
4220  */
4221 if (&next->sibling != &parent->children)
4222 return next;
4223 return NULL;
4224 }
4225
4226 /**
4227  * css_next_descendant_pre - find the next descendant for pre-order walk
4228  * @pos: the current position (%NULL to initiate traversal)
4229  * @root: css whose descendants to walk
4230  *
4231  * To be used by css_for_each_descendant_pre().  Find the next descendant
4232  * to visit for pre-order traversal of @root's descendants.  @root is
4233  * included in the iteration and the first node to be visited.
4234  *
4235  * While this function requires cgroup_mutex or RCU read locking, it
4236  * doesn't require the whole traversal to be contained in a single critical
4237  * section.  This function will return the correct next descendant as long
4238  * as both @pos and @root are accessible and @pos is a descendant of @root.
4239  *
4240  * If a subsystem synchronizes ->css_online() and the start of iteration, a
4241  * css which finished ->css_online() is guaranteed to be visible in the
4242  * future iterations and will stay visible until the last reference is put.
4243  * A css which hasn't finished ->css_online() or already finished
4244  * ->css_offline() may show up during traversal.  It's each subsystem's
4245  * responsibility to synchronize against on/offlining.
4246  */
4247 struct cgroup_subsys_state *
4248 css_next_descendant_pre(struct cgroup_subsys_state *pos,
4249 struct cgroup_subsys_state *root)
4250 {
4251 struct cgroup_subsys_state *next;
4252
4253 cgroup_assert_mutex_or_rcu_locked();
4254
4255
4256 if (!pos)
4257 return root;
4258
4259
4260 next = css_next_child(NULL, pos);
4261 if (next)
4262 return next;
4263
4264
4265 while (pos != root) {
4266 next = css_next_child(pos, pos->parent);
4267 if (next)
4268 return next;
4269 pos = pos->parent;
4270 }
4271
4272 return NULL;
4273 }
4274 EXPORT_SYMBOL_GPL(css_next_descendant_pre);
4275
4276 /**
4277  * css_rightmost_descendant - return the rightmost descendant of a css
4278  * @pos: css of interest
4279  *
4280  * Return the rightmost descendant of @pos.  If there's no descendant, @pos
4281  * is returned.  This can be used during pre-order traversal to skip
4282  * subtree of @pos.
4283  *
4284  * While this function requires cgroup_mutex or RCU read locking, it
4285  * doesn't require the whole traversal to be contained in a single critical
4286  * section.  This function will return the correct rightmost descendant as
4287  * long as @pos is accessible.
4288  */
4289 struct cgroup_subsys_state *
4290 css_rightmost_descendant(struct cgroup_subsys_state *pos)
4291 {
4292 struct cgroup_subsys_state *last, *tmp;
4293
4294 cgroup_assert_mutex_or_rcu_locked();
4295
4296 do {
4297 last = pos;
4298
4299 pos = NULL;
4300 css_for_each_child(tmp, last)
4301 pos = tmp;
4302 } while (pos);
4303
4304 return last;
4305 }
4306
4307 static struct cgroup_subsys_state *
4308 css_leftmost_descendant(struct cgroup_subsys_state *pos)
4309 {
4310 struct cgroup_subsys_state *last;
4311
4312 do {
4313 last = pos;
4314 pos = css_next_child(NULL, pos);
4315 } while (pos);
4316
4317 return last;
4318 }
4319
4320
4321 /**
4322  * css_next_descendant_post - find the next descendant for post-order walk
4323  * @pos: the current position (%NULL to initiate traversal)
4324  * @root: css whose descendants to walk
4325  *
4326  * To be used by css_for_each_descendant_post().  Find the next descendant
4327  * to visit for post-order traversal of @root's descendants.  @root is
4328  * included in the iteration and the last node to be visited.
4329  *
4330  * While this function requires cgroup_mutex or RCU read locking, it
4331  * doesn't require the whole traversal to be contained in a single critical
4332  * section.  This function will return the correct next descendant as long
4333  * as both @pos and @root are accessible and @pos is a descendant of @root.
4334  *
4335  * If a subsystem synchronizes ->css_online() and the start of iteration, a
4336  * css which finished ->css_online() is guaranteed to be visible in the
4337  * future iterations and will stay visible until the last reference is put.
4338  * A css which hasn't finished ->css_online() or already finished
4339  * ->css_offline() may show up during traversal.  It's each subsystem's
4340  * responsibility to synchronize against on/offlining.
4341  */
4342 struct cgroup_subsys_state *
4343 css_next_descendant_post(struct cgroup_subsys_state *pos,
4344 struct cgroup_subsys_state *root)
4345 {
4346 struct cgroup_subsys_state *next;
4347
4348 cgroup_assert_mutex_or_rcu_locked();
4349
4350
4351 if (!pos)
4352 return css_leftmost_descendant(root);
4353
4354
4355 if (pos == root)
4356 return NULL;
4357
4358
4359 next = css_next_child(pos, pos->parent);
4360 if (next)
4361 return css_leftmost_descendant(next);
4362
4363
4364 return pos->parent;
4365 }
4366
4367 /**
4368  * css_has_online_children - does a css have online children
4369  * @css: the target css
4370  *
4371  * Returns %true if @css has any online children; otherwise, %false.  This
4372  * function can be called from any context but the caller is responsible
4373  * for synchronizing against on/offlining as necessary.
4374  */
4375 bool css_has_online_children(struct cgroup_subsys_state *css)
4376 {
4377 struct cgroup_subsys_state *child;
4378 bool ret = false;
4379
4380 rcu_read_lock();
4381 css_for_each_child(child, css) {
4382 if (child->flags & CSS_ONLINE) {
4383 ret = true;
4384 break;
4385 }
4386 }
4387 rcu_read_unlock();
4388 return ret;
4389 }
4390
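/*
 * Return the next css_set to walk for @it, descending into the threaded
 * csets of a domain cset when CSS_TASK_ITER_THREADED is set.  Returns
 * NULL when iteration reaches the end.
 */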
4391 static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
4392 {
4393 struct list_head *l;
4394 struct cgrp_cset_link *link;
4395 struct css_set *cset;
4396
4397 lockdep_assert_held(&css_set_lock);
4398
4399
4400 if (it->tcset_pos) {
4401 l = it->tcset_pos->next;
4402
4403 if (l != it->tcset_head) {
4404 it->tcset_pos = l;
4405 return container_of(l, struct css_set,
4406 threaded_csets_node);
4407 }
4408
4409 it->tcset_pos = NULL;
4410 }
4411
4412
4413 l = it->cset_pos;
4414 l = l->next;
4415 if (l == it->cset_head) {
4416 it->cset_pos = NULL;
4417 return NULL;
4418 }
4419
4420 if (it->ss) {
4421 cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
4422 } else {
4423 link = list_entry(l, struct cgrp_cset_link, cset_link);
4424 cset = link->cset;
4425 }
4426
4427 it->cset_pos = l;
4428
4429
4430 if (it->flags & CSS_TASK_ITER_THREADED) {
4431 if (it->cur_dcset)
4432 put_css_set_locked(it->cur_dcset);
4433 it->cur_dcset = cset;
4434 get_css_set(cset);
4435
4436 it->tcset_head = &cset->threaded_csets;
4437 it->tcset_pos = &cset->threaded_csets;
4438 }
4439
4440 return cset;
4441 }
4442
4443 /**
4444  * css_task_iter_advance_css_set - advance a task iterator to the next css_set
4445  * @it: the iterator to advance
4446  *
4447  * Advance @it to the next css_set to walk.
4448  */
4449 static void css_task_iter_advance_css_set(struct css_task_iter *it)
4450 {
4451 struct css_set *cset;
4452
4453 lockdep_assert_held(&css_set_lock);
4454
4455
4456 do {
4457 cset = css_task_iter_next_css_set(it);
4458 if (!cset) {
4459 it->task_pos = NULL;
4460 return;
4461 }
4462 } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
4463
4464 if (!list_empty(&cset->tasks)) {
4465 it->task_pos = cset->tasks.next;
4466 it->cur_tasks_head = &cset->tasks;
4467 } else if (!list_empty(&cset->mg_tasks)) {
4468 it->task_pos = cset->mg_tasks.next;
4469 it->cur_tasks_head = &cset->mg_tasks;
4470 } else {
4471 it->task_pos = cset->dying_tasks.next;
4472 it->cur_tasks_head = &cset->dying_tasks;
4473 }
4474
4475 it->tasks_head = &cset->tasks;
4476 it->mg_tasks_head = &cset->mg_tasks;
4477 it->dying_tasks_head = &cset->dying_tasks;
4478
4479 /*
4480  * We don't keep css_sets locked across iteration steps and thus
4481  * need to take steps to ensure that iteration can be resumed after
4482  * the lock is re-acquired.  Iteration is performed at two levels -
4483  * css_sets and tasks in them.
4484  *
4485  * Once created, a css_set never leaves its cgroup lists, so a
4486  * pinned css_set is guaranteed to stay put and we can resume
4487  * iteration afterwards.
4488  *
4489  * Tasks may leave @cset across iteration steps.  This is resolved
4490  * by registering each iterator with the css_set currently being
4491  * walked and making css_set_move_task() advance iterators whose
4492  * next task is leaving.
4493  */
4494 if (it->cur_cset) {
4495 list_del(&it->iters_node);
4496 put_css_set_locked(it->cur_cset);
4497 }
4498 get_css_set(cset);
4499 it->cur_cset = cset;
4500 list_add(&it->iters_node, &cset->task_iters);
4501 }
4502
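/* advance @it past @task which is leaving its css_set, see css_set_skip_task_iters() */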
4503 static void css_task_iter_skip(struct css_task_iter *it,
4504 struct task_struct *task)
4505 {
4506 lockdep_assert_held(&css_set_lock);
4507
4508 if (it->task_pos == &task->cg_list) {
4509 it->task_pos = it->task_pos->next;
4510 it->flags |= CSS_TASK_ITER_SKIPPED;
4511 }
4512 }
4513
4514 static void css_task_iter_advance(struct css_task_iter *it)
4515 {
4516 struct task_struct *task;
4517
4518 lockdep_assert_held(&css_set_lock);
4519 repeat:
4520 if (it->task_pos) {
4521 /*
4522  * Advance iterator to find next entry.  We go through cset
4523  * tasks, mg_tasks and dying_tasks, when consumed we move onto
4524  * the next cset.
4525  */
4526 if (it->flags & CSS_TASK_ITER_SKIPPED)
4527 it->flags &= ~CSS_TASK_ITER_SKIPPED;
4528 else
4529 it->task_pos = it->task_pos->next;
4530
4531 if (it->task_pos == it->tasks_head) {
4532 it->task_pos = it->mg_tasks_head->next;
4533 it->cur_tasks_head = it->mg_tasks_head;
4534 }
4535 if (it->task_pos == it->mg_tasks_head) {
4536 it->task_pos = it->dying_tasks_head->next;
4537 it->cur_tasks_head = it->dying_tasks_head;
4538 }
4539 if (it->task_pos == it->dying_tasks_head)
4540 css_task_iter_advance_css_set(it);
4541 } else {
4542
4543 css_task_iter_advance_css_set(it);
4544 }
4545
4546 if (!it->task_pos)
4547 return;
4548
4549 task = list_entry(it->task_pos, struct task_struct, cg_list);
4550
4551 if (it->flags & CSS_TASK_ITER_PROCS) {
4552
4553 if (!thread_group_leader(task))
4554 goto repeat;
4555
4556
4557 if (it->cur_tasks_head == it->dying_tasks_head &&
4558 !atomic_read(&task->signal->live))
4559 goto repeat;
4560 } else {
4561
4562 if (it->cur_tasks_head == it->dying_tasks_head)
4563 goto repeat;
4564 }
4565 }
4566
4567 /**
4568  * css_task_iter_start - initiate task iteration
4569  * @css: the css to walk tasks of
4570  * @flags: CSS_TASK_ITER_* flags
4571  * @it: the task iterator to use
4572  *
4573  * Initiate iteration through the tasks of @css.  The caller can call
4574  * css_task_iter_next() to walk through the tasks until the function
4575  * returns NULL.  On completion of iteration, css_task_iter_end() must be
4576  * called.
4577  */
4578 void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
4579 struct css_task_iter *it)
4580 {
4581
4582 WARN_ON_ONCE(!use_task_css_set_links);
4583
4584 memset(it, 0, sizeof(*it));
4585
4586 spin_lock_irq(&css_set_lock);
4587
4588 it->ss = css->ss;
4589 it->flags = flags;
4590
4591 if (it->ss)
4592 it->cset_pos = &css->cgroup->e_csets[css->ss->id];
4593 else
4594 it->cset_pos = &css->cgroup->cset_links;
4595
4596 it->cset_head = it->cset_pos;
4597
4598 css_task_iter_advance(it);
4599
4600 spin_unlock_irq(&css_set_lock);
4601 }
4602
4603 /**
4604  * css_task_iter_next - return the next task for the iterator
4605  * @it: the task iterator being iterated
4606  *
4607  * The "next" function for task iteration.  @it should have been
4608  * initialized via css_task_iter_start().  Returns NULL when the iteration
4609  * reaches the end.
4610  */
4611 struct task_struct *css_task_iter_next(struct css_task_iter *it)
4612 {
4613 if (it->cur_task) {
4614 put_task_struct(it->cur_task);
4615 it->cur_task = NULL;
4616 }
4617
4618 spin_lock_irq(&css_set_lock);
4619
4620
4621 if (it->flags & CSS_TASK_ITER_SKIPPED)
4622 css_task_iter_advance(it);
4623
4624 if (it->task_pos) {
4625 it->cur_task = list_entry(it->task_pos, struct task_struct,
4626 cg_list);
4627 get_task_struct(it->cur_task);
4628 css_task_iter_advance(it);
4629 }
4630
4631 spin_unlock_irq(&css_set_lock);
4632
4633 return it->cur_task;
4634 }
4635
4636 /**
4637  * css_task_iter_end - finish task iteration
4638  * @it: the task iterator to finish
4639  *
4640  * Finish task iteration started by css_task_iter_start().
4641  */
4642 void css_task_iter_end(struct css_task_iter *it)
4643 {
4644 if (it->cur_cset) {
4645 spin_lock_irq(&css_set_lock);
4646 list_del(&it->iters_node);
4647 put_css_set_locked(it->cur_cset);
4648 spin_unlock_irq(&css_set_lock);
4649 }
4650
4651 if (it->cur_dcset)
4652 put_css_set(it->cur_dcset);
4653
4654 if (it->cur_task)
4655 put_task_struct(it->cur_task);
4656 }
4657
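/* seq_file plumbing shared by "cgroup.procs" and "cgroup.threads" reads */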
4658 static void cgroup_procs_release(struct kernfs_open_file *of)
4659 {
4660 if (of->priv) {
4661 css_task_iter_end(of->priv);
4662 kfree(of->priv);
4663 }
4664 }
4665
4666 static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
4667 {
4668 struct kernfs_open_file *of = s->private;
4669 struct css_task_iter *it = of->priv;
4670
4671 if (pos)
4672 (*pos)++;
4673
4674 return css_task_iter_next(it);
4675 }
4676
4677 static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
4678 unsigned int iter_flags)
4679 {
4680 struct kernfs_open_file *of = s->private;
4681 struct cgroup *cgrp = seq_css(s)->cgroup;
4682 struct css_task_iter *it = of->priv;
4683
4684 /*
4685  * When a seq_file is seeked, it's always traversed sequentially
4686  * from position 0, so we can simply keep iterating on !0 *pos.
4687  */
4688 if (!it) {
4689 if (WARN_ON_ONCE((*pos)))
4690 return ERR_PTR(-EINVAL);
4691
4692 it = kzalloc(sizeof(*it), GFP_KERNEL);
4693 if (!it)
4694 return ERR_PTR(-ENOMEM);
4695 of->priv = it;
4696 css_task_iter_start(&cgrp->self, iter_flags, it);
4697 } else if (!(*pos)) {
4698 css_task_iter_end(it);
4699 css_task_iter_start(&cgrp->self, iter_flags, it);
4700 } else
4701 return it->cur_task;
4702
4703 return cgroup_procs_next(s, NULL, NULL);
4704 }
4705
4706 static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
4707 {
4708 struct cgroup *cgrp = seq_css(s)->cgroup;
4709
4710 /*
4711  * All processes of a threaded subtree belong to the domain cgroup
4712  * of the subtree.  Only threads can be distributed across the
4713  * subtree.  Reject reads on cgroup.procs in the subtree proper.
4714  * They're always empty anyway.
4715  */
4716 if (cgroup_is_threaded(cgrp))
4717 return ERR_PTR(-EOPNOTSUPP);
4718
4719 return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS |
4720 CSS_TASK_ITER_THREADED);
4721 }
4722
4723 static int cgroup_procs_show(struct seq_file *s, void *v)
4724 {
4725 seq_printf(s, "%d\n", task_pid_vnr(v));
4726 return 0;
4727 }
4728
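/*
 * A migration from @src_cgrp to @dst_cgrp is allowed only if the writer
 * has write access to the cgroup.procs file of the common ancestor of
 * the two cgroups.
 */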
4729 static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
4730 struct cgroup *dst_cgrp,
4731 struct super_block *sb)
4732 {
4733 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
4734 struct cgroup *com_cgrp = src_cgrp;
4735 struct inode *inode;
4736 int ret;
4737
4738 lockdep_assert_held(&cgroup_mutex);
4739
4740
4741 while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
4742 com_cgrp = cgroup_parent(com_cgrp);
4743
4744
4745 inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn);
4746 if (!inode)
4747 return -ENOMEM;
4748
4749 ret = inode_permission(inode, MAY_WRITE);
4750 iput(inode);
4751 if (ret)
4752 return ret;
4753
4754 /*
4755  * If namespaces are delegation boundaries, %current must be able
4756  * to see both source and destination cgroups from its namespace.
4757  */
4758 if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
4759 (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) ||
4760 !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp)))
4761 return -ENOENT;
4762
4763 return 0;
4764 }
4765
4766 static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
4767 char *buf, size_t nbytes, loff_t off)
4768 {
4769 struct cgroup *src_cgrp, *dst_cgrp;
4770 struct task_struct *task;
4771 ssize_t ret;
4772
4773 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
4774 if (!dst_cgrp)
4775 return -ENODEV;
4776
4777 task = cgroup_procs_write_start(buf, true);
4778 ret = PTR_ERR_OR_ZERO(task);
4779 if (ret)
4780 goto out_unlock;
4781
4782
4783 spin_lock_irq(&css_set_lock);
4784 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
4785 spin_unlock_irq(&css_set_lock);
4786
4787 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
4788 of->file->f_path.dentry->d_sb);
4789 if (ret)
4790 goto out_finish;
4791
4792 ret = cgroup_attach_task(dst_cgrp, task, true);
4793
4794 out_finish:
4795 cgroup_procs_write_finish(task);
4796 out_unlock:
4797 cgroup_kn_unlock(of->kn);
4798
4799 return ret ?: nbytes;
4800 }
4801
4802 static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
4803 {
4804 return __cgroup_procs_start(s, pos, 0);
4805 }
4806
4807 static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
4808 char *buf, size_t nbytes, loff_t off)
4809 {
4810 struct cgroup *src_cgrp, *dst_cgrp;
4811 struct task_struct *task;
4812 ssize_t ret;
4813
4814 buf = strstrip(buf);
4815
4816 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
4817 if (!dst_cgrp)
4818 return -ENODEV;
4819
4820 task = cgroup_procs_write_start(buf, false);
4821 ret = PTR_ERR_OR_ZERO(task);
4822 if (ret)
4823 goto out_unlock;
4824
4825
4826 spin_lock_irq(&css_set_lock);
4827 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
4828 spin_unlock_irq(&css_set_lock);
4829
4830
4831 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
4832 of->file->f_path.dentry->d_sb);
4833 if (ret)
4834 goto out_finish;
4835
4836
4837 ret = -EOPNOTSUPP;
4838 if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)
4839 goto out_finish;
4840
4841 ret = cgroup_attach_task(dst_cgrp, task, false);
4842
4843 out_finish:
4844 cgroup_procs_write_finish(task);
4845 out_unlock:
4846 cgroup_kn_unlock(of->kn);
4847
4848 return ret ?: nbytes;
4849 }
4850
4851
4852 static struct cftype cgroup_base_files[] = {
4853 {
4854 .name = "cgroup.type",
4855 .flags = CFTYPE_NOT_ON_ROOT,
4856 .seq_show = cgroup_type_show,
4857 .write = cgroup_type_write,
4858 },
4859 {
4860 .name = "cgroup.procs",
4861 .flags = CFTYPE_NS_DELEGATABLE,
4862 .file_offset = offsetof(struct cgroup, procs_file),
4863 .release = cgroup_procs_release,
4864 .seq_start = cgroup_procs_start,
4865 .seq_next = cgroup_procs_next,
4866 .seq_show = cgroup_procs_show,
4867 .write = cgroup_procs_write,
4868 },
4869 {
4870 .name = "cgroup.threads",
4871 .flags = CFTYPE_NS_DELEGATABLE,
4872 .release = cgroup_procs_release,
4873 .seq_start = cgroup_threads_start,
4874 .seq_next = cgroup_procs_next,
4875 .seq_show = cgroup_procs_show,
4876 .write = cgroup_threads_write,
4877 },
4878 {
4879 .name = "cgroup.controllers",
4880 .seq_show = cgroup_controllers_show,
4881 },
4882 {
4883 .name = "cgroup.subtree_control",
4884 .flags = CFTYPE_NS_DELEGATABLE,
4885 .seq_show = cgroup_subtree_control_show,
4886 .write = cgroup_subtree_control_write,
4887 },
4888 {
4889 .name = "cgroup.events",
4890 .flags = CFTYPE_NOT_ON_ROOT,
4891 .file_offset = offsetof(struct cgroup, events_file),
4892 .seq_show = cgroup_events_show,
4893 },
4894 {
4895 .name = "cgroup.max.descendants",
4896 .seq_show = cgroup_max_descendants_show,
4897 .write = cgroup_max_descendants_write,
4898 },
4899 {
4900 .name = "cgroup.max.depth",
4901 .seq_show = cgroup_max_depth_show,
4902 .write = cgroup_max_depth_write,
4903 },
4904 {
4905 .name = "cgroup.stat",
4906 .seq_show = cgroup_stat_show,
4907 },
4908 {
4909 .name = "cgroup.freeze",
4910 .flags = CFTYPE_NOT_ON_ROOT,
4911 .seq_show = cgroup_freeze_show,
4912 .write = cgroup_freeze_write,
4913 },
4914 {
4915 .name = "cpu.stat",
4916 .flags = CFTYPE_NOT_ON_ROOT,
4917 .seq_show = cpu_stat_show,
4918 },
4919 #ifdef CONFIG_PSI
4920 {
4921 .name = "io.pressure",
4922 .seq_show = cgroup_io_pressure_show,
4923 .write = cgroup_io_pressure_write,
4924 .poll = cgroup_pressure_poll,
4925 .release = cgroup_pressure_release,
4926 },
4927 {
4928 .name = "memory.pressure",
4929 .seq_show = cgroup_memory_pressure_show,
4930 .write = cgroup_memory_pressure_write,
4931 .poll = cgroup_pressure_poll,
4932 .release = cgroup_pressure_release,
4933 },
4934 {
4935 .name = "cpu.pressure",
4936 .seq_show = cgroup_cpu_pressure_show,
4937 .write = cgroup_cpu_pressure_write,
4938 .poll = cgroup_pressure_poll,
4939 .release = cgroup_pressure_release,
4940 },
4941 #endif
4942 { }
4943 };
4944
4945 /*
4946  * css destruction is four-stage process.
4947  *
4948  * 1. Destruction starts.  Killing of the percpu_ref is initiated.
4949  *    Implemented in kill_css().
4950  *
4951  * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
4952  *    and thus css_tryget_online() is guaranteed to be denied, the css can
4953  *    be offlined by invoking offline_css().  After offlining, the base ref
4954  *    is put.  Implemented in css_killed_work_fn().
4955  *
4956  * 3. When the percpu_ref reaches zero, the only possible remaining
4957  *    accessors are inside RCU read sections.  css_release() schedules the
4958  *    RCU callback.
4959  *
4960  * 4. After the grace period, the css can be freed.  Implemented in
4961  *    css_free_rwork_fn().
4962  *
4963  * It is actually hairier because both step 2 and 4 require process context
4964  * and thus involve punting to css->destroy_work adding two additional
4965  * steps to the already complex sequence.
4966  */
4967 static void css_free_rwork_fn(struct work_struct *work)
4968 {
4969 struct cgroup_subsys_state *css = container_of(to_rcu_work(work),
4970 struct cgroup_subsys_state, destroy_rwork);
4971 struct cgroup_subsys *ss = css->ss;
4972 struct cgroup *cgrp = css->cgroup;
4973
4974 percpu_ref_exit(&css->refcnt);
4975
4976 if (ss) {
4977
4978 struct cgroup_subsys_state *parent = css->parent;
4979 int id = css->id;
4980
4981 ss->css_free(css);
4982 cgroup_idr_remove(&ss->css_idr, id);
4983 cgroup_put(cgrp);
4984
4985 if (parent)
4986 css_put(parent);
4987 } else {
4988
4989 atomic_dec(&cgrp->root->nr_cgrps);
4990 cgroup1_pidlist_destroy_all(cgrp);
4991 cancel_work_sync(&cgrp->release_agent_work);
4992
4993 if (cgroup_parent(cgrp)) {
4994 /*
4995  * We get a ref to the parent, and put the ref when
4996  * this cgroup is being freed, so it's guaranteed
4997  * that the parent won't be destroyed before its
4998  * children.
4999  */
5000 cgroup_put(cgroup_parent(cgrp));
5001 kernfs_put(cgrp->kn);
5002 psi_cgroup_free(cgrp);
5003 if (cgroup_on_dfl(cgrp))
5004 cgroup_rstat_exit(cgrp);
5005 kfree(cgrp);
5006 } else {
5007 /*
5008  * This is root cgroup's refcnt reaching zero,
5009  * which indicates that the root should be
5010  * released.
5011  */
5012 cgroup_destroy_root(cgrp->root);
5013 }
5014 }
5015 }
5016
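/*
 * The css refcnt reached zero: unlink the css under cgroup_mutex and
 * schedule the RCU-delayed free via css_free_rwork_fn().
 */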
5017 static void css_release_work_fn(struct work_struct *work)
5018 {
5019 struct cgroup_subsys_state *css =
5020 container_of(work, struct cgroup_subsys_state, destroy_work);
5021 struct cgroup_subsys *ss = css->ss;
5022 struct cgroup *cgrp = css->cgroup;
5023
5024 mutex_lock(&cgroup_mutex);
5025
5026 css->flags |= CSS_RELEASED;
5027 list_del_rcu(&css->sibling);
5028
5029 if (ss) {
5030
5031 if (!list_empty(&css->rstat_css_node)) {
5032 cgroup_rstat_flush(cgrp);
5033 list_del_rcu(&css->rstat_css_node);
5034 }
5035
5036 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
5037 if (ss->css_released)
5038 ss->css_released(css);
5039 } else {
5040 struct cgroup *tcgrp;
5041
5042
5043 TRACE_CGROUP_PATH(release, cgrp);
5044
5045 if (cgroup_on_dfl(cgrp))
5046 cgroup_rstat_flush(cgrp);
5047
5048 spin_lock_irq(&css_set_lock);
5049 for (tcgrp = cgroup_parent(cgrp); tcgrp;
5050 tcgrp = cgroup_parent(tcgrp))
5051 tcgrp->nr_dying_descendants--;
5052 spin_unlock_irq(&css_set_lock);
5053
5054 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
5055 cgrp->id = -1;
5056
5057 /*
5058  * There are two control paths which try to determine
5059  * cgroup from dentry without going through kernfs -
5060  * cgroupstats_build() and css_tryget_online_from_dir().
5061  * Those are supported by RCU protecting clearing of
5062  * cgrp->kn->priv backpointer.
5063  */
5064 if (cgrp->kn)
5065 RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
5066 NULL);
5067 }
5068
5069 mutex_unlock(&cgroup_mutex);
5070
5071 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5072 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5073 }
5074
5075 static void css_release(struct percpu_ref *ref)
5076 {
5077 struct cgroup_subsys_state *css =
5078 container_of(ref, struct cgroup_subsys_state, refcnt);
5079
5080 INIT_WORK(&css->destroy_work, css_release_work_fn);
5081 queue_work(cgroup_destroy_wq, &css->destroy_work);
5082 }
5083
5084 static void init_and_link_css(struct cgroup_subsys_state *css,
5085 struct cgroup_subsys *ss, struct cgroup *cgrp)
5086 {
5087 lockdep_assert_held(&cgroup_mutex);
5088
5089 cgroup_get_live(cgrp);
5090
5091 memset(css, 0, sizeof(*css));
5092 css->cgroup = cgrp;
5093 css->ss = ss;
5094 css->id = -1;
5095 INIT_LIST_HEAD(&css->sibling);
5096 INIT_LIST_HEAD(&css->children);
5097 INIT_LIST_HEAD(&css->rstat_css_node);
5098 css->serial_nr = css_serial_nr_next++;
5099 atomic_set(&css->online_cnt, 0);
5100
5101 if (cgroup_parent(cgrp)) {
5102 css->parent = cgroup_css(cgroup_parent(cgrp), ss);
5103 css_get(css->parent);
5104 }
5105
5106 if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
5107 list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
5108
5109 BUG_ON(cgroup_css(cgrp, ss));
5110 }
5111
5112
5113 static int online_css(struct cgroup_subsys_state *css)
5114 {
5115 struct cgroup_subsys *ss = css->ss;
5116 int ret = 0;
5117
5118 lockdep_assert_held(&cgroup_mutex);
5119
5120 if (ss->css_online)
5121 ret = ss->css_online(css);
5122 if (!ret) {
5123 css->flags |= CSS_ONLINE;
5124 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
5125
5126 atomic_inc(&css->online_cnt);
5127 if (css->parent)
5128 atomic_inc(&css->parent->online_cnt);
5129 }
5130 return ret;
5131 }
5132
5133
5134 static void offline_css(struct cgroup_subsys_state *css)
5135 {
5136 struct cgroup_subsys *ss = css->ss;
5137
5138 lockdep_assert_held(&cgroup_mutex);
5139
5140 if (!(css->flags & CSS_ONLINE))
5141 return;
5142
5143 if (ss->css_offline)
5144 ss->css_offline(css);
5145
5146 css->flags &= ~CSS_ONLINE;
5147 RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
5148
5149 wake_up_all(&css->cgroup->offline_waitq);
5150 }
5151
5152 /**
5153  * css_create - create a cgroup_subsys_state
5154  * @cgrp: the cgroup new css will be associated with
5155  * @ss: the subsys of new css
5156  *
5157  * Create a new css associated with @cgrp - @ss pair.  On success, the new
5158  * css is online and installed in @cgrp.  This function doesn't create the
5159  * interface files.  Returns the new css on success, ERR_PTR on failure.
5160  */
5161 static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
5162 struct cgroup_subsys *ss)
5163 {
5164 struct cgroup *parent = cgroup_parent(cgrp);
5165 struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
5166 struct cgroup_subsys_state *css;
5167 int err;
5168
5169 lockdep_assert_held(&cgroup_mutex);
5170
5171 css = ss->css_alloc(parent_css);
5172 if (!css)
5173 css = ERR_PTR(-ENOMEM);
5174 if (IS_ERR(css))
5175 return css;
5176
5177 init_and_link_css(css, ss, cgrp);
5178
5179 err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
5180 if (err)
5181 goto err_free_css;
5182
5183 err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
5184 if (err < 0)
5185 goto err_free_css;
5186 css->id = err;
5187
5188
5189 list_add_tail_rcu(&css->sibling, &parent_css->children);
5190 cgroup_idr_replace(&ss->css_idr, css, css->id);
5191
5192 err = online_css(css);
5193 if (err)
5194 goto err_list_del;
5195
5196 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
5197 cgroup_parent(parent)) {
5198 pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
5199 current->comm, current->pid, ss->name);
5200 if (!strcmp(ss->name, "memory"))
5201 pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
5202 ss->warned_broken_hierarchy = true;
5203 }
5204
5205 return css;
5206
5207 err_list_del:
5208 list_del_rcu(&css->sibling);
5209 err_free_css:
5210 list_del_rcu(&css->rstat_css_node);
5211 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5212 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5213 return ERR_PTR(err);
5214 }
5215
5216 /*
5217  * The returned cgroup is fully initialized including its control mask, but
5218  * it isn't associated with its kernfs_node and doesn't have the control
5219  * mask applied.
5220  */
5221 static struct cgroup *cgroup_create(struct cgroup *parent)
5222 {
5223 struct cgroup_root *root = parent->root;
5224 struct cgroup *cgrp, *tcgrp;
5225 int level = parent->level + 1;
5226 int ret;
5227
5228
5229 cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
5230 GFP_KERNEL);
5231 if (!cgrp)
5232 return ERR_PTR(-ENOMEM);
5233
5234 ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
5235 if (ret)
5236 goto out_free_cgrp;
5237
5238 if (cgroup_on_dfl(parent)) {
5239 ret = cgroup_rstat_init(cgrp);
5240 if (ret)
5241 goto out_cancel_ref;
5242 }
5243
5244 /*
5245  * Temporarily set the pointer to NULL, so idr_find() won't return
5246  * a half-baked cgroup.
5247  */
5248 cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
5249 if (cgrp->id < 0) {
5250 ret = -ENOMEM;
5251 goto out_stat_exit;
5252 }
5253
5254 init_cgroup_housekeeping(cgrp);
5255
5256 cgrp->self.parent = &parent->self;
5257 cgrp->root = root;
5258 cgrp->level = level;
5259
5260 ret = psi_cgroup_alloc(cgrp);
5261 if (ret)
5262 goto out_idr_free;
5263
5264 ret = cgroup_bpf_inherit(cgrp);
5265 if (ret)
5266 goto out_psi_free;
5267
5268 /*
5269  * New cgroup inherits effective freeze counter, and
5270  * if the parent has to be frozen, the child has too.
5271  */
5272 cgrp->freezer.e_freeze = parent->freezer.e_freeze;
5273 if (cgrp->freezer.e_freeze) {
5274 /*
5275  * Set the CGRP_FREEZE flag, so when a process will be
5276  * attached to the child cgroup, it will become frozen.
5277  * At this point the new cgroup is unpopulated, so we can
5278  * consider it frozen immediately.
5279  */
5280 set_bit(CGRP_FREEZE, &cgrp->flags);
5281 set_bit(CGRP_FROZEN, &cgrp->flags);
5282 }
5283
5284 spin_lock_irq(&css_set_lock);
5285 for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5286 cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
5287
5288 if (tcgrp != cgrp) {
5289 tcgrp->nr_descendants++;
5290
5291 /*
5292  * If the new cgroup is frozen, all ancestor cgroups get a new
5293  * frozen descendant, but their state can't change because of
5294  * this.
5295  */
5296 if (cgrp->freezer.e_freeze)
5297 tcgrp->freezer.nr_frozen_descendants++;
5298 }
5299 }
5300 spin_unlock_irq(&css_set_lock);
5301
5302 if (notify_on_release(parent))
5303 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
5304
5305 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
5306 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
5307
5308 cgrp->self.serial_nr = css_serial_nr_next++;
5309
5310 /* allocation complete, commit to creation */
5311 list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
5312 atomic_inc(&root->nr_cgrps);
5313 cgroup_get_live(parent);
5314
5315 /*
5316  * @cgrp is now fully operational.  If something fails after this
5317  * point, it'll be released via the normal destruction path.
5318  */
5319 cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
5320
5321 /*
5322  * On the default hierarchy, a child doesn't automatically inherit
5323  * subtree_control from the parent.  Each is configured manually.
5324  */
5325 if (!cgroup_on_dfl(cgrp))
5326 cgrp->subtree_control = cgroup_control(cgrp);
5327
5328 cgroup_propagate_control(cgrp);
5329
5330 return cgrp;
5331
5332 out_psi_free:
5333 psi_cgroup_free(cgrp);
5334 out_idr_free:
5335 cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
5336 out_stat_exit:
5337 if (cgroup_on_dfl(parent))
5338 cgroup_rstat_exit(cgrp);
5339 out_cancel_ref:
5340 percpu_ref_exit(&cgrp->self.refcnt);
5341 out_free_cgrp:
5342 kfree(cgrp);
5343 return ERR_PTR(ret);
5344 }
5345
5346 static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
5347 {
5348 struct cgroup *cgroup;
5349 bool ret = false;
5350 int level = 1;
5351
5352 lockdep_assert_held(&cgroup_mutex);
5353
5354 for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
5355 if (cgroup->nr_descendants >= cgroup->max_descendants)
5356 goto fail;
5357
5358 if (level > cgroup->max_depth)
5359 goto fail;
5360
5361 level++;
5362 }
5363
5364 ret = true;
5365 fail:
5366 return ret;
5367 }
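/*
 * Illustrative interaction (paths and values hypothetical): the two limits
 * walked above are exposed on the default hierarchy as the
 * "cgroup.max.depth" and "cgroup.max.descendants" interface files, and
 * cgroup_mkdir() below fails with EAGAIN once either budget is exhausted:
 *
 *	# echo 2 > /sys/fs/cgroup/cgroup.max.depth
 *	# mkdir -p /sys/fs/cgroup/a/b		# ok, depth 2
 *	# mkdir /sys/fs/cgroup/a/b/c		# fails: depth 3 > max.depth
 */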
5368
5369 int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
5370 {
5371 struct cgroup *parent, *cgrp;
5372 struct kernfs_node *kn;
5373 int ret;
5374
5375 /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
5376 if (strchr(name, '\n'))
5377 return -EINVAL;
5378
5379 parent = cgroup_kn_lock_live(parent_kn, false);
5380 if (!parent)
5381 return -ENODEV;
5382
5383 if (!cgroup_check_hierarchy_limits(parent)) {
5384 ret = -EAGAIN;
5385 goto out_unlock;
5386 }
5387
5388 cgrp = cgroup_create(parent);
5389 if (IS_ERR(cgrp)) {
5390 ret = PTR_ERR(cgrp);
5391 goto out_unlock;
5392 }
5393
5394 /* create the directory */
5395 kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
5396 if (IS_ERR(kn)) {
5397 ret = PTR_ERR(kn);
5398 goto out_destroy;
5399 }
5400 cgrp->kn = kn;
5401
5402 /*
5403  * This extra ref will be put in css_free_rwork_fn() and guarantees
5404  * that @cgrp->kn is always accessible.
5405  */
5406 kernfs_get(kn);
5407
5408 ret = cgroup_kn_set_ugid(kn);
5409 if (ret)
5410 goto out_destroy;
5411
5412 ret = css_populate_dir(&cgrp->self);
5413 if (ret)
5414 goto out_destroy;
5415
5416 ret = cgroup_apply_control_enable(cgrp);
5417 if (ret)
5418 goto out_destroy;
5419
5420 TRACE_CGROUP_PATH(mkdir, cgrp);
5421
5422 /* everything is set up, activate the new directory and its files */
5423 kernfs_activate(kn);
5424
5425 ret = 0;
5426 goto out_unlock;
5427
5428 out_destroy:
5429 cgroup_destroy_locked(cgrp);
5430 out_unlock:
5431 cgroup_kn_unlock(parent_kn);
5432 return ret;
5433 }
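/*
 * Illustrative userspace sketch (path hypothetical): a plain mkdir(2) on a
 * mounted cgroup filesystem is what reaches cgroup_mkdir() above, through
 * the kernfs_syscall_ops table registered further down.
 *
 *	#include <sys/stat.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		// EAGAIN here means the hierarchy limits were hit
 *		if (mkdir("/sys/fs/cgroup/demo", 0755) && errno != EEXIST) {
 *			perror("mkdir");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */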
5434
5435 /*
5436  * This is called when the refcnt of a css is confirmed to be killed.
5437  * css_tryget_online() is now guaranteed to fail.  Tell the subsystem
5438  * to initiate destruction and put the css ref from kill_css().
5439  */
5440 static void css_killed_work_fn(struct work_struct *work)
5441 {
5442 struct cgroup_subsys_state *css =
5443 container_of(work, struct cgroup_subsys_state, destroy_work);
5444
5445 mutex_lock(&cgroup_mutex);
5446
5447 do {
5448 offline_css(css);
5449 css_put(css);
5450
5451 css = css->parent;
5452 } while (css && atomic_dec_and_test(&css->online_cnt));
5453
5454 mutex_unlock(&cgroup_mutex);
5455 }
5456
5457 /* css kill confirmation processing requires process context, bounce */
5458 static void css_killed_ref_fn(struct percpu_ref *ref)
5459 {
5460 struct cgroup_subsys_state *css =
5461 container_of(ref, struct cgroup_subsys_state, refcnt);
5462
5463 if (atomic_dec_and_test(&css->online_cnt)) {
5464 INIT_WORK(&css->destroy_work, css_killed_work_fn);
5465 queue_work(cgroup_destroy_wq, &css->destroy_work);
5466 }
5467 }
5468
5469 /**
5470  * kill_css - destroy a css
5471  * @css: css to destroy
5472  *
5473  * This function initiates destruction of @css by removing cgroup
5474  * interface files and putting its base reference.  ->css_offline() will
5475  * be invoked asynchronously once css_tryget_online() is guaranteed to
5476  * fail and when the reference count reaches zero, @css will be released.
5477  */
5478 static void kill_css(struct cgroup_subsys_state *css)
5479 {
5480 lockdep_assert_held(&cgroup_mutex);
5481
5482 if (css->flags & CSS_DYING)
5483 return;
5484
5485 css->flags |= CSS_DYING;
5486
5487 /*
5488  * Remove @css's interface files.  A dying css should no longer be
5489  * visible to or manipulable from userland.
5490  */
5491 css_clear_dir(css);
5492
5493 /*
5494  * Killing would put the base ref, but we need to keep it alive
5495  * until after ->css_offline().
5496  */
5497 css_get(css);
5498
5499 /*
5500  * cgroup core guarantees that, by the time ->css_offline() is
5501  * invoked, no new css reference will be given out via
5502  * css_tryget_online().  We can't simply call percpu_ref_kill() and
5503  * proceed to offlining css's because percpu_ref_kill() doesn't
5504  * guarantee that the ref is seen as killed on all CPUs on return.
5505  *
5506  * Use percpu_ref_kill_and_confirm() to get notifications as each
5507  * css is confirmed to be seen as killed on all CPUs.
5508  */
5509 percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
5510 }
5511
5512 /**
5513  * cgroup_destroy_locked - the first stage of cgroup destruction
5514  * @cgrp: cgroup to be destroyed
5515  *
5516  * css's make use of percpu refcnts whose killing latency shouldn't be
5517  * exposed to userland and are RCU protected.  Also, cgroup core needs to
5518  * guarantee that css_tryget_online() won't succeed by the time
5519  * ->css_offline() is invoked.  To satisfy all the requirements,
5520  * destruction is implemented in the following two steps.
5521  *
5522  * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
5523  *     userland visible parts and start killing the percpu refcnts of
5524  *     css's.  Set up so that the next stage will be kicked off once all
5525  *     the percpu refcnts are confirmed to be killed.
5526  *
5527  * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
5528  *     rest of destruction.  Once all cgroup references are gone, the
5529  *     cgroup is RCU-freed.
5530  *
5531  * This function implements s1.  After this step, @cgrp is gone as far as
5532  * the userland is concerned and a new cgroup with the same name may be
5533  * created.  As cgroup doesn't care about the names internally, this
5534  * doesn't cause any problem.
5535  */
5536 static int cgroup_destroy_locked(struct cgroup *cgrp)
5537 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
5538 {
5539 struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
5540 struct cgroup_subsys_state *css;
5541 struct cgrp_cset_link *link;
5542 int ssid;
5543
5544 lockdep_assert_held(&cgroup_mutex);
5545
5546 /*
5547  * Only migration can raise populated from zero and we're already
5548  * holding cgroup_mutex.
5549  */
5550 if (cgroup_is_populated(cgrp))
5551 return -EBUSY;
5552
5553 /*
5554  * Make sure there's no live children.  We can't test emptiness of
5555  * ->self.children as dead children linger on it while being
5556  * drained; otherwise, "rmdir parent/child parent" may fail.
5557  */
5558 if (css_has_online_children(&cgrp->self))
5559 return -EBUSY;
5560
5561 /*
5562  * Mark @cgrp and the associated csets dead.  The former prevents
5563  * further task migration and child creation by making
5564  * cgroup_kn_lock_live() fail.  The latter makes the csets ignored
5565  * by the migration path.
5566  */
5567 cgrp->self.flags &= ~CSS_ONLINE;
5568
5569 spin_lock_irq(&css_set_lock);
5570 list_for_each_entry(link, &cgrp->cset_links, cset_link)
5571 link->cset->dead = true;
5572 spin_unlock_irq(&css_set_lock);
5573
5574 /* initiate massacre of all css's */
5575 for_each_css(css, ssid, cgrp)
5576 kill_css(css);
5577
5578 /* remove @cgrp directory along with the base files */
5579 css_clear_dir(&cgrp->self);
5580 kernfs_remove(cgrp->kn);
5581
5582 if (parent && cgroup_is_threaded(cgrp))
5583 parent->nr_threaded_children--;
5584
5585 spin_lock_irq(&css_set_lock);
5586 for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5587 tcgrp->nr_descendants--;
5588 tcgrp->nr_dying_descendants++;
5589 /*
5590  * If the dying cgroup is frozen, decrease frozen descendants
5591  * counters of ancestor cgroups.
5592  */
5593 if (test_bit(CGRP_FROZEN, &cgrp->flags))
5594 tcgrp->freezer.nr_frozen_descendants--;
5595 }
5596 spin_unlock_irq(&css_set_lock);
5597
5598 cgroup1_check_for_release(parent);
5599
5600 cgroup_bpf_offline(cgrp);
5601
5602 /* put the base reference */
5603 percpu_ref_kill(&cgrp->self.refcnt);
5604
5605 return 0;
5606 }
5607
5608 int cgroup_rmdir(struct kernfs_node *kn)
5609 {
5610 struct cgroup *cgrp;
5611 int ret = 0;
5612
5613 cgrp = cgroup_kn_lock_live(kn, false);
5614 if (!cgrp)
5615 return 0;
5616
5617 ret = cgroup_destroy_locked(cgrp);
5618 if (!ret)
5619 TRACE_CGROUP_PATH(rmdir, cgrp);
5620
5621 cgroup_kn_unlock(kn);
5622 return ret;
5623 }
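/*
 * Illustrative interaction (path hypothetical): rmdir(2) on a cgroup
 * directory lands in cgroup_rmdir() above, and cgroup_destroy_locked()
 * refuses with EBUSY while the cgroup is populated or has online children:
 *
 *	# rmdir /sys/fs/cgroup/demo			# EBUSY, still populated
 *	# echo $PID > /sys/fs/cgroup/cgroup.procs	# move the task away
 *	# rmdir /sys/fs/cgroup/demo			# succeeds
 */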
5624
5625 static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
5626 .show_options = cgroup_show_options,
5627 .mkdir = cgroup_mkdir,
5628 .rmdir = cgroup_rmdir,
5629 .show_path = cgroup_show_path,
5630 };
5631
5632 static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
5633 {
5634 struct cgroup_subsys_state *css;
5635
5636 pr_debug("Initializing cgroup subsys %s\n", ss->name);
5637
5638 mutex_lock(&cgroup_mutex);
5639
5640 idr_init(&ss->css_idr);
5641 INIT_LIST_HEAD(&ss->cfts);
5642
5643 /* Create the root cgroup state for this subsystem */
5644 ss->root = &cgrp_dfl_root;
5645 css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
5646
5647 BUG_ON(IS_ERR(css));
5648 init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
5649
5650 /*
5651  * Root csses are never destroyed and we can't initialize
5652  * percpu_ref during early init.  Disable refcnting.
5653  */
5654 css->flags |= CSS_NO_REF;
5655
5656 if (early) {
5657 /* allocation can't be done safely during early init */
5658 css->id = 1;
5659 } else {
5660 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
5661 BUG_ON(css->id < 0);
5662 }
5663
5664 /*
5665  * Update init_css_set to point at this css - since the subsystem
5666  * is newly registered, all tasks are still in its root cgroup.
5667  */
5668 init_css_set.subsys[ss->id] = css;
5669
5670 have_fork_callback |= (bool)ss->fork << ss->id;
5671 have_exit_callback |= (bool)ss->exit << ss->id;
5672 have_release_callback |= (bool)ss->release << ss->id;
5673 have_canfork_callback |= (bool)ss->can_fork << ss->id;
5674
5675
5676 /* this is called before any other task is forked on the system */
5677
5678 BUG_ON(!list_empty(&init_task.tasks));
5679
5680 BUG_ON(online_css(css));
5681
5682 mutex_unlock(&cgroup_mutex);
5683 }
5684
5685 /**
5686  * cgroup_init_early - cgroup initialization at system boot
5687  *
5688  * Initialize cgroups at system boot, and initialize any
5689  * subsystems that request early init.
5690  */
5691 int __init cgroup_init_early(void)
5692 {
5693 static struct cgroup_fs_context __initdata ctx;
5694 struct cgroup_subsys *ss;
5695 int i;
5696
5697 ctx.root = &cgrp_dfl_root;
5698 init_cgroup_root(&ctx);
5699 cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
5700
5701 RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
5702
5703 for_each_subsys(ss, i) {
5704 WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
5705 "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
5706 i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
5707 ss->id, ss->name);
5708 WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
5709 "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
5710
5711 ss->id = i;
5712 ss->name = cgroup_subsys_name[i];
5713 if (!ss->legacy_name)
5714 ss->legacy_name = cgroup_subsys_name[i];
5715
5716 if (ss->early_init)
5717 cgroup_init_subsys(ss, true);
5718 }
5719 return 0;
5720 }
5721
5722 static u16 cgroup_disable_mask __initdata;
5723
5724 /**
5725  * cgroup_init - cgroup initialization
5726  *
5727  * Register cgroup filesystem and /proc file, and initialize
5728  * any subsystems that didn't request early init.
5729  */
5730 int __init cgroup_init(void)
5731 {
5732 struct cgroup_subsys *ss;
5733 int ssid;
5734
5735 BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
5736 BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
5737 BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
5738
5739 cgroup_rstat_boot();
5740
5741 /*
5742  * The latency of the synchronize_rcu() is too high for cgroups,
5743  * avoid it at the cost of forcing all readers into the slow path.
5744  */
5745 rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
5746
5747 get_user_ns(init_cgroup_ns.user_ns);
5748
5749 mutex_lock(&cgroup_mutex);
5750
5751 /*
5752  * Add init_css_set to the hash table so that dfl_root can link to
5753  * it during init.
5754  */
5755 hash_add(css_set_table, &init_css_set.hlist,
5756 css_set_hash(init_css_set.subsys));
5757
5758 BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
5759
5760 mutex_unlock(&cgroup_mutex);
5761
5762 for_each_subsys(ss, ssid) {
5763 if (ss->early_init) {
5764 struct cgroup_subsys_state *css =
5765 init_css_set.subsys[ss->id];
5766
5767 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
5768 GFP_KERNEL);
5769 BUG_ON(css->id < 0);
5770 } else {
5771 cgroup_init_subsys(ss, false);
5772 }
5773
5774 list_add_tail(&init_css_set.e_cset_node[ssid],
5775 &cgrp_dfl_root.cgrp.e_csets[ssid]);
5776
5777 /*
5778  * Setting dfl_root subsys_mask needs to consider the
5779  * disabled flag and cftype registration needs kmalloc,
5780  * both of which aren't available during early_init.
5781  */
5782 if (cgroup_disable_mask & (1 << ssid)) {
5783 static_branch_disable(cgroup_subsys_enabled_key[ssid]);
5784 printk(KERN_INFO "Disabling %s control group subsystem\n",
5785 ss->name);
5786 continue;
5787 }
5788
5789 if (cgroup1_ssid_disabled(ssid))
5790 printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
5791 ss->name);
5792
5793 cgrp_dfl_root.subsys_mask |= 1 << ss->id;
5794
5795 /* implicit controllers must be threaded too */
5796 WARN_ON(ss->implicit_on_dfl && !ss->threaded);
5797
5798 if (ss->implicit_on_dfl)
5799 cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
5800 else if (!ss->dfl_cftypes)
5801 cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;
5802
5803 if (ss->threaded)
5804 cgrp_dfl_threaded_ss_mask |= 1 << ss->id;
5805
5806 if (ss->dfl_cftypes == ss->legacy_cftypes) {
5807 WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
5808 } else {
5809 WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
5810 WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
5811 }
5812
5813 if (ss->bind)
5814 ss->bind(init_css_set.subsys[ssid]);
5815
5816 mutex_lock(&cgroup_mutex);
5817 css_populate_dir(init_css_set.subsys[ssid]);
5818 mutex_unlock(&cgroup_mutex);
5819 }
5820
5821 /* init_css_set.subsys[] has been updated, re-hash */
5822 hash_del(&init_css_set.hlist);
5823 hash_add(css_set_table, &init_css_set.hlist,
5824 css_set_hash(init_css_set.subsys));
5825
5826 WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
5827 WARN_ON(register_filesystem(&cgroup_fs_type));
5828 WARN_ON(register_filesystem(&cgroup2_fs_type));
5829 WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
5830 #ifdef CONFIG_CPUSETS
5831 WARN_ON(register_filesystem(&cpuset_fs_type));
5832 #endif
5833
5834 return 0;
5835 }
5836
5837 static int __init cgroup_wq_init(void)
5838 {
5839 /*
5840  * There isn't much point in executing destruction path in
5841  * parallel.  Use 1 for @max_active.
5842  *
5843  * We would prefer to do this in cgroup_init() above, but that
5844  * is called before init_workqueues(); do it from a core_initcall.
5845  */
5846
5847 cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
5848 BUG_ON(!cgroup_destroy_wq);
5849 return 0;
5850 }
5851 core_initcall(cgroup_wq_init);
5852
5853 void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
5854 char *buf, size_t buflen)
5855 {
5856 struct kernfs_node *kn;
5857
5858 kn = kernfs_get_node_by_id(cgrp_dfl_root.kf_root, id);
5859 if (!kn)
5860 return;
5861 kernfs_path(kn, buf, buflen);
5862 kernfs_put(kn);
5863 }
5864
5865 /*
5866  * proc_cgroup_show()
5867  *  - Print task's cgroup paths into seq_file, one line for each hierarchy
5868  *  - Used for /proc/<pid>/cgroup.
5869  */
5870 int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
5871 struct pid *pid, struct task_struct *tsk)
5872 {
5873 char *buf;
5874 int retval;
5875 struct cgroup_root *root;
5876
5877 retval = -ENOMEM;
5878 buf = kmalloc(PATH_MAX, GFP_KERNEL);
5879 if (!buf)
5880 goto out;
5881
5882 mutex_lock(&cgroup_mutex);
5883 spin_lock_irq(&css_set_lock);
5884
5885 for_each_root(root) {
5886 struct cgroup_subsys *ss;
5887 struct cgroup *cgrp;
5888 int ssid, count = 0;
5889
5890 if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
5891 continue;
5892
5893 seq_printf(m, "%d:", root->hierarchy_id);
5894 if (root != &cgrp_dfl_root)
5895 for_each_subsys(ss, ssid)
5896 if (root->subsys_mask & (1 << ssid))
5897 seq_printf(m, "%s%s", count++ ? "," : "",
5898 ss->legacy_name);
5899 if (strlen(root->name))
5900 seq_printf(m, "%sname=%s", count ? "," : "",
5901 root->name);
5902 seq_putc(m, ':');
5903
5904 cgrp = task_cgroup_from_root(tsk, root);
5905
5906 /*
5907  * On traditional hierarchies, all zombie tasks show up as
5908  * belonging to the root cgroup.  On the default hierarchy,
5909  * while a zombie doesn't show up in "cgroup.procs" and
5910  * thus can't be migrated, its /proc/PID/cgroup keeps
5911  * reporting the cgroup it belonged to before exiting.  If
5912  * the cgroup is removed before the zombie is reaped,
5913  * " (deleted)" is appended to the path.
5914  */
5915 if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
5916 retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
5917 current->nsproxy->cgroup_ns);
5918 if (retval >= PATH_MAX)
5919 retval = -ENAMETOOLONG;
5920 if (retval < 0)
5921 goto out_unlock;
5922
5923 seq_puts(m, buf);
5924 } else {
5925 seq_puts(m, "/");
5926 }
5927
5928 if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
5929 seq_puts(m, " (deleted)\n");
5930 else
5931 seq_putc(m, '\n');
5932 }
5933
5934 retval = 0;
5935 out_unlock:
5936 spin_unlock_irq(&css_set_lock);
5937 mutex_unlock(&cgroup_mutex);
5938 kfree(buf);
5939 out:
5940 return retval;
5941 }
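/*
 * Example output of the function above for /proc/<pid>/cgroup
 * (illustrative; hierarchy IDs, controllers and paths vary by system):
 *
 *	$ cat /proc/self/cgroup
 *	12:cpuset:/
 *	4:cpu,cpuacct:/user.slice
 *	1:name=systemd:/user.slice/session-2.scope
 *	0::/user.slice/session-2.scope
 */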
5942
5943 /**
5944  * cgroup_fork - initialize cgroup related fields during copy_process()
5945  * @child: pointer to task_struct of forking parent process.
5946  *
5947  * A task is associated with the init_css_set until cgroup_post_fork()
5948  * attaches it to the parent's css_set.  Empty cg_list indicates that
5949  * @child isn't holding reference to its css_set.
5950  */
5951 void cgroup_fork(struct task_struct *child)
5952 {
5953 RCU_INIT_POINTER(child->cgroups, &init_css_set);
5954 INIT_LIST_HEAD(&child->cg_list);
5955 }
5956
5957 /**
5958  * cgroup_can_fork - called on a new task before the process is exposed
5959  * @child: the task in question.
5960  *
5961  * This calls the subsystem can_fork() callbacks. If the can_fork() callback
5962  * returns an error, the fork aborts with that error code. This allows for
5963  * a cgroup subsystem to conditionally allow or deny new forks.
5964  */
5965 int cgroup_can_fork(struct task_struct *child)
5966 {
5967 struct cgroup_subsys *ss;
5968 int i, j, ret;
5969
5970 do_each_subsys_mask(ss, i, have_canfork_callback) {
5971 ret = ss->can_fork(child);
5972 if (ret)
5973 goto out_revert;
5974 } while_each_subsys_mask();
5975
5976 return 0;
5977
5978 out_revert:
5979 for_each_subsys(ss, j) {
5980 if (j >= i)
5981 break;
5982 if (ss->cancel_fork)
5983 ss->cancel_fork(child);
5984 }
5985
5986 return ret;
5987 }
5988
5989 /**
5990  * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
5991  * @child: the task in question
5992  *
5993  * This calls the cancel_fork() callbacks if a fork failed *after*
5994  * cgroup_can_fork() succeeded.
5995  */
5996 void cgroup_cancel_fork(struct task_struct *child)
5997 {
5998 struct cgroup_subsys *ss;
5999 int i;
6000
6001 for_each_subsys(ss, i)
6002 if (ss->cancel_fork)
6003 ss->cancel_fork(child);
6004 }
6005
6006
6007 /**
6008  * cgroup_post_fork - called on a new task after adding it to the task list
6009  * @child: the task in question
6010  *
6011  * Adds the task to the list running through its css_set if necessary and
6012  * call the subsystem fork() callbacks.  Has to be after the task is
6013  * visible on the task list in case we race with the first call to
6014  * css_task_iter_start() - to guarantee that the new task ends up on its list.
6015  */
6016 void cgroup_post_fork(struct task_struct *child)
6017 {
6018 struct cgroup_subsys *ss;
6019 int i;
6020
6021 /*
6022  * This may race against cgroup_enable_task_cg_lists().  As that
6023  * function sets use_task_css_set_links before grabbing
6024  * tasklist_lock and we just went through tasklist_lock to add
6025  * @child, it's guaranteed that either we see the set
6026  * use_task_css_set_links or cgroup_enable_task_cg_lists() sees
6027  * @child during its iteration.
6028  *
6029  * If we won the race, @child is associated with %current's
6030  * css_set.  Grabbing css_set_lock guarantees both that the
6031  * association is stable, and, on completion of the parent's
6032  * migration, @child is visible in the source of migration or
6033  * already in the destination cgroup.  This guarantee is necessary
6034  * when implementing memcg's task_move charge problem.
6035  *
6036  * If we lost the race, cgroup_enable_task_cg_lists() has already
6037  * linked @child to its css_set; the list_empty() check below
6038  * detects that case and skips the move, so the task is never
6039  * linked twice or double-counted in nr_tasks.
6040  */
6041
6042 if (use_task_css_set_links) {
6043 struct css_set *cset;
6044
6045 spin_lock_irq(&css_set_lock);
6046 cset = task_css_set(current);
6047 if (list_empty(&child->cg_list)) {
6048 get_css_set(cset);
6049 cset->nr_tasks++;
6050 css_set_move_task(child, NULL, cset, false);
6051 }
6052
6053 /*
6054  * If the cgroup has to be frozen, the new task has too.  Let's set
6055  * the JOBCTL_TRAP_FREEZE jobctl bit to get the task into the
6056  * frozen state.
6057  */
6058 if (unlikely(cgroup_task_freeze(child))) {
6059 spin_lock(&child->sighand->siglock);
6060 WARN_ON_ONCE(child->frozen);
6061 child->jobctl |= JOBCTL_TRAP_FREEZE;
6062 spin_unlock(&child->sighand->siglock);
6063
6064 /*
6065  * Calling cgroup_update_frozen() isn't required here,
6066  * because it will be called anyway a bit later from
6067  * do_freezer_trap().  So we avoid cgroup's transient switch
6068  * from the frozen state and back.
6069  */
6070 }
6071
6072 spin_unlock_irq(&css_set_lock);
6073 }
6074
6075 /*
6076  * Call ss->fork().  This must happen after @child is linked on
6077  * css_set; otherwise, @child might change state between ->fork()
6078  * and addition to css_set.
6079  */
6080 do_each_subsys_mask(ss, i, have_fork_callback) {
6081 ss->fork(child);
6082 } while_each_subsys_mask();
6083 }
6084
6085
6086 /**
6087  * cgroup_exit - detach cgroup from exiting task
6088  * @tsk: pointer to task_struct of exiting process
6089  *
6090  * Description: Detach cgroup from @tsk.
6091  *
6092  * A task is kept on its css_set's dying_tasks list so that css task
6093  * iterators started before the exit can still visit it; the list
6094  * entry is removed in cgroup_release().
6095  *
6096  * The css_set itself stays pinned: if @tsk was linked on cg_list the
6097  * reference taken in cgroup_post_fork() is reused, otherwise an
6098  * extra reference is taken here.  Either way the matching
6099  * put_css_set() happens in cgroup_free() once the task struct is
6100  * freed, so the css_set can't go away while the task is still
6101  * reachable.
6102  */
6103
6104 void cgroup_exit(struct task_struct *tsk)
6105 {
6106 struct cgroup_subsys *ss;
6107 struct css_set *cset;
6108 int i;
6109
6110 /*
6111  * Unlink @tsk from its css_set.  As the migration path can't race
6112  * with us, we can check css_set and cg_list without synchronization.
6113  */
6114 cset = task_css_set(tsk);
6115
6116 if (!list_empty(&tsk->cg_list)) {
6117 spin_lock_irq(&css_set_lock);
6118 css_set_move_task(tsk, cset, NULL, false);
6119 list_add_tail(&tsk->cg_list, &cset->dying_tasks);
6120 cset->nr_tasks--;
6121
6122 WARN_ON_ONCE(cgroup_task_frozen(tsk));
6123 if (unlikely(cgroup_task_freeze(tsk)))
6124 cgroup_update_frozen(task_dfl_cgroup(tsk));
6125
6126 spin_unlock_irq(&css_set_lock);
6127 } else {
6128 get_css_set(cset);
6129 }
6130
6131 /* see cgroup_post_fork() for details */
6132 do_each_subsys_mask(ss, i, have_exit_callback) {
6133 ss->exit(tsk);
6134 } while_each_subsys_mask();
6135 }
6136
6137 void cgroup_release(struct task_struct *task)
6138 {
6139 struct cgroup_subsys *ss;
6140 int ssid;
6141
6142 do_each_subsys_mask(ss, ssid, have_release_callback) {
6143 ss->release(task);
6144 } while_each_subsys_mask();
6145
6146 if (use_task_css_set_links) {
6147 spin_lock_irq(&css_set_lock);
6148 css_set_skip_task_iters(task_css_set(task), task);
6149 list_del_init(&task->cg_list);
6150 spin_unlock_irq(&css_set_lock);
6151 }
6152 }
6153
6154 void cgroup_free(struct task_struct *task)
6155 {
6156 struct css_set *cset = task_css_set(task);
6157 put_css_set(cset);
6158 }
6159
6160 static int __init cgroup_disable(char *str)
6161 {
6162 struct cgroup_subsys *ss;
6163 char *token;
6164 int i;
6165
6166 while ((token = strsep(&str, ",")) != NULL) {
6167 if (!*token)
6168 continue;
6169
6170 for_each_subsys(ss, i) {
6171 if (strcmp(token, ss->name) &&
6172 strcmp(token, ss->legacy_name))
6173 continue;
6174 cgroup_disable_mask |= 1 << i;
6175 }
6176 }
6177 return 1;
6178 }
6179 __setup("cgroup_disable=", cgroup_disable);
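/*
 * Example (illustrative): booting with
 *
 *	cgroup_disable=memory,cpuset
 *
 * sets the matching bits in cgroup_disable_mask; cgroup_init() consumes
 * the mask and turns off those controllers' static keys before any
 * hierarchy can be mounted.
 */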
6180
6181 void __init __weak enable_debug_cgroup(void) { }
6182
6183 static int __init enable_cgroup_debug(char *str)
6184 {
6185 cgroup_debug = true;
6186 enable_debug_cgroup();
6187 return 1;
6188 }
6189 __setup("cgroup_debug", enable_cgroup_debug);
6190
6191 /**
6192  * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
6193  * @dentry: directory dentry of interest
6194  * @ss: subsystem of interest
6195  *
6196  * If @dentry is a directory for a cgroup which has @ss enabled on it, try
6197  * to get the corresponding css and return it.  If such css doesn't exist
6198  * or can't be pinned, an ERR_PTR value is returned.
6199  */
6200 struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
6201 struct cgroup_subsys *ss)
6202 {
6203 struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
6204 struct file_system_type *s_type = dentry->d_sb->s_type;
6205 struct cgroup_subsys_state *css = NULL;
6206 struct cgroup *cgrp;
6207
6208 /* is @dentry a cgroup dir? */
6209 if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
6210 !kn || kernfs_type(kn) != KERNFS_DIR)
6211 return ERR_PTR(-EBADF);
6212
6213 rcu_read_lock();
6214
6215 /*
6216  * This path doesn't originate from kernfs and @kn could already
6217  * have been or be removed at any point.  @kn->priv is RCU
6218  * protected for this access.
6219  */
6220 cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
6221 if (cgrp)
6222 css = cgroup_css(cgrp, ss);
6223
6224 if (!css || !css_tryget_online(css))
6225 css = ERR_PTR(-ENOENT);
6226
6227 rcu_read_unlock();
6228 return css;
6229 }
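/*
 * Illustrative in-kernel use (a sketch; the calling context is
 * hypothetical):
 *
 *	struct cgroup_subsys_state *css;
 *
 *	css = css_tryget_online_from_dir(dentry, &memory_cgrp_subsys);
 *	if (IS_ERR(css))
 *		return PTR_ERR(css);
 *	...				// css stays pinned while we use it
 *	css_put(css);			// drop the tryget reference
 */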
6230
6231
6232 /**
6233  * css_from_id - lookup css by id
6234  * @id: the cgroup id
6235  * @ss: cgroup subsys to be looked into
6236  *
6237  * Returns the css if one exists with @id; must be called under rcu_read_lock().
6238  */
6239 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
6240 {
6241 WARN_ON_ONCE(!rcu_read_lock_held());
6242 return idr_find(&ss->css_idr, id);
6243 }
6244
6245
6246 /**
6247  * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
6248  * @path: path on the default hierarchy
6249  *
6250  * Find the cgroup at @path on the default hierarchy, increment its
6251  * reference count and return it.  Returns ERR_PTR(-ENOENT) if @path
6252  * doesn't exist and ERR_PTR(-ENOTDIR) if @path points to a non-directory.
6253  */
6254 struct cgroup *cgroup_get_from_path(const char *path)
6255 {
6256 struct kernfs_node *kn;
6257 struct cgroup *cgrp;
6258
6259 mutex_lock(&cgroup_mutex);
6260
6261 kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
6262 if (kn) {
6263 if (kernfs_type(kn) == KERNFS_DIR) {
6264 cgrp = kn->priv;
6265 cgroup_get_live(cgrp);
6266 } else {
6267 cgrp = ERR_PTR(-ENOTDIR);
6268 }
6269 kernfs_put(kn);
6270 } else {
6271 cgrp = ERR_PTR(-ENOENT);
6272 }
6273
6274 mutex_unlock(&cgroup_mutex);
6275 return cgrp;
6276 }
6277 EXPORT_SYMBOL_GPL(cgroup_get_from_path);
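/*
 * Illustrative use (a sketch; the path is hypothetical):
 *
 *	struct cgroup *cgrp;
 *
 *	cgrp = cgroup_get_from_path("/my-service");
 *	if (IS_ERR(cgrp))
 *		return PTR_ERR(cgrp);
 *	...
 *	cgroup_put(cgrp);		// pair with the reference taken above
 */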
6278
6279
6280 /**
6281  * cgroup_get_from_fd - get a cgroup pointer from a fd
6282  * @fd: fd obtained by open(cgroup2_dir)
6283  *
6284  * Find the cgroup from a fd which should be obtained by opening a
6285  * cgroup directory.  Returns a pointer to the cgroup on success.
6286  * ERR_PTR is returned if the cgroup cannot be found.
6287  */
6288 struct cgroup *cgroup_get_from_fd(int fd)
6289 {
6290 struct cgroup_subsys_state *css;
6291 struct cgroup *cgrp;
6292 struct file *f;
6293
6294 f = fget_raw(fd);
6295 if (!f)
6296 return ERR_PTR(-EBADF);
6297
6298 css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
6299 fput(f);
6300 if (IS_ERR(css))
6301 return ERR_CAST(css);
6302
6303 cgrp = css->cgroup;
6304 if (!cgroup_on_dfl(cgrp)) {
6305 cgroup_put(cgrp);
6306 return ERR_PTR(-EBADF);
6307 }
6308
6309 return cgrp;
6310 }
6311 EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
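/*
 * Illustrative pairing (a sketch): userspace opens a cgroup2 directory,
 * e.g. open("/sys/fs/cgroup/demo", O_RDONLY | O_DIRECTORY), and hands the
 * fd to a kernel interface, which resolves it like this:
 *
 *	struct cgroup *cgrp = cgroup_get_from_fd(fd);
 *
 *	if (IS_ERR(cgrp))
 *		return PTR_ERR(cgrp);
 *	...
 *	cgroup_put(cgrp);
 */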
6312
6313 static u64 power_of_ten(int power)
6314 {
6315 u64 v = 1;
6316 while (power--)
6317 v *= 10;
6318 return v;
6319 }
6320
6321 /**
6322  * cgroup_parse_float - parse a floating number
6323  * @input: input string
6324  * @dec_shift: number of decimal digits to shift
6325  * @v: output
6326  *
6327  * Parse a decimal floating point number in @input and store the result in
6328  * @v with decimal point right shifted @dec_shift times.  For example, if
6329  * @dec_shift is 2, *@v == 12345 when @input is "123.45".  Returns 0 on
6330  * success, -errno otherwise.
6331  *
6332  * A fractional part longer than @dec_shift digits is rounded to the
6333  * nearest representable value via DIV_ROUND_CLOSEST_ULL().
6334  */
6335 int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
6336 {
6337 s64 whole, frac = 0;
6338 int fstart = 0, fend = 0, flen;
6339
6340 if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend))
6341 return -EINVAL;
6342 if (frac < 0)
6343 return -EINVAL;
6344
6345 flen = fend > fstart ? fend - fstart : 0;
6346 if (flen < dec_shift)
6347 frac *= power_of_ten(dec_shift - flen);
6348 else
6349 frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift));
6350
6351 *v = whole * power_of_ten(dec_shift) + frac;
6352 return 0;
6353 }
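/*
 * Worked example (hypothetical caller): parsing a percentage with two
 * digits of precision.
 *
 *	s64 pct;
 *
 *	if (cgroup_parse_float("12.34", 2, &pct))
 *		return -EINVAL;
 *	// pct == 1234; "12.3456" would round to 1235 and "12" yields 1200
 */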
6354
6355 /*
6356  * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
6357  * definition in cgroup-defs.h.
6358  */
6359 #ifdef CONFIG_SOCK_CGROUP_DATA
6360
6361 #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
6362
6363 DEFINE_SPINLOCK(cgroup_sk_update_lock);
6364 static bool cgroup_sk_alloc_disabled __read_mostly;
6365
6366 void cgroup_sk_alloc_disable(void)
6367 {
6368 if (cgroup_sk_alloc_disabled)
6369 return;
6370 pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
6371 cgroup_sk_alloc_disabled = true;
6372 }
6373
6374 #else
6375
6376 #define cgroup_sk_alloc_disabled false
6377
6378 #endif
6379
6380 void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
6381 {
6382 if (cgroup_sk_alloc_disabled)
6383 return;
6384
6385 /* Socket clone path */
6386 if (skcd->val) {
6387 /*
6388  * We might be cloning a socket which is left in an empty
6389  * cgroup and the cgroup might have already been rmdir'd.
6390  * Don't use cgroup_get_live().
6391  */
6392 cgroup_get(sock_cgroup_ptr(skcd));
6393 cgroup_bpf_get(sock_cgroup_ptr(skcd));
6394 return;
6395 }
6396
6397 /* Don't associate the sock with unrelated interrupted task's cgroup. */
6398 if (in_interrupt())
6399 return;
6400
6401 rcu_read_lock();
6402
6403 while (true) {
6404 struct css_set *cset;
6405
6406 cset = task_css_set(current);
6407 if (likely(cgroup_tryget(cset->dfl_cgrp))) {
6408 skcd->val = (unsigned long)cset->dfl_cgrp;
6409 cgroup_bpf_get(cset->dfl_cgrp);
6410 break;
6411 }
6412 cpu_relax();
6413 }
6414
6415 rcu_read_unlock();
6416 }
6417
6418 void cgroup_sk_free(struct sock_cgroup_data *skcd)
6419 {
6420 struct cgroup *cgrp = sock_cgroup_ptr(skcd);
6421
6422 cgroup_bpf_put(cgrp);
6423 cgroup_put(cgrp);
6424 }
6425
6426 #endif
6427
6428 #ifdef CONFIG_CGROUP_BPF
6429 int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
6430 enum bpf_attach_type type, u32 flags)
6431 {
6432 int ret;
6433
6434 mutex_lock(&cgroup_mutex);
6435 ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
6436 mutex_unlock(&cgroup_mutex);
6437 return ret;
6438 }
6439 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
6440 enum bpf_attach_type type, u32 flags)
6441 {
6442 int ret;
6443
6444 mutex_lock(&cgroup_mutex);
6445 ret = __cgroup_bpf_detach(cgrp, prog, type);
6446 mutex_unlock(&cgroup_mutex);
6447 return ret;
6448 }
6449 int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
6450 union bpf_attr __user *uattr)
6451 {
6452 int ret;
6453
6454 mutex_lock(&cgroup_mutex);
6455 ret = __cgroup_bpf_query(cgrp, attr, uattr);
6456 mutex_unlock(&cgroup_mutex);
6457 return ret;
6458 }
6459 #endif
6460
6461 #ifdef CONFIG_SYSFS
6462 static ssize_t show_delegatable_files(struct cftype *files, char *buf,
6463 ssize_t size, const char *prefix)
6464 {
6465 struct cftype *cft;
6466 ssize_t ret = 0;
6467
6468 for (cft = files; cft && cft->name[0] != '\0'; cft++) {
6469 if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
6470 continue;
6471
6472 if (prefix)
6473 ret += snprintf(buf + ret, size - ret, "%s.", prefix);
6474
6475 ret += snprintf(buf + ret, size - ret, "%s\n", cft->name);
6476
6477 if (WARN_ON(ret >= size))
6478 break;
6479 }
6480
6481 return ret;
6482 }
6483
6484 static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
6485 char *buf)
6486 {
6487 struct cgroup_subsys *ss;
6488 int ssid;
6489 ssize_t ret = 0;
6490
6491 ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
6492 NULL);
6493
6494 for_each_subsys(ss, ssid)
6495 ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
6496 PAGE_SIZE - ret,
6497 cgroup_subsys_name[ssid]);
6498
6499 return ret;
6500 }
6501 static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);
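/*
 * Example contents of /sys/kernel/cgroup/delegate assembled by the two
 * functions above (illustrative; the exact list depends on the compiled-in
 * controllers):
 *
 *	$ cat /sys/kernel/cgroup/delegate
 *	cgroup.procs
 *	cgroup.subtree_control
 *	cgroup.threads
 *	memory.oom.group
 */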
6502
6503 static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
6504 char *buf)
6505 {
6506 return snprintf(buf, PAGE_SIZE, "nsdelegate\nmemory_localevents\n");
6507 }
6508 static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
6509
6510 static struct attribute *cgroup_sysfs_attrs[] = {
6511 &cgroup_delegate_attr.attr,
6512 &cgroup_features_attr.attr,
6513 NULL,
6514 };
6515
6516 static const struct attribute_group cgroup_sysfs_attr_group = {
6517 .attrs = cgroup_sysfs_attrs,
6518 .name = "cgroup",
6519 };
6520
6521 static int __init cgroup_sysfs_init(void)
6522 {
6523 return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
6524 }
6525 subsys_initcall(cgroup_sysfs_init);
6526
6527 #endif