This source file includes the following definitions.
- warn_setuid_and_fcaps_mixed
- cap_capable
- cap_settime
- cap_ptrace_access_check
- cap_ptrace_traceme
- cap_capget
- cap_inh_is_capped
- cap_capset
- cap_inode_need_killpriv
- cap_inode_killpriv
- rootid_owns_currentns
- sansflags
- is_v2header
- is_v3header
- cap_inode_getsecurity
- rootid_from_xattr
- validheader
- cap_convert_nscap
- bprm_caps_from_vfs_caps
- get_vfs_caps_from_disk
- get_file_caps
- root_privileged
- __is_real
- __is_eff
- __is_suid
- handle_privileged_root
- __is_setuid
- __is_setgid
- nonroot_raised_pE
- cap_bprm_set_creds
- cap_inode_setxattr
- cap_inode_removexattr
- cap_emulate_setxuid
- cap_task_fix_setuid
- cap_safe_nice
- cap_task_setscheduler
- cap_task_setioprio
- cap_task_setnice
- cap_prctl_drop
- cap_task_prctl
- cap_vm_enough_memory
- cap_mmap_addr
- cap_mmap_file
- capability_init
1
2
3
4
5 #include <linux/capability.h>
6 #include <linux/audit.h>
7 #include <linux/init.h>
8 #include <linux/kernel.h>
9 #include <linux/lsm_hooks.h>
10 #include <linux/file.h>
11 #include <linux/mm.h>
12 #include <linux/mman.h>
13 #include <linux/pagemap.h>
14 #include <linux/swap.h>
15 #include <linux/skbuff.h>
16 #include <linux/netlink.h>
17 #include <linux/ptrace.h>
18 #include <linux/xattr.h>
19 #include <linux/hugetlb.h>
20 #include <linux/mount.h>
21 #include <linux/sched.h>
22 #include <linux/prctl.h>
23 #include <linux/securebits.h>
24 #include <linux/user_namespace.h>
25 #include <linux/binfmts.h>
26 #include <linux/personality.h>
27
28
29
30
31
32
33
34
35
36
37
38
39 static void warn_setuid_and_fcaps_mixed(const char *fname)
40 {
41 static int warned;
42 if (!warned) {
43 printk(KERN_INFO "warning: `%s' has both setuid-root and"
44 " effective capabilities. Therefore not raising all"
45 " capabilities.\n", fname);
46 warned = 1;
47 }
48 }
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65 int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
66 int cap, unsigned int opts)
67 {
68 struct user_namespace *ns = targ_ns;
69
70
71
72
73
74 for (;;) {
75
76 if (ns == cred->user_ns)
77 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
78
79
80
81
82
83 if (ns->level <= cred->user_ns->level)
84 return -EPERM;
85
86
87
88
89
90 if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
91 return 0;
92
93
94
95
96
97 ns = ns->parent;
98 }
99
100
101 }
102
103
104
105
106
107
108
109
110
111 int cap_settime(const struct timespec64 *ts, const struct timezone *tz)
112 {
113 if (!capable(CAP_SYS_TIME))
114 return -EPERM;
115 return 0;
116 }
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133 int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
134 {
135 int ret = 0;
136 const struct cred *cred, *child_cred;
137 const kernel_cap_t *caller_caps;
138
139 rcu_read_lock();
140 cred = current_cred();
141 child_cred = __task_cred(child);
142 if (mode & PTRACE_MODE_FSCREDS)
143 caller_caps = &cred->cap_effective;
144 else
145 caller_caps = &cred->cap_permitted;
146 if (cred->user_ns == child_cred->user_ns &&
147 cap_issubset(child_cred->cap_permitted, *caller_caps))
148 goto out;
149 if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
150 goto out;
151 ret = -EPERM;
152 out:
153 rcu_read_unlock();
154 return ret;
155 }
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170 int cap_ptrace_traceme(struct task_struct *parent)
171 {
172 int ret = 0;
173 const struct cred *cred, *child_cred;
174
175 rcu_read_lock();
176 cred = __task_cred(parent);
177 child_cred = current_cred();
178 if (cred->user_ns == child_cred->user_ns &&
179 cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
180 goto out;
181 if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
182 goto out;
183 ret = -EPERM;
184 out:
185 rcu_read_unlock();
186 return ret;
187 }
188
189
190
191
192
193
194
195
196
197
198
199 int cap_capget(struct task_struct *target, kernel_cap_t *effective,
200 kernel_cap_t *inheritable, kernel_cap_t *permitted)
201 {
202 const struct cred *cred;
203
204
205 rcu_read_lock();
206 cred = __task_cred(target);
207 *effective = cred->cap_effective;
208 *inheritable = cred->cap_inheritable;
209 *permitted = cred->cap_permitted;
210 rcu_read_unlock();
211 return 0;
212 }
213
214
215
216
217
218 static inline int cap_inh_is_capped(void)
219 {
220
221
222
223 if (cap_capable(current_cred(), current_cred()->user_ns,
224 CAP_SETPCAP, CAP_OPT_NONE) == 0)
225 return 0;
226 return 1;
227 }
228
229
230
231
232
233
234
235
236
237
238
239
240
241 int cap_capset(struct cred *new,
242 const struct cred *old,
243 const kernel_cap_t *effective,
244 const kernel_cap_t *inheritable,
245 const kernel_cap_t *permitted)
246 {
247 if (cap_inh_is_capped() &&
248 !cap_issubset(*inheritable,
249 cap_combine(old->cap_inheritable,
250 old->cap_permitted)))
251
252 return -EPERM;
253
254 if (!cap_issubset(*inheritable,
255 cap_combine(old->cap_inheritable,
256 old->cap_bset)))
257
258 return -EPERM;
259
260
261 if (!cap_issubset(*permitted, old->cap_permitted))
262 return -EPERM;
263
264
265 if (!cap_issubset(*effective, *permitted))
266 return -EPERM;
267
268 new->cap_effective = *effective;
269 new->cap_inheritable = *inheritable;
270 new->cap_permitted = *permitted;
271
272
273
274
275
276 new->cap_ambient = cap_intersect(new->cap_ambient,
277 cap_intersect(*permitted,
278 *inheritable));
279 if (WARN_ON(!cap_ambient_invariant_ok(new)))
280 return -EINVAL;
281 return 0;
282 }
283
284
285
286
287
288
289
290
291
292
293
294
295 int cap_inode_need_killpriv(struct dentry *dentry)
296 {
297 struct inode *inode = d_backing_inode(dentry);
298 int error;
299
300 error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);
301 return error > 0;
302 }
303
304
305
306
307
308
309
310
311
312 int cap_inode_killpriv(struct dentry *dentry)
313 {
314 int error;
315
316 error = __vfs_removexattr(dentry, XATTR_NAME_CAPS);
317 if (error == -EOPNOTSUPP)
318 error = 0;
319 return error;
320 }
321
322 static bool rootid_owns_currentns(kuid_t kroot)
323 {
324 struct user_namespace *ns;
325
326 if (!uid_valid(kroot))
327 return false;
328
329 for (ns = current_user_ns(); ; ns = ns->parent) {
330 if (from_kuid(ns, kroot) == 0)
331 return true;
332 if (ns == &init_user_ns)
333 break;
334 }
335
336 return false;
337 }
338
339 static __u32 sansflags(__u32 m)
340 {
341 return m & ~VFS_CAP_FLAGS_EFFECTIVE;
342 }
343
344 static bool is_v2header(size_t size, const struct vfs_cap_data *cap)
345 {
346 if (size != XATTR_CAPS_SZ_2)
347 return false;
348 return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
349 }
350
351 static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
352 {
353 if (size != XATTR_CAPS_SZ_3)
354 return false;
355 return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
356 }
357
358
359
360
361
362
363
364
365
366
367
368
369 int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
370 bool alloc)
371 {
372 int size, ret;
373 kuid_t kroot;
374 uid_t root, mappedroot;
375 char *tmpbuf = NULL;
376 struct vfs_cap_data *cap;
377 struct vfs_ns_cap_data *nscap;
378 struct dentry *dentry;
379 struct user_namespace *fs_ns;
380
381 if (strcmp(name, "capability") != 0)
382 return -EOPNOTSUPP;
383
384 dentry = d_find_any_alias(inode);
385 if (!dentry)
386 return -EINVAL;
387
388 size = sizeof(struct vfs_ns_cap_data);
389 ret = (int) vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS,
390 &tmpbuf, size, GFP_NOFS);
391 dput(dentry);
392
393 if (ret < 0)
394 return ret;
395
396 fs_ns = inode->i_sb->s_user_ns;
397 cap = (struct vfs_cap_data *) tmpbuf;
398 if (is_v2header((size_t) ret, cap)) {
399
400
401 if (alloc)
402 *buffer = tmpbuf;
403 else
404 kfree(tmpbuf);
405 return ret;
406 } else if (!is_v3header((size_t) ret, cap)) {
407 kfree(tmpbuf);
408 return -EINVAL;
409 }
410
411 nscap = (struct vfs_ns_cap_data *) tmpbuf;
412 root = le32_to_cpu(nscap->rootid);
413 kroot = make_kuid(fs_ns, root);
414
415
416
417 mappedroot = from_kuid(current_user_ns(), kroot);
418 if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
419 if (alloc) {
420 *buffer = tmpbuf;
421 nscap->rootid = cpu_to_le32(mappedroot);
422 } else
423 kfree(tmpbuf);
424 return size;
425 }
426
427 if (!rootid_owns_currentns(kroot)) {
428 kfree(tmpbuf);
429 return -EOPNOTSUPP;
430 }
431
432
433 size = sizeof(struct vfs_cap_data);
434 if (alloc) {
435 *buffer = kmalloc(size, GFP_ATOMIC);
436 if (*buffer) {
437 struct vfs_cap_data *cap = *buffer;
438 __le32 nsmagic, magic;
439 magic = VFS_CAP_REVISION_2;
440 nsmagic = le32_to_cpu(nscap->magic_etc);
441 if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
442 magic |= VFS_CAP_FLAGS_EFFECTIVE;
443 memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
444 cap->magic_etc = cpu_to_le32(magic);
445 } else {
446 size = -ENOMEM;
447 }
448 }
449 kfree(tmpbuf);
450 return size;
451 }
452
453 static kuid_t rootid_from_xattr(const void *value, size_t size,
454 struct user_namespace *task_ns)
455 {
456 const struct vfs_ns_cap_data *nscap = value;
457 uid_t rootid = 0;
458
459 if (size == XATTR_CAPS_SZ_3)
460 rootid = le32_to_cpu(nscap->rootid);
461
462 return make_kuid(task_ns, rootid);
463 }
464
465 static bool validheader(size_t size, const struct vfs_cap_data *cap)
466 {
467 return is_v2header(size, cap) || is_v3header(size, cap);
468 }
469
470
471
472
473
474
475
476 int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
477 {
478 struct vfs_ns_cap_data *nscap;
479 uid_t nsrootid;
480 const struct vfs_cap_data *cap = *ivalue;
481 __u32 magic, nsmagic;
482 struct inode *inode = d_backing_inode(dentry);
483 struct user_namespace *task_ns = current_user_ns(),
484 *fs_ns = inode->i_sb->s_user_ns;
485 kuid_t rootid;
486 size_t newsize;
487
488 if (!*ivalue)
489 return -EINVAL;
490 if (!validheader(size, cap))
491 return -EINVAL;
492 if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
493 return -EPERM;
494 if (size == XATTR_CAPS_SZ_2)
495 if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
496
497 return size;
498
499 rootid = rootid_from_xattr(*ivalue, size, task_ns);
500 if (!uid_valid(rootid))
501 return -EINVAL;
502
503 nsrootid = from_kuid(fs_ns, rootid);
504 if (nsrootid == -1)
505 return -EINVAL;
506
507 newsize = sizeof(struct vfs_ns_cap_data);
508 nscap = kmalloc(newsize, GFP_ATOMIC);
509 if (!nscap)
510 return -ENOMEM;
511 nscap->rootid = cpu_to_le32(nsrootid);
512 nsmagic = VFS_CAP_REVISION_3;
513 magic = le32_to_cpu(cap->magic_etc);
514 if (magic & VFS_CAP_FLAGS_EFFECTIVE)
515 nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
516 nscap->magic_etc = cpu_to_le32(nsmagic);
517 memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
518
519 kvfree(*ivalue);
520 *ivalue = nscap;
521 return newsize;
522 }
523
524
525
526
527
528 static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
529 struct linux_binprm *bprm,
530 bool *effective,
531 bool *has_fcap)
532 {
533 struct cred *new = bprm->cred;
534 unsigned i;
535 int ret = 0;
536
537 if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
538 *effective = true;
539
540 if (caps->magic_etc & VFS_CAP_REVISION_MASK)
541 *has_fcap = true;
542
543 CAP_FOR_EACH_U32(i) {
544 __u32 permitted = caps->permitted.cap[i];
545 __u32 inheritable = caps->inheritable.cap[i];
546
547
548
549
550
551 new->cap_permitted.cap[i] =
552 (new->cap_bset.cap[i] & permitted) |
553 (new->cap_inheritable.cap[i] & inheritable);
554
555 if (permitted & ~new->cap_permitted.cap[i])
556
557 ret = -EPERM;
558 }
559
560
561
562
563
564
565 return *effective ? ret : 0;
566 }
567
568
569
570
571 int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
572 {
573 struct inode *inode = d_backing_inode(dentry);
574 __u32 magic_etc;
575 unsigned tocopy, i;
576 int size;
577 struct vfs_ns_cap_data data, *nscaps = &data;
578 struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
579 kuid_t rootkuid;
580 struct user_namespace *fs_ns;
581
582 memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
583
584 if (!inode)
585 return -ENODATA;
586
587 fs_ns = inode->i_sb->s_user_ns;
588 size = __vfs_getxattr((struct dentry *)dentry, inode,
589 XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
590 if (size == -ENODATA || size == -EOPNOTSUPP)
591
592 return -ENODATA;
593
594 if (size < 0)
595 return size;
596
597 if (size < sizeof(magic_etc))
598 return -EINVAL;
599
600 cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
601
602 rootkuid = make_kuid(fs_ns, 0);
603 switch (magic_etc & VFS_CAP_REVISION_MASK) {
604 case VFS_CAP_REVISION_1:
605 if (size != XATTR_CAPS_SZ_1)
606 return -EINVAL;
607 tocopy = VFS_CAP_U32_1;
608 break;
609 case VFS_CAP_REVISION_2:
610 if (size != XATTR_CAPS_SZ_2)
611 return -EINVAL;
612 tocopy = VFS_CAP_U32_2;
613 break;
614 case VFS_CAP_REVISION_3:
615 if (size != XATTR_CAPS_SZ_3)
616 return -EINVAL;
617 tocopy = VFS_CAP_U32_3;
618 rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
619 break;
620
621 default:
622 return -EINVAL;
623 }
624
625
626
627 if (!rootid_owns_currentns(rootkuid))
628 return -ENODATA;
629
630 CAP_FOR_EACH_U32(i) {
631 if (i >= tocopy)
632 break;
633 cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
634 cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
635 }
636
637 cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
638 cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
639
640 cpu_caps->rootid = rootkuid;
641
642 return 0;
643 }
644
645
646
647
648
649
650 static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_fcap)
651 {
652 int rc = 0;
653 struct cpu_vfs_cap_data vcaps;
654
655 cap_clear(bprm->cred->cap_permitted);
656
657 if (!file_caps_enabled)
658 return 0;
659
660 if (!mnt_may_suid(bprm->file->f_path.mnt))
661 return 0;
662
663
664
665
666
667
668 if (!current_in_userns(bprm->file->f_path.mnt->mnt_sb->s_user_ns))
669 return 0;
670
671 rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
672 if (rc < 0) {
673 if (rc == -EINVAL)
674 printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
675 bprm->filename);
676 else if (rc == -ENODATA)
677 rc = 0;
678 goto out;
679 }
680
681 rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap);
682
683 out:
684 if (rc)
685 cap_clear(bprm->cred->cap_permitted);
686
687 return rc;
688 }
689
690 static inline bool root_privileged(void) { return !issecure(SECURE_NOROOT); }
691
692 static inline bool __is_real(kuid_t uid, struct cred *cred)
693 { return uid_eq(cred->uid, uid); }
694
695 static inline bool __is_eff(kuid_t uid, struct cred *cred)
696 { return uid_eq(cred->euid, uid); }
697
698 static inline bool __is_suid(kuid_t uid, struct cred *cred)
699 { return !__is_real(uid, cred) && __is_eff(uid, cred); }
700
701
702
703
704
705
706
707
708
709
710
711
712
713 static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,
714 bool *effective, kuid_t root_uid)
715 {
716 const struct cred *old = current_cred();
717 struct cred *new = bprm->cred;
718
719 if (!root_privileged())
720 return;
721
722
723
724
725
726 if (has_fcap && __is_suid(root_uid, new)) {
727 warn_setuid_and_fcaps_mixed(bprm->filename);
728 return;
729 }
730
731
732
733
734
735 if (__is_eff(root_uid, new) || __is_real(root_uid, new)) {
736
737 new->cap_permitted = cap_combine(old->cap_bset,
738 old->cap_inheritable);
739 }
740
741
742
743 if (__is_eff(root_uid, new))
744 *effective = true;
745 }
746
/* True if @target's cap_<field> set is not a subset of @source's. */
#define __cap_gained(field, target, source) \
	(!cap_issubset((target)->cap_##field, (source)->cap_##field))
/* True if @cred's cap_<target> set is not a subset of its cap_<source> set. */
#define __cap_grew(target, source, cred) \
	(!cap_issubset((cred)->cap_##target, (cred)->cap_##source))
/* True if @cred's cap_<field> set contains all of CAP_FULL_SET. */
#define __cap_full(field, cred) \
	(cap_issubset(CAP_FULL_SET, (cred)->cap_##field))
753
754 static inline bool __is_setuid(struct cred *new, const struct cred *old)
755 { return !uid_eq(new->euid, old->uid); }
756
757 static inline bool __is_setgid(struct cred *new, const struct cred *old)
758 { return !gid_eq(new->egid, old->gid); }
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777 static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old,
778 kuid_t root, bool has_fcap)
779 {
780 bool ret = false;
781
782 if ((__cap_grew(effective, ambient, new) &&
783 !(__cap_full(effective, new) &&
784 (__is_eff(root, new) || __is_real(root, new)) &&
785 root_privileged())) ||
786 (root_privileged() &&
787 __is_suid(root, new) &&
788 !__cap_full(effective, new)) ||
789 (!__is_setuid(new, old) &&
790 ((has_fcap &&
791 __cap_gained(permitted, new, old)) ||
792 __cap_gained(ambient, new, old))))
793
794 ret = true;
795
796 return ret;
797 }
798
799
800
801
802
803
804
805
806
807 int cap_bprm_set_creds(struct linux_binprm *bprm)
808 {
809 const struct cred *old = current_cred();
810 struct cred *new = bprm->cred;
811 bool effective = false, has_fcap = false, is_setid;
812 int ret;
813 kuid_t root_uid;
814
815 new->cap_ambient = old->cap_ambient;
816 if (WARN_ON(!cap_ambient_invariant_ok(old)))
817 return -EPERM;
818
819 ret = get_file_caps(bprm, &effective, &has_fcap);
820 if (ret < 0)
821 return ret;
822
823 root_uid = make_kuid(new->user_ns, 0);
824
825 handle_privileged_root(bprm, has_fcap, &effective, root_uid);
826
827
828 if (__cap_gained(permitted, new, old))
829 bprm->per_clear |= PER_CLEAR_ON_SETID;
830
831
832
833
834
835
836 is_setid = __is_setuid(new, old) || __is_setgid(new, old);
837
838 if ((is_setid || __cap_gained(permitted, new, old)) &&
839 ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
840 !ptracer_capable(current, new->user_ns))) {
841
842 if (!ns_capable(new->user_ns, CAP_SETUID) ||
843 (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
844 new->euid = new->uid;
845 new->egid = new->gid;
846 }
847 new->cap_permitted = cap_intersect(new->cap_permitted,
848 old->cap_permitted);
849 }
850
851 new->suid = new->fsuid = new->euid;
852 new->sgid = new->fsgid = new->egid;
853
854
855 if (has_fcap || is_setid)
856 cap_clear(new->cap_ambient);
857
858
859
860
861
862 new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
863
864
865
866
867
868 if (effective)
869 new->cap_effective = new->cap_permitted;
870 else
871 new->cap_effective = new->cap_ambient;
872
873 if (WARN_ON(!cap_ambient_invariant_ok(new)))
874 return -EPERM;
875
876 if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
877 ret = audit_log_bprm_fcaps(bprm, new, old);
878 if (ret < 0)
879 return ret;
880 }
881
882 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
883
884 if (WARN_ON(!cap_ambient_invariant_ok(new)))
885 return -EPERM;
886
887
888 bprm->cap_elevated = 0;
889 if (is_setid ||
890 (!__is_real(root_uid, new) &&
891 (effective ||
892 __cap_grew(permitted, ambient, new))))
893 bprm->cap_elevated = 1;
894
895 return 0;
896 }
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912 int cap_inode_setxattr(struct dentry *dentry, const char *name,
913 const void *value, size_t size, int flags)
914 {
915 struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
916
917
918 if (strncmp(name, XATTR_SECURITY_PREFIX,
919 XATTR_SECURITY_PREFIX_LEN) != 0)
920 return 0;
921
922
923
924
925
926 if (strcmp(name, XATTR_NAME_CAPS) == 0)
927 return 0;
928
929 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
930 return -EPERM;
931 return 0;
932 }
933
934
935
936
937
938
939
940
941
942
943
944
945 int cap_inode_removexattr(struct dentry *dentry, const char *name)
946 {
947 struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
948
949
950 if (strncmp(name, XATTR_SECURITY_PREFIX,
951 XATTR_SECURITY_PREFIX_LEN) != 0)
952 return 0;
953
954 if (strcmp(name, XATTR_NAME_CAPS) == 0) {
955
956 struct inode *inode = d_backing_inode(dentry);
957 if (!inode)
958 return -EINVAL;
959 if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
960 return -EPERM;
961 return 0;
962 }
963
964 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
965 return -EPERM;
966 return 0;
967 }
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998 static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
999 {
1000 kuid_t root_uid = make_kuid(old->user_ns, 0);
1001
1002 if ((uid_eq(old->uid, root_uid) ||
1003 uid_eq(old->euid, root_uid) ||
1004 uid_eq(old->suid, root_uid)) &&
1005 (!uid_eq(new->uid, root_uid) &&
1006 !uid_eq(new->euid, root_uid) &&
1007 !uid_eq(new->suid, root_uid))) {
1008 if (!issecure(SECURE_KEEP_CAPS)) {
1009 cap_clear(new->cap_permitted);
1010 cap_clear(new->cap_effective);
1011 }
1012
1013
1014
1015
1016
1017
1018 cap_clear(new->cap_ambient);
1019 }
1020 if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
1021 cap_clear(new->cap_effective);
1022 if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
1023 new->cap_effective = new->cap_permitted;
1024 }
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035 int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
1036 {
1037 switch (flags) {
1038 case LSM_SETID_RE:
1039 case LSM_SETID_ID:
1040 case LSM_SETID_RES:
1041
1042
1043 if (!issecure(SECURE_NO_SETUID_FIXUP))
1044 cap_emulate_setxuid(new, old);
1045 break;
1046
1047 case LSM_SETID_FS:
1048
1049
1050
1051
1052
1053
1054 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
1055 kuid_t root_uid = make_kuid(old->user_ns, 0);
1056 if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
1057 new->cap_effective =
1058 cap_drop_fs_set(new->cap_effective);
1059
1060 if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
1061 new->cap_effective =
1062 cap_raise_fs_set(new->cap_effective,
1063 new->cap_permitted);
1064 }
1065 break;
1066
1067 default:
1068 return -EINVAL;
1069 }
1070
1071 return 0;
1072 }
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084 static int cap_safe_nice(struct task_struct *p)
1085 {
1086 int is_subset, ret = 0;
1087
1088 rcu_read_lock();
1089 is_subset = cap_issubset(__task_cred(p)->cap_permitted,
1090 current_cred()->cap_permitted);
1091 if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
1092 ret = -EPERM;
1093 rcu_read_unlock();
1094
1095 return ret;
1096 }
1097
1098
1099
1100
1101
1102
1103
1104
/**
 * cap_task_setscheduler - Check permission to change a task's scheduling
 * @p: task to be affected
 *
 * Return: the result of cap_safe_nice() for @p (0 or -EPERM).
 */
int cap_task_setscheduler(struct task_struct *p)
{
	return cap_safe_nice(p);
}
1109
1110
1111
1112
1113
1114
1115
1116
1117
/**
 * cap_task_setioprio - Check permission to change a task's I/O priority
 * @p: task to be affected
 * @ioprio: requested I/O priority; not examined here
 *
 * Return: the result of cap_safe_nice() for @p (0 or -EPERM).
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	return cap_safe_nice(p);
}
1122
1123
1124
1125
1126
1127
1128
1129
1130
/**
 * cap_task_setnice - Check permission to change a task's nice value
 * @p: task to be affected
 * @nice: requested nice value; not examined here
 *
 * Return: the result of cap_safe_nice() for @p (0 or -EPERM).
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	return cap_safe_nice(p);
}
1135
1136
1137
1138
1139
1140 static int cap_prctl_drop(unsigned long cap)
1141 {
1142 struct cred *new;
1143
1144 if (!ns_capable(current_user_ns(), CAP_SETPCAP))
1145 return -EPERM;
1146 if (!cap_valid(cap))
1147 return -EINVAL;
1148
1149 new = prepare_creds();
1150 if (!new)
1151 return -ENOMEM;
1152 cap_lower(new->cap_bset, cap);
1153 return commit_creds(new);
1154 }
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168 int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
1169 unsigned long arg4, unsigned long arg5)
1170 {
1171 const struct cred *old = current_cred();
1172 struct cred *new;
1173
1174 switch (option) {
1175 case PR_CAPBSET_READ:
1176 if (!cap_valid(arg2))
1177 return -EINVAL;
1178 return !!cap_raised(old->cap_bset, arg2);
1179
1180 case PR_CAPBSET_DROP:
1181 return cap_prctl_drop(arg2);
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202 case PR_SET_SECUREBITS:
1203 if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
1204 & (old->securebits ^ arg2))
1205 || ((old->securebits & SECURE_ALL_LOCKS & ~arg2))
1206 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))
1207 || (cap_capable(current_cred(),
1208 current_cred()->user_ns,
1209 CAP_SETPCAP,
1210 CAP_OPT_NONE) != 0)
1211
1212
1213
1214
1215
1216
1217
1218 )
1219
1220 return -EPERM;
1221
1222 new = prepare_creds();
1223 if (!new)
1224 return -ENOMEM;
1225 new->securebits = arg2;
1226 return commit_creds(new);
1227
1228 case PR_GET_SECUREBITS:
1229 return old->securebits;
1230
1231 case PR_GET_KEEPCAPS:
1232 return !!issecure(SECURE_KEEP_CAPS);
1233
1234 case PR_SET_KEEPCAPS:
1235 if (arg2 > 1)
1236 return -EINVAL;
1237 if (issecure(SECURE_KEEP_CAPS_LOCKED))
1238 return -EPERM;
1239
1240 new = prepare_creds();
1241 if (!new)
1242 return -ENOMEM;
1243 if (arg2)
1244 new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
1245 else
1246 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
1247 return commit_creds(new);
1248
1249 case PR_CAP_AMBIENT:
1250 if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
1251 if (arg3 | arg4 | arg5)
1252 return -EINVAL;
1253
1254 new = prepare_creds();
1255 if (!new)
1256 return -ENOMEM;
1257 cap_clear(new->cap_ambient);
1258 return commit_creds(new);
1259 }
1260
1261 if (((!cap_valid(arg3)) | arg4 | arg5))
1262 return -EINVAL;
1263
1264 if (arg2 == PR_CAP_AMBIENT_IS_SET) {
1265 return !!cap_raised(current_cred()->cap_ambient, arg3);
1266 } else if (arg2 != PR_CAP_AMBIENT_RAISE &&
1267 arg2 != PR_CAP_AMBIENT_LOWER) {
1268 return -EINVAL;
1269 } else {
1270 if (arg2 == PR_CAP_AMBIENT_RAISE &&
1271 (!cap_raised(current_cred()->cap_permitted, arg3) ||
1272 !cap_raised(current_cred()->cap_inheritable,
1273 arg3) ||
1274 issecure(SECURE_NO_CAP_AMBIENT_RAISE)))
1275 return -EPERM;
1276
1277 new = prepare_creds();
1278 if (!new)
1279 return -ENOMEM;
1280 if (arg2 == PR_CAP_AMBIENT_RAISE)
1281 cap_raise(new->cap_ambient, arg3);
1282 else
1283 cap_lower(new->cap_ambient, arg3);
1284 return commit_creds(new);
1285 }
1286
1287 default:
1288
1289 return -ENOSYS;
1290 }
1291 }
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301 int cap_vm_enough_memory(struct mm_struct *mm, long pages)
1302 {
1303 int cap_sys_admin = 0;
1304
1305 if (cap_capable(current_cred(), &init_user_ns,
1306 CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0)
1307 cap_sys_admin = 1;
1308
1309 return cap_sys_admin;
1310 }
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321 int cap_mmap_addr(unsigned long addr)
1322 {
1323 int ret = 0;
1324
1325 if (addr < dac_mmap_min_addr) {
1326 ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
1327 CAP_OPT_NONE);
1328
1329 if (ret == 0)
1330 current->flags |= PF_SUPERPRIV;
1331 }
1332 return ret;
1333 }
1334
/**
 * cap_mmap_file - File-mapping hook of the capability module
 * @file: file being mapped; not examined
 * @reqprot: protection requested by the caller; not examined
 * @prot: protection that will be applied; not examined
 * @flags: mapping flags; not examined
 *
 * Return: always 0; no check is performed here.
 */
int cap_mmap_file(struct file *file, unsigned long reqprot,
		  unsigned long prot, unsigned long flags)
{
	const int allowed = 0;

	return allowed;
}
1340
1341 #ifdef CONFIG_SECURITY
1342
1343 static struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
1344 LSM_HOOK_INIT(capable, cap_capable),
1345 LSM_HOOK_INIT(settime, cap_settime),
1346 LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),
1347 LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),
1348 LSM_HOOK_INIT(capget, cap_capget),
1349 LSM_HOOK_INIT(capset, cap_capset),
1350 LSM_HOOK_INIT(bprm_set_creds, cap_bprm_set_creds),
1351 LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
1352 LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
1353 LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
1354 LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
1355 LSM_HOOK_INIT(mmap_file, cap_mmap_file),
1356 LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
1357 LSM_HOOK_INIT(task_prctl, cap_task_prctl),
1358 LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),
1359 LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
1360 LSM_HOOK_INIT(task_setnice, cap_task_setnice),
1361 LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
1362 };
1363
1364 static int __init capability_init(void)
1365 {
1366 security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
1367 "capability");
1368 return 0;
1369 }
1370
1371 DEFINE_LSM(capability) = {
1372 .name = "capability",
1373 .order = LSM_ORDER_FIRST,
1374 .init = capability_init,
1375 };
1376
1377 #endif