/*
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation, version 2 of the
 *  License.
 */

#include <linux/export.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
#include <linux/proc_ns.h>
#include <linux/highuid.h>
#include <linux/cred.h>
#include <linux/securebits.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/projid.h>
#include <linux/fs_struct.h>

static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);

static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *map);

static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
{
	/* Start with the same capabilities as init but useless for doing
	 * anything as the capabilities are bound to the new user namespace.
	 */
	cred->securebits = SECUREBITS_DEFAULT;
	cred->cap_inheritable = CAP_EMPTY_SET;
	cred->cap_permitted = CAP_FULL_SET;
	cred->cap_effective = CAP_FULL_SET;
	cred->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
	key_put(cred->request_key_auth);
	cred->request_key_auth = NULL;
#endif
	/* tgcred will be cleared in our caller because CLONE_THREAD won't be set */
	cred->user_ns = user_ns;
}

/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 */
int create_user_ns(struct cred *new)
{
	struct user_namespace *ns, *parent_ns = new->user_ns;
	kuid_t owner = new->euid;
	kgid_t group = new->egid;
	int ret;

	if (parent_ns->level > 32)
		return -EUSERS;

	/*
	 * Verify that we can not violate the policy of which files
	 * may be accessed that is specified by the root directory,
	 * by verifying that the root directory is at the root of the
	 * mount namespace which allows all files to be accessed.
	 */
	if (current_chrooted())
		return -EPERM;

	/* The creator needs a mapping in the parent user namespace
	 * or else we won't be able to reasonably tell userspace who
	 * created a user_namespace.
	 */
	if (!kuid_has_mapping(parent_ns, owner) ||
	    !kgid_has_mapping(parent_ns, group))
		return -EPERM;

	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
	if (!ns)
		return -ENOMEM;

	ret = ns_alloc_inum(&ns->ns);
	if (ret) {
		kmem_cache_free(user_ns_cachep, ns);
		return ret;
	}
	ns->ns.ops = &userns_operations;

	atomic_set(&ns->count, 1);
	/* Leave the new->user_ns reference with the new user namespace. */
	ns->parent = parent_ns;
	ns->level = parent_ns->level + 1;
	ns->owner = owner;
	ns->group = group;

	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
	mutex_lock(&userns_state_mutex);
	ns->flags = parent_ns->flags;
	mutex_unlock(&userns_state_mutex);

	set_cred_user_ns(new, ns);

#ifdef CONFIG_PERSISTENT_KEYRINGS
	init_rwsem(&ns->persistent_keyring_register_sem);
#endif
	return 0;
}
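
/*
 * Illustrative userspace sketch (not part of this source): a new user
 * namespace is normally created via unshare(CLONE_NEWUSER) or
 * clone(CLONE_NEWUSER) and then given an id mapping through the
 * /proc/<pid>/uid_map interface implemented further below.  Note that the
 * outer uid has to be read before unshare(), because afterwards geteuid()
 * reports the overflow uid until a mapping has been written.
 *
 *	#define _GNU_SOURCE
 *	#include <sched.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		uid_t outer = geteuid();
 *		char buf[64];
 *		int len;
 *		FILE *f;
 *
 *		if (unshare(CLONE_NEWUSER) != 0)
 *			return 1;
 *		// Map uid 0 inside the new namespace to our old euid outside.
 *		len = snprintf(buf, sizeof(buf), "0 %u 1\n", (unsigned)outer);
 *		f = fopen("/proc/self/uid_map", "w");
 *		if (!f || fwrite(buf, 1, len, f) != (size_t)len)
 *			return 1;
 *		return fclose(f) ? 1 : 0;
 *	}
 */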

int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
{
	struct cred *cred;
	int err = -ENOMEM;

	if (!(unshare_flags & CLONE_NEWUSER))
		return 0;

	cred = prepare_creds();
	if (cred) {
		err = create_user_ns(cred);
		if (err)
			put_cred(cred);
		else
			*new_cred = cred;
	}

	return err;
}

void free_user_ns(struct user_namespace *ns)
{
	struct user_namespace *parent;

	do {
		parent = ns->parent;
#ifdef CONFIG_PERSISTENT_KEYRINGS
		key_put(ns->persistent_keyring_register);
#endif
		ns_free_inum(&ns->ns);
		kmem_cache_free(user_ns_cachep, ns);
		ns = parent;
	} while (atomic_dec_and_test(&parent->count));
}
EXPORT_SYMBOL(free_user_ns);

static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
{
	unsigned idx, extents;
	u32 first, last, id2;

	id2 = id + count - 1;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_rmb();
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last &&
		    (id2 >= first && id2 <= last))
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].lower_first;
	else
		id = (u32) -1;

	return id;
}

static u32 map_id_down(struct uid_gid_map *map, u32 id)
{
	unsigned idx, extents;
	u32 first, last;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_rmb();
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last)
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].lower_first;
	else
		id = (u32) -1;

	return id;
}

static u32 map_id_up(struct uid_gid_map *map, u32 id)
{
	unsigned idx, extents;
	u32 first, last;

	/* Find the matching extent */
	extents = map->nr_extents;
	smp_rmb();
	for (idx = 0; idx < extents; idx++) {
		first = map->extent[idx].lower_first;
		last = first + map->extent[idx].count - 1;
		if (id >= first && id <= last)
			break;
	}
	/* Map the id or note failure */
	if (idx < extents)
		id = (id - first) + map->extent[idx].first;
	else
		id = (u32) -1;

	return id;
}
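
/*
 * Worked example (illustrative): with a single extent
 *
 *	{ .first = 0, .lower_first = 100000, .count = 65536 }
 *
 * map_id_down() translates a namespace-local id to a kernel id and
 * map_id_up() performs the inverse:
 *
 *	map_id_down(map, 0)     == 100000
 *	map_id_down(map, 1000)  == 101000
 *	map_id_down(map, 65536) == (u32) -1	(outside every extent)
 *	map_id_up(map, 101000)  == 1000
 *	map_id_up(map, 99999)   == (u32) -1
 *
 * map_id_range_down() additionally requires the whole [id, id + count - 1]
 * range to sit inside one extent, which map_write() below relies on when
 * translating lower ids through the parent's map.
 */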

/**
 *	make_kuid - Map a user-namespace uid pair into a kuid.
 *	@ns:  User namespace that the uid is in
 *	@uid: User identifier
 *
 *	Maps a user-namespace uid pair into a kernel internal kuid,
 *	and returns that kuid.
 *
 *	When there is no mapping defined for the user-namespace uid
 *	pair INVALID_UID is returned.  Callers are expected to test
 *	for and handle INVALID_UID being returned.  INVALID_UID
 *	may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
	/* Map the uid to a global kernel uid */
	return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);

/**
 *	from_kuid - Create a uid from a kuid user-namespace pair.
 *	@targ: The user namespace we want a uid in.
 *	@kuid: The kernel internal uid to start with.
 *
 *	Map @kuid into the user-namespace specified by @targ and
 *	return the resulting uid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
	/* Map the uid from a global kernel uid */
	return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);

/**
 *	from_kuid_munged - Create a uid from a kuid user-namespace pair.
 *	@targ: The user namespace we want a uid in.
 *	@kuid: The kernel internal uid to start with.
 *
 *	Map @kuid into the user-namespace specified by @targ and
 *	return the resulting uid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kuid, from_kuid_munged never fails and always
 *	returns a valid uid.  This makes from_kuid_munged appropriate
 *	for use in syscalls like stat and getuid where failing the
 *	system call and failing to provide a valid uid are not
 *	options.
 *
 *	If @kuid has no mapping in @targ overflowuid is returned.
 */
uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
{
	uid_t uid;
	uid = from_kuid(targ, kuid);

	if (uid == (uid_t) -1)
		uid = overflowuid;
	return uid;
}
EXPORT_SYMBOL(from_kuid_munged);
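
/*
 * Illustrative sketch of the usual conversion pattern (not code from this
 * file): ids are converted at the kernel/userspace boundary, and the
 * _munged variant is used where the syscall is not allowed to fail.
 *
 *	// Userspace -> kernel: reject ids the caller cannot name.
 *	kuid_t kuid = make_kuid(current_user_ns(), uid);
 *	if (!uid_valid(kuid))
 *		return -EINVAL;
 *
 *	// Kernel -> userspace, e.g. when filling a stat buffer: never
 *	// fails, unmapped ids are reported as overflowuid (usually 65534).
 *	uid_t out = from_kuid_munged(current_user_ns(), inode->i_uid);
 */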

/**
 *	make_kgid - Map a user-namespace gid pair into a kgid.
 *	@ns:  User namespace that the gid is in
 *	@gid: group identifier
 *
 *	Maps a user-namespace gid pair into a kernel internal kgid,
 *	and returns that kgid.
 *
 *	When there is no mapping defined for the user-namespace gid
 *	pair INVALID_GID is returned.  Callers are expected to test
 *	for and handle INVALID_GID being returned.  INVALID_GID may be
 *	tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
	/* Map the gid to a global kernel gid */
	return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);

/**
 *	from_kgid - Create a gid from a kgid user-namespace pair.
 *	@targ: The user namespace we want a gid in.
 *	@kgid: The kernel internal gid to start with.
 *
 *	Map @kgid into the user-namespace specified by @targ and
 *	return the resulting gid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kgid has no mapping in @targ (gid_t)-1 is returned.
 */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
	/* Map the gid from a global kernel gid */
	return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);

/**
 *	from_kgid_munged - Create a gid from a kgid user-namespace pair.
 *	@targ: The user namespace we want a gid in.
 *	@kgid: The kernel internal gid to start with.
 *
 *	Map @kgid into the user-namespace specified by @targ and
 *	return the resulting gid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kgid, from_kgid_munged never fails and always
 *	returns a valid gid.  This makes from_kgid_munged appropriate
 *	for use in syscalls like stat and getgid where failing the
 *	system call and failing to provide a valid gid are not options.
 *
 *	If @kgid has no mapping in @targ overflowgid is returned.
 */
gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
{
	gid_t gid;
	gid = from_kgid(targ, kgid);

	if (gid == (gid_t) -1)
		gid = overflowgid;
	return gid;
}
EXPORT_SYMBOL(from_kgid_munged);

/**
 *	make_kprojid - Map a user-namespace projid pair into a kprojid.
 *	@ns:  User namespace that the projid is in
 *	@projid: Project identifier
 *
 *	Maps a user-namespace projid pair into a kernel internal kprojid,
 *	and returns that kprojid.
 *
 *	When there is no mapping defined for the user-namespace projid
 *	pair INVALID_PROJID is returned.  Callers are expected to test
 *	for and handle INVALID_PROJID being returned.  INVALID_PROJID
 *	may be tested for using projid_valid().
 */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
	/* Map the projid to a global kernel projid */
	return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);

/**
 *	from_kprojid - Create a projid from a kprojid user-namespace pair.
 *	@targ: The user namespace we want a projid in.
 *	@kprojid: The kernel internal project identifier to start with.
 *
 *	Map @kprojid into the user-namespace specified by @targ and
 *	return the resulting projid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 */
projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
	/* Map the projid from a global kernel projid */
	return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
}
EXPORT_SYMBOL(from_kprojid);

/**
 *	from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
 *	@targ: The user namespace we want a projid in.
 *	@kprojid: The kernel internal projid to start with.
 *
 *	Map @kprojid into the user-namespace specified by @targ and
 *	return the resulting projid.
 *
 *	There is always a mapping into the initial user_namespace.
 *
 *	Unlike from_kprojid, from_kprojid_munged never fails and always
 *	returns a valid projid.  This makes from_kprojid_munged
 *	appropriate for use in syscalls like stat where failing the
 *	system call and failing to provide a valid projid are not
 *	options.
 *
 *	If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
 */
projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
{
	projid_t projid;
	projid = from_kprojid(targ, kprojid);

	if (projid == (projid_t) -1)
		projid = OVERFLOW_PROJID;
	return projid;
}
EXPORT_SYMBOL(from_kprojid_munged);

static int uid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	uid_t lower;

	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static int gid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	gid_t lower;

	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static int projid_m_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	struct uid_gid_extent *extent = v;
	struct user_namespace *lower_ns;
	projid_t lower;

	lower_ns = seq_user_ns(seq);
	if ((lower_ns == ns) && lower_ns->parent)
		lower_ns = lower_ns->parent;

	lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));

	seq_printf(seq, "%10u %10u %10u\n",
		extent->first,
		lower,
		extent->count);

	return 0;
}

static void *m_start(struct seq_file *seq, loff_t *ppos,
		     struct uid_gid_map *map)
{
	struct uid_gid_extent *extent = NULL;
	loff_t pos = *ppos;

	if (pos < map->nr_extents)
		extent = &map->extent[pos];

	return extent;
}

static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->uid_map);
}

static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->gid_map);
}

static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->projid_map);
}

static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return seq->op->start(seq, pos);
}

static void m_stop(struct seq_file *seq, void *v)
{
	return;
}

const struct seq_operations proc_uid_seq_operations = {
	.start = uid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = uid_m_show,
};

const struct seq_operations proc_gid_seq_operations = {
	.start = gid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = gid_m_show,
};

const struct seq_operations proc_projid_seq_operations = {
	.start = projid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = projid_m_show,
};
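
/*
 * Example output (illustrative) of reading /proc/<pid>/uid_map through the
 * seq_file operations above; each extent is printed by uid_m_show() as
 * "<first id in the namespace> <first id in the reader's namespace> <count>":
 *
 *	$ cat /proc/self/uid_map
 *	         0     100000      65536
 *
 * The middle column is re-expressed in the reader's user namespace via
 * from_kuid(), so the same map can print differently for different readers.
 */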

static bool mappings_overlap(struct uid_gid_map *new_map,
			     struct uid_gid_extent *extent)
{
	u32 upper_first, lower_first, upper_last, lower_last;
	unsigned idx;

	upper_first = extent->first;
	lower_first = extent->lower_first;
	upper_last = upper_first + extent->count - 1;
	lower_last = lower_first + extent->count - 1;

	for (idx = 0; idx < new_map->nr_extents; idx++) {
		u32 prev_upper_first, prev_lower_first;
		u32 prev_upper_last, prev_lower_last;
		struct uid_gid_extent *prev;

		prev = &new_map->extent[idx];

		prev_upper_first = prev->first;
		prev_lower_first = prev->lower_first;
		prev_upper_last = prev_upper_first + prev->count - 1;
		prev_lower_last = prev_lower_first + prev->count - 1;

		/* Does the upper range intersect a previous extent? */
		if ((prev_upper_first <= upper_last) &&
		    (prev_upper_last >= upper_first))
			return true;

		/* Does the lower range intersect a previous extent? */
		if ((prev_lower_first <= lower_last) &&
		    (prev_lower_last >= lower_first))
			return true;
	}
	return false;
}
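
/*
 * Illustrative example: given an already accepted extent
 *
 *	{ .first = 0, .lower_first = 100000, .count = 1000 }
 *
 * mappings_overlap() rejects
 *
 *	{ .first = 500,  .lower_first = 200000, .count = 10 }
 *		(upper ranges overlap: 500-509 intersects 0-999)
 *	{ .first = 5000, .lower_first = 100500, .count = 10 }
 *		(lower ranges overlap: 100500-100509 intersects 100000-100999)
 *
 * while
 *
 *	{ .first = 1000, .lower_first = 200000, .count = 1000 }
 *
 * is accepted because neither its upper nor its lower range intersects a
 * previous extent.
 */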

static ssize_t map_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos,
			 int cap_setid,
			 struct uid_gid_map *map,
			 struct uid_gid_map *parent_map)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct uid_gid_map new_map;
	unsigned idx;
	struct uid_gid_extent *extent = NULL;
	unsigned long page = 0;
	char *kbuf, *pos, *next_line;
	ssize_t ret = -EINVAL;

	/*
	 * The userns_state_mutex serializes all writes to any given map.
	 *
	 * Any map is only ever written once.
	 *
	 * An id map fits within 1 cache line on most architectures.
	 *
	 * On read nothing needs to be done unless you are on an
	 * architecture with a crazy cache coherency model like alpha.
	 *
	 * There is a one time data dependency between reading the
	 * count of the extents and the values of the extents.  The
	 * desired behavior is to see the values of the extents that
	 * were written before the count of the extents.
	 *
	 * To achieve this, smp_wmb() is used to guarantee the write
	 * ordering and smp_rmb() is used to guarantee that reads on
	 * weakly ordered architectures do not return stale data.
	 */
	mutex_lock(&userns_state_mutex);

	ret = -EPERM;
	/* Only allow one successful write to the map */
	if (map->nr_extents != 0)
		goto out;

	/*
	 * Adjusting namespace settings requires capabilities on the target.
	 */
	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
		goto out;

	/* Get a buffer */
	ret = -ENOMEM;
	page = __get_free_page(GFP_TEMPORARY);
	kbuf = (char *) page;
	if (!page)
		goto out;

	/* Only allow < page size writes at the beginning of the file */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		goto out;

	/* Slurp in the user data */
	ret = -EFAULT;
	if (copy_from_user(kbuf, buf, count))
		goto out;
	kbuf[count] = '\0';

	/* Parse the user data */
	ret = -EINVAL;
	pos = kbuf;
	new_map.nr_extents = 0;
	for (; pos; pos = next_line) {
		extent = &new_map.extent[new_map.nr_extents];

		/* Find the end of line and ensure I don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			if (*next_line == '\0')
				next_line = NULL;
		}

		pos = skip_spaces(pos);
		extent->first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->lower_first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->count = simple_strtoul(pos, &pos, 10);
		if (*pos && !isspace(*pos))
			goto out;

		/* Verify there is no trailing junk on the line */
		pos = skip_spaces(pos);
		if (*pos != '\0')
			goto out;

		/* Verify we have been given valid starting values */
		if ((extent->first == (u32) -1) ||
		    (extent->lower_first == (u32) -1))
			goto out;

		/* Verify count is not zero and does not cause the
		 * extent to wrap
		 */
		if ((extent->first + extent->count) <= extent->first)
			goto out;
		if ((extent->lower_first + extent->count) <=
		     extent->lower_first)
			goto out;

		/* Do the ranges in extent overlap any previous extents? */
		if (mappings_overlap(&new_map, extent))
			goto out;

		new_map.nr_extents++;

		/* Fail if the file contains too many extents */
		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
		    (next_line != NULL))
			goto out;
	}
	/* Be very certain the new map actually exists */
	if (new_map.nr_extents == 0)
		goto out;

	ret = -EPERM;
	/* Validate the user is allowed to use the user ids mapped to. */
	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
		goto out;

	/* Map the lower ids from the parent user namespace to the
	 * kernel global id space.
	 */
	for (idx = 0; idx < new_map.nr_extents; idx++) {
		u32 lower_first;
		extent = &new_map.extent[idx];

		lower_first = map_id_range_down(parent_map,
						extent->lower_first,
						extent->count);

		/* Fail if we can not map the specified extent to
		 * the kernel global id space.
		 */
		if (lower_first == (u32) -1)
			goto out;

		extent->lower_first = lower_first;
	}

	/* Install the map */
	memcpy(map->extent, new_map.extent,
		new_map.nr_extents*sizeof(new_map.extent[0]));
	smp_wmb();
	map->nr_extents = new_map.nr_extents;

	*ppos = count;
	ret = count;
out:
	mutex_unlock(&userns_state_mutex);
	if (page)
		free_page(page);
	return ret;
}
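
/*
 * Illustrative example of a write accepted by map_write(): each line is
 * "<first> <lower_first> <count>", at most UID_GID_MAP_MAX_EXTENTS lines,
 * delivered in a single write() at offset zero, and only once per map:
 *
 *	0 100000 65536
 *	65536 165536 1000
 *
 * The lower_first values name ids in the parent user namespace; before the
 * new map is installed they are translated to kernel ids through parent_map
 * with map_id_range_down(), so the stored extents always hold kernel ids.
 */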

ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
			   size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);

	if (!ns->parent)
		return -EPERM;

	if ((seq_ns != ns) && (seq_ns != ns->parent))
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETUID,
			 &ns->uid_map, &ns->parent->uid_map);
}

ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
			   size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);

	if (!ns->parent)
		return -EPERM;

	if ((seq_ns != ns) && (seq_ns != ns->parent))
		return -EPERM;

	return map_write(file, buf, size, ppos, CAP_SETGID,
			 &ns->gid_map, &ns->parent->gid_map);
}

ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
			      size_t size, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct user_namespace *seq_ns = seq_user_ns(seq);

	if (!ns->parent)
		return -EPERM;

	if ((seq_ns != ns) && (seq_ns != ns->parent))
		return -EPERM;

	/* Anyone can set any valid project id; no capability needed */
	return map_write(file, buf, size, ppos, -1,
			 &ns->projid_map, &ns->parent->projid_map);
}

static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *new_map)
{
	const struct cred *cred = file->f_cred;
	/* Don't allow mappings that would allow anything that wouldn't
	 * be allowed without the establishment of unprivileged mappings.
	 */
	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
	    uid_eq(ns->owner, cred->euid)) {
		u32 id = new_map->extent[0].lower_first;
		if (cap_setid == CAP_SETUID) {
			kuid_t uid = make_kuid(ns->parent, id);
			if (uid_eq(uid, cred->euid))
				return true;
		} else if (cap_setid == CAP_SETGID) {
			kgid_t gid = make_kgid(ns->parent, id);
			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
			    gid_eq(gid, cred->egid))
				return true;
		}
	}

	/* Allow anyone to set a mapping that doesn't require privilege */
	if (!cap_valid(cap_setid))
		return true;

	/* Allow the specified ids if we have the appropriate capability
	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
	 * And the opener of the id file also had the appropriate capability.
	 */
	if (ns_capable(ns->parent, cap_setid) &&
	    file_ns_capable(file, ns->parent, cap_setid))
		return true;

	return false;
}
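
/*
 * Illustrative example: an unprivileged process with euid 1000 that created
 * a user namespace may, without holding CAP_SETUID in the parent namespace,
 * write exactly one extent of length one that maps onto its own euid, e.g.
 *
 *	0 1000 1
 *
 * to uid_map.  The analogous single-line gid_map write is only permitted
 * once setgroups has been denied (see proc_setgroups_write() below) and the
 * target is the writer's own egid.  Anything beyond that requires
 * CAP_SETUID/CAP_SETGID in the parent namespace, held both by the caller
 * and by the opener of the map file.
 */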

int proc_setgroups_show(struct seq_file *seq, void *v)
{
	struct user_namespace *ns = seq->private;
	unsigned long userns_flags = ACCESS_ONCE(ns->flags);

	seq_printf(seq, "%s\n",
		   (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
		   "allow" : "deny");
	return 0;
}

ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	char kbuf[8], *pos;
	bool setgroups_allowed;
	ssize_t ret;

	/* Only allow a very narrow range of strings to be written */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= sizeof(kbuf)))
		goto out;

	/* What was written? */
	ret = -EFAULT;
	if (copy_from_user(kbuf, buf, count))
		goto out;
	kbuf[count] = '\0';
	pos = kbuf;

	/* What is being requested? */
	ret = -EINVAL;
	if (strncmp(pos, "allow", 5) == 0) {
		pos += 5;
		setgroups_allowed = true;
	}
	else if (strncmp(pos, "deny", 4) == 0) {
		pos += 4;
		setgroups_allowed = false;
	}
	else
		goto out;

	/* Verify there is no trailing junk on the line */
	pos = skip_spaces(pos);
	if (*pos != '\0')
		goto out;

	ret = -EPERM;
	mutex_lock(&userns_state_mutex);
	if (setgroups_allowed) {
		/* Enabling setgroups after setgroups has been disabled
		 * is not allowed.
		 */
		if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
			goto out_unlock;
	} else {
		/* Permanently disabling setgroups after setgroups has
		 * been enabled by writing the gid_map is not allowed.
		 */
		if (ns->gid_map.nr_extents != 0)
			goto out_unlock;
		ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
	}
	mutex_unlock(&userns_state_mutex);

	/* Report a successful write */
	*ppos = count;
	ret = count;
out:
	return ret;
out_unlock:
	mutex_unlock(&userns_state_mutex);
	goto out;
}
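
/*
 * Illustrative sequence (sketch) for giving an unprivileged child a gid
 * mapping: setgroups must be denied before gid_map is written, and the two
 * steps cannot be reordered once gid_map is non-empty.
 *
 *	echo deny > /proc/<pid>/setgroups
 *	echo "0 1000 1" > /proc/<pid>/gid_map
 *
 * Writing "allow" after "deny" fails with EPERM, as does writing "deny"
 * after a gid mapping has already been installed.
 */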

bool userns_may_setgroups(const struct user_namespace *ns)
{
	bool allowed;

	mutex_lock(&userns_state_mutex);
	/* It is not safe to use setgroups until a gid mapping in
	 * the user namespace has been established.
	 */
	allowed = ns->gid_map.nr_extents != 0;
	/* Is setgroups allowed? */
	allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
	mutex_unlock(&userns_state_mutex);

	return allowed;
}

static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
	return container_of(ns, struct user_namespace, ns);
}

static struct ns_common *userns_get(struct task_struct *task)
{
	struct user_namespace *user_ns;

	rcu_read_lock();
	user_ns = get_user_ns(__task_cred(task)->user_ns);
	rcu_read_unlock();

	return user_ns ? &user_ns->ns : NULL;
}

static void userns_put(struct ns_common *ns)
{
	put_user_ns(to_user_ns(ns));
}

static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);
	struct cred *cred;

	/* Don't allow gaining capabilities by reentering
	 * the same user namespace.
	 */
	if (user_ns == current_user_ns())
		return -EINVAL;

	/* Threaded processes may not enter a different user namespace */
	if (atomic_read(&current->mm->mm_users) > 1)
		return -EINVAL;

	if (current->fs->users != 1)
		return -EINVAL;

	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	cred = prepare_creds();
	if (!cred)
		return -ENOMEM;

	put_user_ns(cred->user_ns);
	set_cred_user_ns(cred, get_user_ns(user_ns));

	return commit_creds(cred);
}
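
/*
 * Illustrative userspace sketch: userns_install() is reached via setns(2)
 * on a /proc/<pid>/ns/user file descriptor.  The caller must not share its
 * address space or fs_struct with another task and needs CAP_SYS_ADMIN in
 * the target user namespace.
 *
 *	#define _GNU_SOURCE
 *	#include <sched.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int enter_userns(const char *path)	// e.g. "/proc/1234/ns/user"
 *	{
 *		int fd = open(path, O_RDONLY);
 *		int ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = setns(fd, CLONE_NEWUSER);
 *		close(fd);
 *		return ret;
 *	}
 */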

const struct proc_ns_operations userns_operations = {
	.name		= "user",
	.type		= CLONE_NEWUSER,
	.get		= userns_get,
	.put		= userns_put,
	.install	= userns_install,
};

static __init int user_namespaces_init(void)
{
	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
	return 0;
}
subsys_initcall(user_namespaces_init);
