1 /*
2 * linux/fs/proc/base.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 *
6 * proc base directory handling functions
7 *
8 * 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
9 * Instead of using magical inumbers to determine the kind of object
10 * we allocate and fill in-core inodes upon lookup. They don't even
11 * go into icache. We cache the reference to task_struct upon lookup too.
12 * Eventually it should become a filesystem in its own. We don't use the
13 * rest of procfs anymore.
14 *
15 *
16 * Changelog:
17 * 17-Jan-2005
18 * Allan Bezerra
19 * Bruna Moreira <bruna.moreira@indt.org.br>
20 * Edjard Mota <edjard.mota@indt.org.br>
21 * Ilias Biris <ilias.biris@indt.org.br>
22 * Mauricio Lin <mauricio.lin@indt.org.br>
23 *
24 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
25 *
26 * A new process specific entry (smaps) included in /proc. It shows the
27 * size of rss for each memory area. The maps entry lacks information
28 * about physical memory size (rss) for each mapped file, i.e.,
29 * rss information for executables and library files.
30 * This additional information is useful for any tools that need to know
31 * about physical memory consumption for a process specific library.
32 *
33 * Changelog:
34 * 21-Feb-2005
35 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
36 * Pud inclusion in the page table walking.
37 *
38 * ChangeLog:
39 * 10-Mar-2005
40 * 10LE Instituto Nokia de Tecnologia - INdT:
 * A better way to walk through the page table, as suggested by Hugh Dickins.
42 *
43 * Simo Piiroinen <simo.piiroinen@nokia.com>:
44 * Smaps information related to shared, private, clean and dirty pages.
45 *
46 * Paul Mundt <paul.mundt@nokia.com>:
47 * Overall revision about smaps.
48 */
49
50 #include <asm/uaccess.h>
51
52 #include <linux/errno.h>
53 #include <linux/time.h>
54 #include <linux/proc_fs.h>
55 #include <linux/stat.h>
56 #include <linux/task_io_accounting_ops.h>
57 #include <linux/init.h>
58 #include <linux/capability.h>
59 #include <linux/file.h>
60 #include <linux/fdtable.h>
61 #include <linux/string.h>
62 #include <linux/seq_file.h>
63 #include <linux/namei.h>
64 #include <linux/mnt_namespace.h>
65 #include <linux/mm.h>
66 #include <linux/swap.h>
67 #include <linux/rcupdate.h>
68 #include <linux/kallsyms.h>
69 #include <linux/stacktrace.h>
70 #include <linux/resource.h>
71 #include <linux/module.h>
72 #include <linux/mount.h>
73 #include <linux/security.h>
74 #include <linux/ptrace.h>
75 #include <linux/tracehook.h>
76 #include <linux/printk.h>
77 #include <linux/cgroup.h>
78 #include <linux/cpuset.h>
79 #include <linux/audit.h>
80 #include <linux/poll.h>
81 #include <linux/nsproxy.h>
82 #include <linux/oom.h>
83 #include <linux/elf.h>
84 #include <linux/pid_namespace.h>
85 #include <linux/user_namespace.h>
86 #include <linux/fs_struct.h>
87 #include <linux/slab.h>
88 #include <linux/flex_array.h>
89 #include <linux/posix-timers.h>
90 #ifdef CONFIG_HARDWALL
91 #include <asm/hardwall.h>
92 #endif
93 #include <trace/events/oom.h>
94 #include "internal.h"
95 #include "fd.h"
96
97 /* NOTE:
98 * Implementing inode permission operations in /proc is almost
99 * certainly an error. Permission checks need to happen during
100 * each system call not at open time. The reason is that most of
101 * what we wish to check for permissions in /proc varies at runtime.
102 *
103 * The classic example of a problem is opening file descriptors
104 * in /proc for a task before it execs a suid executable.
105 */
106
107 struct pid_entry {
108 const char *name;
109 int len;
110 umode_t mode;
111 const struct inode_operations *iop;
112 const struct file_operations *fop;
113 union proc_op op;
114 };
115
/*
 * Initializer macros for struct pid_entry.
 *
 * NOD() is the generic form.  .len is computed with sizeof(NAME) - 1, so
 * NAME has to be a string literal (or array), never a plain pointer.  OP is
 * pasted in as a brace initializer for union proc_op and therefore must not
 * be wrapped in parentheses.
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {		\
	.name	= (NAME),			\
	.len	= sizeof(NAME) - 1,		\
	.mode	= MODE,				\
	.iop	= IOP,				\
	.fop	= FOP,				\
	.op	= OP,				\
}

/* Directory: the caller names both the inode and the file operations. */
#define DIR(NAME, MODE, iops, fops)				\
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )

/* Symlink, mode rwx for everyone, resolved through the get_link callback. */
#define LNK(NAME, get_link)					\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
		&proc_pid_link_inode_operations, NULL,		\
		{ .proc_get_link = get_link } )

/* Regular file with its own file operations and no inode operations. */
#define REG(NAME, MODE, fops)					\
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})

/* Regular file served by proc_single_file_operations and a show callback. */
#define ONE(NAME, MODE, show)					\
	NOD(NAME, (S_IFREG|(MODE)),				\
		NULL, &proc_single_file_operations,		\
		{ .proc_show = show } )
137
138 /*
139 * Count the number of hardlinks for the pid_entry table, excluding the .
140 * and .. links.
141 */
pid_entry_count_dirs(const struct pid_entry * entries,unsigned int n)142 static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
143 unsigned int n)
144 {
145 unsigned int i;
146 unsigned int count;
147
148 count = 0;
149 for (i = 0; i < n; ++i) {
150 if (S_ISDIR(entries[i].mode))
151 ++count;
152 }
153
154 return count;
155 }
156
get_task_root(struct task_struct * task,struct path * root)157 static int get_task_root(struct task_struct *task, struct path *root)
158 {
159 int result = -ENOENT;
160
161 task_lock(task);
162 if (task->fs) {
163 get_fs_root(task->fs, root);
164 result = 0;
165 }
166 task_unlock(task);
167 return result;
168 }
169
proc_cwd_link(struct dentry * dentry,struct path * path)170 static int proc_cwd_link(struct dentry *dentry, struct path *path)
171 {
172 struct task_struct *task = get_proc_task(d_inode(dentry));
173 int result = -ENOENT;
174
175 if (task) {
176 task_lock(task);
177 if (task->fs) {
178 get_fs_pwd(task->fs, path);
179 result = 0;
180 }
181 task_unlock(task);
182 put_task_struct(task);
183 }
184 return result;
185 }
186
proc_root_link(struct dentry * dentry,struct path * path)187 static int proc_root_link(struct dentry *dentry, struct path *path)
188 {
189 struct task_struct *task = get_proc_task(d_inode(dentry));
190 int result = -ENOENT;
191
192 if (task) {
193 result = get_task_root(task, path);
194 put_task_struct(task);
195 }
196 return result;
197 }
198
proc_pid_cmdline(struct seq_file * m,struct pid_namespace * ns,struct pid * pid,struct task_struct * task)199 static int proc_pid_cmdline(struct seq_file *m, struct pid_namespace *ns,
200 struct pid *pid, struct task_struct *task)
201 {
202 /*
203 * Rely on struct seq_operations::show() being called once
204 * per internal buffer allocation. See single_open(), traverse().
205 */
206 BUG_ON(m->size < PAGE_SIZE);
207 m->count += get_cmdline(task, m->buf, PAGE_SIZE);
208 return 0;
209 }
210
proc_pid_auxv(struct seq_file * m,struct pid_namespace * ns,struct pid * pid,struct task_struct * task)211 static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
212 struct pid *pid, struct task_struct *task)
213 {
214 struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
215 if (mm && !IS_ERR(mm)) {
216 unsigned int nwords = 0;
217 do {
218 nwords += 2;
219 } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
220 seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0]));
221 mmput(mm);
222 return 0;
223 } else
224 return PTR_ERR(mm);
225 }
226
227
228 #ifdef CONFIG_KALLSYMS
229 /*
230 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
231 * Returns the resolved symbol. If that fails, simply return the address.
232 */
proc_pid_wchan(struct seq_file * m,struct pid_namespace * ns,struct pid * pid,struct task_struct * task)233 static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
234 struct pid *pid, struct task_struct *task)
235 {
236 unsigned long wchan;
237 char symname[KSYM_NAME_LEN];
238
239 wchan = get_wchan(task);
240
241 if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)
242 && !lookup_symbol_name(wchan, symname))
243 seq_printf(m, "%s", symname);
244 else
245 seq_putc(m, '0');
246
247 return 0;
248 }
249 #endif /* CONFIG_KALLSYMS */
250
lock_trace(struct task_struct * task)251 static int lock_trace(struct task_struct *task)
252 {
253 int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
254 if (err)
255 return err;
256 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
257 mutex_unlock(&task->signal->cred_guard_mutex);
258 return -EPERM;
259 }
260 return 0;
261 }
262
unlock_trace(struct task_struct * task)263 static void unlock_trace(struct task_struct *task)
264 {
265 mutex_unlock(&task->signal->cred_guard_mutex);
266 }
267
268 #ifdef CONFIG_STACKTRACE
269
270 #define MAX_STACK_TRACE_DEPTH 64
271
proc_pid_stack(struct seq_file * m,struct pid_namespace * ns,struct pid * pid,struct task_struct * task)272 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
273 struct pid *pid, struct task_struct *task)
274 {
275 struct stack_trace trace;
276 unsigned long *entries;
277 int err;
278 int i;
279
280 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
281 if (!entries)
282 return -ENOMEM;
283
284 trace.nr_entries = 0;
285 trace.max_entries = MAX_STACK_TRACE_DEPTH;
286 trace.entries = entries;
287 trace.skip = 0;
288
289 err = lock_trace(task);
290 if (!err) {
291 save_stack_trace_tsk(task, &trace);
292
293 for (i = 0; i < trace.nr_entries; i++) {
294 seq_printf(m, "[<%pK>] %pS\n",
295 (void *)entries[i], (void *)entries[i]);
296 }
297 unlock_trace(task);
298 }
299 kfree(entries);
300
301 return err;
302 }
303 #endif
304
305 #ifdef CONFIG_SCHEDSTATS
306 /*
307 * Provides /proc/PID/schedstat
308 */
proc_pid_schedstat(struct seq_file * m,struct pid_namespace * ns,struct pid * pid,struct task_struct * task)309 static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
310 struct pid *pid, struct task_struct *task)
311 {
312 seq_printf(m, "%llu %llu %lu\n",
313 (unsigned long long)task->se.sum_exec_runtime,
314 (unsigned long long)task->sched_info.run_delay,
315 task->sched_info.pcount);
316
317 return 0;
318 }
319 #endif
320
321 #ifdef CONFIG_LATENCYTOP
lstats_show_proc(struct seq_file * m,void * v)322 static int lstats_show_proc(struct seq_file *m, void *v)
323 {
324 int i;
325 struct inode *inode = m->private;
326 struct task_struct *task = get_proc_task(inode);
327
328 if (!task)
329 return -ESRCH;
330 seq_puts(m, "Latency Top version : v0.1\n");
331 for (i = 0; i < 32; i++) {
332 struct latency_record *lr = &task->latency_record[i];
333 if (lr->backtrace[0]) {
334 int q;
335 seq_printf(m, "%i %li %li",
336 lr->count, lr->time, lr->max);
337 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
338 unsigned long bt = lr->backtrace[q];
339 if (!bt)
340 break;
341 if (bt == ULONG_MAX)
342 break;
343 seq_printf(m, " %ps", (void *)bt);
344 }
345 seq_putc(m, '\n');
346 }
347
348 }
349 put_task_struct(task);
350 return 0;
351 }
352
lstats_open(struct inode * inode,struct file * file)353 static int lstats_open(struct inode *inode, struct file *file)
354 {
355 return single_open(file, lstats_show_proc, inode);
356 }
357
lstats_write(struct file * file,const char __user * buf,size_t count,loff_t * offs)358 static ssize_t lstats_write(struct file *file, const char __user *buf,
359 size_t count, loff_t *offs)
360 {
361 struct task_struct *task = get_proc_task(file_inode(file));
362
363 if (!task)
364 return -ESRCH;
365 clear_all_latency_tracing(task);
366 put_task_struct(task);
367
368 return count;
369 }
370
371 static const struct file_operations proc_lstats_operations = {
372 .open = lstats_open,
373 .read = seq_read,
374 .write = lstats_write,
375 .llseek = seq_lseek,
376 .release = single_release,
377 };
378
379 #endif
380
proc_oom_score(struct seq_file * m,struct pid_namespace * ns,struct pid * pid,struct task_struct * task)381 static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
382 struct pid *pid, struct task_struct *task)
383 {
384 unsigned long totalpages = totalram_pages + total_swap_pages;
385 unsigned long points = 0;
386
387 read_lock(&tasklist_lock);
388 if (pid_alive(task))
389 points = oom_badness(task, NULL, NULL, totalpages) *
390 1000 / totalpages;
391 read_unlock(&tasklist_lock);
392 seq_printf(m, "%lu\n", points);
393
394 return 0;
395 }
396
/* Human-readable label and unit for one resource limit. */
struct limit_names {
	const char *name;	/* text printed in the "Limit" column */
	const char *unit;	/* text printed in the "Units" column, or NULL */
};
401
402 static const struct limit_names lnames[RLIM_NLIMITS] = {
403 [RLIMIT_CPU] = {"Max cpu time", "seconds"},
404 [RLIMIT_FSIZE] = {"Max file size", "bytes"},
405 [RLIMIT_DATA] = {"Max data size", "bytes"},
406 [RLIMIT_STACK] = {"Max stack size", "bytes"},
407 [RLIMIT_CORE] = {"Max core file size", "bytes"},
408 [RLIMIT_RSS] = {"Max resident set", "bytes"},
409 [RLIMIT_NPROC] = {"Max processes", "processes"},
410 [RLIMIT_NOFILE] = {"Max open files", "files"},
411 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
412 [RLIMIT_AS] = {"Max address space", "bytes"},
413 [RLIMIT_LOCKS] = {"Max file locks", "locks"},
414 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
415 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
416 [RLIMIT_NICE] = {"Max nice priority", NULL},
417 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
418 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
419 };
420
421 /* Display limits for a process */
proc_pid_limits(struct seq_file * m,struct pid_namespace * ns,struct pid * pid,struct task_struct * task)422 static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
423 struct pid *pid, struct task_struct *task)
424 {
425 unsigned int i;
426 unsigned long flags;
427
428 struct rlimit rlim[RLIM_NLIMITS];
429
430 if (!lock_task_sighand(task, &flags))
431 return 0;
432 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
433 unlock_task_sighand(task, &flags);
434
435 /*
436 * print the file header
437 */
438 seq_printf(m, "%-25s %-20s %-20s %-10s\n",
439 "Limit", "Soft Limit", "Hard Limit", "Units");
440
441 for (i = 0; i < RLIM_NLIMITS; i++) {
442 if (rlim[i].rlim_cur == RLIM_INFINITY)
443 seq_printf(m, "%-25s %-20s ",
444 lnames[i].name, "unlimited");
445 else
446 seq_printf(m, "%-25s %-20lu ",
447 lnames[i].name, rlim[i].rlim_cur);
448
449 if (rlim[i].rlim_max == RLIM_INFINITY)
450 seq_printf(m, "%-20s ", "unlimited");
451 else
452 seq_printf(m, "%-20lu ", rlim[i].rlim_max);
453
454 if (lnames[i].unit)
455 seq_printf(m, "%-10s\n", lnames[i].unit);
456 else
457 seq_putc(m, '\n');
458 }
459
460 return 0;
461 }
462
463 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
/*
 * Show callback for "syscall".
 *
 * Output is one of:
 *   "running"                  - task_current_syscall() returned non-zero
 *   "<nr> <sp> <pc>"           - the reported syscall number is negative
 *   "<nr> <6 args> <sp> <pc>"  - otherwise
 *
 * Returns 0, or the error from lock_trace().
 */
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *task)
{
	unsigned long args[6], sp, pc;
	long nr;
	int res;

	res = lock_trace(task);
	if (res)
		return res;

	if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) {
		seq_puts(m, "running\n");
	} else if (nr < 0) {
		seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
	} else {
		seq_printf(m,
		       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
		       nr,
		       args[0], args[1], args[2], args[3], args[4], args[5],
		       sp, pc);
	}

	unlock_trace(task);
	return 0;
}
489 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
490
491 /************************************************************************/
492 /* Here the fs part begins */
493 /************************************************************************/
494
495 /* permission checks */
proc_fd_access_allowed(struct inode * inode)496 static int proc_fd_access_allowed(struct inode *inode)
497 {
498 struct task_struct *task;
499 int allowed = 0;
500 /* Allow access to a task's file descriptors if it is us or we
501 * may use ptrace attach to the process and find out that
502 * information.
503 */
504 task = get_proc_task(inode);
505 if (task) {
506 allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
507 put_task_struct(task);
508 }
509 return allowed;
510 }
511
proc_setattr(struct dentry * dentry,struct iattr * attr)512 int proc_setattr(struct dentry *dentry, struct iattr *attr)
513 {
514 int error;
515 struct inode *inode = d_inode(dentry);
516
517 if (attr->ia_valid & ATTR_MODE)
518 return -EPERM;
519
520 error = inode_change_ok(inode, attr);
521 if (error)
522 return error;
523
524 setattr_copy(inode, attr);
525 mark_inode_dirty(inode);
526 return 0;
527 }
528
529 /*
530 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
531 * or euid/egid (for hide_pid_min=2)?
532 */
has_pid_permissions(struct pid_namespace * pid,struct task_struct * task,int hide_pid_min)533 static bool has_pid_permissions(struct pid_namespace *pid,
534 struct task_struct *task,
535 int hide_pid_min)
536 {
537 if (pid->hide_pid < hide_pid_min)
538 return true;
539 if (in_group_p(pid->pid_gid))
540 return true;
541 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
542 }
543
544
proc_pid_permission(struct inode * inode,int mask)545 static int proc_pid_permission(struct inode *inode, int mask)
546 {
547 struct pid_namespace *pid = inode->i_sb->s_fs_info;
548 struct task_struct *task;
549 bool has_perms;
550
551 task = get_proc_task(inode);
552 if (!task)
553 return -ESRCH;
554 has_perms = has_pid_permissions(pid, task, 1);
555 put_task_struct(task);
556
557 if (!has_perms) {
558 if (pid->hide_pid == 2) {
559 /*
560 * Let's make getdents(), stat(), and open()
561 * consistent with each other. If a process
562 * may not stat() a file, it shouldn't be seen
563 * in procfs at all.
564 */
565 return -ENOENT;
566 }
567
568 return -EPERM;
569 }
570 return generic_permission(inode, mask);
571 }
572
573
574
575 static const struct inode_operations proc_def_inode_operations = {
576 .setattr = proc_setattr,
577 };
578
proc_single_show(struct seq_file * m,void * v)579 static int proc_single_show(struct seq_file *m, void *v)
580 {
581 struct inode *inode = m->private;
582 struct pid_namespace *ns;
583 struct pid *pid;
584 struct task_struct *task;
585 int ret;
586
587 ns = inode->i_sb->s_fs_info;
588 pid = proc_pid(inode);
589 task = get_pid_task(pid, PIDTYPE_PID);
590 if (!task)
591 return -ESRCH;
592
593 ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
594
595 put_task_struct(task);
596 return ret;
597 }
598
proc_single_open(struct inode * inode,struct file * filp)599 static int proc_single_open(struct inode *inode, struct file *filp)
600 {
601 return single_open(filp, proc_single_show, inode);
602 }
603
604 static const struct file_operations proc_single_file_operations = {
605 .open = proc_single_open,
606 .read = seq_read,
607 .llseek = seq_lseek,
608 .release = single_release,
609 };
610
611
proc_mem_open(struct inode * inode,unsigned int mode)612 struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
613 {
614 struct task_struct *task = get_proc_task(inode);
615 struct mm_struct *mm = ERR_PTR(-ESRCH);
616
617 if (task) {
618 mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
619 put_task_struct(task);
620
621 if (!IS_ERR_OR_NULL(mm)) {
622 /* ensure this mm_struct can't be freed */
623 atomic_inc(&mm->mm_count);
624 /* but do not pin its memory */
625 mmput(mm);
626 }
627 }
628
629 return mm;
630 }
631
__mem_open(struct inode * inode,struct file * file,unsigned int mode)632 static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
633 {
634 struct mm_struct *mm = proc_mem_open(inode, mode);
635
636 if (IS_ERR(mm))
637 return PTR_ERR(mm);
638
639 file->private_data = mm;
640 return 0;
641 }
642
mem_open(struct inode * inode,struct file * file)643 static int mem_open(struct inode *inode, struct file *file)
644 {
645 int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
646
647 /* OK to pass negative loff_t, we can catch out-of-range */
648 file->f_mode |= FMODE_UNSIGNED_OFFSET;
649
650 return ret;
651 }
652
mem_rw(struct file * file,char __user * buf,size_t count,loff_t * ppos,int write)653 static ssize_t mem_rw(struct file *file, char __user *buf,
654 size_t count, loff_t *ppos, int write)
655 {
656 struct mm_struct *mm = file->private_data;
657 unsigned long addr = *ppos;
658 ssize_t copied;
659 char *page;
660
661 if (!mm)
662 return 0;
663
664 page = (char *)__get_free_page(GFP_TEMPORARY);
665 if (!page)
666 return -ENOMEM;
667
668 copied = 0;
669 if (!atomic_inc_not_zero(&mm->mm_users))
670 goto free;
671
672 while (count > 0) {
673 int this_len = min_t(int, count, PAGE_SIZE);
674
675 if (write && copy_from_user(page, buf, this_len)) {
676 copied = -EFAULT;
677 break;
678 }
679
680 this_len = access_remote_vm(mm, addr, page, this_len, write);
681 if (!this_len) {
682 if (!copied)
683 copied = -EIO;
684 break;
685 }
686
687 if (!write && copy_to_user(buf, page, this_len)) {
688 copied = -EFAULT;
689 break;
690 }
691
692 buf += this_len;
693 addr += this_len;
694 copied += this_len;
695 count -= this_len;
696 }
697 *ppos = addr;
698
699 mmput(mm);
700 free:
701 free_page((unsigned long) page);
702 return copied;
703 }
704
mem_read(struct file * file,char __user * buf,size_t count,loff_t * ppos)705 static ssize_t mem_read(struct file *file, char __user *buf,
706 size_t count, loff_t *ppos)
707 {
708 return mem_rw(file, buf, count, ppos, 0);
709 }
710
mem_write(struct file * file,const char __user * buf,size_t count,loff_t * ppos)711 static ssize_t mem_write(struct file *file, const char __user *buf,
712 size_t count, loff_t *ppos)
713 {
714 return mem_rw(file, (char __user*)buf, count, ppos, 1);
715 }
716
/*
 * llseek for "mem": supports absolute (0) and relative-to-current (1)
 * positioning only; any other @orig yields -EINVAL.
 *
 * The resulting position may look like a negative errno, so the call is
 * explicitly marked successful before the new position is returned.
 */
loff_t mem_lseek(struct file *file, loff_t offset, int orig)
{
	if (orig == 0)
		file->f_pos = offset;
	else if (orig == 1)
		file->f_pos += offset;
	else
		return -EINVAL;

	force_successful_syscall_return();
	return file->f_pos;
}
732
mem_release(struct inode * inode,struct file * file)733 static int mem_release(struct inode *inode, struct file *file)
734 {
735 struct mm_struct *mm = file->private_data;
736 if (mm)
737 mmdrop(mm);
738 return 0;
739 }
740
741 static const struct file_operations proc_mem_operations = {
742 .llseek = mem_lseek,
743 .read = mem_read,
744 .write = mem_write,
745 .open = mem_open,
746 .release = mem_release,
747 };
748
environ_open(struct inode * inode,struct file * file)749 static int environ_open(struct inode *inode, struct file *file)
750 {
751 return __mem_open(inode, file, PTRACE_MODE_READ);
752 }
753
environ_read(struct file * file,char __user * buf,size_t count,loff_t * ppos)754 static ssize_t environ_read(struct file *file, char __user *buf,
755 size_t count, loff_t *ppos)
756 {
757 char *page;
758 unsigned long src = *ppos;
759 int ret = 0;
760 struct mm_struct *mm = file->private_data;
761
762 if (!mm)
763 return 0;
764
765 page = (char *)__get_free_page(GFP_TEMPORARY);
766 if (!page)
767 return -ENOMEM;
768
769 ret = 0;
770 if (!atomic_inc_not_zero(&mm->mm_users))
771 goto free;
772 while (count > 0) {
773 size_t this_len, max_len;
774 int retval;
775
776 if (src >= (mm->env_end - mm->env_start))
777 break;
778
779 this_len = mm->env_end - (mm->env_start + src);
780
781 max_len = min_t(size_t, PAGE_SIZE, count);
782 this_len = min(max_len, this_len);
783
784 retval = access_remote_vm(mm, (mm->env_start + src),
785 page, this_len, 0);
786
787 if (retval <= 0) {
788 ret = retval;
789 break;
790 }
791
792 if (copy_to_user(buf, page, retval)) {
793 ret = -EFAULT;
794 break;
795 }
796
797 ret += retval;
798 src += retval;
799 buf += retval;
800 count -= retval;
801 }
802 *ppos = src;
803 mmput(mm);
804
805 free:
806 free_page((unsigned long) page);
807 return ret;
808 }
809
810 static const struct file_operations proc_environ_operations = {
811 .open = environ_open,
812 .read = environ_read,
813 .llseek = generic_file_llseek,
814 .release = mem_release,
815 };
816
oom_adj_read(struct file * file,char __user * buf,size_t count,loff_t * ppos)817 static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
818 loff_t *ppos)
819 {
820 struct task_struct *task = get_proc_task(file_inode(file));
821 char buffer[PROC_NUMBUF];
822 int oom_adj = OOM_ADJUST_MIN;
823 size_t len;
824 unsigned long flags;
825
826 if (!task)
827 return -ESRCH;
828 if (lock_task_sighand(task, &flags)) {
829 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
830 oom_adj = OOM_ADJUST_MAX;
831 else
832 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
833 OOM_SCORE_ADJ_MAX;
834 unlock_task_sighand(task, &flags);
835 }
836 put_task_struct(task);
837 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
838 return simple_read_from_buffer(buf, count, ppos, buffer, len);
839 }
840
oom_adj_write(struct file * file,const char __user * buf,size_t count,loff_t * ppos)841 static ssize_t oom_adj_write(struct file *file, const char __user *buf,
842 size_t count, loff_t *ppos)
843 {
844 struct task_struct *task;
845 char buffer[PROC_NUMBUF];
846 int oom_adj;
847 unsigned long flags;
848 int err;
849
850 memset(buffer, 0, sizeof(buffer));
851 if (count > sizeof(buffer) - 1)
852 count = sizeof(buffer) - 1;
853 if (copy_from_user(buffer, buf, count)) {
854 err = -EFAULT;
855 goto out;
856 }
857
858 err = kstrtoint(strstrip(buffer), 0, &oom_adj);
859 if (err)
860 goto out;
861 if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
862 oom_adj != OOM_DISABLE) {
863 err = -EINVAL;
864 goto out;
865 }
866
867 task = get_proc_task(file_inode(file));
868 if (!task) {
869 err = -ESRCH;
870 goto out;
871 }
872
873 task_lock(task);
874 if (!task->mm) {
875 err = -EINVAL;
876 goto err_task_lock;
877 }
878
879 if (!lock_task_sighand(task, &flags)) {
880 err = -ESRCH;
881 goto err_task_lock;
882 }
883
884 /*
885 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
886 * value is always attainable.
887 */
888 if (oom_adj == OOM_ADJUST_MAX)
889 oom_adj = OOM_SCORE_ADJ_MAX;
890 else
891 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
892
893 if (oom_adj < task->signal->oom_score_adj &&
894 !capable(CAP_SYS_RESOURCE)) {
895 err = -EACCES;
896 goto err_sighand;
897 }
898
899 /*
900 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
901 * /proc/pid/oom_score_adj instead.
902 */
903 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
904 current->comm, task_pid_nr(current), task_pid_nr(task),
905 task_pid_nr(task));
906
907 task->signal->oom_score_adj = oom_adj;
908 trace_oom_score_adj_update(task);
909 err_sighand:
910 unlock_task_sighand(task, &flags);
911 err_task_lock:
912 task_unlock(task);
913 put_task_struct(task);
914 out:
915 return err < 0 ? err : count;
916 }
917
918 static const struct file_operations proc_oom_adj_operations = {
919 .read = oom_adj_read,
920 .write = oom_adj_write,
921 .llseek = generic_file_llseek,
922 };
923
oom_score_adj_read(struct file * file,char __user * buf,size_t count,loff_t * ppos)924 static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
925 size_t count, loff_t *ppos)
926 {
927 struct task_struct *task = get_proc_task(file_inode(file));
928 char buffer[PROC_NUMBUF];
929 short oom_score_adj = OOM_SCORE_ADJ_MIN;
930 unsigned long flags;
931 size_t len;
932
933 if (!task)
934 return -ESRCH;
935 if (lock_task_sighand(task, &flags)) {
936 oom_score_adj = task->signal->oom_score_adj;
937 unlock_task_sighand(task, &flags);
938 }
939 put_task_struct(task);
940 len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
941 return simple_read_from_buffer(buf, count, ppos, buffer, len);
942 }
943
oom_score_adj_write(struct file * file,const char __user * buf,size_t count,loff_t * ppos)944 static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
945 size_t count, loff_t *ppos)
946 {
947 struct task_struct *task;
948 char buffer[PROC_NUMBUF];
949 unsigned long flags;
950 int oom_score_adj;
951 int err;
952
953 memset(buffer, 0, sizeof(buffer));
954 if (count > sizeof(buffer) - 1)
955 count = sizeof(buffer) - 1;
956 if (copy_from_user(buffer, buf, count)) {
957 err = -EFAULT;
958 goto out;
959 }
960
961 err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
962 if (err)
963 goto out;
964 if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
965 oom_score_adj > OOM_SCORE_ADJ_MAX) {
966 err = -EINVAL;
967 goto out;
968 }
969
970 task = get_proc_task(file_inode(file));
971 if (!task) {
972 err = -ESRCH;
973 goto out;
974 }
975
976 task_lock(task);
977 if (!task->mm) {
978 err = -EINVAL;
979 goto err_task_lock;
980 }
981
982 if (!lock_task_sighand(task, &flags)) {
983 err = -ESRCH;
984 goto err_task_lock;
985 }
986
987 if ((short)oom_score_adj < task->signal->oom_score_adj_min &&
988 !capable(CAP_SYS_RESOURCE)) {
989 err = -EACCES;
990 goto err_sighand;
991 }
992
993 task->signal->oom_score_adj = (short)oom_score_adj;
994 if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
995 task->signal->oom_score_adj_min = (short)oom_score_adj;
996 trace_oom_score_adj_update(task);
997
998 err_sighand:
999 unlock_task_sighand(task, &flags);
1000 err_task_lock:
1001 task_unlock(task);
1002 put_task_struct(task);
1003 out:
1004 return err < 0 ? err : count;
1005 }
1006
1007 static const struct file_operations proc_oom_score_adj_operations = {
1008 .read = oom_score_adj_read,
1009 .write = oom_score_adj_write,
1010 .llseek = default_llseek,
1011 };
1012
1013 #ifdef CONFIG_AUDITSYSCALL
1014 #define TMPBUFLEN 21
proc_loginuid_read(struct file * file,char __user * buf,size_t count,loff_t * ppos)1015 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1016 size_t count, loff_t *ppos)
1017 {
1018 struct inode * inode = file_inode(file);
1019 struct task_struct *task = get_proc_task(inode);
1020 ssize_t length;
1021 char tmpbuf[TMPBUFLEN];
1022
1023 if (!task)
1024 return -ESRCH;
1025 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1026 from_kuid(file->f_cred->user_ns,
1027 audit_get_loginuid(task)));
1028 put_task_struct(task);
1029 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1030 }
1031
proc_loginuid_write(struct file * file,const char __user * buf,size_t count,loff_t * ppos)1032 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1033 size_t count, loff_t *ppos)
1034 {
1035 struct inode * inode = file_inode(file);
1036 char *page, *tmp;
1037 ssize_t length;
1038 uid_t loginuid;
1039 kuid_t kloginuid;
1040
1041 rcu_read_lock();
1042 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
1043 rcu_read_unlock();
1044 return -EPERM;
1045 }
1046 rcu_read_unlock();
1047
1048 if (count >= PAGE_SIZE)
1049 count = PAGE_SIZE - 1;
1050
1051 if (*ppos != 0) {
1052 /* No partial writes. */
1053 return -EINVAL;
1054 }
1055 page = (char*)__get_free_page(GFP_TEMPORARY);
1056 if (!page)
1057 return -ENOMEM;
1058 length = -EFAULT;
1059 if (copy_from_user(page, buf, count))
1060 goto out_free_page;
1061
1062 page[count] = '\0';
1063 loginuid = simple_strtoul(page, &tmp, 10);
1064 if (tmp == page) {
1065 length = -EINVAL;
1066 goto out_free_page;
1067
1068 }
1069
1070 /* is userspace tring to explicitly UNSET the loginuid? */
1071 if (loginuid == AUDIT_UID_UNSET) {
1072 kloginuid = INVALID_UID;
1073 } else {
1074 kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
1075 if (!uid_valid(kloginuid)) {
1076 length = -EINVAL;
1077 goto out_free_page;
1078 }
1079 }
1080
1081 length = audit_set_loginuid(kloginuid);
1082 if (likely(length == 0))
1083 length = count;
1084
1085 out_free_page:
1086 free_page((unsigned long) page);
1087 return length;
1088 }
1089
1090 static const struct file_operations proc_loginuid_operations = {
1091 .read = proc_loginuid_read,
1092 .write = proc_loginuid_write,
1093 .llseek = generic_file_llseek,
1094 };
1095
proc_sessionid_read(struct file * file,char __user * buf,size_t count,loff_t * ppos)1096 static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1097 size_t count, loff_t *ppos)
1098 {
1099 struct inode * inode = file_inode(file);
1100 struct task_struct *task = get_proc_task(inode);
1101 ssize_t length;
1102 char tmpbuf[TMPBUFLEN];
1103
1104 if (!task)
1105 return -ESRCH;
1106 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1107 audit_get_sessionid(task));
1108 put_task_struct(task);
1109 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1110 }
1111
1112 static const struct file_operations proc_sessionid_operations = {
1113 .read = proc_sessionid_read,
1114 .llseek = generic_file_llseek,
1115 };
1116 #endif
1117
1118 #ifdef CONFIG_FAULT_INJECTION
proc_fault_inject_read(struct file * file,char __user * buf,size_t count,loff_t * ppos)1119 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
1120 size_t count, loff_t *ppos)
1121 {
1122 struct task_struct *task = get_proc_task(file_inode(file));
1123 char buffer[PROC_NUMBUF];
1124 size_t len;
1125 int make_it_fail;
1126
1127 if (!task)
1128 return -ESRCH;
1129 make_it_fail = task->make_it_fail;
1130 put_task_struct(task);
1131
1132 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
1133
1134 return simple_read_from_buffer(buf, count, ppos, buffer, len);
1135 }
1136
proc_fault_inject_write(struct file * file,const char __user * buf,size_t count,loff_t * ppos)1137 static ssize_t proc_fault_inject_write(struct file * file,
1138 const char __user * buf, size_t count, loff_t *ppos)
1139 {
1140 struct task_struct *task;
1141 char buffer[PROC_NUMBUF], *end;
1142 int make_it_fail;
1143
1144 if (!capable(CAP_SYS_RESOURCE))
1145 return -EPERM;
1146 memset(buffer, 0, sizeof(buffer));
1147 if (count > sizeof(buffer) - 1)
1148 count = sizeof(buffer) - 1;
1149 if (copy_from_user(buffer, buf, count))
1150 return -EFAULT;
1151 make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
1152 if (*end)
1153 return -EINVAL;
1154 if (make_it_fail < 0 || make_it_fail > 1)
1155 return -EINVAL;
1156
1157 task = get_proc_task(file_inode(file));
1158 if (!task)
1159 return -ESRCH;
1160 task->make_it_fail = make_it_fail;
1161 put_task_struct(task);
1162
1163 return count;
1164 }
1165
1166 static const struct file_operations proc_fault_inject_operations = {
1167 .read = proc_fault_inject_read,
1168 .write = proc_fault_inject_write,
1169 .llseek = generic_file_llseek,
1170 };
1171 #endif
1172
1173
1174 #ifdef CONFIG_SCHED_DEBUG
1175 /*
1176 * Print out various scheduling related per-task fields:
1177 */
sched_show(struct seq_file * m,void * v)1178 static int sched_show(struct seq_file *m, void *v)
1179 {
1180 struct inode *inode = m->private;
1181 struct task_struct *p;
1182
1183 p = get_proc_task(inode);
1184 if (!p)
1185 return -ESRCH;
1186 proc_sched_show_task(p, m);
1187
1188 put_task_struct(p);
1189
1190 return 0;
1191 }
1192
1193 static ssize_t
sched_write(struct file * file,const char __user * buf,size_t count,loff_t * offset)1194 sched_write(struct file *file, const char __user *buf,
1195 size_t count, loff_t *offset)
1196 {
1197 struct inode *inode = file_inode(file);
1198 struct task_struct *p;
1199
1200 p = get_proc_task(inode);
1201 if (!p)
1202 return -ESRCH;
1203 proc_sched_set_task(p);
1204
1205 put_task_struct(p);
1206
1207 return count;
1208 }
1209
sched_open(struct inode * inode,struct file * filp)1210 static int sched_open(struct inode *inode, struct file *filp)
1211 {
1212 return single_open(filp, sched_show, inode);
1213 }
1214
1215 static const struct file_operations proc_pid_sched_operations = {
1216 .open = sched_open,
1217 .read = seq_read,
1218 .write = sched_write,
1219 .llseek = seq_lseek,
1220 .release = single_release,
1221 };
1222
1223 #endif
1224
1225 #ifdef CONFIG_SCHED_AUTOGROUP
1226 /*
1227 * Print out autogroup related information:
1228 */
sched_autogroup_show(struct seq_file * m,void * v)1229 static int sched_autogroup_show(struct seq_file *m, void *v)
1230 {
1231 struct inode *inode = m->private;
1232 struct task_struct *p;
1233
1234 p = get_proc_task(inode);
1235 if (!p)
1236 return -ESRCH;
1237 proc_sched_autogroup_show_task(p, m);
1238
1239 put_task_struct(p);
1240
1241 return 0;
1242 }
1243
1244 static ssize_t
sched_autogroup_write(struct file * file,const char __user * buf,size_t count,loff_t * offset)1245 sched_autogroup_write(struct file *file, const char __user *buf,
1246 size_t count, loff_t *offset)
1247 {
1248 struct inode *inode = file_inode(file);
1249 struct task_struct *p;
1250 char buffer[PROC_NUMBUF];
1251 int nice;
1252 int err;
1253
1254 memset(buffer, 0, sizeof(buffer));
1255 if (count > sizeof(buffer) - 1)
1256 count = sizeof(buffer) - 1;
1257 if (copy_from_user(buffer, buf, count))
1258 return -EFAULT;
1259
1260 err = kstrtoint(strstrip(buffer), 0, &nice);
1261 if (err < 0)
1262 return err;
1263
1264 p = get_proc_task(inode);
1265 if (!p)
1266 return -ESRCH;
1267
1268 err = proc_sched_autogroup_set_nice(p, nice);
1269 if (err)
1270 count = err;
1271
1272 put_task_struct(p);
1273
1274 return count;
1275 }
1276
sched_autogroup_open(struct inode * inode,struct file * filp)1277 static int sched_autogroup_open(struct inode *inode, struct file *filp)
1278 {
1279 int ret;
1280
1281 ret = single_open(filp, sched_autogroup_show, NULL);
1282 if (!ret) {
1283 struct seq_file *m = filp->private_data;
1284
1285 m->private = inode;
1286 }
1287 return ret;
1288 }
1289
1290 static const struct file_operations proc_pid_sched_autogroup_operations = {
1291 .open = sched_autogroup_open,
1292 .read = seq_read,
1293 .write = sched_autogroup_write,
1294 .llseek = seq_lseek,
1295 .release = single_release,
1296 };
1297
1298 #endif /* CONFIG_SCHED_AUTOGROUP */
1299
comm_write(struct file * file,const char __user * buf,size_t count,loff_t * offset)1300 static ssize_t comm_write(struct file *file, const char __user *buf,
1301 size_t count, loff_t *offset)
1302 {
1303 struct inode *inode = file_inode(file);
1304 struct task_struct *p;
1305 char buffer[TASK_COMM_LEN];
1306 const size_t maxlen = sizeof(buffer) - 1;
1307
1308 memset(buffer, 0, sizeof(buffer));
1309 if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
1310 return -EFAULT;
1311
1312 p = get_proc_task(inode);
1313 if (!p)
1314 return -ESRCH;
1315
1316 if (same_thread_group(current, p))
1317 set_task_comm(p, buffer);
1318 else
1319 count = -EINVAL;
1320
1321 put_task_struct(p);
1322
1323 return count;
1324 }
1325
comm_show(struct seq_file * m,void * v)1326 static int comm_show(struct seq_file *m, void *v)
1327 {
1328 struct inode *inode = m->private;
1329 struct task_struct *p;
1330
1331 p = get_proc_task(inode);
1332 if (!p)
1333 return -ESRCH;
1334
1335 task_lock(p);
1336 seq_printf(m, "%s\n", p->comm);
1337 task_unlock(p);
1338
1339 put_task_struct(p);
1340
1341 return 0;
1342 }
1343
comm_open(struct inode * inode,struct file * filp)1344 static int comm_open(struct inode *inode, struct file *filp)
1345 {
1346 return single_open(filp, comm_show, inode);
1347 }
1348
1349 static const struct file_operations proc_pid_set_comm_operations = {
1350 .open = comm_open,
1351 .read = seq_read,
1352 .write = comm_write,
1353 .llseek = seq_lseek,
1354 .release = single_release,
1355 };
1356
proc_exe_link(struct dentry * dentry,struct path * exe_path)1357 static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
1358 {
1359 struct task_struct *task;
1360 struct mm_struct *mm;
1361 struct file *exe_file;
1362
1363 task = get_proc_task(d_inode(dentry));
1364 if (!task)
1365 return -ENOENT;
1366 mm = get_task_mm(task);
1367 put_task_struct(task);
1368 if (!mm)
1369 return -ENOENT;
1370 exe_file = get_mm_exe_file(mm);
1371 mmput(mm);
1372 if (exe_file) {
1373 *exe_path = exe_file->f_path;
1374 path_get(&exe_file->f_path);
1375 fput(exe_file);
1376 return 0;
1377 } else
1378 return -ENOENT;
1379 }
1380
proc_pid_follow_link(struct dentry * dentry,struct nameidata * nd)1381 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1382 {
1383 struct inode *inode = d_inode(dentry);
1384 struct path path;
1385 int error = -EACCES;
1386
1387 /* Are we allowed to snoop on the tasks file descriptors? */
1388 if (!proc_fd_access_allowed(inode))
1389 goto out;
1390
1391 error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1392 if (error)
1393 goto out;
1394
1395 nd_jump_link(nd, &path);
1396 return NULL;
1397 out:
1398 return ERR_PTR(error);
1399 }
1400
/*
 * Render @path with d_path() into a temporary page and copy up to @buflen
 * bytes of the resulting name to the user buffer.
 *
 * Returns the number of bytes copied (the name is truncated to @buflen and
 * no terminating NUL is copied), -ENOMEM if no page is available, the
 * d_path() error, or -EFAULT when the copy to userspace fails.
 */
static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
{
	char *page;
	char *name;
	int ret;

	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (page == NULL)
		return -ENOMEM;

	name = d_path(path, page, PAGE_SIZE);
	if (IS_ERR(name)) {
		ret = PTR_ERR(name);
	} else {
		/*
		 * Length is measured from the returned pointer up to, but not
		 * including, the last byte of the page.
		 */
		ret = page + PAGE_SIZE - 1 - name;
		if (ret > buflen)
			ret = buflen;
		if (copy_to_user(buffer, name, ret))
			ret = -EFAULT;
	}

	free_page((unsigned long)page);
	return ret;
}
1424
/*
 * ->readlink for per-task proc symlinks: resolves the target through the
 * inode's proc_get_link operation and copies its name to userspace.
 *
 * Returns the do_proc_readlink() result, -EACCES when
 * proc_fd_access_allowed() refuses, or the proc_get_link error.
 */
static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
{
	struct inode *inode = d_inode(dentry);
	struct path target;
	int ret;

	/* Are we allowed to snoop on the task's file descriptors? */
	if (!proc_fd_access_allowed(inode))
		return -EACCES;

	ret = PROC_I(inode)->op.proc_get_link(dentry, &target);
	if (ret)
		return ret;

	ret = do_proc_readlink(&target, buffer, buflen);
	/* Drop the reference proc_get_link handed us. */
	path_put(&target);

	return ret;
}
1444
1445 const struct inode_operations proc_pid_link_inode_operations = {
1446 .readlink = proc_pid_readlink,
1447 .follow_link = proc_pid_follow_link,
1448 .setattr = proc_setattr,
1449 };
1450
1451
1452 /* building an inode */
1453
proc_pid_make_inode(struct super_block * sb,struct task_struct * task)1454 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1455 {
1456 struct inode * inode;
1457 struct proc_inode *ei;
1458 const struct cred *cred;
1459
1460 /* We need a new inode */
1461
1462 inode = new_inode(sb);
1463 if (!inode)
1464 goto out;
1465
1466 /* Common stuff */
1467 ei = PROC_I(inode);
1468 inode->i_ino = get_next_ino();
1469 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1470 inode->i_op = &proc_def_inode_operations;
1471
1472 /*
1473 * grab the reference to task.
1474 */
1475 ei->pid = get_task_pid(task, PIDTYPE_PID);
1476 if (!ei->pid)
1477 goto out_unlock;
1478
1479 if (task_dumpable(task)) {
1480 rcu_read_lock();
1481 cred = __task_cred(task);
1482 inode->i_uid = cred->euid;
1483 inode->i_gid = cred->egid;
1484 rcu_read_unlock();
1485 }
1486 security_task_to_inode(task, inode);
1487
1488 out:
1489 return inode;
1490
1491 out_unlock:
1492 iput(inode);
1493 return NULL;
1494 }
1495
pid_getattr(struct vfsmount * mnt,struct dentry * dentry,struct kstat * stat)1496 int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1497 {
1498 struct inode *inode = d_inode(dentry);
1499 struct task_struct *task;
1500 const struct cred *cred;
1501 struct pid_namespace *pid = dentry->d_sb->s_fs_info;
1502
1503 generic_fillattr(inode, stat);
1504
1505 rcu_read_lock();
1506 stat->uid = GLOBAL_ROOT_UID;
1507 stat->gid = GLOBAL_ROOT_GID;
1508 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1509 if (task) {
1510 if (!has_pid_permissions(pid, task, 2)) {
1511 rcu_read_unlock();
1512 /*
1513 * This doesn't prevent learning whether PID exists,
1514 * it only makes getattr() consistent with readdir().
1515 */
1516 return -ENOENT;
1517 }
1518 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1519 task_dumpable(task)) {
1520 cred = __task_cred(task);
1521 stat->uid = cred->euid;
1522 stat->gid = cred->egid;
1523 }
1524 }
1525 rcu_read_unlock();
1526 return 0;
1527 }
1528
1529 /* dentry stuff */
1530
1531 /*
1532 * Exceptional case: normally we are not allowed to unhash a busy
1533 * directory. In this case, however, we can do it - no aliasing problems
1534 * due to the way we treat inodes.
1535 *
1536 * Rewrite the inode's ownerships here because the owning task may have
1537 * performed a setuid(), etc.
1538 *
1539 * Before the /proc/pid/status file was created the only way to read
1540 * the effective uid of a /process was to stat /proc/pid. Reading
1541 * /proc/pid/status is slow enough that procps and other packages
1542 * kept stating /proc/pid. To keep the rules in /proc simple I have
1543 * made this apply to all per process world readable and executable
1544 * directories.
1545 */
pid_revalidate(struct dentry * dentry,unsigned int flags)1546 int pid_revalidate(struct dentry *dentry, unsigned int flags)
1547 {
1548 struct inode *inode;
1549 struct task_struct *task;
1550 const struct cred *cred;
1551
1552 if (flags & LOOKUP_RCU)
1553 return -ECHILD;
1554
1555 inode = d_inode(dentry);
1556 task = get_proc_task(inode);
1557
1558 if (task) {
1559 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1560 task_dumpable(task)) {
1561 rcu_read_lock();
1562 cred = __task_cred(task);
1563 inode->i_uid = cred->euid;
1564 inode->i_gid = cred->egid;
1565 rcu_read_unlock();
1566 } else {
1567 inode->i_uid = GLOBAL_ROOT_UID;
1568 inode->i_gid = GLOBAL_ROOT_GID;
1569 }
1570 inode->i_mode &= ~(S_ISUID | S_ISGID);
1571 security_task_to_inode(task, inode);
1572 put_task_struct(task);
1573 return 1;
1574 }
1575 return 0;
1576 }
1577
proc_inode_is_dead(struct inode * inode)1578 static inline bool proc_inode_is_dead(struct inode *inode)
1579 {
1580 return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
1581 }
1582
/*
 * ->d_delete callback.  Is the task we represent dead?  If so, then don't
 * put the dentry on the lru list, kill it immediately.
 *
 * Returns non-zero when the backing task is gone.
 */
int pid_delete_dentry(const struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	return proc_inode_is_dead(inode);
}
1591
1592 const struct dentry_operations pid_dentry_operations =
1593 {
1594 .d_revalidate = pid_revalidate,
1595 .d_delete = pid_delete_dentry,
1596 };
1597
1598 /* Lookups */
1599
1600 /*
1601 * Fill a directory entry.
1602 *
1603 * If possible create the dcache entry and derive our inode number and
1604 * file type from dcache entry.
1605 *
1606 * Since all of the proc inode numbers are dynamically generated, the inode
1607 * numbers do not exist until the inode is cache. This means creating the
1608 * the dcache entry in readdir is necessary to keep the inode numbers
1609 * reported by readdir in sync with the inode numbers reported
1610 * by stat.
1611 */
proc_fill_cache(struct file * file,struct dir_context * ctx,const char * name,int len,instantiate_t instantiate,struct task_struct * task,const void * ptr)1612 bool proc_fill_cache(struct file *file, struct dir_context *ctx,
1613 const char *name, int len,
1614 instantiate_t instantiate, struct task_struct *task, const void *ptr)
1615 {
1616 struct dentry *child, *dir = file->f_path.dentry;
1617 struct qstr qname = QSTR_INIT(name, len);
1618 struct inode *inode;
1619 unsigned type;
1620 ino_t ino;
1621
1622 child = d_hash_and_lookup(dir, &qname);
1623 if (!child) {
1624 child = d_alloc(dir, &qname);
1625 if (!child)
1626 goto end_instantiate;
1627 if (instantiate(d_inode(dir), child, task, ptr) < 0) {
1628 dput(child);
1629 goto end_instantiate;
1630 }
1631 }
1632 inode = d_inode(child);
1633 ino = inode->i_ino;
1634 type = inode->i_mode >> 12;
1635 dput(child);
1636 return dir_emit(ctx, name, len, ino, type);
1637
1638 end_instantiate:
1639 return dir_emit(ctx, name, len, 1, DT_UNKNOWN);
1640 }
1641
1642 #ifdef CONFIG_CHECKPOINT_RESTORE
1643
1644 /*
1645 * dname_to_vma_addr - maps a dentry name into two unsigned longs
1646 * which represent vma start and end addresses.
1647 */
dname_to_vma_addr(struct dentry * dentry,unsigned long * start,unsigned long * end)1648 static int dname_to_vma_addr(struct dentry *dentry,
1649 unsigned long *start, unsigned long *end)
1650 {
1651 if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
1652 return -EINVAL;
1653
1654 return 0;
1655 }
1656
map_files_d_revalidate(struct dentry * dentry,unsigned int flags)1657 static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
1658 {
1659 unsigned long vm_start, vm_end;
1660 bool exact_vma_exists = false;
1661 struct mm_struct *mm = NULL;
1662 struct task_struct *task;
1663 const struct cred *cred;
1664 struct inode *inode;
1665 int status = 0;
1666
1667 if (flags & LOOKUP_RCU)
1668 return -ECHILD;
1669
1670 if (!capable(CAP_SYS_ADMIN)) {
1671 status = -EPERM;
1672 goto out_notask;
1673 }
1674
1675 inode = d_inode(dentry);
1676 task = get_proc_task(inode);
1677 if (!task)
1678 goto out_notask;
1679
1680 mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
1681 if (IS_ERR_OR_NULL(mm))
1682 goto out;
1683
1684 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
1685 down_read(&mm->mmap_sem);
1686 exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
1687 up_read(&mm->mmap_sem);
1688 }
1689
1690 mmput(mm);
1691
1692 if (exact_vma_exists) {
1693 if (task_dumpable(task)) {
1694 rcu_read_lock();
1695 cred = __task_cred(task);
1696 inode->i_uid = cred->euid;
1697 inode->i_gid = cred->egid;
1698 rcu_read_unlock();
1699 } else {
1700 inode->i_uid = GLOBAL_ROOT_UID;
1701 inode->i_gid = GLOBAL_ROOT_GID;
1702 }
1703 security_task_to_inode(task, inode);
1704 status = 1;
1705 }
1706
1707 out:
1708 put_task_struct(task);
1709
1710 out_notask:
1711 return status;
1712 }
1713
1714 static const struct dentry_operations tid_map_files_dentry_operations = {
1715 .d_revalidate = map_files_d_revalidate,
1716 .d_delete = pid_delete_dentry,
1717 };
1718
proc_map_files_get_link(struct dentry * dentry,struct path * path)1719 static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
1720 {
1721 unsigned long vm_start, vm_end;
1722 struct vm_area_struct *vma;
1723 struct task_struct *task;
1724 struct mm_struct *mm;
1725 int rc;
1726
1727 rc = -ENOENT;
1728 task = get_proc_task(d_inode(dentry));
1729 if (!task)
1730 goto out;
1731
1732 mm = get_task_mm(task);
1733 put_task_struct(task);
1734 if (!mm)
1735 goto out;
1736
1737 rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
1738 if (rc)
1739 goto out_mmput;
1740
1741 rc = -ENOENT;
1742 down_read(&mm->mmap_sem);
1743 vma = find_exact_vma(mm, vm_start, vm_end);
1744 if (vma && vma->vm_file) {
1745 *path = vma->vm_file->f_path;
1746 path_get(path);
1747 rc = 0;
1748 }
1749 up_read(&mm->mmap_sem);
1750
1751 out_mmput:
1752 mmput(mm);
1753 out:
1754 return rc;
1755 }
1756
1757 struct map_files_info {
1758 fmode_t mode;
1759 unsigned long len;
1760 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
1761 };
1762
1763 static int
proc_map_files_instantiate(struct inode * dir,struct dentry * dentry,struct task_struct * task,const void * ptr)1764 proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
1765 struct task_struct *task, const void *ptr)
1766 {
1767 fmode_t mode = (fmode_t)(unsigned long)ptr;
1768 struct proc_inode *ei;
1769 struct inode *inode;
1770
1771 inode = proc_pid_make_inode(dir->i_sb, task);
1772 if (!inode)
1773 return -ENOENT;
1774
1775 ei = PROC_I(inode);
1776 ei->op.proc_get_link = proc_map_files_get_link;
1777
1778 inode->i_op = &proc_pid_link_inode_operations;
1779 inode->i_size = 64;
1780 inode->i_mode = S_IFLNK;
1781
1782 if (mode & FMODE_READ)
1783 inode->i_mode |= S_IRUSR;
1784 if (mode & FMODE_WRITE)
1785 inode->i_mode |= S_IWUSR;
1786
1787 d_set_d_op(dentry, &tid_map_files_dentry_operations);
1788 d_add(dentry, inode);
1789
1790 return 0;
1791 }
1792
proc_map_files_lookup(struct inode * dir,struct dentry * dentry,unsigned int flags)1793 static struct dentry *proc_map_files_lookup(struct inode *dir,
1794 struct dentry *dentry, unsigned int flags)
1795 {
1796 unsigned long vm_start, vm_end;
1797 struct vm_area_struct *vma;
1798 struct task_struct *task;
1799 int result;
1800 struct mm_struct *mm;
1801
1802 result = -EPERM;
1803 if (!capable(CAP_SYS_ADMIN))
1804 goto out;
1805
1806 result = -ENOENT;
1807 task = get_proc_task(dir);
1808 if (!task)
1809 goto out;
1810
1811 result = -EACCES;
1812 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
1813 goto out_put_task;
1814
1815 result = -ENOENT;
1816 if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
1817 goto out_put_task;
1818
1819 mm = get_task_mm(task);
1820 if (!mm)
1821 goto out_put_task;
1822
1823 down_read(&mm->mmap_sem);
1824 vma = find_exact_vma(mm, vm_start, vm_end);
1825 if (!vma)
1826 goto out_no_vma;
1827
1828 if (vma->vm_file)
1829 result = proc_map_files_instantiate(dir, dentry, task,
1830 (void *)(unsigned long)vma->vm_file->f_mode);
1831
1832 out_no_vma:
1833 up_read(&mm->mmap_sem);
1834 mmput(mm);
1835 out_put_task:
1836 put_task_struct(task);
1837 out:
1838 return ERR_PTR(result);
1839 }
1840
1841 static const struct inode_operations proc_map_files_inode_operations = {
1842 .lookup = proc_map_files_lookup,
1843 .permission = proc_fd_permission,
1844 .setattr = proc_setattr,
1845 };
1846
1847 static int
proc_map_files_readdir(struct file * file,struct dir_context * ctx)1848 proc_map_files_readdir(struct file *file, struct dir_context *ctx)
1849 {
1850 struct vm_area_struct *vma;
1851 struct task_struct *task;
1852 struct mm_struct *mm;
1853 unsigned long nr_files, pos, i;
1854 struct flex_array *fa = NULL;
1855 struct map_files_info info;
1856 struct map_files_info *p;
1857 int ret;
1858
1859 ret = -EPERM;
1860 if (!capable(CAP_SYS_ADMIN))
1861 goto out;
1862
1863 ret = -ENOENT;
1864 task = get_proc_task(file_inode(file));
1865 if (!task)
1866 goto out;
1867
1868 ret = -EACCES;
1869 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
1870 goto out_put_task;
1871
1872 ret = 0;
1873 if (!dir_emit_dots(file, ctx))
1874 goto out_put_task;
1875
1876 mm = get_task_mm(task);
1877 if (!mm)
1878 goto out_put_task;
1879 down_read(&mm->mmap_sem);
1880
1881 nr_files = 0;
1882
1883 /*
1884 * We need two passes here:
1885 *
1886 * 1) Collect vmas of mapped files with mmap_sem taken
1887 * 2) Release mmap_sem and instantiate entries
1888 *
1889 * otherwise we get lockdep complained, since filldir()
1890 * routine might require mmap_sem taken in might_fault().
1891 */
1892
1893 for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
1894 if (vma->vm_file && ++pos > ctx->pos)
1895 nr_files++;
1896 }
1897
1898 if (nr_files) {
1899 fa = flex_array_alloc(sizeof(info), nr_files,
1900 GFP_KERNEL);
1901 if (!fa || flex_array_prealloc(fa, 0, nr_files,
1902 GFP_KERNEL)) {
1903 ret = -ENOMEM;
1904 if (fa)
1905 flex_array_free(fa);
1906 up_read(&mm->mmap_sem);
1907 mmput(mm);
1908 goto out_put_task;
1909 }
1910 for (i = 0, vma = mm->mmap, pos = 2; vma;
1911 vma = vma->vm_next) {
1912 if (!vma->vm_file)
1913 continue;
1914 if (++pos <= ctx->pos)
1915 continue;
1916
1917 info.mode = vma->vm_file->f_mode;
1918 info.len = snprintf(info.name,
1919 sizeof(info.name), "%lx-%lx",
1920 vma->vm_start, vma->vm_end);
1921 if (flex_array_put(fa, i++, &info, GFP_KERNEL))
1922 BUG();
1923 }
1924 }
1925 up_read(&mm->mmap_sem);
1926
1927 for (i = 0; i < nr_files; i++) {
1928 p = flex_array_get(fa, i);
1929 if (!proc_fill_cache(file, ctx,
1930 p->name, p->len,
1931 proc_map_files_instantiate,
1932 task,
1933 (void *)(unsigned long)p->mode))
1934 break;
1935 ctx->pos++;
1936 }
1937 if (fa)
1938 flex_array_free(fa);
1939 mmput(mm);
1940
1941 out_put_task:
1942 put_task_struct(task);
1943 out:
1944 return ret;
1945 }
1946
1947 static const struct file_operations proc_map_files_operations = {
1948 .read = generic_read_dir,
1949 .iterate = proc_map_files_readdir,
1950 .llseek = default_llseek,
1951 };
1952
/* Per-open state shared by the timers seq_file callbacks. */
struct timers_private {
	struct pid *pid;		/* set in proc_timers_open() from proc_pid() */
	struct task_struct *task;	/* held from timers_start() to timers_stop() */
	struct sighand_struct *sighand;	/* non-NULL while the sighand lock is held */
	struct pid_namespace *ns;	/* namespace show_timer() prints pids in */
	unsigned long flags;		/* passed to lock/unlock_task_sighand() */
};
1960
/*
 * seq_file ->start: takes a reference on the task and locks its sighand,
 * then positions the iterator in the task's posix_timers list.
 *
 * Both the task reference and the lock are recorded in the private data so
 * that timers_stop() can release whatever was acquired.
 *
 * Returns the list position, or ERR_PTR(-ESRCH) when the task is gone or
 * its sighand cannot be locked.
 */
static void *timers_start(struct seq_file *m, loff_t *pos)
{
	struct timers_private *priv = m->private;
	struct task_struct *tsk;

	tsk = get_pid_task(priv->pid, PIDTYPE_PID);
	priv->task = tsk;
	if (tsk == NULL)
		return ERR_PTR(-ESRCH);

	priv->sighand = lock_task_sighand(tsk, &priv->flags);
	if (priv->sighand == NULL)
		return ERR_PTR(-ESRCH);

	return seq_list_start(&tsk->signal->posix_timers, *pos);
}
1975
/* seq_file ->next: advance within the task's posix_timers list. */
static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct timers_private *priv = m->private;
	struct list_head *head = &priv->task->signal->posix_timers;

	return seq_list_next(v, head, pos);
}
1981
timers_stop(struct seq_file * m,void * v)1982 static void timers_stop(struct seq_file *m, void *v)
1983 {
1984 struct timers_private *tp = m->private;
1985
1986 if (tp->sighand) {
1987 unlock_task_sighand(tp->task, &tp->flags);
1988 tp->sighand = NULL;
1989 }
1990
1991 if (tp->task) {
1992 put_task_struct(tp->task);
1993 tp->task = NULL;
1994 }
1995 }
1996
show_timer(struct seq_file * m,void * v)1997 static int show_timer(struct seq_file *m, void *v)
1998 {
1999 struct k_itimer *timer;
2000 struct timers_private *tp = m->private;
2001 int notify;
2002 static const char * const nstr[] = {
2003 [SIGEV_SIGNAL] = "signal",
2004 [SIGEV_NONE] = "none",
2005 [SIGEV_THREAD] = "thread",
2006 };
2007
2008 timer = list_entry((struct list_head *)v, struct k_itimer, list);
2009 notify = timer->it_sigev_notify;
2010
2011 seq_printf(m, "ID: %d\n", timer->it_id);
2012 seq_printf(m, "signal: %d/%p\n",
2013 timer->sigq->info.si_signo,
2014 timer->sigq->info.si_value.sival_ptr);
2015 seq_printf(m, "notify: %s/%s.%d\n",
2016 nstr[notify & ~SIGEV_THREAD_ID],
2017 (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
2018 pid_nr_ns(timer->it_pid, tp->ns));
2019 seq_printf(m, "ClockID: %d\n", timer->it_clock);
2020
2021 return 0;
2022 }
2023
2024 static const struct seq_operations proc_timers_seq_ops = {
2025 .start = timers_start,
2026 .next = timers_next,
2027 .stop = timers_stop,
2028 .show = show_timer,
2029 };
2030
proc_timers_open(struct inode * inode,struct file * file)2031 static int proc_timers_open(struct inode *inode, struct file *file)
2032 {
2033 struct timers_private *tp;
2034
2035 tp = __seq_open_private(file, &proc_timers_seq_ops,
2036 sizeof(struct timers_private));
2037 if (!tp)
2038 return -ENOMEM;
2039
2040 tp->pid = proc_pid(inode);
2041 tp->ns = inode->i_sb->s_fs_info;
2042 return 0;
2043 }
2044
2045 static const struct file_operations proc_timers_operations = {
2046 .open = proc_timers_open,
2047 .read = seq_read,
2048 .llseek = seq_lseek,
2049 .release = seq_release_private,
2050 };
2051 #endif /* CONFIG_CHECKPOINT_RESTORE */
2052
proc_pident_instantiate(struct inode * dir,struct dentry * dentry,struct task_struct * task,const void * ptr)2053 static int proc_pident_instantiate(struct inode *dir,
2054 struct dentry *dentry, struct task_struct *task, const void *ptr)
2055 {
2056 const struct pid_entry *p = ptr;
2057 struct inode *inode;
2058 struct proc_inode *ei;
2059
2060 inode = proc_pid_make_inode(dir->i_sb, task);
2061 if (!inode)
2062 goto out;
2063
2064 ei = PROC_I(inode);
2065 inode->i_mode = p->mode;
2066 if (S_ISDIR(inode->i_mode))
2067 set_nlink(inode, 2); /* Use getattr to fix if necessary */
2068 if (p->iop)
2069 inode->i_op = p->iop;
2070 if (p->fop)
2071 inode->i_fop = p->fop;
2072 ei->op = p->op;
2073 d_set_d_op(dentry, &pid_dentry_operations);
2074 d_add(dentry, inode);
2075 /* Close the race of the process dying before we return the dentry */
2076 if (pid_revalidate(dentry, 0))
2077 return 0;
2078 out:
2079 return -ENOENT;
2080 }
2081
proc_pident_lookup(struct inode * dir,struct dentry * dentry,const struct pid_entry * ents,unsigned int nents)2082 static struct dentry *proc_pident_lookup(struct inode *dir,
2083 struct dentry *dentry,
2084 const struct pid_entry *ents,
2085 unsigned int nents)
2086 {
2087 int error;
2088 struct task_struct *task = get_proc_task(dir);
2089 const struct pid_entry *p, *last;
2090
2091 error = -ENOENT;
2092
2093 if (!task)
2094 goto out_no_task;
2095
2096 /*
2097 * Yes, it does not scale. And it should not. Don't add
2098 * new entries into /proc/<tgid>/ without very good reasons.
2099 */
2100 last = &ents[nents - 1];
2101 for (p = ents; p <= last; p++) {
2102 if (p->len != dentry->d_name.len)
2103 continue;
2104 if (!memcmp(dentry->d_name.name, p->name, p->len))
2105 break;
2106 }
2107 if (p > last)
2108 goto out;
2109
2110 error = proc_pident_instantiate(dir, dentry, task, p);
2111 out:
2112 put_task_struct(task);
2113 out_no_task:
2114 return ERR_PTR(error);
2115 }
2116
proc_pident_readdir(struct file * file,struct dir_context * ctx,const struct pid_entry * ents,unsigned int nents)2117 static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
2118 const struct pid_entry *ents, unsigned int nents)
2119 {
2120 struct task_struct *task = get_proc_task(file_inode(file));
2121 const struct pid_entry *p;
2122
2123 if (!task)
2124 return -ENOENT;
2125
2126 if (!dir_emit_dots(file, ctx))
2127 goto out;
2128
2129 if (ctx->pos >= nents + 2)
2130 goto out;
2131
2132 for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) {
2133 if (!proc_fill_cache(file, ctx, p->name, p->len,
2134 proc_pident_instantiate, task, p))
2135 break;
2136 ctx->pos++;
2137 }
2138 out:
2139 put_task_struct(task);
2140 return 0;
2141 }
2142
2143 #ifdef CONFIG_SECURITY
proc_pid_attr_read(struct file * file,char __user * buf,size_t count,loff_t * ppos)2144 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2145 size_t count, loff_t *ppos)
2146 {
2147 struct inode * inode = file_inode(file);
2148 char *p = NULL;
2149 ssize_t length;
2150 struct task_struct *task = get_proc_task(inode);
2151
2152 if (!task)
2153 return -ESRCH;
2154
2155 length = security_getprocattr(task,
2156 (char*)file->f_path.dentry->d_name.name,
2157 &p);
2158 put_task_struct(task);
2159 if (length > 0)
2160 length = simple_read_from_buffer(buf, count, ppos, p, length);
2161 kfree(p);
2162 return length;
2163 }
2164
proc_pid_attr_write(struct file * file,const char __user * buf,size_t count,loff_t * ppos)2165 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2166 size_t count, loff_t *ppos)
2167 {
2168 struct inode * inode = file_inode(file);
2169 char *page;
2170 ssize_t length;
2171 struct task_struct *task = get_proc_task(inode);
2172
2173 length = -ESRCH;
2174 if (!task)
2175 goto out_no_task;
2176 if (count > PAGE_SIZE)
2177 count = PAGE_SIZE;
2178
2179 /* No partial writes. */
2180 length = -EINVAL;
2181 if (*ppos != 0)
2182 goto out;
2183
2184 length = -ENOMEM;
2185 page = (char*)__get_free_page(GFP_TEMPORARY);
2186 if (!page)
2187 goto out;
2188
2189 length = -EFAULT;
2190 if (copy_from_user(page, buf, count))
2191 goto out_free;
2192
2193 /* Guard against adverse ptrace interaction */
2194 length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
2195 if (length < 0)
2196 goto out_free;
2197
2198 length = security_setprocattr(task,
2199 (char*)file->f_path.dentry->d_name.name,
2200 (void*)page, count);
2201 mutex_unlock(&task->signal->cred_guard_mutex);
2202 out_free:
2203 free_page((unsigned long) page);
2204 out:
2205 put_task_struct(task);
2206 out_no_task:
2207 return length;
2208 }
2209
2210 static const struct file_operations proc_pid_attr_operations = {
2211 .read = proc_pid_attr_read,
2212 .write = proc_pid_attr_write,
2213 .llseek = generic_file_llseek,
2214 };
2215
2216 static const struct pid_entry attr_dir_stuff[] = {
2217 REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2218 REG("prev", S_IRUGO, proc_pid_attr_operations),
2219 REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2220 REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2221 REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2222 REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2223 };
2224
proc_attr_dir_readdir(struct file * file,struct dir_context * ctx)2225 static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
2226 {
2227 return proc_pident_readdir(file, ctx,
2228 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
2229 }
2230
2231 static const struct file_operations proc_attr_dir_operations = {
2232 .read = generic_read_dir,
2233 .iterate = proc_attr_dir_readdir,
2234 .llseek = default_llseek,
2235 };
2236
proc_attr_dir_lookup(struct inode * dir,struct dentry * dentry,unsigned int flags)2237 static struct dentry *proc_attr_dir_lookup(struct inode *dir,
2238 struct dentry *dentry, unsigned int flags)
2239 {
2240 return proc_pident_lookup(dir, dentry,
2241 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
2242 }
2243
2244 static const struct inode_operations proc_attr_dir_inode_operations = {
2245 .lookup = proc_attr_dir_lookup,
2246 .getattr = pid_getattr,
2247 .setattr = proc_setattr,
2248 };
2249
2250 #endif
2251
2252 #ifdef CONFIG_ELF_CORE
proc_coredump_filter_read(struct file * file,char __user * buf,size_t count,loff_t * ppos)2253 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2254 size_t count, loff_t *ppos)
2255 {
2256 struct task_struct *task = get_proc_task(file_inode(file));
2257 struct mm_struct *mm;
2258 char buffer[PROC_NUMBUF];
2259 size_t len;
2260 int ret;
2261
2262 if (!task)
2263 return -ESRCH;
2264
2265 ret = 0;
2266 mm = get_task_mm(task);
2267 if (mm) {
2268 len = snprintf(buffer, sizeof(buffer), "%08lx\n",
2269 ((mm->flags & MMF_DUMP_FILTER_MASK) >>
2270 MMF_DUMP_FILTER_SHIFT));
2271 mmput(mm);
2272 ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
2273 }
2274
2275 put_task_struct(task);
2276
2277 return ret;
2278 }
2279
proc_coredump_filter_write(struct file * file,const char __user * buf,size_t count,loff_t * ppos)2280 static ssize_t proc_coredump_filter_write(struct file *file,
2281 const char __user *buf,
2282 size_t count,
2283 loff_t *ppos)
2284 {
2285 struct task_struct *task;
2286 struct mm_struct *mm;
2287 char buffer[PROC_NUMBUF], *end;
2288 unsigned int val;
2289 int ret;
2290 int i;
2291 unsigned long mask;
2292
2293 ret = -EFAULT;
2294 memset(buffer, 0, sizeof(buffer));
2295 if (count > sizeof(buffer) - 1)
2296 count = sizeof(buffer) - 1;
2297 if (copy_from_user(buffer, buf, count))
2298 goto out_no_task;
2299
2300 ret = -EINVAL;
2301 val = (unsigned int)simple_strtoul(buffer, &end, 0);
2302 if (*end == '\n')
2303 end++;
2304 if (end - buffer == 0)
2305 goto out_no_task;
2306
2307 ret = -ESRCH;
2308 task = get_proc_task(file_inode(file));
2309 if (!task)
2310 goto out_no_task;
2311
2312 ret = end - buffer;
2313 mm = get_task_mm(task);
2314 if (!mm)
2315 goto out_no_mm;
2316
2317 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
2318 if (val & mask)
2319 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2320 else
2321 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
2322 }
2323
2324 mmput(mm);
2325 out_no_mm:
2326 put_task_struct(task);
2327 out_no_task:
2328 return ret;
2329 }
2330
2331 static const struct file_operations proc_coredump_filter_operations = {
2332 .read = proc_coredump_filter_read,
2333 .write = proc_coredump_filter_write,
2334 .llseek = generic_file_llseek,
2335 };
2336 #endif
2337
2338 #ifdef CONFIG_TASK_IO_ACCOUNTING
do_io_accounting(struct task_struct * task,struct seq_file * m,int whole)2339 static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
2340 {
2341 struct task_io_accounting acct = task->ioac;
2342 unsigned long flags;
2343 int result;
2344
2345 result = mutex_lock_killable(&task->signal->cred_guard_mutex);
2346 if (result)
2347 return result;
2348
2349 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
2350 result = -EACCES;
2351 goto out_unlock;
2352 }
2353
2354 if (whole && lock_task_sighand(task, &flags)) {
2355 struct task_struct *t = task;
2356
2357 task_io_accounting_add(&acct, &task->signal->ioac);
2358 while_each_thread(task, t)
2359 task_io_accounting_add(&acct, &t->ioac);
2360
2361 unlock_task_sighand(task, &flags);
2362 }
2363 seq_printf(m,
2364 "rchar: %llu\n"
2365 "wchar: %llu\n"
2366 "syscr: %llu\n"
2367 "syscw: %llu\n"
2368 "read_bytes: %llu\n"
2369 "write_bytes: %llu\n"
2370 "cancelled_write_bytes: %llu\n",
2371 (unsigned long long)acct.rchar,
2372 (unsigned long long)acct.wchar,
2373 (unsigned long long)acct.syscr,
2374 (unsigned long long)acct.syscw,
2375 (unsigned long long)acct.read_bytes,
2376 (unsigned long long)acct.write_bytes,
2377 (unsigned long long)acct.cancelled_write_bytes);
2378 result = 0;
2379
2380 out_unlock:
2381 mutex_unlock(&task->signal->cred_guard_mutex);
2382 return result;
2383 }
2384
/* "io" show routine for a single thread: counters of @task only. */
static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
				  struct pid *pid, struct task_struct *task)
{
	const int whole = 0;

	return do_io_accounting(task, m, whole);
}
2390
/* "io" show routine for a thread group: counters summed over the group. */
static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
				   struct pid *pid, struct task_struct *task)
{
	const int whole = 1;

	return do_io_accounting(task, m, whole);
}
2396 #endif /* CONFIG_TASK_IO_ACCOUNTING */
2397
2398 #ifdef CONFIG_USER_NS
proc_id_map_open(struct inode * inode,struct file * file,const struct seq_operations * seq_ops)2399 static int proc_id_map_open(struct inode *inode, struct file *file,
2400 const struct seq_operations *seq_ops)
2401 {
2402 struct user_namespace *ns = NULL;
2403 struct task_struct *task;
2404 struct seq_file *seq;
2405 int ret = -EINVAL;
2406
2407 task = get_proc_task(inode);
2408 if (task) {
2409 rcu_read_lock();
2410 ns = get_user_ns(task_cred_xxx(task, user_ns));
2411 rcu_read_unlock();
2412 put_task_struct(task);
2413 }
2414 if (!ns)
2415 goto err;
2416
2417 ret = seq_open(file, seq_ops);
2418 if (ret)
2419 goto err_put_ns;
2420
2421 seq = file->private_data;
2422 seq->private = ns;
2423
2424 return 0;
2425 err_put_ns:
2426 put_user_ns(ns);
2427 err:
2428 return ret;
2429 }
2430
proc_id_map_release(struct inode * inode,struct file * file)2431 static int proc_id_map_release(struct inode *inode, struct file *file)
2432 {
2433 struct seq_file *seq = file->private_data;
2434 struct user_namespace *ns = seq->private;
2435 put_user_ns(ns);
2436 return seq_release(inode, file);
2437 }
2438
proc_uid_map_open(struct inode * inode,struct file * file)2439 static int proc_uid_map_open(struct inode *inode, struct file *file)
2440 {
2441 return proc_id_map_open(inode, file, &proc_uid_seq_operations);
2442 }
2443
proc_gid_map_open(struct inode * inode,struct file * file)2444 static int proc_gid_map_open(struct inode *inode, struct file *file)
2445 {
2446 return proc_id_map_open(inode, file, &proc_gid_seq_operations);
2447 }
2448
proc_projid_map_open(struct inode * inode,struct file * file)2449 static int proc_projid_map_open(struct inode *inode, struct file *file)
2450 {
2451 return proc_id_map_open(inode, file, &proc_projid_seq_operations);
2452 }
2453
2454 static const struct file_operations proc_uid_map_operations = {
2455 .open = proc_uid_map_open,
2456 .write = proc_uid_map_write,
2457 .read = seq_read,
2458 .llseek = seq_lseek,
2459 .release = proc_id_map_release,
2460 };
2461
2462 static const struct file_operations proc_gid_map_operations = {
2463 .open = proc_gid_map_open,
2464 .write = proc_gid_map_write,
2465 .read = seq_read,
2466 .llseek = seq_lseek,
2467 .release = proc_id_map_release,
2468 };
2469
2470 static const struct file_operations proc_projid_map_operations = {
2471 .open = proc_projid_map_open,
2472 .write = proc_projid_map_write,
2473 .read = seq_read,
2474 .llseek = seq_lseek,
2475 .release = proc_id_map_release,
2476 };
2477
proc_setgroups_open(struct inode * inode,struct file * file)2478 static int proc_setgroups_open(struct inode *inode, struct file *file)
2479 {
2480 struct user_namespace *ns = NULL;
2481 struct task_struct *task;
2482 int ret;
2483
2484 ret = -ESRCH;
2485 task = get_proc_task(inode);
2486 if (task) {
2487 rcu_read_lock();
2488 ns = get_user_ns(task_cred_xxx(task, user_ns));
2489 rcu_read_unlock();
2490 put_task_struct(task);
2491 }
2492 if (!ns)
2493 goto err;
2494
2495 if (file->f_mode & FMODE_WRITE) {
2496 ret = -EACCES;
2497 if (!ns_capable(ns, CAP_SYS_ADMIN))
2498 goto err_put_ns;
2499 }
2500
2501 ret = single_open(file, &proc_setgroups_show, ns);
2502 if (ret)
2503 goto err_put_ns;
2504
2505 return 0;
2506 err_put_ns:
2507 put_user_ns(ns);
2508 err:
2509 return ret;
2510 }
2511
proc_setgroups_release(struct inode * inode,struct file * file)2512 static int proc_setgroups_release(struct inode *inode, struct file *file)
2513 {
2514 struct seq_file *seq = file->private_data;
2515 struct user_namespace *ns = seq->private;
2516 int ret = single_release(inode, file);
2517 put_user_ns(ns);
2518 return ret;
2519 }
2520
2521 static const struct file_operations proc_setgroups_operations = {
2522 .open = proc_setgroups_open,
2523 .write = proc_setgroups_write,
2524 .read = seq_read,
2525 .llseek = seq_lseek,
2526 .release = proc_setgroups_release,
2527 };
2528 #endif /* CONFIG_USER_NS */
2529
proc_pid_personality(struct seq_file * m,struct pid_namespace * ns,struct pid * pid,struct task_struct * task)2530 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2531 struct pid *pid, struct task_struct *task)
2532 {
2533 int err = lock_trace(task);
2534 if (!err) {
2535 seq_printf(m, "%08x\n", task->personality);
2536 unlock_trace(task);
2537 }
2538 return err;
2539 }
2540
2541 /*
2542 * Thread groups
2543 */
2544 static const struct file_operations proc_task_operations;
2545 static const struct inode_operations proc_task_inode_operations;
2546
2547 static const struct pid_entry tgid_base_stuff[] = {
2548 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
2549 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2550 #ifdef CONFIG_CHECKPOINT_RESTORE
2551 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
2552 #endif
2553 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2554 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2555 #ifdef CONFIG_NET
2556 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2557 #endif
2558 REG("environ", S_IRUSR, proc_environ_operations),
2559 ONE("auxv", S_IRUSR, proc_pid_auxv),
2560 ONE("status", S_IRUGO, proc_pid_status),
2561 ONE("personality", S_IRUSR, proc_pid_personality),
2562 ONE("limits", S_IRUGO, proc_pid_limits),
2563 #ifdef CONFIG_SCHED_DEBUG
2564 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2565 #endif
2566 #ifdef CONFIG_SCHED_AUTOGROUP
2567 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
2568 #endif
2569 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2570 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2571 ONE("syscall", S_IRUSR, proc_pid_syscall),
2572 #endif
2573 ONE("cmdline", S_IRUGO, proc_pid_cmdline),
2574 ONE("stat", S_IRUGO, proc_tgid_stat),
2575 ONE("statm", S_IRUGO, proc_pid_statm),
2576 REG("maps", S_IRUGO, proc_pid_maps_operations),
2577 #ifdef CONFIG_NUMA
2578 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
2579 #endif
2580 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
2581 LNK("cwd", proc_cwd_link),
2582 LNK("root", proc_root_link),
2583 LNK("exe", proc_exe_link),
2584 REG("mounts", S_IRUGO, proc_mounts_operations),
2585 REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
2586 REG("mountstats", S_IRUSR, proc_mountstats_operations),
2587 #ifdef CONFIG_PROC_PAGE_MONITOR
2588 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2589 REG("smaps", S_IRUGO, proc_pid_smaps_operations),
2590 REG("pagemap", S_IRUSR, proc_pagemap_operations),
2591 #endif
2592 #ifdef CONFIG_SECURITY
2593 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
2594 #endif
2595 #ifdef CONFIG_KALLSYMS
2596 ONE("wchan", S_IRUGO, proc_pid_wchan),
2597 #endif
2598 #ifdef CONFIG_STACKTRACE
2599 ONE("stack", S_IRUSR, proc_pid_stack),
2600 #endif
2601 #ifdef CONFIG_SCHEDSTATS
2602 ONE("schedstat", S_IRUGO, proc_pid_schedstat),
2603 #endif
2604 #ifdef CONFIG_LATENCYTOP
2605 REG("latency", S_IRUGO, proc_lstats_operations),
2606 #endif
2607 #ifdef CONFIG_PROC_PID_CPUSET
2608 ONE("cpuset", S_IRUGO, proc_cpuset_show),
2609 #endif
2610 #ifdef CONFIG_CGROUPS
2611 ONE("cgroup", S_IRUGO, proc_cgroup_show),
2612 #endif
2613 ONE("oom_score", S_IRUGO, proc_oom_score),
2614 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
2615 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2616 #ifdef CONFIG_AUDITSYSCALL
2617 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
2618 REG("sessionid", S_IRUGO, proc_sessionid_operations),
2619 #endif
2620 #ifdef CONFIG_FAULT_INJECTION
2621 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
2622 #endif
2623 #ifdef CONFIG_ELF_CORE
2624 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
2625 #endif
2626 #ifdef CONFIG_TASK_IO_ACCOUNTING
2627 ONE("io", S_IRUSR, proc_tgid_io_accounting),
2628 #endif
2629 #ifdef CONFIG_HARDWALL
2630 ONE("hardwall", S_IRUGO, proc_pid_hardwall),
2631 #endif
2632 #ifdef CONFIG_USER_NS
2633 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
2634 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
2635 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
2636 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
2637 #endif
2638 #ifdef CONFIG_CHECKPOINT_RESTORE
2639 REG("timers", S_IRUGO, proc_timers_operations),
2640 #endif
2641 };
2642
proc_tgid_base_readdir(struct file * file,struct dir_context * ctx)2643 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
2644 {
2645 return proc_pident_readdir(file, ctx,
2646 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
2647 }
2648
2649 static const struct file_operations proc_tgid_base_operations = {
2650 .read = generic_read_dir,
2651 .iterate = proc_tgid_base_readdir,
2652 .llseek = default_llseek,
2653 };
2654
proc_tgid_base_lookup(struct inode * dir,struct dentry * dentry,unsigned int flags)2655 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
2656 {
2657 return proc_pident_lookup(dir, dentry,
2658 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
2659 }
2660
2661 static const struct inode_operations proc_tgid_base_inode_operations = {
2662 .lookup = proc_tgid_base_lookup,
2663 .getattr = pid_getattr,
2664 .setattr = proc_setattr,
2665 .permission = proc_pid_permission,
2666 };
2667
/*
 * Invalidate the cached dentries of one task on one proc mount.
 *
 * Looks up "<pid>" under the root of @mnt and invalidates it if cached.
 * When @pid is not the thread-group id, "<tgid>/task/<pid>" is looked up
 * and invalidated as well.  Entries missing from the dcache are skipped.
 */
static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
{
	struct dentry *dentry, *leader, *dir;
	char buf[PROC_NUMBUF];
	struct qstr name;

	/* <root>/<pid> */
	name.name = buf;
	name.len = snprintf(buf, sizeof(buf), "%d", pid);
	/* no ->d_hash() rejects on procfs */
	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
	if (dentry) {
		d_invalidate(dentry);
		dput(dentry);
	}

	/* Nothing more to flush when the task is addressed by its tgid. */
	if (pid == tgid)
		return;

	/* <root>/<tgid> */
	name.name = buf;
	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
	leader = d_hash_and_lookup(mnt->mnt_root, &name);
	if (!leader)
		return;

	/* <root>/<tgid>/task */
	name.name = "task";
	name.len = strlen(name.name);
	dir = d_hash_and_lookup(leader, &name);
	if (dir) {
		/* <root>/<tgid>/task/<pid> */
		name.name = buf;
		name.len = snprintf(buf, sizeof(buf), "%d", pid);
		dentry = d_hash_and_lookup(dir, &name);
		if (dentry) {
			d_invalidate(dentry);
			dput(dentry);
		}
		dput(dir);
	}

	dput(leader);
}
2712
2713 /**
2714 * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
2715 * @task: task that should be flushed.
2716 *
2717 * When flushing dentries from proc, one needs to flush them from global
2718 * proc (proc_mnt) and from all the namespaces' procs this task was seen
2719 * in. This call is supposed to do all of this job.
2720 *
2721 * Looks in the dcache for
2722 * /proc/@pid
2723 * /proc/@tgid/task/@pid
2724 * if either directory is present flushes it and all of it'ts children
2725 * from the dcache.
2726 *
2727 * It is safe and reasonable to cache /proc entries for a task until
2728 * that task exits. After that they just clog up the dcache with
2729 * useless entries, possibly causing useful dcache entries to be
2730 * flushed instead. This routine is proved to flush those useless
2731 * dcache entries at process exit time.
2732 *
2733 * NOTE: This routine is just an optimization so it does not guarantee
2734 * that no dcache entries will exist at process exit time it
2735 * just makes it very unlikely that any will persist.
2736 */
2737
proc_flush_task(struct task_struct * task)2738 void proc_flush_task(struct task_struct *task)
2739 {
2740 int i;
2741 struct pid *pid, *tgid;
2742 struct upid *upid;
2743
2744 pid = task_pid(task);
2745 tgid = task_tgid(task);
2746
2747 for (i = 0; i <= pid->level; i++) {
2748 upid = &pid->numbers[i];
2749 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
2750 tgid->numbers[i].nr);
2751 }
2752 }
2753
proc_pid_instantiate(struct inode * dir,struct dentry * dentry,struct task_struct * task,const void * ptr)2754 static int proc_pid_instantiate(struct inode *dir,
2755 struct dentry * dentry,
2756 struct task_struct *task, const void *ptr)
2757 {
2758 struct inode *inode;
2759
2760 inode = proc_pid_make_inode(dir->i_sb, task);
2761 if (!inode)
2762 goto out;
2763
2764 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2765 inode->i_op = &proc_tgid_base_inode_operations;
2766 inode->i_fop = &proc_tgid_base_operations;
2767 inode->i_flags|=S_IMMUTABLE;
2768
2769 set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
2770 ARRAY_SIZE(tgid_base_stuff)));
2771
2772 d_set_d_op(dentry, &pid_dentry_operations);
2773
2774 d_add(dentry, inode);
2775 /* Close the race of the process dying before we return the dentry */
2776 if (pid_revalidate(dentry, 0))
2777 return 0;
2778 out:
2779 return -ENOENT;
2780 }
2781
proc_pid_lookup(struct inode * dir,struct dentry * dentry,unsigned int flags)2782 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
2783 {
2784 int result = -ENOENT;
2785 struct task_struct *task;
2786 unsigned tgid;
2787 struct pid_namespace *ns;
2788
2789 tgid = name_to_int(&dentry->d_name);
2790 if (tgid == ~0U)
2791 goto out;
2792
2793 ns = dentry->d_sb->s_fs_info;
2794 rcu_read_lock();
2795 task = find_task_by_pid_ns(tgid, ns);
2796 if (task)
2797 get_task_struct(task);
2798 rcu_read_unlock();
2799 if (!task)
2800 goto out;
2801
2802 result = proc_pid_instantiate(dir, dentry, task, NULL);
2803 put_task_struct(task);
2804 out:
2805 return ERR_PTR(result);
2806 }
2807
2808 /*
2809 * Find the first task with tgid >= tgid
2810 *
2811 */
2812 struct tgid_iter {
2813 unsigned int tgid;
2814 struct task_struct *task;
2815 };
next_tgid(struct pid_namespace * ns,struct tgid_iter iter)2816 static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
2817 {
2818 struct pid *pid;
2819
2820 if (iter.task)
2821 put_task_struct(iter.task);
2822 rcu_read_lock();
2823 retry:
2824 iter.task = NULL;
2825 pid = find_ge_pid(iter.tgid, ns);
2826 if (pid) {
2827 iter.tgid = pid_nr_ns(pid, ns);
2828 iter.task = pid_task(pid, PIDTYPE_PID);
2829 /* What we to know is if the pid we have find is the
2830 * pid of a thread_group_leader. Testing for task
2831 * being a thread_group_leader is the obvious thing
2832 * todo but there is a window when it fails, due to
2833 * the pid transfer logic in de_thread.
2834 *
2835 * So we perform the straight forward test of seeing
2836 * if the pid we have found is the pid of a thread
2837 * group leader, and don't worry if the task we have
2838 * found doesn't happen to be a thread group leader.
2839 * As we don't care in the case of readdir.
2840 */
2841 if (!iter.task || !has_group_leader_pid(iter.task)) {
2842 iter.tgid += 1;
2843 goto retry;
2844 }
2845 get_task_struct(iter.task);
2846 }
2847 rcu_read_unlock();
2848 return iter;
2849 }
2850
2851 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
2852
2853 /* for the /proc/ directory itself, after non-process stuff has been done */
proc_pid_readdir(struct file * file,struct dir_context * ctx)2854 int proc_pid_readdir(struct file *file, struct dir_context *ctx)
2855 {
2856 struct tgid_iter iter;
2857 struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info;
2858 loff_t pos = ctx->pos;
2859
2860 if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
2861 return 0;
2862
2863 if (pos == TGID_OFFSET - 2) {
2864 struct inode *inode = d_inode(ns->proc_self);
2865 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
2866 return 0;
2867 ctx->pos = pos = pos + 1;
2868 }
2869 if (pos == TGID_OFFSET - 1) {
2870 struct inode *inode = d_inode(ns->proc_thread_self);
2871 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
2872 return 0;
2873 ctx->pos = pos = pos + 1;
2874 }
2875 iter.tgid = pos - TGID_OFFSET;
2876 iter.task = NULL;
2877 for (iter = next_tgid(ns, iter);
2878 iter.task;
2879 iter.tgid += 1, iter = next_tgid(ns, iter)) {
2880 char name[PROC_NUMBUF];
2881 int len;
2882 if (!has_pid_permissions(ns, iter.task, 2))
2883 continue;
2884
2885 len = snprintf(name, sizeof(name), "%d", iter.tgid);
2886 ctx->pos = iter.tgid + TGID_OFFSET;
2887 if (!proc_fill_cache(file, ctx, name, len,
2888 proc_pid_instantiate, iter.task, NULL)) {
2889 put_task_struct(iter.task);
2890 return 0;
2891 }
2892 }
2893 ctx->pos = PID_MAX_LIMIT + TGID_OFFSET;
2894 return 0;
2895 }
2896
2897 /*
2898 * Tasks
2899 */
2900 static const struct pid_entry tid_base_stuff[] = {
2901 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2902 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2903 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2904 #ifdef CONFIG_NET
2905 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2906 #endif
2907 REG("environ", S_IRUSR, proc_environ_operations),
2908 ONE("auxv", S_IRUSR, proc_pid_auxv),
2909 ONE("status", S_IRUGO, proc_pid_status),
2910 ONE("personality", S_IRUSR, proc_pid_personality),
2911 ONE("limits", S_IRUGO, proc_pid_limits),
2912 #ifdef CONFIG_SCHED_DEBUG
2913 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2914 #endif
2915 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2916 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2917 ONE("syscall", S_IRUSR, proc_pid_syscall),
2918 #endif
2919 ONE("cmdline", S_IRUGO, proc_pid_cmdline),
2920 ONE("stat", S_IRUGO, proc_tid_stat),
2921 ONE("statm", S_IRUGO, proc_pid_statm),
2922 REG("maps", S_IRUGO, proc_tid_maps_operations),
2923 #ifdef CONFIG_CHECKPOINT_RESTORE
2924 REG("children", S_IRUGO, proc_tid_children_operations),
2925 #endif
2926 #ifdef CONFIG_NUMA
2927 REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
2928 #endif
2929 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
2930 LNK("cwd", proc_cwd_link),
2931 LNK("root", proc_root_link),
2932 LNK("exe", proc_exe_link),
2933 REG("mounts", S_IRUGO, proc_mounts_operations),
2934 REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
2935 #ifdef CONFIG_PROC_PAGE_MONITOR
2936 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2937 REG("smaps", S_IRUGO, proc_tid_smaps_operations),
2938 REG("pagemap", S_IRUSR, proc_pagemap_operations),
2939 #endif
2940 #ifdef CONFIG_SECURITY
2941 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
2942 #endif
2943 #ifdef CONFIG_KALLSYMS
2944 ONE("wchan", S_IRUGO, proc_pid_wchan),
2945 #endif
2946 #ifdef CONFIG_STACKTRACE
2947 ONE("stack", S_IRUSR, proc_pid_stack),
2948 #endif
2949 #ifdef CONFIG_SCHEDSTATS
2950 ONE("schedstat", S_IRUGO, proc_pid_schedstat),
2951 #endif
2952 #ifdef CONFIG_LATENCYTOP
2953 REG("latency", S_IRUGO, proc_lstats_operations),
2954 #endif
2955 #ifdef CONFIG_PROC_PID_CPUSET
2956 ONE("cpuset", S_IRUGO, proc_cpuset_show),
2957 #endif
2958 #ifdef CONFIG_CGROUPS
2959 ONE("cgroup", S_IRUGO, proc_cgroup_show),
2960 #endif
2961 ONE("oom_score", S_IRUGO, proc_oom_score),
2962 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
2963 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2964 #ifdef CONFIG_AUDITSYSCALL
2965 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
2966 REG("sessionid", S_IRUGO, proc_sessionid_operations),
2967 #endif
2968 #ifdef CONFIG_FAULT_INJECTION
2969 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
2970 #endif
2971 #ifdef CONFIG_TASK_IO_ACCOUNTING
2972 ONE("io", S_IRUSR, proc_tid_io_accounting),
2973 #endif
2974 #ifdef CONFIG_HARDWALL
2975 ONE("hardwall", S_IRUGO, proc_pid_hardwall),
2976 #endif
2977 #ifdef CONFIG_USER_NS
2978 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
2979 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
2980 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
2981 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
2982 #endif
2983 };
2984
proc_tid_base_readdir(struct file * file,struct dir_context * ctx)2985 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
2986 {
2987 return proc_pident_readdir(file, ctx,
2988 tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
2989 }
2990
proc_tid_base_lookup(struct inode * dir,struct dentry * dentry,unsigned int flags)2991 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
2992 {
2993 return proc_pident_lookup(dir, dentry,
2994 tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
2995 }
2996
2997 static const struct file_operations proc_tid_base_operations = {
2998 .read = generic_read_dir,
2999 .iterate = proc_tid_base_readdir,
3000 .llseek = default_llseek,
3001 };
3002
3003 static const struct inode_operations proc_tid_base_inode_operations = {
3004 .lookup = proc_tid_base_lookup,
3005 .getattr = pid_getattr,
3006 .setattr = proc_setattr,
3007 };
3008
proc_task_instantiate(struct inode * dir,struct dentry * dentry,struct task_struct * task,const void * ptr)3009 static int proc_task_instantiate(struct inode *dir,
3010 struct dentry *dentry, struct task_struct *task, const void *ptr)
3011 {
3012 struct inode *inode;
3013 inode = proc_pid_make_inode(dir->i_sb, task);
3014
3015 if (!inode)
3016 goto out;
3017 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
3018 inode->i_op = &proc_tid_base_inode_operations;
3019 inode->i_fop = &proc_tid_base_operations;
3020 inode->i_flags|=S_IMMUTABLE;
3021
3022 set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
3023 ARRAY_SIZE(tid_base_stuff)));
3024
3025 d_set_d_op(dentry, &pid_dentry_operations);
3026
3027 d_add(dentry, inode);
3028 /* Close the race of the process dying before we return the dentry */
3029 if (pid_revalidate(dentry, 0))
3030 return 0;
3031 out:
3032 return -ENOENT;
3033 }
3034
proc_task_lookup(struct inode * dir,struct dentry * dentry,unsigned int flags)3035 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
3036 {
3037 int result = -ENOENT;
3038 struct task_struct *task;
3039 struct task_struct *leader = get_proc_task(dir);
3040 unsigned tid;
3041 struct pid_namespace *ns;
3042
3043 if (!leader)
3044 goto out_no_task;
3045
3046 tid = name_to_int(&dentry->d_name);
3047 if (tid == ~0U)
3048 goto out;
3049
3050 ns = dentry->d_sb->s_fs_info;
3051 rcu_read_lock();
3052 task = find_task_by_pid_ns(tid, ns);
3053 if (task)
3054 get_task_struct(task);
3055 rcu_read_unlock();
3056 if (!task)
3057 goto out;
3058 if (!same_thread_group(leader, task))
3059 goto out_drop_task;
3060
3061 result = proc_task_instantiate(dir, dentry, task, NULL);
3062 out_drop_task:
3063 put_task_struct(task);
3064 out:
3065 put_task_struct(leader);
3066 out_no_task:
3067 return ERR_PTR(result);
3068 }
3069
3070 /*
3071 * Find the first tid of a thread group to return to user space.
3072 *
3073 * Usually this is just the thread group leader, but if the users
3074 * buffer was too small or there was a seek into the middle of the
3075 * directory we have more work todo.
3076 *
3077 * In the case of a short read we start with find_task_by_pid.
3078 *
3079 * In the case of a seek we start with the leader and walk nr
3080 * threads past it.
3081 */
first_tid(struct pid * pid,int tid,loff_t f_pos,struct pid_namespace * ns)3082 static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos,
3083 struct pid_namespace *ns)
3084 {
3085 struct task_struct *pos, *task;
3086 unsigned long nr = f_pos;
3087
3088 if (nr != f_pos) /* 32bit overflow? */
3089 return NULL;
3090
3091 rcu_read_lock();
3092 task = pid_task(pid, PIDTYPE_PID);
3093 if (!task)
3094 goto fail;
3095
3096 /* Attempt to start with the tid of a thread */
3097 if (tid && nr) {
3098 pos = find_task_by_pid_ns(tid, ns);
3099 if (pos && same_thread_group(pos, task))
3100 goto found;
3101 }
3102
3103 /* If nr exceeds the number of threads there is nothing todo */
3104 if (nr >= get_nr_threads(task))
3105 goto fail;
3106
3107 /* If we haven't found our starting place yet start
3108 * with the leader and walk nr threads forward.
3109 */
3110 pos = task = task->group_leader;
3111 do {
3112 if (!nr--)
3113 goto found;
3114 } while_each_thread(task, pos);
3115 fail:
3116 pos = NULL;
3117 goto out;
3118 found:
3119 get_task_struct(pos);
3120 out:
3121 rcu_read_unlock();
3122 return pos;
3123 }
3124
3125 /*
3126 * Find the next thread in the thread list.
3127 * Return NULL if there is an error or no next thread.
3128 *
3129 * The reference to the input task_struct is released.
3130 */
next_tid(struct task_struct * start)3131 static struct task_struct *next_tid(struct task_struct *start)
3132 {
3133 struct task_struct *pos = NULL;
3134 rcu_read_lock();
3135 if (pid_alive(start)) {
3136 pos = next_thread(start);
3137 if (thread_group_leader(pos))
3138 pos = NULL;
3139 else
3140 get_task_struct(pos);
3141 }
3142 rcu_read_unlock();
3143 put_task_struct(start);
3144 return pos;
3145 }
3146
3147 /* for the /proc/TGID/task/ directories */
proc_task_readdir(struct file * file,struct dir_context * ctx)3148 static int proc_task_readdir(struct file *file, struct dir_context *ctx)
3149 {
3150 struct inode *inode = file_inode(file);
3151 struct task_struct *task;
3152 struct pid_namespace *ns;
3153 int tid;
3154
3155 if (proc_inode_is_dead(inode))
3156 return -ENOENT;
3157
3158 if (!dir_emit_dots(file, ctx))
3159 return 0;
3160
3161 /* f_version caches the tgid value that the last readdir call couldn't
3162 * return. lseek aka telldir automagically resets f_version to 0.
3163 */
3164 ns = inode->i_sb->s_fs_info;
3165 tid = (int)file->f_version;
3166 file->f_version = 0;
3167 for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
3168 task;
3169 task = next_tid(task), ctx->pos++) {
3170 char name[PROC_NUMBUF];
3171 int len;
3172 tid = task_pid_nr_ns(task, ns);
3173 len = snprintf(name, sizeof(name), "%d", tid);
3174 if (!proc_fill_cache(file, ctx, name, len,
3175 proc_task_instantiate, task, NULL)) {
3176 /* returning this tgid failed, save it as the first
3177 * pid for the next readir call */
3178 file->f_version = (u64)tid;
3179 put_task_struct(task);
3180 break;
3181 }
3182 }
3183
3184 return 0;
3185 }
3186
proc_task_getattr(struct vfsmount * mnt,struct dentry * dentry,struct kstat * stat)3187 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
3188 {
3189 struct inode *inode = d_inode(dentry);
3190 struct task_struct *p = get_proc_task(inode);
3191 generic_fillattr(inode, stat);
3192
3193 if (p) {
3194 stat->nlink += get_nr_threads(p);
3195 put_task_struct(p);
3196 }
3197
3198 return 0;
3199 }
3200
3201 static const struct inode_operations proc_task_inode_operations = {
3202 .lookup = proc_task_lookup,
3203 .getattr = proc_task_getattr,
3204 .setattr = proc_setattr,
3205 .permission = proc_pid_permission,
3206 };
3207
3208 static const struct file_operations proc_task_operations = {
3209 .read = generic_read_dir,
3210 .iterate = proc_task_readdir,
3211 .llseek = default_llseek,
3212 };
3213