1/*
2 *  linux/fs/proc/inode.c
3 *
4 *  Copyright (C) 1991, 1992  Linus Torvalds
5 */
6
7#include <linux/time.h>
8#include <linux/proc_fs.h>
9#include <linux/kernel.h>
10#include <linux/pid_namespace.h>
11#include <linux/mm.h>
12#include <linux/string.h>
13#include <linux/stat.h>
14#include <linux/completion.h>
15#include <linux/poll.h>
16#include <linux/printk.h>
17#include <linux/file.h>
18#include <linux/limits.h>
19#include <linux/init.h>
20#include <linux/module.h>
21#include <linux/sysctl.h>
22#include <linux/seq_file.h>
23#include <linux/slab.h>
24#include <linux/mount.h>
25#include <linux/magic.h>
26
27#include <asm/uaccess.h>
28
29#include "internal.h"
30
31static void proc_evict_inode(struct inode *inode)
32{
33	struct proc_dir_entry *de;
34	struct ctl_table_header *head;
35
36	truncate_inode_pages_final(&inode->i_data);
37	clear_inode(inode);
38
39	/* Stop tracking associated processes */
40	put_pid(PROC_I(inode)->pid);
41
42	/* Let go of any associated proc directory entry */
43	de = PDE(inode);
44	if (de)
45		pde_put(de);
46	head = PROC_I(inode)->sysctl;
47	if (head) {
48		RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
49		sysctl_head_put(head);
50	}
51}
52
/*
 * Slab cache that struct proc_inode objects are allocated from; created by
 * proc_init_inodecache() and used by proc_alloc_inode()/proc_i_callback().
 */
static struct kmem_cache * proc_inode_cachep;
54
55static struct inode *proc_alloc_inode(struct super_block *sb)
56{
57	struct proc_inode *ei;
58	struct inode *inode;
59
60	ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
61	if (!ei)
62		return NULL;
63	ei->pid = NULL;
64	ei->fd = 0;
65	ei->op.proc_get_link = NULL;
66	ei->pde = NULL;
67	ei->sysctl = NULL;
68	ei->sysctl_entry = NULL;
69	ei->ns_ops = NULL;
70	inode = &ei->vfs_inode;
71	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
72	return inode;
73}
74
75static void proc_i_callback(struct rcu_head *head)
76{
77	struct inode *inode = container_of(head, struct inode, i_rcu);
78	kmem_cache_free(proc_inode_cachep, PROC_I(inode));
79}
80
81static void proc_destroy_inode(struct inode *inode)
82{
83	call_rcu(&inode->i_rcu, proc_i_callback);
84}
85
86static void init_once(void *foo)
87{
88	struct proc_inode *ei = (struct proc_inode *) foo;
89
90	inode_init_once(&ei->vfs_inode);
91}
92
93void __init proc_init_inodecache(void)
94{
95	proc_inode_cachep = kmem_cache_create("proc_inode_cache",
96					     sizeof(struct proc_inode),
97					     0, (SLAB_RECLAIM_ACCOUNT|
98						SLAB_MEM_SPREAD|SLAB_PANIC),
99					     init_once);
100}
101
102static int proc_show_options(struct seq_file *seq, struct dentry *root)
103{
104	struct super_block *sb = root->d_sb;
105	struct pid_namespace *pid = sb->s_fs_info;
106
107	if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
108		seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
109	if (pid->hide_pid != 0)
110		seq_printf(seq, ",hidepid=%u", pid->hide_pid);
111
112	return 0;
113}
114
/*
 * Superblock operations for procfs, installed by proc_fill_super().
 * The alloc/destroy/evict inode hooks and ->show_options are implemented
 * in this file; the remaining hooks are defined elsewhere.
 */
static const struct super_operations proc_sops = {
	.alloc_inode	= proc_alloc_inode,
	.destroy_inode	= proc_destroy_inode,
	.drop_inode	= generic_delete_inode,
	.evict_inode	= proc_evict_inode,
	.statfs		= simple_statfs,
	.remount_fs	= proc_remount,
	.show_options	= proc_show_options,
};
124
/*
 * Value with only bit 31 set. proc_entry_rundown() adds it to ->in_use so
 * that use_pde() (atomic_inc_unless_negative) starts failing, and
 * unuse_pde() compares against it to detect that the last user has left.
 */
enum {BIAS = -1U<<31};
126
127static inline int use_pde(struct proc_dir_entry *pde)
128{
129	return atomic_inc_unless_negative(&pde->in_use);
130}
131
132static void unuse_pde(struct proc_dir_entry *pde)
133{
134	if (atomic_dec_return(&pde->in_use) == BIAS)
135		complete(pde->pde_unload_completion);
136}
137
/*
 * Run ->release for one opener of @pde, or wait for whoever is already
 * doing so.
 *
 * Must be called with pde->pde_unload_lock held; the lock is dropped while
 * sleeping or while ->release runs and is held again on return.
 *
 * In both branches @pdeo has been freed by the time this function returns
 * (in the waiting branch, by the context that signalled the completion),
 * so callers must not touch it afterwards.
 */
/* pde is locked */
static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
{
	if (pdeo->closing) {
		/* somebody else is doing that, just wait */
		DECLARE_COMPLETION_ONSTACK(c);
		/* Publish our completion so the closer can wake us when done. */
		pdeo->c = &c;
		spin_unlock(&pde->pde_unload_lock);
		wait_for_completion(&c);
		spin_lock(&pde->pde_unload_lock);
	} else {
		struct file *file;
		/* Claim the close so any concurrent caller takes the wait branch. */
		pdeo->closing = 1;
		/* ->release is called without the spinlock held. */
		spin_unlock(&pde->pde_unload_lock);
		file = pdeo->file;
		pde->proc_fops->release(file_inode(file), file);
		spin_lock(&pde->pde_unload_lock);
		/* Unlink only after ->release has finished. */
		list_del_init(&pdeo->lh);
		/* Wake the waiter, if one registered while we were releasing. */
		if (pdeo->c)
			complete(pdeo->c);
		kfree(pdeo);
	}
}
161
/*
 * Quiesce a proc entry before it goes away.
 *
 * First blocks new users: adding BIAS makes ->in_use negative, so
 * use_pde() fails from now on. If users are still inside (the sum is not
 * exactly BIAS), sleep until the last unuse_pde() signals the on-stack
 * completion. Then run ->release by hand, via close_pdeo(), for every
 * opener still on ->pde_openers.
 */
void proc_entry_rundown(struct proc_dir_entry *de)
{
	DECLARE_COMPLETION_ONSTACK(c);
	/* Wait until all existing callers into module are done. */
	/* The completion must be visible before the bias is added. */
	de->pde_unload_completion = &c;
	if (atomic_add_return(BIAS, &de->in_use) != BIAS)
		wait_for_completion(&c);

	spin_lock(&de->pde_unload_lock);
	/* close_pdeo() drops and retakes the lock, so re-read the list head each time. */
	while (!list_empty(&de->pde_openers)) {
		struct pde_opener *pdeo;
		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
		close_pdeo(de, pdeo);
	}
	spin_unlock(&de->pde_unload_lock);
}
178
179static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
180{
181	struct proc_dir_entry *pde = PDE(file_inode(file));
182	loff_t rv = -EINVAL;
183	if (use_pde(pde)) {
184		loff_t (*llseek)(struct file *, loff_t, int);
185		llseek = pde->proc_fops->llseek;
186		if (!llseek)
187			llseek = default_llseek;
188		rv = llseek(file, offset, whence);
189		unuse_pde(pde);
190	}
191	return rv;
192}
193
194static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
195{
196	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
197	struct proc_dir_entry *pde = PDE(file_inode(file));
198	ssize_t rv = -EIO;
199	if (use_pde(pde)) {
200		read = pde->proc_fops->read;
201		if (read)
202			rv = read(file, buf, count, ppos);
203		unuse_pde(pde);
204	}
205	return rv;
206}
207
208static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
209{
210	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
211	struct proc_dir_entry *pde = PDE(file_inode(file));
212	ssize_t rv = -EIO;
213	if (use_pde(pde)) {
214		write = pde->proc_fops->write;
215		if (write)
216			rv = write(file, buf, count, ppos);
217		unuse_pde(pde);
218	}
219	return rv;
220}
221
222static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
223{
224	struct proc_dir_entry *pde = PDE(file_inode(file));
225	unsigned int rv = DEFAULT_POLLMASK;
226	unsigned int (*poll)(struct file *, struct poll_table_struct *);
227	if (use_pde(pde)) {
228		poll = pde->proc_fops->poll;
229		if (poll)
230			rv = poll(file, pts);
231		unuse_pde(pde);
232	}
233	return rv;
234}
235
236static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
237{
238	struct proc_dir_entry *pde = PDE(file_inode(file));
239	long rv = -ENOTTY;
240	long (*ioctl)(struct file *, unsigned int, unsigned long);
241	if (use_pde(pde)) {
242		ioctl = pde->proc_fops->unlocked_ioctl;
243		if (ioctl)
244			rv = ioctl(file, cmd, arg);
245		unuse_pde(pde);
246	}
247	return rv;
248}
249
#ifdef CONFIG_COMPAT
/*
 * ->compat_ioctl wrapper for regular proc files (CONFIG_COMPAT only).
 *
 * Calls the entry's ->compat_ioctl under a use_pde() reference. Returns
 * -ENOTTY when the entry can no longer be used or has no such method.
 */
static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct proc_dir_entry *pde = PDE(file_inode(file));
	long (*ioctl_fn)(struct file *, unsigned int, unsigned long);
	long ret = -ENOTTY;

	if (!use_pde(pde))
		return ret;

	ioctl_fn = pde->proc_fops->compat_ioctl;
	if (ioctl_fn)
		ret = ioctl_fn(file, cmd, arg);

	unuse_pde(pde);
	return ret;
}
#endif
265
266static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
267{
268	struct proc_dir_entry *pde = PDE(file_inode(file));
269	int rv = -EIO;
270	int (*mmap)(struct file *, struct vm_area_struct *);
271	if (use_pde(pde)) {
272		mmap = pde->proc_fops->mmap;
273		if (mmap)
274			rv = mmap(file, vma);
275		unuse_pde(pde);
276	}
277	return rv;
278}
279
280static unsigned long
281proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
282			   unsigned long len, unsigned long pgoff,
283			   unsigned long flags)
284{
285	struct proc_dir_entry *pde = PDE(file_inode(file));
286	unsigned long rv = -EIO;
287
288	if (use_pde(pde)) {
289		typeof(proc_reg_get_unmapped_area) *get_area;
290
291		get_area = pde->proc_fops->get_unmapped_area;
292#ifdef CONFIG_MMU
293		if (!get_area)
294			get_area = current->mm->get_unmapped_area;
295#endif
296
297		if (get_area)
298			rv = get_area(file, orig_addr, len, pgoff, flags);
299		else
300			rv = orig_addr;
301		unuse_pde(pde);
302	}
303	return rv;
304}
305
306static int proc_reg_open(struct inode *inode, struct file *file)
307{
308	struct proc_dir_entry *pde = PDE(inode);
309	int rv = 0;
310	int (*open)(struct inode *, struct file *);
311	int (*release)(struct inode *, struct file *);
312	struct pde_opener *pdeo;
313
314	/*
315	 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
316	 * sequence. ->release won't be called because ->proc_fops will be
317	 * cleared. Depending on complexity of ->release, consequences vary.
318	 *
319	 * We can't wait for mercy when close will be done for real, it's
320	 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
321	 * by hand in remove_proc_entry(). For this, save opener's credentials
322	 * for later.
323	 */
324	pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
325	if (!pdeo)
326		return -ENOMEM;
327
328	if (!use_pde(pde)) {
329		kfree(pdeo);
330		return -ENOENT;
331	}
332	open = pde->proc_fops->open;
333	release = pde->proc_fops->release;
334
335	if (open)
336		rv = open(inode, file);
337
338	if (rv == 0 && release) {
339		/* To know what to release. */
340		pdeo->file = file;
341		/* Strictly for "too late" ->release in proc_reg_release(). */
342		spin_lock(&pde->pde_unload_lock);
343		list_add(&pdeo->lh, &pde->pde_openers);
344		spin_unlock(&pde->pde_unload_lock);
345	} else
346		kfree(pdeo);
347
348	unuse_pde(pde);
349	return rv;
350}
351
352static int proc_reg_release(struct inode *inode, struct file *file)
353{
354	struct proc_dir_entry *pde = PDE(inode);
355	struct pde_opener *pdeo;
356	spin_lock(&pde->pde_unload_lock);
357	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
358		if (pdeo->file == file) {
359			close_pdeo(pde, pdeo);
360			break;
361		}
362	}
363	spin_unlock(&pde->pde_unload_lock);
364	return 0;
365}
366
/*
 * File operations that proc_get_inode() installs on regular-file proc
 * inodes in place of the entry's own proc_fops. Each method is one of the
 * proc_reg_* wrappers above, which forward to pde->proc_fops.
 */
static const struct file_operations proc_reg_file_ops = {
	.llseek		= proc_reg_llseek,
	.read		= proc_reg_read,
	.write		= proc_reg_write,
	.poll		= proc_reg_poll,
	.unlocked_ioctl	= proc_reg_unlocked_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= proc_reg_compat_ioctl,
#endif
	.mmap		= proc_reg_mmap,
	.get_unmapped_area = proc_reg_get_unmapped_area,
	.open		= proc_reg_open,
	.release	= proc_reg_release,
};
381
#ifdef CONFIG_COMPAT
/*
 * Same table as proc_reg_file_ops but with .compat_ioctl left unset.
 * proc_get_inode() picks this one when the entry's proc_fops has no
 * ->compat_ioctl of its own.
 */
static const struct file_operations proc_reg_file_ops_no_compat = {
	.llseek		= proc_reg_llseek,
	.read		= proc_reg_read,
	.write		= proc_reg_write,
	.poll		= proc_reg_poll,
	.unlocked_ioctl	= proc_reg_unlocked_ioctl,
	.mmap		= proc_reg_mmap,
	.get_unmapped_area = proc_reg_get_unmapped_area,
	.open		= proc_reg_open,
	.release	= proc_reg_release,
};
#endif
395
396static const char *proc_follow_link(struct dentry *dentry, void **cookie)
397{
398	struct proc_dir_entry *pde = PDE(d_inode(dentry));
399	if (unlikely(!use_pde(pde)))
400		return ERR_PTR(-EINVAL);
401	*cookie = pde;
402	return pde->data;
403}
404
/*
 * ->put_link for proc symlinks: @p is the cookie set by
 * proc_follow_link(), i.e. the proc entry whose use_pde() reference is
 * dropped here. The inode argument is not used.
 */
static void proc_put_link(struct inode *unused, void *p)
{
	struct proc_dir_entry *pde = p;

	unuse_pde(pde);
}
409
/*
 * Inode operations for proc symlinks: ->follow_link/->put_link bracket the
 * access to the link body with use_pde()/unuse_pde().
 */
const struct inode_operations proc_link_inode_operations = {
	.readlink	= generic_readlink,
	.follow_link	= proc_follow_link,
	.put_link	= proc_put_link,
};
415
416struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
417{
418	struct inode *inode = new_inode_pseudo(sb);
419
420	if (inode) {
421		inode->i_ino = de->low_ino;
422		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
423		PROC_I(inode)->pde = de;
424
425		if (is_empty_pde(de)) {
426			make_empty_dir_inode(inode);
427			return inode;
428		}
429		if (de->mode) {
430			inode->i_mode = de->mode;
431			inode->i_uid = de->uid;
432			inode->i_gid = de->gid;
433		}
434		if (de->size)
435			inode->i_size = de->size;
436		if (de->nlink)
437			set_nlink(inode, de->nlink);
438		WARN_ON(!de->proc_iops);
439		inode->i_op = de->proc_iops;
440		if (de->proc_fops) {
441			if (S_ISREG(inode->i_mode)) {
442#ifdef CONFIG_COMPAT
443				if (!de->proc_fops->compat_ioctl)
444					inode->i_fop =
445						&proc_reg_file_ops_no_compat;
446				else
447#endif
448					inode->i_fop = &proc_reg_file_ops;
449			} else {
450				inode->i_fop = de->proc_fops;
451			}
452		}
453	} else
454	       pde_put(de);
455	return inode;
456}
457
458int proc_fill_super(struct super_block *s)
459{
460	struct inode *root_inode;
461	int ret;
462
463	s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
464	s->s_blocksize = 1024;
465	s->s_blocksize_bits = 10;
466	s->s_magic = PROC_SUPER_MAGIC;
467	s->s_op = &proc_sops;
468	s->s_time_gran = 1;
469
470	pde_get(&proc_root);
471	root_inode = proc_get_inode(s, &proc_root);
472	if (!root_inode) {
473		pr_err("proc_fill_super: get root inode failed\n");
474		return -ENOMEM;
475	}
476
477	s->s_root = d_make_root(root_inode);
478	if (!s->s_root) {
479		pr_err("proc_fill_super: allocate dentry failed\n");
480		return -ENOMEM;
481	}
482
483	ret = proc_setup_self(s);
484	if (ret) {
485		return ret;
486	}
487	return proc_setup_thread_self(s);
488}
489