1/*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines.  Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/fs.h>
15#include <linux/mm.h>
16#include <linux/mman.h>
17#include <linux/errno.h>
18#include <linux/signal.h>
19#include <linux/binfmts.h>
20#include <linux/string.h>
21#include <linux/file.h>
22#include <linux/slab.h>
23#include <linux/personality.h>
24#include <linux/elfcore.h>
25#include <linux/init.h>
26#include <linux/highuid.h>
27#include <linux/compiler.h>
28#include <linux/highmem.h>
29#include <linux/pagemap.h>
30#include <linux/vmalloc.h>
31#include <linux/security.h>
32#include <linux/random.h>
33#include <linux/elf.h>
34#include <linux/elf-randomize.h>
35#include <linux/utsname.h>
36#include <linux/coredump.h>
37#include <linux/sched.h>
38#include <linux/dax.h>
39#include <asm/uaccess.h>
40#include <asm/param.h>
41#include <asm/page.h>
42
/*
 * Fallback type names: each is defined here only when no earlier header
 * has already provided a macro of the same name.
 */
#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif
49
/* Forward declarations of functions defined later in this file. */
static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * The shared-library loader is only declared with CONFIG_USELIB;
 * otherwise the name expands to NULL.
 */
#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif
69
/* ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE. */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

/* Default used when nothing earlier defined ELF_CORE_EFLAGS. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/*
 * Address helpers in units of ELF_MIN_ALIGN (the masks assume it is a
 * power of two - TODO confirm for every architecture):
 *   ELF_PAGESTART  - round _v down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET - offset of _v within its ELF_MIN_ALIGN-sized page
 *   ELF_PAGEALIGN  - round _v up to an ELF_MIN_ALIGN boundary
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
83
/*
 * Handler table for the ELF binary format.  load_elf_library and
 * elf_core_dump expand to NULL when CONFIG_USELIB / CONFIG_ELF_CORE
 * are not set.
 */
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};
91
/*
 * True when x, taken as an unsigned long, is at or above TASK_SIZE.
 * Code in this file applies it to the results of the mapping helpers to
 * detect failure.
 */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
93
94static int set_brk(unsigned long start, unsigned long end)
95{
96	start = ELF_PAGEALIGN(start);
97	end = ELF_PAGEALIGN(end);
98	if (end > start) {
99		unsigned long addr;
100		addr = vm_brk(start, end - start);
101		if (BAD_ADDR(addr))
102			return addr;
103	}
104	current->mm->start_brk = current->mm->brk = end;
105	return 0;
106}
107
108/* We need to explicitly zero any fractional pages
109   after the data section (i.e. bss).  This would
110   contain the junk from the file that should not
111   be in memory
112 */
113static int padzero(unsigned long elf_bss)
114{
115	unsigned long nbyte;
116
117	nbyte = ELF_PAGEOFFSET(elf_bss);
118	if (nbyte) {
119		nbyte = ELF_MIN_ALIGN - nbyte;
120		if (clear_user((void __user *) elf_bss, nbyte))
121			return -EFAULT;
122	}
123	return 0;
124}
125
/*
 * Macros to make the stack manipulation a little clearer.  Each has one
 * form for stacks that grow up and one for stacks that grow down:
 *
 *   STACK_ADD(sp, items)   - sp moved by items elf_addr_t slots in the
 *                            direction of stack growth
 *   STACK_ROUND(sp, items) - sp moved by items, then rounded to a 16-byte
 *                            boundary in the direction of stack growth
 *   STACK_ALLOC(sp, len)   - reserve len by updating sp in place; yields
 *                            the start of the reserved area
 *
 * Every macro argument is parenthesised so that compound expressions such
 * as "a + b" expand with the intended precedence.  The sp argument of
 * STACK_ALLOC must be a modifiable lvalue.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
140
#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 * With this NULL default, create_elf_tables() copies no string and emits
 * no AT_BASE_PLATFORM entry.
 */
#define ELF_BASE_PLATFORM NULL
#endif
149
150static int
151create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
152		unsigned long load_addr, unsigned long interp_load_addr)
153{
154	unsigned long p = bprm->p;
155	int argc = bprm->argc;
156	int envc = bprm->envc;
157	elf_addr_t __user *argv;
158	elf_addr_t __user *envp;
159	elf_addr_t __user *sp;
160	elf_addr_t __user *u_platform;
161	elf_addr_t __user *u_base_platform;
162	elf_addr_t __user *u_rand_bytes;
163	const char *k_platform = ELF_PLATFORM;
164	const char *k_base_platform = ELF_BASE_PLATFORM;
165	unsigned char k_rand_bytes[16];
166	int items;
167	elf_addr_t *elf_info;
168	int ei_index = 0;
169	const struct cred *cred = current_cred();
170	struct vm_area_struct *vma;
171
172	/*
173	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
174	 * evictions by the processes running on the same package. One
175	 * thing we can do is to shuffle the initial stack for them.
176	 */
177
178	p = arch_align_stack(p);
179
180	/*
181	 * If this architecture has a platform capability string, copy it
182	 * to userspace.  In some cases (Sparc), this info is impossible
183	 * for userspace to get any other way, in others (i386) it is
184	 * merely difficult.
185	 */
186	u_platform = NULL;
187	if (k_platform) {
188		size_t len = strlen(k_platform) + 1;
189
190		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
191		if (__copy_to_user(u_platform, k_platform, len))
192			return -EFAULT;
193	}
194
195	/*
196	 * If this architecture has a "base" platform capability
197	 * string, copy it to userspace.
198	 */
199	u_base_platform = NULL;
200	if (k_base_platform) {
201		size_t len = strlen(k_base_platform) + 1;
202
203		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
204		if (__copy_to_user(u_base_platform, k_base_platform, len))
205			return -EFAULT;
206	}
207
208	/*
209	 * Generate 16 random bytes for userspace PRNG seeding.
210	 */
211	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
212	u_rand_bytes = (elf_addr_t __user *)
213		       STACK_ALLOC(p, sizeof(k_rand_bytes));
214	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
215		return -EFAULT;
216
217	/* Create the ELF interpreter info */
218	elf_info = (elf_addr_t *)current->mm->saved_auxv;
219	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
220#define NEW_AUX_ENT(id, val) \
221	do { \
222		elf_info[ei_index++] = id; \
223		elf_info[ei_index++] = val; \
224	} while (0)
225
226#ifdef ARCH_DLINFO
227	/*
228	 * ARCH_DLINFO must come first so PPC can do its special alignment of
229	 * AUXV.
230	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
231	 * ARCH_DLINFO changes
232	 */
233	ARCH_DLINFO;
234#endif
235	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
236	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
237	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
238	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
239	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
240	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
241	NEW_AUX_ENT(AT_BASE, interp_load_addr);
242	NEW_AUX_ENT(AT_FLAGS, 0);
243	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
244	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
245	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
246	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
247	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
248 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
249	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
250#ifdef ELF_HWCAP2
251	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
252#endif
253	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
254	if (k_platform) {
255		NEW_AUX_ENT(AT_PLATFORM,
256			    (elf_addr_t)(unsigned long)u_platform);
257	}
258	if (k_base_platform) {
259		NEW_AUX_ENT(AT_BASE_PLATFORM,
260			    (elf_addr_t)(unsigned long)u_base_platform);
261	}
262	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
263		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
264	}
265#undef NEW_AUX_ENT
266	/* AT_NULL is zero; clear the rest too */
267	memset(&elf_info[ei_index], 0,
268	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
269
270	/* And advance past the AT_NULL entry.  */
271	ei_index += 2;
272
273	sp = STACK_ADD(p, ei_index);
274
275	items = (argc + 1) + (envc + 1) + 1;
276	bprm->p = STACK_ROUND(sp, items);
277
278	/* Point sp at the lowest address on the stack */
279#ifdef CONFIG_STACK_GROWSUP
280	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
281	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
282#else
283	sp = (elf_addr_t __user *)bprm->p;
284#endif
285
286
287	/*
288	 * Grow the stack manually; some architectures have a limit on how
289	 * far ahead a user-space access may be in order to grow the stack.
290	 */
291	vma = find_extend_vma(current->mm, bprm->p);
292	if (!vma)
293		return -EFAULT;
294
295	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
296	if (__put_user(argc, sp++))
297		return -EFAULT;
298	argv = sp;
299	envp = argv + argc + 1;
300
301	/* Populate argv and envp */
302	p = current->mm->arg_end = current->mm->arg_start;
303	while (argc-- > 0) {
304		size_t len;
305		if (__put_user((elf_addr_t)p, argv++))
306			return -EFAULT;
307		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
308		if (!len || len > MAX_ARG_STRLEN)
309			return -EINVAL;
310		p += len;
311	}
312	if (__put_user(0, argv))
313		return -EFAULT;
314	current->mm->arg_end = current->mm->env_start = p;
315	while (envc-- > 0) {
316		size_t len;
317		if (__put_user((elf_addr_t)p, envp++))
318			return -EFAULT;
319		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
320		if (!len || len > MAX_ARG_STRLEN)
321			return -EINVAL;
322		p += len;
323	}
324	if (__put_user(0, envp))
325		return -EFAULT;
326	current->mm->env_end = p;
327
328	/* Put the elf_info on the stack in the right place.  */
329	sp = (elf_addr_t __user *)envp + 1;
330	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
331		return -EFAULT;
332	return 0;
333}
334
335#ifndef elf_map
336
337static unsigned long elf_map(struct file *filep, unsigned long addr,
338		struct elf_phdr *eppnt, int prot, int type,
339		unsigned long total_size)
340{
341	unsigned long map_addr;
342	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
343	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
344	addr = ELF_PAGESTART(addr);
345	size = ELF_PAGEALIGN(size);
346
347	/* mmap() will return -EINVAL if given a zero size, but a
348	 * segment with zero filesize is perfectly valid */
349	if (!size)
350		return addr;
351
352	/*
353	* total_size is the size of the ELF (interpreter) image.
354	* The _first_ mmap needs to know the full size, otherwise
355	* randomization might put this image into an overlapping
356	* position with the ELF binary image. (since size < total_size)
357	* So we first map the 'big' image - and unmap the remainder at
358	* the end. (which unmap is needed for ELF images with holes.)
359	*/
360	if (total_size) {
361		total_size = ELF_PAGEALIGN(total_size);
362		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
363		if (!BAD_ADDR(map_addr))
364			vm_munmap(map_addr+size, total_size-size);
365	} else
366		map_addr = vm_mmap(filep, addr, size, prot, type, off);
367
368	return(map_addr);
369}
370
371#endif /* !elf_map */
372
373static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
374{
375	int i, first_idx = -1, last_idx = -1;
376
377	for (i = 0; i < nr; i++) {
378		if (cmds[i].p_type == PT_LOAD) {
379			last_idx = i;
380			if (first_idx == -1)
381				first_idx = i;
382		}
383	}
384	if (first_idx == -1)
385		return 0;
386
387	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
388				ELF_PAGESTART(cmds[first_idx].p_vaddr);
389}
390
391/**
392 * load_elf_phdrs() - load ELF program headers
393 * @elf_ex:   ELF header of the binary whose program headers should be loaded
394 * @elf_file: the opened ELF binary file
395 *
396 * Loads ELF program headers from the binary file elf_file, which has the ELF
397 * header pointed to by elf_ex, into a newly allocated array. The caller is
398 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
399 */
400static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
401				       struct file *elf_file)
402{
403	struct elf_phdr *elf_phdata = NULL;
404	int retval, size, err = -1;
405
406	/*
407	 * If the size of this structure has changed, then punt, since
408	 * we will be doing the wrong thing.
409	 */
410	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
411		goto out;
412
413	/* Sanity check the number of program headers... */
414	if (elf_ex->e_phnum < 1 ||
415		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
416		goto out;
417
418	/* ...and their total size. */
419	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
420	if (size > ELF_MIN_ALIGN)
421		goto out;
422
423	elf_phdata = kmalloc(size, GFP_KERNEL);
424	if (!elf_phdata)
425		goto out;
426
427	/* Read in the program headers */
428	retval = kernel_read(elf_file, elf_ex->e_phoff,
429			     (char *)elf_phdata, size);
430	if (retval != size) {
431		err = (retval < 0) ? retval : -EIO;
432		goto out;
433	}
434
435	/* Success! */
436	err = 0;
437out:
438	if (err) {
439		kfree(elf_phdata);
440		elf_phdata = NULL;
441	}
442	return elf_phdata;
443}
444
445#ifndef CONFIG_ARCH_BINFMT_ELF_STATE
446
/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

/* Initializer for the dummy state above; empty because it has no members. */
#define INIT_ARCH_ELF_STATE {}
462
463/**
464 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
465 * @ehdr:	The main ELF header
466 * @phdr:	The program header to check
467 * @elf:	The open ELF file
468 * @is_interp:	True if the phdr is from the interpreter of the ELF being
469 *		loaded, else false.
470 * @state:	Architecture-specific state preserved throughout the process
471 *		of loading the ELF.
472 *
473 * Inspects the program header phdr to validate its correctness and/or
474 * suitability for the system. Called once per ELF program header in the
475 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
476 * interpreter.
477 *
478 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
479 *         with that return code.
480 */
481static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
482				   struct elf_phdr *phdr,
483				   struct file *elf, bool is_interp,
484				   struct arch_elf_state *state)
485{
486	/* Dummy implementation, always proceed */
487	return 0;
488}
489
490/**
491 * arch_check_elf() - check an ELF executable
492 * @ehdr:	The main ELF header
493 * @has_interp:	True if the ELF has an interpreter, else false.
494 * @state:	Architecture-specific state preserved throughout the process
495 *		of loading the ELF.
496 *
497 * Provides a final opportunity for architecture code to reject the loading
498 * of the ELF & cause an exec syscall to return an error. This is called after
499 * all program headers to be checked by arch_elf_pt_proc have been.
500 *
501 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
502 *         with that return code.
503 */
504static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
505				 struct arch_elf_state *state)
506{
507	/* Dummy implementation, always proceed */
508	return 0;
509}
510
511#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
512
513/* This is much more generalized than the library routine read function,
514   so we keep this separate.  Technically the library read function
515   is only provided so that we can read a.out libraries that have
516   an ELF header */
517
518static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
519		struct file *interpreter, unsigned long *interp_map_addr,
520		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
521{
522	struct elf_phdr *eppnt;
523	unsigned long load_addr = 0;
524	int load_addr_set = 0;
525	unsigned long last_bss = 0, elf_bss = 0;
526	unsigned long error = ~0UL;
527	unsigned long total_size;
528	int i;
529
530	/* First of all, some simple consistency checks */
531	if (interp_elf_ex->e_type != ET_EXEC &&
532	    interp_elf_ex->e_type != ET_DYN)
533		goto out;
534	if (!elf_check_arch(interp_elf_ex))
535		goto out;
536	if (!interpreter->f_op->mmap)
537		goto out;
538
539	total_size = total_mapping_size(interp_elf_phdata,
540					interp_elf_ex->e_phnum);
541	if (!total_size) {
542		error = -EINVAL;
543		goto out;
544	}
545
546	eppnt = interp_elf_phdata;
547	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
548		if (eppnt->p_type == PT_LOAD) {
549			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
550			int elf_prot = 0;
551			unsigned long vaddr = 0;
552			unsigned long k, map_addr;
553
554			if (eppnt->p_flags & PF_R)
555		    		elf_prot = PROT_READ;
556			if (eppnt->p_flags & PF_W)
557				elf_prot |= PROT_WRITE;
558			if (eppnt->p_flags & PF_X)
559				elf_prot |= PROT_EXEC;
560			vaddr = eppnt->p_vaddr;
561			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
562				elf_type |= MAP_FIXED;
563			else if (no_base && interp_elf_ex->e_type == ET_DYN)
564				load_addr = -vaddr;
565
566			map_addr = elf_map(interpreter, load_addr + vaddr,
567					eppnt, elf_prot, elf_type, total_size);
568			total_size = 0;
569			if (!*interp_map_addr)
570				*interp_map_addr = map_addr;
571			error = map_addr;
572			if (BAD_ADDR(map_addr))
573				goto out;
574
575			if (!load_addr_set &&
576			    interp_elf_ex->e_type == ET_DYN) {
577				load_addr = map_addr - ELF_PAGESTART(vaddr);
578				load_addr_set = 1;
579			}
580
581			/*
582			 * Check to see if the section's size will overflow the
583			 * allowed task size. Note that p_filesz must always be
584			 * <= p_memsize so it's only necessary to check p_memsz.
585			 */
586			k = load_addr + eppnt->p_vaddr;
587			if (BAD_ADDR(k) ||
588			    eppnt->p_filesz > eppnt->p_memsz ||
589			    eppnt->p_memsz > TASK_SIZE ||
590			    TASK_SIZE - eppnt->p_memsz < k) {
591				error = -ENOMEM;
592				goto out;
593			}
594
595			/*
596			 * Find the end of the file mapping for this phdr, and
597			 * keep track of the largest address we see for this.
598			 */
599			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
600			if (k > elf_bss)
601				elf_bss = k;
602
603			/*
604			 * Do the same thing for the memory mapping - between
605			 * elf_bss and last_bss is the bss section.
606			 */
607			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
608			if (k > last_bss)
609				last_bss = k;
610		}
611	}
612
613	if (last_bss > elf_bss) {
614		/*
615		 * Now fill out the bss section.  First pad the last page up
616		 * to the page boundary, and then perform a mmap to make sure
617		 * that there are zero-mapped pages up to and including the
618		 * last bss page.
619		 */
620		if (padzero(elf_bss)) {
621			error = -EFAULT;
622			goto out;
623		}
624
625		/* What we have mapped so far */
626		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
627
628		/* Map the last of the bss segment */
629		error = vm_brk(elf_bss, last_bss - elf_bss);
630		if (BAD_ADDR(error))
631			goto out;
632	}
633
634	error = load_addr;
635out:
636	return error;
637}
638
639/*
640 * These are the functions used to load ELF style executables and shared
641 * libraries.  There is no binary dependent code anywhere else.
642 */
643
644#ifndef STACK_RND_MASK
645#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
646#endif
647
648static unsigned long randomize_stack_top(unsigned long stack_top)
649{
650	unsigned long random_variable = 0;
651
652	if ((current->flags & PF_RANDOMIZE) &&
653		!(current->personality & ADDR_NO_RANDOMIZE)) {
654		random_variable = (unsigned long) get_random_int();
655		random_variable &= STACK_RND_MASK;
656		random_variable <<= PAGE_SHIFT;
657	}
658#ifdef CONFIG_STACK_GROWSUP
659	return PAGE_ALIGN(stack_top) + random_variable;
660#else
661	return PAGE_ALIGN(stack_top) - random_variable;
662#endif
663}
664
665static int load_elf_binary(struct linux_binprm *bprm)
666{
667	struct file *interpreter = NULL; /* to shut gcc up */
668 	unsigned long load_addr = 0, load_bias = 0;
669	int load_addr_set = 0;
670	char * elf_interpreter = NULL;
671	unsigned long error;
672	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
673	unsigned long elf_bss, elf_brk;
674	int retval, i;
675	unsigned long elf_entry;
676	unsigned long interp_load_addr = 0;
677	unsigned long start_code, end_code, start_data, end_data;
678	unsigned long reloc_func_desc __maybe_unused = 0;
679	int executable_stack = EXSTACK_DEFAULT;
680	struct pt_regs *regs = current_pt_regs();
681	struct {
682		struct elfhdr elf_ex;
683		struct elfhdr interp_elf_ex;
684	} *loc;
685	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
686
687	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
688	if (!loc) {
689		retval = -ENOMEM;
690		goto out_ret;
691	}
692
693	/* Get the exec-header */
694	loc->elf_ex = *((struct elfhdr *)bprm->buf);
695
696	retval = -ENOEXEC;
697	/* First of all, some simple consistency checks */
698	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
699		goto out;
700
701	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
702		goto out;
703	if (!elf_check_arch(&loc->elf_ex))
704		goto out;
705	if (!bprm->file->f_op->mmap)
706		goto out;
707
708	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
709	if (!elf_phdata)
710		goto out;
711
712	elf_ppnt = elf_phdata;
713	elf_bss = 0;
714	elf_brk = 0;
715
716	start_code = ~0UL;
717	end_code = 0;
718	start_data = 0;
719	end_data = 0;
720
721	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
722		if (elf_ppnt->p_type == PT_INTERP) {
723			/* This is the program interpreter used for
724			 * shared libraries - for now assume that this
725			 * is an a.out format binary
726			 */
727			retval = -ENOEXEC;
728			if (elf_ppnt->p_filesz > PATH_MAX ||
729			    elf_ppnt->p_filesz < 2)
730				goto out_free_ph;
731
732			retval = -ENOMEM;
733			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
734						  GFP_KERNEL);
735			if (!elf_interpreter)
736				goto out_free_ph;
737
738			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
739					     elf_interpreter,
740					     elf_ppnt->p_filesz);
741			if (retval != elf_ppnt->p_filesz) {
742				if (retval >= 0)
743					retval = -EIO;
744				goto out_free_interp;
745			}
746			/* make sure path is NULL terminated */
747			retval = -ENOEXEC;
748			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
749				goto out_free_interp;
750
751			interpreter = open_exec(elf_interpreter);
752			retval = PTR_ERR(interpreter);
753			if (IS_ERR(interpreter))
754				goto out_free_interp;
755
756			/*
757			 * If the binary is not readable then enforce
758			 * mm->dumpable = 0 regardless of the interpreter's
759			 * permissions.
760			 */
761			would_dump(bprm, interpreter);
762
763			/* Get the exec headers */
764			retval = kernel_read(interpreter, 0,
765					     (void *)&loc->interp_elf_ex,
766					     sizeof(loc->interp_elf_ex));
767			if (retval != sizeof(loc->interp_elf_ex)) {
768				if (retval >= 0)
769					retval = -EIO;
770				goto out_free_dentry;
771			}
772
773			break;
774		}
775		elf_ppnt++;
776	}
777
778	elf_ppnt = elf_phdata;
779	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
780		switch (elf_ppnt->p_type) {
781		case PT_GNU_STACK:
782			if (elf_ppnt->p_flags & PF_X)
783				executable_stack = EXSTACK_ENABLE_X;
784			else
785				executable_stack = EXSTACK_DISABLE_X;
786			break;
787
788		case PT_LOPROC ... PT_HIPROC:
789			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
790						  bprm->file, false,
791						  &arch_state);
792			if (retval)
793				goto out_free_dentry;
794			break;
795		}
796
797	/* Some simple consistency checks for the interpreter */
798	if (elf_interpreter) {
799		retval = -ELIBBAD;
800		/* Not an ELF interpreter */
801		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
802			goto out_free_dentry;
803		/* Verify the interpreter has a valid arch */
804		if (!elf_check_arch(&loc->interp_elf_ex))
805			goto out_free_dentry;
806
807		/* Load the interpreter program headers */
808		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
809						   interpreter);
810		if (!interp_elf_phdata)
811			goto out_free_dentry;
812
813		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
814		elf_ppnt = interp_elf_phdata;
815		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
816			switch (elf_ppnt->p_type) {
817			case PT_LOPROC ... PT_HIPROC:
818				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
819							  elf_ppnt, interpreter,
820							  true, &arch_state);
821				if (retval)
822					goto out_free_dentry;
823				break;
824			}
825	}
826
827	/*
828	 * Allow arch code to reject the ELF at this point, whilst it's
829	 * still possible to return an error to the code that invoked
830	 * the exec syscall.
831	 */
832	retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
833	if (retval)
834		goto out_free_dentry;
835
836	/* Flush all traces of the currently running executable */
837	retval = flush_old_exec(bprm);
838	if (retval)
839		goto out_free_dentry;
840
841	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
842	   may depend on the personality.  */
843	SET_PERSONALITY2(loc->elf_ex, &arch_state);
844	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
845		current->personality |= READ_IMPLIES_EXEC;
846
847	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
848		current->flags |= PF_RANDOMIZE;
849
850	setup_new_exec(bprm);
851
852	/* Do this so that we can load the interpreter, if need be.  We will
853	   change some of these later */
854	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
855				 executable_stack);
856	if (retval < 0)
857		goto out_free_dentry;
858
859	current->mm->start_stack = bprm->p;
860
861	/* Now we do a little grungy work by mmapping the ELF image into
862	   the correct location in memory. */
863	for(i = 0, elf_ppnt = elf_phdata;
864	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
865		int elf_prot = 0, elf_flags;
866		unsigned long k, vaddr;
867		unsigned long total_size = 0;
868
869		if (elf_ppnt->p_type != PT_LOAD)
870			continue;
871
872		if (unlikely (elf_brk > elf_bss)) {
873			unsigned long nbyte;
874
875			/* There was a PT_LOAD segment with p_memsz > p_filesz
876			   before this one. Map anonymous pages, if needed,
877			   and clear the area.  */
878			retval = set_brk(elf_bss + load_bias,
879					 elf_brk + load_bias);
880			if (retval)
881				goto out_free_dentry;
882			nbyte = ELF_PAGEOFFSET(elf_bss);
883			if (nbyte) {
884				nbyte = ELF_MIN_ALIGN - nbyte;
885				if (nbyte > elf_brk - elf_bss)
886					nbyte = elf_brk - elf_bss;
887				if (clear_user((void __user *)elf_bss +
888							load_bias, nbyte)) {
889					/*
890					 * This bss-zeroing can fail if the ELF
891					 * file specifies odd protections. So
892					 * we don't check the return value
893					 */
894				}
895			}
896		}
897
898		if (elf_ppnt->p_flags & PF_R)
899			elf_prot |= PROT_READ;
900		if (elf_ppnt->p_flags & PF_W)
901			elf_prot |= PROT_WRITE;
902		if (elf_ppnt->p_flags & PF_X)
903			elf_prot |= PROT_EXEC;
904
905		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
906
907		vaddr = elf_ppnt->p_vaddr;
908		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
909			elf_flags |= MAP_FIXED;
910		} else if (loc->elf_ex.e_type == ET_DYN) {
911			/* Try and get dynamic programs out of the way of the
912			 * default mmap base, as well as whatever program they
913			 * might try to exec.  This is because the brk will
914			 * follow the loader, and is not movable.  */
915			load_bias = ELF_ET_DYN_BASE - vaddr;
916			if (current->flags & PF_RANDOMIZE)
917				load_bias += arch_mmap_rnd();
918			load_bias = ELF_PAGESTART(load_bias);
919			total_size = total_mapping_size(elf_phdata,
920							loc->elf_ex.e_phnum);
921			if (!total_size) {
922				retval = -EINVAL;
923				goto out_free_dentry;
924			}
925		}
926
927		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
928				elf_prot, elf_flags, total_size);
929		if (BAD_ADDR(error)) {
930			retval = IS_ERR((void *)error) ?
931				PTR_ERR((void*)error) : -EINVAL;
932			goto out_free_dentry;
933		}
934
935		if (!load_addr_set) {
936			load_addr_set = 1;
937			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
938			if (loc->elf_ex.e_type == ET_DYN) {
939				load_bias += error -
940				             ELF_PAGESTART(load_bias + vaddr);
941				load_addr += load_bias;
942				reloc_func_desc = load_bias;
943			}
944		}
945		k = elf_ppnt->p_vaddr;
946		if (k < start_code)
947			start_code = k;
948		if (start_data < k)
949			start_data = k;
950
951		/*
952		 * Check to see if the section's size will overflow the
953		 * allowed task size. Note that p_filesz must always be
954		 * <= p_memsz so it is only necessary to check p_memsz.
955		 */
956		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
957		    elf_ppnt->p_memsz > TASK_SIZE ||
958		    TASK_SIZE - elf_ppnt->p_memsz < k) {
959			/* set_brk can never work. Avoid overflows. */
960			retval = -EINVAL;
961			goto out_free_dentry;
962		}
963
964		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
965
966		if (k > elf_bss)
967			elf_bss = k;
968		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
969			end_code = k;
970		if (end_data < k)
971			end_data = k;
972		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
973		if (k > elf_brk)
974			elf_brk = k;
975	}
976
977	loc->elf_ex.e_entry += load_bias;
978	elf_bss += load_bias;
979	elf_brk += load_bias;
980	start_code += load_bias;
981	end_code += load_bias;
982	start_data += load_bias;
983	end_data += load_bias;
984
985	/* Calling set_brk effectively mmaps the pages that we need
986	 * for the bss and break sections.  We must do this before
987	 * mapping in the interpreter, to make sure it doesn't wind
988	 * up getting placed where the bss needs to go.
989	 */
990	retval = set_brk(elf_bss, elf_brk);
991	if (retval)
992		goto out_free_dentry;
993	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
994		retval = -EFAULT; /* Nobody gets to see this, but.. */
995		goto out_free_dentry;
996	}
997
998	if (elf_interpreter) {
999		unsigned long interp_map_addr = 0;
1000
1001		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1002					    interpreter,
1003					    &interp_map_addr,
1004					    load_bias, interp_elf_phdata);
1005		if (!IS_ERR((void *)elf_entry)) {
1006			/*
1007			 * load_elf_interp() returns relocation
1008			 * adjustment
1009			 */
1010			interp_load_addr = elf_entry;
1011			elf_entry += loc->interp_elf_ex.e_entry;
1012		}
1013		if (BAD_ADDR(elf_entry)) {
1014			retval = IS_ERR((void *)elf_entry) ?
1015					(int)elf_entry : -EINVAL;
1016			goto out_free_dentry;
1017		}
1018		reloc_func_desc = interp_load_addr;
1019
1020		allow_write_access(interpreter);
1021		fput(interpreter);
1022		kfree(elf_interpreter);
1023	} else {
1024		elf_entry = loc->elf_ex.e_entry;
1025		if (BAD_ADDR(elf_entry)) {
1026			retval = -EINVAL;
1027			goto out_free_dentry;
1028		}
1029	}
1030
1031	kfree(interp_elf_phdata);
1032	kfree(elf_phdata);
1033
1034	set_binfmt(&elf_format);
1035
1036#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1037	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1038	if (retval < 0)
1039		goto out;
1040#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1041
1042	install_exec_creds(bprm);
1043	retval = create_elf_tables(bprm, &loc->elf_ex,
1044			  load_addr, interp_load_addr);
1045	if (retval < 0)
1046		goto out;
1047	/* N.B. passed_fileno might not be initialized? */
1048	current->mm->end_code = end_code;
1049	current->mm->start_code = start_code;
1050	current->mm->start_data = start_data;
1051	current->mm->end_data = end_data;
1052	current->mm->start_stack = bprm->p;
1053
1054	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1055		current->mm->brk = current->mm->start_brk =
1056			arch_randomize_brk(current->mm);
1057#ifdef compat_brk_randomized
1058		current->brk_randomized = 1;
1059#endif
1060	}
1061
1062	if (current->personality & MMAP_PAGE_ZERO) {
1063		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1064		   and some applications "depend" upon this behavior.
1065		   Since we do not have the power to recompile these, we
1066		   emulate the SVr4 behavior. Sigh. */
1067		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1068				MAP_FIXED | MAP_PRIVATE, 0);
1069	}
1070
1071#ifdef ELF_PLAT_INIT
1072	/*
1073	 * The ABI may specify that certain registers be set up in special
1074	 * ways (on i386 %edx is the address of a DT_FINI function, for
1075	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1076	 * that the e_entry field is the address of the function descriptor
1077	 * for the startup routine, rather than the address of the startup
1078	 * routine itself.  This macro performs whatever initialization to
1079	 * the regs structure is required as well as any relocations to the
1080	 * function descriptor entries when executing dynamically links apps.
1081	 */
1082	ELF_PLAT_INIT(regs, reloc_func_desc);
1083#endif
1084
1085	start_thread(regs, elf_entry, bprm->p);
1086	retval = 0;
1087out:
1088	kfree(loc);
1089out_ret:
1090	return retval;
1091
1092	/* error cleanup */
1093out_free_dentry:
1094	kfree(interp_elf_phdata);
1095	allow_write_access(interpreter);
1096	if (interpreter)
1097		fput(interpreter);
1098out_free_interp:
1099	kfree(elf_interpreter);
1100out_free_ph:
1101	kfree(elf_phdata);
1102	goto out;
1103}
1104
1105#ifdef CONFIG_USELIB
1106/* This is really simpleminded and specialized - we are loading an
1107   a.out library that is given an ELF header. */
1108static int load_elf_library(struct file *file)
1109{
1110	struct elf_phdr *elf_phdata;
1111	struct elf_phdr *eppnt;
1112	unsigned long elf_bss, bss, len;
1113	int retval, error, i, j;
1114	struct elfhdr elf_ex;
1115
1116	error = -ENOEXEC;
1117	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1118	if (retval != sizeof(elf_ex))
1119		goto out;
1120
1121	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1122		goto out;
1123
1124	/* First of all, some simple consistency checks */
1125	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1126	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1127		goto out;
1128
1129	/* Now read in all of the header information */
1130
1131	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1132	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1133
1134	error = -ENOMEM;
1135	elf_phdata = kmalloc(j, GFP_KERNEL);
1136	if (!elf_phdata)
1137		goto out;
1138
1139	eppnt = elf_phdata;
1140	error = -ENOEXEC;
1141	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1142	if (retval != j)
1143		goto out_free_ph;
1144
1145	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1146		if ((eppnt + i)->p_type == PT_LOAD)
1147			j++;
1148	if (j != 1)
1149		goto out_free_ph;
1150
1151	while (eppnt->p_type != PT_LOAD)
1152		eppnt++;
1153
1154	/* Now use mmap to map the library into memory. */
1155	error = vm_mmap(file,
1156			ELF_PAGESTART(eppnt->p_vaddr),
1157			(eppnt->p_filesz +
1158			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1159			PROT_READ | PROT_WRITE | PROT_EXEC,
1160			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1161			(eppnt->p_offset -
1162			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1163	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1164		goto out_free_ph;
1165
1166	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1167	if (padzero(elf_bss)) {
1168		error = -EFAULT;
1169		goto out_free_ph;
1170	}
1171
1172	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1173			    ELF_MIN_ALIGN - 1);
1174	bss = eppnt->p_memsz + eppnt->p_vaddr;
1175	if (bss > len)
1176		vm_brk(len, bss - len);
1177	error = 0;
1178
1179out_free_ph:
1180	kfree(elf_phdata);
1181out:
1182	return error;
1183}
1184#endif /* #ifdef CONFIG_USELIB */
1185
1186#ifdef CONFIG_ELF_CORE
1187/*
1188 * ELF core dumper
1189 *
1190 * Modelled on fs/exec.c:aout_core_dump()
1191 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1192 */
1193
1194/*
1195 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1196 * that are useful for post-mortem analysis are included in every core dump.
1197 * In that way we ensure that the core dump is fully interpretable later
1198 * without matching up the same kernel and hardware config to see what PC values
1199 * meant. These special mappings include - vDSO, vsyscall, and other
1200 * architecture specific mappings
1201 */
1202static bool always_dump_vma(struct vm_area_struct *vma)
1203{
1204	/* Any vsyscall mappings? */
1205	if (vma == get_gate_vma(vma->vm_mm))
1206		return true;
1207
1208	/*
1209	 * Assume that all vmas with a .name op should always be dumped.
1210	 * If this changes, a new vm_ops field can easily be added.
1211	 */
1212	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1213		return true;
1214
1215	/*
1216	 * arch_vma_name() returns non-NULL for special architecture mappings,
1217	 * such as vDSO sections.
1218	 */
1219	if (arch_vma_name(vma))
1220		return true;
1221
1222	return false;
1223}
1224
/*
 * Decide what to dump of a segment, part, all or none.
 *
 * @vma:      the mapping being considered.
 * @mm_flags: flags word whose MMF_DUMP_* bits form the core dump filter.
 *
 * Returns the number of bytes of @vma to write: the whole extent
 * (vm_end - vm_start), PAGE_SIZE when only a leading ELF header page is
 * wanted, or 0 to omit the contents.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
/* Non-zero when the MMF_DUMP_<type> filter bit is set in mm_flags. */
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		goto whole;

	/* Mappings flagged VM_DONTDUMP are never dumped. */
	if (vma->vm_flags & VM_DONTDUMP)
		return 0;

	/* support for DAX */
	if (vma_is_dax(vma)) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
			goto whole;
		return 0;
	}

	/* Hugetlb memory check */
	if (vma->vm_flags & VM_HUGETLB) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
		return 0;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & VM_IO)
		return 0;

	/*
	 * By default, dump shared memory if mapped from an anonymous file.
	 * A backing inode with no links left is treated as anonymous.
	 */
	if (vma->vm_flags & VM_SHARED) {
		if (file_inode(vma->vm_file)->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	/* Private mapping with no backing file and not selected above. */
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		mm_segment_t fs = get_fs();
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		/* The single-word comparison only works for a 4-byte magic. */
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		/*
		 * Switch to the user "segment" for get_user(),
		 * then put back what elf_core_dump() had in place.
		 */
		set_fs(USER_DS);
		/* An unreadable first word is treated as "no ELF magic". */
		if (unlikely(get_user(word, header)))
			word = 0;
		set_fs(fs);
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}
1320
/*
 * An ELF note in memory.
 *
 * Describes one note before it is serialised: notesize() computes its
 * on-disk size and writenote() emits it.
 */
struct memelfnote
{
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* written out as the note's n_type */
	unsigned int datasz;	/* payload size in bytes (n_descsz) */
	void *data;		/* payload; not owned by this struct */
};
1329
1330static int notesize(struct memelfnote *en)
1331{
1332	int sz;
1333
1334	sz = sizeof(struct elf_note);
1335	sz += roundup(strlen(en->name) + 1, 4);
1336	sz += roundup(en->datasz, 4);
1337
1338	return sz;
1339}
1340
1341static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1342{
1343	struct elf_note en;
1344	en.n_namesz = strlen(men->name) + 1;
1345	en.n_descsz = men->datasz;
1346	en.n_type = men->type;
1347
1348	return dump_emit(cprm, &en, sizeof(en)) &&
1349	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1350	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1351}
1352
1353static void fill_elf_header(struct elfhdr *elf, int segs,
1354			    u16 machine, u32 flags)
1355{
1356	memset(elf, 0, sizeof(*elf));
1357
1358	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1359	elf->e_ident[EI_CLASS] = ELF_CLASS;
1360	elf->e_ident[EI_DATA] = ELF_DATA;
1361	elf->e_ident[EI_VERSION] = EV_CURRENT;
1362	elf->e_ident[EI_OSABI] = ELF_OSABI;
1363
1364	elf->e_type = ET_CORE;
1365	elf->e_machine = machine;
1366	elf->e_version = EV_CURRENT;
1367	elf->e_phoff = sizeof(struct elfhdr);
1368	elf->e_flags = flags;
1369	elf->e_ehsize = sizeof(struct elfhdr);
1370	elf->e_phentsize = sizeof(struct elf_phdr);
1371	elf->e_phnum = segs;
1372
1373	return;
1374}
1375
1376static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1377{
1378	phdr->p_type = PT_NOTE;
1379	phdr->p_offset = offset;
1380	phdr->p_vaddr = 0;
1381	phdr->p_paddr = 0;
1382	phdr->p_filesz = sz;
1383	phdr->p_memsz = 0;
1384	phdr->p_flags = 0;
1385	phdr->p_align = 0;
1386	return;
1387}
1388
1389static void fill_note(struct memelfnote *note, const char *name, int type,
1390		unsigned int sz, void *data)
1391{
1392	note->name = name;
1393	note->type = type;
1394	note->datasz = sz;
1395	note->data = data;
1396	return;
1397}
1398
1399/*
1400 * fill up all the fields in prstatus from the given task struct, except
1401 * registers which need to be filled up separately.
1402 */
1403static void fill_prstatus(struct elf_prstatus *prstatus,
1404		struct task_struct *p, long signr)
1405{
1406	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1407	prstatus->pr_sigpend = p->pending.signal.sig[0];
1408	prstatus->pr_sighold = p->blocked.sig[0];
1409	rcu_read_lock();
1410	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1411	rcu_read_unlock();
1412	prstatus->pr_pid = task_pid_vnr(p);
1413	prstatus->pr_pgrp = task_pgrp_vnr(p);
1414	prstatus->pr_sid = task_session_vnr(p);
1415	if (thread_group_leader(p)) {
1416		struct task_cputime cputime;
1417
1418		/*
1419		 * This is the record for the group leader.  It shows the
1420		 * group-wide total, not its individual thread total.
1421		 */
1422		thread_group_cputime(p, &cputime);
1423		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1424		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1425	} else {
1426		cputime_t utime, stime;
1427
1428		task_cputime(p, &utime, &stime);
1429		cputime_to_timeval(utime, &prstatus->pr_utime);
1430		cputime_to_timeval(stime, &prstatus->pr_stime);
1431	}
1432	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1433	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1434}
1435
1436static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1437		       struct mm_struct *mm)
1438{
1439	const struct cred *cred;
1440	unsigned int i, len;
1441
1442	/* first copy the parameters from user space */
1443	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1444
1445	len = mm->arg_end - mm->arg_start;
1446	if (len >= ELF_PRARGSZ)
1447		len = ELF_PRARGSZ-1;
1448	if (copy_from_user(&psinfo->pr_psargs,
1449		           (const char __user *)mm->arg_start, len))
1450		return -EFAULT;
1451	for(i = 0; i < len; i++)
1452		if (psinfo->pr_psargs[i] == 0)
1453			psinfo->pr_psargs[i] = ' ';
1454	psinfo->pr_psargs[len] = 0;
1455
1456	rcu_read_lock();
1457	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1458	rcu_read_unlock();
1459	psinfo->pr_pid = task_pid_vnr(p);
1460	psinfo->pr_pgrp = task_pgrp_vnr(p);
1461	psinfo->pr_sid = task_session_vnr(p);
1462
1463	i = p->state ? ffz(~p->state) + 1 : 0;
1464	psinfo->pr_state = i;
1465	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1466	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1467	psinfo->pr_nice = task_nice(p);
1468	psinfo->pr_flag = p->flags;
1469	rcu_read_lock();
1470	cred = __task_cred(p);
1471	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1472	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1473	rcu_read_unlock();
1474	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1475
1476	return 0;
1477}
1478
1479static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1480{
1481	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1482	int i = 0;
1483	do
1484		i += 2;
1485	while (auxv[i - 2] != AT_NULL);
1486	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1487}
1488
1489static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1490		const siginfo_t *siginfo)
1491{
1492	mm_segment_t old_fs = get_fs();
1493	set_fs(KERNEL_DS);
1494	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1495	set_fs(old_fs);
1496	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1497}
1498
1499#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1500/*
1501 * Format of NT_FILE note:
1502 *
1503 * long count     -- how many files are mapped
1504 * long page_size -- units for file_ofs
1505 * array of [COUNT] elements of
1506 *   long start
1507 *   long end
1508 *   long file_ofs
1509 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1510 */
1511static int fill_files_note(struct memelfnote *note)
1512{
1513	struct vm_area_struct *vma;
1514	unsigned count, size, names_ofs, remaining, n;
1515	user_long_t *data;
1516	user_long_t *start_end_ofs;
1517	char *name_base, *name_curpos;
1518
1519	/* *Estimated* file count and total data size needed */
1520	count = current->mm->map_count;
1521	size = count * 64;
1522
1523	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1524 alloc:
1525	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1526		return -EINVAL;
1527	size = round_up(size, PAGE_SIZE);
1528	data = vmalloc(size);
1529	if (!data)
1530		return -ENOMEM;
1531
1532	start_end_ofs = data + 2;
1533	name_base = name_curpos = ((char *)data) + names_ofs;
1534	remaining = size - names_ofs;
1535	count = 0;
1536	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1537		struct file *file;
1538		const char *filename;
1539
1540		file = vma->vm_file;
1541		if (!file)
1542			continue;
1543		filename = file_path(file, name_curpos, remaining);
1544		if (IS_ERR(filename)) {
1545			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1546				vfree(data);
1547				size = size * 5 / 4;
1548				goto alloc;
1549			}
1550			continue;
1551		}
1552
1553		/* file_path() fills at the end, move name down */
1554		/* n = strlen(filename) + 1: */
1555		n = (name_curpos + remaining) - filename;
1556		remaining = filename - name_curpos;
1557		memmove(name_curpos, filename, n);
1558		name_curpos += n;
1559
1560		*start_end_ofs++ = vma->vm_start;
1561		*start_end_ofs++ = vma->vm_end;
1562		*start_end_ofs++ = vma->vm_pgoff;
1563		count++;
1564	}
1565
1566	/* Now we know exact count of files, can store it */
1567	data[0] = count;
1568	data[1] = PAGE_SIZE;
1569	/*
1570	 * Count usually is less than current->mm->map_count,
1571	 * we need to move filenames down.
1572	 */
1573	n = current->mm->map_count - count;
1574	if (n != 0) {
1575		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1576		memmove(name_base - shift_bytes, name_base,
1577			name_curpos - name_base);
1578		name_curpos -= shift_bytes;
1579	}
1580
1581	size = name_curpos - (char *)data;
1582	fill_note(note, "CORE", NT_FILE, size, data);
1583	return 0;
1584}
1585
1586#ifdef CORE_DUMP_USE_REGSET
1587#include <linux/regset.h>
1588
/*
 * Per-thread state collected for a regset-based core dump.  Entries form
 * a singly linked list headed by elf_note_info.thread.
 */
struct elf_thread_core_info {
	struct elf_thread_core_info *next;	/* next thread, or NULL */
	struct task_struct *task;		/* the thread described */
	struct elf_prstatus prstatus;		/* payload of notes[0] */
	/*
	 * One slot per regset; the real length is chosen when the
	 * structure is allocated (elf_note_info.thread_notes entries).
	 */
	struct memelfnote notes[0];
};
1595
/* Everything needed to size and write the notes of a regset-based dump. */
struct elf_note_info {
	struct elf_thread_core_info *thread;	/* list of per-thread info */
	struct memelfnote psinfo;		/* NT_PRPSINFO */
	struct memelfnote signote;		/* NT_SIGINFO */
	struct memelfnote auxv;			/* NT_AUXV */
	struct memelfnote files;		/* NT_FILE; data is NULL if absent */
	user_siginfo_t csigdata;		/* storage behind signote */
	size_t size;				/* running total of note sizes */
	int thread_notes;			/* note slots per thread */
};
1606
1607/*
1608 * When a regset has a writeback hook, we call it on each thread before
1609 * dumping user memory.  On register window machines, this makes sure the
1610 * user memory backing the register data is up to date before we read it.
1611 */
1612static void do_thread_regset_writeback(struct task_struct *task,
1613				       const struct user_regset *regset)
1614{
1615	if (regset->writeback)
1616		regset->writeback(task, regset, 1);
1617}
1618
/*
 * Default accessors for struct elf_prstatus, used by
 * fill_thread_core_info().  Each one is only defined here when nothing
 * earlier has already provided its own version.
 */

/* Size of the register area handed to regset 0's ->get(). */
#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

/* Size of the prstatus payload recorded in the NT_PRSTATUS note. */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

/* Address of the register area inside a prstatus. */
#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* Store the "floating point registers are valid" flag in a prstatus. */
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1634
1635static int fill_thread_core_info(struct elf_thread_core_info *t,
1636				 const struct user_regset_view *view,
1637				 long signr, size_t *total)
1638{
1639	unsigned int i;
1640
1641	/*
1642	 * NT_PRSTATUS is the one special case, because the regset data
1643	 * goes into the pr_reg field inside the note contents, rather
1644	 * than being the whole note contents.  We fill the reset in here.
1645	 * We assume that regset 0 is NT_PRSTATUS.
1646	 */
1647	fill_prstatus(&t->prstatus, t->task, signr);
1648	(void) view->regsets[0].get(t->task, &view->regsets[0],
1649				    0, PR_REG_SIZE(t->prstatus.pr_reg),
1650				    PR_REG_PTR(&t->prstatus), NULL);
1651
1652	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1653		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1654	*total += notesize(&t->notes[0]);
1655
1656	do_thread_regset_writeback(t->task, &view->regsets[0]);
1657
1658	/*
1659	 * Each other regset might generate a note too.  For each regset
1660	 * that has no core_note_type or is inactive, we leave t->notes[i]
1661	 * all zero and we'll know to skip writing it later.
1662	 */
1663	for (i = 1; i < view->n; ++i) {
1664		const struct user_regset *regset = &view->regsets[i];
1665		do_thread_regset_writeback(t->task, regset);
1666		if (regset->core_note_type && regset->get &&
1667		    (!regset->active || regset->active(t->task, regset))) {
1668			int ret;
1669			size_t size = regset->n * regset->size;
1670			void *data = kmalloc(size, GFP_KERNEL);
1671			if (unlikely(!data))
1672				return 0;
1673			ret = regset->get(t->task, regset,
1674					  0, size, data, NULL);
1675			if (unlikely(ret))
1676				kfree(data);
1677			else {
1678				if (regset->core_note_type != NT_PRFPREG)
1679					fill_note(&t->notes[i], "LINUX",
1680						  regset->core_note_type,
1681						  size, data);
1682				else {
1683					SET_PR_FPVALID(&t->prstatus, 1);
1684					fill_note(&t->notes[i], "CORE",
1685						  NT_PRFPREG, size, data);
1686				}
1687				*total += notesize(&t->notes[i]);
1688			}
1689		}
1690	}
1691
1692	return 1;
1693}
1694
1695static int fill_note_info(struct elfhdr *elf, int phdrs,
1696			  struct elf_note_info *info,
1697			  const siginfo_t *siginfo, struct pt_regs *regs)
1698{
1699	struct task_struct *dump_task = current;
1700	const struct user_regset_view *view = task_user_regset_view(dump_task);
1701	struct elf_thread_core_info *t;
1702	struct elf_prpsinfo *psinfo;
1703	struct core_thread *ct;
1704	unsigned int i;
1705
1706	info->size = 0;
1707	info->thread = NULL;
1708
1709	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1710	if (psinfo == NULL) {
1711		info->psinfo.data = NULL; /* So we don't free this wrongly */
1712		return 0;
1713	}
1714
1715	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1716
1717	/*
1718	 * Figure out how many notes we're going to need for each thread.
1719	 */
1720	info->thread_notes = 0;
1721	for (i = 0; i < view->n; ++i)
1722		if (view->regsets[i].core_note_type != 0)
1723			++info->thread_notes;
1724
1725	/*
1726	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1727	 * since it is our one special case.
1728	 */
1729	if (unlikely(info->thread_notes == 0) ||
1730	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1731		WARN_ON(1);
1732		return 0;
1733	}
1734
1735	/*
1736	 * Initialize the ELF file header.
1737	 */
1738	fill_elf_header(elf, phdrs,
1739			view->e_machine, view->e_flags);
1740
1741	/*
1742	 * Allocate a structure for each thread.
1743	 */
1744	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1745		t = kzalloc(offsetof(struct elf_thread_core_info,
1746				     notes[info->thread_notes]),
1747			    GFP_KERNEL);
1748		if (unlikely(!t))
1749			return 0;
1750
1751		t->task = ct->task;
1752		if (ct->task == dump_task || !info->thread) {
1753			t->next = info->thread;
1754			info->thread = t;
1755		} else {
1756			/*
1757			 * Make sure to keep the original task at
1758			 * the head of the list.
1759			 */
1760			t->next = info->thread->next;
1761			info->thread->next = t;
1762		}
1763	}
1764
1765	/*
1766	 * Now fill in each thread's information.
1767	 */
1768	for (t = info->thread; t != NULL; t = t->next)
1769		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1770			return 0;
1771
1772	/*
1773	 * Fill in the two process-wide notes.
1774	 */
1775	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1776	info->size += notesize(&info->psinfo);
1777
1778	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1779	info->size += notesize(&info->signote);
1780
1781	fill_auxv_note(&info->auxv, current->mm);
1782	info->size += notesize(&info->auxv);
1783
1784	if (fill_files_note(&info->files) == 0)
1785		info->size += notesize(&info->files);
1786
1787	return 1;
1788}
1789
1790static size_t get_note_info_size(struct elf_note_info *info)
1791{
1792	return info->size;
1793}
1794
1795/*
1796 * Write all the notes for each thread.  When writing the first thread, the
1797 * process-wide notes are interleaved after the first thread-specific note.
1798 */
1799static int write_note_info(struct elf_note_info *info,
1800			   struct coredump_params *cprm)
1801{
1802	bool first = true;
1803	struct elf_thread_core_info *t = info->thread;
1804
1805	do {
1806		int i;
1807
1808		if (!writenote(&t->notes[0], cprm))
1809			return 0;
1810
1811		if (first && !writenote(&info->psinfo, cprm))
1812			return 0;
1813		if (first && !writenote(&info->signote, cprm))
1814			return 0;
1815		if (first && !writenote(&info->auxv, cprm))
1816			return 0;
1817		if (first && info->files.data &&
1818				!writenote(&info->files, cprm))
1819			return 0;
1820
1821		for (i = 1; i < info->thread_notes; ++i)
1822			if (t->notes[i].data &&
1823			    !writenote(&t->notes[i], cprm))
1824				return 0;
1825
1826		first = false;
1827		t = t->next;
1828	} while (t);
1829
1830	return 1;
1831}
1832
1833static void free_note_info(struct elf_note_info *info)
1834{
1835	struct elf_thread_core_info *threads = info->thread;
1836	while (threads) {
1837		unsigned int i;
1838		struct elf_thread_core_info *t = threads;
1839		threads = t->next;
1840		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1841		for (i = 1; i < info->thread_notes; ++i)
1842			kfree(t->notes[i].data);
1843		kfree(t);
1844	}
1845	kfree(info->psinfo.data);
1846	vfree(info->files.data);
1847}
1848
1849#else
1850
/*
 * Here is the structure in which status of each thread is captured.
 * The register buffers are embedded, so the notes below point back
 * into this same structure.
 */
struct elf_thread_status
{
	struct list_head list;		/* link in elf_note_info.thread_list */
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;	/* the thread being described */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];	/* prstatus, fpu and xfpu notes */
	int num_notes;			/* how many notes are written out */
};
1864
1865/*
1866 * In order to add the specific thread information for the elf file format,
1867 * we need to keep a linked list of every threads pr_status and then create
1868 * a single section for them in the final core file.
1869 */
1870static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1871{
1872	int sz = 0;
1873	struct task_struct *p = t->thread;
1874	t->num_notes = 0;
1875
1876	fill_prstatus(&t->prstatus, p, signr);
1877	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1878
1879	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1880		  &(t->prstatus));
1881	t->num_notes++;
1882	sz += notesize(&t->notes[0]);
1883
1884	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1885								&t->fpu))) {
1886		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1887			  &(t->fpu));
1888		t->num_notes++;
1889		sz += notesize(&t->notes[1]);
1890	}
1891
1892#ifdef ELF_CORE_COPY_XFPREGS
1893	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1894		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1895			  sizeof(t->xfpu), &t->xfpu);
1896		t->num_notes++;
1897		sz += notesize(&t->notes[2]);
1898	}
1899#endif
1900	return sz;
1901}
1902
/*
 * Everything needed to size and write the notes of a core dump when
 * regsets are not used.  The pointer members are separate allocations.
 */
struct elf_note_info {
	struct memelfnote *notes;	/* array of process-wide notes */
	struct memelfnote *notes_files;	/* NT_FILE slot in notes[], or NULL */
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;	/* elf_thread_status of other threads */
	elf_fpregset_t *fpu;		/* NT_PRFPREG of the dumping thread */
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;		/* ELF_CORE_XFPREG_TYPE of the dumping thread */
#endif
	user_siginfo_t csigdata;	/* storage behind the NT_SIGINFO note */
	int thread_status_size;		/* bytes of notes on thread_list */
	int numnote;			/* entries of notes[] in use */
};
1917
1918static int elf_note_info_init(struct elf_note_info *info)
1919{
1920	memset(info, 0, sizeof(*info));
1921	INIT_LIST_HEAD(&info->thread_list);
1922
1923	/* Allocate space for ELF notes */
1924	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1925	if (!info->notes)
1926		return 0;
1927	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1928	if (!info->psinfo)
1929		return 0;
1930	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1931	if (!info->prstatus)
1932		return 0;
1933	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1934	if (!info->fpu)
1935		return 0;
1936#ifdef ELF_CORE_COPY_XFPREGS
1937	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1938	if (!info->xfpu)
1939		return 0;
1940#endif
1941	return 1;
1942}
1943
1944static int fill_note_info(struct elfhdr *elf, int phdrs,
1945			  struct elf_note_info *info,
1946			  const siginfo_t *siginfo, struct pt_regs *regs)
1947{
1948	struct list_head *t;
1949	struct core_thread *ct;
1950	struct elf_thread_status *ets;
1951
1952	if (!elf_note_info_init(info))
1953		return 0;
1954
1955	for (ct = current->mm->core_state->dumper.next;
1956					ct; ct = ct->next) {
1957		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1958		if (!ets)
1959			return 0;
1960
1961		ets->thread = ct->task;
1962		list_add(&ets->list, &info->thread_list);
1963	}
1964
1965	list_for_each(t, &info->thread_list) {
1966		int sz;
1967
1968		ets = list_entry(t, struct elf_thread_status, list);
1969		sz = elf_dump_thread_status(siginfo->si_signo, ets);
1970		info->thread_status_size += sz;
1971	}
1972	/* now collect the dump for the current */
1973	memset(info->prstatus, 0, sizeof(*info->prstatus));
1974	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1975	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1976
1977	/* Set up header */
1978	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1979
1980	/*
1981	 * Set up the notes in similar form to SVR4 core dumps made
1982	 * with info from their /proc.
1983	 */
1984
1985	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1986		  sizeof(*info->prstatus), info->prstatus);
1987	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1988	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1989		  sizeof(*info->psinfo), info->psinfo);
1990
1991	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1992	fill_auxv_note(info->notes + 3, current->mm);
1993	info->numnote = 4;
1994
1995	if (fill_files_note(info->notes + info->numnote) == 0) {
1996		info->notes_files = info->notes + info->numnote;
1997		info->numnote++;
1998	}
1999
2000	/* Try to dump the FPU. */
2001	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2002							       info->fpu);
2003	if (info->prstatus->pr_fpvalid)
2004		fill_note(info->notes + info->numnote++,
2005			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2006#ifdef ELF_CORE_COPY_XFPREGS
2007	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2008		fill_note(info->notes + info->numnote++,
2009			  "LINUX", ELF_CORE_XFPREG_TYPE,
2010			  sizeof(*info->xfpu), info->xfpu);
2011#endif
2012
2013	return 1;
2014}
2015
2016static size_t get_note_info_size(struct elf_note_info *info)
2017{
2018	int sz = 0;
2019	int i;
2020
2021	for (i = 0; i < info->numnote; i++)
2022		sz += notesize(info->notes + i);
2023
2024	sz += info->thread_status_size;
2025
2026	return sz;
2027}
2028
2029static int write_note_info(struct elf_note_info *info,
2030			   struct coredump_params *cprm)
2031{
2032	int i;
2033	struct list_head *t;
2034
2035	for (i = 0; i < info->numnote; i++)
2036		if (!writenote(info->notes + i, cprm))
2037			return 0;
2038
2039	/* write out the thread status notes section */
2040	list_for_each(t, &info->thread_list) {
2041		struct elf_thread_status *tmp =
2042				list_entry(t, struct elf_thread_status, list);
2043
2044		for (i = 0; i < tmp->num_notes; i++)
2045			if (!writenote(&tmp->notes[i], cprm))
2046				return 0;
2047	}
2048
2049	return 1;
2050}
2051
2052static void free_note_info(struct elf_note_info *info)
2053{
2054	while (!list_empty(&info->thread_list)) {
2055		struct list_head *tmp = info->thread_list.next;
2056		list_del(tmp);
2057		kfree(list_entry(tmp, struct elf_thread_status, list));
2058	}
2059
2060	/* Free data possibly allocated by fill_files_note(): */
2061	if (info->notes_files)
2062		vfree(info->notes_files->data);
2063
2064	kfree(info->prstatus);
2065	kfree(info->psinfo);
2066	kfree(info->notes);
2067	kfree(info->fpu);
2068#ifdef ELF_CORE_COPY_XFPREGS
2069	kfree(info->xfpu);
2070#endif
2071}
2072
2073#endif
2074
2075static struct vm_area_struct *first_vma(struct task_struct *tsk,
2076					struct vm_area_struct *gate_vma)
2077{
2078	struct vm_area_struct *ret = tsk->mm->mmap;
2079
2080	if (ret)
2081		return ret;
2082	return gate_vma;
2083}
2084/*
2085 * Helper function for iterating across a vma list.  It ensures that the caller
2086 * will visit `gate_vma' prior to terminating the search.
2087 */
2088static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2089					struct vm_area_struct *gate_vma)
2090{
2091	struct vm_area_struct *ret;
2092
2093	ret = this_vma->vm_next;
2094	if (ret)
2095		return ret;
2096	if (this_vma == gate_vma)
2097		return NULL;
2098	return gate_vma;
2099}
2100
2101static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2102			     elf_addr_t e_shoff, int segs)
2103{
2104	elf->e_shoff = e_shoff;
2105	elf->e_shentsize = sizeof(*shdr4extnum);
2106	elf->e_shnum = 1;
2107	elf->e_shstrndx = SHN_UNDEF;
2108
2109	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2110
2111	shdr4extnum->sh_type = SHT_NULL;
2112	shdr4extnum->sh_size = elf->e_shnum;
2113	shdr4extnum->sh_link = elf->e_shstrndx;
2114	shdr4extnum->sh_info = segs;
2115}
2116
2117/*
2118 * Actual dumper
2119 *
2120 * This is a two-pass process; first we find the offsets of the bits,
2121 * and then they are actually written out.  If we run out of core limit
2122 * we just truncate.
2123 */
2124static int elf_core_dump(struct coredump_params *cprm)
2125{
2126	int has_dumped = 0;
2127	mm_segment_t fs;
2128	int segs, i;
2129	size_t vma_data_size = 0;
2130	struct vm_area_struct *vma, *gate_vma;
2131	struct elfhdr *elf = NULL;
2132	loff_t offset = 0, dataoff;
2133	struct elf_note_info info = { };
2134	struct elf_phdr *phdr4note = NULL;
2135	struct elf_shdr *shdr4extnum = NULL;
2136	Elf_Half e_phnum;
2137	elf_addr_t e_shoff;
2138	elf_addr_t *vma_filesz = NULL;
2139
2140	/*
2141	 * We no longer stop all VM operations.
2142	 *
2143	 * This is because those proceses that could possibly change map_count
2144	 * or the mmap / vma pages are now blocked in do_exit on current
2145	 * finishing this core dump.
2146	 *
2147	 * Only ptrace can touch these memory addresses, but it doesn't change
2148	 * the map_count or the pages allocated. So no possibility of crashing
2149	 * exists while dumping the mm->vm_next areas to the core file.
2150	 */
2151
2152	/* alloc memory for large data structures: too large to be on stack */
2153	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2154	if (!elf)
2155		goto out;
2156	/*
2157	 * The number of segs are recored into ELF header as 16bit value.
2158	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2159	 */
2160	segs = current->mm->map_count;
2161	segs += elf_core_extra_phdrs();
2162
2163	gate_vma = get_gate_vma(current->mm);
2164	if (gate_vma != NULL)
2165		segs++;
2166
2167	/* for notes section */
2168	segs++;
2169
2170	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2171	 * this, kernel supports extended numbering. Have a look at
2172	 * include/linux/elf.h for further information. */
2173	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2174
2175	/*
2176	 * Collect all the non-memory information about the process for the
2177	 * notes.  This also sets up the file header.
2178	 */
2179	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2180		goto cleanup;
2181
2182	has_dumped = 1;
2183
2184	fs = get_fs();
2185	set_fs(KERNEL_DS);
2186
2187	offset += sizeof(*elf);				/* Elf header */
2188	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2189
2190	/* Write notes phdr entry */
2191	{
2192		size_t sz = get_note_info_size(&info);
2193
2194		sz += elf_coredump_extra_notes_size();
2195
2196		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2197		if (!phdr4note)
2198			goto end_coredump;
2199
2200		fill_elf_note_phdr(phdr4note, sz, offset);
2201		offset += sz;
2202	}
2203
2204	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2205
2206	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2207	if (!vma_filesz)
2208		goto end_coredump;
2209
2210	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2211			vma = next_vma(vma, gate_vma)) {
2212		unsigned long dump_size;
2213
2214		dump_size = vma_dump_size(vma, cprm->mm_flags);
2215		vma_filesz[i++] = dump_size;
2216		vma_data_size += dump_size;
2217	}
2218
2219	offset += vma_data_size;
2220	offset += elf_core_extra_data_size();
2221	e_shoff = offset;
2222
2223	if (e_phnum == PN_XNUM) {
2224		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2225		if (!shdr4extnum)
2226			goto end_coredump;
2227		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2228	}
2229
2230	offset = dataoff;
2231
2232	if (!dump_emit(cprm, elf, sizeof(*elf)))
2233		goto end_coredump;
2234
2235	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2236		goto end_coredump;
2237
2238	/* Write program headers for segments dump */
2239	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2240			vma = next_vma(vma, gate_vma)) {
2241		struct elf_phdr phdr;
2242
2243		phdr.p_type = PT_LOAD;
2244		phdr.p_offset = offset;
2245		phdr.p_vaddr = vma->vm_start;
2246		phdr.p_paddr = 0;
2247		phdr.p_filesz = vma_filesz[i++];
2248		phdr.p_memsz = vma->vm_end - vma->vm_start;
2249		offset += phdr.p_filesz;
2250		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2251		if (vma->vm_flags & VM_WRITE)
2252			phdr.p_flags |= PF_W;
2253		if (vma->vm_flags & VM_EXEC)
2254			phdr.p_flags |= PF_X;
2255		phdr.p_align = ELF_EXEC_PAGESIZE;
2256
2257		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2258			goto end_coredump;
2259	}
2260
2261	if (!elf_core_write_extra_phdrs(cprm, offset))
2262		goto end_coredump;
2263
2264 	/* write out the notes section */
2265	if (!write_note_info(&info, cprm))
2266		goto end_coredump;
2267
2268	if (elf_coredump_extra_notes_write(cprm))
2269		goto end_coredump;
2270
2271	/* Align to page */
2272	if (!dump_skip(cprm, dataoff - cprm->written))
2273		goto end_coredump;
2274
2275	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2276			vma = next_vma(vma, gate_vma)) {
2277		unsigned long addr;
2278		unsigned long end;
2279
2280		end = vma->vm_start + vma_filesz[i++];
2281
2282		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2283			struct page *page;
2284			int stop;
2285
2286			page = get_dump_page(addr);
2287			if (page) {
2288				void *kaddr = kmap(page);
2289				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2290				kunmap(page);
2291				page_cache_release(page);
2292			} else
2293				stop = !dump_skip(cprm, PAGE_SIZE);
2294			if (stop)
2295				goto end_coredump;
2296		}
2297	}
2298
2299	if (!elf_core_write_extra_data(cprm))
2300		goto end_coredump;
2301
2302	if (e_phnum == PN_XNUM) {
2303		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2304			goto end_coredump;
2305	}
2306
2307end_coredump:
2308	set_fs(fs);
2309
2310cleanup:
2311	free_note_info(&info);
2312	kfree(shdr4extnum);
2313	kfree(vma_filesz);
2314	kfree(phdr4note);
2315	kfree(elf);
2316out:
2317	return has_dumped;
2318}
2319
2320#endif		/* CONFIG_ELF_CORE */
2321
/* Register the ELF binary format handler (elf_format); always returns 0. */
static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}
2327
/* Undo init_elf_binfmt(): unregister the ELF binary format handler. */
static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}
2333
/*
 * Hook the init and exit routines above into the kernel's initcall and
 * module-exit machinery, and declare the module license.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");
2337