/*
 * Copyright (C) 2009 Matt Fleming <matt@console-pimps.org>
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * This is an implementation of a DWARF unwinder. Its main purpose is
 * for generating stacktrace information. Based on the DWARF 3
 * specification from http://www.dwarfstd.org.
 *
 * TODO:
 *	- DWARF64 doesn't work.
 *	- Registers with DWARF_VAL_OFFSET rules aren't handled properly.
 */

/* #define DEBUG */
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/mm.h>
#include <linux/elf.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/dwarf.h>
#include <asm/unwinder.h>
#include <asm/sections.h>
#include <asm/unaligned.h>
#include <asm/stacktrace.h>

/* Reserve enough memory for two stack frames */
#define DWARF_FRAME_MIN_REQ	2
/* ... with 4 registers per frame. */
#define DWARF_REG_MIN_REQ	(DWARF_FRAME_MIN_REQ * 4)

static struct kmem_cache *dwarf_frame_cachep;
static mempool_t *dwarf_frame_pool;

static struct kmem_cache *dwarf_reg_cachep;
static mempool_t *dwarf_reg_pool;

static struct rb_root cie_root;
static DEFINE_SPINLOCK(dwarf_cie_lock);

static struct rb_root fde_root;
static DEFINE_SPINLOCK(dwarf_fde_lock);

static struct dwarf_cie *cached_cie;

static unsigned int dwarf_unwinder_ready;

/**
 *	dwarf_frame_alloc_reg - allocate memory for a DWARF register
 *	@frame: the DWARF frame whose list of registers we insert on
 *	@reg_num: the register number
 *
 *	Allocate space for, and initialise, a dwarf reg from
 *	dwarf_reg_pool and insert it onto the (unsorted) linked-list of
 *	dwarf registers for @frame.
 *
 *	Return the initialised DWARF reg.
 */
static struct dwarf_reg *dwarf_frame_alloc_reg(struct dwarf_frame *frame,
					       unsigned int reg_num)
{
	struct dwarf_reg *reg;

	reg = mempool_alloc(dwarf_reg_pool, GFP_ATOMIC);
	if (!reg) {
		printk(KERN_WARNING "Unable to allocate a DWARF register\n");
		/*
		 * Let's just bomb hard here, we have no way to
		 * gracefully recover.
		 */
		UNWINDER_BUG();
	}

	reg->number = reg_num;
	reg->addr = 0;
	reg->flags = 0;

	list_add(&reg->link, &frame->reg_list);

	return reg;
}

static void dwarf_frame_free_regs(struct dwarf_frame *frame)
{
	struct dwarf_reg *reg, *n;

	list_for_each_entry_safe(reg, n, &frame->reg_list, link) {
		list_del(&reg->link);
		mempool_free(reg, dwarf_reg_pool);
	}
}
/**
 *	dwarf_frame_reg - return a DWARF register
 *	@frame: the DWARF frame to search in for @reg_num
 *	@reg_num: the register number to search for
 *
 *	Lookup and return the dwarf reg @reg_num for this frame. Return
 *	NULL if @reg_num is an invalid register number.
 */
static struct dwarf_reg *dwarf_frame_reg(struct dwarf_frame *frame,
					 unsigned int reg_num)
{
	struct dwarf_reg *reg;

	list_for_each_entry(reg, &frame->reg_list, link) {
		if (reg->number == reg_num)
			return reg;
	}

	return NULL;
}

/**
 *	dwarf_read_addr - read dwarf data
 *	@src: source address of data
 *	@dst: destination address to store the data to
 *
 *	Read 'n' bytes from @src, where 'n' is the size of an address on
 *	the native machine. We have to be careful when reading from @src
 *	and writing to @dst because they can be arbitrarily aligned.
 *	Return 'n' - the number of bytes read.
 */
static inline int dwarf_read_addr(unsigned long *src, unsigned long *dst)
{
	u32 val = get_unaligned(src);
	put_unaligned(val, dst);
	return sizeof(unsigned long *);
}

/**
 *	dwarf_read_uleb128 - read unsigned LEB128 data
 *	@addr: the address where the ULEB128 data is stored
 *	@ret: address to store the result
 *
 *	Decode an unsigned LEB128 encoded datum. The algorithm is taken
 *	from Appendix C of the DWARF 3 spec. For information on the
 *	encodings refer to section "7.6 - Variable Length Data". Return
 *	the number of bytes read.
 */
static inline unsigned long dwarf_read_uleb128(char *addr, unsigned int *ret)
{
	unsigned int result;
	unsigned char byte;
	int shift, count;

	result = 0;
	shift = 0;
	count = 0;

	while (1) {
		byte = __raw_readb(addr);
		addr++;
		count++;

		result |= (byte & 0x7f) << shift;
		shift += 7;

		if (!(byte & 0x80))
			break;
	}

	*ret = result;

	return count;
}
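
/*
 * A worked example, for illustration only (the input bytes are made
 * up): the sequence 0xe5 0x8e 0x26 decodes as
 *
 *	(0xe5 & 0x7f) | ((0x8e & 0x7f) << 7) | (0x26 << 14) = 624485
 *
 * consuming three bytes; the final byte (0x26) has its high bit
 * clear, which is what terminates the loop above.
 */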

/**
 *	dwarf_read_leb128 - read signed LEB128 data
 *	@addr: the address of the LEB128 encoded data
 *	@ret: address to store the result
 *
 *	Decode signed LEB128 data. The algorithm is taken from Appendix
 *	C of the DWARF 3 spec. Return the number of bytes read.
 */
static inline unsigned long dwarf_read_leb128(char *addr, int *ret)
{
	unsigned char byte;
	int result, shift;
	int num_bits;
	int count;

	result = 0;
	shift = 0;
	count = 0;

	while (1) {
		byte = __raw_readb(addr);
		addr++;
		result |= (byte & 0x7f) << shift;
		shift += 7;
		count++;

		if (!(byte & 0x80))
			break;
	}

	/* The number of bits in a signed integer. */
	num_bits = 8 * sizeof(result);

	if ((shift < num_bits) && (byte & 0x40))
		result |= (-1 << shift);

	*ret = result;

	return count;
}
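
/*
 * Sign extension example, again with made-up input bytes: 0x9b 0xf1
 * 0x59 accumulate to 0x1b | (0x71 << 7) | (0x59 << 14) = 1472667 with
 * shift == 21. Bit 6 of the final byte is set (0x59 & 0x40), so the
 * value is sign-extended to 1472667 - 2^21 = -624485.
 */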

/**
 *	dwarf_read_encoded_value - return the decoded value at @addr
 *	@addr: the address of the encoded value
 *	@val: where to write the decoded value
 *	@encoding: the encoding with which we can decode @addr
 *
 *	GCC emits encoded addresses in the .eh_frame FDE entries. Decode
 *	the value at @addr using @encoding. The decoded value is written
 *	to @val and the number of bytes read is returned.
 */
static int dwarf_read_encoded_value(char *addr, unsigned long *val,
				    char encoding)
{
	unsigned long decoded_addr = 0;
	int count = 0;

	switch (encoding & 0x70) {
	case DW_EH_PE_absptr:
		break;
	case DW_EH_PE_pcrel:
		decoded_addr = (unsigned long)addr;
		break;
	default:
		pr_debug("encoding=0x%x\n", (encoding & 0x70));
		UNWINDER_BUG();
	}

	if ((encoding & 0x07) == 0x00)
		encoding |= DW_EH_PE_udata4;

	switch (encoding & 0x0f) {
	case DW_EH_PE_sdata4:
	case DW_EH_PE_udata4:
		count += 4;
		decoded_addr += get_unaligned((u32 *)addr);
		__raw_writel(decoded_addr, val);
		break;
	default:
		pr_debug("encoding=0x%x\n", encoding);
		UNWINDER_BUG();
	}

	return count;
}
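
/*
 * The encoding byte packs two fields: the high nibble selects how the
 * value is applied (e.g. DW_EH_PE_pcrel means "relative to the address
 * of the encoded datum") and the low nibble selects the storage format
 * (e.g. DW_EH_PE_sdata4 is a signed 4-byte value). For example, with
 * the encoding GCC typically emits for 32-bit targets,
 * DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b), the decoded address is
 * @addr plus the signed 32-bit delta stored there.
 */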

/**
 *	dwarf_entry_len - return the length of an FDE or CIE
 *	@addr: the address of the entry
 *	@len: the length of the entry
 *
 *	Read the initial_length field of the entry and store the size of
 *	the entry in @len. We return the number of bytes read. Return a
 *	count of 0 on error.
 */
static inline int dwarf_entry_len(char *addr, unsigned long *len)
{
	u32 initial_len;
	int count;

	initial_len = get_unaligned((u32 *)addr);
	count = 4;

	/*
	 * An initial length field value in the range DW_EXT_LO -
	 * DW_EXT_HI indicates an extension, and should not be
	 * interpreted as a length. The only extension that we currently
	 * understand is the use of DWARF64 addresses.
	 */
	if (initial_len >= DW_EXT_LO && initial_len <= DW_EXT_HI) {
		/*
		 * The 64-bit length field immediately follows the
		 * compulsory 32-bit length field.
		 */
		if (initial_len == DW_EXT_DWARF64) {
			*len = get_unaligned((u64 *)(addr + 4));
			count = 12;
		} else {
			printk(KERN_WARNING "Unknown DWARF extension\n");
			count = 0;
		}
	} else
		*len = initial_len;

	return count;
}
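
/*
 * Layout sketch of the two initial length forms handled above (byte
 * offsets relative to @addr):
 *
 *	32-bit DWARF:	[0..3]  length		(len < DW_EXT_LO)
 *	64-bit DWARF:	[0..3]  DW_EXT_DWARF64 marker
 *			[4..11] 64-bit length
 *
 * so a count of 4 or 12 bytes respectively is consumed before the
 * entry proper begins.
 */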

/**
 *	dwarf_lookup_cie - locate the cie
 *	@cie_ptr: pointer into the .eh_frame section identifying the CIE
 */
static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
{
	struct rb_node **rb_node = &cie_root.rb_node;
	struct dwarf_cie *cie = NULL;
	unsigned long flags;

	spin_lock_irqsave(&dwarf_cie_lock, flags);

	/*
	 * We've cached the last CIE we looked up because chances are
	 * that the FDE wants this CIE.
	 */
	if (cached_cie && cached_cie->cie_pointer == cie_ptr) {
		cie = cached_cie;
		goto out;
	}

	while (*rb_node) {
		struct dwarf_cie *cie_tmp;

		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
		BUG_ON(!cie_tmp);

		if (cie_ptr == cie_tmp->cie_pointer) {
			cie = cie_tmp;
			cached_cie = cie_tmp;
			goto out;
		} else {
			if (cie_ptr < cie_tmp->cie_pointer)
				rb_node = &(*rb_node)->rb_left;
			else
				rb_node = &(*rb_node)->rb_right;
		}
	}

out:
	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
	return cie;
}

/**
 *	dwarf_lookup_fde - locate the FDE that covers pc
 *	@pc: the program counter
 */
struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
{
	struct rb_node **rb_node = &fde_root.rb_node;
	struct dwarf_fde *fde = NULL;
	unsigned long flags;

	spin_lock_irqsave(&dwarf_fde_lock, flags);

	while (*rb_node) {
		struct dwarf_fde *fde_tmp;
		unsigned long tmp_start, tmp_end;

		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
		BUG_ON(!fde_tmp);

		tmp_start = fde_tmp->initial_location;
		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;

		if (pc < tmp_start) {
			rb_node = &(*rb_node)->rb_left;
		} else {
			if (pc < tmp_end) {
				fde = fde_tmp;
				goto out;
			} else
				rb_node = &(*rb_node)->rb_right;
		}
	}

out:
	spin_unlock_irqrestore(&dwarf_fde_lock, flags);

	return fde;
}
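
/*
 * Each FDE covers the half-open interval
 * [initial_location, initial_location + address_range). So, taking a
 * hypothetical FDE describing 0x8c001000 with an address_range of
 * 0x80, a pc of 0x8c001040 matches it, while a pc of 0x8c001080 walks
 * right to the next node in the tree.
 */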

/**
 *	dwarf_cfa_execute_insns - execute instructions to calculate a CFA
 *	@insn_start: address of the first instruction
 *	@insn_end: address of the last instruction
 *	@cie: the CIE for this function
 *	@fde: the FDE for this function
 *	@frame: the instructions calculate the CFA for this frame
 *	@pc: the program counter of the address we're interested in
 *
 *	Execute the Call Frame instruction sequence starting at
 *	@insn_start and ending at @insn_end. The instructions describe
 *	how to calculate the Canonical Frame Address of a stackframe.
 *	Store the results in @frame.
 */
static int dwarf_cfa_execute_insns(unsigned char *insn_start,
				   unsigned char *insn_end,
				   struct dwarf_cie *cie,
				   struct dwarf_fde *fde,
				   struct dwarf_frame *frame,
				   unsigned long pc)
{
	unsigned char insn;
	unsigned char *current_insn;
	unsigned int count, delta, reg, expr_len, offset;
	struct dwarf_reg *regp;

	current_insn = insn_start;

	while (current_insn < insn_end && frame->pc <= pc) {
		insn = __raw_readb(current_insn++);

		/*
		 * Firstly, handle the opcodes that embed their operands
		 * in the instructions.
		 */
		switch (DW_CFA_opcode(insn)) {
		case DW_CFA_advance_loc:
			delta = DW_CFA_operand(insn);
			delta *= cie->code_alignment_factor;
			frame->pc += delta;
			continue;
			/* NOTREACHED */
		case DW_CFA_offset:
			reg = DW_CFA_operand(insn);
			count = dwarf_read_uleb128(current_insn, &offset);
			current_insn += count;
			offset *= cie->data_alignment_factor;
			regp = dwarf_frame_alloc_reg(frame, reg);
			regp->addr = offset;
			regp->flags |= DWARF_REG_OFFSET;
			continue;
			/* NOTREACHED */
		case DW_CFA_restore:
			reg = DW_CFA_operand(insn);
			continue;
			/* NOTREACHED */
		}

		/*
		 * Secondly, handle the opcodes that don't embed their
		 * operands in the instruction.
		 */
		switch (insn) {
		case DW_CFA_nop:
			continue;
		case DW_CFA_advance_loc1:
			delta = *current_insn++;
			frame->pc += delta * cie->code_alignment_factor;
			break;
		case DW_CFA_advance_loc2:
			delta = get_unaligned((u16 *)current_insn);
			current_insn += 2;
			frame->pc += delta * cie->code_alignment_factor;
			break;
		case DW_CFA_advance_loc4:
			delta = get_unaligned((u32 *)current_insn);
			current_insn += 4;
			frame->pc += delta * cie->code_alignment_factor;
			break;
		case DW_CFA_offset_extended:
			count = dwarf_read_uleb128(current_insn, &reg);
			current_insn += count;
			count = dwarf_read_uleb128(current_insn, &offset);
			current_insn += count;
			offset *= cie->data_alignment_factor;
			break;
		case DW_CFA_restore_extended:
			count = dwarf_read_uleb128(current_insn, &reg);
			current_insn += count;
			break;
		case DW_CFA_undefined:
			count = dwarf_read_uleb128(current_insn, &reg);
			current_insn += count;
			regp = dwarf_frame_alloc_reg(frame, reg);
			regp->flags |= DWARF_UNDEFINED;
			break;
		case DW_CFA_def_cfa:
			count = dwarf_read_uleb128(current_insn,
						   &frame->cfa_register);
			current_insn += count;
			count = dwarf_read_uleb128(current_insn,
						   &frame->cfa_offset);
			current_insn += count;

			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
			break;
		case DW_CFA_def_cfa_register:
			count = dwarf_read_uleb128(current_insn,
						   &frame->cfa_register);
			current_insn += count;
			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
			break;
		case DW_CFA_def_cfa_offset:
			count = dwarf_read_uleb128(current_insn, &offset);
			current_insn += count;
			frame->cfa_offset = offset;
			break;
		case DW_CFA_def_cfa_expression:
			count = dwarf_read_uleb128(current_insn, &expr_len);
			current_insn += count;

			frame->cfa_expr = current_insn;
			frame->cfa_expr_len = expr_len;
			current_insn += expr_len;

			frame->flags |= DWARF_FRAME_CFA_REG_EXP;
			break;
		case DW_CFA_offset_extended_sf:
			count = dwarf_read_uleb128(current_insn, &reg);
			current_insn += count;
			count = dwarf_read_leb128(current_insn, &offset);
			current_insn += count;
			offset *= cie->data_alignment_factor;
			regp = dwarf_frame_alloc_reg(frame, reg);
			regp->flags |= DWARF_REG_OFFSET;
			regp->addr = offset;
			break;
		case DW_CFA_val_offset:
			count = dwarf_read_uleb128(current_insn, &reg);
			current_insn += count;
			count = dwarf_read_leb128(current_insn, &offset);
			current_insn += count;
			offset *= cie->data_alignment_factor;
			regp = dwarf_frame_alloc_reg(frame, reg);
			regp->flags |= DWARF_VAL_OFFSET;
			regp->addr = offset;
			break;
		case DW_CFA_GNU_args_size:
			count = dwarf_read_uleb128(current_insn, &offset);
			current_insn += count;
			break;
		case DW_CFA_GNU_negative_offset_extended:
			count = dwarf_read_uleb128(current_insn, &reg);
			current_insn += count;
			count = dwarf_read_uleb128(current_insn, &offset);
			current_insn += count;
			offset *= cie->data_alignment_factor;

			regp = dwarf_frame_alloc_reg(frame, reg);
			regp->flags |= DWARF_REG_OFFSET;
			regp->addr = -offset;
			break;
		default:
			pr_debug("unhandled DWARF instruction 0x%x\n", insn);
			UNWINDER_BUG();
			break;
		}
	}

	return 0;
}
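
/*
 * To make the interpreter loop above concrete, a hypothetical CFI
 * program for a function that spills its return address might read:
 *
 *	DW_CFA_def_cfa: r15 ofs 0	(CFA = value of the stack pointer)
 *	DW_CFA_advance_loc: 2
 *	DW_CFA_def_cfa_offset: 4	(CFA = r15 + 4)
 *	DW_CFA_offset: r17 at cfa-4	(pr saved at CFA - 4)
 *
 * Executing it with @pc inside the function leaves frame->cfa_register,
 * frame->cfa_offset and the rule for the return address register
 * populated, which is exactly what dwarf_unwind_stack() consumes below.
 */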

/**
 *	dwarf_free_frame - free the memory allocated for @frame
 *	@frame: the frame to free
 */
void dwarf_free_frame(struct dwarf_frame *frame)
{
	dwarf_frame_free_regs(frame);
	mempool_free(frame, dwarf_frame_pool);
}

extern void ret_from_irq(void);

/**
 *	dwarf_unwind_stack - unwind the stack
 *
 *	@pc: address of the function to unwind
 *	@prev: struct dwarf_frame of the previous stackframe on the callstack
 *
 *	Return a struct dwarf_frame representing the most recent frame
 *	on the callstack. The lower (older) stack frames are linked via
 *	the "prev" member.
 */
struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
				       struct dwarf_frame *prev)
{
	struct dwarf_frame *frame;
	struct dwarf_cie *cie;
	struct dwarf_fde *fde;
	struct dwarf_reg *reg;
	unsigned long addr;

	/*
	 * If we've been called before initialization has completed,
	 * bail out immediately.
	 */
	if (!dwarf_unwinder_ready)
		return NULL;

	/*
	 * If we're starting at the top of the stack we need to get the
	 * contents of a physical register to get the CFA in order to
	 * begin the virtual unwinding of the stack.
	 *
	 * NOTE: the return address is guaranteed to be set up by the
	 * time this function makes its first function call.
	 */
	if (!pc || !prev)
		pc = (unsigned long)current_text_addr();

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	/*
	 * If our stack has been patched by the function graph tracer
	 * then we might see the address of return_to_handler() where we
	 * expected to find the real return address.
	 */
	if (pc == (unsigned long)&return_to_handler) {
		int index = current->curr_ret_stack;

		/*
		 * We currently have no way of tracking how many
		 * return_to_handler()s we've seen. If there is more
		 * than one patched return address on our stack,
		 * complain loudly.
		 */
		WARN_ON(index > 0);

		pc = current->ret_stack[index].ret;
	}
#endif

	frame = mempool_alloc(dwarf_frame_pool, GFP_ATOMIC);
	if (!frame) {
		printk(KERN_ERR "Unable to allocate a dwarf frame\n");
		UNWINDER_BUG();
	}

	INIT_LIST_HEAD(&frame->reg_list);
	frame->flags = 0;
	frame->prev = prev;
	frame->return_addr = 0;

	fde = dwarf_lookup_fde(pc);
	if (!fde) {
		/*
		 * This is our normal exit path. There are two reasons
		 * why we might exit here,
		 *
		 *	a) pc has no associated DWARF frame info and so
		 *	we don't know how to unwind this frame. This is
		 *	usually the case when we're trying to unwind a
		 *	frame that was called from some assembly code
		 *	that has no DWARF info, e.g. syscalls.
		 *
		 *	b) the DWARF info for pc is bogus. There's
		 *	really no way to distinguish this case from the
		 *	case above, which sucks because we could print a
		 *	warning here.
		 */
		goto bail;
	}

	cie = dwarf_lookup_cie(fde->cie_pointer);

	frame->pc = fde->initial_location;

	/* CIE initial instructions */
	dwarf_cfa_execute_insns(cie->initial_instructions,
				cie->instructions_end, cie, fde,
				frame, pc);

	/* FDE instructions */
	dwarf_cfa_execute_insns(fde->instructions, fde->end, cie,
				fde, frame, pc);

	/* Calculate the CFA */
	switch (frame->flags) {
	case DWARF_FRAME_CFA_REG_OFFSET:
		if (prev) {
			reg = dwarf_frame_reg(prev, frame->cfa_register);
			UNWINDER_BUG_ON(!reg);
			UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);

			addr = prev->cfa + reg->addr;
			frame->cfa = __raw_readl(addr);

		} else {
			/*
			 * Again, we're starting from the top of the
			 * stack. We need to physically read
			 * the contents of a register in order to get
			 * the Canonical Frame Address for this
			 * function.
			 */
			frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
		}

		frame->cfa += frame->cfa_offset;
		break;
	default:
		UNWINDER_BUG();
	}

	reg = dwarf_frame_reg(frame, DWARF_ARCH_RA_REG);

	/*
	 * If we haven't seen the return address register or the return
	 * address column is undefined then we must assume that this is
	 * the end of the callstack.
	 */
	if (!reg || reg->flags == DWARF_UNDEFINED)
		goto bail;

	UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);

	addr = frame->cfa + reg->addr;
	frame->return_addr = __raw_readl(addr);

	/*
	 * Ah, the joys of unwinding through interrupts.
	 *
	 * Interrupts are tricky - the DWARF info needs to be _really_
	 * accurate and unfortunately I'm seeing a lot of bogus DWARF
	 * info. For example, I've seen interrupts occur in epilogues
	 * just after the frame pointer (r14) had been restored. The
	 * problem was that the DWARF info claimed that the CFA could be
	 * reached by using the value of the frame pointer before it was
	 * restored.
	 *
	 * So until the compiler can be trusted to produce reliable
	 * DWARF info when it really matters, let's stop unwinding once
	 * we've calculated the function that was interrupted.
	 */
	if (prev && prev->pc == (unsigned long)ret_from_irq)
		frame->return_addr = 0;

	return frame;

bail:
	dwarf_free_frame(frame);
	return NULL;
}

static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
			   unsigned char *end, struct module *mod)
{
	struct rb_node **rb_node = &cie_root.rb_node;
	struct rb_node *parent = *rb_node;
	struct dwarf_cie *cie;
	unsigned long flags;
	int count;

	cie = kzalloc(sizeof(*cie), GFP_KERNEL);
	if (!cie)
		return -ENOMEM;

	cie->length = len;

	/*
	 * Record the offset into the .eh_frame section
	 * for this CIE. It allows this CIE to be
	 * quickly and easily looked up from the
	 * corresponding FDE.
	 */
	cie->cie_pointer = (unsigned long)entry;

	cie->version = *(char *)p++;
	UNWINDER_BUG_ON(cie->version != 1);

	cie->augmentation = p;
	p += strlen(cie->augmentation) + 1;

	count = dwarf_read_uleb128(p, &cie->code_alignment_factor);
	p += count;

	count = dwarf_read_leb128(p, &cie->data_alignment_factor);
	p += count;

	/*
	 * Which column in the rule table contains the
	 * return address?
	 */
	if (cie->version == 1) {
		cie->return_address_reg = __raw_readb(p);
		p++;
	} else {
		count = dwarf_read_uleb128(p, &cie->return_address_reg);
		p += count;
	}

	if (cie->augmentation[0] == 'z') {
		unsigned int length, count;
		cie->flags |= DWARF_CIE_Z_AUGMENTATION;

		count = dwarf_read_uleb128(p, &length);
		p += count;

		UNWINDER_BUG_ON((unsigned char *)p > end);

		cie->initial_instructions = p + length;
		cie->augmentation++;
	}

	while (*cie->augmentation) {
		/*
		 * "L" indicates a byte showing how the
		 * LSDA pointer is encoded. Skip it.
		 */
		if (*cie->augmentation == 'L') {
			p++;
			cie->augmentation++;
		} else if (*cie->augmentation == 'R') {
			/*
			 * "R" indicates a byte showing
			 * how FDE addresses are
			 * encoded.
			 */
			cie->encoding = *(char *)p++;
			cie->augmentation++;
		} else if (*cie->augmentation == 'P') {
			/*
			 * "P" indicates a personality
			 * routine in the CIE
			 * augmentation.
			 */
			UNWINDER_BUG();
		} else if (*cie->augmentation == 'S') {
			UNWINDER_BUG();
		} else {
			/*
			 * Unknown augmentation. Assume
			 * 'z' augmentation.
			 */
			p = cie->initial_instructions;
			UNWINDER_BUG_ON(!p);
			break;
		}
	}

	cie->initial_instructions = p;
	cie->instructions_end = end;

	/* Add to list */
	spin_lock_irqsave(&dwarf_cie_lock, flags);

	while (*rb_node) {
		struct dwarf_cie *cie_tmp;

		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);

		parent = *rb_node;

		if (cie->cie_pointer < cie_tmp->cie_pointer)
			rb_node = &parent->rb_left;
		else if (cie->cie_pointer > cie_tmp->cie_pointer)
			rb_node = &parent->rb_right;
		else
			WARN_ON(1);
	}

	rb_link_node(&cie->node, parent, rb_node);
	rb_insert_color(&cie->node, &cie_root);

#ifdef CONFIG_MODULES
	if (mod != NULL)
		list_add_tail(&cie->link, &mod->arch.cie_list);
#endif

	spin_unlock_irqrestore(&dwarf_cie_lock, flags);

	return 0;
}
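
/*
 * For reference, the CIEs GCC commonly emits into .eh_frame carry the
 * augmentation string "zR": the 'z' adds a ULEB128 augmentation data
 * length, and the 'R' adds a single byte describing how the FDE's
 * initial_location and address_range are encoded (see
 * dwarf_read_encoded_value()). The parser above handles exactly this
 * shape and bails via UNWINDER_BUG() on personality ('P') or
 * signal-frame ('S') augmentations.
 */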

static int dwarf_parse_fde(void *entry, u32 entry_type,
			   void *start, unsigned long len,
			   unsigned char *end, struct module *mod)
{
	struct rb_node **rb_node = &fde_root.rb_node;
	struct rb_node *parent = *rb_node;
	struct dwarf_fde *fde;
	struct dwarf_cie *cie;
	unsigned long flags;
	int count;
	void *p = start;

	fde = kzalloc(sizeof(*fde), GFP_KERNEL);
	if (!fde)
		return -ENOMEM;

	fde->length = len;

	/*
	 * In a .eh_frame section the CIE pointer is the delta between
	 * the address of the CIE pointer field within the FDE and the
	 * address of the CIE itself.
	 */
	fde->cie_pointer = (unsigned long)(p - entry_type - 4);

	cie = dwarf_lookup_cie(fde->cie_pointer);
	fde->cie = cie;

	if (cie->encoding)
		count = dwarf_read_encoded_value(p, &fde->initial_location,
						 cie->encoding);
	else
		count = dwarf_read_addr(p, &fde->initial_location);

	p += count;

	if (cie->encoding)
		count = dwarf_read_encoded_value(p, &fde->address_range,
						 cie->encoding & 0x0f);
	else
		count = dwarf_read_addr(p, &fde->address_range);

	p += count;

	if (fde->cie->flags & DWARF_CIE_Z_AUGMENTATION) {
		unsigned int length;
		count = dwarf_read_uleb128(p, &length);
		p += count + length;
	}

	/* Call frame instructions. */
	fde->instructions = p;
	fde->end = end;

	/* Add to list. */
	spin_lock_irqsave(&dwarf_fde_lock, flags);

	while (*rb_node) {
		struct dwarf_fde *fde_tmp;
		unsigned long tmp_start, tmp_end;
		unsigned long start, end;

		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);

		start = fde->initial_location;
		end = fde->initial_location + fde->address_range;

		tmp_start = fde_tmp->initial_location;
		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;

		parent = *rb_node;

		if (start < tmp_start)
			rb_node = &parent->rb_left;
		else if (start >= tmp_end)
			rb_node = &parent->rb_right;
		else
			WARN_ON(1);
	}

	rb_link_node(&fde->node, parent, rb_node);
	rb_insert_color(&fde->node, &fde_root);

#ifdef CONFIG_MODULES
	if (mod != NULL)
		list_add_tail(&fde->link, &mod->arch.fde_list);
#endif

	spin_unlock_irqrestore(&dwarf_fde_lock, flags);

	return 0;
}
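
/*
 * A worked example of the CIE pointer arithmetic in dwarf_parse_fde(),
 * using made-up offsets: if an FDE's CIE pointer field lives at
 * .eh_frame offset 0x44 and holds the value 0x44, the owning CIE
 * starts at offset 0x44 - 0x44 = 0x0, i.e. the start of the section.
 * That address is what dwarf_parse_cie() records as cie_pointer, so
 * dwarf_lookup_cie() finds the right entry.
 */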

static void dwarf_unwinder_dump(struct task_struct *task,
				struct pt_regs *regs,
				unsigned long *sp,
				const struct stacktrace_ops *ops,
				void *data)
{
	struct dwarf_frame *frame, *_frame;
	unsigned long return_addr;

	_frame = NULL;
	return_addr = 0;

	while (1) {
		frame = dwarf_unwind_stack(return_addr, _frame);

		if (_frame)
			dwarf_free_frame(_frame);

		_frame = frame;

		if (!frame || !frame->return_addr)
			break;

		return_addr = frame->return_addr;
		ops->address(data, return_addr, 1);
	}

	if (frame)
		dwarf_free_frame(frame);
}

static struct unwinder dwarf_unwinder = {
	.name = "dwarf-unwinder",
	.dump = dwarf_unwinder_dump,
	.rating = 150,
};

static void __init dwarf_unwinder_cleanup(void)
{
	struct dwarf_fde *fde, *next_fde;
	struct dwarf_cie *cie, *next_cie;

	/*
	 * Deallocate all the memory allocated for the DWARF unwinder.
	 * Traverse all the FDE/CIE lists and remove and free all the
	 * memory associated with those data structures.
	 */
	rbtree_postorder_for_each_entry_safe(fde, next_fde, &fde_root, node)
		kfree(fde);

	rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node)
		kfree(cie);

	if (dwarf_reg_pool)
		mempool_destroy(dwarf_reg_pool);
	if (dwarf_frame_pool)
		mempool_destroy(dwarf_frame_pool);
	kmem_cache_destroy(dwarf_reg_cachep);
	kmem_cache_destroy(dwarf_frame_cachep);
}

/**
 *	dwarf_parse_section - parse DWARF section
 *	@eh_frame_start: start address of the .eh_frame section
 *	@eh_frame_end: end address of the .eh_frame section
 *	@mod: the kernel module containing the .eh_frame section
 *
 *	Parse the information in a .eh_frame section.
 */
static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end,
			       struct module *mod)
{
	u32 entry_type;
	void *p, *entry;
	int count, err = 0;
	unsigned long len = 0;
	unsigned int c_entries, f_entries;
	unsigned char *end;

	c_entries = 0;
	f_entries = 0;
	entry = eh_frame_start;

	while ((char *)entry < eh_frame_end) {
		p = entry;

		count = dwarf_entry_len(p, &len);
		if (count == 0) {
			/*
			 * We read a bogus length field value. There is
			 * nothing we can do here apart from disabling
			 * the DWARF unwinder. We can't even skip this
			 * entry and move to the next one because 'len'
			 * tells us where our next entry is.
			 */
			err = -EINVAL;
			goto out;
		} else
			p += count;

		/* initial length does not include itself */
		end = p + len;

		entry_type = get_unaligned((u32 *)p);
		p += 4;

		if (entry_type == DW_EH_FRAME_CIE) {
			err = dwarf_parse_cie(entry, p, len, end, mod);
			if (err < 0)
				goto out;
			else
				c_entries++;
		} else {
			err = dwarf_parse_fde(entry, entry_type, p, len,
					      end, mod);
			if (err < 0)
				goto out;
			else
				f_entries++;
		}

		entry = (char *)entry + len + 4;
	}

	printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
	       c_entries, f_entries);

	return 0;

out:
	return err;
}
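
/*
 * Sketch of the .eh_frame layout that the loop above walks, with
 * illustrative offsets only:
 *
 *	offset	field
 *	0x00	length of CIE	(initial length, excludes itself)
 *	0x04	DW_EH_FRAME_CIE	(marks this entry as a CIE)
 *	...	CIE body
 *	0x40	length of FDE
 *	0x44	CIE pointer	(delta back to the owning CIE)
 *	...	FDE body
 *
 * Each iteration advances 'entry' past the 4-byte length field plus
 * the length it contains, so entries are visited back to back.
 */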

#ifdef CONFIG_MODULES
int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
			  struct module *me)
{
	unsigned int i;
	int err;
	unsigned long start, end;
	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;

	start = end = 0;

	for (i = 1; i < hdr->e_shnum; i++) {
		/* Alloc bit cleared means "ignore it." */
		if ((sechdrs[i].sh_flags & SHF_ALLOC)
		    && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) {
			start = sechdrs[i].sh_addr;
			end = start + sechdrs[i].sh_size;
			break;
		}
	}

	/* Did we find the .eh_frame section? */
	if (i != hdr->e_shnum) {
		INIT_LIST_HEAD(&me->arch.cie_list);
		INIT_LIST_HEAD(&me->arch.fde_list);
		err = dwarf_parse_section((char *)start, (char *)end, me);
		if (err) {
			printk(KERN_WARNING "%s: failed to parse DWARF info\n",
			       me->name);
			return err;
		}
	}

	return 0;
}
1126
1127/**
1128 *	module_dwarf_cleanup - remove FDE/CIEs associated with @mod
1129 *	@mod: the module that is being unloaded
1130 *
1131 *	Remove any FDEs and CIEs from the global lists that came from
1132 *	@mod's .eh_frame section because @mod is being unloaded.
1133 */
1134void module_dwarf_cleanup(struct module *mod)
1135{
1136	struct dwarf_fde *fde, *ftmp;
1137	struct dwarf_cie *cie, *ctmp;
1138	unsigned long flags;
1139
1140	spin_lock_irqsave(&dwarf_cie_lock, flags);
1141
1142	list_for_each_entry_safe(cie, ctmp, &mod->arch.cie_list, link) {
1143		list_del(&cie->link);
1144		rb_erase(&cie->node, &cie_root);
1145		kfree(cie);
1146	}
1147
1148	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
1149
1150	spin_lock_irqsave(&dwarf_fde_lock, flags);
1151
1152	list_for_each_entry_safe(fde, ftmp, &mod->arch.fde_list, link) {
1153		list_del(&fde->link);
1154		rb_erase(&fde->node, &fde_root);
1155		kfree(fde);
1156	}
1157
1158	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
1159}
1160#endif /* CONFIG_MODULES */

/**
 *	dwarf_unwinder_init - initialise the dwarf unwinder
 *
 *	Build the data structures describing the .eh_frame section to
 *	make it easier to lookup CIE and FDE entries. Because the
 *	.eh_frame section is packed as tightly as possible it is not
 *	easy to lookup the FDE for a given PC, so we build a list of FDE
 *	and CIE entries that make it easier.
 */
static int __init dwarf_unwinder_init(void)
{
	int err = -ENOMEM;

	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
			sizeof(struct dwarf_frame), 0,
			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);

	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
			sizeof(struct dwarf_reg), 0,
			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);

	dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ,
						    dwarf_frame_cachep);
	if (!dwarf_frame_pool)
		goto out;

	dwarf_reg_pool = mempool_create_slab_pool(DWARF_REG_MIN_REQ,
						  dwarf_reg_cachep);
	if (!dwarf_reg_pool)
		goto out;

	err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL);
	if (err)
		goto out;

	err = unwinder_register(&dwarf_unwinder);
	if (err)
		goto out;

	dwarf_unwinder_ready = 1;

	return 0;

out:
	printk(KERN_ERR "Failed to initialise DWARF unwinder: %d\n", err);
	dwarf_unwinder_cleanup();
	return err;
}
early_initcall(dwarf_unwinder_init);