1/*
2 * Copyright 2013 Tilera Corporation. All Rights Reserved.
3 *
4 *   This program is free software; you can redistribute it and/or
5 *   modify it under the terms of the GNU General Public License
6 *   as published by the Free Software Foundation, version 2.
7 *
8 *   This program is distributed in the hope that it will be useful, but
9 *   WITHOUT ANY WARRANTY; without even the implied warranty of
10 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 *   NON INFRINGEMENT.  See the GNU General Public License for
12 *   more details.
13 *
 * A code-rewriter that handles unaligned exceptions.
15 */
16
17#include <linux/smp.h>
18#include <linux/ptrace.h>
19#include <linux/slab.h>
20#include <linux/thread_info.h>
21#include <linux/uaccess.h>
22#include <linux/mman.h>
23#include <linux/types.h>
24#include <linux/err.h>
25#include <linux/module.h>
26#include <linux/compat.h>
27#include <linux/prctl.h>
28#include <linux/context_tracking.h>
29#include <asm/cacheflush.h>
30#include <asm/traps.h>
31#include <asm/uaccess.h>
32#include <asm/unaligned.h>
33#include <arch/abi.h>
34#include <arch/spr_def.h>
35#include <arch/opcode.h>
36
37
/*
 * This file handles unaligned exceptions for TILE-Gx. TILEPro unaligned
 * exceptions are handled in single_step.c instead.
 */
42
43int unaligned_printk;
44
45static int __init setup_unaligned_printk(char *str)
46{
47	long val;
48	if (kstrtol(str, 0, &val) != 0)
49		return 0;
50	unaligned_printk = val;
	pr_info("Printk for each unaligned data access is %s\n",
		unaligned_printk ? "enabled" : "disabled");
53	return 1;
54}
55__setup("unaligned_printk=", setup_unaligned_printk);
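/*
 * For example, booting with "unaligned_printk=1" on the kernel command line
 * enables a printk for each fixed-up unaligned access.
 */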
56
57unsigned int unaligned_fixup_count;
58
59#ifdef __tilegx__
60
/*
 * Unaligned-data JIT fixup code fragment. Reserved space is 128 bytes.
 * The first 64-bit word saves the fault PC address, the second word is the
 * faulting instruction bundle, followed by 14 JIT bundles.
 */
66
67struct unaligned_jit_fragment {
68	unsigned long       pc;
69	tilegx_bundle_bits  bundle;
70	tilegx_bundle_bits  insn[14];
71};
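/*
 * Layout check: 8-byte pc + 8-byte fault bundle + 14 * 8-byte JIT bundles
 * = 128 bytes, matching the reserved slot size noted above.
 */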
72
/*
 * Check if a nop or fnop is at the bundle's X0 pipeline.
 */
76
77static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
78{
79	return (((get_UnaryOpcodeExtension_X0(bundle) ==
80		  NOP_UNARY_OPCODE_X0) &&
81		 (get_RRROpcodeExtension_X0(bundle) ==
82		  UNARY_RRR_0_OPCODE_X0) &&
83		 (get_Opcode_X0(bundle) ==
84		  RRR_0_OPCODE_X0)) ||
85		((get_UnaryOpcodeExtension_X0(bundle) ==
86		  FNOP_UNARY_OPCODE_X0) &&
87		 (get_RRROpcodeExtension_X0(bundle) ==
88		  UNARY_RRR_0_OPCODE_X0) &&
89		 (get_Opcode_X0(bundle) ==
90		  RRR_0_OPCODE_X0)));
91}
92
/*
 * Check if a nop or fnop is at the bundle's X1 pipeline.
 */
96
97static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
98{
99	return (((get_UnaryOpcodeExtension_X1(bundle) ==
100		  NOP_UNARY_OPCODE_X1) &&
101		 (get_RRROpcodeExtension_X1(bundle) ==
102		  UNARY_RRR_0_OPCODE_X1) &&
103		 (get_Opcode_X1(bundle) ==
104		  RRR_0_OPCODE_X1)) ||
105		((get_UnaryOpcodeExtension_X1(bundle) ==
106		  FNOP_UNARY_OPCODE_X1) &&
107		 (get_RRROpcodeExtension_X1(bundle) ==
108		  UNARY_RRR_0_OPCODE_X1) &&
109		 (get_Opcode_X1(bundle) ==
110		  RRR_0_OPCODE_X1)));
111}
112
/*
 * Check if a nop or fnop is at the bundle's Y0 pipeline.
 */
116
117static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
118{
119	return (((get_UnaryOpcodeExtension_Y0(bundle) ==
120		  NOP_UNARY_OPCODE_Y0) &&
121		 (get_RRROpcodeExtension_Y0(bundle) ==
122		  UNARY_RRR_1_OPCODE_Y0) &&
123		 (get_Opcode_Y0(bundle) ==
124		  RRR_1_OPCODE_Y0)) ||
125		((get_UnaryOpcodeExtension_Y0(bundle) ==
126		  FNOP_UNARY_OPCODE_Y0) &&
127		 (get_RRROpcodeExtension_Y0(bundle) ==
128		  UNARY_RRR_1_OPCODE_Y0) &&
129		 (get_Opcode_Y0(bundle) ==
130		  RRR_1_OPCODE_Y0)));
131}
132
/*
 * Check if a nop or fnop is at the bundle's Y1 pipeline.
 */
136
137static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
138{
139	return (((get_UnaryOpcodeExtension_Y1(bundle) ==
140		  NOP_UNARY_OPCODE_Y1) &&
141		 (get_RRROpcodeExtension_Y1(bundle) ==
142		  UNARY_RRR_1_OPCODE_Y1) &&
143		 (get_Opcode_Y1(bundle) ==
144		  RRR_1_OPCODE_Y1)) ||
145		((get_UnaryOpcodeExtension_Y1(bundle) ==
146		  FNOP_UNARY_OPCODE_Y1) &&
147		 (get_RRROpcodeExtension_Y1(bundle) ==
148		  UNARY_RRR_1_OPCODE_Y1) &&
149		 (get_Opcode_Y1(bundle) ==
150		  RRR_1_OPCODE_Y1)));
151}
152
153/*
154 * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
155 */
156
157static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
158{
159	return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
160}
161
162/*
163 * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
164 */
165
166static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
167{
168	return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
169}
170
/*
 * Find the destination and source registers of the faulting unaligned access
 * instruction at X1 or Y2. Also allocate up to 3 scratch registers clob1,
 * clob2 and clob3, which are guaranteed to be different from any register
 * used in the faulting bundle. r_alias reports whether instructions other
 * than the unaligned load/store share a register with ra, rb or rd.
 */
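/*
 * For example (a hypothetical bundle), for an X-mode bundle whose X1 slot is
 * "st r10, r11" and whose X0 slot is "add r12, r10, r13", this returns
 * ra = 10 and rb = 11, picks clob1/2/3 from registers not used anywhere in
 * the bundle, and sets *r_alias because the X0 instruction also uses r10.
 */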
178
179static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
180		      uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
181		      uint64_t *clob3, bool *r_alias)
182{
183	int i;
184	uint64_t reg;
185	uint64_t reg_map = 0, alias_reg_map = 0, map;
186	bool alias = false;
187
	/*
	 * Parse the faulting bundle, find potentially used registers and mark
	 * the corresponding bits in reg_map and alias_reg_map. These two bit
	 * maps are used to find the scratch registers and to determine if
	 * there is a register alias.
	 */
194	if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */
195
196		reg = get_SrcA_Y2(bundle);
197		reg_map |= 1ULL << reg;
198		*ra = reg;
199		reg = get_SrcBDest_Y2(bundle);
200		reg_map |= 1ULL << reg;
201
202		if (rd) {
203			/* Load. */
204			*rd = reg;
205			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
206		} else {
207			/* Store. */
208			*rb = reg;
209			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
210		}
211
212		if (!is_bundle_y1_nop(bundle)) {
213			reg = get_SrcA_Y1(bundle);
214			reg_map |= (1ULL << reg);
215			map = (1ULL << reg);
216
217			reg = get_SrcB_Y1(bundle);
218			reg_map |= (1ULL << reg);
219			map |= (1ULL << reg);
220
221			reg = get_Dest_Y1(bundle);
222			reg_map |= (1ULL << reg);
223			map |= (1ULL << reg);
224
225			if (map & alias_reg_map)
226				alias = true;
227		}
228
229		if (!is_bundle_y0_nop(bundle)) {
230			reg = get_SrcA_Y0(bundle);
231			reg_map |= (1ULL << reg);
232			map = (1ULL << reg);
233
234			reg = get_SrcB_Y0(bundle);
235			reg_map |= (1ULL << reg);
236			map |= (1ULL << reg);
237
238			reg = get_Dest_Y0(bundle);
239			reg_map |= (1ULL << reg);
240			map |= (1ULL << reg);
241
242			if (map & alias_reg_map)
243				alias = true;
244		}
245	} else	{ /* X Mode Bundle. */
246
247		reg = get_SrcA_X1(bundle);
248		reg_map |= (1ULL << reg);
249		*ra = reg;
250		if (rd)	{
251			/* Load. */
252			reg = get_Dest_X1(bundle);
253			reg_map |= (1ULL << reg);
254			*rd = reg;
255			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
256		} else {
257			/* Store. */
258			reg = get_SrcB_X1(bundle);
259			reg_map |= (1ULL << reg);
260			*rb = reg;
261			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
262		}
263
264		if (!is_bundle_x0_nop(bundle)) {
265			reg = get_SrcA_X0(bundle);
266			reg_map |= (1ULL << reg);
267			map = (1ULL << reg);
268
269			reg = get_SrcB_X0(bundle);
270			reg_map |= (1ULL << reg);
271			map |= (1ULL << reg);
272
273			reg = get_Dest_X0(bundle);
274			reg_map |= (1ULL << reg);
275			map |= (1ULL << reg);
276
277			if (map & alias_reg_map)
278				alias = true;
279		}
280	}
281
	/*
	 * "alias" indicates whether the unaligned access registers collide
	 * with other registers in the same bundle. We simply test the
	 * all-register-operand case (RRR) and ignore the immediate forms. If
	 * a bundle has no register alias, we may do the fixup in a simpler or
	 * faster manner; if an immediate field happens to match a register
	 * number, we may end up falling back to the generic handling.
	 */
290
291	*r_alias = alias;
292
293	/* Flip bits on reg_map. */
294	reg_map ^= -1ULL;
295
	/* Scan the lower TREG_SP (54) bits of reg_map to find 3 set bits. */
297	for (i = 0; i < TREG_SP; i++) {
298		if (reg_map & (0x1ULL << i)) {
299			if (*clob1 == -1) {
300				*clob1 = i;
301			} else if (*clob2 == -1) {
302				*clob2 = i;
303			} else if (*clob3 == -1) {
304				*clob3 = i;
305				return;
306			}
307		}
308	}
309}
310
/*
 * Sanity check for registers ra, rb, rd and clob1/2/3. Return true if any of
 * them is unexpected.
 */
315
316static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
317		       uint64_t clob1, uint64_t clob2,  uint64_t clob3)
318{
319	bool unexpected = false;
320	if ((ra >= 56) && (ra != TREG_ZERO))
321		unexpected = true;
322
323	if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
324		unexpected = true;
325
326	if (rd != -1) {
327		if ((rd >= 56) && (rd != TREG_ZERO))
328			unexpected = true;
329	} else {
330		if ((rb >= 56) && (rb != TREG_ZERO))
331			unexpected = true;
332	}
333	return unexpected;
334}
335
336
337#define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
338#define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
339#define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
340#define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
341#define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
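/*
 * These masks select one issue slot out of a 64-bit bundle: X0 occupies the
 * low 31 bits and X1 the next 31 bits, while the Y-mode slots are made up of
 * non-contiguous fields, hence the split Y0/Y1/Y2 masks.
 */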
342
343#ifdef __LITTLE_ENDIAN
344#define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
345#else
346#define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
347#endif /* __LITTLE_ENDIAN */
348
/*
 * __JIT_CODE(...) creates template bundles in the .rodata.unalign_data
 * section. The corresponding static functions jit_x#_###(...) generate a
 * partial or whole bundle based on the template and the given arguments.
 */
354
355#define __JIT_CODE(_X_)						\
356	asm (".pushsection .rodata.unalign_data, \"a\"\n"	\
357	     _X_"\n"						\
358	     ".popsection\n")
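/*
 * Each __JIT_CODE() invocation below emits one template bundle into
 * .rodata.unalign_data; the matching jit_*() helper masks the template down
 * to the slot it needs (GX_INSN_X0/X1/..._MASK) and ORs in the operand
 * fields with the create_*() encoders, e.g. jit_x1_mtspr() fills in the SPR
 * number and source register of the "mtspr" template.
 */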
359
360__JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
361static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
362{
363	extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
364	return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
365		create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
366}
367
368__JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
369static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
370{
371	extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
372	return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
373		create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
374}
375
376__JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
377static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
378{
379	extern  tilegx_bundle_bits __unalign_jit_x0_addi;
380	return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
381		create_Dest_X0(rd) | create_SrcA_X0(ra) |
382		create_Imm8_X0(imm8);
383}
384
385__JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
386static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
387{
388	extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
389	return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) &  GX_INSN_X1_MASK) |
390		create_Dest_X1(rd) | create_SrcA_X1(ra);
391}
392
393__JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
394static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
395{
396	extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
397	return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
398		create_Dest_X0(rd) | create_SrcA_X0(ra) |
399		create_SrcB_X0(rb);
400}
401
402__JIT_CODE("__unalign_jit_x1_iret:   {iret}");
403static tilegx_bundle_bits  jit_x1_iret(void)
404{
405	extern  tilegx_bundle_bits __unalign_jit_x1_iret;
406	return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
407}
408
409__JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
410static tilegx_bundle_bits  jit_x0_fnop(void)
411{
412	extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
413	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
414}
415
416static tilegx_bundle_bits  jit_x1_fnop(void)
417{
418	extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
419	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
420}
421
422__JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
423static tilegx_bundle_bits  jit_y2_dummy(void)
424{
425	extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
426	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
427}
428
429static tilegx_bundle_bits  jit_y1_fnop(void)
430{
431	extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
432	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
433}
434
435__JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
436static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
437{
438	extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
439	return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
440		(~create_SrcA_X1(-1)) &
441		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
442		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
443}
444
445__JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
446static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
447{
448	extern  tilegx_bundle_bits __unalign_jit_x1_st;
449	return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
450		create_SrcA_X1(ra) | create_SrcB_X1(rb);
451}
452
453__JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
454static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
455{
456	extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
457	return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
458		(~create_SrcA_X1(-1)) &
459		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
460		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
461}
462
463__JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
464static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
465{
466	extern  tilegx_bundle_bits __unalign_jit_x1_ld;
467	return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
468		create_Dest_X1(rd) | create_SrcA_X1(ra);
469}
470
471__JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
472static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
473{
474	extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
475	return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
476		(~create_Dest_X1(-1)) &
477		GX_INSN_X1_MASK) | create_Dest_X1(rd) |
478		create_SrcA_X1(ra) | create_Imm8_X1(imm8);
479}
480
481__JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
482static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
483{
484	extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
485	return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
486		GX_INSN_X0_MASK) |
487		create_Dest_X0(rd) | create_SrcA_X0(ra) |
488		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
489}
490
491__JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
492static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
493{
494	extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
495	return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
496		GX_INSN_X0_MASK) |
497		create_Dest_X0(rd) | create_SrcA_X0(ra) |
498		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
499}
500
501__JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
502static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
503{
504	extern  tilegx_bundle_bits __unalign_jit_x1_addi;
505	return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
506		create_Dest_X1(rd) | create_SrcA_X1(ra) |
507		create_Imm8_X1(imm8);
508}
509
510__JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
511static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
512{
513	extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
514	return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
515		GX_INSN_X0_MASK) |
516		create_Dest_X0(rd) | create_SrcA_X0(ra) |
517		create_ShAmt_X0(imm6);
518}
519
520__JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
521static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
522{
523	extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
524	return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
525		GX_INSN_X0_MASK) |
526		create_Dest_X0(rd) | create_SrcA_X0(ra) |
527		create_ShAmt_X0(imm6);
528}
529
530__JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
531static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
532{
533	extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
534	return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
535		GX_INSN_X1_MASK) |
536		create_SrcA_X1(ra) | create_BrOff_X1(broff);
537}
538
539#undef __JIT_CODE
540
/*
 * This function generates the unaligned fixup JIT.
 *
 * We first find the unaligned load/store instruction's destination and source
 * registers (ra, rb and rd) and 3 scratch registers by calling find_regs().
 * The 3 scratch clobbers must not alias any register used in the faulting
 * bundle. Then we analyze the faulting bundle to determine whether it is a
 * load or store, the operand width, and whether there is a branch or address
 * increment. Finally the generated JIT is copied into the JIT code area in
 * user space.
 */
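/*
 * For instance, in the common case (ra != rb, no register alias, no
 * branch/link/add), an unaligned 8-byte load is rewritten into roughly the
 * following sequence (scratch-register save/restore on the user stack
 * omitted here):
 *
 *	addi     clob1, ra, 7      ; address of the last byte accessed
 *	ldna     rd, ra            ; aligned load covering the first byte
 *	ldna     clob1, clob1      ; aligned load covering the last byte
 *	dblalign rd, clob1, ra     ; merge the two halves into rd
 *	iret                       ; return to the bundle after the fault
 */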
551
552static
553void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
554		    int align_ctl)
555{
556	struct thread_info *info = current_thread_info();
557	struct unaligned_jit_fragment frag;
558	struct unaligned_jit_fragment *jit_code_area;
559	tilegx_bundle_bits bundle_2 = 0;
	/* If bundle_2_enable is false, bundle_2 contains only fnop/nop. */
561	bool     bundle_2_enable = true;
562	uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
	/*
	 * Indicates whether the unaligned access instruction's registers
	 * conflict with other registers in the same bundle.
	 */
568	bool     alias = false;
569	bool     load_n_store = true;
570	bool     load_store_signed = false;
571	unsigned int  load_store_size = 8;
	bool     y1_br = false;  /* True for a branch in same bundle at Y1. */
	int      y1_br_reg = 0;
	/* True for a link operation, i.e. jalr or lnk at Y1. */
	bool     y1_lr = false;
	int      y1_lr_reg = 0;
	bool     x1_add = false; /* True for a load/store ADD at X1. */
578	int      x1_add_imm8 = 0;
579	bool     unexpected = false;
580	int      n = 0, k;
581
582	jit_code_area =
583		(struct unaligned_jit_fragment *)(info->unalign_jit_base);
584
585	memset((void *)&frag, 0, sizeof(frag));
586
587	/* 0: X mode, Otherwise: Y mode. */
588	if (bundle & TILEGX_BUNDLE_MODE_MASK) {
589		unsigned int mod, opcode;
590
591		if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
592		    get_RRROpcodeExtension_Y1(bundle) ==
593		    UNARY_RRR_1_OPCODE_Y1) {
594
595			opcode = get_UnaryOpcodeExtension_Y1(bundle);
596
597			/*
598			 * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
599			 * pipeline.
600			 */
601			switch (opcode) {
602			case JALR_UNARY_OPCODE_Y1:
603			case JALRP_UNARY_OPCODE_Y1:
604				y1_lr = true;
605				y1_lr_reg = 55; /* Link register. */
606				/* FALLTHROUGH */
607			case JR_UNARY_OPCODE_Y1:
608			case JRP_UNARY_OPCODE_Y1:
609				y1_br = true;
610				y1_br_reg = get_SrcA_Y1(bundle);
611				break;
612			case LNK_UNARY_OPCODE_Y1:
613				/* "lnk" at Y1 pipeline. */
614				y1_lr = true;
615				y1_lr_reg = get_Dest_Y1(bundle);
616				break;
617			}
618		}
619
620		opcode = get_Opcode_Y2(bundle);
621		mod = get_Mode(bundle);
622
		/*
		 * bundle_2 is the bundle after turning Y2 into a dummy
		 * operation: "ld zero, sp".
		 */
627		bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
628
629		/* Make Y1 as fnop if Y1 is a branch or lnk operation. */
630		if (y1_br || y1_lr) {
631			bundle_2 &= ~(GX_INSN_Y1_MASK);
632			bundle_2 |= jit_y1_fnop();
633		}
634
635		if (is_y0_y1_nop(bundle_2))
636			bundle_2_enable = false;
637
638		if (mod == MODE_OPCODE_YC2) {
639			/* Store. */
640			load_n_store = false;
641			load_store_size = 1 << opcode;
642			load_store_signed = false;
643			find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
644				  &clob3, &alias);
645			if (load_store_size > 8)
646				unexpected = true;
647		} else {
648			/* Load. */
649			load_n_store = true;
650			if (mod == MODE_OPCODE_YB2) {
651				switch (opcode) {
652				case LD_OPCODE_Y2:
653					load_store_signed = false;
654					load_store_size = 8;
655					break;
656				case LD4S_OPCODE_Y2:
657					load_store_signed = true;
658					load_store_size = 4;
659					break;
660				case LD4U_OPCODE_Y2:
661					load_store_signed = false;
662					load_store_size = 4;
663					break;
664				default:
665					unexpected = true;
666				}
667			} else if (mod == MODE_OPCODE_YA2) {
668				if (opcode == LD2S_OPCODE_Y2) {
669					load_store_signed = true;
670					load_store_size = 2;
671				} else if (opcode == LD2U_OPCODE_Y2) {
672					load_store_signed = false;
673					load_store_size = 2;
674				} else
675					unexpected = true;
676			} else
677				unexpected = true;
678			find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
679				  &clob3, &alias);
680		}
681	} else {
682		unsigned int opcode;
683
684		/* bundle_2 is bundle after making X1 as "fnop". */
685		bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
686
687		if (is_x0_x1_nop(bundle_2))
688			bundle_2_enable = false;
689
690		if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
691			opcode = get_UnaryOpcodeExtension_X1(bundle);
692
693			if (get_RRROpcodeExtension_X1(bundle) ==
694			    UNARY_RRR_0_OPCODE_X1) {
695				load_n_store = true;
696				find_regs(bundle, &rd, &ra, &rb, &clob1,
697					  &clob2, &clob3, &alias);
698
699				switch (opcode) {
700				case LD_UNARY_OPCODE_X1:
701					load_store_signed = false;
702					load_store_size = 8;
703					break;
704				case LD4S_UNARY_OPCODE_X1:
705					load_store_signed = true;
706					/* FALLTHROUGH */
707				case LD4U_UNARY_OPCODE_X1:
708					load_store_size = 4;
709					break;
710
711				case LD2S_UNARY_OPCODE_X1:
712					load_store_signed = true;
713					/* FALLTHROUGH */
714				case LD2U_UNARY_OPCODE_X1:
715					load_store_size = 2;
716					break;
717				default:
718					unexpected = true;
719				}
720			} else {
721				load_n_store = false;
722				load_store_signed = false;
723				find_regs(bundle, 0, &ra, &rb,
724					  &clob1, &clob2, &clob3,
725					  &alias);
726
727				opcode = get_RRROpcodeExtension_X1(bundle);
728				switch (opcode)	{
729				case ST_RRR_0_OPCODE_X1:
730					load_store_size = 8;
731					break;
732				case ST4_RRR_0_OPCODE_X1:
733					load_store_size = 4;
734					break;
735				case ST2_RRR_0_OPCODE_X1:
736					load_store_size = 2;
737					break;
738				default:
739					unexpected = true;
740				}
741			}
742		} else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
743			load_n_store = true;
744			opcode = get_Imm8OpcodeExtension_X1(bundle);
745			switch (opcode)	{
746			case LD_ADD_IMM8_OPCODE_X1:
747				load_store_size = 8;
748				break;
749
750			case LD4S_ADD_IMM8_OPCODE_X1:
751				load_store_signed = true;
752				/* FALLTHROUGH */
753			case LD4U_ADD_IMM8_OPCODE_X1:
754				load_store_size = 4;
755				break;
756
757			case LD2S_ADD_IMM8_OPCODE_X1:
758				load_store_signed = true;
759				/* FALLTHROUGH */
760			case LD2U_ADD_IMM8_OPCODE_X1:
761				load_store_size = 2;
762				break;
763
764			case ST_ADD_IMM8_OPCODE_X1:
765				load_n_store = false;
766				load_store_size = 8;
767				break;
768			case ST4_ADD_IMM8_OPCODE_X1:
769				load_n_store = false;
770				load_store_size = 4;
771				break;
772			case ST2_ADD_IMM8_OPCODE_X1:
773				load_n_store = false;
774				load_store_size = 2;
775				break;
776			default:
777				unexpected = true;
778			}
779
780			if (!unexpected) {
781				x1_add = true;
782				if (load_n_store)
783					x1_add_imm8 = get_Imm8_X1(bundle);
784				else
785					x1_add_imm8 = get_Dest_Imm8_X1(bundle);
786			}
787
788			find_regs(bundle, load_n_store ? (&rd) : NULL,
789				  &ra, &rb, &clob1, &clob2, &clob3, &alias);
790		} else
791			unexpected = true;
792	}
793
	/*
	 * Sanity-check the register numbers extracted from the faulting
	 * bundle.
	 */
797	if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
798		unexpected = true;
799
	/* Warn if the address in ra is in fact aligned (no trap expected). */
801	if (!unexpected)
802		WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
803
804
	/*
	 * If the fault came from kernel space, we only need to take care of
	 * the unaligned "get_user/put_user" macros defined in "uaccess.h".
	 * Basically, we will handle bundles like this:
	 * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
	 * (refer to "arch/tile/include/asm/uaccess.h" for details).
	 * For either load or store, the byte-wise operation is performed by
	 * calling get_user() or put_user(). If the macro returns a non-zero
	 * value, that value is stored in rx, otherwise rx is set to zero.
	 * Finally make pc point to the next bundle and return.
	 */
816
817	if (EX1_PL(regs->ex1) != USER_PL) {
818
819		unsigned long rx = 0;
820		unsigned long x = 0, ret = 0;
821
822		if (y1_br || y1_lr || x1_add ||
823		    (load_store_signed !=
824		     (load_n_store && load_store_size == 4))) {
			/*
			 * Branch, link, load/store-add, or unexpected
			 * signedness is not handled here.
			 */
826			unexpected = true;
827		} else if (!unexpected) {
828			if (bundle & TILEGX_BUNDLE_MODE_MASK) {
				/*
				 * The faulting bundle is Y mode.
				 * Check whether Y1 and Y0 have the form
				 * { movei rx, 0; nop/fnop }; if so, find rx.
				 */
835
836				if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
837				    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
838				    (get_Imm8_Y1(bundle) == 0) &&
839				    is_bundle_y0_nop(bundle)) {
840					rx = get_Dest_Y1(bundle);
841				} else if ((get_Opcode_Y0(bundle) ==
842					    ADDI_OPCODE_Y0) &&
843					   (get_SrcA_Y0(bundle) == TREG_ZERO) &&
844					   (get_Imm8_Y0(bundle) == 0) &&
845					   is_bundle_y1_nop(bundle)) {
846					rx = get_Dest_Y0(bundle);
847				} else {
848					unexpected = true;
849				}
850			} else {
				/*
				 * The faulting bundle is X mode.
				 * Check whether X0 is 'movei rx, 0';
				 * if so, find rx.
				 */
856
857				if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
858				    && (get_Imm8OpcodeExtension_X0(bundle) ==
859					ADDI_IMM8_OPCODE_X0) &&
860				    (get_SrcA_X0(bundle) == TREG_ZERO) &&
861				    (get_Imm8_X0(bundle) == 0)) {
862					rx = get_Dest_X0(bundle);
863				} else {
864					unexpected = true;
865				}
866			}
867
868			/* rx should be less than 56. */
869			if (!unexpected && (rx >= 56))
870				unexpected = true;
871		}
872
873		if (!search_exception_tables(regs->pc)) {
874			/* No fixup in the exception tables for the pc. */
875			unexpected = true;
876		}
877
878		if (unexpected) {
879			/* Unexpected unalign kernel fault. */
880			struct task_struct *tsk = validate_current();
881
882			bust_spinlocks(1);
883
884			show_regs(regs);
885
886			if (unlikely(tsk->pid < 2)) {
887				panic("Kernel unalign fault running %s!",
888				      tsk->pid ? "init" : "the idle task");
889			}
890#ifdef SUPPORT_DIE
891			die("Oops", regs);
892#endif
893			bust_spinlocks(1);
894
895			do_group_exit(SIGKILL);
896
897		} else {
898			unsigned long i, b = 0;
899			unsigned char *ptr =
900				(unsigned char *)regs->regs[ra];
901			if (load_n_store) {
902				/* handle get_user(x, ptr) */
903				for (i = 0; i < load_store_size; i++) {
904					ret = get_user(b, ptr++);
905					if (!ret) {
906						/* Success! update x. */
907#ifdef __LITTLE_ENDIAN
908						x |= (b << (8 * i));
909#else
910						x <<= 8;
911						x |= b;
912#endif /* __LITTLE_ENDIAN */
913					} else {
914						x = 0;
915						break;
916					}
917				}
918
919				/* Sign-extend 4-byte loads. */
920				if (load_store_size == 4)
921					x = (long)(int)x;
922
923				/* Set register rd. */
924				regs->regs[rd] = x;
925
926				/* Set register rx. */
927				regs->regs[rx] = ret;
928
929				/* Bump pc. */
930				regs->pc += 8;
931
932			} else {
933				/* Handle put_user(x, ptr) */
934				x = regs->regs[rb];
935#ifdef __LITTLE_ENDIAN
936				b = x;
937#else
				/*
				 * Byte-swap x so that it is stored from low
				 * to high memory, the same as in the
				 * little-endian case.
				 */
943				switch (load_store_size) {
944				case 8:
945					b = swab64(x);
946					break;
947				case 4:
948					b = swab32(x);
949					break;
950				case 2:
951					b = swab16(x);
952					break;
953				}
954#endif /* __LITTLE_ENDIAN */
955				for (i = 0; i < load_store_size; i++) {
956					ret = put_user(b, ptr++);
957					if (ret)
958						break;
959					/* Success! shift 1 byte. */
960					b >>= 8;
961				}
962				/* Set register rx. */
963				regs->regs[rx] = ret;
964
965				/* Bump pc. */
966				regs->pc += 8;
967			}
968		}
969
970		unaligned_fixup_count++;
971
972		if (unaligned_printk) {
973			pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
974				current->comm, current->pid, regs->regs[ra]);
975		}
976
977		/* Done! Return to the exception handler. */
978		return;
979	}
980
981	if ((align_ctl == 0) || unexpected) {
982		siginfo_t info = {
983			.si_signo = SIGBUS,
984			.si_code = BUS_ADRALN,
985			.si_addr = (unsigned char __user *)0
986		};
987		if (unaligned_printk)
988			pr_info("Unalign bundle: unexp @%llx, %llx\n",
989				(unsigned long long)regs->pc,
990				(unsigned long long)bundle);
991
992		if (ra < 56) {
993			unsigned long uaa = (unsigned long)regs->regs[ra];
994			/* Set bus Address. */
995			info.si_addr = (unsigned char __user *)uaa;
996		}
997
998		unaligned_fixup_count++;
999
1000		trace_unhandled_signal("unaligned fixup trap", regs,
1001				       (unsigned long)info.si_addr, SIGBUS);
1002		force_sig_info(info.si_signo, &info, current);
1003		return;
1004	}
1005
1006#ifdef __LITTLE_ENDIAN
1007#define UA_FIXUP_ADDR_DELTA          1
1008#define UA_FIXUP_BFEXT_START(_B_)    0
1009#define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
1010#else /* __BIG_ENDIAN */
1011#define UA_FIXUP_ADDR_DELTA          -1
1012#define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
1013#define UA_FIXUP_BFEXT_END(_B_)      63
1014#endif /* __LITTLE_ENDIAN */
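/*
 * On little-endian the byte-by-byte store JIT walks the target address
 * upward (delta +1), and a merged load value ends up in the low bits
 * [0, 8*size-1]; on big-endian the walk is downward (delta -1) and the
 * value lands in the high bits [64-8*size, 63]. The bfexts/bfextu field
 * ranges generated below select exactly those bits.
 */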
1015
1016
1017
1018	if ((ra != rb) && (rd != TREG_SP) && !alias &&
1019	    !y1_br && !y1_lr && !x1_add) {
		/*
		 * Simple case: ra != rb, no register alias found, and no
		 * branch or link. This covers the majority of cases, and we
		 * can do a little better here than with the generic scheme
		 * below.
		 */
1026		if (!load_n_store) {
			/*
			 * Simple store: ra != rb, so no scratch register is
			 * needed. Just store and rotate right byte by byte.
			 */
1031#ifdef __BIG_ENDIAN
1032			frag.insn[n++] =
1033				jit_x0_addi(ra, ra, load_store_size - 1) |
1034				jit_x1_fnop();
1035#endif /* __BIG_ENDIAN */
1036			for (k = 0; k < load_store_size; k++) {
1037				/* Store a byte. */
1038				frag.insn[n++] =
1039					jit_x0_rotli(rb, rb, 56) |
1040					jit_x1_st1_add(ra, rb,
1041						       UA_FIXUP_ADDR_DELTA);
1042			}
1043#ifdef __BIG_ENDIAN
1044			frag.insn[n] = jit_x1_addi(ra, ra, 1);
1045#else
1046			frag.insn[n] = jit_x1_addi(ra, ra,
1047						   -1 * load_store_size);
1048#endif /* __LITTLE_ENDIAN */
1049
1050			if (load_store_size == 8) {
1051				frag.insn[n] |= jit_x0_fnop();
1052			} else if (load_store_size == 4) {
1053				frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1054			} else { /* = 2 */
1055				frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1056			}
1057			n++;
1058			if (bundle_2_enable)
1059				frag.insn[n++] = bundle_2;
1060			frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1061		} else {
1062			if (rd == ra) {
1063				/* Use two clobber registers: clob1/2. */
1064				frag.insn[n++] =
1065					jit_x0_addi(TREG_SP, TREG_SP, -16) |
1066					jit_x1_fnop();
1067				frag.insn[n++] =
1068					jit_x0_addi(clob1, ra, 7) |
1069					jit_x1_st_add(TREG_SP, clob1, -8);
1070				frag.insn[n++] =
1071					jit_x0_addi(clob2, ra, 0) |
1072					jit_x1_st(TREG_SP, clob2);
1073				frag.insn[n++] =
1074					jit_x0_fnop() |
1075					jit_x1_ldna(rd, ra);
1076				frag.insn[n++] =
1077					jit_x0_fnop() |
1078					jit_x1_ldna(clob1, clob1);
				/*
				 * Note: we must make sure that rd is not sp.
				 * Recover clob1/2 from the stack.
				 */
1083				frag.insn[n++] =
1084					jit_x0_dblalign(rd, clob1, clob2) |
1085					jit_x1_ld_add(clob2, TREG_SP, 8);
1086				frag.insn[n++] =
1087					jit_x0_fnop() |
1088					jit_x1_ld_add(clob1, TREG_SP, 16);
1089			} else {
1090				/* Use one clobber register: clob1 only. */
1091				frag.insn[n++] =
1092					jit_x0_addi(TREG_SP, TREG_SP, -16) |
1093					jit_x1_fnop();
1094				frag.insn[n++] =
1095					jit_x0_addi(clob1, ra, 7) |
1096					jit_x1_st(TREG_SP, clob1);
1097				frag.insn[n++] =
1098					jit_x0_fnop() |
1099					jit_x1_ldna(rd, ra);
1100				frag.insn[n++] =
1101					jit_x0_fnop() |
1102					jit_x1_ldna(clob1, clob1);
				/*
				 * Note: we must make sure that rd is not sp.
				 * Recover clob1 from the stack.
				 */
1107				frag.insn[n++] =
1108					jit_x0_dblalign(rd, clob1, ra) |
1109					jit_x1_ld_add(clob1, TREG_SP, 16);
1110			}
1111
1112			if (bundle_2_enable)
1113				frag.insn[n++] = bundle_2;
			/*
			 * For a non-8-byte load, extract the corresponding
			 * bytes and sign-extend if needed.
			 */
1118			if (load_store_size == 4) {
1119				if (load_store_signed)
1120					frag.insn[n++] =
1121						jit_x0_bfexts(
1122							rd, rd,
1123							UA_FIXUP_BFEXT_START(4),
1124							UA_FIXUP_BFEXT_END(4)) |
1125						jit_x1_fnop();
1126				else
1127					frag.insn[n++] =
1128						jit_x0_bfextu(
1129							rd, rd,
1130							UA_FIXUP_BFEXT_START(4),
1131							UA_FIXUP_BFEXT_END(4)) |
1132						jit_x1_fnop();
1133			} else if (load_store_size == 2) {
1134				if (load_store_signed)
1135					frag.insn[n++] =
1136						jit_x0_bfexts(
1137							rd, rd,
1138							UA_FIXUP_BFEXT_START(2),
1139							UA_FIXUP_BFEXT_END(2)) |
1140						jit_x1_fnop();
1141				else
1142					frag.insn[n++] =
1143						jit_x0_bfextu(
1144							rd, rd,
1145							UA_FIXUP_BFEXT_START(2),
1146							UA_FIXUP_BFEXT_END(2)) |
1147						jit_x1_fnop();
1148			}
1149
1150			frag.insn[n++] =
1151				jit_x0_fnop()  |
1152				jit_x1_iret();
1153		}
1154	} else if (!load_n_store) {
1155
		/*
		 * Generic memory store cases: use 3 clobber registers.
		 *
		 * Allocate space for saving clob2, clob1 and clob3 on the
		 * user's stack. Register clob3 points to where clob2 is
		 * saved, followed by clob1 and clob3 from high to low memory.
		 */
1163		frag.insn[n++] =
1164			jit_x0_addi(TREG_SP, TREG_SP, -32)    |
1165			jit_x1_fnop();
1166		frag.insn[n++] =
1167			jit_x0_addi(clob3, TREG_SP, 16)  |
1168			jit_x1_st_add(TREG_SP, clob3, 8);
1169#ifdef __LITTLE_ENDIAN
1170		frag.insn[n++] =
1171			jit_x0_addi(clob1, ra, 0)   |
1172			jit_x1_st_add(TREG_SP, clob1, 8);
1173#else
1174		frag.insn[n++] =
1175			jit_x0_addi(clob1, ra, load_store_size - 1)   |
1176			jit_x1_st_add(TREG_SP, clob1, 8);
1177#endif
1178		if (load_store_size == 8) {
			/*
			 * We store one byte at a time, not for speed but for
			 * compact code. After each store the data source
			 * register is rotated right by one byte, so it is
			 * unchanged after 8 stores.
			 */
1184			frag.insn[n++] =
1185				jit_x0_addi(clob2, TREG_ZERO, 7)     |
1186				jit_x1_st_add(TREG_SP, clob2, 16);
1187			frag.insn[n++] =
1188				jit_x0_rotli(rb, rb, 56)      |
1189				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1190			frag.insn[n++] =
1191				jit_x0_addi(clob2, clob2, -1) |
1192				jit_x1_bnezt(clob2, -1);
1193			frag.insn[n++] =
1194				jit_x0_fnop()                 |
1195				jit_x1_addi(clob2, y1_br_reg, 0);
1196		} else if (load_store_size == 4) {
1197			frag.insn[n++] =
1198				jit_x0_addi(clob2, TREG_ZERO, 3)     |
1199				jit_x1_st_add(TREG_SP, clob2, 16);
1200			frag.insn[n++] =
1201				jit_x0_rotli(rb, rb, 56)      |
1202				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1203			frag.insn[n++] =
1204				jit_x0_addi(clob2, clob2, -1) |
1205				jit_x1_bnezt(clob2, -1);
			/*
			 * Same as the 8-byte case, but rb must be rotated by
			 * another 4 bytes to recover it for the 4-byte store.
			 */
1210			frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
1211				jit_x1_addi(clob2, y1_br_reg, 0);
1212		} else { /* =2 */
1213			frag.insn[n++] =
1214				jit_x0_addi(clob2, rb, 0)     |
1215				jit_x1_st_add(TREG_SP, clob2, 16);
1216			for (k = 0; k < 2; k++) {
1217				frag.insn[n++] =
1218					jit_x0_shrui(rb, rb, 8)  |
1219					jit_x1_st1_add(clob1, rb,
1220						       UA_FIXUP_ADDR_DELTA);
1221			}
1222			frag.insn[n++] =
1223				jit_x0_addi(rb, clob2, 0)       |
1224				jit_x1_addi(clob2, y1_br_reg, 0);
1225		}
1226
1227		if (bundle_2_enable)
1228			frag.insn[n++] = bundle_2;
1229
1230		if (y1_lr) {
1231			frag.insn[n++] =
1232				jit_x0_fnop()                    |
1233				jit_x1_mfspr(y1_lr_reg,
1234					     SPR_EX_CONTEXT_0_0);
1235		}
1236		if (y1_br) {
1237			frag.insn[n++] =
1238				jit_x0_fnop()                    |
1239				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1240					     clob2);
1241		}
1242		if (x1_add) {
1243			frag.insn[n++] =
1244				jit_x0_addi(ra, ra, x1_add_imm8) |
1245				jit_x1_ld_add(clob2, clob3, -8);
1246		} else {
1247			frag.insn[n++] =
1248				jit_x0_fnop()                    |
1249				jit_x1_ld_add(clob2, clob3, -8);
1250		}
1251		frag.insn[n++] =
1252			jit_x0_fnop()   |
1253			jit_x1_ld_add(clob1, clob3, -8);
1254		frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
1255		frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1256
1257	} else {
		/*
		 * Generic memory load cases.
		 *
		 * Allocate space for saving clob1, clob2 and clob3 on the
		 * user's stack. Register clob3 points to where clob1 is
		 * saved, followed by clob2 and clob3 from high to low memory.
		 */
1265
1266		frag.insn[n++] =
1267			jit_x0_addi(TREG_SP, TREG_SP, -32) |
1268			jit_x1_fnop();
1269		frag.insn[n++] =
1270			jit_x0_addi(clob3, TREG_SP, 16) |
1271			jit_x1_st_add(TREG_SP, clob3, 8);
1272		frag.insn[n++] =
1273			jit_x0_addi(clob2, ra, 0) |
1274			jit_x1_st_add(TREG_SP, clob2, 8);
1275
1276		if (y1_br) {
1277			frag.insn[n++] =
1278				jit_x0_addi(clob1, y1_br_reg, 0) |
1279				jit_x1_st_add(TREG_SP, clob1, 16);
1280		} else {
1281			frag.insn[n++] =
1282				jit_x0_fnop() |
1283				jit_x1_st_add(TREG_SP, clob1, 16);
1284		}
1285
1286		if (bundle_2_enable)
1287			frag.insn[n++] = bundle_2;
1288
1289		if (y1_lr) {
1290			frag.insn[n++] =
1291				jit_x0_fnop()  |
1292				jit_x1_mfspr(y1_lr_reg,
1293					     SPR_EX_CONTEXT_0_0);
1294		}
1295
1296		if (y1_br) {
1297			frag.insn[n++] =
1298				jit_x0_fnop() |
1299				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1300					     clob1);
1301		}
1302
1303		frag.insn[n++] =
1304			jit_x0_addi(clob1, clob2, 7)      |
1305			jit_x1_ldna(rd, clob2);
1306		frag.insn[n++] =
1307			jit_x0_fnop()                     |
1308			jit_x1_ldna(clob1, clob1);
1309		frag.insn[n++] =
1310			jit_x0_dblalign(rd, clob1, clob2) |
1311			jit_x1_ld_add(clob1, clob3, -8);
1312		if (x1_add) {
1313			frag.insn[n++] =
1314				jit_x0_addi(ra, ra, x1_add_imm8) |
1315				jit_x1_ld_add(clob2, clob3, -8);
1316		} else {
1317			frag.insn[n++] =
1318				jit_x0_fnop()  |
1319				jit_x1_ld_add(clob2, clob3, -8);
1320		}
1321
1322		frag.insn[n++] =
1323			jit_x0_fnop() |
1324			jit_x1_ld(clob3, clob3);
1325
1326		if (load_store_size == 4) {
1327			if (load_store_signed)
1328				frag.insn[n++] =
1329					jit_x0_bfexts(
1330						rd, rd,
1331						UA_FIXUP_BFEXT_START(4),
1332						UA_FIXUP_BFEXT_END(4)) |
1333					jit_x1_fnop();
1334			else
1335				frag.insn[n++] =
1336					jit_x0_bfextu(
1337						rd, rd,
1338						UA_FIXUP_BFEXT_START(4),
1339						UA_FIXUP_BFEXT_END(4)) |
1340					jit_x1_fnop();
1341		} else if (load_store_size == 2) {
1342			if (load_store_signed)
1343				frag.insn[n++] =
1344					jit_x0_bfexts(
1345						rd, rd,
1346						UA_FIXUP_BFEXT_START(2),
1347						UA_FIXUP_BFEXT_END(2)) |
1348					jit_x1_fnop();
1349			else
1350				frag.insn[n++] =
1351					jit_x0_bfextu(
1352						rd, rd,
1353						UA_FIXUP_BFEXT_START(2),
1354						UA_FIXUP_BFEXT_END(2)) |
1355					jit_x1_fnop();
1356		}
1357
1358		frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1359	}
1360
1361	/* Max JIT bundle count is 14. */
1362	WARN_ON(n > 14);
1363
1364	if (!unexpected) {
1365		int status = 0;
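		/*
		 * The per-thread JIT page is divided into 128-byte fragment
		 * slots; hash the faulting PC (bundles are 8-byte aligned,
		 * hence pc >> 3) into one of the
		 * 1 << (PAGE_SHIFT - UNALIGN_JIT_SHIFT) slots.
		 */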
1366		int idx = (regs->pc >> 3) &
1367			((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
1368
1369		frag.pc = regs->pc;
1370		frag.bundle = bundle;
1371
1372		if (unaligned_printk) {
1373			pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
1374				current->comm, current->pid,
1375				(unsigned long)frag.pc,
1376				(unsigned long)frag.bundle,
1377				(int)alias, (int)rd, (int)ra,
1378				(int)rb, (int)bundle_2_enable,
1379				(int)y1_lr, (int)y1_br, (int)x1_add);
1380
1381			for (k = 0; k < n; k += 2)
1382				pr_info("[%d] %016llx %016llx\n",
1383					k, (unsigned long long)frag.insn[k],
1384					(unsigned long long)frag.insn[k+1]);
1385		}
1386
		/* Swap the bundle byte order for big-endian systems. */
1388#ifdef __BIG_ENDIAN
1389		frag.bundle = GX_INSN_BSWAP(frag.bundle);
1390		for (k = 0; k < n; k++)
1391			frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1392#endif /* __BIG_ENDIAN */
1393
1394		status = copy_to_user((void __user *)&jit_code_area[idx],
1395				      &frag, sizeof(frag));
1396		if (status) {
			/* Failed to copy the JIT into userland; send SIGSEGV. */
1398			siginfo_t info = {
1399				.si_signo = SIGSEGV,
1400				.si_code = SEGV_MAPERR,
1401				.si_addr = (void __user *)&jit_code_area[idx]
1402			};
1403
1404			pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
1405				current->pid, current->comm,
1406				(unsigned long long)&jit_code_area[idx]);
1407
1408			trace_unhandled_signal("segfault in unalign fixup",
1409					       regs,
1410					       (unsigned long)info.si_addr,
1411					       SIGSEGV);
1412			force_sig_info(info.si_signo, &info, current);
1413			return;
1414		}
1415
1416
1417		/* Do a cheaper increment, not accurate. */
1418		unaligned_fixup_count++;
1419		__flush_icache_range((unsigned long)&jit_code_area[idx],
1420				     (unsigned long)&jit_code_area[idx] +
1421				     sizeof(frag));
1422
		/* Set up SPR_EX_CONTEXT_0_0/1 for returning to user program. */
1424		__insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1425		__insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
1426
		/* Point pc at the start of the new JIT. */
1428		regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1429		/* Set ICS in SPR_EX_CONTEXT_K_1. */
1430		regs->ex1 = PL_ICS_EX1(USER_PL, 1);
1431	}
1432}
1433
1434
/*
 * C function to generate the unaligned-data JIT. Called from the
 * unaligned-data interrupt handler.
 *
 * First check whether the unaligned fixup is disabled, the exception did not
 * come from user space, or the sp register points to an unaligned address;
 * if so, generate a SIGBUS. Then map a page into user space as the JIT area
 * if it is not mapped yet. Generate the JIT code by calling jit_bundle_gen(),
 * then return back to the exception handler.
 *
 * The exception handler will "iret" to the newly generated JIT code after
 * restoring the caller-saved registers. The JIT code, in turn, will perform
 * another "iret" to resume the user's program.
 */
1449
1450void do_unaligned(struct pt_regs *regs, int vecnum)
1451{
1452	enum ctx_state prev_state = exception_enter();
1453	tilegx_bundle_bits __user  *pc;
1454	tilegx_bundle_bits bundle;
1455	struct thread_info *info = current_thread_info();
1456	int align_ctl;
1457
	/* Check the per-process unaligned fixup control. */
1459	align_ctl = unaligned_fixup;
1460	switch (task_thread_info(current)->align_ctl) {
1461	case PR_UNALIGN_NOPRINT:
1462		align_ctl = 1;
1463		break;
1464	case PR_UNALIGN_SIGBUS:
1465		align_ctl = 0;
1466		break;
1467	}
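	/*
	 * From here on, align_ctl >= 1 requests the JIT fixup; align_ctl <= 0
	 * makes the user-fault paths below raise SIGBUS instead. The prctl
	 * values override the global unaligned_fixup default.
	 */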
1468
	/* Enable interrupts in order to access userspace. */
1470	local_irq_enable();
1471
	/*
	 * If the fault came from kernel space, there are two choices:
	 * (a) unaligned_fixup < 1: first try the get/put_user exception-table
	 *     fixup so the access returns -EFAULT. If there is no fixup
	 *     entry, simply panic the kernel.
	 * (b) unaligned_fixup >= 1: try to fix the unaligned access if it was
	 *     triggered by the get_user/put_user() macros. Panic the kernel
	 *     if it is not fixable.
	 */
1480
1481	if (EX1_PL(regs->ex1) != USER_PL) {
1482
1483		if (align_ctl < 1) {
1484			unaligned_fixup_count++;
			/* Exception came from the kernel; try to fix it up. */
1486			if (fixup_exception(regs)) {
1487				if (unaligned_printk)
1488					pr_info("Unalign fixup: %d %llx @%llx\n",
1489						(int)unaligned_fixup,
1490						(unsigned long long)regs->ex1,
1491						(unsigned long long)regs->pc);
1492			} else {
1493				/* Not fixable. Go panic. */
1494				panic("Unalign exception in Kernel. pc=%lx",
1495				      regs->pc);
1496			}
1497		} else {
1498			/*
1499			 * Try to fix the exception. If we can't, panic the
1500			 * kernel.
1501			 */
1502			bundle = GX_INSN_BSWAP(
1503				*((tilegx_bundle_bits *)(regs->pc)));
1504			jit_bundle_gen(regs, bundle, align_ctl);
1505		}
1506		goto done;
1507	}
1508
	/*
	 * The fault came from user space, but the stack is not aligned, ICS
	 * is set, or the fixup is disabled (align_ctl < 0); trigger SIGBUS.
	 */
1513	if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1514		siginfo_t info = {
1515			.si_signo = SIGBUS,
1516			.si_code = BUS_ADRALN,
1517			.si_addr = (unsigned char __user *)0
1518		};
1519
1520		if (unaligned_printk)
1521			pr_info("Unalign fixup: %d %llx @%llx\n",
1522				(int)unaligned_fixup,
1523				(unsigned long long)regs->ex1,
1524				(unsigned long long)regs->pc);
1525
1526		unaligned_fixup_count++;
1527
1528		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1529		force_sig_info(info.si_signo, &info, current);
1530		goto done;
1531	}
1532
1533
	/* Read the bundle that caused the exception. */
1535	pc = (tilegx_bundle_bits __user *)(regs->pc);
1536	if (get_user(bundle, pc) != 0) {
		/* Probably never reached, since pc is a valid user address. */
1538		siginfo_t info = {
1539			.si_signo = SIGSEGV,
1540			.si_code = SEGV_MAPERR,
1541			.si_addr = (void __user *)pc
1542		};
1543		pr_err("Couldn't read instruction at %p trying to step\n", pc);
1544		trace_unhandled_signal("segfault in unalign fixup", regs,
1545				       (unsigned long)info.si_addr, SIGSEGV);
1546		force_sig_info(info.si_signo, &info, current);
1547		goto done;
1548	}
1549
1550	if (!info->unalign_jit_base) {
1551		void __user *user_page;
1552
1553		/*
1554		 * Allocate a page in userland.
1555		 * For 64-bit processes we try to place the mapping far
1556		 * from anything else that might be going on (specifically
1557		 * 64 GB below the top of the user address space).  If it
1558		 * happens not to be possible to put it there, it's OK;
1559		 * the kernel will choose another location and we'll
1560		 * remember it for later.
1561		 */
1562		if (is_compat_task())
1563			user_page = NULL;
1564		else
1565			user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1566				(current->pid << PAGE_SHIFT);
1567
1568		user_page = (void __user *) vm_mmap(NULL,
1569						    (unsigned long)user_page,
1570						    PAGE_SIZE,
1571						    PROT_EXEC | PROT_READ |
1572						    PROT_WRITE,
1573#ifdef CONFIG_HOMECACHE
1574						    MAP_CACHE_HOME_TASK |
1575#endif
1576						    MAP_PRIVATE |
1577						    MAP_ANONYMOUS,
1578						    0);
1579
1580		if (IS_ERR((void __force *)user_page)) {
1581			pr_err("Out of kernel pages trying do_mmap\n");
1582			goto done;
1583		}
1584
1585		/* Save the address in the thread_info struct */
1586		info->unalign_jit_base = user_page;
1587		if (unaligned_printk)
1588			pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
1589				raw_smp_processor_id(), current->pid,
1590				(unsigned long long)user_page);
1591	}
1592
1593	/* Generate unalign JIT */
1594	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1595
1596done:
1597	exception_exit(prev_state);
1598}
1599
1600#endif /* __tilegx__ */
1601