/*
 * Copyright 2013 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 *
 * A code-rewriter that handles unaligned exceptions.
 */

#include <linux/smp.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/uaccess.h>
#include <linux/mman.h>
#include <linux/types.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/prctl.h>
#include <linux/context_tracking.h>
#include <asm/cacheflush.h>
#include <asm/traps.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <arch/abi.h>
#include <arch/spr_def.h>
#include <arch/opcode.h>


/*
 * This file handles unaligned exceptions for tile-Gx. The tilepro's
 * unaligned exceptions are handled in single_step.c instead.
 */

int unaligned_printk;

static int __init setup_unaligned_printk(char *str)
{
	long val;
	if (kstrtol(str, 0, &val) != 0)
		return 0;
	unaligned_printk = val;
	pr_info("Printk for each unaligned data access is %s\n",
		unaligned_printk ? "enabled" : "disabled");
	return 1;
}
__setup("unaligned_printk=", setup_unaligned_printk);

unsigned int unaligned_fixup_count;

#ifdef __tilegx__

/*
 * Unaligned data JIT fixup code fragment. Reserved space is 128 bytes.
 * The first 64-bit word saves the fault PC address, the second word is
 * the fault instruction bundle, followed by 14 JIT bundles.
 */

struct unaligned_jit_fragment {
	unsigned long pc;
	tilegx_bundle_bits bundle;
	tilegx_bundle_bits insn[14];
};

/*
 * Check if a nop or fnop is at the bundle's X0 pipeline.
 */

static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
{
	return (((get_UnaryOpcodeExtension_X0(bundle) ==
		  NOP_UNARY_OPCODE_X0) &&
		 (get_RRROpcodeExtension_X0(bundle) ==
		  UNARY_RRR_0_OPCODE_X0) &&
		 (get_Opcode_X0(bundle) ==
		  RRR_0_OPCODE_X0)) ||
		((get_UnaryOpcodeExtension_X0(bundle) ==
		  FNOP_UNARY_OPCODE_X0) &&
		 (get_RRROpcodeExtension_X0(bundle) ==
		  UNARY_RRR_0_OPCODE_X0) &&
		 (get_Opcode_X0(bundle) ==
		  RRR_0_OPCODE_X0)));
}

/*
 * Check if a nop or fnop is at the bundle's X1 pipeline.
 */

static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
{
	return (((get_UnaryOpcodeExtension_X1(bundle) ==
		  NOP_UNARY_OPCODE_X1) &&
		 (get_RRROpcodeExtension_X1(bundle) ==
		  UNARY_RRR_0_OPCODE_X1) &&
		 (get_Opcode_X1(bundle) ==
		  RRR_0_OPCODE_X1)) ||
		((get_UnaryOpcodeExtension_X1(bundle) ==
		  FNOP_UNARY_OPCODE_X1) &&
		 (get_RRROpcodeExtension_X1(bundle) ==
		  UNARY_RRR_0_OPCODE_X1) &&
		 (get_Opcode_X1(bundle) ==
		  RRR_0_OPCODE_X1)));
}

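/*
 * These *_nop() checks are used by the JIT generator below to decide
 * whether the non-memory slots of the faulting bundle still contain
 * real work: when both companion slots are nop/fnop, the rewritten
 * "bundle_2" can be dropped from the generated fixup entirely.
 */
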
/*
 * Check if a nop or fnop is at the bundle's Y0 pipeline.
 */

static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
{
	return (((get_UnaryOpcodeExtension_Y0(bundle) ==
		  NOP_UNARY_OPCODE_Y0) &&
		 (get_RRROpcodeExtension_Y0(bundle) ==
		  UNARY_RRR_1_OPCODE_Y0) &&
		 (get_Opcode_Y0(bundle) ==
		  RRR_1_OPCODE_Y0)) ||
		((get_UnaryOpcodeExtension_Y0(bundle) ==
		  FNOP_UNARY_OPCODE_Y0) &&
		 (get_RRROpcodeExtension_Y0(bundle) ==
		  UNARY_RRR_1_OPCODE_Y0) &&
		 (get_Opcode_Y0(bundle) ==
		  RRR_1_OPCODE_Y0)));
}

/*
 * Check if a nop or fnop is at the bundle's Y1 pipeline.
 */

static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
{
	return (((get_UnaryOpcodeExtension_Y1(bundle) ==
		  NOP_UNARY_OPCODE_Y1) &&
		 (get_RRROpcodeExtension_Y1(bundle) ==
		  UNARY_RRR_1_OPCODE_Y1) &&
		 (get_Opcode_Y1(bundle) ==
		  RRR_1_OPCODE_Y1)) ||
		((get_UnaryOpcodeExtension_Y1(bundle) ==
		  FNOP_UNARY_OPCODE_Y1) &&
		 (get_RRROpcodeExtension_Y1(bundle) ==
		  UNARY_RRR_1_OPCODE_Y1) &&
		 (get_Opcode_Y1(bundle) ==
		  RRR_1_OPCODE_Y1)));
}

/*
 * Test if a bundle's Y0 and Y1 pipelines are both nop or fnop.
 */

static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
{
	return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
}

/*
 * Test if a bundle's X0 and X1 pipelines are both nop or fnop.
 */

static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
{
	return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
}

/*
 * Find the destination and source registers of the faulting unaligned
 * access instruction at X1 or Y2. Also allocate up to 3 scratch
 * registers clob1, clob2 and clob3, which are guaranteed to be
 * different from any register used in the fault bundle. *r_alias
 * returns whether the other instructions in the bundle share a
 * register with ra, rb or rd.
 */

static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
		      uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
		      uint64_t *clob3, bool *r_alias)
{
	int i;
	uint64_t reg;
	uint64_t reg_map = 0, alias_reg_map = 0, map;
	bool alias = false;

	/*
	 * Parse the fault bundle, find the potentially used registers and
	 * mark the corresponding bits in reg_map and alias_reg_map. These
	 * two bit maps are used to find the scratch registers and to
	 * determine whether there is a register alias.
	 */
	if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */

		reg = get_SrcA_Y2(bundle);
		reg_map |= 1ULL << reg;
		*ra = reg;
		reg = get_SrcBDest_Y2(bundle);
		reg_map |= 1ULL << reg;

		if (rd) {
			/* Load. */
			*rd = reg;
			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
		} else {
			/* Store. */
			*rb = reg;
			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
		}

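		/*
		 * The remaining slots (Y1/Y0 here, X0 in the X-mode branch
		 * below) are scanned only to mark their registers as busy,
		 * so that the scratch registers chosen at the end cannot
		 * collide with them, and to detect aliasing against the
		 * memory operands.
		 */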
		if (!is_bundle_y1_nop(bundle)) {
			reg = get_SrcA_Y1(bundle);
			reg_map |= (1ULL << reg);
			map = (1ULL << reg);

			reg = get_SrcB_Y1(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			reg = get_Dest_Y1(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			if (map & alias_reg_map)
				alias = true;
		}

		if (!is_bundle_y0_nop(bundle)) {
			reg = get_SrcA_Y0(bundle);
			reg_map |= (1ULL << reg);
			map = (1ULL << reg);

			reg = get_SrcB_Y0(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			reg = get_Dest_Y0(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			if (map & alias_reg_map)
				alias = true;
		}
	} else { /* X Mode Bundle. */

		reg = get_SrcA_X1(bundle);
		reg_map |= (1ULL << reg);
		*ra = reg;
		if (rd) {
			/* Load. */
			reg = get_Dest_X1(bundle);
			reg_map |= (1ULL << reg);
			*rd = reg;
			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
		} else {
			/* Store. */
			reg = get_SrcB_X1(bundle);
			reg_map |= (1ULL << reg);
			*rb = reg;
			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
		}

		if (!is_bundle_x0_nop(bundle)) {
			reg = get_SrcA_X0(bundle);
			reg_map |= (1ULL << reg);
			map = (1ULL << reg);

			reg = get_SrcB_X0(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			reg = get_Dest_X0(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			if (map & alias_reg_map)
				alias = true;
		}
	}

	/*
	 * "alias" indicates whether the registers of the unaligned access
	 * collide with any others in the same bundle. We simply test the
	 * all-register-operand case (RRR) and ignore immediate operands.
	 * If a bundle has no register alias we may do the fixup in a
	 * simpler and faster manner, so if an immediate field happens to
	 * match a register number we merely fall back to the generic
	 * handling.
	 */

	*r_alias = alias;

	/* Flip bits on reg_map. */
	reg_map ^= -1ULL;

	/* Scan reg_map's lower TREG_SP (54) bits to find 3 set bits. */
	for (i = 0; i < TREG_SP; i++) {
		if (reg_map & (0x1ULL << i)) {
			if (*clob1 == -1) {
				*clob1 = i;
			} else if (*clob2 == -1) {
				*clob2 = i;
			} else if (*clob3 == -1) {
				*clob3 = i;
				return;
			}
		}
	}
}

/*
 * Sanity check for registers ra, rb, rd, clob1/2/3. Return true if any of
 * them is unexpected.
 */

static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
		       uint64_t clob1, uint64_t clob2, uint64_t clob3)
{
	bool unexpected = false;
	if ((ra >= 56) && (ra != TREG_ZERO))
		unexpected = true;

	if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
		unexpected = true;

	if (rd != -1) {
		if ((rd >= 56) && (rd != TREG_ZERO))
			unexpected = true;
	} else {
		if ((rb >= 56) && (rb != TREG_ZERO))
			unexpected = true;
	}
	return unexpected;
}


#define GX_INSN_X0_MASK		((1ULL << 31) - 1)
#define GX_INSN_X1_MASK		(((1ULL << 31) - 1) << 31)
#define GX_INSN_Y0_MASK		((0xFULL << 27) | (0xFFFFFULL))
#define GX_INSN_Y1_MASK		(GX_INSN_Y0_MASK << 31)
#define GX_INSN_Y2_MASK		((0x7FULL << 51) | (0x7FULL << 20))

#ifdef __LITTLE_ENDIAN
#define GX_INSN_BSWAP(_bundle_)	(_bundle_)
#else
#define GX_INSN_BSWAP(_bundle_)	swab64(_bundle_)
#endif /* __LITTLE_ENDIAN */

/*
 * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data
 * section. The corresponding static function jit_x#_###(.) generates a
 * partial or whole bundle based on the template and the given arguments.
 */

#define __JIT_CODE(_X_) \
	asm (".pushsection .rodata.unalign_data, \"a\"\n" \
	     _X_"\n" \
	     ".popsection\n")

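/*
 * Each jit_*() helper below masks its template down to a single pipeline
 * slot and then ORs in freshly created operand fields; slots taken from
 * different templates are ORed together to build a complete bundle, e.g.
 *
 *	frag.insn[n++] = jit_x0_fnop() | jit_x1_ldna(rd, ra);
 *
 * as done in the generator further down.
 */
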
__JIT_CODE("__unalign_jit_x1_mtspr: {mtspr 0, r0}");
static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
{
	extern tilegx_bundle_bits __unalign_jit_x1_mtspr;
	return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
		create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
}

__JIT_CODE("__unalign_jit_x1_mfspr: {mfspr r0, 0}");
static tilegx_bundle_bits jit_x1_mfspr(int reg, int spr)
{
	extern tilegx_bundle_bits __unalign_jit_x1_mfspr;
	return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
		create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
}

__JIT_CODE("__unalign_jit_x0_addi: {addi r0, r0, 0; iret}");
static tilegx_bundle_bits jit_x0_addi(int rd, int ra, int imm8)
{
	extern tilegx_bundle_bits __unalign_jit_x0_addi;
	return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_Imm8_X0(imm8);
}

__JIT_CODE("__unalign_jit_x1_ldna: {ldna r0, r0}");
static tilegx_bundle_bits jit_x1_ldna(int rd, int ra)
{
	extern tilegx_bundle_bits __unalign_jit_x1_ldna;
	return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) & GX_INSN_X1_MASK) |
		create_Dest_X1(rd) | create_SrcA_X1(ra);
}

__JIT_CODE("__unalign_jit_x0_dblalign: {dblalign r0, r0, r0}");
static tilegx_bundle_bits jit_x0_dblalign(int rd, int ra, int rb)
{
	extern tilegx_bundle_bits __unalign_jit_x0_dblalign;
	return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_SrcB_X0(rb);
}

__JIT_CODE("__unalign_jit_x1_iret: {iret}");
static tilegx_bundle_bits jit_x1_iret(void)
{
	extern tilegx_bundle_bits __unalign_jit_x1_iret;
	return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
}

__JIT_CODE("__unalign_jit_x01_fnop: {fnop;fnop}");
static tilegx_bundle_bits jit_x0_fnop(void)
{
	extern tilegx_bundle_bits __unalign_jit_x01_fnop;
	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
}

static tilegx_bundle_bits jit_x1_fnop(void)
{
	extern tilegx_bundle_bits __unalign_jit_x01_fnop;
	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
}

__JIT_CODE("__unalign_jit_y2_dummy: {fnop; fnop; ld zero, sp}");
static tilegx_bundle_bits jit_y2_dummy(void)
{
	extern tilegx_bundle_bits __unalign_jit_y2_dummy;
	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
}

static tilegx_bundle_bits jit_y1_fnop(void)
{
	extern tilegx_bundle_bits __unalign_jit_y2_dummy;
	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
}

__JIT_CODE("__unalign_jit_x1_st1_add: {st1_add r1, r0, 0}");
static tilegx_bundle_bits jit_x1_st1_add(int ra, int rb, int imm8)
{
	extern tilegx_bundle_bits __unalign_jit_x1_st1_add;
	return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
		(~create_SrcA_X1(-1)) &
		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
}

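/*
 * Note on the st1_add helper above and the st_add/ld_add helpers below:
 * their templates encode "r1" in one operand field, so that field must
 * first be cleared with ~create_*_X1(-1) before the real operand is ORed
 * in; the r0 and immediate-0 fields are already zero and need no such
 * masking.
 */
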
__JIT_CODE("__unalign_jit_x1_st: {crc32_8 r1, r0, r0; st r0, r0}");
static tilegx_bundle_bits jit_x1_st(int ra, int rb)
{
	extern tilegx_bundle_bits __unalign_jit_x1_st;
	return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
		create_SrcA_X1(ra) | create_SrcB_X1(rb);
}

__JIT_CODE("__unalign_jit_x1_st_add: {st_add r1, r0, 0}");
static tilegx_bundle_bits jit_x1_st_add(int ra, int rb, int imm8)
{
	extern tilegx_bundle_bits __unalign_jit_x1_st_add;
	return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
		(~create_SrcA_X1(-1)) &
		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
}

__JIT_CODE("__unalign_jit_x1_ld: {crc32_8 r1, r0, r0; ld r0, r0}");
static tilegx_bundle_bits jit_x1_ld(int rd, int ra)
{
	extern tilegx_bundle_bits __unalign_jit_x1_ld;
	return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
		create_Dest_X1(rd) | create_SrcA_X1(ra);
}

__JIT_CODE("__unalign_jit_x1_ld_add: {ld_add r1, r0, 0}");
static tilegx_bundle_bits jit_x1_ld_add(int rd, int ra, int imm8)
{
	extern tilegx_bundle_bits __unalign_jit_x1_ld_add;
	return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
		(~create_Dest_X1(-1)) &
		GX_INSN_X1_MASK) | create_Dest_X1(rd) |
		create_SrcA_X1(ra) | create_Imm8_X1(imm8);
}

__JIT_CODE("__unalign_jit_x0_bfexts: {bfexts r0, r0, 0, 0}");
static tilegx_bundle_bits jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
{
	extern tilegx_bundle_bits __unalign_jit_x0_bfexts;
	return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
		GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
}

__JIT_CODE("__unalign_jit_x0_bfextu: {bfextu r0, r0, 0, 0}");
static tilegx_bundle_bits jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
{
	extern tilegx_bundle_bits __unalign_jit_x0_bfextu;
	return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
		GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
}

__JIT_CODE("__unalign_jit_x1_addi: {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
static tilegx_bundle_bits jit_x1_addi(int rd, int ra, int imm8)
{
	extern tilegx_bundle_bits __unalign_jit_x1_addi;
	return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
		create_Dest_X1(rd) | create_SrcA_X1(ra) |
		create_Imm8_X1(imm8);
}

__JIT_CODE("__unalign_jit_x0_shrui: {shrui r0, r0, 0; iret}");
static tilegx_bundle_bits jit_x0_shrui(int rd, int ra, int imm6)
{
	extern tilegx_bundle_bits __unalign_jit_x0_shrui;
	return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
		GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_ShAmt_X0(imm6);
}

__JIT_CODE("__unalign_jit_x0_rotli: {rotli r0, r0, 0; iret}");
static tilegx_bundle_bits jit_x0_rotli(int rd, int ra, int imm6)
{
	extern tilegx_bundle_bits __unalign_jit_x0_rotli;
	return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
		GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_ShAmt_X0(imm6);
}

__JIT_CODE("__unalign_jit_x1_bnezt: {bnezt r0, __unalign_jit_x1_bnezt}");
static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff)
{
	extern tilegx_bundle_bits __unalign_jit_x1_bnezt;
	return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
		GX_INSN_X1_MASK) |
		create_SrcA_X1(ra) | create_BrOff_X1(broff);
}

#undef __JIT_CODE

/*
 * This function generates the unaligned fixup JIT.
 *
 * We first find the unaligned load/store instruction's destination and
 * source registers ra, rb and rd, plus 3 scratch registers, by calling
 * find_regs(). The 3 scratch clobbers must not alias any register used
 * in the fault bundle. We then analyze the fault bundle to determine
 * whether it is a load or store, its operand width, and whether there
 * is a branch or address increment, etc. Finally the generated JIT code
 * is copied into the JIT code area in user space.
 */

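/*
 * For illustration, the simplest 8-byte load fixup emitted below (no
 * register alias, rd != ra, no branch/link/post-increment) boils down to:
 *
 *	clob1 = ra + 7
 *	ldna rd, ra; ldna clob1, clob1    (two aligned no-fault loads)
 *	dblalign rd, clob1, ra            (merge the two halves into rd)
 *	iret
 *
 * plus the spill/restore of clob1 on the user stack. This is a sketch of
 * the generated sequence, not a byte-exact listing.
 */
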
static
void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
		    int align_ctl)
{
	struct thread_info *info = current_thread_info();
	struct unaligned_jit_fragment frag;
	struct unaligned_jit_fragment *jit_code_area;
	tilegx_bundle_bits bundle_2 = 0;
	/* If bundle_2_enable = false, bundle_2 is a fnop/nop operation. */
	bool bundle_2_enable = true;
	uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
	/*
	 * Indicates whether the unaligned access instruction's registers
	 * collide with any others in the same bundle.
	 */
	bool alias = false;
	bool load_n_store = true;
	bool load_store_signed = false;
	unsigned int load_store_size = 8;
	bool y1_br = false;  /* True for a branch in the same bundle at Y1. */
	int y1_br_reg = 0;
	/* True for a link operation, i.e. jalr or lnk at Y1. */
	bool y1_lr = false;
	int y1_lr_reg = 0;
	bool x1_add = false; /* True for a load/store ADD instruction at X1. */
	int x1_add_imm8 = 0;
	bool unexpected = false;
	int n = 0, k;

	jit_code_area =
		(struct unaligned_jit_fragment *)(info->unalign_jit_base);

	memset((void *)&frag, 0, sizeof(frag));

	/* 0: X mode, Otherwise: Y mode. */
	if (bundle & TILEGX_BUNDLE_MODE_MASK) {
		unsigned int mod, opcode;

		if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
		    get_RRROpcodeExtension_Y1(bundle) ==
		    UNARY_RRR_1_OPCODE_Y1) {

			opcode = get_UnaryOpcodeExtension_Y1(bundle);

			/*
			 * Test for "jalr", "jalrp", "jr", "jrp" instructions
			 * at the Y1 pipeline.
			 */
			switch (opcode) {
			case JALR_UNARY_OPCODE_Y1:
			case JALRP_UNARY_OPCODE_Y1:
				y1_lr = true;
				y1_lr_reg = 55; /* Link register. */
				/* FALLTHROUGH */
			case JR_UNARY_OPCODE_Y1:
			case JRP_UNARY_OPCODE_Y1:
				y1_br = true;
				y1_br_reg = get_SrcA_Y1(bundle);
				break;
			case LNK_UNARY_OPCODE_Y1:
				/* "lnk" at the Y1 pipeline. */
				y1_lr = true;
				y1_lr_reg = get_Dest_Y1(bundle);
				break;
			}
		}

		opcode = get_Opcode_Y2(bundle);
		mod = get_Mode(bundle);

		/*
		 * bundle_2 is the bundle after making Y2 a dummy operation:
		 * "ld zero, sp".
		 */
		bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();

		/* Make Y1 a fnop if Y1 is a branch or lnk operation. */
		if (y1_br || y1_lr) {
			bundle_2 &= ~(GX_INSN_Y1_MASK);
			bundle_2 |= jit_y1_fnop();
		}

		if (is_y0_y1_nop(bundle_2))
			bundle_2_enable = false;

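		/*
		 * Decode the Y2 memory slot: roughly, the mode selects the
		 * operand-size group (and store vs. load) and the opcode
		 * picks the exact operation within it.
		 */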
		if (mod == MODE_OPCODE_YC2) {
			/* Store. */
			load_n_store = false;
			load_store_size = 1 << opcode;
			load_store_signed = false;
			find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
				  &clob3, &alias);
			if (load_store_size > 8)
				unexpected = true;
		} else {
			/* Load. */
			load_n_store = true;
			if (mod == MODE_OPCODE_YB2) {
				switch (opcode) {
				case LD_OPCODE_Y2:
					load_store_signed = false;
					load_store_size = 8;
					break;
				case LD4S_OPCODE_Y2:
					load_store_signed = true;
					load_store_size = 4;
					break;
				case LD4U_OPCODE_Y2:
					load_store_signed = false;
					load_store_size = 4;
					break;
				default:
					unexpected = true;
				}
			} else if (mod == MODE_OPCODE_YA2) {
				if (opcode == LD2S_OPCODE_Y2) {
					load_store_signed = true;
					load_store_size = 2;
				} else if (opcode == LD2U_OPCODE_Y2) {
					load_store_signed = false;
					load_store_size = 2;
				} else
					unexpected = true;
			} else
				unexpected = true;
			find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
				  &clob3, &alias);
		}
	} else {
		unsigned int opcode;

		/* bundle_2 is the bundle after making X1 a "fnop". */
		bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();

		if (is_x0_x1_nop(bundle_2))
			bundle_2_enable = false;

		if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
			opcode = get_UnaryOpcodeExtension_X1(bundle);

			if (get_RRROpcodeExtension_X1(bundle) ==
			    UNARY_RRR_0_OPCODE_X1) {
				load_n_store = true;
				find_regs(bundle, &rd, &ra, &rb, &clob1,
					  &clob2, &clob3, &alias);

				switch (opcode) {
				case LD_UNARY_OPCODE_X1:
					load_store_signed = false;
					load_store_size = 8;
					break;
				case LD4S_UNARY_OPCODE_X1:
					load_store_signed = true;
					/* FALLTHROUGH */
				case LD4U_UNARY_OPCODE_X1:
					load_store_size = 4;
					break;

				case LD2S_UNARY_OPCODE_X1:
					load_store_signed = true;
					/* FALLTHROUGH */
				case LD2U_UNARY_OPCODE_X1:
					load_store_size = 2;
					break;
				default:
					unexpected = true;
				}
			} else {
				load_n_store = false;
				load_store_signed = false;
				find_regs(bundle, 0, &ra, &rb,
					  &clob1, &clob2, &clob3,
					  &alias);

				opcode = get_RRROpcodeExtension_X1(bundle);
				switch (opcode) {
				case ST_RRR_0_OPCODE_X1:
					load_store_size = 8;
					break;
				case ST4_RRR_0_OPCODE_X1:
					load_store_size = 4;
					break;
				case ST2_RRR_0_OPCODE_X1:
					load_store_size = 2;
					break;
				default:
					unexpected = true;
				}
			}
		} else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
			load_n_store = true;
			opcode = get_Imm8OpcodeExtension_X1(bundle);
			switch (opcode) {
			case LD_ADD_IMM8_OPCODE_X1:
				load_store_size = 8;
				break;

			case LD4S_ADD_IMM8_OPCODE_X1:
				load_store_signed = true;
				/* FALLTHROUGH */
			case LD4U_ADD_IMM8_OPCODE_X1:
				load_store_size = 4;
				break;

			case LD2S_ADD_IMM8_OPCODE_X1:
				load_store_signed = true;
				/* FALLTHROUGH */
			case LD2U_ADD_IMM8_OPCODE_X1:
				load_store_size = 2;
				break;

			case ST_ADD_IMM8_OPCODE_X1:
				load_n_store = false;
				load_store_size = 8;
				break;
			case ST4_ADD_IMM8_OPCODE_X1:
				load_n_store = false;
				load_store_size = 4;
				break;
			case ST2_ADD_IMM8_OPCODE_X1:
				load_n_store = false;
				load_store_size = 2;
				break;
			default:
				unexpected = true;
			}

			if (!unexpected) {
				x1_add = true;
				if (load_n_store)
					x1_add_imm8 = get_Imm8_X1(bundle);
				else
					x1_add_imm8 = get_Dest_Imm8_X1(bundle);
			}

			find_regs(bundle, load_n_store ? (&rd) : NULL,
				  &ra, &rb, &clob1, &clob2, &clob3, &alias);
		} else
			unexpected = true;
	}

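	/*
	 * At this point load_n_store, load_store_size and load_store_signed
	 * describe the faulting memory operation, ra/rb/rd hold its
	 * operands, clob1-3 are free scratch registers, and y1_br/y1_lr/
	 * x1_add record any extra side effect (branch, link or
	 * post-increment) that the fixup must preserve.
	 */
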
	/*
	 * Some sanity checks for the register numbers extracted from the
	 * fault bundle.
	 */
	if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
		unexpected = true;

	/* Give a warning if register ra holds an already aligned address. */
	if (!unexpected)
		WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));


	/*
	 * If the fault came from kernel space, we only need to take care of
	 * the unaligned "get_user/put_user" macros defined in "uaccess.h".
	 * Basically, we will handle bundles like these:
	 *   {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
	 * (refer to "arch/tile/include/asm/uaccess.h" for details).
	 * For either a load or a store, the byte-wise operation is performed
	 * by calling get_user() or put_user(). If the macro returns a
	 * non-zero value, that value is written to rx, otherwise rx is set
	 * to zero. Finally, pc is advanced to the next bundle and we return.
	 */

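	/*
	 * For example (assuming the uaccess.h implementation referenced
	 * above), a 4-byte __get_user() expands to a bundle roughly of the
	 * form
	 *	{ ld4s rd, ra; movei rx, 0 }
	 * and the code below emulates that load byte by byte with
	 * get_user(), leaving the resulting error code in rx, just as the
	 * macro's own fixup path would have done.
	 */
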
	if (EX1_PL(regs->ex1) != USER_PL) {

		unsigned long rx = 0;
		unsigned long x = 0, ret = 0;

		if (y1_br || y1_lr || x1_add ||
		    (load_store_signed !=
		     (load_n_store && load_store_size == 4))) {
			/*
			 * Branches, links, load/store-add or an unexpected
			 * sign-extension are not handled here.
			 */
			unexpected = true;
		} else if (!unexpected) {
			if (bundle & TILEGX_BUNDLE_MODE_MASK) {
				/*
				 * The fault bundle is Y mode.
				 * Check if Y1 and Y0 are of the form
				 * { movei rx, 0; nop/fnop }; if so,
				 * find the rx.
				 */

				if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
				    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
				    (get_Imm8_Y1(bundle) == 0) &&
				    is_bundle_y0_nop(bundle)) {
					rx = get_Dest_Y1(bundle);
				} else if ((get_Opcode_Y0(bundle) ==
					    ADDI_OPCODE_Y0) &&
					   (get_SrcA_Y0(bundle) == TREG_ZERO) &&
					   (get_Imm8_Y0(bundle) == 0) &&
					   is_bundle_y1_nop(bundle)) {
					rx = get_Dest_Y0(bundle);
				} else {
					unexpected = true;
				}
			} else {
				/*
				 * The fault bundle is X mode.
				 * Check if X0 is 'movei rx, 0';
				 * if so, find the rx.
				 */

				if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
				    && (get_Imm8OpcodeExtension_X0(bundle) ==
					ADDI_IMM8_OPCODE_X0) &&
				    (get_SrcA_X0(bundle) == TREG_ZERO) &&
				    (get_Imm8_X0(bundle) == 0)) {
					rx = get_Dest_X0(bundle);
				} else {
					unexpected = true;
				}
			}

			/* rx should be less than 56. */
			if (!unexpected && (rx >= 56))
				unexpected = true;
		}

		if (!search_exception_tables(regs->pc)) {
			/* No fixup in the exception tables for the pc. */
			unexpected = true;
		}

		if (unexpected) {
			/* Unexpected unaligned kernel fault. */
			struct task_struct *tsk = validate_current();

			bust_spinlocks(1);

			show_regs(regs);

			if (unlikely(tsk->pid < 2)) {
				panic("Kernel unalign fault running %s!",
				      tsk->pid ? "init" : "the idle task");
			}
#ifdef SUPPORT_DIE
			die("Oops", regs);
#endif
			bust_spinlocks(1);

			do_group_exit(SIGKILL);

		} else {
			unsigned long i, b = 0;
			unsigned char *ptr =
				(unsigned char *)regs->regs[ra];
			if (load_n_store) {
				/* Handle get_user(x, ptr). */
				for (i = 0; i < load_store_size; i++) {
					ret = get_user(b, ptr++);
					if (!ret) {
						/* Success! Update x. */
#ifdef __LITTLE_ENDIAN
						x |= (b << (8 * i));
#else
						x <<= 8;
						x |= b;
#endif /* __LITTLE_ENDIAN */
					} else {
						x = 0;
						break;
					}
				}

				/* Sign-extend 4-byte loads. */
				if (load_store_size == 4)
					x = (long)(int)x;

				/* Set register rd. */
				regs->regs[rd] = x;

				/* Set register rx. */
				regs->regs[rx] = ret;

				/* Bump pc. */
				regs->pc += 8;

			} else {
				/* Handle put_user(x, ptr). */
				x = regs->regs[rb];
#ifdef __LITTLE_ENDIAN
				b = x;
#else
				/*
				 * Swap x so that it is stored from low to
				 * high memory, the same as in the
				 * little-endian case.
				 */
				switch (load_store_size) {
				case 8:
					b = swab64(x);
					break;
				case 4:
					b = swab32(x);
					break;
				case 2:
					b = swab16(x);
					break;
				}
#endif /* __LITTLE_ENDIAN */
				for (i = 0; i < load_store_size; i++) {
					ret = put_user(b, ptr++);
					if (ret)
						break;
					/* Success! Shift 1 byte. */
					b >>= 8;
				}
				/* Set register rx. */
				regs->regs[rx] = ret;

				/* Bump pc. */
				regs->pc += 8;
			}
		}

		unaligned_fixup_count++;

		if (unaligned_printk) {
			pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
				current->comm, current->pid, regs->regs[ra]);
		}

		/* Done! Return to the exception handler. */
		return;
	}

	if ((align_ctl == 0) || unexpected) {
		siginfo_t info = {
			.si_signo = SIGBUS,
			.si_code = BUS_ADRALN,
			.si_addr = (unsigned char __user *)0
		};
		if (unaligned_printk)
			pr_info("Unalign bundle: unexp @%llx, %llx\n",
				(unsigned long long)regs->pc,
				(unsigned long long)bundle);

		if (ra < 56) {
			unsigned long uaa = (unsigned long)regs->regs[ra];
			/* Set bus address. */
			info.si_addr = (unsigned char __user *)uaa;
		}

		unaligned_fixup_count++;

		trace_unhandled_signal("unaligned fixup trap", regs,
				       (unsigned long)info.si_addr, SIGBUS);
		force_sig_info(info.si_signo, &info, current);
		return;
	}

#ifdef __LITTLE_ENDIAN
#define UA_FIXUP_ADDR_DELTA		1
#define UA_FIXUP_BFEXT_START(_B_)	0
#define UA_FIXUP_BFEXT_END(_B_)		(8 * (_B_) - 1)
#else /* __BIG_ENDIAN */
#define UA_FIXUP_ADDR_DELTA		-1
#define UA_FIXUP_BFEXT_START(_B_)	(64 - 8 * (_B_))
#define UA_FIXUP_BFEXT_END(_B_)		63
#endif /* __LITTLE_ENDIAN */


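	/*
	 * Three code-generation strategies follow: a fast path for the
	 * common "no alias, no branch/link/post-increment" case, then
	 * generic store and generic load sequences that spill clob1-3 to
	 * the user stack and also replay any Y1 branch/link or X1
	 * post-increment carried by the original bundle.
	 */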
	if ((ra != rb) && (rd != TREG_SP) && !alias &&
	    !y1_br && !y1_lr && !x1_add) {
		/*
		 * Simple case: ra != rb, no register alias found, and no
		 * branch or link. This will be the majority. We can do a
		 * little better here than with the generic scheme below.
		 */
		if (!load_n_store) {
			/*
			 * Simple store: ra != rb, no need for a scratch
			 * register. Just store and rotate to the right
			 * bytewise.
			 */
#ifdef __BIG_ENDIAN
			frag.insn[n++] =
				jit_x0_addi(ra, ra, load_store_size - 1) |
				jit_x1_fnop();
#endif /* __BIG_ENDIAN */
			for (k = 0; k < load_store_size; k++) {
				/* Store a byte. */
				frag.insn[n++] =
					jit_x0_rotli(rb, rb, 56) |
					jit_x1_st1_add(ra, rb,
						       UA_FIXUP_ADDR_DELTA);
			}
#ifdef __BIG_ENDIAN
			frag.insn[n] = jit_x1_addi(ra, ra, 1);
#else
			frag.insn[n] = jit_x1_addi(ra, ra,
						   -1 * load_store_size);
#endif /* __LITTLE_ENDIAN */

			if (load_store_size == 8) {
				frag.insn[n] |= jit_x0_fnop();
			} else if (load_store_size == 4) {
				frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
			} else { /* = 2 */
				frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
			}
			n++;
			if (bundle_2_enable)
				frag.insn[n++] = bundle_2;
			frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
		} else {
			if (rd == ra) {
				/* Use two clobber registers: clob1/2. */
				frag.insn[n++] =
					jit_x0_addi(TREG_SP, TREG_SP, -16) |
					jit_x1_fnop();
				frag.insn[n++] =
					jit_x0_addi(clob1, ra, 7) |
					jit_x1_st_add(TREG_SP, clob1, -8);
				frag.insn[n++] =
					jit_x0_addi(clob2, ra, 0) |
					jit_x1_st(TREG_SP, clob2);
				frag.insn[n++] =
					jit_x0_fnop() |
					jit_x1_ldna(rd, ra);
				frag.insn[n++] =
					jit_x0_fnop() |
					jit_x1_ldna(clob1, clob1);
				/*
				 * Note: we must make sure that rd is not
				 * sp. Recover clob1/2 from the stack.
				 */
				frag.insn[n++] =
					jit_x0_dblalign(rd, clob1, clob2) |
					jit_x1_ld_add(clob2, TREG_SP, 8);
				frag.insn[n++] =
					jit_x0_fnop() |
					jit_x1_ld_add(clob1, TREG_SP, 16);
			} else {
				/* Use one clobber register: clob1 only. */
				frag.insn[n++] =
					jit_x0_addi(TREG_SP, TREG_SP, -16) |
					jit_x1_fnop();
				frag.insn[n++] =
					jit_x0_addi(clob1, ra, 7) |
					jit_x1_st(TREG_SP, clob1);
				frag.insn[n++] =
					jit_x0_fnop() |
					jit_x1_ldna(rd, ra);
				frag.insn[n++] =
					jit_x0_fnop() |
					jit_x1_ldna(clob1, clob1);
				/*
				 * Note: we must make sure that rd is not
				 * sp. Recover clob1 from the stack.
				 */
				frag.insn[n++] =
					jit_x0_dblalign(rd, clob1, ra) |
					jit_x1_ld_add(clob1, TREG_SP, 16);
			}

			if (bundle_2_enable)
				frag.insn[n++] = bundle_2;
			/*
			 * For a non-8-byte load, extract the relevant
			 * bytes and sign-extend if needed.
			 */
			if (load_store_size == 4) {
				if (load_store_signed)
					frag.insn[n++] =
						jit_x0_bfexts(
							rd, rd,
							UA_FIXUP_BFEXT_START(4),
							UA_FIXUP_BFEXT_END(4)) |
						jit_x1_fnop();
				else
					frag.insn[n++] =
						jit_x0_bfextu(
							rd, rd,
							UA_FIXUP_BFEXT_START(4),
							UA_FIXUP_BFEXT_END(4)) |
						jit_x1_fnop();
			} else if (load_store_size == 2) {
				if (load_store_signed)
					frag.insn[n++] =
						jit_x0_bfexts(
							rd, rd,
							UA_FIXUP_BFEXT_START(2),
							UA_FIXUP_BFEXT_END(2)) |
						jit_x1_fnop();
				else
					frag.insn[n++] =
						jit_x0_bfextu(
							rd, rd,
							UA_FIXUP_BFEXT_START(2),
							UA_FIXUP_BFEXT_END(2)) |
						jit_x1_fnop();
			}

			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_iret();
		}
	} else if (!load_n_store) {

		/*
		 * Generic memory store case: use 3 clobber registers.
		 *
		 * Allocate space for saving clob2, clob1 and clob3 on the
		 * user's stack. Register clob3 points to where clob2 is
		 * saved, followed by clob1 and clob3 from high to low
		 * memory.
		 */
		frag.insn[n++] =
			jit_x0_addi(TREG_SP, TREG_SP, -32) |
			jit_x1_fnop();
		frag.insn[n++] =
			jit_x0_addi(clob3, TREG_SP, 16) |
			jit_x1_st_add(TREG_SP, clob3, 8);
#ifdef __LITTLE_ENDIAN
		frag.insn[n++] =
			jit_x0_addi(clob1, ra, 0) |
			jit_x1_st_add(TREG_SP, clob1, 8);
#else
		frag.insn[n++] =
			jit_x0_addi(clob1, ra, load_store_size - 1) |
			jit_x1_st_add(TREG_SP, clob1, 8);
#endif
		if (load_store_size == 8) {
			/*
			 * We store one byte at a time, not for speed but
			 * for compact code. After each store the data
			 * source register is rotated by one byte, so rb is
			 * unchanged after 8 stores.
			 */
			frag.insn[n++] =
				jit_x0_addi(clob2, TREG_ZERO, 7) |
				jit_x1_st_add(TREG_SP, clob2, 16);
			frag.insn[n++] =
				jit_x0_rotli(rb, rb, 56) |
				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
			frag.insn[n++] =
				jit_x0_addi(clob2, clob2, -1) |
				jit_x1_bnezt(clob2, -1);
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_addi(clob2, y1_br_reg, 0);
		} else if (load_store_size == 4) {
			frag.insn[n++] =
				jit_x0_addi(clob2, TREG_ZERO, 3) |
				jit_x1_st_add(TREG_SP, clob2, 16);
			frag.insn[n++] =
				jit_x0_rotli(rb, rb, 56) |
				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
			frag.insn[n++] =
				jit_x0_addi(clob2, clob2, -1) |
				jit_x1_bnezt(clob2, -1);
			/*
			 * Same as the 8-byte case, but we need to rotate
			 * another 4 bytes to recover rb after the 4-byte
			 * store.
			 */
			frag.insn[n++] = jit_x0_rotli(rb, rb, 32) |
				jit_x1_addi(clob2, y1_br_reg, 0);
		} else { /* = 2 */
			frag.insn[n++] =
				jit_x0_addi(clob2, rb, 0) |
				jit_x1_st_add(TREG_SP, clob2, 16);
			for (k = 0; k < 2; k++) {
				frag.insn[n++] =
					jit_x0_shrui(rb, rb, 8) |
					jit_x1_st1_add(clob1, rb,
						       UA_FIXUP_ADDR_DELTA);
			}
			frag.insn[n++] =
				jit_x0_addi(rb, clob2, 0) |
				jit_x1_addi(clob2, y1_br_reg, 0);
		}

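		/*
		 * Now replay the rest of the original bundle (if any), fix
		 * up any link/branch side effect via SPR_EX_CONTEXT_0_0,
		 * apply a pending address post-increment, then restore the
		 * clobbers from the stack and return with iret.
		 */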
		if (bundle_2_enable)
			frag.insn[n++] = bundle_2;

		if (y1_lr) {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_mfspr(y1_lr_reg,
					     SPR_EX_CONTEXT_0_0);
		}
		if (y1_br) {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
					     clob2);
		}
		if (x1_add) {
			frag.insn[n++] =
				jit_x0_addi(ra, ra, x1_add_imm8) |
				jit_x1_ld_add(clob2, clob3, -8);
		} else {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_ld_add(clob2, clob3, -8);
		}
		frag.insn[n++] =
			jit_x0_fnop() |
			jit_x1_ld_add(clob1, clob3, -8);
		frag.insn[n++] = jit_x0_fnop() | jit_x1_ld(clob3, clob3);
		frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();

	} else {
		/*
		 * Generic memory load case.
		 *
		 * Allocate space for saving clob1, clob2 and clob3 on the
		 * user's stack. Register clob3 points to where clob1 is
		 * saved, followed by clob2 and clob3 from high to low
		 * memory.
		 */

		frag.insn[n++] =
			jit_x0_addi(TREG_SP, TREG_SP, -32) |
			jit_x1_fnop();
		frag.insn[n++] =
			jit_x0_addi(clob3, TREG_SP, 16) |
			jit_x1_st_add(TREG_SP, clob3, 8);
		frag.insn[n++] =
			jit_x0_addi(clob2, ra, 0) |
			jit_x1_st_add(TREG_SP, clob2, 8);

		if (y1_br) {
			frag.insn[n++] =
				jit_x0_addi(clob1, y1_br_reg, 0) |
				jit_x1_st_add(TREG_SP, clob1, 16);
		} else {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_st_add(TREG_SP, clob1, 16);
		}

		if (bundle_2_enable)
			frag.insn[n++] = bundle_2;

		if (y1_lr) {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_mfspr(y1_lr_reg,
					     SPR_EX_CONTEXT_0_0);
		}

		if (y1_br) {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
					     clob1);
		}

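		/*
		 * The actual unaligned load: two aligned "ldna" loads
		 * around the fault address, merged with dblalign, exactly
		 * as in the fast path above (clob2 holds the original ra).
		 */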
		frag.insn[n++] =
			jit_x0_addi(clob1, clob2, 7) |
			jit_x1_ldna(rd, clob2);
		frag.insn[n++] =
			jit_x0_fnop() |
			jit_x1_ldna(clob1, clob1);
		frag.insn[n++] =
			jit_x0_dblalign(rd, clob1, clob2) |
			jit_x1_ld_add(clob1, clob3, -8);
		if (x1_add) {
			frag.insn[n++] =
				jit_x0_addi(ra, ra, x1_add_imm8) |
				jit_x1_ld_add(clob2, clob3, -8);
		} else {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_ld_add(clob2, clob3, -8);
		}

		frag.insn[n++] =
			jit_x0_fnop() |
			jit_x1_ld(clob3, clob3);

		if (load_store_size == 4) {
			if (load_store_signed)
				frag.insn[n++] =
					jit_x0_bfexts(
						rd, rd,
						UA_FIXUP_BFEXT_START(4),
						UA_FIXUP_BFEXT_END(4)) |
					jit_x1_fnop();
			else
				frag.insn[n++] =
					jit_x0_bfextu(
						rd, rd,
						UA_FIXUP_BFEXT_START(4),
						UA_FIXUP_BFEXT_END(4)) |
					jit_x1_fnop();
		} else if (load_store_size == 2) {
			if (load_store_signed)
				frag.insn[n++] =
					jit_x0_bfexts(
						rd, rd,
						UA_FIXUP_BFEXT_START(2),
						UA_FIXUP_BFEXT_END(2)) |
					jit_x1_fnop();
			else
				frag.insn[n++] =
					jit_x0_bfextu(
						rd, rd,
						UA_FIXUP_BFEXT_START(2),
						UA_FIXUP_BFEXT_END(2)) |
					jit_x1_fnop();
		}

		frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
	}

	/* Max JIT bundle count is 14. */
	WARN_ON(n > 14);

	if (!unexpected) {
		int status = 0;
		int idx = (regs->pc >> 3) &
			((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);

		frag.pc = regs->pc;
		frag.bundle = bundle;

		if (unaligned_printk) {
			pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
				current->comm, current->pid,
				(unsigned long)frag.pc,
				(unsigned long)frag.bundle,
				(int)alias, (int)rd, (int)ra,
				(int)rb, (int)bundle_2_enable,
				(int)y1_lr, (int)y1_br, (int)x1_add);

			for (k = 0; k < n; k += 2)
				pr_info("[%d] %016llx %016llx\n",
					k, (unsigned long long)frag.insn[k],
					(unsigned long long)frag.insn[k+1]);
		}

		/* Swap the bundle byte order for big-endian systems. */
#ifdef __BIG_ENDIAN
		frag.bundle = GX_INSN_BSWAP(frag.bundle);
		for (k = 0; k < n; k++)
			frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
#endif /* __BIG_ENDIAN */

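		/*
		 * Install the fragment into the per-thread JIT page. The
		 * page is indexed by low bits of the fault pc, so each
		 * faulting bundle gets its own 128-byte slot (pc, original
		 * bundle, then up to 14 rewritten bundles).
		 */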
		status = copy_to_user((void __user *)&jit_code_area[idx],
				      &frag, sizeof(frag));
		if (status) {
			/* Failed to copy the JIT into userspace: send SIGSEGV. */
			siginfo_t info = {
				.si_signo = SIGSEGV,
				.si_code = SEGV_MAPERR,
				.si_addr = (void __user *)&jit_code_area[idx]
			};

			pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
				current->pid, current->comm,
				(unsigned long long)&jit_code_area[idx]);

			trace_unhandled_signal("segfault in unalign fixup",
					       regs,
					       (unsigned long)info.si_addr,
					       SIGSEGV);
			force_sig_info(info.si_signo, &info, current);
			return;
		}


		/* Do a cheaper, non-atomic (thus not fully accurate) increment. */
		unaligned_fixup_count++;
		__flush_icache_range((unsigned long)&jit_code_area[idx],
				     (unsigned long)&jit_code_area[idx] +
				     sizeof(frag));

		/* Set up SPR_EX_CONTEXT_0_0/1 for returning to the user program. */
		__insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
		__insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));

		/* Point pc at the start of the new JIT fragment. */
		regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
		/* Set ICS in SPR_EX_CONTEXT_K_1. */
		regs->ex1 = PL_ICS_EX1(USER_PL, 1);
	}
}


/*
 * C function to generate the unaligned data JIT. Called from the
 * unaligned data interrupt handler.
 *
 * First check whether unaligned fixup is disabled, the exception did not
 * come from user space, or the sp register points to an unaligned
 * address; if so, generate a SIGBUS. Then map a page into user space as
 * the JIT area if it is not mapped yet. Generate the JIT code by calling
 * jit_bundle_gen(). After that, return to the exception handler.
 *
 * The exception handler will "iret" to the newly generated JIT code after
 * restoring the caller-saved registers. The JIT code will then perform
 * another "iret" to resume the user's program.
 */

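/*
 * Note: the per-process override checked at the top of do_unaligned() is
 * the standard PR_SET_UNALIGN control; e.g. prctl(PR_SET_UNALIGN,
 * PR_UNALIGN_SIGBUS) is expected to force SIGBUS delivery regardless of
 * the global "unaligned_fixup" setting.
 */
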
void do_unaligned(struct pt_regs *regs, int vecnum)
{
	enum ctx_state prev_state = exception_enter();
	tilegx_bundle_bits __user *pc;
	tilegx_bundle_bits bundle;
	struct thread_info *info = current_thread_info();
	int align_ctl;

	/* Check the per-process unaligned JIT flags. */
	align_ctl = unaligned_fixup;
	switch (task_thread_info(current)->align_ctl) {
	case PR_UNALIGN_NOPRINT:
		align_ctl = 1;
		break;
	case PR_UNALIGN_SIGBUS:
		align_ctl = 0;
		break;
	}

	/* Enable interrupts in order to access userspace. */
	local_irq_enable();

	/*
	 * If the fault came from kernel space, there are two choices:
	 * (a) unaligned_fixup < 1: first call the get/put_user fixup to
	 *     return -EFAULT. If there is no fixup, simply panic the
	 *     kernel.
	 * (b) unaligned_fixup >= 1: try to fix the unaligned access if it
	 *     was triggered by the get_user/put_user() macros. Panic the
	 *     kernel if it is not fixable.
	 */

	if (EX1_PL(regs->ex1) != USER_PL) {

		if (align_ctl < 1) {
			unaligned_fixup_count++;
			/* The exception came from the kernel; try to fix it up. */
			if (fixup_exception(regs)) {
				if (unaligned_printk)
					pr_info("Unalign fixup: %d %llx @%llx\n",
						(int)unaligned_fixup,
						(unsigned long long)regs->ex1,
						(unsigned long long)regs->pc);
			} else {
				/* Not fixable. Go panic. */
				panic("Unalign exception in Kernel. pc=%lx",
				      regs->pc);
			}
		} else {
			/*
			 * Try to fix the exception. If we can't, panic the
			 * kernel.
			 */
			bundle = GX_INSN_BSWAP(
				*((tilegx_bundle_bits *)(regs->pc)));
			jit_bundle_gen(regs, bundle, align_ctl);
		}
		goto done;
	}

	/*
	 * The fault came from user space with ICS set, the stack pointer is
	 * not aligned, or unaligned fixup is disabled entirely. In all of
	 * these cases, trigger a SIGBUS.
	 */
	if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
		siginfo_t info = {
			.si_signo = SIGBUS,
			.si_code = BUS_ADRALN,
			.si_addr = (unsigned char __user *)0
		};

		if (unaligned_printk)
			pr_info("Unalign fixup: %d %llx @%llx\n",
				(int)unaligned_fixup,
				(unsigned long long)regs->ex1,
				(unsigned long long)regs->pc);

		unaligned_fixup_count++;

		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
		force_sig_info(info.si_signo, &info, current);
		goto done;
	}


	/* Read the bundle that caused the exception. */
	pc = (tilegx_bundle_bits __user *)(regs->pc);
	if (get_user(bundle, pc) != 0) {
		/* We should rarely get here, since pc is a valid user address. */
		siginfo_t info = {
			.si_signo = SIGSEGV,
			.si_code = SEGV_MAPERR,
			.si_addr = (void __user *)pc
		};
		pr_err("Couldn't read instruction at %p trying to step\n", pc);
		trace_unhandled_signal("segfault in unalign fixup", regs,
				       (unsigned long)info.si_addr, SIGSEGV);
		force_sig_info(info.si_signo, &info, current);
		goto done;
	}

	if (!info->unalign_jit_base) {
		void __user *user_page;

		/*
		 * Allocate a page in userland.
		 * For 64-bit processes we try to place the mapping far
		 * from anything else that might be going on (specifically
		 * 64 GB below the top of the user address space). If it
		 * happens not to be possible to put it there, it's OK;
		 * the kernel will choose another location and we'll
		 * remember it for later.
		 */
		if (is_compat_task())
			user_page = NULL;
		else
			user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
				(current->pid << PAGE_SHIFT);

		user_page = (void __user *) vm_mmap(NULL,
						    (unsigned long)user_page,
						    PAGE_SIZE,
						    PROT_EXEC | PROT_READ |
						    PROT_WRITE,
#ifdef CONFIG_HOMECACHE
						    MAP_CACHE_HOME_TASK |
#endif
						    MAP_PRIVATE |
						    MAP_ANONYMOUS,
						    0);

		if (IS_ERR((void __force *)user_page)) {
			pr_err("Out of kernel pages trying do_mmap\n");
			goto done;
		}

		/* Save the address in the thread_info struct. */
		info->unalign_jit_base = user_page;
		if (unaligned_printk)
			pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
				raw_smp_processor_id(), current->pid,
				(unsigned long long)user_page);
	}

	/* Generate the unaligned fixup JIT. */
	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);

done:
	exception_exit(prev_state);
}

#endif /* __tilegx__ */