/*
 * Just-In-Time compiler for BPF filters on MIPS
 *
 * Copyright (c) 2014 Imagination Technologies Ltd.
 * Author: Markos Chandras <markos.chandras@imgtec.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 */

#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/kconfig.h>
#include <linux/moduleloader.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <asm/asm.h>
#include <asm/bitops.h>
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/uasm.h>

#include "bpf_jit.h"

/* ABI
 * r_skb_hl	SKB header length
 * r_data	SKB data pointer
 * r_off	Offset
 * r_A		BPF register A
 * r_X		BPF register X
 * r_skb	*skb
 * r_M		*scratch memory
 * r_skb_len	SKB length
 *
 * On entry (*bpf_func)(*skb, *filter)
 * a0 = MIPS_R_A0 = skb;
 * a1 = MIPS_R_A1 = filter;
 *
 * Stack
 * ...
 * M[15]
 * M[14]
 * M[13]
 * ...
 * M[0] <-- r_M
 * saved reg k-1
 * saved reg k-2
 * ...
 * saved reg 0 <-- r_sp
 * <no argument area>
 *
 * Packet layout
 *
 * <--------------------- len ------------------------>
 * <--skb-len(r_skb_hl)--><----- skb->data_len ------->
 * ----------------------------------------------------
 * |                  skb->data                        |
 * ----------------------------------------------------
 */

#define ptr typeof(unsigned long)

#define SCRATCH_OFF(k)		(4 * (k))

/* JIT flags */
#define SEEN_CALL		(1 << BPF_MEMWORDS)
#define SEEN_SREG_SFT		(BPF_MEMWORDS + 1)
#define SEEN_SREG_BASE		(1 << SEEN_SREG_SFT)
#define SEEN_SREG(x)		(SEEN_SREG_BASE << (x))
#define SEEN_OFF		SEEN_SREG(2)
#define SEEN_A			SEEN_SREG(3)
#define SEEN_X			SEEN_SREG(4)
#define SEEN_SKB		SEEN_SREG(5)
#define SEEN_MEM		SEEN_SREG(6)
/* SEEN_SKB_DATA also implies skb_hl and skb_len */
#define SEEN_SKB_DATA		(SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0))

/* Arguments used by JIT */
#define ARGS_USED_BY_JIT	2 /* only applicable to 64-bit */

#define SBIT(x)			(1 << (x)) /* Signed version of BIT() */

/**
 * struct jit_ctx - JIT context
 * @skf:		The sk_filter
 * @prologue_bytes:	Number of bytes for prologue
 * @idx:		Instruction index
 * @flags:		JIT flags
 * @offsets:		Instruction offsets
 * @target:		Memory location for the compiled filter
 */
struct jit_ctx {
	const struct bpf_prog *skf;
	unsigned int prologue_bytes;
	u32 idx;
	u32 flags;
	u32 *offsets;
	u32 *target;
};

static inline int optimize_div(u32 *k)
{
	/* power of 2 divides can be implemented with right shift */
	if (!(*k & (*k - 1))) {
		*k = ilog2(*k);
		return 1;
	}

	return 0;
}

static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx);

/* Simply emit the instruction if the JIT memory space has been allocated */
#define emit_instr(ctx, func, ...)			\
do {							\
	if ((ctx)->target != NULL) {			\
		u32 *p = &(ctx)->target[ctx->idx];	\
		uasm_i_##func(&p, ##__VA_ARGS__);	\
	}						\
	(ctx)->idx++;					\
} while (0)
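/*
 * Illustrative note (added commentary, not original): emit_instr(ctx,
 * addu, dst, src1, src2) expands to roughly
 *
 *	if (ctx->target != NULL) {
 *		u32 *p = &ctx->target[ctx->idx];
 *		uasm_i_addu(&p, dst, src1, src2);
 *	}
 *	ctx->idx++;
 *
 * so on the first (sizing) pass, while ctx->target is still NULL, only
 * the instruction counter advances; the second pass emits the same
 * instructions into the allocated buffer at the same indices.
 */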
/*
 * Similar to emit_instr but it must be used when we need to emit
 * 32-bit or 64-bit instructions
 */
#define emit_long_instr(ctx, func, ...)			\
do {							\
	if ((ctx)->target != NULL) {			\
		u32 *p = &(ctx)->target[ctx->idx];	\
		UASM_i_##func(&p, ##__VA_ARGS__);	\
	}						\
	(ctx)->idx++;					\
} while (0)

/* Determine if immediate is within the 16-bit signed range */
static inline bool is_range16(s32 imm)
{
	return !(imm >= SBIT(15) || imm < -SBIT(15));
}

static inline void emit_addu(unsigned int dst, unsigned int src1,
			     unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, addu, dst, src1, src2);
}

static inline void emit_nop(struct jit_ctx *ctx)
{
	emit_instr(ctx, nop);
}

/* Load a u32 immediate to a register */
static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		/* addiu can only handle s16 */
		if (!is_range16(imm)) {
			u32 *p = &ctx->target[ctx->idx];
			uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
			p = &ctx->target[ctx->idx + 1];
			uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff);
		} else {
			u32 *p = &ctx->target[ctx->idx];
			uasm_i_addiu(&p, dst, r_zero, imm);
		}
	}
	ctx->idx++;

	if (!is_range16(imm))
		ctx->idx++;
}

static inline void emit_or(unsigned int dst, unsigned int src1,
			   unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, or, dst, src1, src2);
}

static inline void emit_ori(unsigned int dst, unsigned int src, u32 imm,
			    struct jit_ctx *ctx)
{
	if (imm >= BIT(16)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_or(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, ori, dst, src, imm);
	}
}

static inline void emit_daddiu(unsigned int dst, unsigned int src,
			       int imm, struct jit_ctx *ctx)
{
	/*
	 * Only used for stack, so the imm is relatively small
	 * and it fits in 15 bits
	 */
	emit_instr(ctx, daddiu, dst, src, imm);
}

static inline void emit_addiu(unsigned int dst, unsigned int src,
			      u32 imm, struct jit_ctx *ctx)
{
	if (!is_range16(imm)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_addu(dst, r_tmp, src, ctx);
	} else {
		emit_instr(ctx, addiu, dst, src, imm);
	}
}

static inline void emit_and(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, and, dst, src1, src2);
}

static inline void emit_andi(unsigned int dst, unsigned int src,
			     u32 imm, struct jit_ctx *ctx)
{
	/* If imm does not fit in u16 then load it to a register */
	if (imm >= BIT(16)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_and(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, andi, dst, src, imm);
	}
}

static inline void emit_xor(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, xor, dst, src1, src2);
}

static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx)
{
	/* If imm does not fit in u16 then load it to a register */
	if (imm >= BIT(16)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_xor(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, xori, dst, src, imm);
	}
}

static inline void emit_stack_offset(int offset, struct jit_ctx *ctx)
{
	emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset);
}

static inline void emit_subu(unsigned int dst, unsigned int src1,
			     unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, subu, dst, src1, src2);
}

static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx)
{
	emit_subu(reg, r_zero, reg, ctx);
}
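/*
 * Illustrative example (added commentary, not original): a constant
 * that does not fit in a signed 16-bit immediate, e.g.
 * emit_load_imm(r_A, 0x12345678, ctx), produces the classic
 * two-instruction MIPS idiom
 *
 *	lui	r_tmp_imm, 0x1234	# (s32)imm >> 16
 *	ori	r_A, r_tmp_imm, 0x5678	# imm & 0xffff
 *
 * while a small constant such as 42 is a single addiu from $zero.
 */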
static inline void emit_sllv(unsigned int dst, unsigned int src,
			     unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, sllv, dst, src, sa);
}

static inline void emit_sll(unsigned int dst, unsigned int src,
			    unsigned int sa, struct jit_ctx *ctx)
{
	/* sa is 5 bits long */
	if (sa >= BIT(5))
		/* Shifting >= 32 results in zero */
		emit_jit_reg_move(dst, r_zero, ctx);
	else
		emit_instr(ctx, sll, dst, src, sa);
}

static inline void emit_srlv(unsigned int dst, unsigned int src,
			     unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, srlv, dst, src, sa);
}

static inline void emit_srl(unsigned int dst, unsigned int src,
			    unsigned int sa, struct jit_ctx *ctx)
{
	/* sa is 5 bits long */
	if (sa >= BIT(5))
		/* Shifting >= 32 results in zero */
		emit_jit_reg_move(dst, r_zero, ctx);
	else
		emit_instr(ctx, srl, dst, src, sa);
}

static inline void emit_slt(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, slt, dst, src1, src2);
}

static inline void emit_sltu(unsigned int dst, unsigned int src1,
			     unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, sltu, dst, src1, src2);
}

static inline void emit_sltiu(unsigned int dst, unsigned int src,
			      unsigned int imm, struct jit_ctx *ctx)
{
	/* 16 bit immediate */
	if (!is_range16((s32)imm)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_sltu(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, sltiu, dst, src, imm);
	}
}

/* Store register on the stack */
static inline void emit_store_stack_reg(ptr reg, ptr base,
					unsigned int offset,
					struct jit_ctx *ctx)
{
	emit_long_instr(ctx, SW, reg, offset, base);
}

static inline void emit_store(ptr reg, ptr base, unsigned int offset,
			      struct jit_ctx *ctx)
{
	emit_instr(ctx, sw, reg, offset, base);
}

static inline void emit_load_stack_reg(ptr reg, ptr base,
				       unsigned int offset,
				       struct jit_ctx *ctx)
{
	emit_long_instr(ctx, LW, reg, offset, base);
}

static inline void emit_load(unsigned int reg, unsigned int base,
			     unsigned int offset, struct jit_ctx *ctx)
{
	emit_instr(ctx, lw, reg, offset, base);
}

static inline void emit_load_byte(unsigned int reg, unsigned int base,
				  unsigned int offset, struct jit_ctx *ctx)
{
	emit_instr(ctx, lb, reg, offset, base);
}

static inline void emit_half_load(unsigned int reg, unsigned int base,
				  unsigned int offset, struct jit_ctx *ctx)
{
	emit_instr(ctx, lh, reg, offset, base);
}

static inline void emit_mul(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, mul, dst, src1, src2);
}

static inline void emit_div(unsigned int dst, unsigned int src,
			    struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		u32 *p = &ctx->target[ctx->idx];
		uasm_i_divu(&p, dst, src);
		p = &ctx->target[ctx->idx + 1];
		uasm_i_mflo(&p, dst);
	}
	ctx->idx += 2; /* 2 insts */
}

static inline void emit_mod(unsigned int dst, unsigned int src,
			    struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		u32 *p = &ctx->target[ctx->idx];
		uasm_i_divu(&p, dst, src);
		p = &ctx->target[ctx->idx + 1];
		uasm_i_mfhi(&p, dst);
	}
	ctx->idx += 2; /* 2 insts */
}

static inline void emit_dsll(unsigned int dst, unsigned int src,
			     unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, dsll, dst, src, sa);
}
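/*
 * Illustrative example (added commentary, not original): since MIPS
 * divu leaves the quotient in LO and the remainder in HI, "A /= 6"
 * compiles to emit_load_imm(r_s0, 6) followed by
 *
 *	divu	r_A, r_s0		# LO = A / 6, HI = A % 6
 *	mflo	r_A
 *
 * and emit_mod() differs only in reading HI through mfhi.
 */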
static inline void emit_dsrl32(unsigned int dst, unsigned int src,
			       unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, dsrl32, dst, src, sa);
}

static inline void emit_wsbh(unsigned int dst, unsigned int src,
			     struct jit_ctx *ctx)
{
	emit_instr(ctx, wsbh, dst, src);
}

/* load pointer to register */
static inline void emit_load_ptr(unsigned int dst, unsigned int src,
				 int imm, struct jit_ctx *ctx)
{
	/* src contains the base addr of the 32/64-bit pointer */
	emit_long_instr(ctx, LW, dst, imm, src);
}

/* load a function pointer to register */
static inline void emit_load_func(unsigned int reg, ptr imm,
				  struct jit_ctx *ctx)
{
	if (config_enabled(CONFIG_64BIT)) {
		/* At this point imm is always 64-bit */
		emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
		emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
		emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx);
		emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
		emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx);
	} else {
		emit_load_imm(reg, imm, ctx);
	}
}

/* Move to real MIPS register */
static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
{
	emit_long_instr(ctx, ADDU, dst, src, r_zero);
}

/* Move to JIT (32-bit) register */
static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
{
	emit_addu(dst, src, r_zero, ctx);
}

/* Compute the immediate value for PC-relative branches. */
static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
{
	if (ctx->target == NULL)
		return 0;

	/*
	 * We want a pc-relative branch. We only do forward branches
	 * so tgt is always after pc. tgt is the instruction offset
	 * we want to jump to.
	 *
	 * Branch on MIPS:
	 * I:   target_offset <- sign_extend(offset)
	 * I+1: PC += target_offset (delay slot)
	 *
	 * ctx->idx currently points to the branch instruction
	 * but the offset is added to the delay slot so we need
	 * to subtract 4.
	 */
	return ctx->offsets[tgt] -
		(ctx->idx * 4 - ctx->prologue_bytes) - 4;
}

static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2,
			      unsigned int imm, struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		u32 *p = &ctx->target[ctx->idx];

		switch (cond) {
		case MIPS_COND_EQ:
			uasm_i_beq(&p, reg1, reg2, imm);
			break;
		case MIPS_COND_NE:
			uasm_i_bne(&p, reg1, reg2, imm);
			break;
		case MIPS_COND_ALL:
			uasm_i_b(&p, imm);
			break;
		default:
			pr_warn("%s: Unhandled branch conditional: %d\n",
				__func__, cond);
		}
	}
	ctx->idx++;
}

static inline void emit_b(unsigned int imm, struct jit_ctx *ctx)
{
	emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx);
}

static inline void emit_jalr(unsigned int link, unsigned int reg,
			     struct jit_ctx *ctx)
{
	emit_instr(ctx, jalr, link, reg);
}

static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
{
	emit_instr(ctx, jr, reg);
}

static inline u16 align_sp(unsigned int num)
{
	/* Double word alignment for 32-bit, quadword for 64-bit */
	unsigned int align = config_enabled(CONFIG_64BIT) ? 16 : 8;

	num = (num + (align - 1)) & -align;
	return num;
}
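/*
 * Worked example (added commentary, not original): align_sp(20)
 * returns 24 on 32-bit kernels ((20 + 7) & ~7) and 32 on 64-bit
 * kernels ((20 + 15) & ~15), matching the o32 and n64 stack
 * alignment rules.
 */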
static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned int offset)
{
	int i = 0, real_off = 0;
	u32 sflags, tmp_flags;

	/* Adjust the stack pointer */
	emit_stack_offset(-align_sp(offset), ctx);

	tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
	/* sflags is essentially a bitmap */
	while (tmp_flags) {
		if ((sflags >> i) & 0x1) {
			emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
					     ctx);
			real_off += SZREG;
		}
		i++;
		tmp_flags >>= 1;
	}

	/* save return address */
	if (ctx->flags & SEEN_CALL) {
		emit_store_stack_reg(r_ra, r_sp, real_off, ctx);
		real_off += SZREG;
	}

	/* Setup r_M leaving the alignment gap if necessary */
	if (ctx->flags & SEEN_MEM) {
		if (real_off % (SZREG * 2))
			real_off += SZREG;
		emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off);
	}
}

static void restore_bpf_jit_regs(struct jit_ctx *ctx,
				 unsigned int offset)
{
	int i, real_off = 0;
	u32 sflags, tmp_flags;

	tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
	/* sflags is a bitmap */
	i = 0;
	while (tmp_flags) {
		if ((sflags >> i) & 0x1) {
			emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
					    ctx);
			real_off += SZREG;
		}
		i++;
		tmp_flags >>= 1;
	}

	/* restore return address */
	if (ctx->flags & SEEN_CALL)
		emit_load_stack_reg(r_ra, r_sp, real_off, ctx);

	/* Restore the sp and discard the scratch memory */
	emit_stack_offset(align_sp(offset), ctx);
}

static unsigned int get_stack_depth(struct jit_ctx *ctx)
{
	int sp_off = 0;

	/* How many s* regs do we need to preserve? */
	sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG;

	if (ctx->flags & SEEN_MEM)
		sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */

	if (ctx->flags & SEEN_CALL)
		sp_off += SZREG; /* Space for our ra register */

	return sp_off;
}

static void build_prologue(struct jit_ctx *ctx)
{
	int sp_off;

	/* Calculate the total offset for the stack pointer */
	sp_off = get_stack_depth(ctx);
	save_bpf_jit_regs(ctx, sp_off);

	if (ctx->flags & SEEN_SKB)
		emit_reg_move(r_skb, MIPS_R_A0, ctx);

	if (ctx->flags & SEEN_SKB_DATA) {
		/* Load packet length */
		emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len),
			  ctx);
		emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len),
			  ctx);
		/* Load the data pointer */
		emit_load_ptr(r_skb_data, r_skb,
			      offsetof(struct sk_buff, data), ctx);
		/* Load the header length */
		emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx);
	}

	if (ctx->flags & SEEN_X)
		emit_jit_reg_move(r_X, r_zero, ctx);

	/* Do not leak kernel data to userspace */
	if (bpf_needs_clear_a(&ctx->skf->insns[0]))
		emit_jit_reg_move(r_A, r_zero, ctx);
}

static void build_epilogue(struct jit_ctx *ctx)
{
	unsigned int sp_off;

	/* Calculate the total offset for the stack pointer */
	sp_off = get_stack_depth(ctx);
	restore_bpf_jit_regs(ctx, sp_off);

	/* Return */
	emit_jr(r_ra, ctx);
	emit_nop(ctx);
}

#define CHOOSE_LOAD_FUNC(K, func) \
	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
	 func##_positive)
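/*
 * Illustrative expansion (added commentary, not original):
 * CHOOSE_LOAD_FUNC(k, sk_load_word) picks sk_load_word_positive for
 * k >= 0, sk_load_word_negative for SKF_LL_OFF <= k < 0, and the
 * generic sk_load_word (which classifies the offset at run time) for
 * anything more negative than SKF_LL_OFF.
 */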
static int build_body(struct jit_ctx *ctx)
{
	const struct bpf_prog *prog = ctx->skf;
	const struct sock_filter *inst;
	unsigned int i, off, condt;
	u32 k, b_off __maybe_unused;
	u8 (*sk_load_func)(unsigned long *skb, int offset);

	for (i = 0; i < prog->len; i++) {
		u16 code;

		inst = &(prog->insns[i]);
		pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
			 __func__, inst->code, inst->jt, inst->jf, inst->k);
		k = inst->k;
		code = bpf_anc_helper(inst);

		if (ctx->target == NULL)
			ctx->offsets[i] = ctx->idx * 4;

		switch (code) {
		case BPF_LD | BPF_IMM:
			/* A <- k ==> li r_A, k */
			ctx->flags |= SEEN_A;
			emit_load_imm(r_A, k, ctx);
			break;
		case BPF_LD | BPF_W | BPF_LEN:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
			/* A <- len ==> lw r_A, offset(skb) */
			ctx->flags |= SEEN_SKB | SEEN_A;
			off = offsetof(struct sk_buff, len);
			emit_load(r_A, r_skb, off, ctx);
			break;
		case BPF_LD | BPF_MEM:
			/* A <- M[k] ==> lw r_A, offset(M) */
			ctx->flags |= SEEN_MEM | SEEN_A;
			emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_LD | BPF_W | BPF_ABS:
			/* A <- P[k:4] */
			sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word);
			goto load;
		case BPF_LD | BPF_H | BPF_ABS:
			/* A <- P[k:2] */
			sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half);
			goto load;
		case BPF_LD | BPF_B | BPF_ABS:
			/* A <- P[k:1] */
			sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte);
load:
			emit_load_imm(r_off, k, ctx);
load_common:
			ctx->flags |= SEEN_CALL | SEEN_OFF |
				SEEN_SKB | SEEN_A | SEEN_SKB_DATA;

			emit_load_func(r_s0, (ptr)sk_load_func, ctx);
			emit_reg_move(MIPS_R_A0, r_skb, ctx);
			emit_jalr(MIPS_R_RA, r_s0, ctx);
			/* Load second argument to delay slot */
			emit_reg_move(MIPS_R_A1, r_off, ctx);
			/* Check the error value */
			emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx),
				   ctx);
			/* Load return register on DS for failures */
			emit_reg_move(r_ret, r_zero, ctx);
			/* Return with error */
			emit_b(b_imm(prog->len, ctx), ctx);
			emit_nop(ctx);
			break;
		case BPF_LD | BPF_W | BPF_IND:
			/* A <- P[X + k:4] */
			sk_load_func = sk_load_word;
			goto load_ind;
		case BPF_LD | BPF_H | BPF_IND:
			/* A <- P[X + k:2] */
			sk_load_func = sk_load_half;
			goto load_ind;
		case BPF_LD | BPF_B | BPF_IND:
			/* A <- P[X + k:1] */
			sk_load_func = sk_load_byte;
load_ind:
			ctx->flags |= SEEN_OFF | SEEN_X;
			emit_addiu(r_off, r_X, k, ctx);
			goto load_common;
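		/*
		 * Added summary (derived from the code above, not an
		 * original comment): all packet loads funnel through
		 * load_common with one convention: a0 = skb, a1 = offset,
		 * call through r_s0 via jalr with the second argument
		 * loaded in the branch delay slot. On return, r_ret == 0
		 * means success and execution continues with the next BPF
		 * instruction; otherwise r_ret is cleared in the delay
		 * slot and the code branches to the epilogue, i.e. the
		 * filter returns 0 and the packet is dropped.
		 */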
		case BPF_LDX | BPF_IMM:
			/* X <- k */
			ctx->flags |= SEEN_X;
			emit_load_imm(r_X, k, ctx);
			break;
		case BPF_LDX | BPF_MEM:
			/* X <- M[k] */
			ctx->flags |= SEEN_X | SEEN_MEM;
			emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_LDX | BPF_W | BPF_LEN:
			/* X <- len */
			ctx->flags |= SEEN_X | SEEN_SKB;
			off = offsetof(struct sk_buff, len);
			emit_load(r_X, r_skb, off, ctx);
			break;
		case BPF_LDX | BPF_B | BPF_MSH:
			/* X <- 4 * (P[k:1] & 0xf) */
			ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB;
			/* Load offset to a1 */
			emit_load_func(r_s0, (ptr)sk_load_byte, ctx);
			/*
			 * This may emit two instructions so it may not fit
			 * in the delay slot. So use a0 in the delay slot.
			 */
			emit_load_imm(MIPS_R_A1, k, ctx);
			emit_jalr(MIPS_R_RA, r_s0, ctx);
			emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
			/* Check the error value */
			emit_bcond(MIPS_COND_NE, r_ret, 0,
				   b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_zero, ctx);
			/* We are good */
			/* X <- P[k:1] & 0xf */
			emit_andi(r_X, r_A, 0xf, ctx);
			/* X << 2 */
			emit_b(b_imm(i + 1, ctx), ctx);
			emit_sll(r_X, r_X, 2, ctx); /* delay slot */
			break;
		case BPF_ST:
			/* M[k] <- A */
			ctx->flags |= SEEN_MEM | SEEN_A;
			emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_STX:
			/* M[k] <- X */
			ctx->flags |= SEEN_MEM | SEEN_X;
			emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_ALU | BPF_ADD | BPF_K:
			/* A += K */
			ctx->flags |= SEEN_A;
			emit_addiu(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_ADD | BPF_X:
			/* A += X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_addu(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_SUB | BPF_K:
			/* A -= K */
			ctx->flags |= SEEN_A;
			emit_addiu(r_A, r_A, -k, ctx);
			break;
		case BPF_ALU | BPF_SUB | BPF_X:
			/* A -= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_subu(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_MUL | BPF_K:
			/* A *= K */
			/* Load K to scratch register before MUL */
			ctx->flags |= SEEN_A;
			emit_load_imm(r_s0, k, ctx);
			emit_mul(r_A, r_A, r_s0, ctx);
			break;
		case BPF_ALU | BPF_MUL | BPF_X:
			/* A *= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_mul(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_DIV | BPF_K:
			/* A /= k */
			if (k == 1)
				break;
			if (optimize_div(&k)) {
				ctx->flags |= SEEN_A;
				emit_srl(r_A, r_A, k, ctx);
				break;
			}
			ctx->flags |= SEEN_A;
			emit_load_imm(r_s0, k, ctx);
			emit_div(r_A, r_s0, ctx);
			break;
		case BPF_ALU | BPF_MOD | BPF_K:
			/* A %= k */
			if (k == 1) {
				ctx->flags |= SEEN_A;
				emit_jit_reg_move(r_A, r_zero, ctx);
			} else {
				ctx->flags |= SEEN_A;
				emit_load_imm(r_s0, k, ctx);
				emit_mod(r_A, r_s0, ctx);
			}
			break;
		case BPF_ALU | BPF_DIV | BPF_X:
			/* A /= X */
			ctx->flags |= SEEN_X | SEEN_A;
			/* Check if r_X is zero */
			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_load_imm(r_ret, 0, ctx); /* delay slot */
			emit_div(r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_MOD | BPF_X:
			/* A %= X */
			ctx->flags |= SEEN_X | SEEN_A;
			/* Check if r_X is zero */
			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_load_imm(r_ret, 0, ctx); /* delay slot */
			emit_mod(r_A, r_X, ctx);
			break;
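		/*
		 * Worked example (added commentary, not original): for
		 * "A /= 8", optimize_div() rewrites k to ilog2(8) = 3 and
		 * the division above becomes a single "srl r_A, r_A, 3".
		 * The BPF_X variants cannot know the divisor at compile
		 * time, so when X == 0 they branch to the epilogue with
		 * r_ret preloaded to 0 in the delay slot, matching the
		 * interpreter's divide-by-zero behaviour.
		 */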
		case BPF_ALU | BPF_OR | BPF_K:
			/* A |= K */
			ctx->flags |= SEEN_A;
			emit_ori(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_OR | BPF_X:
			/* A |= X (register OR, not ori with a reg number) */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_or(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_XOR | BPF_K:
			/* A ^= k */
			ctx->flags |= SEEN_A;
			emit_xori(r_A, r_A, k, ctx);
			break;
		case BPF_ANC | SKF_AD_ALU_XOR_X:
		case BPF_ALU | BPF_XOR | BPF_X:
			/* A ^= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_xor(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			/* A &= K */
			ctx->flags |= SEEN_A;
			emit_andi(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_AND | BPF_X:
			/* A &= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_and(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
			/* A <<= K */
			ctx->flags |= SEEN_A;
			emit_sll(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_LSH | BPF_X:
			/* A <<= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_sllv(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_RSH | BPF_K:
			/* A >>= K */
			ctx->flags |= SEEN_A;
			emit_srl(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_RSH | BPF_X:
			/* A >>= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_srlv(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_NEG:
			/* A = -A */
			ctx->flags |= SEEN_A;
			emit_neg(r_A, ctx);
			break;
		case BPF_JMP | BPF_JA:
			/* pc += K */
			emit_b(b_imm(i + k + 1, ctx), ctx);
			emit_nop(ctx);
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
			/* pc += (A == K) ? pc->jt : pc->jf */
			condt = MIPS_COND_EQ | MIPS_COND_K;
			goto jmp_cmp;
		case BPF_JMP | BPF_JEQ | BPF_X:
			ctx->flags |= SEEN_X;
			/* pc += (A == X) ? pc->jt : pc->jf */
			condt = MIPS_COND_EQ | MIPS_COND_X;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGE | BPF_K:
			/* pc += (A >= K) ? pc->jt : pc->jf */
			condt = MIPS_COND_GE | MIPS_COND_K;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGE | BPF_X:
			ctx->flags |= SEEN_X;
			/* pc += (A >= X) ? pc->jt : pc->jf */
			condt = MIPS_COND_GE | MIPS_COND_X;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGT | BPF_K:
			/* pc += (A > K) ? pc->jt : pc->jf */
			condt = MIPS_COND_GT | MIPS_COND_K;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGT | BPF_X:
			ctx->flags |= SEEN_X;
			/* pc += (A > X) ? pc->jt : pc->jf */
			condt = MIPS_COND_GT | MIPS_COND_X;
jmp_cmp:
			/* Greater or Equal */
			if ((condt & MIPS_COND_GE) ||
			    (condt & MIPS_COND_GT)) {
				if (condt & MIPS_COND_K) { /* K */
					ctx->flags |= SEEN_A;
					emit_sltiu(r_s0, r_A, k, ctx);
				} else { /* X */
					ctx->flags |= SEEN_A |
						SEEN_X;
					emit_sltu(r_s0, r_A, r_X, ctx);
				}
				/* A < (K|X) ? r_scratch = 1 */
				b_off = b_imm(i + inst->jf + 1, ctx);
				emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
					   ctx);
				emit_nop(ctx);
				/* A > (K|X) ? scratch = 0 */
				if (condt & MIPS_COND_GT) {
					/* Checking for equality */
					ctx->flags |= SEEN_A | SEEN_X;
					if (condt & MIPS_COND_K)
						emit_load_imm(r_s0, k, ctx);
					else
						emit_jit_reg_move(r_s0, r_X,
								  ctx);
					b_off = b_imm(i + inst->jf + 1, ctx);
					emit_bcond(MIPS_COND_EQ, r_A, r_s0,
						   b_off, ctx);
					emit_nop(ctx);
					/* Finally, A > K|X */
					b_off = b_imm(i + inst->jt + 1, ctx);
					emit_b(b_off, ctx);
					emit_nop(ctx);
				} else {
					/* A >= (K|X) so jump */
					b_off = b_imm(i + inst->jt + 1, ctx);
					emit_b(b_off, ctx);
					emit_nop(ctx);
				}
			} else {
				/* A == K|X */
				if (condt & MIPS_COND_K) { /* K */
					ctx->flags |= SEEN_A;
					emit_load_imm(r_s0, k, ctx);
					/* jump true */
					b_off = b_imm(i + inst->jt + 1, ctx);
					emit_bcond(MIPS_COND_EQ, r_A, r_s0,
						   b_off, ctx);
					emit_nop(ctx);
					/* jump false */
					b_off = b_imm(i + inst->jf + 1,
						      ctx);
					emit_bcond(MIPS_COND_NE, r_A, r_s0,
						   b_off, ctx);
					emit_nop(ctx);
				} else { /* X */
					/* jump true */
					ctx->flags |= SEEN_A | SEEN_X;
					b_off = b_imm(i + inst->jt + 1,
						      ctx);
					emit_bcond(MIPS_COND_EQ, r_A, r_X,
						   b_off, ctx);
					emit_nop(ctx);
					/* jump false */
					b_off = b_imm(i + inst->jf + 1, ctx);
					emit_bcond(MIPS_COND_NE, r_A, r_X,
						   b_off, ctx);
					emit_nop(ctx);
				}
			}
			break;
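		/*
		 * Added summary (derived from jmp_cmp above, not an
		 * original comment): MIPS branches directly only on
		 * (in)equality, so A >= K|X is synthesised with
		 * sltiu/sltu: r_s0 = (A < K|X) selects the false branch.
		 * For the strict A > K|X case, an extra beq against the
		 * operand filters equality into the false branch before
		 * the unconditional jump to the true branch.
		 */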
		case BPF_JMP | BPF_JSET | BPF_K:
			ctx->flags |= SEEN_A;
			/* pc += (A & K) ? pc->jt : pc->jf */
			emit_load_imm(r_s1, k, ctx);
			emit_and(r_s0, r_A, r_s1, ctx);
			/* jump true */
			b_off = b_imm(i + inst->jt + 1, ctx);
			emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
			emit_nop(ctx);
			/* jump false */
			b_off = b_imm(i + inst->jf + 1, ctx);
			emit_b(b_off, ctx);
			emit_nop(ctx);
			break;
		case BPF_JMP | BPF_JSET | BPF_X:
			ctx->flags |= SEEN_X | SEEN_A;
			/* pc += (A & X) ? pc->jt : pc->jf */
			emit_and(r_s0, r_A, r_X, ctx);
			/* jump true */
			b_off = b_imm(i + inst->jt + 1, ctx);
			emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
			emit_nop(ctx);
			/* jump false */
			b_off = b_imm(i + inst->jf + 1, ctx);
			emit_b(b_off, ctx);
			emit_nop(ctx);
			break;
		case BPF_RET | BPF_A:
			ctx->flags |= SEEN_A;
			if (i != prog->len - 1)
				/*
				 * If this is not the last instruction
				 * then jump to the epilogue
				 */
				emit_b(b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_A, ctx); /* delay slot */
			break;
		case BPF_RET | BPF_K:
			/*
			 * It can emit two instructions so it does not fit on
			 * the delay slot.
			 */
			emit_load_imm(r_ret, k, ctx);
			if (i != prog->len - 1) {
				/*
				 * If this is not the last instruction
				 * then jump to the epilogue
				 */
				emit_b(b_imm(prog->len, ctx), ctx);
				emit_nop(ctx);
			}
			break;
		case BPF_MISC | BPF_TAX:
			/* X = A */
			ctx->flags |= SEEN_X | SEEN_A;
			emit_jit_reg_move(r_X, r_A, ctx);
			break;
		case BPF_MISC | BPF_TXA:
			/* A = X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_jit_reg_move(r_A, r_X, ctx);
			break;
		/* AUX */
		case BPF_ANC | SKF_AD_PROTOCOL:
			/* A = ntohs(skb->protocol) */
			ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  protocol) != 2);
			off = offsetof(struct sk_buff, protocol);
			emit_half_load(r_A, r_skb, off, ctx);
#ifdef CONFIG_CPU_LITTLE_ENDIAN
			/* This needs little endian fixup */
			if (cpu_has_wsbh) {
				/* R2 and later have the wsbh instruction */
				emit_wsbh(r_A, r_A, ctx);
			} else {
				/* Get first byte */
				emit_andi(r_tmp_imm, r_A, 0xff, ctx);
				/* Shift it */
				emit_sll(r_tmp, r_tmp_imm, 8, ctx);
				/* Get second byte */
				emit_srl(r_tmp_imm, r_A, 8, ctx);
				emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx);
				/* Put everything together in r_A */
				emit_or(r_A, r_tmp, r_tmp_imm, ctx);
			}
#endif
			break;
		case BPF_ANC | SKF_AD_CPU:
			ctx->flags |= SEEN_A | SEEN_OFF;
			/* A = current_thread_info()->cpu */
			BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info,
						  cpu) != 4);
			off = offsetof(struct thread_info, cpu);
			/* $28/gp points to the thread_info struct */
			emit_load(r_A, 28, off, ctx);
			break;
		case BPF_ANC | SKF_AD_IFINDEX:
			/* A = skb->dev->ifindex */
			ctx->flags |= SEEN_SKB | SEEN_A;
			off = offsetof(struct sk_buff, dev);
			/* Load *dev pointer */
			emit_load_ptr(r_s0, r_skb, off, ctx);
			/* error (0) in the delay slot */
			emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_zero, ctx);
			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
						  ifindex) != 4);
			off = offsetof(struct net_device, ifindex);
			emit_load(r_A, r_s0, off, ctx);
			break;
		case BPF_ANC | SKF_AD_MARK:
			ctx->flags |= SEEN_SKB | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
			off = offsetof(struct sk_buff, mark);
			emit_load(r_A, r_skb, off, ctx);
			break;
		case BPF_ANC | SKF_AD_RXHASH:
			ctx->flags |= SEEN_SKB | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
			off = offsetof(struct sk_buff, hash);
			emit_load(r_A, r_skb, off, ctx);
			break;
		case BPF_ANC | SKF_AD_VLAN_TAG:
		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
			ctx->flags |= SEEN_SKB | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  vlan_tci) != 2);
			off = offsetof(struct sk_buff, vlan_tci);
			emit_half_load(r_s0, r_skb, off, ctx);
			if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
				emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT,
					  ctx);
			} else {
				emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
				/* return 1 if present */
				emit_sltu(r_A, r_zero, r_A, ctx);
			}
			break;
		case BPF_ANC | SKF_AD_PKTTYPE:
			ctx->flags |= SEEN_SKB;

			emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx);
			/* Keep only the last 3 bits */
			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
#ifdef __BIG_ENDIAN_BITFIELD
			/* Get the actual packet type to the lower 3 bits */
			emit_srl(r_A, r_A, 5, ctx);
#endif
			break;
		case BPF_ANC | SKF_AD_QUEUE:
			ctx->flags |= SEEN_SKB | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  queue_mapping) != 2);
			BUILD_BUG_ON(offsetof(struct sk_buff,
					      queue_mapping) > 0xff);
			off = offsetof(struct sk_buff, queue_mapping);
			emit_half_load(r_A, r_skb, off, ctx);
			break;
		default:
			pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
				 inst->code);
			return -1;
		}
	}

	/* compute offsets only during the first pass */
	if (ctx->target == NULL)
		ctx->offsets[i] = ctx->idx * 4;

	return 0;
}
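/*
 * Added summary (derived from the code, not an original comment):
 * compilation below is a two-pass scheme. Pass one runs build_body(),
 * build_prologue() and build_epilogue() with ctx.target == NULL so
 * that only ctx.idx and the per-instruction offsets[] are computed;
 * the exact amount of module memory is then allocated and pass two
 * re-runs all three builders to emit the final instructions.
 */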
int bpf_jit_enable __read_mostly;

void bpf_jit_compile(struct bpf_prog *fp)
{
	struct jit_ctx ctx;
	unsigned int alloc_size, tmp_idx;

	if (!bpf_jit_enable)
		return;

	memset(&ctx, 0, sizeof(ctx));

	/*
	 * build_body() and b_imm() also use offsets[fp->len] (the offset
	 * of the epilogue), so allocate one extra slot.
	 */
	ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
	if (ctx.offsets == NULL)
		return;

	ctx.skf = fp;

	if (build_body(&ctx))
		goto out;

	tmp_idx = ctx.idx;
	build_prologue(&ctx);
	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
	/* just to complete the ctx.idx count */
	build_epilogue(&ctx);

	alloc_size = 4 * ctx.idx;
	ctx.target = module_alloc(alloc_size);
	if (ctx.target == NULL)
		goto out;

	/* Clean it */
	memset(ctx.target, 0, alloc_size);

	ctx.idx = 0;

	/* Generate the actual JIT code */
	build_prologue(&ctx);
	build_body(&ctx);
	build_epilogue(&ctx);

	/* Update the icache */
	flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx));

	if (bpf_jit_enable > 1)
		/* Dump JIT code */
		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);

	fp->bpf_func = (void *)ctx.target;
	fp->jited = 1;

out:
	kfree(ctx.offsets);
}

void bpf_jit_free(struct bpf_prog *fp)
{
	if (fp->jited)
		module_memfree(fp->bpf_func);

	bpf_prog_unlock_free(fp);
}
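/*
 * Usage note (added commentary, not original): bpf_jit_enable is the
 * net.core.bpf_jit_enable sysctl, e.g.
 *
 *	echo 1 > /proc/sys/net/core/bpf_jit_enable
 *
 * while a value greater than 1 additionally dumps the generated image
 * through bpf_jit_dump() as seen above.
 */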