/*
 * Just-In-Time compiler for BPF filters on MIPS
 *
 * Copyright (c) 2014 Imagination Technologies Ltd.
 * Author: Markos Chandras <markos.chandras@imgtec.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 */

#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/kconfig.h>
#include <linux/moduleloader.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <asm/bitops.h>
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/uasm.h>

#include "bpf_jit.h"

/* ABI
 *
 * s0	1st scratch register
 * s1	2nd scratch register
 * s2	offset register
 * s3	BPF register A
 * s4	BPF register X
 * s5	*skb
 * s6	*scratch memory
 *
 * On entry (*bpf_func)(*skb, *filter)
 * a0 = MIPS_R_A0 = skb;
 * a1 = MIPS_R_A1 = filter;
 *
 * Stack
 * ...
 * M[15]
 * M[14]
 * M[13]
 * ...
 * M[0] <-- r_M
 * saved reg k-1
 * saved reg k-2
 * ...
 * saved reg 0 <-- r_sp
 * <no argument area>
 *
 * Packet layout
 *
 * <--------------------- len ------------------------>
 * <--skb-len(r_skb_hl)--><----- skb->data_len ------->
 * ----------------------------------------------------
 * |          skb->data                               |
 * ----------------------------------------------------
 */

#define RSIZE	(sizeof(unsigned long))
#define ptr typeof(unsigned long)

/* ABI specific return values */
#ifdef CONFIG_32BIT /* O32 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define r_err	MIPS_R_V1
#define r_val	MIPS_R_V0
#else /* CONFIG_CPU_LITTLE_ENDIAN */
#define r_err	MIPS_R_V0
#define r_val	MIPS_R_V1
#endif
#else /* N64 */
#define r_err	MIPS_R_V0
#define r_val	MIPS_R_V0
#endif

#define r_ret	MIPS_R_V0
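/*
 * Why r_err/r_val differ across ABIs: the jit_get_skb_*() helpers below
 * return a u64 with the skb_copy_bits() error code in the upper 32 bits
 * and the loaded value in the lower 32 bits.  On O32 a 64-bit return
 * value is split across the $v0/$v1 pair, and which half lands in $v0
 * depends on endianness; on N64 the whole u64 comes back in $v0 and the
 * error half is extracted with dsrl32.
 */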
/*
 * Use 2 scratch registers to avoid pipeline interlocks.
 * There is no overhead during epilogue and prologue since
 * any of the $s0-$s6 registers will only be preserved if
 * they are going to actually be used.
 */
#define r_s0		MIPS_R_S0 /* scratch reg 1 */
#define r_s1		MIPS_R_S1 /* scratch reg 2 */
#define r_off		MIPS_R_S2
#define r_A		MIPS_R_S3
#define r_X		MIPS_R_S4
#define r_skb		MIPS_R_S5
#define r_M		MIPS_R_S6
#define r_tmp_imm	MIPS_R_T6 /* No need to preserve this */
#define r_tmp		MIPS_R_T7 /* No need to preserve this */
#define r_zero		MIPS_R_ZERO
#define r_sp		MIPS_R_SP
#define r_ra		MIPS_R_RA

#define SCRATCH_OFF(k)		(4 * (k))

/* JIT flags */
#define SEEN_CALL		(1 << BPF_MEMWORDS)
#define SEEN_SREG_SFT		(BPF_MEMWORDS + 1)
#define SEEN_SREG_BASE		(1 << SEEN_SREG_SFT)
#define SEEN_SREG(x)		(SEEN_SREG_BASE << (x))
#define SEEN_S0			SEEN_SREG(0)
#define SEEN_S1			SEEN_SREG(1)
#define SEEN_OFF		SEEN_SREG(2)
#define SEEN_A			SEEN_SREG(3)
#define SEEN_X			SEEN_SREG(4)
#define SEEN_SKB		SEEN_SREG(5)
#define SEEN_MEM		SEEN_SREG(6)

/* Arguments used by JIT */
#define ARGS_USED_BY_JIT	2 /* only applicable to 64-bit */

#define SBIT(x)			(1 << (x)) /* Signed version of BIT() */

/**
 * struct jit_ctx - JIT context
 * @skf:		The sk_filter
 * @prologue_bytes:	Number of bytes for prologue
 * @idx:		Instruction index
 * @flags:		JIT flags
 * @offsets:		Instruction offsets
 * @target:		Memory location for the compiled filter
 */
struct jit_ctx {
	const struct bpf_prog *skf;
	unsigned int prologue_bytes;
	u32 idx;
	u32 flags;
	u32 *offsets;
	u32 *target;
};

static inline int optimize_div(u32 *k)
{
	/* power of 2 divides can be implemented with right shift */
	if (!(*k & (*k-1))) {
		*k = ilog2(*k);
		return 1;
	}

	return 0;
}

static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx);

/* Simply emit the instruction if the JIT memory space has been allocated */
#define emit_instr(ctx, func, ...)			\
do {							\
	if ((ctx)->target != NULL) {			\
		u32 *p = &(ctx)->target[ctx->idx];	\
		uasm_i_##func(&p, ##__VA_ARGS__);	\
	}						\
	(ctx)->idx++;					\
} while (0)

/*
 * Similar to emit_instr but used when the instruction must match the
 * native register width: the UASM_i_* variants expand to the 32-bit or
 * 64-bit form of the instruction as appropriate.
 */
#define emit_long_instr(ctx, func, ...)			\
do {							\
	if ((ctx)->target != NULL) {			\
		u32 *p = &(ctx)->target[ctx->idx];	\
		UASM_i_##func(&p, ##__VA_ARGS__);	\
	}						\
	(ctx)->idx++;					\
} while (0)
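/*
 * Both macros support the JIT's two-pass scheme (see bpf_jit_compile()):
 * on the first pass ctx->target is NULL, so an emit only advances
 * ctx->idx, sizing the image and recording instruction offsets; on the
 * second pass the instruction word is actually written.  E.g.
 * emit_instr(ctx, addu, r_A, r_A, r_X) is a no-op except for idx++ on
 * pass one, and stores "addu $s3, $s3, $s4" into ctx->target[idx] on
 * pass two.
 */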
/* Determine if the immediate is within the 16-bit signed range */
static inline bool is_range16(s32 imm)
{
	return !(imm >= SBIT(15) || imm < -SBIT(15));
}

static inline void emit_addu(unsigned int dst, unsigned int src1,
			     unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, addu, dst, src1, src2);
}

static inline void emit_nop(struct jit_ctx *ctx)
{
	emit_instr(ctx, nop);
}

/* Load a u32 immediate to a register */
static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		/* addiu can only handle s16 */
		if (!is_range16(imm)) {
			u32 *p = &ctx->target[ctx->idx];
			uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
			p = &ctx->target[ctx->idx + 1];
			uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff);
		} else {
			u32 *p = &ctx->target[ctx->idx];
			uasm_i_addiu(&p, dst, r_zero, imm);
		}
	}
	ctx->idx++;

	if (!is_range16(imm))
		ctx->idx++;
}
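/*
 * For example, loading 0x12345678 does not fit in a signed 16-bit
 * immediate and takes two slots:
 *	lui	r_tmp_imm, 0x1234
 *	ori	dst, r_tmp_imm, 0x5678
 * whereas a small immediate such as -5 takes a single
 *	addiu	dst, $zero, -5
 * This is why callers cannot assume a load_imm fits in a branch delay
 * slot (see the BPF_RET | BPF_K case below).
 */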
static inline void emit_or(unsigned int dst, unsigned int src1,
			   unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, or, dst, src1, src2);
}

static inline void emit_ori(unsigned int dst, unsigned int src, u32 imm,
			    struct jit_ctx *ctx)
{
	if (imm >= BIT(16)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_or(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, ori, dst, src, imm);
	}
}

static inline void emit_daddiu(unsigned int dst, unsigned int src,
			       int imm, struct jit_ctx *ctx)
{
	/*
	 * Only used for stack, so the imm is relatively small
	 * and it fits in a signed 16-bit immediate
	 */
	emit_instr(ctx, daddiu, dst, src, imm);
}

static inline void emit_addiu(unsigned int dst, unsigned int src,
			      u32 imm, struct jit_ctx *ctx)
{
	if (!is_range16(imm)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_addu(dst, r_tmp, src, ctx);
	} else {
		emit_instr(ctx, addiu, dst, src, imm);
	}
}

static inline void emit_and(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, and, dst, src1, src2);
}

static inline void emit_andi(unsigned int dst, unsigned int src,
			     u32 imm, struct jit_ctx *ctx)
{
	/* If imm does not fit in u16 then load it to a register */
	if (imm >= BIT(16)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_and(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, andi, dst, src, imm);
	}
}

static inline void emit_xor(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, xor, dst, src1, src2);
}

static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx)
{
	/* If imm does not fit in u16 then load it to a register */
	if (imm >= BIT(16)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_xor(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, xori, dst, src, imm);
	}
}

static inline void emit_stack_offset(int offset, struct jit_ctx *ctx)
{
	emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset);
}

static inline void emit_subu(unsigned int dst, unsigned int src1,
			     unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, subu, dst, src1, src2);
}

static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx)
{
	emit_subu(reg, r_zero, reg, ctx);
}

static inline void emit_sllv(unsigned int dst, unsigned int src,
			     unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, sllv, dst, src, sa);
}

static inline void emit_sll(unsigned int dst, unsigned int src,
			    unsigned int sa, struct jit_ctx *ctx)
{
	/* sa is 5-bits long */
	if (sa >= BIT(5))
		/* Shifting >= 32 results in zero */
		emit_jit_reg_move(dst, r_zero, ctx);
	else
		emit_instr(ctx, sll, dst, src, sa);
}

static inline void emit_srlv(unsigned int dst, unsigned int src,
			     unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, srlv, dst, src, sa);
}

static inline void emit_srl(unsigned int dst, unsigned int src,
			    unsigned int sa, struct jit_ctx *ctx)
{
	/* sa is 5-bits long */
	if (sa >= BIT(5))
		/* Shifting >= 32 results in zero */
		emit_jit_reg_move(dst, r_zero, ctx);
	else
		emit_instr(ctx, srl, dst, src, sa);
}

static inline void emit_slt(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, slt, dst, src1, src2);
}

static inline void emit_sltu(unsigned int dst, unsigned int src1,
			     unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, sltu, dst, src1, src2);
}

static inline void emit_sltiu(unsigned int dst, unsigned int src,
			      unsigned int imm, struct jit_ctx *ctx)
{
	/* 16 bit immediate */
	if (!is_range16((s32)imm)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_sltu(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, sltiu, dst, src, imm);
	}
}

/* Store register on the stack */
static inline void emit_store_stack_reg(ptr reg, ptr base,
					unsigned int offset,
					struct jit_ctx *ctx)
{
	emit_long_instr(ctx, SW, reg, offset, base);
}

static inline void emit_store(ptr reg, ptr base, unsigned int offset,
			      struct jit_ctx *ctx)
{
	emit_instr(ctx, sw, reg, offset, base);
}

static inline void emit_load_stack_reg(ptr reg, ptr base,
				       unsigned int offset,
				       struct jit_ctx *ctx)
{
	emit_long_instr(ctx, LW, reg, offset, base);
}

static inline void emit_load(unsigned int reg, unsigned int base,
			     unsigned int offset, struct jit_ctx *ctx)
{
	emit_instr(ctx, lw, reg, offset, base);
}

static inline void emit_load_byte(unsigned int reg, unsigned int base,
				  unsigned int offset, struct jit_ctx *ctx)
{
	emit_instr(ctx, lb, reg, offset, base);
}

static inline void emit_half_load(unsigned int reg, unsigned int base,
				  unsigned int offset, struct jit_ctx *ctx)
{
	emit_instr(ctx, lh, reg, offset, base);
}

static inline void emit_mul(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, mul, dst, src1, src2);
}

static inline void emit_div(unsigned int dst, unsigned int src,
			    struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		u32 *p = &ctx->target[ctx->idx];
		uasm_i_divu(&p, dst, src);
		p = &ctx->target[ctx->idx + 1];
		uasm_i_mflo(&p, dst);
	}
	ctx->idx += 2; /* 2 insts */
}

static inline void emit_mod(unsigned int dst, unsigned int src,
			    struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		u32 *p = &ctx->target[ctx->idx];
		uasm_i_divu(&p, dst, src);
		p = &ctx->target[ctx->idx + 1];
		uasm_i_mfhi(&p, dst);
	}
	ctx->idx += 2; /* 2 insts */
}
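/*
 * divu deposits the quotient in the LO register and the remainder in
 * HI, so the two helpers above differ only in which half they move
 * back: with dst = 17 and src = 5, emit_div() leaves 3 in dst
 * (divu + mflo) and emit_mod() leaves 2 (divu + mfhi).
 */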
static inline void emit_dsll(unsigned int dst, unsigned int src,
			     unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, dsll, dst, src, sa);
}

static inline void emit_dsrl32(unsigned int dst, unsigned int src,
			       unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, dsrl32, dst, src, sa);
}

static inline void emit_wsbh(unsigned int dst, unsigned int src,
			     struct jit_ctx *ctx)
{
	emit_instr(ctx, wsbh, dst, src);
}

/* load a pointer to a register */
static inline void emit_load_ptr(unsigned int dst, unsigned int src,
				 int imm, struct jit_ctx *ctx)
{
	/* src contains the base address of the 32/64-bit pointer */
	emit_long_instr(ctx, LW, dst, imm, src);
}

/* load a function pointer to a register */
static inline void emit_load_func(unsigned int reg, ptr imm,
				  struct jit_ctx *ctx)
{
	if (config_enabled(CONFIG_64BIT)) {
		/* At this point imm is always 64-bit */
		emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
		emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
		emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx);
		emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
		emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx);
	} else {
		emit_load_imm(reg, imm, ctx);
	}
}

/* Move to a real MIPS register */
static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
{
	emit_long_instr(ctx, ADDU, dst, src, r_zero);
}

/* Move to a JIT (32-bit) register */
static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
{
	emit_addu(dst, src, r_zero, ctx);
}

/* Compute the immediate value for PC-relative branches. */
static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
{
	if (ctx->target == NULL)
		return 0;

	/*
	 * We want a pc-relative branch.  We only do forward branches
	 * so tgt is always after pc.  tgt is the instruction offset
	 * we want to jump to.
	 *
	 * Branch on MIPS:
	 * I:   target_offset <- sign_extend(offset)
	 * I+1: PC += target_offset (delay slot)
	 *
	 * ctx->idx currently points to the branch instruction
	 * but the offset is added to the delay slot so we need
	 * to subtract 4.
	 */
	return ctx->offsets[tgt] -
		(ctx->idx * 4 - ctx->prologue_bytes) - 4;
}
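/*
 * Worked example: with the branch sitting at byte 0x40 of the filter
 * body (ctx->idx * 4 - ctx->prologue_bytes) and the target instruction
 * recorded at ctx->offsets[tgt] == 0x60, the returned immediate is
 * 0x60 - 0x40 - 4 = 0x1c bytes, i.e. seven instructions counted from
 * the delay slot.  The offsets handed to uasm are always in bytes,
 * which is why fixed skips below are written as "3 << 2".
 */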
static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2,
			      unsigned int imm, struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		u32 *p = &ctx->target[ctx->idx];

		switch (cond) {
		case MIPS_COND_EQ:
			uasm_i_beq(&p, reg1, reg2, imm);
			break;
		case MIPS_COND_NE:
			uasm_i_bne(&p, reg1, reg2, imm);
			break;
		case MIPS_COND_ALL:
			uasm_i_b(&p, imm);
			break;
		default:
			pr_warn("%s: Unhandled branch conditional: %d\n",
				__func__, cond);
		}
	}
	ctx->idx++;
}

static inline void emit_b(unsigned int imm, struct jit_ctx *ctx)
{
	emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx);
}

static inline void emit_jalr(unsigned int link, unsigned int reg,
			     struct jit_ctx *ctx)
{
	emit_instr(ctx, jalr, link, reg);
}

static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
{
	emit_instr(ctx, jr, reg);
}

static inline u16 align_sp(unsigned int num)
{
	/* Double word alignment for 32-bit, quadword for 64-bit */
	unsigned int align = config_enabled(CONFIG_64BIT) ? 16 : 8;
	num = (num + (align - 1)) & -align;
	return num;
}

static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned int offset)
{
	int i = 0, real_off = 0;
	u32 sflags, tmp_flags;

	/* Adjust the stack pointer */
	emit_stack_offset(-align_sp(offset), ctx);

	if (ctx->flags & SEEN_CALL) {
		/* Argument save area */
		if (config_enabled(CONFIG_64BIT))
			/* Bottom of current frame */
			real_off = align_sp(offset) - RSIZE;
		else
			/* Top of previous frame */
			real_off = align_sp(offset) + RSIZE;
		emit_store_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
		emit_store_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);

		real_off = 0;
	}

	tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
	/* sflags is essentially a bitmap */
	while (tmp_flags) {
		if ((sflags >> i) & 0x1) {
			emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
					     ctx);
			real_off += RSIZE;
		}
		i++;
		tmp_flags >>= 1;
	}

	/* save return address */
	if (ctx->flags & SEEN_CALL) {
		emit_store_stack_reg(r_ra, r_sp, real_off, ctx);
		real_off += RSIZE;
	}

	/* Setup r_M leaving the alignment gap if necessary */
	if (ctx->flags & SEEN_MEM) {
		if (real_off % (RSIZE * 2))
			real_off += RSIZE;
		emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off);
	}
}
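/*
 * For illustration, on a 32-bit kernel a filter flagged SEEN_A |
 * SEEN_MEM (note SEEN_MEM doubles as the sreg bit for $s6/r_M) gets a
 * 2 * 4 + 4 * BPF_MEMWORDS = 72 byte frame, already 8-byte aligned:
 * $s3 is saved at 0($sp), $s6 at 4($sp), and r_M is pointed at
 * $sp + 8 so that M[0]..M[15] occupy the remaining 64 bytes.
 */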
static void restore_bpf_jit_regs(struct jit_ctx *ctx,
				 unsigned int offset)
{
	int i, real_off = 0;
	u32 sflags, tmp_flags;

	if (ctx->flags & SEEN_CALL) {
		if (config_enabled(CONFIG_64BIT))
			/* Bottom of current frame */
			real_off = align_sp(offset) - RSIZE;
		else
			/* Top of previous frame */
			real_off = align_sp(offset) + RSIZE;
		emit_load_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
		emit_load_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);

		real_off = 0;
	}

	tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
	/* sflags is a bitmap */
	i = 0;
	while (tmp_flags) {
		if ((sflags >> i) & 0x1) {
			emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
					    ctx);
			real_off += RSIZE;
		}
		i++;
		tmp_flags >>= 1;
	}

	/* restore return address */
	if (ctx->flags & SEEN_CALL)
		emit_load_stack_reg(r_ra, r_sp, real_off, ctx);

	/* Restore the sp and discard the scratch memory */
	emit_stack_offset(align_sp(offset), ctx);
}

static unsigned int get_stack_depth(struct jit_ctx *ctx)
{
	int sp_off = 0;

	/* How many s* regs do we need to preserve? */
	sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * RSIZE;

	if (ctx->flags & SEEN_MEM)
		sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */

	if (ctx->flags & SEEN_CALL)
		/*
		 * The JIT code makes calls to external functions using 2
		 * arguments. Therefore, for o32 we don't need to allocate
		 * space because we don't care if the arguments are lost
		 * across calls. We do need however to preserve incoming
		 * arguments but the space is already allocated for us by
		 * the caller. On the other hand, for n64, we need to allocate
		 * this space ourselves. We need to preserve $ra as well.
		 */
		sp_off += config_enabled(CONFIG_64BIT) ?
			(ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE;

	return sp_off;
}

static void build_prologue(struct jit_ctx *ctx)
{
	int sp_off;

	/* Calculate the total offset for the stack pointer */
	sp_off = get_stack_depth(ctx);
	save_bpf_jit_regs(ctx, sp_off);

	if (ctx->flags & SEEN_SKB)
		emit_reg_move(r_skb, MIPS_R_A0, ctx);

	if (ctx->flags & SEEN_X)
		emit_jit_reg_move(r_X, r_zero, ctx);

	/* Do not leak kernel data to userspace */
	if (bpf_needs_clear_a(&ctx->skf->insns[0]))
		emit_jit_reg_move(r_A, r_zero, ctx);
}

static void build_epilogue(struct jit_ctx *ctx)
{
	unsigned int sp_off;

	/* Calculate the total offset for the stack pointer */
	sp_off = get_stack_depth(ctx);
	restore_bpf_jit_regs(ctx, sp_off);

	/* Return */
	emit_jr(r_ra, ctx);
	emit_nop(ctx);
}

static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
{
	u8 ret;
	int err;

	err = skb_copy_bits(skb, offset, &ret, 1);

	return (u64)err << 32 | ret;
}

static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
{
	u16 ret;
	int err;

	err = skb_copy_bits(skb, offset, &ret, 2);

	return (u64)err << 32 | ntohs(ret);
}

static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
{
	u32 ret;
	int err;

	err = skb_copy_bits(skb, offset, &ret, 4);

	return (u64)err << 32 | ntohl(ret);
}
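/*
 * The three helpers pack the skb_copy_bits() result and the loaded
 * (host-endian) value into one u64: a successful halfword load of
 * 0x0800 returns 0x0000000000000800, while a failed load returns
 * e.g. -EFAULT in the upper half, i.e. 0xfffffff200000000.  The
 * emitted code only has to test the upper 32 bits (r_err on o32,
 * dsrl32 on 64-bit) to detect the error.
 */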
static int build_body(struct jit_ctx *ctx)
{
	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
	const struct bpf_prog *prog = ctx->skf;
	const struct sock_filter *inst;
	unsigned int i, off, load_order, condt;
	u32 k, b_off __maybe_unused;

	for (i = 0; i < prog->len; i++) {
		u16 code;

		inst = &(prog->insns[i]);
		pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
			 __func__, inst->code, inst->jt, inst->jf, inst->k);
		k = inst->k;
		code = bpf_anc_helper(inst);

		if (ctx->target == NULL)
			ctx->offsets[i] = ctx->idx * 4;

		switch (code) {
		case BPF_LD | BPF_IMM:
			/* A <- k ==> li r_A, k */
			ctx->flags |= SEEN_A;
			emit_load_imm(r_A, k, ctx);
			break;
		case BPF_LD | BPF_W | BPF_LEN:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
			/* A <- len ==> lw r_A, offset(skb) */
			ctx->flags |= SEEN_SKB | SEEN_A;
			off = offsetof(struct sk_buff, len);
			emit_load(r_A, r_skb, off, ctx);
			break;
		case BPF_LD | BPF_MEM:
			/* A <- M[k] ==> lw r_A, offset(M) */
			ctx->flags |= SEEN_MEM | SEEN_A;
			emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_LD | BPF_W | BPF_ABS:
			/* A <- P[k:4] */
			load_order = 2;
			goto load;
		case BPF_LD | BPF_H | BPF_ABS:
			/* A <- P[k:2] */
			load_order = 1;
			goto load;
		case BPF_LD | BPF_B | BPF_ABS:
			/* A <- P[k:1] */
			load_order = 0;
load:
			/* the interpreter will deal with the negative K */
			if ((int)k < 0)
				return -ENOTSUPP;

			emit_load_imm(r_off, k, ctx);
load_common:
			/*
			 * We may get here from the indirect loads, so
			 * return if the offset is negative.
			 */
			emit_slt(r_s0, r_off, r_zero, ctx);
			emit_bcond(MIPS_COND_NE, r_s0, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_zero, ctx);

			ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 |
				SEEN_SKB | SEEN_A;

			emit_load_func(r_s0, (ptr)load_func[load_order],
				       ctx);
			emit_reg_move(MIPS_R_A0, r_skb, ctx);
			emit_jalr(MIPS_R_RA, r_s0, ctx);
			/* Load second argument to delay slot */
			emit_reg_move(MIPS_R_A1, r_off, ctx);
			/* Check the error value */
			if (config_enabled(CONFIG_64BIT)) {
				/* Get error code from the top 32-bits */
				emit_dsrl32(r_s0, r_val, 0, ctx);
				/* Branch to 3 instructions ahead */
				emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2,
					   ctx);
			} else {
				/* Branch to 3 instructions ahead */
				emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2,
					   ctx);
			}
			emit_nop(ctx);
			/* We are good */
			emit_b(b_imm(i + 1, ctx), ctx);
			emit_jit_reg_move(r_A, r_val, ctx);
			/* Return with error */
			emit_b(b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_zero, ctx);
			break;
		case BPF_LD | BPF_W | BPF_IND:
			/* A <- P[X + k:4] */
			load_order = 2;
			goto load_ind;
		case BPF_LD | BPF_H | BPF_IND:
			/* A <- P[X + k:2] */
			load_order = 1;
			goto load_ind;
		case BPF_LD | BPF_B | BPF_IND:
			/* A <- P[X + k:1] */
			load_order = 0;
load_ind:
			ctx->flags |= SEEN_OFF | SEEN_X;
			emit_addiu(r_off, r_X, k, ctx);
			goto load_common;
		case BPF_LDX | BPF_IMM:
			/* X <- k */
			ctx->flags |= SEEN_X;
			emit_load_imm(r_X, k, ctx);
			break;
		case BPF_LDX | BPF_MEM:
			/* X <- M[k] */
			ctx->flags |= SEEN_X | SEEN_MEM;
			emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_LDX | BPF_W | BPF_LEN:
			/* X <- len */
			ctx->flags |= SEEN_X | SEEN_SKB;
			off = offsetof(struct sk_buff, len);
			emit_load(r_X, r_skb, off, ctx);
			break;
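		/*
		 * BPF_LDX | BPF_B | BPF_MSH below is the classic BPF idiom
		 * for variable-length headers: e.g. for an IPv4 packet,
		 * X <- 4 * (P[k:1] & 0xf) turns the IHL field into the
		 * header length in bytes, giving the offset of the
		 * transport header.
		 */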
		case BPF_LDX | BPF_B | BPF_MSH:
			/* the interpreter will deal with the negative K */
			if ((int)k < 0)
				return -ENOTSUPP;

			/* X <- 4 * (P[k:1] & 0xf) */
			ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB;
			emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx);
			/*
			 * Loading the offset to a1 may emit two
			 * instructions, so it may not fit in the delay
			 * slot. Use a0 in the delay slot instead.
			 */
			emit_load_imm(MIPS_R_A1, k, ctx);
			emit_jalr(MIPS_R_RA, r_s0, ctx);
			emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
			/* Check the error value */
			if (config_enabled(CONFIG_64BIT)) {
				/* Top 32-bits of $v0 on 64-bit */
				emit_dsrl32(r_s0, r_val, 0, ctx);
				emit_bcond(MIPS_COND_NE, r_s0, r_zero,
					   3 << 2, ctx);
			} else {
				emit_bcond(MIPS_COND_NE, r_err, r_zero,
					   3 << 2, ctx);
			}
			/* No need for delay slot */
			/* We are good */
			/* X <- P[k:1] & 0xf */
			emit_andi(r_X, r_val, 0xf, ctx);
			/* X <<= 2 */
			emit_b(b_imm(i + 1, ctx), ctx);
			emit_sll(r_X, r_X, 2, ctx); /* delay slot */
			/* Return with error */
			emit_b(b_imm(prog->len, ctx), ctx);
			emit_load_imm(r_ret, 0, ctx); /* delay slot */
			break;
		case BPF_ST:
			/* M[k] <- A */
			ctx->flags |= SEEN_MEM | SEEN_A;
			emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_STX:
			/* M[k] <- X */
			ctx->flags |= SEEN_MEM | SEEN_X;
			emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_ALU | BPF_ADD | BPF_K:
			/* A += K */
			ctx->flags |= SEEN_A;
			emit_addiu(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_ADD | BPF_X:
			/* A += X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_addu(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_SUB | BPF_K:
			/* A -= K */
			ctx->flags |= SEEN_A;
			emit_addiu(r_A, r_A, -k, ctx);
			break;
		case BPF_ALU | BPF_SUB | BPF_X:
			/* A -= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_subu(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_MUL | BPF_K:
			/* A *= K */
			/* Load K to scratch register before MUL */
			ctx->flags |= SEEN_A | SEEN_S0;
			emit_load_imm(r_s0, k, ctx);
			emit_mul(r_A, r_A, r_s0, ctx);
			break;
		case BPF_ALU | BPF_MUL | BPF_X:
			/* A *= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_mul(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_DIV | BPF_K:
			/* A /= k */
			if (k == 1)
				break;
			if (optimize_div(&k)) {
				ctx->flags |= SEEN_A;
				emit_srl(r_A, r_A, k, ctx);
				break;
			}
			ctx->flags |= SEEN_A | SEEN_S0;
			emit_load_imm(r_s0, k, ctx);
			emit_div(r_A, r_s0, ctx);
			break;
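		/*
		 * optimize_div() above rewrites power-of-two divisors in
		 * place: e.g. A /= 8 becomes "srl r_A, r_A, 3", while
		 * A /= 7 still pays for a full divu.
		 */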
		case BPF_ALU | BPF_MOD | BPF_K:
			/* A %= k */
			if (k == 1) {
				ctx->flags |= SEEN_A;
				emit_jit_reg_move(r_A, r_zero, ctx);
			} else {
				ctx->flags |= SEEN_A | SEEN_S0;
				emit_load_imm(r_s0, k, ctx);
				emit_mod(r_A, r_s0, ctx);
			}
			break;
		case BPF_ALU | BPF_DIV | BPF_X:
			/* A /= X */
			ctx->flags |= SEEN_X | SEEN_A;
			/* Check if r_X is zero */
			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_load_imm(r_val, 0, ctx); /* delay slot */
			emit_div(r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_MOD | BPF_X:
			/* A %= X */
			ctx->flags |= SEEN_X | SEEN_A;
			/* Check if r_X is zero */
			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_load_imm(r_val, 0, ctx); /* delay slot */
			emit_mod(r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_OR | BPF_K:
			/* A |= K */
			ctx->flags |= SEEN_A;
			emit_ori(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_OR | BPF_X:
			/* A |= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_or(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_XOR | BPF_K:
			/* A ^= k */
			ctx->flags |= SEEN_A;
			emit_xori(r_A, r_A, k, ctx);
			break;
		case BPF_ANC | SKF_AD_ALU_XOR_X:
		case BPF_ALU | BPF_XOR | BPF_X:
			/* A ^= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_xor(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			/* A &= K */
			ctx->flags |= SEEN_A;
			emit_andi(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_AND | BPF_X:
			/* A &= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_and(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
			/* A <<= K */
			ctx->flags |= SEEN_A;
			emit_sll(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_LSH | BPF_X:
			/* A <<= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_sllv(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_RSH | BPF_K:
			/* A >>= K */
			ctx->flags |= SEEN_A;
			emit_srl(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_RSH | BPF_X:
			ctx->flags |= SEEN_A | SEEN_X;
			emit_srlv(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_NEG:
			/* A = -A */
			ctx->flags |= SEEN_A;
			emit_neg(r_A, ctx);
			break;
		case BPF_JMP | BPF_JA:
			/* pc += K */
			emit_b(b_imm(i + k + 1, ctx), ctx);
			emit_nop(ctx);
			break;
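		/*
		 * The conditional jumps below all funnel into jmp_cmp.
		 * For "jge A, K" the emitted sequence is, in sketch form:
		 *	sltiu	r_s0, r_A, K	# r_s0 = (A < K)
		 *	bne	r_s0, $zero, <jf target>
		 *	nop
		 *	b	<jt target>
		 *	nop
		 * with the GT variants adding an equality check before
		 * taking the true branch.
		 */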
		case BPF_JMP | BPF_JEQ | BPF_K:
			/* pc += ( A == K ) ? pc->jt : pc->jf */
			condt = MIPS_COND_EQ | MIPS_COND_K;
			goto jmp_cmp;
		case BPF_JMP | BPF_JEQ | BPF_X:
			ctx->flags |= SEEN_X;
			/* pc += ( A == X ) ? pc->jt : pc->jf */
			condt = MIPS_COND_EQ | MIPS_COND_X;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGE | BPF_K:
			/* pc += ( A >= K ) ? pc->jt : pc->jf */
			condt = MIPS_COND_GE | MIPS_COND_K;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGE | BPF_X:
			ctx->flags |= SEEN_X;
			/* pc += ( A >= X ) ? pc->jt : pc->jf */
			condt = MIPS_COND_GE | MIPS_COND_X;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGT | BPF_K:
			/* pc += ( A > K ) ? pc->jt : pc->jf */
			condt = MIPS_COND_GT | MIPS_COND_K;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGT | BPF_X:
			ctx->flags |= SEEN_X;
			/* pc += ( A > X ) ? pc->jt : pc->jf */
			condt = MIPS_COND_GT | MIPS_COND_X;
jmp_cmp:
			/* Greater or Equal */
			if ((condt & MIPS_COND_GE) ||
			    (condt & MIPS_COND_GT)) {
				if (condt & MIPS_COND_K) { /* K */
					ctx->flags |= SEEN_S0 | SEEN_A;
					emit_sltiu(r_s0, r_A, k, ctx);
				} else { /* X */
					ctx->flags |= SEEN_S0 | SEEN_A |
						SEEN_X;
					emit_sltu(r_s0, r_A, r_X, ctx);
				}
				/* A < (K|X) ? r_s0 = 1 */
				b_off = b_imm(i + inst->jf + 1, ctx);
				emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
					   ctx);
				emit_nop(ctx);
				/* A > (K|X) ? scratch = 0 */
				if (condt & MIPS_COND_GT) {
					/* Checking for equality */
					ctx->flags |= SEEN_S0 | SEEN_A | SEEN_X;
					if (condt & MIPS_COND_K)
						emit_load_imm(r_s0, k, ctx);
					else
						emit_jit_reg_move(r_s0, r_X,
								  ctx);
					b_off = b_imm(i + inst->jf + 1, ctx);
					emit_bcond(MIPS_COND_EQ, r_A, r_s0,
						   b_off, ctx);
					emit_nop(ctx);
					/* Finally, A > K|X */
					b_off = b_imm(i + inst->jt + 1, ctx);
					emit_b(b_off, ctx);
					emit_nop(ctx);
				} else {
					/* A >= (K|X) so jump */
					b_off = b_imm(i + inst->jt + 1, ctx);
					emit_b(b_off, ctx);
					emit_nop(ctx);
				}
			} else {
				/* A == K|X */
				if (condt & MIPS_COND_K) { /* K */
					ctx->flags |= SEEN_S0 | SEEN_A;
					emit_load_imm(r_s0, k, ctx);
					/* jump true */
					b_off = b_imm(i + inst->jt + 1, ctx);
					emit_bcond(MIPS_COND_EQ, r_A, r_s0,
						   b_off, ctx);
					emit_nop(ctx);
					/* jump false */
					b_off = b_imm(i + inst->jf + 1,
						      ctx);
					emit_bcond(MIPS_COND_NE, r_A, r_s0,
						   b_off, ctx);
					emit_nop(ctx);
				} else { /* X */
					/* jump true */
					ctx->flags |= SEEN_A | SEEN_X;
					b_off = b_imm(i + inst->jt + 1,
						      ctx);
					emit_bcond(MIPS_COND_EQ, r_A, r_X,
						   b_off, ctx);
					emit_nop(ctx);
					/* jump false */
					b_off = b_imm(i + inst->jf + 1, ctx);
					emit_bcond(MIPS_COND_NE, r_A, r_X,
						   b_off, ctx);
					emit_nop(ctx);
				}
			}
			break;
		case BPF_JMP | BPF_JSET | BPF_K:
			ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A;
			/* pc += (A & K) ? pc->jt : pc->jf */
			emit_load_imm(r_s1, k, ctx);
			emit_and(r_s0, r_A, r_s1, ctx);
			/* jump true */
			b_off = b_imm(i + inst->jt + 1, ctx);
			emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
			emit_nop(ctx);
			/* jump false */
			b_off = b_imm(i + inst->jf + 1, ctx);
			emit_b(b_off, ctx);
			emit_nop(ctx);
			break;
		case BPF_JMP | BPF_JSET | BPF_X:
			ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A;
			/* pc += (A & X) ? pc->jt : pc->jf */
			emit_and(r_s0, r_A, r_X, ctx);
			/* jump true */
			b_off = b_imm(i + inst->jt + 1, ctx);
			emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
			emit_nop(ctx);
			/* jump false */
			b_off = b_imm(i + inst->jf + 1, ctx);
			emit_b(b_off, ctx);
			emit_nop(ctx);
			break;
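		/*
		 * Both BPF_RET flavours below leave the verdict in r_ret,
		 * with classic BPF semantics: zero drops the packet, any
		 * other value is the number of bytes of the packet to
		 * accept.  The error paths above branch to the epilogue
		 * with r_ret == 0 for the same reason.
		 */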
		case BPF_RET | BPF_A:
			ctx->flags |= SEEN_A;
			if (i != prog->len - 1)
				/*
				 * If this is not the last instruction
				 * then jump to the epilogue
				 */
				emit_b(b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_A, ctx); /* delay slot */
			break;
		case BPF_RET | BPF_K:
			/*
			 * It can emit two instructions so it does not fit
			 * in the delay slot.
			 */
			emit_load_imm(r_ret, k, ctx);
			if (i != prog->len - 1) {
				/*
				 * If this is not the last instruction
				 * then jump to the epilogue
				 */
				emit_b(b_imm(prog->len, ctx), ctx);
				emit_nop(ctx);
			}
			break;
		case BPF_MISC | BPF_TAX:
			/* X = A */
			ctx->flags |= SEEN_X | SEEN_A;
			emit_jit_reg_move(r_X, r_A, ctx);
			break;
		case BPF_MISC | BPF_TXA:
			/* A = X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_jit_reg_move(r_A, r_X, ctx);
			break;
		/* AUX */
		case BPF_ANC | SKF_AD_PROTOCOL:
			/* A = ntohs(skb->protocol) */
			ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  protocol) != 2);
			off = offsetof(struct sk_buff, protocol);
			emit_half_load(r_A, r_skb, off, ctx);
#ifdef CONFIG_CPU_LITTLE_ENDIAN
			/* This needs little endian fixup */
			if (cpu_has_wsbh) {
				/* R2 and later have the wsbh instruction */
				emit_wsbh(r_A, r_A, ctx);
			} else {
				/* Get first byte */
				emit_andi(r_tmp_imm, r_A, 0xff, ctx);
				/* Shift it */
				emit_sll(r_tmp, r_tmp_imm, 8, ctx);
				/* Get second byte */
				emit_srl(r_tmp_imm, r_A, 8, ctx);
				emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx);
				/* Put everything together in r_A */
				emit_or(r_A, r_tmp, r_tmp_imm, ctx);
			}
#endif
			break;
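		/*
		 * Byte-swap example for the fallback above: skb->protocol
		 * is stored in network byte order, so on a little-endian
		 * CPU a halfword load of ETH_P_IP reads 0x0008.  The
		 * andi/sll/srl/or sequence (or a single wsbh on R2 and
		 * later) turns that back into 0x0800.
		 */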
		case BPF_ANC | SKF_AD_CPU:
			ctx->flags |= SEEN_A | SEEN_OFF;
			/* A = current_thread_info()->cpu */
			BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info,
						  cpu) != 4);
			off = offsetof(struct thread_info, cpu);
			/* $28/gp points to the thread_info struct */
			emit_load(r_A, 28, off, ctx);
			break;
		case BPF_ANC | SKF_AD_IFINDEX:
			/* A = skb->dev->ifindex */
			ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0;
			off = offsetof(struct sk_buff, dev);
			/* Load *dev pointer */
			emit_load_ptr(r_s0, r_skb, off, ctx);
			/* error (0) in the delay slot */
			emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_zero, ctx);
			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
						  ifindex) != 4);
			off = offsetof(struct net_device, ifindex);
			emit_load(r_A, r_s0, off, ctx);
			break;
		case BPF_ANC | SKF_AD_MARK:
			ctx->flags |= SEEN_SKB | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
			off = offsetof(struct sk_buff, mark);
			emit_load(r_A, r_skb, off, ctx);
			break;
		case BPF_ANC | SKF_AD_RXHASH:
			ctx->flags |= SEEN_SKB | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
			off = offsetof(struct sk_buff, hash);
			emit_load(r_A, r_skb, off, ctx);
			break;
		case BPF_ANC | SKF_AD_VLAN_TAG:
		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
			ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  vlan_tci) != 2);
			off = offsetof(struct sk_buff, vlan_tci);
			emit_half_load(r_s0, r_skb, off, ctx);
			if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
				emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx);
			} else {
				emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
				/* return 1 if present */
				emit_sltu(r_A, r_zero, r_A, ctx);
			}
			break;
		case BPF_ANC | SKF_AD_PKTTYPE:
			ctx->flags |= SEEN_SKB;

			emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx);
			/* Keep only the last 3 bits */
			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
#ifdef __BIG_ENDIAN_BITFIELD
			/* Get the actual packet type to the lower 3 bits */
			emit_srl(r_A, r_A, 5, ctx);
#endif
			break;
		case BPF_ANC | SKF_AD_QUEUE:
			ctx->flags |= SEEN_SKB | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  queue_mapping) != 2);
			BUILD_BUG_ON(offsetof(struct sk_buff,
					      queue_mapping) > 0xff);
			off = offsetof(struct sk_buff, queue_mapping);
			emit_half_load(r_A, r_skb, off, ctx);
			break;
		default:
			pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
				 inst->code);
			return -1;
		}
	}

	/* compute offsets only during the first pass */
	if (ctx->target == NULL)
		ctx->offsets[i] = ctx->idx * 4;

	return 0;
}

int bpf_jit_enable __read_mostly;

void bpf_jit_compile(struct bpf_prog *fp)
{
	struct jit_ctx ctx;
	unsigned int alloc_size, tmp_idx;

	if (!bpf_jit_enable)
		return;

	memset(&ctx, 0, sizeof(ctx));

	ctx.offsets = kcalloc(fp->len, sizeof(*ctx.offsets), GFP_KERNEL);
	if (ctx.offsets == NULL)
		return;

	ctx.skf = fp;

	if (build_body(&ctx))
		goto out;

	tmp_idx = ctx.idx;
	build_prologue(&ctx);
	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
	/* just to complete the ctx.idx count */
	build_epilogue(&ctx);

	alloc_size = 4 * ctx.idx;
	ctx.target = module_alloc(alloc_size);
	if (ctx.target == NULL)
		goto out;

	/* Clean it */
	memset(ctx.target, 0, alloc_size);

	ctx.idx = 0;

	/* Generate the actual JIT code */
	build_prologue(&ctx);
	build_body(&ctx);
	build_epilogue(&ctx);

	/* Update the icache */
	flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx));

	if (bpf_jit_enable > 1)
		/* Dump JIT code */
		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);

	fp->bpf_func = (void *)ctx.target;
	fp->jited = true;

out:
	kfree(ctx.offsets);
}

void bpf_jit_free(struct bpf_prog *fp)
{
	if (fp->jited)
		module_memfree(fp->bpf_func);

	bpf_prog_unlock_free(fp);
}
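/*
 * The JIT is opt-in: writing 1 to /proc/sys/net/core/bpf_jit_enable
 * turns it on, and writing 2 additionally dumps each compiled image
 * via bpf_jit_dump() (the bpf_jit_enable > 1 check above).
 */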