/*
 * Just-In-Time compiler for BPF filters on MIPS
 *
 * Copyright (c) 2014 Imagination Technologies Ltd.
 * Author: Markos Chandras <markos.chandras@imgtec.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 */

#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/kconfig.h>
#include <linux/moduleloader.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <asm/bitops.h>
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/uasm.h>

#include "bpf_jit.h"

/* ABI
 *
 * s0	1st scratch register
 * s1	2nd scratch register
 * s2	offset register
 * s3	BPF register A
 * s4	BPF register X
 * s5	*skb
 * s6	*scratch memory
 *
 * On entry (*bpf_func)(*skb, *filter)
 * a0 = MIPS_R_A0 = skb;
 * a1 = MIPS_R_A1 = filter;
 *
 * Stack
 * ...
 * M[15]
 * M[14]
 * M[13]
 * ...
 * M[0] <-- r_M
 * saved reg k-1
 * saved reg k-2
 * ...
 * saved reg 0 <-- r_sp
 * <no argument area>
 *
 *                     Packet layout
 *
 * <--------------------- len ------------------------>
 * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------>
 * ----------------------------------------------------
 * |                  skb->data                       |
 * ----------------------------------------------------
 */

#define RSIZE	(sizeof(unsigned long))
#define ptr typeof(unsigned long)

/* ABI specific return values */
#ifdef CONFIG_32BIT /* O32 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define r_err	MIPS_R_V1
#define r_val	MIPS_R_V0
#else /* CONFIG_CPU_LITTLE_ENDIAN */
#define r_err	MIPS_R_V0
#define r_val	MIPS_R_V1
#endif
#else /* N64 */
#define r_err	MIPS_R_V0
#define r_val	MIPS_R_V0
#endif

#define r_ret	MIPS_R_V0

/*
 * Use 2 scratch registers to avoid pipeline interlocks.
 * There is no overhead during epilogue and prologue since
 * any of the $s0-$s6 registers will only be preserved if
 * they are actually used.
 */
#define r_s0		MIPS_R_S0 /* scratch reg 1 */
#define r_s1		MIPS_R_S1 /* scratch reg 2 */
#define r_off		MIPS_R_S2
#define r_A		MIPS_R_S3
#define r_X		MIPS_R_S4
#define r_skb		MIPS_R_S5
#define r_M		MIPS_R_S6
#define r_tmp_imm	MIPS_R_T6 /* No need to preserve this */
#define r_tmp		MIPS_R_T7 /* No need to preserve this */
#define r_zero		MIPS_R_ZERO
#define r_sp		MIPS_R_SP
#define r_ra		MIPS_R_RA

#define SCRATCH_OFF(k)		(4 * (k))

/* JIT flags */
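/*
 * SEEN_CALL (bit BPF_MEMWORDS) records that the filter calls out to a
 * helper; the bits from SEEN_SREG_SFT upwards map one-to-one onto the
 * callee-saved registers $s0..$s6, which lets save_bpf_jit_regs() and
 * restore_bpf_jit_regs() walk them as a bitmap.
 */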
#define SEEN_CALL		(1 << BPF_MEMWORDS)
#define SEEN_SREG_SFT		(BPF_MEMWORDS + 1)
#define SEEN_SREG_BASE		(1 << SEEN_SREG_SFT)
#define SEEN_SREG(x)		(SEEN_SREG_BASE << (x))
#define SEEN_S0			SEEN_SREG(0)
#define SEEN_S1			SEEN_SREG(1)
#define SEEN_OFF		SEEN_SREG(2)
#define SEEN_A			SEEN_SREG(3)
#define SEEN_X			SEEN_SREG(4)
#define SEEN_SKB		SEEN_SREG(5)
#define SEEN_MEM		SEEN_SREG(6)

/* Arguments used by JIT */
#define ARGS_USED_BY_JIT	2 /* only applicable to 64-bit */

#define SBIT(x)			(1 << (x)) /* Signed version of BIT() */

/**
 * struct jit_ctx - JIT context
 * @skf:		The sk_filter
 * @prologue_bytes:	Number of bytes for prologue
 * @idx:		Instruction index
 * @flags:		JIT flags
 * @offsets:		Instruction offsets
 * @target:		Memory location for the compiled filter
 */
struct jit_ctx {
	const struct bpf_prog *skf;
	unsigned int prologue_bytes;
	u32 idx;
	u32 flags;
	u32 *offsets;
	u32 *target;
};

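/*
 * The classic BPF checker rejects a constant divisor of 0, so *k is at
 * least 1 here and ilog2() is well-defined. A power-of-2 divisor is
 * rewritten into its log2 so the caller can emit a cheap right shift
 * instead of a divu.
 */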
static inline int optimize_div(u32 *k)
{
	/* power of 2 divides can be implemented with right shift */
	if (!(*k & (*k-1))) {
		*k = ilog2(*k);
		return 1;
	}

	return 0;
}

static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx);

/* Simply emit the instruction if the JIT memory space has been allocated */
#define emit_instr(ctx, func, ...)			\
do {							\
	if ((ctx)->target != NULL) {			\
		u32 *p = &(ctx)->target[ctx->idx];	\
		uasm_i_##func(&p, ##__VA_ARGS__);	\
	}						\
	(ctx)->idx++;					\
} while (0)
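
/*
 * Both passes go through these emit helpers: with ctx->target == NULL
 * only ctx->idx is advanced, which is how the first pass measures the
 * size of the final image and records the instruction offsets.
 */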

/*
 * Similar to emit_instr but it must be used when we need to emit
 * 32-bit or 64-bit instructions
 */
#define emit_long_instr(ctx, func, ...)			\
do {							\
	if ((ctx)->target != NULL) {			\
		u32 *p = &(ctx)->target[ctx->idx];	\
		UASM_i_##func(&p, ##__VA_ARGS__);	\
	}						\
	(ctx)->idx++;					\
} while (0)

/* Determine if immediate is within the 16-bit signed range */
static inline bool is_range16(s32 imm)
{
	return !(imm >= SBIT(15) || imm < -SBIT(15));
}
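
/*
 * e.g. is_range16(32767) and is_range16(-32768) hold, while
 * is_range16(32768) does not.
 */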

static inline void emit_addu(unsigned int dst, unsigned int src1,
			     unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, addu, dst, src1, src2);
}

static inline void emit_nop(struct jit_ctx *ctx)
{
	emit_instr(ctx, nop);
}

/* Load a u32 immediate to a register */
static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		/* addiu can only handle s16 */
		if (!is_range16(imm)) {
			u32 *p = &ctx->target[ctx->idx];
			uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
			p = &ctx->target[ctx->idx + 1];
			uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff);
		} else {
			u32 *p = &ctx->target[ctx->idx];
			uasm_i_addiu(&p, dst, r_zero, imm);
		}
	}
	ctx->idx++;

	if (!is_range16(imm))
		ctx->idx++;
}

static inline void emit_or(unsigned int dst, unsigned int src1,
			   unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, or, dst, src1, src2);
}

static inline void emit_ori(unsigned int dst, unsigned src, u32 imm,
			    struct jit_ctx *ctx)
{
	if (imm >= BIT(16)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_or(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, ori, dst, src, imm);
	}
}

static inline void emit_daddiu(unsigned int dst, unsigned int src,
			       int imm, struct jit_ctx *ctx)
{
	/*
	 * Only used for stack, so the imm is relatively small
	 * and it fits in a signed 16-bit immediate
	 */
	emit_instr(ctx, daddiu, dst, src, imm);
}

static inline void emit_addiu(unsigned int dst, unsigned int src,
			      u32 imm, struct jit_ctx *ctx)
{
	if (!is_range16(imm)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_addu(dst, r_tmp, src, ctx);
	} else {
		emit_instr(ctx, addiu, dst, src, imm);
	}
}

static inline void emit_and(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, and, dst, src1, src2);
}

static inline void emit_andi(unsigned int dst, unsigned int src,
			     u32 imm, struct jit_ctx *ctx)
{
	/* If imm does not fit in u16 then load it to register */
	if (imm >= BIT(16)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_and(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, andi, dst, src, imm);
	}
}

static inline void emit_xor(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, xor, dst, src1, src2);
}

static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx)
{
	/* If imm does not fit in u16 then load it to register */
	if (imm >= BIT(16)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_xor(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, xori, dst, src, imm);
	}
}

static inline void emit_stack_offset(int offset, struct jit_ctx *ctx)
{
	emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset);
}

static inline void emit_subu(unsigned int dst, unsigned int src1,
			     unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, subu, dst, src1, src2);
}

static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx)
{
	emit_subu(reg, r_zero, reg, ctx);
}

static inline void emit_sllv(unsigned int dst, unsigned int src,
			     unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, sllv, dst, src, sa);
}

static inline void emit_sll(unsigned int dst, unsigned int src,
			    unsigned int sa, struct jit_ctx *ctx)
{
	/* sa is 5 bits wide */
	if (sa >= BIT(5))
		/* Shifting >= 32 results in zero */
		emit_jit_reg_move(dst, r_zero, ctx);
	else
		emit_instr(ctx, sll, dst, src, sa);
}

static inline void emit_srlv(unsigned int dst, unsigned int src,
			     unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, srlv, dst, src, sa);
}

static inline void emit_srl(unsigned int dst, unsigned int src,
			    unsigned int sa, struct jit_ctx *ctx)
{
	/* sa is 5 bits wide */
	if (sa >= BIT(5))
		/* Shifting >= 32 results in zero */
		emit_jit_reg_move(dst, r_zero, ctx);
	else
		emit_instr(ctx, srl, dst, src, sa);
}

static inline void emit_slt(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, slt, dst, src1, src2);
}

static inline void emit_sltu(unsigned int dst, unsigned int src1,
			     unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, sltu, dst, src1, src2);
}

static inline void emit_sltiu(unsigned dst, unsigned int src,
			      unsigned int imm, struct jit_ctx *ctx)
{
	/* sltiu takes a 16-bit signed immediate */
	if (!is_range16((s32)imm)) {
		emit_load_imm(r_tmp, imm, ctx);
		emit_sltu(dst, src, r_tmp, ctx);
	} else {
		emit_instr(ctx, sltiu, dst, src, imm);
	}
}

/* Store register on the stack */
static inline void emit_store_stack_reg(ptr reg, ptr base,
					unsigned int offset,
					struct jit_ctx *ctx)
{
	emit_long_instr(ctx, SW, reg, offset, base);
}

static inline void emit_store(ptr reg, ptr base, unsigned int offset,
			      struct jit_ctx *ctx)
{
	emit_instr(ctx, sw, reg, offset, base);
}

static inline void emit_load_stack_reg(ptr reg, ptr base,
				       unsigned int offset,
				       struct jit_ctx *ctx)
{
	emit_long_instr(ctx, LW, reg, offset, base);
}

static inline void emit_load(unsigned int reg, unsigned int base,
			     unsigned int offset, struct jit_ctx *ctx)
{
	emit_instr(ctx, lw, reg, offset, base);
}

static inline void emit_load_byte(unsigned int reg, unsigned int base,
				  unsigned int offset, struct jit_ctx *ctx)
{
	emit_instr(ctx, lb, reg, offset, base);
}

static inline void emit_half_load(unsigned int reg, unsigned int base,
				  unsigned int offset, struct jit_ctx *ctx)
{
	emit_instr(ctx, lh, reg, offset, base);
}

static inline void emit_mul(unsigned int dst, unsigned int src1,
			    unsigned int src2, struct jit_ctx *ctx)
{
	emit_instr(ctx, mul, dst, src1, src2);
}

static inline void emit_div(unsigned int dst, unsigned int src,
			    struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		u32 *p = &ctx->target[ctx->idx];
		uasm_i_divu(&p, dst, src);
		p = &ctx->target[ctx->idx + 1];
		uasm_i_mflo(&p, dst);
	}
	ctx->idx += 2; /* 2 insts */
}

static inline void emit_mod(unsigned int dst, unsigned int src,
			    struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		u32 *p = &ctx->target[ctx->idx];
		uasm_i_divu(&p, dst, src);
		p = &ctx->target[ctx->idx + 1];
		uasm_i_mfhi(&p, dst);
	}
	ctx->idx += 2; /* 2 insts */
}

static inline void emit_dsll(unsigned int dst, unsigned int src,
			     unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, dsll, dst, src, sa);
}

static inline void emit_dsrl32(unsigned int dst, unsigned int src,
			       unsigned int sa, struct jit_ctx *ctx)
{
	emit_instr(ctx, dsrl32, dst, src, sa);
}

static inline void emit_wsbh(unsigned int dst, unsigned int src,
			     struct jit_ctx *ctx)
{
	emit_instr(ctx, wsbh, dst, src);
}

/* load pointer to register */
static inline void emit_load_ptr(unsigned int dst, unsigned int src,
				     int imm, struct jit_ctx *ctx)
{
	/* src contains the base addr of the 32/64-bit pointer */
	emit_long_instr(ctx, LW, dst, imm, src);
}

/* load a function pointer to register */
static inline void emit_load_func(unsigned int reg, ptr imm,
				  struct jit_ctx *ctx)
{
	if (config_enabled(CONFIG_64BIT)) {
		/* At this point imm is always 64-bit */
		emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
		emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
		emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx);
		emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
		emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx);
	} else {
		emit_load_imm(reg, imm, ctx);
	}
}
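
/*
 * On 64-bit the sequence above builds the address piecewise: the top
 * 32 bits via emit_load_imm(), then two dsll-by-16/ori steps fold in
 * bits 31..16 and 15..0.
 */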

/* Move to real MIPS register */
static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
{
	emit_long_instr(ctx, ADDU, dst, src, r_zero);
}

/* Move to JIT (32-bit) register */
static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
{
	emit_addu(dst, src, r_zero, ctx);
}

/* Compute the immediate value for PC-relative branches. */
static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
{
	if (ctx->target == NULL)
		return 0;

	/*
	 * We want a pc-relative branch. We only do forward branches
	 * so tgt is always after pc. tgt is the instruction offset
	 * we want to jump to.
	 *
	 * Branch on MIPS:
	 * I: target_offset <- sign_extend(offset)
	 * I+1: PC += target_offset (delay slot)
	 *
	 * ctx->idx currently points to the branch instruction
	 * but the offset is added to the delay slot so we need
	 * to subtract 4.
	 */
	return ctx->offsets[tgt] -
		(ctx->idx * 4 - ctx->prologue_bytes) - 4;
}
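
/*
 * Example: a branch emitted at body offset 12 towards a target with
 * offsets[tgt] == 32 gets an immediate of 32 - 12 - 4 = 16 bytes,
 * i.e. four instructions past the delay slot.
 */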

static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2,
			     unsigned int imm, struct jit_ctx *ctx)
{
	if (ctx->target != NULL) {
		u32 *p = &ctx->target[ctx->idx];

		switch (cond) {
		case MIPS_COND_EQ:
			uasm_i_beq(&p, reg1, reg2, imm);
			break;
		case MIPS_COND_NE:
			uasm_i_bne(&p, reg1, reg2, imm);
			break;
		case MIPS_COND_ALL:
			uasm_i_b(&p, imm);
			break;
		default:
			pr_warn("%s: Unhandled branch conditional: %d\n",
				__func__, cond);
		}
	}
	ctx->idx++;
}

static inline void emit_b(unsigned int imm, struct jit_ctx *ctx)
{
	emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx);
}

static inline void emit_jalr(unsigned int link, unsigned int reg,
			     struct jit_ctx *ctx)
{
	emit_instr(ctx, jalr, link, reg);
}

static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
{
	emit_instr(ctx, jr, reg);
}

static inline u16 align_sp(unsigned int num)
{
	/* Double word alignment for 32-bit, quadword for 64-bit */
	unsigned int align = config_enabled(CONFIG_64BIT) ? 16 : 8;
	num = (num + (align - 1)) & -align;
	return num;
}
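
/* e.g. align_sp(20) is 24 with 8-byte alignment and 32 with 16-byte */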

static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
{
	int i = 0, real_off = 0;
	u32 sflags, tmp_flags;

	/* Adjust the stack pointer */
	emit_stack_offset(-align_sp(offset), ctx);

	if (ctx->flags & SEEN_CALL) {
		/* Argument save area */
		if (config_enabled(CONFIG_64BIT))
			/* Bottom of current frame */
			real_off = align_sp(offset) - RSIZE;
		else
			/* Top of previous frame */
			real_off = align_sp(offset) + RSIZE;
		emit_store_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
		emit_store_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);

		real_off = 0;
	}

	tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
	/* sflags is essentially a bitmap */
	while (tmp_flags) {
		if ((sflags >> i) & 0x1) {
			emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
					     ctx);
			real_off += RSIZE;
		}
		i++;
		tmp_flags >>= 1;
	}

	/* save return address */
	if (ctx->flags & SEEN_CALL) {
		emit_store_stack_reg(r_ra, r_sp, real_off, ctx);
		real_off += RSIZE;
	}

	/* Setup r_M leaving the alignment gap if necessary */
	if (ctx->flags & SEEN_MEM) {
		if (real_off % (RSIZE * 2))
			real_off += RSIZE;
		emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off);
	}
}

static void restore_bpf_jit_regs(struct jit_ctx *ctx,
				 unsigned int offset)
{
	int i, real_off = 0;
	u32 sflags, tmp_flags;

	if (ctx->flags & SEEN_CALL) {
		if (config_enabled(CONFIG_64BIT))
			/* Bottom of current frame */
			real_off = align_sp(offset) - RSIZE;
		else
			/* Top of previous frame */
			real_off = align_sp(offset) + RSIZE;
		emit_load_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
		emit_load_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);

		real_off = 0;
	}

	tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
	/* sflags is a bitmap */
	i = 0;
	while (tmp_flags) {
		if ((sflags >> i) & 0x1) {
			emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
					    ctx);
			real_off += RSIZE;
		}
		i++;
		tmp_flags >>= 1;
	}

	/* restore return address */
	if (ctx->flags & SEEN_CALL)
		emit_load_stack_reg(r_ra, r_sp, real_off, ctx);

	/* Restore the sp and discard the scratch memory */
	emit_stack_offset(align_sp(offset), ctx);
}

static unsigned int get_stack_depth(struct jit_ctx *ctx)
{
	int sp_off = 0;

	/* How many $s* registers do we need to preserve? */
	sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * RSIZE;

	if (ctx->flags & SEEN_MEM)
		sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */

	if (ctx->flags & SEEN_CALL)
		/*
		 * The JIT code makes calls to external functions using 2
		 * arguments. Therefore, for o32 we don't need to allocate
		 * space because we don't care if the arguments are lost
		 * across calls. We do, however, need to preserve incoming
		 * arguments, but the space is already allocated for us by
		 * the caller. On the other hand, for n64, we need to
		 * allocate this space ourselves. We need to preserve $ra
		 * as well.
		 */
		sp_off += config_enabled(CONFIG_64BIT) ?
			(ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE;

	return sp_off;
}
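
/*
 * Example: a 32-bit filter that uses A, X and scratch memory
 * (SEEN_A | SEEN_X | SEEN_MEM) needs 3 * RSIZE bytes to preserve
 * $s3, $s4 and $s6, plus 4 * BPF_MEMWORDS bytes of scratch, before
 * align_sp() rounds the total up.
 */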

static void build_prologue(struct jit_ctx *ctx)
{
	int sp_off;

	/* Calculate the total offset for the stack pointer */
	sp_off = get_stack_depth(ctx);
	save_bpf_jit_regs(ctx, sp_off);

	if (ctx->flags & SEEN_SKB)
		emit_reg_move(r_skb, MIPS_R_A0, ctx);

	if (ctx->flags & SEEN_X)
		emit_jit_reg_move(r_X, r_zero, ctx);

	/* Do not leak kernel data to userspace */
	if (bpf_needs_clear_a(&ctx->skf->insns[0]))
		emit_jit_reg_move(r_A, r_zero, ctx);
}

static void build_epilogue(struct jit_ctx *ctx)
{
	unsigned int sp_off;

	/* Calculate the total offset for the stack pointer */
	sp_off = get_stack_depth(ctx);
	restore_bpf_jit_regs(ctx, sp_off);

	/* Return */
	emit_jr(r_ra, ctx);
	emit_nop(ctx);
}

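/*
 * Helpers called from JITed code to fetch packet bytes that may live
 * in non-linear skb data. The skb_copy_bits() error code travels in
 * the upper 32 bits of the return value and the (network-to-host
 * converted) data in the lower 32 bits, which is what the r_err/r_val
 * register pair picks apart on o32.
 */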
static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
{
	u8 ret;
	int err;

	err = skb_copy_bits(skb, offset, &ret, 1);

	return (u64)err << 32 | ret;
}

static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
{
	u16 ret;
	int err;

	err = skb_copy_bits(skb, offset, &ret, 2);

	return (u64)err << 32 | ntohs(ret);
}

static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
{
	u32 ret;
	int err;

	err = skb_copy_bits(skb, offset, &ret, 4);

	return (u64)err << 32 | ntohl(ret);
}

static int build_body(struct jit_ctx *ctx)
{
	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
	const struct bpf_prog *prog = ctx->skf;
	const struct sock_filter *inst;
	unsigned int i, off, load_order, condt;
	u32 k, b_off __maybe_unused;

	for (i = 0; i < prog->len; i++) {
		u16 code;

		inst = &(prog->insns[i]);
		pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
			 __func__, inst->code, inst->jt, inst->jf, inst->k);
		k = inst->k;
		code = bpf_anc_helper(inst);

		if (ctx->target == NULL)
			ctx->offsets[i] = ctx->idx * 4;

		switch (code) {
		case BPF_LD | BPF_IMM:
			/* A <- k ==> li r_A, k */
			ctx->flags |= SEEN_A;
			emit_load_imm(r_A, k, ctx);
			break;
		case BPF_LD | BPF_W | BPF_LEN:
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
			/* A <- len ==> lw r_A, offset(skb) */
			ctx->flags |= SEEN_SKB | SEEN_A;
			off = offsetof(struct sk_buff, len);
			emit_load(r_A, r_skb, off, ctx);
			break;
		case BPF_LD | BPF_MEM:
			/* A <- M[k] ==> lw r_A, offset(M) */
			ctx->flags |= SEEN_MEM | SEEN_A;
			emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_LD | BPF_W | BPF_ABS:
			/* A <- P[k:4] */
			load_order = 2;
			goto load;
		case BPF_LD | BPF_H | BPF_ABS:
			/* A <- P[k:2] */
			load_order = 1;
			goto load;
		case BPF_LD | BPF_B | BPF_ABS:
			/* A <- P[k:1] */
			load_order = 0;
load:
			/* the interpreter will deal with the negative K */
			if ((int)k < 0)
				return -ENOTSUPP;

			emit_load_imm(r_off, k, ctx);
load_common:
			/*
			 * We may get here from the indirect loads, so
			 * return an error if the offset is negative.
			 */
			emit_slt(r_s0, r_off, r_zero, ctx);
			emit_bcond(MIPS_COND_NE, r_s0, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_zero, ctx);

			ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 |
				SEEN_SKB | SEEN_A;

			emit_load_func(r_s0, (ptr)load_func[load_order],
				      ctx);
			emit_reg_move(MIPS_R_A0, r_skb, ctx);
			emit_jalr(MIPS_R_RA, r_s0, ctx);
			/* Load second argument to delay slot */
			emit_reg_move(MIPS_R_A1, r_off, ctx);
			/* Check the error value */
			if (config_enabled(CONFIG_64BIT)) {
				/* Get error code from the top 32-bits */
				emit_dsrl32(r_s0, r_val, 0, ctx);
				/* Branch to 3 instructions ahead */
				emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2,
					   ctx);
			} else {
				/* Branch to 3 instructions ahead */
				emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2,
					   ctx);
			}
			emit_nop(ctx);
			/* We are good */
			emit_b(b_imm(i + 1, ctx), ctx);
			emit_jit_reg_move(r_A, r_val, ctx);
			/* Return with error */
			emit_b(b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_zero, ctx);
			break;
		case BPF_LD | BPF_W | BPF_IND:
			/* A <- P[X + k:4] */
			load_order = 2;
			goto load_ind;
		case BPF_LD | BPF_H | BPF_IND:
			/* A <- P[X + k:2] */
			load_order = 1;
			goto load_ind;
		case BPF_LD | BPF_B | BPF_IND:
			/* A <- P[X + k:1] */
			load_order = 0;
load_ind:
			ctx->flags |= SEEN_OFF | SEEN_X;
			emit_addiu(r_off, r_X, k, ctx);
			goto load_common;
		case BPF_LDX | BPF_IMM:
			/* X <- k */
			ctx->flags |= SEEN_X;
			emit_load_imm(r_X, k, ctx);
			break;
		case BPF_LDX | BPF_MEM:
			/* X <- M[k] */
			ctx->flags |= SEEN_X | SEEN_MEM;
			emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_LDX | BPF_W | BPF_LEN:
			/* X <- len */
			ctx->flags |= SEEN_X | SEEN_SKB;
			off = offsetof(struct sk_buff, len);
			emit_load(r_X, r_skb, off, ctx);
			break;
		case BPF_LDX | BPF_B | BPF_MSH:
			/* the interpreter will deal with the negative K */
			if ((int)k < 0)
				return -ENOTSUPP;

			/* X <- 4 * (P[k:1] & 0xf) */
			ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB;
			/* Load offset to a1 */
			emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx);
			/*
			 * Loading the immediate may emit two instructions,
			 * so it cannot go in the delay slot. Put the a0
			 * move in the delay slot instead.
			 */
			emit_load_imm(MIPS_R_A1, k, ctx);
			emit_jalr(MIPS_R_RA, r_s0, ctx);
			emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
			/* Check the error value */
			if (config_enabled(CONFIG_64BIT)) {
				/* Top 32-bits of $v0 on 64-bit */
				emit_dsrl32(r_s0, r_val, 0, ctx);
				emit_bcond(MIPS_COND_NE, r_s0, r_zero,
					   3 << 2, ctx);
			} else {
				emit_bcond(MIPS_COND_NE, r_err, r_zero,
					   3 << 2, ctx);
			}
			/* No need for delay slot */
			/* We are good */
			/* X <- P[k:1] & 0xf */
			emit_andi(r_X, r_val, 0xf, ctx);
			/* X << 2 */
			emit_b(b_imm(i + 1, ctx), ctx);
			emit_sll(r_X, r_X, 2, ctx); /* delay slot */
			/* Return with error */
			emit_b(b_imm(prog->len, ctx), ctx);
			emit_load_imm(r_ret, 0, ctx); /* delay slot */
			break;
		case BPF_ST:
			/* M[k] <- A */
			ctx->flags |= SEEN_MEM | SEEN_A;
			emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_STX:
			/* M[k] <- X */
			ctx->flags |= SEEN_MEM | SEEN_X;
			emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
			break;
		case BPF_ALU | BPF_ADD | BPF_K:
			/* A += K */
			ctx->flags |= SEEN_A;
			emit_addiu(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_ADD | BPF_X:
			/* A += X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_addu(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_SUB | BPF_K:
			/* A -= K */
			ctx->flags |= SEEN_A;
			emit_addiu(r_A, r_A, -k, ctx);
			break;
		case BPF_ALU | BPF_SUB | BPF_X:
			/* A -= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_subu(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_MUL | BPF_K:
			/* A *= K */
			/* Load K to scratch register before MUL */
			ctx->flags |= SEEN_A | SEEN_S0;
			emit_load_imm(r_s0, k, ctx);
			emit_mul(r_A, r_A, r_s0, ctx);
			break;
		case BPF_ALU | BPF_MUL | BPF_X:
			/* A *= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_mul(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_DIV | BPF_K:
			/* A /= k */
			if (k == 1)
				break;
			if (optimize_div(&k)) {
				ctx->flags |= SEEN_A;
				emit_srl(r_A, r_A, k, ctx);
				break;
			}
			ctx->flags |= SEEN_A | SEEN_S0;
			emit_load_imm(r_s0, k, ctx);
			emit_div(r_A, r_s0, ctx);
			break;
		case BPF_ALU | BPF_MOD | BPF_K:
			/* A %= k */
			if (k == 1) {
				ctx->flags |= SEEN_A;
				emit_jit_reg_move(r_A, r_zero, ctx);
			} else {
				ctx->flags |= SEEN_A | SEEN_S0;
				emit_load_imm(r_s0, k, ctx);
				emit_mod(r_A, r_s0, ctx);
			}
			break;
		case BPF_ALU | BPF_DIV | BPF_X:
			/* A /= X */
			ctx->flags |= SEEN_X | SEEN_A;
			/* Check if r_X is zero */
			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_load_imm(r_val, 0, ctx); /* delay slot */
			emit_div(r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_MOD | BPF_X:
			/* A %= X */
			ctx->flags |= SEEN_X | SEEN_A;
			/* Check if r_X is zero */
			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_load_imm(r_val, 0, ctx); /* delay slot */
			emit_mod(r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_OR | BPF_K:
			/* A |= K */
			ctx->flags |= SEEN_A;
			emit_ori(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_OR | BPF_X:
			/* A |= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_or(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_XOR | BPF_K:
			/* A ^= k */
			ctx->flags |= SEEN_A;
			emit_xori(r_A, r_A, k, ctx);
			break;
		case BPF_ANC | SKF_AD_ALU_XOR_X:
		case BPF_ALU | BPF_XOR | BPF_X:
			/* A ^= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_xor(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			/* A &= K */
			ctx->flags |= SEEN_A;
			emit_andi(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_AND | BPF_X:
			/* A &= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_and(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
			/* A <<= K */
			ctx->flags |= SEEN_A;
			emit_sll(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_LSH | BPF_X:
			/* A <<= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_sllv(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_RSH | BPF_K:
			/* A >>= K */
			ctx->flags |= SEEN_A;
			emit_srl(r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_RSH | BPF_X:
			/* A >>= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_srlv(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_NEG:
			/* A = -A */
			ctx->flags |= SEEN_A;
			emit_neg(r_A, ctx);
			break;
		case BPF_JMP | BPF_JA:
			/* pc += K */
			emit_b(b_imm(i + k + 1, ctx), ctx);
			emit_nop(ctx);
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
			/* pc += ( A == K ) ? pc->jt : pc->jf */
			condt = MIPS_COND_EQ | MIPS_COND_K;
			goto jmp_cmp;
		case BPF_JMP | BPF_JEQ | BPF_X:
			ctx->flags |= SEEN_X;
			/* pc += ( A == X ) ? pc->jt : pc->jf */
			condt = MIPS_COND_EQ | MIPS_COND_X;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGE | BPF_K:
			/* pc += ( A >= K ) ? pc->jt : pc->jf */
			condt = MIPS_COND_GE | MIPS_COND_K;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGE | BPF_X:
			ctx->flags |= SEEN_X;
			/* pc += ( A >= X ) ? pc->jt : pc->jf */
			condt = MIPS_COND_GE | MIPS_COND_X;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGT | BPF_K:
			/* pc += ( A > K ) ? pc->jt : pc->jf */
			condt = MIPS_COND_GT | MIPS_COND_K;
			goto jmp_cmp;
		case BPF_JMP | BPF_JGT | BPF_X:
			ctx->flags |= SEEN_X;
			/* pc += ( A > X ) ? pc->jt : pc->jf */
			condt = MIPS_COND_GT | MIPS_COND_X;
jmp_cmp:
			/* Greater or Equal */
			if ((condt & MIPS_COND_GE) ||
			    (condt & MIPS_COND_GT)) {
				if (condt & MIPS_COND_K) { /* K */
					ctx->flags |= SEEN_S0 | SEEN_A;
					emit_sltiu(r_s0, r_A, k, ctx);
				} else { /* X */
					ctx->flags |= SEEN_S0 | SEEN_A |
						SEEN_X;
					emit_sltu(r_s0, r_A, r_X, ctx);
				}
				/* A < (K|X) ? r_s0 = 1 */
				b_off = b_imm(i + inst->jf + 1, ctx);
				emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
					   ctx);
				emit_nop(ctx);
				/* From here on A >= (K|X) */
				if (condt & MIPS_COND_GT) {
					/* Checking for equality */
					ctx->flags |= SEEN_S0 | SEEN_A | SEEN_X;
					if (condt & MIPS_COND_K)
						emit_load_imm(r_s0, k, ctx);
					else
						emit_jit_reg_move(r_s0, r_X,
								  ctx);
					b_off = b_imm(i + inst->jf + 1, ctx);
					emit_bcond(MIPS_COND_EQ, r_A, r_s0,
						   b_off, ctx);
					emit_nop(ctx);
					/* Finally, A > K|X */
					b_off = b_imm(i + inst->jt + 1, ctx);
					emit_b(b_off, ctx);
					emit_nop(ctx);
				} else {
					/* A >= (K|X) so jump */
					b_off = b_imm(i + inst->jt + 1, ctx);
					emit_b(b_off, ctx);
					emit_nop(ctx);
				}
			} else {
				/* A == K|X */
				if (condt & MIPS_COND_K) { /* K */
					ctx->flags |= SEEN_S0 | SEEN_A;
					emit_load_imm(r_s0, k, ctx);
					/* jump true */
					b_off = b_imm(i + inst->jt + 1, ctx);
					emit_bcond(MIPS_COND_EQ, r_A, r_s0,
						   b_off, ctx);
					emit_nop(ctx);
					/* jump false */
					b_off = b_imm(i + inst->jf + 1,
						      ctx);
					emit_bcond(MIPS_COND_NE, r_A, r_s0,
						   b_off, ctx);
					emit_nop(ctx);
				} else { /* X */
					/* jump true */
					ctx->flags |= SEEN_A | SEEN_X;
					b_off = b_imm(i + inst->jt + 1,
						      ctx);
					emit_bcond(MIPS_COND_EQ, r_A, r_X,
						   b_off, ctx);
					emit_nop(ctx);
					/* jump false */
					b_off = b_imm(i + inst->jf + 1, ctx);
					emit_bcond(MIPS_COND_NE, r_A, r_X,
						   b_off, ctx);
					emit_nop(ctx);
				}
			}
			break;
		case BPF_JMP | BPF_JSET | BPF_K:
			ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A;
			/* pc += (A & K) ? pc->jt : pc->jf */
			emit_load_imm(r_s1, k, ctx);
			emit_and(r_s0, r_A, r_s1, ctx);
			/* jump true */
			b_off = b_imm(i + inst->jt + 1, ctx);
			emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
			emit_nop(ctx);
			/* jump false */
			b_off = b_imm(i + inst->jf + 1, ctx);
			emit_b(b_off, ctx);
			emit_nop(ctx);
			break;
		case BPF_JMP | BPF_JSET | BPF_X:
			ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A;
			/* pc += (A & X) ? pc->jt : pc->jf */
			emit_and(r_s0, r_A, r_X, ctx);
			/* jump true */
			b_off = b_imm(i + inst->jt + 1, ctx);
			emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
			emit_nop(ctx);
			/* jump false */
			b_off = b_imm(i + inst->jf + 1, ctx);
			emit_b(b_off, ctx);
			emit_nop(ctx);
			break;
		case BPF_RET | BPF_A:
			ctx->flags |= SEEN_A;
			if (i != prog->len - 1)
				/*
				 * If this is not the last instruction
				 * then jump to the epilogue
				 */
				emit_b(b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_A, ctx); /* delay slot */
			break;
		case BPF_RET | BPF_K:
			/*
			 * Loading K may emit two instructions, so it
			 * cannot go in the delay slot.
			 */
			emit_load_imm(r_ret, k, ctx);
			if (i != prog->len - 1) {
				/*
				 * If this is not the last instruction
				 * then jump to the epilogue
				 */
				emit_b(b_imm(prog->len, ctx), ctx);
				emit_nop(ctx);
			}
			break;
		case BPF_MISC | BPF_TAX:
			/* X = A */
			ctx->flags |= SEEN_X | SEEN_A;
			emit_jit_reg_move(r_X, r_A, ctx);
			break;
		case BPF_MISC | BPF_TXA:
			/* A = X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_jit_reg_move(r_A, r_X, ctx);
			break;
		/* AUX */
		case BPF_ANC | SKF_AD_PROTOCOL:
			/* A = ntohs(skb->protocol) */
			ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  protocol) != 2);
			off = offsetof(struct sk_buff, protocol);
			emit_half_load(r_A, r_skb, off, ctx);
#ifdef CONFIG_CPU_LITTLE_ENDIAN
			/* This needs little endian fixup */
			if (cpu_has_wsbh) {
				/* R2 and later have the wsbh instruction */
				emit_wsbh(r_A, r_A, ctx);
			} else {
				/* Get first byte */
				emit_andi(r_tmp_imm, r_A, 0xff, ctx);
				/* Shift it */
				emit_sll(r_tmp, r_tmp_imm, 8, ctx);
				/* Get second byte */
				emit_srl(r_tmp_imm, r_A, 8, ctx);
				emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx);
				/* Put everything together in r_A */
				emit_or(r_A, r_tmp, r_tmp_imm, ctx);
			}
#endif
			break;
		case BPF_ANC | SKF_AD_CPU:
			ctx->flags |= SEEN_A | SEEN_OFF;
			/* A = current_thread_info()->cpu */
			BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info,
						  cpu) != 4);
			off = offsetof(struct thread_info, cpu);
			/* $28/gp points to the thread_info struct */
			emit_load(r_A, 28, off, ctx);
			break;
		case BPF_ANC | SKF_AD_IFINDEX:
			/* A = skb->dev->ifindex */
			ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0;
			off = offsetof(struct sk_buff, dev);
			/* Load *dev pointer */
			emit_load_ptr(r_s0, r_skb, off, ctx);
			/* error (0) in the delay slot */
			emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
				   b_imm(prog->len, ctx), ctx);
			emit_reg_move(r_ret, r_zero, ctx);
			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
						  ifindex) != 4);
			off = offsetof(struct net_device, ifindex);
			emit_load(r_A, r_s0, off, ctx);
			break;
		case BPF_ANC | SKF_AD_MARK:
			ctx->flags |= SEEN_SKB | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
			off = offsetof(struct sk_buff, mark);
			emit_load(r_A, r_skb, off, ctx);
			break;
		case BPF_ANC | SKF_AD_RXHASH:
			ctx->flags |= SEEN_SKB | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
			off = offsetof(struct sk_buff, hash);
			emit_load(r_A, r_skb, off, ctx);
			break;
		case BPF_ANC | SKF_AD_VLAN_TAG:
		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
			ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  vlan_tci) != 2);
			off = offsetof(struct sk_buff, vlan_tci);
			emit_half_load(r_s0, r_skb, off, ctx);
			if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
				emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx);
			} else {
				emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
				/* return 1 if present */
				emit_sltu(r_A, r_zero, r_A, ctx);
			}
			break;
		case BPF_ANC | SKF_AD_PKTTYPE:
			ctx->flags |= SEEN_SKB;

			emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx);
			/* Keep only the last 3 bits */
			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
#ifdef __BIG_ENDIAN_BITFIELD
			/* Get the actual packet type to the lower 3 bits */
			emit_srl(r_A, r_A, 5, ctx);
#endif
			break;
		case BPF_ANC | SKF_AD_QUEUE:
			ctx->flags |= SEEN_SKB | SEEN_A;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  queue_mapping) != 2);
			BUILD_BUG_ON(offsetof(struct sk_buff,
					      queue_mapping) > 0xff);
			off = offsetof(struct sk_buff, queue_mapping);
			emit_half_load(r_A, r_skb, off, ctx);
			break;
		default:
			pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
				 inst->code);
			return -1;
		}
	}

	/* Record the epilogue offset (first pass only) */
	if (ctx->target == NULL)
		ctx->offsets[i] = ctx->idx * 4;

	return 0;
}

int bpf_jit_enable __read_mostly;

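/*
 * Two-pass compilation: the first build_body()/build_prologue() run,
 * with ctx.target == NULL, only sizes the image and fills ctx.offsets;
 * the second run emits the actual code into the allocated buffer.
 */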
void bpf_jit_compile(struct bpf_prog *fp)
{
	struct jit_ctx ctx;
	unsigned int alloc_size, tmp_idx;

	if (!bpf_jit_enable)
		return;

	memset(&ctx, 0, sizeof(ctx));

	/* One extra slot for the epilogue offset used by b_imm(prog->len) */
	ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
	if (ctx.offsets == NULL)
		return;

	ctx.skf = fp;

	if (build_body(&ctx))
		goto out;

	tmp_idx = ctx.idx;
	build_prologue(&ctx);
	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
	/* just to complete the ctx.idx count */
	build_epilogue(&ctx);

	alloc_size = 4 * ctx.idx;
	ctx.target = module_alloc(alloc_size);
	if (ctx.target == NULL)
		goto out;

	/* Clean it */
	memset(ctx.target, 0, alloc_size);

	ctx.idx = 0;

	/* Generate the actual JIT code */
	build_prologue(&ctx);
	build_body(&ctx);
	build_epilogue(&ctx);

	/* Update the icache */
	flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx));

	if (bpf_jit_enable > 1)
		/* Dump JIT code */
		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);

	fp->bpf_func = (void *)ctx.target;
	fp->jited = true;

out:
	kfree(ctx.offsets);
}

void bpf_jit_free(struct bpf_prog *fp)
{
	if (fp->jited)
		module_memfree(fp->bpf_func);

	bpf_prog_unlock_free(fp);
}