/*
 * Just-In-Time compiler for BPF filters on 32bit ARM
 *
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 */

#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>

#include <asm/cacheflush.h>
#include <asm/hwcap.h>
#include <asm/opcodes.h>

#include "bpf_jit_32.h"

/*
 * ABI:
 *
 * r0	scratch register
 * r4	BPF register A
 * r5	BPF register X
 * r6	pointer to the skb
 * r7	skb->data
 * r8	skb_headlen(skb)
 */

#define r_scratch	ARM_R0
/* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */
#define r_off		ARM_R1
#define r_A		ARM_R4
#define r_X		ARM_R5
#define r_skb		ARM_R6
#define r_skb_data	ARM_R7
#define r_skb_hl	ARM_R8

#define SCRATCH_SP_OFFSET	0
#define SCRATCH_OFF(k)		(SCRATCH_SP_OFFSET + 4 * (k))

#define SEEN_MEM		((1 << BPF_MEMWORDS) - 1)
#define SEEN_MEM_WORD(k)	(1 << (k))
#define SEEN_X			(1 << BPF_MEMWORDS)
#define SEEN_CALL		(1 << (BPF_MEMWORDS + 1))
#define SEEN_SKB		(1 << (BPF_MEMWORDS + 2))
#define SEEN_DATA		(1 << (BPF_MEMWORDS + 3))
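/*
 * Reader's note on the ctx->seen bitmap (the SEEN_* macros above are the
 * authoritative definitions): bits [BPF_MEMWORDS - 1:0] record which
 * scratch memory words the filter touches, and the next bits record use
 * of X, of helper calls, of the skb pointer and of packet data.  The
 * prologue, epilogue and saved register set are all derived from it.
 */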
55
56#define FLAG_NEED_X_RESET	(1 << 0)
57#define FLAG_IMM_OVERFLOW	(1 << 1)
58
59struct jit_ctx {
60	const struct bpf_prog *skf;
61	unsigned idx;
62	unsigned prologue_bytes;
63	int ret0_fp_idx;
64	u32 seen;
65	u32 flags;
66	u32 *offsets;
67	u32 *target;
68#if __LINUX_ARM_ARCH__ < 7
69	u16 epilogue_bytes;
70	u16 imm_count;
71	u32 *imms;
72#endif
73};
74
75int bpf_jit_enable __read_mostly;
76
77static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
78{
79	u8 ret;
80	int err;
81
82	err = skb_copy_bits(skb, offset, &ret, 1);
83
84	return (u64)err << 32 | ret;
85}
86
87static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
88{
89	u16 ret;
90	int err;
91
92	err = skb_copy_bits(skb, offset, &ret, 2);
93
94	return (u64)err << 32 | ntohs(ret);
95}
96
97static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
98{
99	u32 ret;
100	int err;
101
102	err = skb_copy_bits(skb, offset, &ret, 4);
103
104	return (u64)err << 32 | ntohl(ret);
105}
106
107/*
108 * Wrapper that handles both OABI and EABI and assures Thumb2 interworking
109 * (where the assembly routines like __aeabi_uidiv could cause problems).
110 */
111static u32 jit_udiv(u32 dividend, u32 divisor)
112{
113	return dividend / divisor;
114}
115
116static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
117{
118	inst |= (cond << 28);
119	inst = __opcode_to_mem_arm(inst);
120
121	if (ctx->target != NULL)
122		ctx->target[ctx->idx] = inst;
123
124	ctx->idx++;
125}
126
127/*
128 * Emit an instruction that will be executed unconditionally.
129 */
130static inline void emit(u32 inst, struct jit_ctx *ctx)
131{
132	_emit(ARM_COND_AL, inst, ctx);
133}
134
135static u16 saved_regs(struct jit_ctx *ctx)
136{
137	u16 ret = 0;
138
139	if ((ctx->skf->len > 1) ||
140	    (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
141		ret |= 1 << r_A;
142
143#ifdef CONFIG_FRAME_POINTER
144	ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC);
145#else
146	if (ctx->seen & SEEN_CALL)
147		ret |= 1 << ARM_LR;
148#endif
149	if (ctx->seen & (SEEN_DATA | SEEN_SKB))
150		ret |= 1 << r_skb;
151	if (ctx->seen & SEEN_DATA)
152		ret |= (1 << r_skb_data) | (1 << r_skb_hl);
153	if (ctx->seen & SEEN_X)
154		ret |= 1 << r_X;
155
156	return ret;
157}
158
159static inline int mem_words_used(struct jit_ctx *ctx)
160{
	/* yes, we do waste some stack space IF there are "holes" in the set */
	return fls(ctx->seen & SEEN_MEM);
}
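/*
 * Illustrative example: a filter that only uses scratch words 0 and 3
 * sets SEEN_MEM bits 0b1001, fls() returns 4 and the prologue reserves
 * four 32-bit slots even though words 1 and 2 stay unused; that is the
 * "holes" waste the comment above refers to.
 */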

static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *ptr;
	/* We are guaranteed to have aligned memory. */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
}

static void build_prologue(struct jit_ctx *ctx)
{
	u16 reg_set = saved_regs(ctx);
	u16 off;

#ifdef CONFIG_FRAME_POINTER
	emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
	emit(ARM_PUSH(reg_set), ctx);
	emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
#else
	if (reg_set)
		emit(ARM_PUSH(reg_set), ctx);
#endif

	if (ctx->seen & (SEEN_DATA | SEEN_SKB))
		emit(ARM_MOV_R(r_skb, ARM_R0), ctx);

	if (ctx->seen & SEEN_DATA) {
		off = offsetof(struct sk_buff, data);
		emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx);
		/* headlen = len - data_len */
		off = offsetof(struct sk_buff, len);
		emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx);
		off = offsetof(struct sk_buff, data_len);
		emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
		emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx);
	}

	if (ctx->flags & FLAG_NEED_X_RESET)
		emit(ARM_MOV_I(r_X, 0), ctx);

	/* do not leak kernel data to userspace */
	if (bpf_needs_clear_a(&ctx->skf->insns[0]))
		emit(ARM_MOV_I(r_A, 0), ctx);

	/* stack space for the BPF_MEM words */
	if (ctx->seen & SEEN_MEM)
		emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
}

static void build_epilogue(struct jit_ctx *ctx)
{
	u16 reg_set = saved_regs(ctx);

	if (ctx->seen & SEEN_MEM)
		emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);

	reg_set &= ~(1 << ARM_LR);

#ifdef CONFIG_FRAME_POINTER
	/* the first instruction of the prologue was: mov ip, sp */
	reg_set &= ~(1 << ARM_IP);
	reg_set |= (1 << ARM_SP);
	emit(ARM_LDM(ARM_SP, reg_set), ctx);
#else
	if (reg_set) {
		if (ctx->seen & SEEN_CALL)
			reg_set |= 1 << ARM_PC;
		emit(ARM_POP(reg_set), ctx);
	}

	if (!(ctx->seen & SEEN_CALL))
		emit(ARM_BX(ARM_LR), ctx);
#endif
}

static int16_t imm8m(u32 x)
{
	u32 rot;

	for (rot = 0; rot < 16; rot++)
		if ((x & ~ror32(0xff, 2 * rot)) == 0)
			return rol32(x, 2 * rot) | (rot << 8);

	return -1;
}
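/*
 * Illustrative encodings (not exhaustive): imm8m(0xff) == 0x0ff,
 * imm8m(0x00ff0000) == 0x8ff and imm8m(0xff000000) == 0x4ff, while
 * imm8m(0x101) == -1 because 0x101 cannot be expressed as an 8-bit
 * value rotated right by an even amount.
 */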

#if __LINUX_ARM_ARCH__ < 7

static u16 imm_offset(u32 k, struct jit_ctx *ctx)
{
	unsigned i = 0, offset;
	u16 imm;

	/* on the "fake" run we just count them (duplicates included) */
	if (ctx->target == NULL) {
		ctx->imm_count++;
		return 0;
	}

	while ((i < ctx->imm_count) && ctx->imms[i]) {
		if (ctx->imms[i] == k)
			break;
		i++;
	}

	if (ctx->imms[i] == 0)
		ctx->imms[i] = k;

	/* constants go just after the epilogue */
	offset =  ctx->offsets[ctx->skf->len];
	offset += ctx->prologue_bytes;
	offset += ctx->epilogue_bytes;
	offset += i * 4;

	ctx->target[offset / 4] = k;

	/* PC in ARM mode == address of the instruction + 8 */
	imm = offset - (8 + ctx->idx * 4);

	if (imm & ~0xfff) {
		/*
		 * literal pool is too far, signal it into flags. we
		 * can only detect it on the second pass unfortunately.
		 */
		ctx->flags |= FLAG_IMM_OVERFLOW;
		return 0;
	}

	return imm;
}

#endif /* __LINUX_ARM_ARCH__ */

/*
 * Move an immediate that's not an imm8m to a core register.
 */
static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 7
	emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
#else
	emit(ARM_MOVW(rd, val & 0xffff), ctx);
	if (val > 0xffff)
		emit(ARM_MOVT(rd, val >> 16), ctx);
#endif
}

static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
{
	int imm12 = imm8m(val);

	if (imm12 >= 0)
		emit(ARM_MOV_I(rd, imm12), ctx);
	else
		emit_mov_i_no8m(rd, val, ctx);
}

#if __LINUX_ARM_ARCH__ < 6

static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
{
	_emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx);
	_emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
	_emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx);
	_emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx);
	_emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx);
	_emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx);
	_emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx);
	_emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx);
}

static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
{
	_emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
	_emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx);
	_emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx);
}

static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx)
{
	/* r_dst = (r_src << 8) | (r_src >> 8) */
	emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx);
	emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx);

	/*
	 * we need to mask out the bits set in r_dst[23:16] due to
	 * the first shift instruction.
	 *
	 * note that 0x8ff is the encoded immediate 0x00ff0000.
	 */
	emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx);
}

#else  /* ARMv6+ */

static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
{
	_emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx);
#ifdef __LITTLE_ENDIAN
	_emit(cond, ARM_REV(r_res, r_res), ctx);
#endif
}

static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
{
	_emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx);
#ifdef __LITTLE_ENDIAN
	_emit(cond, ARM_REV16(r_res, r_res), ctx);
#endif
}

static inline void emit_swap16(u8 r_dst __maybe_unused,
			       u8 r_src __maybe_unused,
			       struct jit_ctx *ctx __maybe_unused)
{
#ifdef __LITTLE_ENDIAN
	emit(ARM_REV16(r_dst, r_src), ctx);
#endif
}

#endif /* __LINUX_ARM_ARCH__ < 6 */


/* Compute the immediate value for a PC-relative branch. */
static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx)
{
	u32 imm;

	if (ctx->target == NULL)
		return 0;
	/*
	 * BPF allows only forward jumps and the offset of the target is
	 * still the one computed during the first pass.
	 */
	imm  = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8);

	return imm >> 2;
}
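/*
 * Worked example with made-up numbers: with a 0x10 byte prologue, a branch
 * emitted at ctx->idx == 8 (image byte 0x20) towards a target whose
 * first-pass body offset is 0x40 (image byte 0x50) yields
 * imm = 0x40 + 0x10 - (8 * 4 + 8) = 0x28 bytes, i.e. 10 words, relative
 * to the PC, which in ARM mode reads two instructions ahead.
 */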

#define OP_IMM3(op, r1, r2, imm_val, ctx)				\
	do {								\
		imm12 = imm8m(imm_val);					\
		if (imm12 < 0) {					\
			emit_mov_i_no8m(r_scratch, imm_val, ctx);	\
			emit(op ## _R((r1), (r2), r_scratch), ctx);	\
		} else {						\
			emit(op ## _I((r1), (r2), imm12), ctx);		\
		}							\
	} while (0)
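/*
 * Usage sketch: OP_IMM3(ARM_ADD, r_A, r_A, k, ctx) emits a single
 * "add r4, r4, #<imm>" when k fits the rotated-immediate form, and
 * otherwise materialises k in r_scratch (r0) first and emits
 * "add r4, r4, r0".  The macro relies on a local "int imm12" being in
 * scope at the expansion site, as in build_body() below.
 */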

static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx)
{
	if (ctx->ret0_fp_idx >= 0) {
		_emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx);
		/* NOP to keep the size constant between passes */
		emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx);
	} else {
		_emit(cond, ARM_MOV_I(ARM_R0, 0), ctx);
		_emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx);
	}
}

static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 5
	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);

	if (elf_hwcap & HWCAP_THUMB)
		emit(ARM_BX(tgt_reg), ctx);
	else
		emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
#else
	emit(ARM_BLX_R(tgt_reg), ctx);
#endif
}

static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ == 7
	if (elf_hwcap & HWCAP_IDIVA) {
		emit(ARM_UDIV(rd, rm, rn), ctx);
		return;
	}
#endif

	/*
	 * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4
	 * (r_A) and rn is ARM_R0 (r_scratch), so load rn into ARM_R1
	 * first to avoid accidentally overwriting ARM_R0 (which still
	 * holds rn) with rm.
	 *
	 * For BPF_ALU | BPF_DIV | BPF_X, rm is ARM_R4 (r_A) and rn is
	 * ARM_R5 (r_X), so there are no particular register overlap
	 * issues.
	 */
	if (rn != ARM_R1)
		emit(ARM_MOV_R(ARM_R1, rn), ctx);
	if (rm != ARM_R0)
		emit(ARM_MOV_R(ARM_R0, rm), ctx);

	ctx->seen |= SEEN_CALL;
	emit_mov_i(ARM_R3, (u32)jit_udiv, ctx);
	emit_blx_r(ARM_R3, ctx);

	if (rd != ARM_R0)
		emit(ARM_MOV_R(rd, ARM_R0), ctx);
}

static inline void update_on_xread(struct jit_ctx *ctx)
{
	if (!(ctx->seen & SEEN_X))
		ctx->flags |= FLAG_NEED_X_RESET;

	ctx->seen |= SEEN_X;
}
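/*
 * If the filter reads X before anything earlier in the linear scan has
 * set it, FLAG_NEED_X_RESET is recorded so that build_prologue() zeroes
 * r_X instead of exposing whatever the register happened to contain.
 */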

static int build_body(struct jit_ctx *ctx)
{
	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
	const struct bpf_prog *prog = ctx->skf;
	const struct sock_filter *inst;
	unsigned i, load_order, off, condt;
	int imm12;
	u32 k;

	for (i = 0; i < prog->len; i++) {
		u16 code;

		inst = &(prog->insns[i]);
		/* K as an immediate value operand */
		k = inst->k;
		code = bpf_anc_helper(inst);

		/* compute offsets only in the fake pass */
		if (ctx->target == NULL)
			ctx->offsets[i] = ctx->idx * 4;

		switch (code) {
		case BPF_LD | BPF_IMM:
			emit_mov_i(r_A, k, ctx);
			break;
		case BPF_LD | BPF_W | BPF_LEN:
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
			emit(ARM_LDR_I(r_A, r_skb,
				       offsetof(struct sk_buff, len)), ctx);
			break;
		case BPF_LD | BPF_MEM:
			/* A = scratch[k] */
			ctx->seen |= SEEN_MEM_WORD(k);
			emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
			break;
		case BPF_LD | BPF_W | BPF_ABS:
			load_order = 2;
			goto load;
		case BPF_LD | BPF_H | BPF_ABS:
			load_order = 1;
			goto load;
		case BPF_LD | BPF_B | BPF_ABS:
			load_order = 0;
load:
			/* the interpreter will deal with the negative K */
			if ((int)k < 0)
				return -ENOTSUPP;
			emit_mov_i(r_off, k, ctx);
load_common:
			ctx->seen |= SEEN_DATA | SEEN_CALL;
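			/*
			 * Two paths from here: the fast path bounds-checks
			 * r_off against skb_headlen (r_skb_hl), loads
			 * directly from r_skb_data and branches to the next
			 * filter instruction; otherwise we fall through to
			 * the slow path, which calls the jit_get_skb_*
			 * helpers (skb_copy_bits) and checks the error code
			 * they return in r1.
			 */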

			if (load_order > 0) {
				emit(ARM_SUB_I(r_scratch, r_skb_hl,
					       1 << load_order), ctx);
				emit(ARM_CMP_R(r_scratch, r_off), ctx);
				condt = ARM_COND_HS;
			} else {
				emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
				condt = ARM_COND_HI;
			}

			_emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data),
			      ctx);

			if (load_order == 0)
				_emit(condt, ARM_LDRB_I(r_A, r_scratch, 0),
				      ctx);
			else if (load_order == 1)
				emit_load_be16(condt, r_A, r_scratch, ctx);
			else if (load_order == 2)
				emit_load_be32(condt, r_A, r_scratch, ctx);

			_emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx);

			/* the slowpath */
			emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx);
			emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
			/* the offset is already in R1 */
			emit_blx_r(ARM_R3, ctx);
			/* check the result of skb_copy_bits */
			emit(ARM_CMP_I(ARM_R1, 0), ctx);
			emit_err_ret(ARM_COND_NE, ctx);
			emit(ARM_MOV_R(r_A, ARM_R0), ctx);
			break;
		case BPF_LD | BPF_W | BPF_IND:
			load_order = 2;
			goto load_ind;
		case BPF_LD | BPF_H | BPF_IND:
			load_order = 1;
			goto load_ind;
		case BPF_LD | BPF_B | BPF_IND:
			load_order = 0;
load_ind:
			OP_IMM3(ARM_ADD, r_off, r_X, k, ctx);
			goto load_common;
		case BPF_LDX | BPF_IMM:
			ctx->seen |= SEEN_X;
			emit_mov_i(r_X, k, ctx);
			break;
		case BPF_LDX | BPF_W | BPF_LEN:
			ctx->seen |= SEEN_X | SEEN_SKB;
			emit(ARM_LDR_I(r_X, r_skb,
				       offsetof(struct sk_buff, len)), ctx);
			break;
		case BPF_LDX | BPF_MEM:
			ctx->seen |= SEEN_X | SEEN_MEM_WORD(k);
			emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
			break;
		case BPF_LDX | BPF_B | BPF_MSH:
			/* x = ((*(frame + k)) & 0xf) << 2; */
			ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL;
			/* the interpreter should deal with the negative K */
			if ((int)k < 0)
				return -1;
			/* offset in r1: we might have to take the slow path */
			emit_mov_i(r_off, k, ctx);
			emit(ARM_CMP_R(r_skb_hl, r_off), ctx);

			/* load in r0: common with the slowpath */
			_emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data,
						      ARM_R1), ctx);
			/*
			 * emit_mov_i() might generate one or two instructions,
			 * the same holds for emit_blx_r()
			 */
			_emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx);

			emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
			/* r_off is r1 */
			emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx);
			emit_blx_r(ARM_R3, ctx);
			/* check the return value of skb_copy_bits */
			emit(ARM_CMP_I(ARM_R1, 0), ctx);
			emit_err_ret(ARM_COND_NE, ctx);

			emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx);
			emit(ARM_LSL_I(r_X, r_X, 2), ctx);
			break;
		case BPF_ST:
			ctx->seen |= SEEN_MEM_WORD(k);
			emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
			break;
		case BPF_STX:
			update_on_xread(ctx);
			ctx->seen |= SEEN_MEM_WORD(k);
			emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
			break;
		case BPF_ALU | BPF_ADD | BPF_K:
			/* A += K */
			OP_IMM3(ARM_ADD, r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_ADD | BPF_X:
			update_on_xread(ctx);
			emit(ARM_ADD_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_SUB | BPF_K:
			/* A -= K */
			OP_IMM3(ARM_SUB, r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_SUB | BPF_X:
			update_on_xread(ctx);
			emit(ARM_SUB_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_MUL | BPF_K:
			/* A *= K */
			emit_mov_i(r_scratch, k, ctx);
			emit(ARM_MUL(r_A, r_A, r_scratch), ctx);
			break;
		case BPF_ALU | BPF_MUL | BPF_X:
			update_on_xread(ctx);
			emit(ARM_MUL(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_DIV | BPF_K:
			if (k == 1)
				break;
			emit_mov_i(r_scratch, k, ctx);
			emit_udiv(r_A, r_A, r_scratch, ctx);
			break;
		case BPF_ALU | BPF_DIV | BPF_X:
			update_on_xread(ctx);
			emit(ARM_CMP_I(r_X, 0), ctx);
			emit_err_ret(ARM_COND_EQ, ctx);
			emit_udiv(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_OR | BPF_K:
			/* A |= K */
			OP_IMM3(ARM_ORR, r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_OR | BPF_X:
			update_on_xread(ctx);
			emit(ARM_ORR_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_XOR | BPF_K:
			/* A ^= K; */
			OP_IMM3(ARM_EOR, r_A, r_A, k, ctx);
			break;
		case BPF_ANC | SKF_AD_ALU_XOR_X:
		case BPF_ALU | BPF_XOR | BPF_X:
			/* A ^= X */
			update_on_xread(ctx);
			emit(ARM_EOR_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			/* A &= K */
			OP_IMM3(ARM_AND, r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_AND | BPF_X:
			update_on_xread(ctx);
			emit(ARM_AND_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
			if (unlikely(k > 31))
				return -1;
			emit(ARM_LSL_I(r_A, r_A, k), ctx);
			break;
		case BPF_ALU | BPF_LSH | BPF_X:
			update_on_xread(ctx);
			emit(ARM_LSL_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_RSH | BPF_K:
			if (unlikely(k > 31))
				return -1;
			emit(ARM_LSR_I(r_A, r_A, k), ctx);
			break;
		case BPF_ALU | BPF_RSH | BPF_X:
			update_on_xread(ctx);
			emit(ARM_LSR_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_NEG:
			/* A = -A */
			emit(ARM_RSB_I(r_A, r_A, 0), ctx);
			break;
		case BPF_JMP | BPF_JA:
			/* pc += K */
			emit(ARM_B(b_imm(i + k + 1, ctx)), ctx);
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
			/* pc += (A == K) ? pc->jt : pc->jf */
			condt  = ARM_COND_EQ;
			goto cmp_imm;
		case BPF_JMP | BPF_JGT | BPF_K:
			/* pc += (A > K) ? pc->jt : pc->jf */
			condt  = ARM_COND_HI;
			goto cmp_imm;
		case BPF_JMP | BPF_JGE | BPF_K:
			/* pc += (A >= K) ? pc->jt : pc->jf */
			condt  = ARM_COND_HS;
cmp_imm:
			imm12 = imm8m(k);
			if (imm12 < 0) {
				emit_mov_i_no8m(r_scratch, k, ctx);
				emit(ARM_CMP_R(r_A, r_scratch), ctx);
			} else {
				emit(ARM_CMP_I(r_A, imm12), ctx);
			}
cond_jump:
			if (inst->jt)
				_emit(condt, ARM_B(b_imm(i + inst->jt + 1,
						   ctx)), ctx);
			if (inst->jf)
				_emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1,
							     ctx)), ctx);
			break;
		case BPF_JMP | BPF_JEQ | BPF_X:
			/* pc += (A == X) ? pc->jt : pc->jf */
			condt   = ARM_COND_EQ;
			goto cmp_x;
		case BPF_JMP | BPF_JGT | BPF_X:
			/* pc += (A > X) ? pc->jt : pc->jf */
			condt   = ARM_COND_HI;
			goto cmp_x;
		case BPF_JMP | BPF_JGE | BPF_X:
			/* pc += (A >= X) ? pc->jt : pc->jf */
			condt   = ARM_COND_CS;
cmp_x:
			update_on_xread(ctx);
			emit(ARM_CMP_R(r_A, r_X), ctx);
			goto cond_jump;
		case BPF_JMP | BPF_JSET | BPF_K:
			/* pc += (A & K) ? pc->jt : pc->jf */
			condt  = ARM_COND_NE;
			/* not set iff all zeroes iff Z==1 iff EQ */

			imm12 = imm8m(k);
			if (imm12 < 0) {
				emit_mov_i_no8m(r_scratch, k, ctx);
				emit(ARM_TST_R(r_A, r_scratch), ctx);
			} else {
				emit(ARM_TST_I(r_A, imm12), ctx);
			}
			goto cond_jump;
		case BPF_JMP | BPF_JSET | BPF_X:
			/* pc += (A & X) ? pc->jt : pc->jf */
			update_on_xread(ctx);
			condt  = ARM_COND_NE;
			emit(ARM_TST_R(r_A, r_X), ctx);
			goto cond_jump;
		case BPF_RET | BPF_A:
			emit(ARM_MOV_R(ARM_R0, r_A), ctx);
			goto b_epilogue;
		case BPF_RET | BPF_K:
			if ((k == 0) && (ctx->ret0_fp_idx < 0))
				ctx->ret0_fp_idx = i;
			emit_mov_i(ARM_R0, k, ctx);
b_epilogue:
			if (i != ctx->skf->len - 1)
				emit(ARM_B(b_imm(prog->len, ctx)), ctx);
			break;
		case BPF_MISC | BPF_TAX:
			/* X = A */
			ctx->seen |= SEEN_X;
			emit(ARM_MOV_R(r_X, r_A), ctx);
			break;
		case BPF_MISC | BPF_TXA:
			/* A = X */
			update_on_xread(ctx);
			emit(ARM_MOV_R(r_A, r_X), ctx);
			break;
		case BPF_ANC | SKF_AD_PROTOCOL:
			/* A = ntohs(skb->protocol) */
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  protocol) != 2);
			off = offsetof(struct sk_buff, protocol);
			emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx);
			emit_swap16(r_A, r_scratch, ctx);
			break;
		case BPF_ANC | SKF_AD_CPU:
			/* r_scratch = current_thread_info() */
			OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx);
			/* A = current_thread_info()->cpu */
			BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4);
			off = offsetof(struct thread_info, cpu);
			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
			break;
		case BPF_ANC | SKF_AD_IFINDEX:
			/* A = skb->dev->ifindex */
			ctx->seen |= SEEN_SKB;
			off = offsetof(struct sk_buff, dev);
			emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);

			emit(ARM_CMP_I(r_scratch, 0), ctx);
			emit_err_ret(ARM_COND_EQ, ctx);

			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
						  ifindex) != 4);
			off = offsetof(struct net_device, ifindex);
			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
			break;
		case BPF_ANC | SKF_AD_MARK:
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
			off = offsetof(struct sk_buff, mark);
			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
			break;
		case BPF_ANC | SKF_AD_RXHASH:
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
			off = offsetof(struct sk_buff, hash);
			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
			break;
		case BPF_ANC | SKF_AD_VLAN_TAG:
		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
			off = offsetof(struct sk_buff, vlan_tci);
			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
				OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx);
			else
				OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx);
			break;
		case BPF_ANC | SKF_AD_QUEUE:
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  queue_mapping) != 2);
			BUILD_BUG_ON(offsetof(struct sk_buff,
					      queue_mapping) > 0xff);
			off = offsetof(struct sk_buff, queue_mapping);
			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
			break;
		default:
			return -1;
		}

		if (ctx->flags & FLAG_IMM_OVERFLOW)
			/*
			 * this instruction generated an overflow when
			 * trying to access the literal pool, so
			 * delegate this filter to the kernel interpreter.
			 */
			return -1;
	}

	/* compute offsets only during the first pass */
	if (ctx->target == NULL)
		ctx->offsets[i] = ctx->idx * 4;

	return 0;
}
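
/*
 * Overview (summarising the code below): a first "fake" pass runs
 * build_body() with ctx.target == NULL to collect ctx.seen, the
 * per-instruction offsets and, before ARMv7, the literal pool size; the
 * image is then allocated and a second pass emits the prologue, body and
 * epilogue for real.  On any failure we simply return without setting
 * fp->bpf_func, so the filter keeps running in the interpreter.
 */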

void bpf_jit_compile(struct bpf_prog *fp)
{
	struct bpf_binary_header *header;
	struct jit_ctx ctx;
	unsigned tmp_idx;
	unsigned alloc_size;
	u8 *target_ptr;

	if (!bpf_jit_enable)
		return;

	memset(&ctx, 0, sizeof(ctx));
	ctx.skf		= fp;
	ctx.ret0_fp_idx = -1;

	ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);
	if (ctx.offsets == NULL)
		return;

	/* fake pass to fill in the ctx->seen */
	if (unlikely(build_body(&ctx)))
		goto out;

	tmp_idx = ctx.idx;
	build_prologue(&ctx);
	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;

#if __LINUX_ARM_ARCH__ < 7
	tmp_idx = ctx.idx;
	build_epilogue(&ctx);
	ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4;

	ctx.idx += ctx.imm_count;
	if (ctx.imm_count) {
		ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL);
		if (ctx.imms == NULL)
			goto out;
	}
#else
	/* there's nothing after the epilogue on ARMv7 */
	build_epilogue(&ctx);
#endif
	alloc_size = 4 * ctx.idx;
	header = bpf_jit_binary_alloc(alloc_size, &target_ptr,
				      4, jit_fill_hole);
	if (header == NULL)
		goto out;

	ctx.target = (u32 *) target_ptr;
	ctx.idx = 0;

	build_prologue(&ctx);
	if (build_body(&ctx) < 0) {
#if __LINUX_ARM_ARCH__ < 7
		if (ctx.imm_count)
			kfree(ctx.imms);
#endif
		bpf_jit_binary_free(header);
		goto out;
	}
	build_epilogue(&ctx);

	flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));

#if __LINUX_ARM_ARCH__ < 7
	if (ctx.imm_count)
		kfree(ctx.imms);
#endif

	if (bpf_jit_enable > 1)
		/* there are 2 passes here */
		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);

	set_memory_ro((unsigned long)header, header->pages);
	fp->bpf_func = (void *)ctx.target;
	fp->jited = true;
out:
	kfree(ctx.offsets);
	return;
}

void bpf_jit_free(struct bpf_prog *fp)
{
	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
	struct bpf_binary_header *header = (void *)addr;

	if (!fp->jited)
		goto free_filter;

	set_memory_rw(addr, header->pages);
	bpf_jit_binary_free(header);

free_filter:
	bpf_prog_unlock_free(fp);
}