/*
 * BPF JIT compiler for ARM64
 *
 * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#define pr_fmt(fmt) "bpf_jit: " fmt

#include <linux/filter.h>
#include <linux/printk.h>
#include <linux/skbuff.h>
#include <linux/slab.h>

#include <asm/byteorder.h>
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>

#include "bpf_jit.h"

int bpf_jit_enable __read_mostly;

#define TMP_REG_1 (MAX_BPF_REG + 0)
#define TMP_REG_2 (MAX_BPF_REG + 1)

/* Map BPF registers to A64 registers */
static const int bpf2a64[] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = A64_R(7),
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = A64_R(0),
	[BPF_REG_2] = A64_R(1),
	[BPF_REG_3] = A64_R(2),
	[BPF_REG_4] = A64_R(3),
	[BPF_REG_5] = A64_R(4),
	/* callee-saved registers that in-kernel functions will preserve */
	[BPF_REG_6] = A64_R(19),
	[BPF_REG_7] = A64_R(20),
	[BPF_REG_8] = A64_R(21),
	[BPF_REG_9] = A64_R(22),
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = A64_R(25),
	/* temporary registers for internal BPF JIT */
	[TMP_REG_1] = A64_R(23),
	[TMP_REG_2] = A64_R(24),
};

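/*
 * Per-program JIT state, shared by both compilation passes: prog is the
 * eBPF program being translated, idx counts emitted A64 instructions,
 * tmp_used records whether the temporary registers must be saved and
 * restored in the prologue/epilogue, epilogue_offset is the A64 index of
 * the shared epilogue (target of BPF_EXIT branches), offset maps each BPF
 * instruction index to an A64 instruction index, and image is NULL during
 * the sizing pass and points to the executable buffer during emission.
 */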
struct jit_ctx {
	const struct bpf_prog *prog;
	int idx;
	int tmp_used;
	int epilogue_offset;
	int *offset;
	u32 *image;
};

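/*
 * Emit one A64 instruction. During the first (sizing) pass ctx->image is
 * NULL, so only ctx->idx advances; during the second pass the instruction
 * is written out in little-endian order, as A64 opcodes require.
 */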
static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
	if (ctx->image != NULL)
		ctx->image[ctx->idx] = cpu_to_le32(insn);

	ctx->idx++;
}

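/*
 * Load a 64-bit immediate: MOVZ for the low 16 bits, then one MOVK per
 * remaining non-zero 16-bit chunk. For example, val = 0x0000ffff00001234
 * expands to "movz reg, #0x1234" followed by "movk reg, #0xffff, lsl #32";
 * all-zero chunks are simply skipped.
 */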
static inline void emit_a64_mov_i64(const int reg, const u64 val,
				    struct jit_ctx *ctx)
{
	u64 tmp = val;
	int shift = 0;

	emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
	tmp >>= 16;
	shift += 16;
	while (tmp) {
		if (tmp & 0xffff)
			emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
		tmp >>= 16;
		shift += 16;
	}
}

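/*
 * Load a 32-bit immediate (sign-extended when is64 is set). Negative
 * values are built from MOVN so the upper bits come out as ones: e.g.
 * val = -5 (0xfffffffb) has hi == 0xffff and becomes a single
 * "movn reg, #0x4"; other negative values need MOVN for the high half
 * plus MOVK for the low half. Non-negative values use MOVZ/MOVK.
 */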
static inline void emit_a64_mov_i(const int is64, const int reg,
				  const s32 val, struct jit_ctx *ctx)
{
	u16 hi = val >> 16;
	u16 lo = val & 0xffff;

	if (hi & 0x8000) {
		if (hi == 0xffff) {
			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
		} else {
			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
			emit(A64_MOVK(is64, reg, lo, 0), ctx);
		}
	} else {
		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
		if (hi)
			emit(A64_MOVK(is64, reg, hi, 16), ctx);
	}
}

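/*
 * Convert a BPF branch into an A64 offset, in units of 32-bit instructions
 * and relative to the branch instruction itself. ctx->offset[i] records the
 * A64 index just past instruction i's code, i.e. the start of instruction
 * i + 1, which matches BPF semantics where a branch offset is relative to
 * the instruction following the branch.
 */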
static inline int bpf2a64_offset(int bpf_to, int bpf_from,
				 const struct jit_ctx *ctx)
{
	int to = ctx->offset[bpf_to];
	/* -1 to account for the Branch instruction */
	int from = ctx->offset[bpf_from] - 1;

	return to - from;
}

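/*
 * Fill unused space in the image with AARCH64_BREAK_FAULT (BRK) so that a
 * stray jump into the padding traps instead of executing garbage.
 */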
static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *ptr;
	/* We are guaranteed to have aligned memory. */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
}

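/* Branch offset from the current instruction to the shared epilogue. */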
static inline int epilogue_offset(const struct jit_ctx *ctx)
{
	int to = ctx->epilogue_offset;
	int from = ctx->idx;

	return to - from;
}

/* Stack must be a multiple of 16B */
#define STACK_ALIGN(sz) (((sz) + 15) & ~15)

#define _STACK_SIZE \
	(MAX_BPF_STACK \
	 + 4 /* extra for skb_copy_bits buffer */)

#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)

static void build_prologue(struct jit_ctx *ctx)
{
	const u8 r6 = bpf2a64[BPF_REG_6];
	const u8 r7 = bpf2a64[BPF_REG_7];
	const u8 r8 = bpf2a64[BPF_REG_8];
	const u8 r9 = bpf2a64[BPF_REG_9];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 ra = bpf2a64[BPF_REG_A];
	const u8 rx = bpf2a64[BPF_REG_X];
	const u8 tmp1 = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];

	/*
	 * BPF prog stack layout
	 *
	 *                         high
	 * original A64_SP =>   0:+-----+ BPF prologue
	 *                        |FP/LR|
	 * current A64_FP =>  -16:+-----+
	 *                        | ... | callee saved registers
	 *                        +-----+
	 *                        |     | x25/x26
	 * BPF fp register => -80:+-----+ <= (BPF_FP)
	 *                        |     |
	 *                        | ... | BPF prog stack
	 *                        |     |
	 *                        +-----+ <= (BPF_FP - MAX_BPF_STACK)
	 *                        |RSVD | JIT scratchpad
	 * current A64_SP =>      +-----+ <= (BPF_FP - STACK_SIZE)
	 *                        |     |
	 *                        | ... | Function call stack
	 *                        |     |
	 *                        +-----+
	 *                          low
	 *
	 */

	/* Save FP and LR registers to stay aligned with the ARM64 AAPCS */
	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* Save callee-saved registers */
	emit(A64_PUSH(r6, r7, A64_SP), ctx);
	emit(A64_PUSH(r8, r9, A64_SP), ctx);
	if (ctx->tmp_used)
		emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);

	/* Save fp (x25) and x26. SP requires 16-byte alignment */
	emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx);

	/* Set up BPF prog stack base register (x25) */
	emit(A64_MOV(1, fp, A64_SP), ctx);

	/* Set up function call stack */
	emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);

	/* Clear registers A and X */
	emit_a64_mov_i64(ra, 0, ctx);
	emit_a64_mov_i64(rx, 0, ctx);
}

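/*
 * Tear-down mirrors build_prologue in reverse: drop the BPF stack, restore
 * x25/x26, the callee-saved registers and FP/LR, move the BPF return value
 * (R0) into x0 and return to the caller.
 */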
static void build_epilogue(struct jit_ctx *ctx)
{
	const u8 r0 = bpf2a64[BPF_REG_0];
	const u8 r6 = bpf2a64[BPF_REG_6];
	const u8 r7 = bpf2a64[BPF_REG_7];
	const u8 r8 = bpf2a64[BPF_REG_8];
	const u8 r9 = bpf2a64[BPF_REG_9];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 tmp1 = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];

	/* We're done with BPF stack */
	emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);

	/* Restore fp (x25) and x26 */
	emit(A64_POP(fp, A64_R(26), A64_SP), ctx);

	/* Restore callee-saved registers */
	if (ctx->tmp_used)
		emit(A64_POP(tmp1, tmp2, A64_SP), ctx);
	emit(A64_POP(r8, r9, A64_SP), ctx);
	emit(A64_POP(r6, r7, A64_SP), ctx);

	/* Restore FP/LR registers */
	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);

	/* Set return value */
	emit(A64_MOV(1, A64_R(0), r0), ctx);

	emit(A64_RET(A64_LR), ctx);
}

/* JITs an eBPF instruction.
 * Returns:
 * 0  - successfully JITed an 8-byte eBPF instruction.
 * >0 - successfully JITed a 16-byte eBPF instruction.
 * <0 - failed to JIT.
 */
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const int i = insn - ctx->prog->insnsi;
	const bool is64 = BPF_CLASS(code) == BPF_ALU64;
	u8 jmp_cond;
	s32 jmp_offset;

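/*
 * Reject branch offsets that do not fit the A64 immediate encodings:
 * 19 bits for conditional and compare-and-branch instructions, 26 bits
 * for unconditional branches.
 */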
#define check_imm(bits, imm) do {				\
	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, imm, imm);				\
		return -EINVAL;					\
	}							\
} while (0)
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		emit(A64_MOV(is64, dst, src), ctx);
		break;
	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit(A64_ADD(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit(A64_SUB(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit(A64_AND(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit(A64_ORR(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit(A64_EOR(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit(A64_MUL(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
	{
		const u8 r0 = bpf2a64[BPF_REG_0];

		/* if (src == 0) return 0 */
		jmp_offset = 3; /* skip ahead to else path */
		check_imm19(jmp_offset);
		emit(A64_CBNZ(is64, src, jmp_offset), ctx);
		emit(A64_MOVZ(1, r0, 0, 0), ctx);
		jmp_offset = epilogue_offset(ctx);
		check_imm26(jmp_offset);
		emit(A64_B(jmp_offset), ctx);
		/* else */
		switch (BPF_OP(code)) {
		case BPF_DIV:
			emit(A64_UDIV(is64, dst, dst, src), ctx);
			break;
		case BPF_MOD:
			ctx->tmp_used = 1;
			emit(A64_UDIV(is64, tmp, dst, src), ctx);
			emit(A64_MUL(is64, tmp, tmp, src), ctx);
			emit(A64_SUB(is64, dst, dst, tmp), ctx);
			break;
		}
		break;
	}
	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit(A64_LSLV(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit(A64_LSRV(is64, dst, dst, src), ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit(A64_ASRV(is64, dst, dst, src), ctx);
		break;
	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		emit(A64_NEG(is64, dst, dst), ctx);
		break;
	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
	case BPF_ALU | BPF_END | BPF_FROM_BE:
#ifdef CONFIG_CPU_BIG_ENDIAN
		if (BPF_SRC(code) == BPF_FROM_BE)
			goto emit_bswap_uxt;
#else /* !CONFIG_CPU_BIG_ENDIAN */
		if (BPF_SRC(code) == BPF_FROM_LE)
			goto emit_bswap_uxt;
#endif
		switch (imm) {
		case 16:
			emit(A64_REV16(is64, dst, dst), ctx);
			/* zero-extend 16 bits into 64 bits */
			emit(A64_UXTH(is64, dst, dst), ctx);
			break;
		case 32:
			emit(A64_REV32(is64, dst, dst), ctx);
			/* upper 32 bits already cleared */
			break;
		case 64:
			emit(A64_REV64(dst, dst), ctx);
			break;
		}
		break;
emit_bswap_uxt:
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit(A64_UXTH(is64, dst, dst), ctx);
			break;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			emit(A64_UXTW(is64, dst, dst), ctx);
			break;
		case 64:
			/* nop */
			break;
		}
		break;
	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		emit_a64_mov_i(is64, dst, imm, ctx);
		break;
	/* dst = dst OP imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		ctx->tmp_used = 1;
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_ADD(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		ctx->tmp_used = 1;
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_SUB(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		ctx->tmp_used = 1;
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_AND(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		ctx->tmp_used = 1;
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_ORR(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		ctx->tmp_used = 1;
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_EOR(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		ctx->tmp_used = 1;
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_MUL(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		ctx->tmp_used = 1;
		emit_a64_mov_i(is64, tmp, imm, ctx);
		emit(A64_UDIV(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		ctx->tmp_used = 1;
		emit_a64_mov_i(is64, tmp2, imm, ctx);
		emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
		emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
		emit(A64_SUB(is64, dst, dst, tmp), ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit(A64_LSL(is64, dst, dst, imm), ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
		emit(A64_LSR(is64, dst, dst, imm), ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit(A64_ASR(is64, dst, dst, imm), ctx);
		break;

	/* JUMP off */
	case BPF_JMP | BPF_JA:
		jmp_offset = bpf2a64_offset(i + off, i, ctx);
		check_imm26(jmp_offset);
		emit(A64_B(jmp_offset), ctx);
		break;
	/* IF (dst COND src) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
		emit(A64_CMP(1, dst, src), ctx);
emit_cond_jmp:
		jmp_offset = bpf2a64_offset(i + off, i, ctx);
		check_imm19(jmp_offset);
		switch (BPF_OP(code)) {
		case BPF_JEQ:
			jmp_cond = A64_COND_EQ;
			break;
		case BPF_JGT:
			jmp_cond = A64_COND_HI;
			break;
		case BPF_JGE:
			jmp_cond = A64_COND_CS;
			break;
		case BPF_JNE:
			jmp_cond = A64_COND_NE;
			break;
		case BPF_JSGT:
			jmp_cond = A64_COND_GT;
			break;
		case BPF_JSGE:
			jmp_cond = A64_COND_GE;
			break;
		default:
			return -EFAULT;
		}
		emit(A64_B_(jmp_cond, jmp_offset), ctx);
		break;
	case BPF_JMP | BPF_JSET | BPF_X:
		emit(A64_TST(1, dst, src), ctx);
		goto emit_cond_jmp;
	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
		ctx->tmp_used = 1;
		emit_a64_mov_i(1, tmp, imm, ctx);
		emit(A64_CMP(1, dst, tmp), ctx);
		goto emit_cond_jmp;
	case BPF_JMP | BPF_JSET | BPF_K:
		ctx->tmp_used = 1;
		emit_a64_mov_i(1, tmp, imm, ctx);
		emit(A64_TST(1, dst, tmp), ctx);
		goto emit_cond_jmp;
	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		const u8 r0 = bpf2a64[BPF_REG_0];
		const u64 func = (u64)__bpf_call_base + imm;

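		/*
		 * Call the in-kernel helper: the BPF argument registers
		 * already follow the AAPCS (R1-R5 map to x0-x4). Push a
		 * fresh FP/LR frame around the indirect call, then move
		 * the A64 return value in x0 into BPF R0 (x7).
		 */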
		ctx->tmp_used = 1;
		emit_a64_mov_i64(tmp, func, ctx);
		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
		emit(A64_BLR(tmp), ctx);
		emit(A64_MOV(1, r0, A64_R(0)), ctx);
		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
		break;
	}
	/* function return */
	case BPF_JMP | BPF_EXIT:
		/* Optimization: when the last instruction is EXIT,
		   simply fall through to the epilogue. */
		if (i == ctx->prog->len - 1)
			break;
		jmp_offset = epilogue_offset(ctx);
		check_imm26(jmp_offset);
		emit(A64_B(jmp_offset), ctx);
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		const struct bpf_insn insn1 = insn[1];
		u64 imm64;

		if (insn1.code != 0 || insn1.src_reg != 0 ||
		    insn1.dst_reg != 0 || insn1.off != 0) {
			/* Note: verifier in BPF core must catch invalid
			 * instructions.
			 */
			pr_err_once("Invalid BPF_LD_IMM64 instruction\n");
			return -EINVAL;
		}

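		/*
		 * The second 8-byte slot of a 16-byte BPF_LD_IMM64 carries
		 * the upper 32 bits in insn[1].imm; returning 1 tells
		 * build_body to skip that slot.
		 */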
		imm64 = (u64)insn1.imm << 32 | (u32)imm;
		emit_a64_mov_i64(dst, imm64, ctx);

		return 1;
	}

	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
		ctx->tmp_used = 1;
		emit_a64_mov_i(1, tmp, off, ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			emit(A64_LDR32(dst, src, tmp), ctx);
			break;
		case BPF_H:
			emit(A64_LDRH(dst, src, tmp), ctx);
			break;
		case BPF_B:
			emit(A64_LDRB(dst, src, tmp), ctx);
			break;
		case BPF_DW:
			emit(A64_LDR64(dst, src, tmp), ctx);
			break;
		}
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW:
		/* Load imm to a register then store it */
		ctx->tmp_used = 1;
		emit_a64_mov_i(1, tmp2, off, ctx);
		emit_a64_mov_i(1, tmp, imm, ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			emit(A64_STR32(tmp, dst, tmp2), ctx);
			break;
		case BPF_H:
			emit(A64_STRH(tmp, dst, tmp2), ctx);
			break;
		case BPF_B:
			emit(A64_STRB(tmp, dst, tmp2), ctx);
			break;
		case BPF_DW:
			emit(A64_STR64(tmp, dst, tmp2), ctx);
			break;
		}
		break;

	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW:
		ctx->tmp_used = 1;
		emit_a64_mov_i(1, tmp, off, ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			emit(A64_STR32(src, dst, tmp), ctx);
			break;
		case BPF_H:
			emit(A64_STRH(src, dst, tmp), ctx);
			break;
		case BPF_B:
			emit(A64_STRB(src, dst, tmp), ctx);
			break;
		case BPF_DW:
			emit(A64_STR64(src, dst, tmp), ctx);
			break;
		}
		break;
	/* STX XADD: lock *(u32 *)(dst + off) += src */
	case BPF_STX | BPF_XADD | BPF_W:
	/* STX XADD: lock *(u64 *)(dst + off) += src */
	case BPF_STX | BPF_XADD | BPF_DW:
		goto notyet;

	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
	case BPF_LD | BPF_ABS | BPF_W:
	case BPF_LD | BPF_ABS | BPF_H:
	case BPF_LD | BPF_ABS | BPF_B:
	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
	case BPF_LD | BPF_IND | BPF_W:
	case BPF_LD | BPF_IND | BPF_H:
	case BPF_LD | BPF_IND | BPF_B:
	{
		const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
		const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
		const u8 fp = bpf2a64[BPF_REG_FP];
		const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
		const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
		const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
		const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
		const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
		int size;

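		/*
		 * Marshal arguments for bpf_load_pointer(skb, k, size, buffer)
		 * in the registers above, using the JIT scratchpad reserved
		 * below the BPF stack as the bounce buffer, then call it
		 * through r5. A NULL return means the load failed; in that
		 * case branch to the epilogue and return 0.
		 */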
		emit(A64_MOV(1, r1, r6), ctx);
		emit_a64_mov_i(0, r2, imm, ctx);
		if (BPF_MODE(code) == BPF_IND)
			emit(A64_ADD(0, r2, r2, src), ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			size = 4;
			break;
		case BPF_H:
			size = 2;
			break;
		case BPF_B:
			size = 1;
			break;
		default:
			return -EINVAL;
		}
		emit_a64_mov_i64(r3, size, ctx);
		emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx);
		emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
		emit(A64_BLR(r5), ctx);
		emit(A64_MOV(1, r0, A64_R(0)), ctx);
		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);

		jmp_offset = epilogue_offset(ctx);
		check_imm19(jmp_offset);
		emit(A64_CBZ(1, r0, jmp_offset), ctx);
		emit(A64_MOV(1, r5, r0), ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			emit(A64_LDR32(r0, r5, A64_ZR), ctx);
#ifndef CONFIG_CPU_BIG_ENDIAN
			emit(A64_REV32(0, r0, r0), ctx);
#endif
			break;
		case BPF_H:
			emit(A64_LDRH(r0, r5, A64_ZR), ctx);
#ifndef CONFIG_CPU_BIG_ENDIAN
			emit(A64_REV16(0, r0, r0), ctx);
#endif
			break;
		case BPF_B:
			emit(A64_LDRB(r0, r5, A64_ZR), ctx);
			break;
		}
		break;
	}
notyet:
		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
		return -EFAULT;

	default:
		pr_err_once("unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}

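/*
 * Translate the whole program. On the first pass (ctx->image == NULL)
 * record in ctx->offset[] the A64 index reached after each BPF
 * instruction; a positive return from build_insn marks a 16-byte
 * BPF_LD_IMM64, whose second slot is skipped.
 */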
static int build_body(struct jit_ctx *ctx)
{
	const struct bpf_prog *prog = ctx->prog;
	int i;

	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		int ret;

		ret = build_insn(insn, ctx);

		if (ctx->image == NULL)
			ctx->offset[i] = ctx->idx;

		if (ret > 0) {
			i++;
			continue;
		}
		if (ret)
			return ret;
	}

	return 0;
}

static inline void bpf_flush_icache(void *start, void *end)
{
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

void bpf_jit_compile(struct bpf_prog *prog)
{
	/* Nothing to do here. We support Internal BPF. */
}

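/*
 * JIT entry point for internal (eBPF) programs: size the image with a
 * fake pass, allocate the executable buffer, emit the real code, flush
 * the instruction cache and mark the image read-only before enabling it.
 */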
void bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_binary_header *header;
	struct jit_ctx ctx;
	int image_size;
	u8 *image_ptr;

	if (!bpf_jit_enable)
		return;

	if (!prog || !prog->len)
		return;

	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
	if (ctx.offset == NULL)
		return;

	/* 1. Initial fake pass to compute ctx->idx and to fill in
	 *    ctx->offset and ctx->tmp_used.
	 */
	if (build_body(&ctx))
		goto out;

	build_prologue(&ctx);

	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx);

	/* Now we know the actual image size. */
	image_size = sizeof(u32) * ctx.idx;
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	if (header == NULL)
		goto out;

	/* 2. Now, the actual pass. */

	ctx.image = (u32 *)image_ptr;
	ctx.idx = 0;

	build_prologue(&ctx);

	if (build_body(&ctx)) {
		bpf_jit_binary_free(header);
		goto out;
	}

	build_epilogue(&ctx);

	/* And we're done. */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, image_size, 2, ctx.image);

	bpf_flush_icache(header, ctx.image + ctx.idx);

	set_memory_ro((unsigned long)header, header->pages);
	prog->bpf_func = (void *)ctx.image;
	prog->jited = 1;
out:
	kfree(ctx.offset);
}

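/*
 * Free a JITed program: recover the binary header from the page-aligned
 * start of the image, make it writable again, release it, and then free
 * the bpf_prog itself.
 */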
void bpf_jit_free(struct bpf_prog *prog)
{
	unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK;
	struct bpf_binary_header *header = (void *)addr;

	if (!prog->jited)
		goto free_filter;

	set_memory_rw(addr, header->pages);
	bpf_jit_binary_free(header);

free_filter:
	bpf_prog_unlock_free(prog);
}