1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * bpf_jit_comp64.c: eBPF JIT compiler
4 *
5 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
6 * IBM Corporation
7 *
8 * Based on the powerpc classic BPF JIT compiler by Matt Evans
9 */
10#include <linux/moduleloader.h>
11#include <asm/cacheflush.h>
12#include <asm/asm-compat.h>
13#include <linux/netdevice.h>
14#include <linux/filter.h>
15#include <linux/if_vlan.h>
16#include <asm/kprobes.h>
17#include <linux/bpf.h>
18#include <asm/security_features.h>
19
20#include "bpf_jit64.h"
21
22static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
23{
24 memset32(area, BREAKPOINT_INSTRUCTION, size/4);
25}
26
27static inline void bpf_flush_icache(void *start, void *end)
28{
29 smp_wmb();
30 flush_icache_range((unsigned long)start, (unsigned long)end);
31}
32
33static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
34{
35 return (ctx->seen & (1 << (31 - b2p[i])));
36}
37
38static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
39{
40 ctx->seen |= (1 << (31 - b2p[i]));
41}
42
43static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
44{
45 /*
46 * We only need a stack frame if:
47 * - we call other functions (kernel helpers), or
48 * - the bpf program uses its stack area
49 * The latter condition is deduced from the usage of BPF_REG_FP
50 */
51 return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
52}
53
54/*
55 * When not setting up our own stackframe, the redzone usage is:
56 *
57 * [ prev sp ] <-------------
58 * [ ... ] |
59 * sp (r1) ---> [ stack pointer ] --------------
60 * [ nv gpr save area ] 5*8
61 * [ tail_call_cnt ] 8
62 * [ local_tmp_var ] 16
63 * [ unused red zone ] 208 bytes protected
64 */
65static int bpf_jit_stack_local(struct codegen_context *ctx)
66{
67 if (bpf_has_stack_frame(ctx))
68 return STACK_FRAME_MIN_SIZE + ctx->stack_size;
69 else
70 return -(BPF_PPC_STACK_SAVE + 24);
71}
72
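/*
 * tail_call_cnt is kept 16 bytes above local_tmp_var: with no stack frame
 * of our own, that works out to -(BPF_PPC_STACK_SAVE + 8)(r1) in the
 * redzone, matching the store emitted in bpf_jit_build_prologue() below.
 */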
73static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
74{
75 return bpf_jit_stack_local(ctx) + 16;
76}
77
78static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
79{
80 if (reg >= BPF_PPC_NVR_MIN && reg < 32)
81 return (bpf_has_stack_frame(ctx) ?
82 (BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
83 - (8 * (32 - reg));
84
85	pr_err("BPF JIT is asking about unknown registers\n");
86 BUG();
87}
88
89static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
90{
91 int i;
92
93 /*
94 * Initialize tail_call_cnt if we do tail calls.
95 * Otherwise, put in NOPs so that it can be skipped when we are
96 * invoked through a tail call.
97 */
98 if (ctx->seen & SEEN_TAILCALL) {
99 PPC_LI(b2p[TMP_REG_1], 0);
100 /* this goes in the redzone */
101 PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
102 } else {
103 PPC_NOP();
104 PPC_NOP();
105 }
106
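/*
 * Size, in bytes, of the two instructions emitted above (li + std, or the
 * two nops). A tail call branches past them so that the callee does not
 * re-initialize tail_call_cnt.
 */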
107#define BPF_TAILCALL_PROLOGUE_SIZE 8
108
109 if (bpf_has_stack_frame(ctx)) {
110 /*
111 * We need a stack frame, but we don't necessarily need to
112 * save/restore LR unless we call other functions
113 */
114 if (ctx->seen & SEEN_FUNC) {
115 EMIT(PPC_INST_MFLR | __PPC_RT(R0));
116 PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
117 }
118
119 PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
120 }
121
122 /*
123 * Back up non-volatile regs -- BPF registers 6-10
124 * If we haven't created our own stack frame, we save these
125 * in the protected zone below the previous stack frame
126 */
127 for (i = BPF_REG_6; i <= BPF_REG_10; i++)
128 if (bpf_is_seen_register(ctx, i))
129 PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
130
131 /* Setup frame pointer to point to the bpf stack area */
132 if (bpf_is_seen_register(ctx, BPF_REG_FP))
133 PPC_ADDI(b2p[BPF_REG_FP], 1,
134 STACK_FRAME_MIN_SIZE + ctx->stack_size);
135}
136
137static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
138{
139 int i;
140
141 /* Restore NVRs */
142 for (i = BPF_REG_6; i <= BPF_REG_10; i++)
143 if (bpf_is_seen_register(ctx, i))
144 PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
145
146 /* Tear down our stack frame */
147 if (bpf_has_stack_frame(ctx)) {
148 PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
149 if (ctx->seen & SEEN_FUNC) {
150 PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
151 PPC_MTLR(0);
152 }
153 }
154}
155
156static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
157{
158 bpf_jit_emit_common_epilogue(image, ctx);
159
160 /* Move result to r3 */
161 PPC_MR(3, b2p[BPF_REG_0]);
162
163 PPC_BLR();
164}
165
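/*
 * Calls with a target address that is fixed at JIT time (kernel helpers) go
 * through bpf_jit_emit_func_call_hlp() below. bpf-to-bpf calls, whose target
 * is only final in the extra pass, use bpf_jit_emit_func_call_rel() so that
 * a fixed-length sequence can be re-emitted with the final address by
 * bpf_jit_fixup_subprog_calls().
 */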
166static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
167 u64 func)
168{
169#ifdef PPC64_ELF_ABI_v1
170 /* func points to the function descriptor */
171 PPC_LI64(b2p[TMP_REG_2], func);
172 /* Load actual entry point from function descriptor */
173 PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
174 /* ... and move it to LR */
175 PPC_MTLR(b2p[TMP_REG_1]);
176 /*
177 * Load TOC from function descriptor at offset 8.
178 * We can clobber r2 since we get called through a
179 * function pointer (so caller will save/restore r2)
180 * and since we don't use a TOC ourself.
181 */
182 PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
183#else
184 /* We can clobber r12 */
185 PPC_FUNC_ADDR(12, func);
186 PPC_MTLR(12);
187#endif
188 PPC_BLRL();
189}
190
191static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
192 u64 func)
193{
194 unsigned int i, ctx_idx = ctx->idx;
195
196 /* Load function address into r12 */
197 PPC_LI64(12, func);
198
199 /* For bpf-to-bpf function calls, the callee's address is unknown
200 * until the last extra pass. As seen above, we use PPC_LI64() to
201 * load the callee's address, but PPC_LI64() may optimize the number of
202 * instructions it emits based on the value of that address.
203 *
204 * Since we don't want the number of instructions emitted to change,
205 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
206 * we always have a five-instruction sequence, which is the maximum
207 * that PPC_LI64() can emit.
208 */
209 for (i = ctx->idx - ctx_idx; i < 5; i++)
210 PPC_NOP();
211
212#ifdef PPC64_ELF_ABI_v1
213 /*
214 * Load TOC from function descriptor at offset 8.
215 * We can clobber r2 since we get called through a
216 * function pointer (so caller will save/restore r2)
217 * and since we don't use a TOC ourself.
218 */
219 PPC_BPF_LL(2, 12, 8);
220 /* Load actual entry point from function descriptor */
221 PPC_BPF_LL(12, 12, 0);
222#endif
223
224 PPC_MTLR(12);
225 PPC_BLRL();
226}
227
228static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
229{
230 /*
231 * By now, the eBPF program has already setup parameters in r3, r4 and r5
232 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
233 * r4/BPF_REG_2 - pointer to bpf_array
234 * r5/BPF_REG_3 - index in bpf_array
235 */
236 int b2p_bpf_array = b2p[BPF_REG_2];
237 int b2p_index = b2p[BPF_REG_3];
238
239 /*
240 * if (index >= array->map.max_entries)
241 * goto out;
242 */
243 PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
244 PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
245 PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
246 PPC_BCC(COND_GE, out);
247
248 /*
249 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
250 * goto out;
251 */
252 PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
253 PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
254 PPC_BCC(COND_GT, out);
255
256 /*
257 * tail_call_cnt++;
258 */
259 PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
260 PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
261
262 /* prog = array->ptrs[index]; */
263 PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
264 PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
265 PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
266
267 /*
268 * if (prog == NULL)
269 * goto out;
270 */
271 PPC_CMPLDI(b2p[TMP_REG_1], 0);
272 PPC_BCC(COND_EQ, out);
273
274 /* goto *(prog->bpf_func + prologue_size); */
275 PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
276#ifdef PPC64_ELF_ABI_v1
277 /* skip past the function descriptor */
278 PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
279 FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
280#else
281 PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
282#endif
283 PPC_MTCTR(b2p[TMP_REG_1]);
284
285 /* tear down stack, restore NVRs, ... */
286 bpf_jit_emit_common_epilogue(image, ctx);
287
288 PPC_BCTR();
289
290 /* out: */
291 return 0;
292}
293
294/*
295 * We always spill into the redzone, even if the bpf program has its own stackframe.
296 * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local()
297 */
298void bpf_stf_barrier(void);
299
300asm (
301" .global bpf_stf_barrier ;"
302" bpf_stf_barrier: ;"
303" std 21,-64(1) ;"
304" std 22,-56(1) ;"
305" sync ;"
306" ld 21,-64(1) ;"
307" ld 22,-56(1) ;"
308" ori 31,31,0 ;"
309" .rept 14 ;"
310" b 1f ;"
311" 1: ;"
312" .endr ;"
313" blr ;"
314);
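/*
 * bpf_stf_barrier() is the STF_BARRIER_FALLBACK target used by the
 * BPF_ST | BPF_NOSPEC case below: it spills r21/r22 into the redzone,
 * issues a sync, reloads them, then executes the ori no-op and a run of
 * taken branches before returning.
 */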
315
316/* Assemble the body code between the prologue & epilogue */
317static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
318 struct codegen_context *ctx,
319 u32 *addrs, bool extra_pass)
320{
321 enum stf_barrier_type stf_barrier = stf_barrier_type_get();
322 const struct bpf_insn *insn = fp->insnsi;
323 int flen = fp->len;
324 int i, ret;
325
326	/* Start of epilogue code - only valid from the 2nd pass onwards */
327 u32 exit_addr = addrs[flen];
328
329 for (i = 0; i < flen; i++) {
330 u32 code = insn[i].code;
331 u32 dst_reg = b2p[insn[i].dst_reg];
332 u32 src_reg = b2p[insn[i].src_reg];
333 s16 off = insn[i].off;
334 s32 imm = insn[i].imm;
335 bool func_addr_fixed;
336 u64 func_addr;
337 u64 imm64;
338 u32 true_cond;
339 u32 tmp_idx;
340
341 /*
342 * addrs[] maps a BPF bytecode address into a real offset from
343 * the start of the body code.
344 */
345 addrs[i] = ctx->idx * 4;
346
347 /*
348 * As an optimization, we note down which non-volatile registers
349 * are used so that we can only save/restore those in our
350 * prologue and epilogue. We do this here regardless of whether
351 * the actual BPF instruction uses src/dst registers or not
352 * (for instance, BPF_CALL does not use them). The expectation
353 * is that those instructions will have src_reg/dst_reg set to
354		 * 0. Even if that is not the case, we only lose some prologue/epilogue
355 * optimization but everything else should work without
356 * any issues.
357 */
358 if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
359 bpf_set_seen_register(ctx, insn[i].dst_reg);
360 if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
361 bpf_set_seen_register(ctx, insn[i].src_reg);
362
363 switch (code) {
364 /*
365 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
366 */
367 case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
368 case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
369 PPC_ADD(dst_reg, dst_reg, src_reg);
370 goto bpf_alu32_trunc;
371 case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
372 case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
373 PPC_SUB(dst_reg, dst_reg, src_reg);
374 goto bpf_alu32_trunc;
375 case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
376 case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
377 if (!imm) {
378 goto bpf_alu32_trunc;
379 } else if (imm >= -32768 && imm < 32768) {
380 PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
381 } else {
382 PPC_LI32(b2p[TMP_REG_1], imm);
383 PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
384 }
385 goto bpf_alu32_trunc;
386 case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
387 case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
388 if (!imm) {
389 goto bpf_alu32_trunc;
390 } else if (imm > -32768 && imm <= 32768) {
391 PPC_ADDI(dst_reg, dst_reg, IMM_L(-imm));
392 } else {
393 PPC_LI32(b2p[TMP_REG_1], imm);
394 PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
395 }
396 goto bpf_alu32_trunc;
397 case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
398 case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
399 if (BPF_CLASS(code) == BPF_ALU)
400 PPC_MULW(dst_reg, dst_reg, src_reg);
401 else
402 PPC_MULD(dst_reg, dst_reg, src_reg);
403 goto bpf_alu32_trunc;
404 case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
405 case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
406 if (imm >= -32768 && imm < 32768)
407 PPC_MULI(dst_reg, dst_reg, IMM_L(imm));
408 else {
409 PPC_LI32(b2p[TMP_REG_1], imm);
410 if (BPF_CLASS(code) == BPF_ALU)
411 PPC_MULW(dst_reg, dst_reg,
412 b2p[TMP_REG_1]);
413 else
414 PPC_MULD(dst_reg, dst_reg,
415 b2p[TMP_REG_1]);
416 }
417 goto bpf_alu32_trunc;
418 case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
419 case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
420 if (BPF_OP(code) == BPF_MOD) {
421 PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
422 PPC_MULW(b2p[TMP_REG_1], src_reg,
423 b2p[TMP_REG_1]);
424 PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
425 } else
426 PPC_DIVWU(dst_reg, dst_reg, src_reg);
427 goto bpf_alu32_trunc;
428 case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
429 case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
430 if (BPF_OP(code) == BPF_MOD) {
431 PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg);
432 PPC_MULD(b2p[TMP_REG_1], src_reg,
433 b2p[TMP_REG_1]);
434 PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
435 } else
436 PPC_DIVDU(dst_reg, dst_reg, src_reg);
437 break;
438 case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
439 case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
440 case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
441 case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
442 if (imm == 0)
443 return -EINVAL;
444 if (imm == 1) {
445 if (BPF_OP(code) == BPF_DIV) {
446 goto bpf_alu32_trunc;
447 } else {
448 PPC_LI(dst_reg, 0);
449 break;
450 }
451 }
452
453 PPC_LI32(b2p[TMP_REG_1], imm);
454 switch (BPF_CLASS(code)) {
455 case BPF_ALU:
456 if (BPF_OP(code) == BPF_MOD) {
457 PPC_DIVWU(b2p[TMP_REG_2], dst_reg,
458 b2p[TMP_REG_1]);
459 PPC_MULW(b2p[TMP_REG_1],
460 b2p[TMP_REG_1],
461 b2p[TMP_REG_2]);
462 PPC_SUB(dst_reg, dst_reg,
463 b2p[TMP_REG_1]);
464 } else
465 PPC_DIVWU(dst_reg, dst_reg,
466 b2p[TMP_REG_1]);
467 break;
468 case BPF_ALU64:
469 if (BPF_OP(code) == BPF_MOD) {
470 PPC_DIVDU(b2p[TMP_REG_2], dst_reg,
471 b2p[TMP_REG_1]);
472 PPC_MULD(b2p[TMP_REG_1],
473 b2p[TMP_REG_1],
474 b2p[TMP_REG_2]);
475 PPC_SUB(dst_reg, dst_reg,
476 b2p[TMP_REG_1]);
477 } else
478 PPC_DIVDU(dst_reg, dst_reg,
479 b2p[TMP_REG_1]);
480 break;
481 }
482 goto bpf_alu32_trunc;
483 case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
484 case BPF_ALU64 | BPF_NEG: /* dst = -dst */
485 PPC_NEG(dst_reg, dst_reg);
486 goto bpf_alu32_trunc;
487
488 /*
489 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
490 */
491 case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
492 case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
493 PPC_AND(dst_reg, dst_reg, src_reg);
494 goto bpf_alu32_trunc;
495 case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
496 case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
497 if (!IMM_H(imm))
498 PPC_ANDI(dst_reg, dst_reg, IMM_L(imm));
499 else {
500 /* Sign-extended */
501 PPC_LI32(b2p[TMP_REG_1], imm);
502 PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]);
503 }
504 goto bpf_alu32_trunc;
505 case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
506 case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
507 PPC_OR(dst_reg, dst_reg, src_reg);
508 goto bpf_alu32_trunc;
509 case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
510 case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
511 if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
512 /* Sign-extended */
513 PPC_LI32(b2p[TMP_REG_1], imm);
514 PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]);
515 } else {
516 if (IMM_L(imm))
517 PPC_ORI(dst_reg, dst_reg, IMM_L(imm));
518 if (IMM_H(imm))
519 PPC_ORIS(dst_reg, dst_reg, IMM_H(imm));
520 }
521 goto bpf_alu32_trunc;
522 case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
523 case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
524 PPC_XOR(dst_reg, dst_reg, src_reg);
525 goto bpf_alu32_trunc;
526 case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
527 case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
528 if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
529 /* Sign-extended */
530 PPC_LI32(b2p[TMP_REG_1], imm);
531 PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]);
532 } else {
533 if (IMM_L(imm))
534 PPC_XORI(dst_reg, dst_reg, IMM_L(imm));
535 if (IMM_H(imm))
536 PPC_XORIS(dst_reg, dst_reg, IMM_H(imm));
537 }
538 goto bpf_alu32_trunc;
539 case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
540 /* slw clears top 32 bits */
541 PPC_SLW(dst_reg, dst_reg, src_reg);
542 /* skip zero extension move, but set address map. */
543 if (insn_is_zext(&insn[i + 1]))
544 addrs[++i] = ctx->idx * 4;
545 break;
546 case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
547 PPC_SLD(dst_reg, dst_reg, src_reg);
548 break;
549		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
550 /* with imm 0, we still need to clear top 32 bits */
551 PPC_SLWI(dst_reg, dst_reg, imm);
552 if (insn_is_zext(&insn[i + 1]))
553 addrs[++i] = ctx->idx * 4;
554 break;
555		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
556 if (imm != 0)
557 PPC_SLDI(dst_reg, dst_reg, imm);
558 break;
559 case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
560 PPC_SRW(dst_reg, dst_reg, src_reg);
561 if (insn_is_zext(&insn[i + 1]))
562 addrs[++i] = ctx->idx * 4;
563 break;
564 case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
565 PPC_SRD(dst_reg, dst_reg, src_reg);
566 break;
567 case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
568 PPC_SRWI(dst_reg, dst_reg, imm);
569 if (insn_is_zext(&insn[i + 1]))
570 addrs[++i] = ctx->idx * 4;
571 break;
572 case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
573 if (imm != 0)
574 PPC_SRDI(dst_reg, dst_reg, imm);
575 break;
576 case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
577 PPC_SRAW(dst_reg, dst_reg, src_reg);
578 goto bpf_alu32_trunc;
579 case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
580 PPC_SRAD(dst_reg, dst_reg, src_reg);
581 break;
582 case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
583 PPC_SRAWI(dst_reg, dst_reg, imm);
584 goto bpf_alu32_trunc;
585 case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
586 if (imm != 0)
587 PPC_SRADI(dst_reg, dst_reg, imm);
588 break;
589
590 /*
591 * MOV
592 */
593 case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
594 case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
595 if (imm == 1) {
596 /* special mov32 for zext */
597 PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
598 break;
599 }
600 PPC_MR(dst_reg, src_reg);
601 goto bpf_alu32_trunc;
602 case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
603 case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
604 PPC_LI32(dst_reg, imm);
605 if (imm < 0)
606 goto bpf_alu32_trunc;
607 else if (insn_is_zext(&insn[i + 1]))
608 addrs[++i] = ctx->idx * 4;
609 break;
610
611bpf_alu32_trunc:
612 /* Truncate to 32-bits */
613 if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
614 PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
615 break;
616
617 /*
618 * BPF_FROM_BE/LE
619 */
620 case BPF_ALU | BPF_END | BPF_FROM_LE:
621 case BPF_ALU | BPF_END | BPF_FROM_BE:
622#ifdef __BIG_ENDIAN__
623 if (BPF_SRC(code) == BPF_FROM_BE)
624 goto emit_clear;
625#else /* !__BIG_ENDIAN__ */
626 if (BPF_SRC(code) == BPF_FROM_LE)
627 goto emit_clear;
628#endif
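			/*
			 * The rotate-and-insert sequences below byte-swap the low
			 * 16 or 32 bits of dst_reg (e.g. for imm == 16, a value
			 * ending in 0xaabb becomes 0xbbaa with the upper bits
			 * cleared); the 64-bit case goes through the stack so that
			 * ldbrx can do the byte reversal.
			 */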
629 switch (imm) {
630 case 16:
631 /* Rotate 8 bits left & mask with 0x0000ff00 */
632 PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23);
633 /* Rotate 8 bits right & insert LSB to reg */
634 PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31);
635 /* Move result back to dst_reg */
636 PPC_MR(dst_reg, b2p[TMP_REG_1]);
637 break;
638 case 32:
639 /*
640 * Rotate word left by 8 bits:
641 * 2 bytes are already in their final position
642 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
643 */
644 PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
645 /* Rotate 24 bits and insert byte 1 */
646 PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
647 /* Rotate 24 bits and insert byte 3 */
648 PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
649 PPC_MR(dst_reg, b2p[TMP_REG_1]);
650 break;
651 case 64:
652 /*
653 * Way easier and faster(?) to store the value
654 * into stack and then use ldbrx
655 *
656 * ctx->seen will be reliable in pass2, but
657 * the instructions generated will remain the
658 * same across all passes
659 */
660 PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
661 PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
662 PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
663 break;
664 }
665 break;
666
667emit_clear:
668 switch (imm) {
669 case 16:
670 /* zero-extend 16 bits into 64 bits */
671 PPC_RLDICL(dst_reg, dst_reg, 0, 48);
672 if (insn_is_zext(&insn[i + 1]))
673 addrs[++i] = ctx->idx * 4;
674 break;
675 case 32:
676 if (!fp->aux->verifier_zext)
677 /* zero-extend 32 bits into 64 bits */
678 PPC_RLDICL(dst_reg, dst_reg, 0, 32);
679 break;
680 case 64:
681 /* nop */
682 break;
683 }
684 break;
685
686 /*
687 * BPF_ST NOSPEC (speculation barrier)
688 */
689 case BPF_ST | BPF_NOSPEC:
690 if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) ||
691 (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) &&
692 (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) || !cpu_has_feature(CPU_FTR_HVMODE))))
693 break;
694
695 switch (stf_barrier) {
696 case STF_BARRIER_EIEIO:
697 EMIT(0x7c0006ac | 0x02000000);
698 break;
699 case STF_BARRIER_SYNC_ORI:
700 EMIT(PPC_INST_SYNC);
701 PPC_LD(b2p[TMP_REG_1], 13, 0);
702 PPC_ORI(31, 31, 0);
703 break;
704 case STF_BARRIER_FALLBACK:
705 EMIT(PPC_INST_MFLR | ___PPC_RT(b2p[TMP_REG_1]));
706 PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier));
707 PPC_MTCTR(12);
708 EMIT(PPC_INST_BCTR | 0x1);
709 PPC_MTLR(b2p[TMP_REG_1]);
710 break;
711 case STF_BARRIER_NONE:
712 break;
713 }
714 break;
715
716 /*
717 * BPF_ST(X)
718 */
719 case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
720 case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
721 if (BPF_CLASS(code) == BPF_ST) {
722 PPC_LI(b2p[TMP_REG_1], imm);
723 src_reg = b2p[TMP_REG_1];
724 }
725 PPC_STB(src_reg, dst_reg, off);
726 break;
727		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
728		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
729 if (BPF_CLASS(code) == BPF_ST) {
730 PPC_LI(b2p[TMP_REG_1], imm);
731 src_reg = b2p[TMP_REG_1];
732 }
733 PPC_STH(src_reg, dst_reg, off);
734 break;
735 case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
736 case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
737 if (BPF_CLASS(code) == BPF_ST) {
738 PPC_LI32(b2p[TMP_REG_1], imm);
739 src_reg = b2p[TMP_REG_1];
740 }
741 PPC_STW(src_reg, dst_reg, off);
742 break;
743		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
744 case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
745 if (BPF_CLASS(code) == BPF_ST) {
746 PPC_LI32(b2p[TMP_REG_1], imm);
747 src_reg = b2p[TMP_REG_1];
748 }
749 PPC_BPF_STL(src_reg, dst_reg, off);
750 break;
751
752 /*
753 * BPF_STX XADD (atomic_add)
754 */
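		/*
		 * Both XADD widths below are larx/stcx. retry loops: tmp_idx
		 * records the offset of the load-and-reserve instruction so the
		 * conditional branch can loop back until the store-conditional
		 * succeeds.
		 */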
755 /* *(u32 *)(dst + off) += src */
756 case BPF_STX | BPF_XADD | BPF_W:
757 /* Get EA into TMP_REG_1 */
758 PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
759 tmp_idx = ctx->idx * 4;
760 /* load value from memory into TMP_REG_2 */
761 PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
762 /* add value from src_reg into this */
763 PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
764 /* store result back */
765 PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
766 /* we're done if this succeeded */
767 PPC_BCC_SHORT(COND_NE, tmp_idx);
768 break;
769 /* *(u64 *)(dst + off) += src */
770 case BPF_STX | BPF_XADD | BPF_DW:
771 PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
772 tmp_idx = ctx->idx * 4;
773 PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
774 PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
775 PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
776 PPC_BCC_SHORT(COND_NE, tmp_idx);
777 break;
778
779 /*
780 * BPF_LDX
781 */
782 /* dst = *(u8 *)(ul) (src + off) */
783 case BPF_LDX | BPF_MEM | BPF_B:
784 PPC_LBZ(dst_reg, src_reg, off);
785 if (insn_is_zext(&insn[i + 1]))
786 addrs[++i] = ctx->idx * 4;
787 break;
788 /* dst = *(u16 *)(ul) (src + off) */
789 case BPF_LDX | BPF_MEM | BPF_H:
790 PPC_LHZ(dst_reg, src_reg, off);
791 if (insn_is_zext(&insn[i + 1]))
792 addrs[++i] = ctx->idx * 4;
793 break;
794 /* dst = *(u32 *)(ul) (src + off) */
795 case BPF_LDX | BPF_MEM | BPF_W:
796 PPC_LWZ(dst_reg, src_reg, off);
797 if (insn_is_zext(&insn[i + 1]))
798 addrs[++i] = ctx->idx * 4;
799 break;
800 /* dst = *(u64 *)(ul) (src + off) */
801 case BPF_LDX | BPF_MEM | BPF_DW:
802 PPC_BPF_LL(dst_reg, src_reg, off);
803 break;
804
805 /*
806 * Doubleword load
807 * 16 byte instruction that uses two 'struct bpf_insn'
808 */
809 case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
810 imm64 = ((u64)(u32) insn[i].imm) |
811 (((u64)(u32) insn[i+1].imm) << 32);
812 /* Adjust for two bpf instructions */
813 addrs[++i] = ctx->idx * 4;
814 PPC_LI64(dst_reg, imm64);
815 break;
816
817 /*
818 * Return/Exit
819 */
820 case BPF_JMP | BPF_EXIT:
821 /*
822 * If this isn't the very last instruction, branch to
823 * the epilogue. If we _are_ the last instruction,
824 * we'll just fall through to the epilogue.
825 */
826 if (i != flen - 1)
827 PPC_JMP(exit_addr);
828 /* else fall through to the epilogue */
829 break;
830
831 /*
832 * Call kernel helper or bpf function
833 */
834 case BPF_JMP | BPF_CALL:
835 ctx->seen |= SEEN_FUNC;
836
837 ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
838 &func_addr, &func_addr_fixed);
839 if (ret < 0)
840 return ret;
841
842 if (func_addr_fixed)
843 bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
844 else
845 bpf_jit_emit_func_call_rel(image, ctx, func_addr);
846 /* move return value from r3 to BPF_REG_0 */
847 PPC_MR(b2p[BPF_REG_0], 3);
848 break;
849
850 /*
851 * Jumps and branches
852 */
853 case BPF_JMP | BPF_JA:
854 PPC_JMP(addrs[i + 1 + off]);
855 break;
856
857 case BPF_JMP | BPF_JGT | BPF_K:
858 case BPF_JMP | BPF_JGT | BPF_X:
859 case BPF_JMP | BPF_JSGT | BPF_K:
860 case BPF_JMP | BPF_JSGT | BPF_X:
861 case BPF_JMP32 | BPF_JGT | BPF_K:
862 case BPF_JMP32 | BPF_JGT | BPF_X:
863 case BPF_JMP32 | BPF_JSGT | BPF_K:
864 case BPF_JMP32 | BPF_JSGT | BPF_X:
865 true_cond = COND_GT;
866 goto cond_branch;
867 case BPF_JMP | BPF_JLT | BPF_K:
868 case BPF_JMP | BPF_JLT | BPF_X:
869 case BPF_JMP | BPF_JSLT | BPF_K:
870 case BPF_JMP | BPF_JSLT | BPF_X:
871 case BPF_JMP32 | BPF_JLT | BPF_K:
872 case BPF_JMP32 | BPF_JLT | BPF_X:
873 case BPF_JMP32 | BPF_JSLT | BPF_K:
874 case BPF_JMP32 | BPF_JSLT | BPF_X:
875 true_cond = COND_LT;
876 goto cond_branch;
877 case BPF_JMP | BPF_JGE | BPF_K:
878 case BPF_JMP | BPF_JGE | BPF_X:
879 case BPF_JMP | BPF_JSGE | BPF_K:
880 case BPF_JMP | BPF_JSGE | BPF_X:
881 case BPF_JMP32 | BPF_JGE | BPF_K:
882 case BPF_JMP32 | BPF_JGE | BPF_X:
883 case BPF_JMP32 | BPF_JSGE | BPF_K:
884 case BPF_JMP32 | BPF_JSGE | BPF_X:
885 true_cond = COND_GE;
886 goto cond_branch;
887 case BPF_JMP | BPF_JLE | BPF_K:
888 case BPF_JMP | BPF_JLE | BPF_X:
889 case BPF_JMP | BPF_JSLE | BPF_K:
890 case BPF_JMP | BPF_JSLE | BPF_X:
891 case BPF_JMP32 | BPF_JLE | BPF_K:
892 case BPF_JMP32 | BPF_JLE | BPF_X:
893 case BPF_JMP32 | BPF_JSLE | BPF_K:
894 case BPF_JMP32 | BPF_JSLE | BPF_X:
895 true_cond = COND_LE;
896 goto cond_branch;
897 case BPF_JMP | BPF_JEQ | BPF_K:
898 case BPF_JMP | BPF_JEQ | BPF_X:
899 case BPF_JMP32 | BPF_JEQ | BPF_K:
900 case BPF_JMP32 | BPF_JEQ | BPF_X:
901 true_cond = COND_EQ;
902 goto cond_branch;
903 case BPF_JMP | BPF_JNE | BPF_K:
904 case BPF_JMP | BPF_JNE | BPF_X:
905 case BPF_JMP32 | BPF_JNE | BPF_K:
906 case BPF_JMP32 | BPF_JNE | BPF_X:
907 true_cond = COND_NE;
908 goto cond_branch;
909 case BPF_JMP | BPF_JSET | BPF_K:
910 case BPF_JMP | BPF_JSET | BPF_X:
911 case BPF_JMP32 | BPF_JSET | BPF_K:
912 case BPF_JMP32 | BPF_JSET | BPF_X:
913 true_cond = COND_NE;
914 /* Fall through */
915
916cond_branch:
917 switch (code) {
918 case BPF_JMP | BPF_JGT | BPF_X:
919 case BPF_JMP | BPF_JLT | BPF_X:
920 case BPF_JMP | BPF_JGE | BPF_X:
921 case BPF_JMP | BPF_JLE | BPF_X:
922 case BPF_JMP | BPF_JEQ | BPF_X:
923 case BPF_JMP | BPF_JNE | BPF_X:
924 case BPF_JMP32 | BPF_JGT | BPF_X:
925 case BPF_JMP32 | BPF_JLT | BPF_X:
926 case BPF_JMP32 | BPF_JGE | BPF_X:
927 case BPF_JMP32 | BPF_JLE | BPF_X:
928 case BPF_JMP32 | BPF_JEQ | BPF_X:
929 case BPF_JMP32 | BPF_JNE | BPF_X:
930 /* unsigned comparison */
931 if (BPF_CLASS(code) == BPF_JMP32)
932 PPC_CMPLW(dst_reg, src_reg);
933 else
934 PPC_CMPLD(dst_reg, src_reg);
935 break;
936 case BPF_JMP | BPF_JSGT | BPF_X:
937 case BPF_JMP | BPF_JSLT | BPF_X:
938 case BPF_JMP | BPF_JSGE | BPF_X:
939 case BPF_JMP | BPF_JSLE | BPF_X:
940 case BPF_JMP32 | BPF_JSGT | BPF_X:
941 case BPF_JMP32 | BPF_JSLT | BPF_X:
942 case BPF_JMP32 | BPF_JSGE | BPF_X:
943 case BPF_JMP32 | BPF_JSLE | BPF_X:
944 /* signed comparison */
945 if (BPF_CLASS(code) == BPF_JMP32)
946 PPC_CMPW(dst_reg, src_reg);
947 else
948 PPC_CMPD(dst_reg, src_reg);
949 break;
950 case BPF_JMP | BPF_JSET | BPF_X:
951 case BPF_JMP32 | BPF_JSET | BPF_X:
952 if (BPF_CLASS(code) == BPF_JMP) {
953 PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
954 src_reg);
955 } else {
956 int tmp_reg = b2p[TMP_REG_1];
957
958 PPC_AND(tmp_reg, dst_reg, src_reg);
959 PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
960 31);
961 }
962 break;
963 case BPF_JMP | BPF_JNE | BPF_K:
964 case BPF_JMP | BPF_JEQ | BPF_K:
965 case BPF_JMP | BPF_JGT | BPF_K:
966 case BPF_JMP | BPF_JLT | BPF_K:
967 case BPF_JMP | BPF_JGE | BPF_K:
968 case BPF_JMP | BPF_JLE | BPF_K:
969 case BPF_JMP32 | BPF_JNE | BPF_K:
970 case BPF_JMP32 | BPF_JEQ | BPF_K:
971 case BPF_JMP32 | BPF_JGT | BPF_K:
972 case BPF_JMP32 | BPF_JLT | BPF_K:
973 case BPF_JMP32 | BPF_JGE | BPF_K:
974 case BPF_JMP32 | BPF_JLE | BPF_K:
975 {
976 bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
977
978 /*
979 * Need sign-extended load, so only positive
980 * values can be used as imm in cmpldi
981 */
982 if (imm >= 0 && imm < 32768) {
983 if (is_jmp32)
984 PPC_CMPLWI(dst_reg, imm);
985 else
986 PPC_CMPLDI(dst_reg, imm);
987 } else {
988 /* sign-extending load */
989 PPC_LI32(b2p[TMP_REG_1], imm);
990 /* ... but unsigned comparison */
991 if (is_jmp32)
992 PPC_CMPLW(dst_reg,
993 b2p[TMP_REG_1]);
994 else
995 PPC_CMPLD(dst_reg,
996 b2p[TMP_REG_1]);
997 }
998 break;
999 }
1000 case BPF_JMP | BPF_JSGT | BPF_K:
1001 case BPF_JMP | BPF_JSLT | BPF_K:
1002 case BPF_JMP | BPF_JSGE | BPF_K:
1003 case BPF_JMP | BPF_JSLE | BPF_K:
1004 case BPF_JMP32 | BPF_JSGT | BPF_K:
1005 case BPF_JMP32 | BPF_JSLT | BPF_K:
1006 case BPF_JMP32 | BPF_JSGE | BPF_K:
1007 case BPF_JMP32 | BPF_JSLE | BPF_K:
1008 {
1009 bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
1010
1011 /*
1012 * signed comparison, so any 16-bit value
1013 * can be used in cmpdi
1014 */
1015 if (imm >= -32768 && imm < 32768) {
1016 if (is_jmp32)
1017 PPC_CMPWI(dst_reg, imm);
1018 else
1019 PPC_CMPDI(dst_reg, imm);
1020 } else {
1021 PPC_LI32(b2p[TMP_REG_1], imm);
1022 if (is_jmp32)
1023 PPC_CMPW(dst_reg,
1024 b2p[TMP_REG_1]);
1025 else
1026 PPC_CMPD(dst_reg,
1027 b2p[TMP_REG_1]);
1028 }
1029 break;
1030 }
1031 case BPF_JMP | BPF_JSET | BPF_K:
1032 case BPF_JMP32 | BPF_JSET | BPF_K:
1033 /* andi does not sign-extend the immediate */
1034 if (imm >= 0 && imm < 32768)
1035 /* PPC_ANDI is _only/always_ dot-form */
1036 PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
1037 else {
1038 int tmp_reg = b2p[TMP_REG_1];
1039
1040 PPC_LI32(tmp_reg, imm);
1041 if (BPF_CLASS(code) == BPF_JMP) {
1042 PPC_AND_DOT(tmp_reg, dst_reg,
1043 tmp_reg);
1044 } else {
1045 PPC_AND(tmp_reg, dst_reg,
1046 tmp_reg);
1047 PPC_RLWINM_DOT(tmp_reg, tmp_reg,
1048 0, 0, 31);
1049 }
1050 }
1051 break;
1052 }
1053 PPC_BCC(true_cond, addrs[i + 1 + off]);
1054 break;
1055
1056 /*
1057 * Tail call
1058 */
1059 case BPF_JMP | BPF_TAIL_CALL:
1060 ctx->seen |= SEEN_TAILCALL;
1061 ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
1062 if (ret < 0)
1063 return ret;
1064 break;
1065
1066 default:
1067 /*
1068 * The filter contains something cruel & unusual.
1069 * We don't handle it, but also there shouldn't be
1070 * anything missing from our list.
1071 */
1072 pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
1073 code, i);
1074 return -ENOTSUPP;
1075 }
1076 }
1077
1078 /* Set end-of-body-code address for exit. */
1079 addrs[i] = ctx->idx * 4;
1080
1081 return 0;
1082}
1083
1084/* Fix the branch target addresses for subprog calls */
1085static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
1086 struct codegen_context *ctx, u32 *addrs)
1087{
1088 const struct bpf_insn *insn = fp->insnsi;
1089 bool func_addr_fixed;
1090 u64 func_addr;
1091 u32 tmp_idx;
1092 int i, ret;
1093
1094 for (i = 0; i < fp->len; i++) {
1095 /*
1096 * During the extra pass, only the branch target addresses for
1097 * the subprog calls need to be fixed. All other instructions
1098		 * can be left untouched.
1099 *
1100 * The JITed image length does not change because we already
1101		 * ensure that the JITed instruction sequences for these calls
1102 * are of fixed length by padding them with NOPs.
1103 */
1104 if (insn[i].code == (BPF_JMP | BPF_CALL) &&
1105 insn[i].src_reg == BPF_PSEUDO_CALL) {
1106 ret = bpf_jit_get_func_addr(fp, &insn[i], true,
1107 &func_addr,
1108 &func_addr_fixed);
1109 if (ret < 0)
1110 return ret;
1111
1112 /*
1113 * Save ctx->idx as this would currently point to the
1114 * end of the JITed image and set it to the offset of
1115 * the instruction sequence corresponding to the
1116 * subprog call temporarily.
1117 */
1118 tmp_idx = ctx->idx;
1119 ctx->idx = addrs[i] / 4;
1120 bpf_jit_emit_func_call_rel(image, ctx, func_addr);
1121
1122 /*
1123 * Restore ctx->idx here. This is safe as the length
1124 * of the JITed sequence remains unchanged.
1125 */
1126 ctx->idx = tmp_idx;
1127 }
1128 }
1129
1130 return 0;
1131}
1132
1133struct powerpc64_jit_data {
1134 struct bpf_binary_header *header;
1135 u32 *addrs;
1136 u8 *image;
1137 u32 proglen;
1138 struct codegen_context ctx;
1139};
1140
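/*
 * Returning true here makes the verifier insert explicit zero-extension
 * instructions, which is why bpf_jit_build_body() skips its own 32-bit
 * truncation when fp->aux->verifier_zext is set and uses insn_is_zext() to
 * elide the zero-extension mov that follows 32-bit loads and shifts.
 */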
1141bool bpf_jit_needs_zext(void)
1142{
1143 return true;
1144}
1145
1146struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
1147{
1148 u32 proglen;
1149 u32 alloclen;
1150 u8 *image = NULL;
1151 u32 *code_base;
1152 u32 *addrs;
1153 struct powerpc64_jit_data *jit_data;
1154 struct codegen_context cgctx;
1155 int pass;
1156 int flen;
1157 struct bpf_binary_header *bpf_hdr;
1158 struct bpf_prog *org_fp = fp;
1159 struct bpf_prog *tmp_fp;
1160 bool bpf_blinded = false;
1161 bool extra_pass = false;
1162
1163 if (!fp->jit_requested)
1164 return org_fp;
1165
1166 tmp_fp = bpf_jit_blind_constants(org_fp);
1167 if (IS_ERR(tmp_fp))
1168 return org_fp;
1169
1170 if (tmp_fp != org_fp) {
1171 bpf_blinded = true;
1172 fp = tmp_fp;
1173 }
1174
1175 jit_data = fp->aux->jit_data;
1176 if (!jit_data) {
1177 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1178 if (!jit_data) {
1179 fp = org_fp;
1180 goto out;
1181 }
1182 fp->aux->jit_data = jit_data;
1183 }
1184
1185 flen = fp->len;
1186 addrs = jit_data->addrs;
1187 if (addrs) {
1188 cgctx = jit_data->ctx;
1189 image = jit_data->image;
1190 bpf_hdr = jit_data->header;
1191 proglen = jit_data->proglen;
1192 alloclen = proglen + FUNCTION_DESCR_SIZE;
1193 extra_pass = true;
1194 goto skip_init_ctx;
1195 }
1196
1197 addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
1198 if (addrs == NULL) {
1199 fp = org_fp;
1200 goto out_addrs;
1201 }
1202
1203 memset(&cgctx, 0, sizeof(struct codegen_context));
1204
1205 /* Make sure that the stack is quadword aligned. */
1206 cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
1207
1208 /* Scouting faux-generate pass 0 */
1209 if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
1210 /* We hit something illegal or unsupported. */
1211 fp = org_fp;
1212 goto out_addrs;
1213 }
1214
1215 /*
1216 * If we have seen a tail call, we need a second pass.
1217 * This is because bpf_jit_emit_common_epilogue() is called
1218 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
1219 */
1220 if (cgctx.seen & SEEN_TAILCALL) {
1221 cgctx.idx = 0;
1222 if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
1223 fp = org_fp;
1224 goto out_addrs;
1225 }
1226 }
1227
1228 /*
1229 * Pretend to build prologue, given the features we've seen. This will
1230	 * update cgctx.idx as it pretends to output instructions, then we can
1231 * calculate total size from idx.
1232 */
1233 bpf_jit_build_prologue(0, &cgctx);
1234 bpf_jit_build_epilogue(0, &cgctx);
1235
1236 proglen = cgctx.idx * 4;
1237 alloclen = proglen + FUNCTION_DESCR_SIZE;
1238
1239 bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
1240 bpf_jit_fill_ill_insns);
1241 if (!bpf_hdr) {
1242 fp = org_fp;
1243 goto out_addrs;
1244 }
1245
1246skip_init_ctx:
1247 code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
1248
1249 if (extra_pass) {
1250 /*
1251 * Do not touch the prologue and epilogue as they will remain
1252 * unchanged. Only fix the branch target address for subprog
1253 * calls in the body.
1254 *
1255 * This does not change the offsets and lengths of the subprog
1256		 * call instruction sequences, and hence the size of the JITed
1257		 * image stays the same as well.
1258 */
1259 bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
1260
1261 /* There is no need to perform the usual passes. */
1262 goto skip_codegen_passes;
1263 }
1264
1265 /* Code generation passes 1-2 */
1266 for (pass = 1; pass < 3; pass++) {
1267 /* Now build the prologue, body code & epilogue for real. */
1268 cgctx.idx = 0;
1269 bpf_jit_build_prologue(code_base, &cgctx);
1270 bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
1271 bpf_jit_build_epilogue(code_base, &cgctx);
1272
1273 if (bpf_jit_enable > 1)
1274 pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
1275 proglen - (cgctx.idx * 4), cgctx.seen);
1276 }
1277
1278skip_codegen_passes:
1279 if (bpf_jit_enable > 1)
1280 /*
1281 * Note that we output the base address of the code_base
1282 * rather than image, since opcodes are in code_base.
1283 */
1284 bpf_jit_dump(flen, proglen, pass, code_base);
1285
1286#ifdef PPC64_ELF_ABI_v1
1287 /* Function descriptor nastiness: Address + TOC */
1288 ((u64 *)image)[0] = (u64)code_base;
1289 ((u64 *)image)[1] = local_paca->kernel_toc;
1290#endif
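	/*
	 * On ELF ABI v1, fp->bpf_func must point at a function descriptor
	 * (entry address + TOC), which is why the descriptor is written at the
	 * start of the image and code_base begins FUNCTION_DESCR_SIZE bytes in.
	 */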
1291
1292 fp->bpf_func = (void *)image;
1293 fp->jited = 1;
1294 fp->jited_len = alloclen;
1295
1296 bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
1297 if (!fp->is_func || extra_pass) {
1298 bpf_prog_fill_jited_linfo(fp, addrs);
1299out_addrs:
1300 kfree(addrs);
1301 kfree(jit_data);
1302 fp->aux->jit_data = NULL;
1303 } else {
1304 jit_data->addrs = addrs;
1305 jit_data->ctx = cgctx;
1306 jit_data->proglen = proglen;
1307 jit_data->image = image;
1308 jit_data->header = bpf_hdr;
1309 }
1310
1311out:
1312 if (bpf_blinded)
1313 bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
1314
1315 return fp;
1316}
1317
1318/* Overriding bpf_jit_free() as we don't set images read-only. */
1319void bpf_jit_free(struct bpf_prog *fp)
1320{
1321 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
1322 struct bpf_binary_header *bpf_hdr = (void *)addr;
1323
1324 if (fp->jited)
1325 bpf_jit_binary_free(bpf_hdr);
1326
1327 bpf_prog_unlock_free(fp);
1328}