| rjw | 1f88458 | 2022-01-06 17:20:42 +0800 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 | 
|  | 2 | #include <linux/err.h> | 
|  | 3 | #include <linux/slab.h> | 
|  | 4 | #include <linux/mm_types.h> | 
|  | 5 | #include <linux/sched/task.h> | 
|  | 6 |  | 
|  | 7 | #include <asm/branch.h> | 
|  | 8 | #include <asm/cacheflush.h> | 
|  | 9 | #include <asm/fpu_emulator.h> | 
|  | 10 | #include <asm/inst.h> | 
|  | 11 | #include <asm/mipsregs.h> | 
|  | 12 | #include <linux/uaccess.h> | 
|  | 13 |  | 
|  | 14 | /** | 
|  | 15 | * struct emuframe - The 'emulation' frame structure | 
|  | 16 | * @emul:	The instruction to 'emulate'. | 
|  | 17 | * @badinst:	A break instruction to cause a return to the kernel. | 
|  | 18 | * | 
|  | 19 | * This structure defines the frames placed within the delay slot emulation | 
|  | 20 | * page in response to a call to mips_dsemul(). Each thread may be allocated | 
|  | 21 | * only one frame at any given time. The kernel stores within it the | 
|  | 22 | * instruction to be 'emulated' followed by a break instruction, then | 
|  | 23 | * executes the frame in user mode. The break causes a trap to the kernel | 
|  | 24 | * which leads to do_dsemulret() being called unless the instruction in | 
|  | 25 | * @emul causes a trap itself, is a branch, or a signal is delivered to | 
|  | 26 | * the thread. In these cases the allocated frame will either be reused by | 
|  | 27 | * a subsequent delay slot 'emulation', or be freed during signal delivery or | 
|  | 28 | * upon thread exit. | 
|  | 29 | * | 
|  | 30 | * This approach is used because: | 
|  | 31 | * | 
|  | 32 | * - Actually emulating all instructions isn't feasible. We would need to | 
|  | 33 | *   be able to handle instructions from all revisions of the MIPS ISA, | 
|  | 34 | *   all ASEs & all vendor instruction set extensions. This would be a | 
|  | 35 | *   whole lot of work & continual maintenance burden as new instructions | 
|  | 36 | *   are introduced, and in the case of some vendor extensions may not | 
|  | 37 | *   even be possible. Thus we need to take the approach of actually | 
|  | 38 | *   executing the instruction. | 
|  | 39 | * | 
|  | 40 | * - We must execute the instruction within user context. If we were to | 
|  | 41 | *   execute the instruction in kernel mode then it would have access to | 
|  | 42 | *   kernel resources without very careful checks, leaving us with a | 
|  | 43 | *   high potential for security or stability issues to arise. | 
|  | 44 | * | 
|  | 45 | * - We used to place the frame on the users stack, but this requires | 
|  | 46 | *   that the stack be executable. This is bad for security so the | 
|  | 47 | *   per-process page is now used instead. | 
|  | 48 | * | 
|  | 49 | * - The instruction in @emul may be something entirely invalid for a | 
|  | 50 | *   delay slot. The user may (intentionally or otherwise) place a branch | 
|  | 51 | *   in a delay slot, or a kernel mode instruction, or something else | 
|  | 52 | *   which generates an exception. Thus we can't rely upon the break in | 
|  | 53 | *   @badinst always being hit. For this reason we track the index of the | 
|  | 54 | *   frame allocated to each thread, allowing us to clean it up at later | 
|  | 55 | *   points such as signal delivery or thread exit. | 
|  | 56 | * | 
|  | 57 | * - The user may generate a fake struct emuframe if they wish, invoking | 
|  | 58 | *   the BRK_MEMU break instruction themselves. We must therefore not | 
|  | 59 | *   trust that BRK_MEMU means there's actually a valid frame allocated | 
|  | 60 | *   to the thread, and must not allow the user to do anything they | 
|  | 61 | *   couldn't already. | 
|  | 62 | */ | 
|  | 63 | struct emuframe { | 
|  | 64 | mips_instruction	emul; | 
|  | 65 | mips_instruction	badinst; | 
|  | 66 | }; | 
|  | 67 |  | 
|  | 68 | static const int emupage_frame_count = PAGE_SIZE / sizeof(struct emuframe); | 
|  | 69 |  | 
|  | 70 | static inline __user struct emuframe *dsemul_page(void) | 
|  | 71 | { | 
|  | 72 | return (__user struct emuframe *)STACK_TOP; | 
|  | 73 | } | 
|  | 74 |  | 
|  | 75 | static int alloc_emuframe(void) | 
|  | 76 | { | 
|  | 77 | mm_context_t *mm_ctx = ¤t->mm->context; | 
|  | 78 | int idx; | 
|  | 79 |  | 
|  | 80 | retry: | 
|  | 81 | spin_lock(&mm_ctx->bd_emupage_lock); | 
|  | 82 |  | 
|  | 83 | /* Ensure we have an allocation bitmap */ | 
|  | 84 | if (!mm_ctx->bd_emupage_allocmap) { | 
|  | 85 | mm_ctx->bd_emupage_allocmap = | 
|  | 86 | kcalloc(BITS_TO_LONGS(emupage_frame_count), | 
|  | 87 | sizeof(unsigned long), | 
|  | 88 | GFP_ATOMIC); | 
|  | 89 |  | 
|  | 90 | if (!mm_ctx->bd_emupage_allocmap) { | 
|  | 91 | idx = BD_EMUFRAME_NONE; | 
|  | 92 | goto out_unlock; | 
|  | 93 | } | 
|  | 94 | } | 
|  | 95 |  | 
|  | 96 | /* Attempt to allocate a single bit/frame */ | 
|  | 97 | idx = bitmap_find_free_region(mm_ctx->bd_emupage_allocmap, | 
|  | 98 | emupage_frame_count, 0); | 
|  | 99 | if (idx < 0) { | 
|  | 100 | /* | 
|  | 101 | * Failed to allocate a frame. We'll wait until one becomes | 
|  | 102 | * available. We unlock the page so that other threads actually | 
|  | 103 | * get the opportunity to free their frames, which means | 
|  | 104 | * technically the result of bitmap_full may be incorrect. | 
|  | 105 | * However the worst case is that we repeat all this and end up | 
|  | 106 | * back here again. | 
|  | 107 | */ | 
|  | 108 | spin_unlock(&mm_ctx->bd_emupage_lock); | 
|  | 109 | if (!wait_event_killable(mm_ctx->bd_emupage_queue, | 
|  | 110 | !bitmap_full(mm_ctx->bd_emupage_allocmap, | 
|  | 111 | emupage_frame_count))) | 
|  | 112 | goto retry; | 
|  | 113 |  | 
|  | 114 | /* Received a fatal signal - just give in */ | 
|  | 115 | return BD_EMUFRAME_NONE; | 
|  | 116 | } | 
|  | 117 |  | 
|  | 118 | /* Success! */ | 
|  | 119 | pr_debug("allocate emuframe %d to %d\n", idx, current->pid); | 
|  | 120 | out_unlock: | 
|  | 121 | spin_unlock(&mm_ctx->bd_emupage_lock); | 
|  | 122 | return idx; | 
|  | 123 | } | 
|  | 124 |  | 
|  | 125 | static void free_emuframe(int idx, struct mm_struct *mm) | 
|  | 126 | { | 
|  | 127 | mm_context_t *mm_ctx = &mm->context; | 
|  | 128 |  | 
|  | 129 | spin_lock(&mm_ctx->bd_emupage_lock); | 
|  | 130 |  | 
|  | 131 | pr_debug("free emuframe %d from %d\n", idx, current->pid); | 
|  | 132 | bitmap_clear(mm_ctx->bd_emupage_allocmap, idx, 1); | 
|  | 133 |  | 
|  | 134 | /* If some thread is waiting for a frame, now's its chance */ | 
|  | 135 | wake_up(&mm_ctx->bd_emupage_queue); | 
|  | 136 |  | 
|  | 137 | spin_unlock(&mm_ctx->bd_emupage_lock); | 
|  | 138 | } | 
|  | 139 |  | 
|  | 140 | static bool within_emuframe(struct pt_regs *regs) | 
|  | 141 | { | 
|  | 142 | unsigned long base = (unsigned long)dsemul_page(); | 
|  | 143 |  | 
|  | 144 | if (regs->cp0_epc < base) | 
|  | 145 | return false; | 
|  | 146 | if (regs->cp0_epc >= (base + PAGE_SIZE)) | 
|  | 147 | return false; | 
|  | 148 |  | 
|  | 149 | return true; | 
|  | 150 | } | 
|  | 151 |  | 
|  | 152 | bool dsemul_thread_cleanup(struct task_struct *tsk) | 
|  | 153 | { | 
|  | 154 | int fr_idx; | 
|  | 155 |  | 
|  | 156 | /* Clear any allocated frame, retrieving its index */ | 
|  | 157 | fr_idx = atomic_xchg(&tsk->thread.bd_emu_frame, BD_EMUFRAME_NONE); | 
|  | 158 |  | 
|  | 159 | /* If no frame was allocated, we're done */ | 
|  | 160 | if (fr_idx == BD_EMUFRAME_NONE) | 
|  | 161 | return false; | 
|  | 162 |  | 
|  | 163 | task_lock(tsk); | 
|  | 164 |  | 
|  | 165 | /* Free the frame that this thread had allocated */ | 
|  | 166 | if (tsk->mm) | 
|  | 167 | free_emuframe(fr_idx, tsk->mm); | 
|  | 168 |  | 
|  | 169 | task_unlock(tsk); | 
|  | 170 | return true; | 
|  | 171 | } | 
|  | 172 |  | 
|  | 173 | bool dsemul_thread_rollback(struct pt_regs *regs) | 
|  | 174 | { | 
|  | 175 | struct emuframe __user *fr; | 
|  | 176 | int fr_idx; | 
|  | 177 |  | 
|  | 178 | /* Do nothing if we're not executing from a frame */ | 
|  | 179 | if (!within_emuframe(regs)) | 
|  | 180 | return false; | 
|  | 181 |  | 
|  | 182 | /* Find the frame being executed */ | 
|  | 183 | fr_idx = atomic_read(¤t->thread.bd_emu_frame); | 
|  | 184 | if (fr_idx == BD_EMUFRAME_NONE) | 
|  | 185 | return false; | 
|  | 186 | fr = &dsemul_page()[fr_idx]; | 
|  | 187 |  | 
|  | 188 | /* | 
|  | 189 | * If the PC is at the emul instruction, roll back to the branch. If | 
|  | 190 | * PC is at the badinst (break) instruction, we've already emulated the | 
|  | 191 | * instruction so progress to the continue PC. If it's anything else | 
|  | 192 | * then something is amiss & the user has branched into some other area | 
|  | 193 | * of the emupage - we'll free the allocated frame anyway. | 
|  | 194 | */ | 
|  | 195 | if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->emul) | 
|  | 196 | regs->cp0_epc = current->thread.bd_emu_branch_pc; | 
|  | 197 | else if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->badinst) | 
|  | 198 | regs->cp0_epc = current->thread.bd_emu_cont_pc; | 
|  | 199 |  | 
|  | 200 | atomic_set(¤t->thread.bd_emu_frame, BD_EMUFRAME_NONE); | 
|  | 201 | free_emuframe(fr_idx, current->mm); | 
|  | 202 | return true; | 
|  | 203 | } | 
|  | 204 |  | 
|  | 205 | void dsemul_mm_cleanup(struct mm_struct *mm) | 
|  | 206 | { | 
|  | 207 | mm_context_t *mm_ctx = &mm->context; | 
|  | 208 |  | 
|  | 209 | kfree(mm_ctx->bd_emupage_allocmap); | 
|  | 210 | } | 
|  | 211 |  | 
|  | 212 | int mips_dsemul(struct pt_regs *regs, mips_instruction ir, | 
|  | 213 | unsigned long branch_pc, unsigned long cont_pc) | 
|  | 214 | { | 
|  | 215 | int isa16 = get_isa16_mode(regs->cp0_epc); | 
|  | 216 | mips_instruction break_math; | 
|  | 217 | unsigned long fr_uaddr; | 
|  | 218 | struct emuframe fr; | 
|  | 219 | int fr_idx, ret; | 
|  | 220 |  | 
|  | 221 | /* NOP is easy */ | 
|  | 222 | if (ir == 0) | 
|  | 223 | return -1; | 
|  | 224 |  | 
|  | 225 | /* microMIPS instructions */ | 
|  | 226 | if (isa16) { | 
|  | 227 | union mips_instruction insn = { .word = ir }; | 
|  | 228 |  | 
|  | 229 | /* NOP16 aka MOVE16 $0, $0 */ | 
|  | 230 | if ((ir >> 16) == MM_NOP16) | 
|  | 231 | return -1; | 
|  | 232 |  | 
|  | 233 | /* ADDIUPC */ | 
|  | 234 | if (insn.mm_a_format.opcode == mm_addiupc_op) { | 
|  | 235 | unsigned int rs; | 
|  | 236 | s32 v; | 
|  | 237 |  | 
|  | 238 | rs = (((insn.mm_a_format.rs + 0xe) & 0xf) + 2); | 
|  | 239 | v = regs->cp0_epc & ~3; | 
|  | 240 | v += insn.mm_a_format.simmediate << 2; | 
|  | 241 | regs->regs[rs] = (long)v; | 
|  | 242 | return -1; | 
|  | 243 | } | 
|  | 244 | } | 
|  | 245 |  | 
|  | 246 | pr_debug("dsemul 0x%08lx cont at 0x%08lx\n", regs->cp0_epc, cont_pc); | 
|  | 247 |  | 
|  | 248 | /* Allocate a frame if we don't already have one */ | 
|  | 249 | fr_idx = atomic_read(¤t->thread.bd_emu_frame); | 
|  | 250 | if (fr_idx == BD_EMUFRAME_NONE) | 
|  | 251 | fr_idx = alloc_emuframe(); | 
|  | 252 | if (fr_idx == BD_EMUFRAME_NONE) | 
|  | 253 | return SIGBUS; | 
|  | 254 |  | 
|  | 255 | /* Retrieve the appropriately encoded break instruction */ | 
|  | 256 | break_math = BREAK_MATH(isa16); | 
|  | 257 |  | 
|  | 258 | /* Write the instructions to the frame */ | 
|  | 259 | if (isa16) { | 
|  | 260 | union mips_instruction _emul = { | 
|  | 261 | .halfword = { ir >> 16, ir } | 
|  | 262 | }; | 
|  | 263 | union mips_instruction _badinst = { | 
|  | 264 | .halfword = { break_math >> 16, break_math } | 
|  | 265 | }; | 
|  | 266 |  | 
|  | 267 | fr.emul = _emul.word; | 
|  | 268 | fr.badinst = _badinst.word; | 
|  | 269 | } else { | 
|  | 270 | fr.emul = ir; | 
|  | 271 | fr.badinst = break_math; | 
|  | 272 | } | 
|  | 273 |  | 
|  | 274 | /* Write the frame to user memory */ | 
|  | 275 | fr_uaddr = (unsigned long)&dsemul_page()[fr_idx]; | 
|  | 276 | ret = access_process_vm(current, fr_uaddr, &fr, sizeof(fr), | 
|  | 277 | FOLL_FORCE | FOLL_WRITE); | 
|  | 278 | if (unlikely(ret != sizeof(fr))) { | 
|  | 279 | MIPS_FPU_EMU_INC_STATS(errors); | 
|  | 280 | free_emuframe(fr_idx, current->mm); | 
|  | 281 | return SIGBUS; | 
|  | 282 | } | 
|  | 283 |  | 
|  | 284 | /* Record the PC of the branch, PC to continue from & frame index */ | 
|  | 285 | current->thread.bd_emu_branch_pc = branch_pc; | 
|  | 286 | current->thread.bd_emu_cont_pc = cont_pc; | 
|  | 287 | atomic_set(¤t->thread.bd_emu_frame, fr_idx); | 
|  | 288 |  | 
|  | 289 | /* Change user register context to execute the frame */ | 
|  | 290 | regs->cp0_epc = fr_uaddr | isa16; | 
|  | 291 |  | 
|  | 292 | return 0; | 
|  | 293 | } | 
|  | 294 |  | 
|  | 295 | bool do_dsemulret(struct pt_regs *xcp) | 
|  | 296 | { | 
|  | 297 | /* Cleanup the allocated frame, returning if there wasn't one */ | 
|  | 298 | if (!dsemul_thread_cleanup(current)) { | 
|  | 299 | MIPS_FPU_EMU_INC_STATS(errors); | 
|  | 300 | return false; | 
|  | 301 | } | 
|  | 302 |  | 
|  | 303 | /* Set EPC to return to post-branch instruction */ | 
|  | 304 | xcp->cp0_epc = current->thread.bd_emu_cont_pc; | 
|  | 305 | pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc); | 
|  | 306 | MIPS_FPU_EMU_INC_STATS(ds_emul); | 
|  | 307 | return true; | 
|  | 308 | } |