rjw | 1f88458 | 2022-01-06 17:20:42 +0800 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (c) 2009 Corey Tabaka |
| 3 | * Copyright (c) 2015 Intel Corporation |
| 4 | * |
| 5 | * Permission is hereby granted, free of charge, to any person obtaining |
| 6 | * a copy of this software and associated documentation files |
| 7 | * (the "Software"), to deal in the Software without restriction, |
| 8 | * including without limitation the rights to use, copy, modify, merge, |
| 9 | * publish, distribute, sublicense, and/or sell copies of the Software, |
| 10 | * and to permit persons to whom the Software is furnished to do so, |
| 11 | * subject to the following conditions: |
| 12 | * |
| 13 | * The above copyright notice and this permission notice shall be |
| 14 | * included in all copies or substantial portions of the Software. |
| 15 | * |
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| 19 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
| 20 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 21 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 22 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 23 | */ |
| 24 | #include <debug.h> |
| 25 | #include <sys/types.h> |
| 26 | #include <compiler.h> |
| 27 | #include <arch.h> |
| 28 | #include <arch/x86.h> |
| 29 | #include <arch/x86/mmu.h> |
| 30 | #include <stdlib.h> |
| 31 | #include <string.h> |
| 32 | #include <arch/mmu.h> |
| 33 | #include <assert.h> |
| 34 | #include <err.h> |
| 35 | #include <arch/arch_ops.h> |
| 36 | |
| 37 | extern map_addr_t g_CR3; |
| 38 | |
| 39 | /* Address width */ |
| 40 | extern uint8_t g_vaddr_width; |
| 41 | extern uint8_t g_paddr_width; |
| 42 | |
| 43 | |
| 44 | /** |
| 45 | * @brief check if the virtual address is aligned and canonical |
| 46 | * |
| 47 | */ |
| 48 | static bool x86_mmu_check_vaddr(vaddr_t vaddr) |
| 49 | { |
| 50 | uint64_t addr = (uint64_t)vaddr; |
| 51 | uint64_t max_vaddr_lohalf, |
| 52 | min_vaddr_hihalf; |
| 53 | |
| 54 | /* Check to see if the address is PAGE aligned */ |
| 55 | if (!IS_ALIGNED(addr, PAGE_SIZE)) |
| 56 | return false; |
| 57 | |
| 58 | /* get max address in lower-half canonical addr space */ |
| 59 | /* e.g. if width is 48, then 0x00007FFF_FFFFFFFF */ |
| 60 | max_vaddr_lohalf = ((uint64_t)1ull << (g_vaddr_width - 1)) - 1; |
| 61 | |
| 62 | /* get min address in higher-half canonical addr space */ |
| 63 | /* e.g. if width is 48, then 0xFFFF8000_00000000*/ |
| 64 | min_vaddr_hihalf = ~ max_vaddr_lohalf; |
| 65 | |
| 66 | /* Check to see if the address in a canonical address */ |
| 67 | if ((addr > max_vaddr_lohalf) && (addr < min_vaddr_hihalf)) |
| 68 | return false; |
| 69 | |
| 70 | return true; |
| 71 | } |
| 72 | |
| 73 | |
| 74 | /** |
| 75 | * @brief check if the physical address is valid and aligned |
| 76 | * |
| 77 | */ |
| 78 | static bool x86_mmu_check_paddr(paddr_t paddr) |
| 79 | { |
| 80 | uint64_t addr = (uint64_t)paddr; |
| 81 | uint64_t max_paddr; |
| 82 | |
| 83 | /* Check to see if the address is PAGE aligned */ |
| 84 | if (!IS_ALIGNED(addr, PAGE_SIZE)) |
| 85 | return false; |
| 86 | |
| 87 | max_paddr = ((uint64_t)1ull << g_paddr_width) - 1; |
| 88 | |
| 89 | return addr <= max_paddr; |
| 90 | } |
| 91 | |
| 92 | |
| 93 | static inline uint64_t get_pml4_entry_from_pml4_table(vaddr_t vaddr, addr_t pml4_addr) |
| 94 | { |
| 95 | uint32_t pml4_index; |
| 96 | uint64_t *pml4_table = (uint64_t *)X86_PHYS_TO_VIRT(pml4_addr); |
| 97 | |
| 98 | pml4_index = (((uint64_t)vaddr >> PML4_SHIFT) & ((1ul << ADDR_OFFSET) - 1)); |
| 99 | return X86_PHYS_TO_VIRT(pml4_table[pml4_index]); |
| 100 | } |
| 101 | |
| 102 | static inline uint64_t get_pdp_entry_from_pdp_table(vaddr_t vaddr, uint64_t pml4e) |
| 103 | { |
| 104 | uint32_t pdp_index; |
| 105 | uint64_t *pdpe; |
| 106 | |
| 107 | pdp_index = (((uint64_t)vaddr >> PDP_SHIFT) & ((1ul << ADDR_OFFSET) - 1)); |
| 108 | pdpe = (uint64_t *)(pml4e & X86_PG_FRAME); |
| 109 | return X86_PHYS_TO_VIRT(pdpe[pdp_index]); |
| 110 | } |
| 111 | |
| 112 | static inline uint64_t get_pd_entry_from_pd_table(vaddr_t vaddr, uint64_t pdpe) |
| 113 | { |
| 114 | uint32_t pd_index; |
| 115 | uint64_t *pde; |
| 116 | |
| 117 | pd_index = (((uint64_t)vaddr >> PD_SHIFT) & ((1ul << ADDR_OFFSET) - 1)); |
| 118 | pde = (uint64_t *)(pdpe & X86_PG_FRAME); |
| 119 | return X86_PHYS_TO_VIRT(pde[pd_index]); |
| 120 | } |
| 121 | |
| 122 | static inline uint64_t get_pt_entry_from_pt_table(vaddr_t vaddr, uint64_t pde) |
| 123 | { |
| 124 | uint32_t pt_index; |
| 125 | uint64_t *pte; |
| 126 | |
| 127 | pt_index = (((uint64_t)vaddr >> PT_SHIFT) & ((1ul << ADDR_OFFSET) - 1)); |
| 128 | pte = (uint64_t *)(pde & X86_PG_FRAME); |
| 129 | return X86_PHYS_TO_VIRT(pte[pt_index]); |
| 130 | } |
| 131 | |
| 132 | static inline uint64_t get_pfn_from_pte(uint64_t pte) |
| 133 | { |
| 134 | uint64_t pfn; |
| 135 | |
| 136 | pfn = (pte & X86_PG_FRAME); |
| 137 | return X86_PHYS_TO_VIRT(pfn); |
| 138 | } |
| 139 | |
| 140 | static inline uint64_t get_pfn_from_pde(uint64_t pde) |
| 141 | { |
| 142 | uint64_t pfn; |
| 143 | |
| 144 | pfn = (pde & X86_2MB_PAGE_FRAME); |
| 145 | return X86_PHYS_TO_VIRT(pfn); |
| 146 | } |
| 147 | |
| 148 | static void map_zero_page(addr_t *ptr) |
| 149 | { |
| 150 | if (ptr) |
| 151 | memset(ptr, 0, PAGE_SIZE); |
| 152 | } |
| 153 | |
| 154 | /** |
| 155 | * @brief Returning the x86 arch flags from generic mmu flags |
| 156 | */ |
| 157 | arch_flags_t get_x86_arch_flags(arch_flags_t flags) |
| 158 | { |
| 159 | arch_flags_t arch_flags = 0; |
| 160 | |
| 161 | if (!(flags & ARCH_MMU_FLAG_PERM_RO)) |
| 162 | arch_flags |= X86_MMU_PG_RW; |
| 163 | |
| 164 | if (flags & ARCH_MMU_FLAG_PERM_USER) |
| 165 | arch_flags |= X86_MMU_PG_U; |
| 166 | |
| 167 | if (flags & ARCH_MMU_FLAG_UNCACHED) |
| 168 | arch_flags |= X86_MMU_CACHE_DISABLE; |
| 169 | |
| 170 | if (flags & ARCH_MMU_FLAG_PERM_NO_EXECUTE) |
| 171 | arch_flags |= X86_MMU_PG_NX; |
| 172 | |
| 173 | return arch_flags; |
| 174 | } |
| 175 | |
| 176 | /** |
| 177 | * @brief Returning the generic mmu flags from x86 arch flags |
| 178 | */ |
| 179 | uint get_arch_mmu_flags(arch_flags_t flags) |
| 180 | { |
| 181 | arch_flags_t mmu_flags = 0; |
| 182 | |
| 183 | if (!(flags & X86_MMU_PG_RW)) |
| 184 | mmu_flags |= ARCH_MMU_FLAG_PERM_RO; |
| 185 | |
| 186 | if (flags & X86_MMU_PG_U) |
| 187 | mmu_flags |= ARCH_MMU_FLAG_PERM_USER; |
| 188 | |
| 189 | if (flags & X86_MMU_CACHE_DISABLE) |
| 190 | mmu_flags |= ARCH_MMU_FLAG_UNCACHED; |
| 191 | |
| 192 | if (flags & X86_MMU_PG_NX) |
| 193 | mmu_flags |= ARCH_MMU_FLAG_PERM_NO_EXECUTE; |
| 194 | |
| 195 | return (uint)mmu_flags; |
| 196 | } |
| 197 | |
/**
 * @brief Walk the page table structures
 *
 * In this scenario, we are considering the paging scheme to be a PAE mode with
 * 4KB pages.
 *
 * @param pml4 Physical address of the top-level PML4 table (must be non-zero).
 * @param vaddr Virtual address to translate.
 * @param ret_level Out: level where the walk stopped (PF_L on success, otherwise
 *        the level whose entry was not present).
 * @param mmu_flags Out: generic mmu flags of the mapping; zeroed up front.
 * @param last_valid_entry Out: on success, the physical address backing vaddr
 *        (frame plus page offset); on failure, the last valid table entry seen.
 *
 * @return NO_ERROR if a present mapping exists (4KB or 2MB page),
 *         ERR_NOT_FOUND if any level's entry lacks the present bit,
 *         ERR_INVALID_ARGS if any out-pointer is NULL.
 */
static status_t x86_mmu_get_mapping(addr_t pml4, vaddr_t vaddr, uint32_t *ret_level,
                                    arch_flags_t *mmu_flags, map_addr_t *last_valid_entry)
{
    uint64_t pml4e, pdpe, pde, pte;

    DEBUG_ASSERT(pml4);
    if ((!ret_level) || (!last_valid_entry) || (!mmu_flags)) {
        return ERR_INVALID_ARGS;
    }

    /* Initialize outputs so callers get sane values on every early return. */
    *ret_level = PML4_L;
    *last_valid_entry = pml4;
    *mmu_flags = 0;

    pml4e = get_pml4_entry_from_pml4_table(vaddr, pml4);
    if ((pml4e & X86_MMU_PG_P) == 0) {
        return ERR_NOT_FOUND;
    }

    pdpe = get_pdp_entry_from_pdp_table(vaddr, pml4e);
    if ((pdpe & X86_MMU_PG_P) == 0) {
        *ret_level = PDP_L;
        *last_valid_entry = pml4e;
        return ERR_NOT_FOUND;
    }

    pde = get_pd_entry_from_pd_table(vaddr, pdpe);
    if ((pde & X86_MMU_PG_P) == 0) {
        *ret_level = PD_L;
        *last_valid_entry = pdpe;
        return ERR_NOT_FOUND;
    }

    /* 2 MB pages: PS bit set means the PD entry maps a large page directly. */
    if (pde & X86_MMU_PG_PS) {
        /* Getting the Page frame & adding the 2MB page offset from the vaddr */
        *last_valid_entry = get_pfn_from_pde(pde) + ((uint64_t)vaddr & PAGE_OFFSET_MASK_2MB);
        /* NOTE(review): X86_PHYS_TO_VIRT is applied to the entry before masking
         * the flag bits — presumably a no-op for the low flag bits; confirm. */
        *mmu_flags = get_arch_mmu_flags((X86_PHYS_TO_VIRT(pde)) & X86_FLAGS_MASK);
        goto last;
    }

    /* 4 KB pages */
    pte = get_pt_entry_from_pt_table(vaddr, pde);
    if ((pte & X86_MMU_PG_P) == 0) {
        *ret_level = PT_L;
        *last_valid_entry = pde;
        return ERR_NOT_FOUND;
    }

    /* Getting the Page frame & adding the 4KB page offset from the vaddr */
    *last_valid_entry = get_pfn_from_pte(pte) + ((uint64_t)vaddr & PAGE_OFFSET_MASK_4KB);
    *mmu_flags = get_arch_mmu_flags((X86_PHYS_TO_VIRT(pte)) & X86_FLAGS_MASK);

last:
    *ret_level = PF_L;
    return NO_ERROR;
}
| 262 | |
/**
 * Walk the page table structures to see if the mapping between a virtual address
 * and a physical address exists. Also, check the flags.
 *
 * @param pml4 Physical address of the PML4 table (must be non-zero).
 * @param paddr Expected physical address for vaddr (page aligned).
 * @param vaddr Virtual address to check (page aligned, canonical).
 * @param in_flags Desired x86 arch flags to compare against.
 * @param ret_level Out: level reached by the page table walk.
 * @param ret_flags Out: on a flag mismatch, the XOR of desired and existing
 *        flags masked to X86_DIRTY_ACCESS_MASK; in_flags when no mapping
 *        was found; zero on an exact match.
 * @param last_valid_entry Out: see x86_mmu_get_mapping().
 *
 * @return NO_ERROR only when vaddr maps to paddr with matching flags.
 */
status_t x86_mmu_check_mapping(addr_t pml4, paddr_t paddr,
                               vaddr_t vaddr, arch_flags_t in_flags,
                               uint32_t *ret_level, arch_flags_t *ret_flags,
                               map_addr_t *last_valid_entry)
{
    status_t status;
    arch_flags_t existing_flags = 0;

    DEBUG_ASSERT(pml4);
    if ((!ret_level) || (!last_valid_entry) || (!ret_flags) ||
            (!x86_mmu_check_vaddr(vaddr)) ||
            (!x86_mmu_check_paddr(paddr))) {
        return ERR_INVALID_ARGS;
    }

    status = x86_mmu_get_mapping(pml4, vaddr, ret_level, &existing_flags, last_valid_entry);
    if (status || ((*last_valid_entry) != (uint64_t)paddr)) {
        /* We did not reach till we check the access flags for the mapping */
        *ret_flags = in_flags;
        return ERR_NOT_FOUND;
    }

    /* Checking the access flags for the mapped address. If it is not zero, then
     * the access flags are different & the return flag will have those access bits
     * which are different.
     */
    *ret_flags = (in_flags ^ get_x86_arch_flags(existing_flags)) & X86_DIRTY_ACCESS_MASK;

    if (!(*ret_flags))
        return NO_ERROR;

    return ERR_NOT_FOUND;
}
| 301 | |
| 302 | static void update_pt_entry(vaddr_t vaddr, paddr_t paddr, uint64_t pde, arch_flags_t flags) |
| 303 | { |
| 304 | uint32_t pt_index; |
| 305 | |
| 306 | uint64_t *pt_table = (uint64_t *)(pde & X86_PG_FRAME); |
| 307 | pt_index = (((uint64_t)vaddr >> PT_SHIFT) & ((1ul << ADDR_OFFSET) - 1)); |
| 308 | pt_table[pt_index] = (uint64_t)paddr; |
| 309 | pt_table[pt_index] |= flags | X86_MMU_PG_P; |
| 310 | if (!(flags & X86_MMU_PG_U)) |
| 311 | pt_table[pt_index] |= X86_MMU_PG_G; /* setting global flag for kernel pages */ |
| 312 | } |
| 313 | |
| 314 | static void update_pd_entry(vaddr_t vaddr, uint64_t pdpe, addr_t *m, arch_flags_t flags) |
| 315 | { |
| 316 | uint32_t pd_index; |
| 317 | |
| 318 | uint64_t *pd_table = (uint64_t *)(pdpe & X86_PG_FRAME); |
| 319 | pd_index = (((uint64_t)vaddr >> PD_SHIFT) & ((1ul << ADDR_OFFSET) - 1)); |
| 320 | pd_table[pd_index] = (uint64_t)m; |
| 321 | pd_table[pd_index] |= X86_MMU_PG_P | X86_MMU_PG_RW; |
| 322 | if (flags & X86_MMU_PG_U) |
| 323 | pd_table[pd_index] |= X86_MMU_PG_U; |
| 324 | else |
| 325 | pd_table[pd_index] |= X86_MMU_PG_G; /* setting global flag for kernel pages */ |
| 326 | } |
| 327 | |
| 328 | static void update_pdp_entry(vaddr_t vaddr, uint64_t pml4e, addr_t *m, arch_flags_t flags) |
| 329 | { |
| 330 | uint32_t pdp_index; |
| 331 | |
| 332 | uint64_t *pdp_table = (uint64_t *)(pml4e & X86_PG_FRAME); |
| 333 | pdp_index = (((uint64_t)vaddr >> PDP_SHIFT) & ((1ul << ADDR_OFFSET) - 1)); |
| 334 | pdp_table[pdp_index] = (uint64_t)m; |
| 335 | pdp_table[pdp_index] |= X86_MMU_PG_P | X86_MMU_PG_RW; |
| 336 | if (flags & X86_MMU_PG_U) |
| 337 | pdp_table[pdp_index] |= X86_MMU_PG_U; |
| 338 | else |
| 339 | pdp_table[pdp_index] |= X86_MMU_PG_G; /* setting global flag for kernel pages */ |
| 340 | } |
| 341 | |
| 342 | static void update_pml4_entry(vaddr_t vaddr, addr_t pml4_addr, addr_t *m, arch_flags_t flags) |
| 343 | { |
| 344 | uint32_t pml4_index; |
| 345 | uint64_t *pml4_table = (uint64_t *)(pml4_addr); |
| 346 | |
| 347 | pml4_index = (((uint64_t)vaddr >> PML4_SHIFT) & ((1ul << ADDR_OFFSET) - 1)); |
| 348 | pml4_table[pml4_index] = (uint64_t)m; |
| 349 | pml4_table[pml4_index] |= X86_MMU_PG_P | X86_MMU_PG_RW; |
| 350 | if (flags & X86_MMU_PG_U) |
| 351 | pml4_table[pml4_index] |= X86_MMU_PG_U; |
| 352 | else |
| 353 | pml4_table[pml4_index] |= X86_MMU_PG_G; /* setting global flag for kernel pages */ |
| 354 | } |
| 355 | |
| 356 | /** |
| 357 | * @brief Allocating a new page table |
| 358 | */ |
| 359 | static addr_t *_map_alloc(size_t size) |
| 360 | { |
| 361 | addr_t *page_ptr = memalign(PAGE_SIZE, size); |
| 362 | if (page_ptr) |
| 363 | map_zero_page(page_ptr); |
| 364 | return page_ptr; |
| 365 | } |
| 366 | |
| 367 | /** |
| 368 | * @brief Creating a new CR3 and copying all the kernel mappings |
| 369 | */ |
| 370 | addr_t *x86_create_new_cr3(void) |
| 371 | { |
| 372 | map_addr_t *kernel_table, *new_table = NULL; |
| 373 | |
| 374 | if (!g_CR3) |
| 375 | return 0; |
| 376 | |
| 377 | kernel_table = (map_addr_t *)X86_PHYS_TO_VIRT(g_CR3); |
| 378 | |
| 379 | /* Allocate a new Page to generate a new paging structure for a new CR3 */ |
| 380 | new_table = (map_addr_t *)_map_alloc(PAGE_SIZE); |
| 381 | ASSERT(new_table); |
| 382 | |
| 383 | /* Copying the kernel mapping as-is */ |
| 384 | memcpy(new_table, kernel_table, PAGE_SIZE); |
| 385 | |
| 386 | return (addr_t *)new_table; |
| 387 | } |
| 388 | |
| 389 | /** |
| 390 | * @brief Returning the kernel CR3 |
| 391 | */ |
| 392 | map_addr_t get_kernel_cr3() |
| 393 | { |
| 394 | return g_CR3; |
| 395 | } |
| 396 | |
/**
 * @brief Add a new mapping for the given virtual address & physical address
 *
 * This is a API which handles the mapping b/w a virtual address & physical address
 * either by checking if the mapping already exists and is valid OR by adding a
 * new mapping with the required flags.
 *
 * In this scenario, we are considering the paging scheme to be a PAE mode with
 * 4KB pages.
 *
 * @param pml4 Physical address of the PML4 table (must be non-zero).
 * @param paddr Page-aligned physical address to map.
 * @param vaddr Page-aligned, canonical virtual address to map.
 * @param mmu_flags Generic mmu flags, converted per level via get_x86_arch_flags().
 *
 * @return NO_ERROR on success, ERR_INVALID_ARGS for bad addresses,
 *         ERR_NO_MEMORY if an intermediate table could not be allocated
 *         (newly created tables from this call are freed on that path).
 */
status_t x86_mmu_add_mapping(addr_t pml4, paddr_t paddr,
                             vaddr_t vaddr, arch_flags_t mmu_flags)
{
    /* pd_new/pdp_new record which intermediate tables this call allocated,
     * so the error path frees only what it created. */
    uint32_t pd_new = 0, pdp_new = 0;
    uint64_t pml4e, pdpe, pde;
    addr_t *m = NULL;
    status_t ret = NO_ERROR;

    DEBUG_ASSERT(pml4);
    if ((!x86_mmu_check_vaddr(vaddr)) || (!x86_mmu_check_paddr(paddr)) )
        return ERR_INVALID_ARGS;

    pml4e = get_pml4_entry_from_pml4_table(vaddr, pml4);

    if ((pml4e & X86_MMU_PG_P) == 0) {
        /* Creating a new pdp table */
        m = _map_alloc(PAGE_SIZE);
        if (m == NULL) {
            ret = ERR_NO_MEMORY;
            goto clean;
        }

        update_pml4_entry(vaddr, pml4, m, get_x86_arch_flags(mmu_flags));
        /* Track the raw table pointer (no flag bits) for the walk below. */
        pml4e = (uint64_t)m;
        X86_SET_FLAG(pdp_new);
    }

    if (!pdp_new)
        pdpe = get_pdp_entry_from_pdp_table(vaddr, pml4e);

    /* A just-created pdp table is empty, so its pd entry cannot be present. */
    if (pdp_new || (pdpe & X86_MMU_PG_P) == 0) {
        /* Creating a new pd table */
        m = _map_alloc(PAGE_SIZE);
        if (m == NULL) {
            ret = ERR_NO_MEMORY;
            if (pdp_new)
                goto clean_pdp;
            goto clean;
        }

        update_pdp_entry(vaddr, pml4e, m, get_x86_arch_flags(mmu_flags));
        pdpe = (uint64_t)m;
        X86_SET_FLAG(pd_new);
    }

    if (!pd_new)
        pde = get_pd_entry_from_pd_table(vaddr, pdpe);

    if (pd_new || (pde & X86_MMU_PG_P) == 0) {
        /* Creating a new pt */
        m = _map_alloc(PAGE_SIZE);
        if (m == NULL) {
            ret = ERR_NO_MEMORY;
            if (pd_new)
                goto clean_pd;
            goto clean;
        }

        update_pd_entry(vaddr, pdpe, m, get_x86_arch_flags(mmu_flags));
        pde = (uint64_t)m;
    }

    /* Updating the page table entry with the paddr and access flags required for the mapping */
    update_pt_entry(vaddr, paddr, pde, get_x86_arch_flags(mmu_flags));
    ret = NO_ERROR;
    goto clean;

    /* Error unwinding: pdpe/pml4e hold the raw pointers of tables allocated
     * above, so freeing them releases exactly the new allocations.
     * NOTE(review): the parent entries written by update_pdp_entry /
     * update_pml4_entry still reference the freed tables with the present
     * bit set — looks like a dangling-entry hazard; confirm and clear them. */
clean_pd:
    if (pd_new)
        free((addr_t *)pdpe);

clean_pdp:
    if (pdp_new)
        free((addr_t *)pml4e);

clean:
    return ret;
}
| 486 | |
/**
 * @brief x86-64 MMU unmap an entry in the page tables recursively and clear out tables
 *
 * Recurses from the given level down to the page frame, then on the way back
 * up frees any next-level table whose entries are all non-present and clears
 * the present bit in this level's entry for vaddr.
 *
 * @param vaddr Virtual address whose translation is being torn down.
 * @param level Current paging level (PML4_L .. PF_L).
 * @param table_entry Virtual address of the table for this level.
 */
static void x86_mmu_unmap_entry(vaddr_t vaddr, int level, vaddr_t table_entry)
{
    uint32_t offset = 0, next_level_offset = 0;
    vaddr_t *table, *next_table_addr, value;

    next_table_addr = NULL;
    /* Convert back to a physical frame pointer and strip flag bits.
     * NOTE(review): the code then dereferences this physical-style pointer
     * directly — presumably phys==virt for page tables here; confirm. */
    table = (vaddr_t *)(X86_VIRT_TO_PHYS(table_entry) & X86_PG_FRAME);

    /* Select this level's entry for vaddr; bail out if it is not present. */
    switch (level) {
        case PML4_L:
            offset = (((uint64_t)vaddr >> PML4_SHIFT) & ((1ul << ADDR_OFFSET) - 1));
            next_table_addr = (vaddr_t *)X86_PHYS_TO_VIRT(table[offset]);
            if ((X86_PHYS_TO_VIRT(table[offset]) & X86_MMU_PG_P) == 0)
                return;
            break;
        case PDP_L:
            offset = (((uint64_t)vaddr >> PDP_SHIFT) & ((1ul << ADDR_OFFSET) - 1));
            next_table_addr = (vaddr_t *)X86_PHYS_TO_VIRT(table[offset]);
            if ((X86_PHYS_TO_VIRT(table[offset]) & X86_MMU_PG_P) == 0)
                return;
            break;
        case PD_L:
            offset = (((uint64_t)vaddr >> PD_SHIFT) & ((1ul << ADDR_OFFSET) - 1));
            next_table_addr = (vaddr_t *)X86_PHYS_TO_VIRT(table[offset]);
            if ((X86_PHYS_TO_VIRT(table[offset]) & X86_MMU_PG_P) == 0)
                return;
            break;
        case PT_L:
            offset = (((uint64_t)vaddr >> PT_SHIFT) & ((1ul << ADDR_OFFSET) - 1));
            next_table_addr = (vaddr_t *)X86_PHYS_TO_VIRT(table[offset]);
            if ((X86_PHYS_TO_VIRT(table[offset]) & X86_MMU_PG_P) == 0)
                return;
            break;
        case PF_L:
            /* Reached page frame, Let's go back */
        default:
            return;
    }

    /* Recurse one level deeper first, so child tables are emptied before
     * this level decides whether the next-level table can be freed. */
    level -= 1;
    x86_mmu_unmap_entry(vaddr, level, (vaddr_t)next_table_addr);
    level += 1;

    next_table_addr = (vaddr_t *)((vaddr_t)(X86_VIRT_TO_PHYS(next_table_addr)) & X86_PG_FRAME);
    if (level > PT_L) {
        /* Check all entries of next level table for present bit */
        for (next_level_offset = 0; next_level_offset < (PAGE_SIZE / 8); next_level_offset++) {
            if ((next_table_addr[next_level_offset] & X86_MMU_PG_P) != 0)
                return; /* There is an entry in the next level table */
        }
        /* Entire next-level table is empty: release it. */
        free(next_table_addr);
    }
    /* All present bits for all entries in next level table for this address are 0 */
    if ((X86_PHYS_TO_VIRT(table[offset]) & X86_MMU_PG_P) != 0) {
        /* Clear the present bit with interrupts off so the read-modify-write
         * of the live entry is not interleaved with an interrupt handler. */
        arch_disable_ints();
        value = table[offset];
        value = value & X86_PTE_NOT_PRESENT;
        table[offset] = value;
        arch_enable_ints();
    }
}
| 552 | |
| 553 | status_t x86_mmu_unmap(addr_t pml4, vaddr_t vaddr, uint count) |
| 554 | { |
| 555 | vaddr_t next_aligned_v_addr; |
| 556 | |
| 557 | DEBUG_ASSERT(pml4); |
| 558 | if (!(x86_mmu_check_vaddr(vaddr))) |
| 559 | return ERR_INVALID_ARGS; |
| 560 | |
| 561 | if (count == 0) |
| 562 | return NO_ERROR; |
| 563 | |
| 564 | next_aligned_v_addr = vaddr; |
| 565 | while (count > 0) { |
| 566 | x86_mmu_unmap_entry(next_aligned_v_addr, PAGING_LEVELS, X86_PHYS_TO_VIRT(pml4)); |
| 567 | next_aligned_v_addr += PAGE_SIZE; |
| 568 | count--; |
| 569 | } |
| 570 | return NO_ERROR; |
| 571 | } |
| 572 | |
| 573 | int arch_mmu_unmap(vaddr_t vaddr, uint count) |
| 574 | { |
| 575 | addr_t current_cr3_val; |
| 576 | |
| 577 | if (!(x86_mmu_check_vaddr(vaddr))) |
| 578 | return ERR_INVALID_ARGS; |
| 579 | |
| 580 | if (count == 0) |
| 581 | return NO_ERROR; |
| 582 | |
| 583 | DEBUG_ASSERT(x86_get_cr3()); |
| 584 | current_cr3_val = (addr_t)x86_get_cr3(); |
| 585 | |
| 586 | return (x86_mmu_unmap(current_cr3_val, vaddr, count)); |
| 587 | } |
| 588 | |
| 589 | /** |
| 590 | * @brief Mapping a section/range with specific permissions |
| 591 | * |
| 592 | */ |
| 593 | status_t x86_mmu_map_range(addr_t pml4, struct map_range *range, arch_flags_t flags) |
| 594 | { |
| 595 | vaddr_t next_aligned_v_addr; |
| 596 | paddr_t next_aligned_p_addr; |
| 597 | status_t map_status; |
| 598 | uint32_t no_of_pages, index; |
| 599 | |
| 600 | DEBUG_ASSERT(pml4); |
| 601 | if (!range) |
| 602 | return ERR_INVALID_ARGS; |
| 603 | |
| 604 | /* Calculating the number of 4k pages */ |
| 605 | if (IS_ALIGNED(range->size, PAGE_SIZE)) |
| 606 | no_of_pages = (range->size) >> PAGE_DIV_SHIFT; |
| 607 | else |
| 608 | no_of_pages = ((range->size) >> PAGE_DIV_SHIFT) + 1; |
| 609 | |
| 610 | next_aligned_v_addr = range->start_vaddr; |
| 611 | next_aligned_p_addr = range->start_paddr; |
| 612 | |
| 613 | for (index = 0; index < no_of_pages; index++) { |
| 614 | map_status = x86_mmu_add_mapping(pml4, next_aligned_p_addr, next_aligned_v_addr, flags); |
| 615 | if (map_status) { |
| 616 | dprintf(SPEW, "Add mapping failed with err=%d\n", map_status); |
| 617 | /* Unmap the partial mapping - if any */ |
| 618 | x86_mmu_unmap(pml4, range->start_vaddr, index); |
| 619 | return map_status; |
| 620 | } |
| 621 | next_aligned_v_addr += PAGE_SIZE; |
| 622 | next_aligned_p_addr += PAGE_SIZE; |
| 623 | } |
| 624 | return NO_ERROR; |
| 625 | } |
| 626 | |
| 627 | status_t arch_mmu_query(vaddr_t vaddr, paddr_t *paddr, uint *flags) |
| 628 | { |
| 629 | addr_t current_cr3_val; |
| 630 | uint32_t ret_level; |
| 631 | map_addr_t last_valid_entry; |
| 632 | arch_flags_t ret_flags; |
| 633 | status_t stat; |
| 634 | |
| 635 | if (!paddr) |
| 636 | return ERR_INVALID_ARGS; |
| 637 | |
| 638 | DEBUG_ASSERT(x86_get_cr3()); |
| 639 | current_cr3_val = (addr_t)x86_get_cr3(); |
| 640 | |
| 641 | stat = x86_mmu_get_mapping(current_cr3_val, vaddr, &ret_level, &ret_flags, &last_valid_entry); |
| 642 | if (stat) |
| 643 | return stat; |
| 644 | |
| 645 | *paddr = (paddr_t)(last_valid_entry); |
| 646 | |
| 647 | /* converting x86 arch specific flags to arch mmu flags */ |
| 648 | if (flags) |
| 649 | *flags = ret_flags; |
| 650 | |
| 651 | return NO_ERROR; |
| 652 | } |
| 653 | |
| 654 | int arch_mmu_map(vaddr_t vaddr, paddr_t paddr, uint count, uint flags) |
| 655 | { |
| 656 | addr_t current_cr3_val; |
| 657 | struct map_range range; |
| 658 | |
| 659 | if ((!x86_mmu_check_paddr(paddr)) || (!x86_mmu_check_vaddr(vaddr))) |
| 660 | return ERR_INVALID_ARGS; |
| 661 | |
| 662 | if (count == 0) |
| 663 | return NO_ERROR; |
| 664 | |
| 665 | DEBUG_ASSERT(x86_get_cr3()); |
| 666 | current_cr3_val = (addr_t)x86_get_cr3(); |
| 667 | |
| 668 | range.start_vaddr = vaddr; |
| 669 | range.start_paddr = paddr; |
| 670 | range.size = count * PAGE_SIZE; |
| 671 | |
| 672 | return (x86_mmu_map_range(current_cr3_val, &range, flags)); |
| 673 | } |
| 674 | |
| 675 | /** |
| 676 | * @brief x86-64 MMU basic initialization |
| 677 | * |
| 678 | */ |
| 679 | void arch_mmu_init(void) |
| 680 | { |
| 681 | volatile uint64_t efer_msr, cr0, cr4; |
| 682 | |
| 683 | /* Set WP bit in CR0*/ |
| 684 | cr0 = x86_get_cr0(); |
| 685 | cr0 |= X86_CR0_WP; |
| 686 | x86_set_cr0(cr0); |
| 687 | |
| 688 | /* Setting the SMEP & SMAP bit in CR4 */ |
| 689 | cr4 = x86_get_cr4(); |
| 690 | if (check_smep_avail()) |
| 691 | cr4 |= X86_CR4_SMEP; |
| 692 | if (check_smap_avail()) |
| 693 | cr4 |=X86_CR4_SMAP; |
| 694 | x86_set_cr4(cr4); |
| 695 | |
| 696 | /* Set NXE bit in MSR_EFER*/ |
| 697 | efer_msr = read_msr(x86_MSR_EFER); |
| 698 | efer_msr |= x86_EFER_NXE; |
| 699 | write_msr(x86_MSR_EFER, efer_msr); |
| 700 | } |