| xj | b04a402 | 2021-11-25 15:01:52 +0800 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 | 
|  | 2 | /* | 
|  | 3 | *  mm/mprotect.c | 
|  | 4 | * | 
|  | 5 | *  (C) Copyright 1994 Linus Torvalds | 
|  | 6 | *  (C) Copyright 2002 Christoph Hellwig | 
|  | 7 | * | 
|  | 8 | *  Address space accounting code	<alan@lxorguk.ukuu.org.uk> | 
|  | 9 | *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved | 
|  | 10 | */ | 
|  | 11 |  | 
|  | 12 | #include <linux/mm.h> | 
|  | 13 | #include <linux/hugetlb.h> | 
|  | 14 | #include <linux/shm.h> | 
|  | 15 | #include <linux/mman.h> | 
|  | 16 | #include <linux/fs.h> | 
|  | 17 | #include <linux/highmem.h> | 
|  | 18 | #include <linux/security.h> | 
|  | 19 | #include <linux/mempolicy.h> | 
|  | 20 | #include <linux/personality.h> | 
|  | 21 | #include <linux/syscalls.h> | 
|  | 22 | #include <linux/swap.h> | 
|  | 23 | #include <linux/swapops.h> | 
|  | 24 | #include <linux/mmu_notifier.h> | 
|  | 25 | #include <linux/migrate.h> | 
|  | 26 | #include <linux/perf_event.h> | 
|  | 27 | #include <linux/pkeys.h> | 
|  | 28 | #include <linux/ksm.h> | 
|  | 29 | #include <linux/uaccess.h> | 
|  | 30 | #include <linux/mm_inline.h> | 
|  | 31 | #include <asm/pgtable.h> | 
|  | 32 | #include <asm/cacheflush.h> | 
|  | 33 | #include <asm/mmu_context.h> | 
|  | 34 | #include <asm/tlbflush.h> | 
|  | 35 |  | 
|  | 36 | #include "internal.h" | 
|  | 37 |  | 
|  | 38 | static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | 
|  | 39 | unsigned long addr, unsigned long end, pgprot_t newprot, | 
|  | 40 | int dirty_accountable, int prot_numa) | 
|  | 41 | { | 
|  | 42 | struct mm_struct *mm = vma->vm_mm; | 
|  | 43 | pte_t *pte, oldpte; | 
|  | 44 | spinlock_t *ptl; | 
|  | 45 | unsigned long pages = 0; | 
|  | 46 | int target_node = NUMA_NO_NODE; | 
|  | 47 |  | 
|  | 48 | /* | 
|  | 49 | * Can be called with only the mmap_sem for reading by | 
|  | 50 | * prot_numa so we must check the pmd isn't constantly | 
|  | 51 | * changing from under us from pmd_none to pmd_trans_huge | 
|  | 52 | * and/or the other way around. | 
|  | 53 | */ | 
|  | 54 | if (pmd_trans_unstable(pmd)) | 
|  | 55 | return 0; | 
|  | 56 |  | 
|  | 57 | /* | 
|  | 58 | * The pmd points to a regular pte so the pmd can't change | 
|  | 59 | * from under us even if the mmap_sem is only hold for | 
|  | 60 | * reading. | 
|  | 61 | */ | 
|  | 62 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 
|  | 63 |  | 
|  | 64 | /* Get target node for single threaded private VMAs */ | 
|  | 65 | if (prot_numa && !(vma->vm_flags & VM_SHARED) && | 
|  | 66 | atomic_read(&vma->vm_mm->mm_users) == 1) | 
|  | 67 | target_node = numa_node_id(); | 
|  | 68 |  | 
|  | 69 | flush_tlb_batched_pending(vma->vm_mm); | 
|  | 70 | arch_enter_lazy_mmu_mode(); | 
|  | 71 | do { | 
|  | 72 | oldpte = *pte; | 
|  | 73 | if (pte_present(oldpte)) { | 
|  | 74 | pte_t ptent; | 
|  | 75 | bool preserve_write = prot_numa && pte_write(oldpte); | 
|  | 76 |  | 
|  | 77 | /* | 
|  | 78 | * Avoid trapping faults against the zero or KSM | 
|  | 79 | * pages. See similar comment in change_huge_pmd. | 
|  | 80 | */ | 
|  | 81 | if (prot_numa) { | 
|  | 82 | struct page *page; | 
|  | 83 |  | 
|  | 84 | page = vm_normal_page(vma, addr, oldpte); | 
|  | 85 | if (!page || PageKsm(page)) | 
|  | 86 | continue; | 
|  | 87 |  | 
|  | 88 | /* Also skip shared copy-on-write pages */ | 
|  | 89 | if (is_cow_mapping(vma->vm_flags) && | 
|  | 90 | page_mapcount(page) != 1) | 
|  | 91 | continue; | 
|  | 92 |  | 
|  | 93 | /* | 
|  | 94 | * While migration can move some dirty pages, | 
|  | 95 | * it cannot move them all from MIGRATE_ASYNC | 
|  | 96 | * context. | 
|  | 97 | */ | 
|  | 98 | if (page_is_file_cache(page) && PageDirty(page)) | 
|  | 99 | continue; | 
|  | 100 |  | 
|  | 101 | /* Avoid TLB flush if possible */ | 
|  | 102 | if (pte_protnone(oldpte)) | 
|  | 103 | continue; | 
|  | 104 |  | 
|  | 105 | /* | 
|  | 106 | * Don't mess with PTEs if page is already on the node | 
|  | 107 | * a single-threaded process is running on. | 
|  | 108 | */ | 
|  | 109 | if (target_node == page_to_nid(page)) | 
|  | 110 | continue; | 
|  | 111 | } | 
|  | 112 |  | 
|  | 113 | ptent = ptep_modify_prot_start(mm, addr, pte); | 
|  | 114 | ptent = pte_modify(ptent, newprot); | 
|  | 115 | if (preserve_write) | 
|  | 116 | ptent = pte_mk_savedwrite(ptent); | 
|  | 117 |  | 
|  | 118 | /* Avoid taking write faults for known dirty pages */ | 
|  | 119 | if (dirty_accountable && pte_dirty(ptent) && | 
|  | 120 | (pte_soft_dirty(ptent) || | 
|  | 121 | !(vma->vm_flags & VM_SOFTDIRTY))) { | 
|  | 122 | ptent = pte_mkwrite(ptent); | 
|  | 123 | } | 
|  | 124 | ptep_modify_prot_commit(mm, addr, pte, ptent); | 
|  | 125 | pages++; | 
|  | 126 | } else if (IS_ENABLED(CONFIG_MIGRATION)) { | 
|  | 127 | swp_entry_t entry = pte_to_swp_entry(oldpte); | 
|  | 128 |  | 
|  | 129 | if (is_write_migration_entry(entry)) { | 
|  | 130 | pte_t newpte; | 
|  | 131 | /* | 
|  | 132 | * A protection check is difficult so | 
|  | 133 | * just be safe and disable write | 
|  | 134 | */ | 
|  | 135 | make_migration_entry_read(&entry); | 
|  | 136 | newpte = swp_entry_to_pte(entry); | 
|  | 137 | if (pte_swp_soft_dirty(oldpte)) | 
|  | 138 | newpte = pte_swp_mksoft_dirty(newpte); | 
|  | 139 | set_pte_at(mm, addr, pte, newpte); | 
|  | 140 |  | 
|  | 141 | pages++; | 
|  | 142 | } | 
|  | 143 |  | 
|  | 144 | if (is_write_device_private_entry(entry)) { | 
|  | 145 | pte_t newpte; | 
|  | 146 |  | 
|  | 147 | /* | 
|  | 148 | * We do not preserve soft-dirtiness. See | 
|  | 149 | * copy_one_pte() for explanation. | 
|  | 150 | */ | 
|  | 151 | make_device_private_entry_read(&entry); | 
|  | 152 | newpte = swp_entry_to_pte(entry); | 
|  | 153 | set_pte_at(mm, addr, pte, newpte); | 
|  | 154 |  | 
|  | 155 | pages++; | 
|  | 156 | } | 
|  | 157 | } | 
|  | 158 | } while (pte++, addr += PAGE_SIZE, addr != end); | 
|  | 159 | arch_leave_lazy_mmu_mode(); | 
|  | 160 | pte_unmap_unlock(pte - 1, ptl); | 
|  | 161 |  | 
|  | 162 | return pages; | 
|  | 163 | } | 
|  | 164 |  | 
|  | 165 | static inline unsigned long change_pmd_range(struct vm_area_struct *vma, | 
|  | 166 | pud_t *pud, unsigned long addr, unsigned long end, | 
|  | 167 | pgprot_t newprot, int dirty_accountable, int prot_numa) | 
|  | 168 | { | 
|  | 169 | pmd_t *pmd; | 
|  | 170 | struct mm_struct *mm = vma->vm_mm; | 
|  | 171 | unsigned long next; | 
|  | 172 | unsigned long pages = 0; | 
|  | 173 | unsigned long nr_huge_updates = 0; | 
|  | 174 | unsigned long mni_start = 0; | 
|  | 175 |  | 
|  | 176 | pmd = pmd_offset(pud, addr); | 
|  | 177 | do { | 
|  | 178 | unsigned long this_pages; | 
|  | 179 |  | 
|  | 180 | next = pmd_addr_end(addr, end); | 
|  | 181 | if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) | 
|  | 182 | && pmd_none_or_clear_bad(pmd)) | 
|  | 183 | goto next; | 
|  | 184 |  | 
|  | 185 | /* invoke the mmu notifier if the pmd is populated */ | 
|  | 186 | if (!mni_start) { | 
|  | 187 | mni_start = addr; | 
|  | 188 | mmu_notifier_invalidate_range_start(mm, mni_start, end); | 
|  | 189 | } | 
|  | 190 |  | 
|  | 191 | if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { | 
|  | 192 | if (next - addr != HPAGE_PMD_SIZE) { | 
|  | 193 | __split_huge_pmd(vma, pmd, addr, false, NULL); | 
|  | 194 | } else { | 
|  | 195 | int nr_ptes = change_huge_pmd(vma, pmd, addr, | 
|  | 196 | newprot, prot_numa); | 
|  | 197 |  | 
|  | 198 | if (nr_ptes) { | 
|  | 199 | if (nr_ptes == HPAGE_PMD_NR) { | 
|  | 200 | pages += HPAGE_PMD_NR; | 
|  | 201 | nr_huge_updates++; | 
|  | 202 | } | 
|  | 203 |  | 
|  | 204 | /* huge pmd was handled */ | 
|  | 205 | goto next; | 
|  | 206 | } | 
|  | 207 | } | 
|  | 208 | /* fall through, the trans huge pmd just split */ | 
|  | 209 | } | 
|  | 210 | this_pages = change_pte_range(vma, pmd, addr, next, newprot, | 
|  | 211 | dirty_accountable, prot_numa); | 
|  | 212 | pages += this_pages; | 
|  | 213 | next: | 
|  | 214 | cond_resched(); | 
|  | 215 | } while (pmd++, addr = next, addr != end); | 
|  | 216 |  | 
|  | 217 | if (mni_start) | 
|  | 218 | mmu_notifier_invalidate_range_end(mm, mni_start, end); | 
|  | 219 |  | 
|  | 220 | if (nr_huge_updates) | 
|  | 221 | count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates); | 
|  | 222 | return pages; | 
|  | 223 | } | 
|  | 224 |  | 
|  | 225 | static inline unsigned long change_pud_range(struct vm_area_struct *vma, | 
|  | 226 | p4d_t *p4d, unsigned long addr, unsigned long end, | 
|  | 227 | pgprot_t newprot, int dirty_accountable, int prot_numa) | 
|  | 228 | { | 
|  | 229 | pud_t *pud; | 
|  | 230 | unsigned long next; | 
|  | 231 | unsigned long pages = 0; | 
|  | 232 |  | 
|  | 233 | pud = pud_offset(p4d, addr); | 
|  | 234 | do { | 
|  | 235 | next = pud_addr_end(addr, end); | 
|  | 236 | if (pud_none_or_clear_bad(pud)) | 
|  | 237 | continue; | 
|  | 238 | pages += change_pmd_range(vma, pud, addr, next, newprot, | 
|  | 239 | dirty_accountable, prot_numa); | 
|  | 240 | } while (pud++, addr = next, addr != end); | 
|  | 241 |  | 
|  | 242 | return pages; | 
|  | 243 | } | 
|  | 244 |  | 
|  | 245 | static inline unsigned long change_p4d_range(struct vm_area_struct *vma, | 
|  | 246 | pgd_t *pgd, unsigned long addr, unsigned long end, | 
|  | 247 | pgprot_t newprot, int dirty_accountable, int prot_numa) | 
|  | 248 | { | 
|  | 249 | p4d_t *p4d; | 
|  | 250 | unsigned long next; | 
|  | 251 | unsigned long pages = 0; | 
|  | 252 |  | 
|  | 253 | p4d = p4d_offset(pgd, addr); | 
|  | 254 | do { | 
|  | 255 | next = p4d_addr_end(addr, end); | 
|  | 256 | if (p4d_none_or_clear_bad(p4d)) | 
|  | 257 | continue; | 
|  | 258 | pages += change_pud_range(vma, p4d, addr, next, newprot, | 
|  | 259 | dirty_accountable, prot_numa); | 
|  | 260 | } while (p4d++, addr = next, addr != end); | 
|  | 261 |  | 
|  | 262 | return pages; | 
|  | 263 | } | 
|  | 264 |  | 
|  | 265 | static unsigned long change_protection_range(struct vm_area_struct *vma, | 
|  | 266 | unsigned long addr, unsigned long end, pgprot_t newprot, | 
|  | 267 | int dirty_accountable, int prot_numa) | 
|  | 268 | { | 
|  | 269 | struct mm_struct *mm = vma->vm_mm; | 
|  | 270 | pgd_t *pgd; | 
|  | 271 | unsigned long next; | 
|  | 272 | unsigned long start = addr; | 
|  | 273 | unsigned long pages = 0; | 
|  | 274 |  | 
|  | 275 | BUG_ON(addr >= end); | 
|  | 276 | pgd = pgd_offset(mm, addr); | 
|  | 277 | flush_cache_range(vma, addr, end); | 
|  | 278 | inc_tlb_flush_pending(mm); | 
|  | 279 | do { | 
|  | 280 | next = pgd_addr_end(addr, end); | 
|  | 281 | if (pgd_none_or_clear_bad(pgd)) | 
|  | 282 | continue; | 
|  | 283 | pages += change_p4d_range(vma, pgd, addr, next, newprot, | 
|  | 284 | dirty_accountable, prot_numa); | 
|  | 285 | } while (pgd++, addr = next, addr != end); | 
|  | 286 |  | 
|  | 287 | /* Only flush the TLB if we actually modified any entries: */ | 
|  | 288 | if (pages) | 
|  | 289 | flush_tlb_range(vma, start, end); | 
|  | 290 | dec_tlb_flush_pending(mm); | 
|  | 291 |  | 
|  | 292 | return pages; | 
|  | 293 | } | 
|  | 294 |  | 
|  | 295 | unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, | 
|  | 296 | unsigned long end, pgprot_t newprot, | 
|  | 297 | int dirty_accountable, int prot_numa) | 
|  | 298 | { | 
|  | 299 | unsigned long pages; | 
|  | 300 |  | 
|  | 301 | if (is_vm_hugetlb_page(vma)) | 
|  | 302 | pages = hugetlb_change_protection(vma, start, end, newprot); | 
|  | 303 | else | 
|  | 304 | pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa); | 
|  | 305 |  | 
|  | 306 | return pages; | 
|  | 307 | } | 
|  | 308 |  | 
|  | 309 | static int prot_none_pte_entry(pte_t *pte, unsigned long addr, | 
|  | 310 | unsigned long next, struct mm_walk *walk) | 
|  | 311 | { | 
|  | 312 | return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? | 
|  | 313 | 0 : -EACCES; | 
|  | 314 | } | 
|  | 315 |  | 
|  | 316 | static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask, | 
|  | 317 | unsigned long addr, unsigned long next, | 
|  | 318 | struct mm_walk *walk) | 
|  | 319 | { | 
|  | 320 | return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ? | 
|  | 321 | 0 : -EACCES; | 
|  | 322 | } | 
|  | 323 |  | 
/*
 * test_walk callback for prot_none_walk(): unconditionally returns 0 for
 * every range it is asked about.
 */
static int prot_none_test(unsigned long addr, unsigned long next,
			  struct mm_walk *walk)
{
	return 0;
}
|  | 329 |  | 
|  | 330 | static int prot_none_walk(struct vm_area_struct *vma, unsigned long start, | 
|  | 331 | unsigned long end, unsigned long newflags) | 
|  | 332 | { | 
|  | 333 | pgprot_t new_pgprot = vm_get_page_prot(newflags); | 
|  | 334 | struct mm_walk prot_none_walk = { | 
|  | 335 | .pte_entry = prot_none_pte_entry, | 
|  | 336 | .hugetlb_entry = prot_none_hugetlb_entry, | 
|  | 337 | .test_walk = prot_none_test, | 
|  | 338 | .mm = current->mm, | 
|  | 339 | .private = &new_pgprot, | 
|  | 340 | }; | 
|  | 341 |  | 
|  | 342 | return walk_page_range(start, end, &prot_none_walk); | 
|  | 343 | } | 
|  | 344 |  | 
|  | 345 | int | 
|  | 346 | mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, | 
|  | 347 | unsigned long start, unsigned long end, unsigned long newflags) | 
|  | 348 | { | 
|  | 349 | struct mm_struct *mm = vma->vm_mm; | 
|  | 350 | unsigned long oldflags = vma->vm_flags; | 
|  | 351 | long nrpages = (end - start) >> PAGE_SHIFT; | 
|  | 352 | unsigned long charged = 0; | 
|  | 353 | pgoff_t pgoff; | 
|  | 354 | int error; | 
|  | 355 | int dirty_accountable = 0; | 
|  | 356 |  | 
|  | 357 | if (newflags == oldflags) { | 
|  | 358 | *pprev = vma; | 
|  | 359 | return 0; | 
|  | 360 | } | 
|  | 361 |  | 
|  | 362 | /* | 
|  | 363 | * Do PROT_NONE PFN permission checks here when we can still | 
|  | 364 | * bail out without undoing a lot of state. This is a rather | 
|  | 365 | * uncommon case, so doesn't need to be very optimized. | 
|  | 366 | */ | 
|  | 367 | if (arch_has_pfn_modify_check() && | 
|  | 368 | (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && | 
|  | 369 | (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { | 
|  | 370 | error = prot_none_walk(vma, start, end, newflags); | 
|  | 371 | if (error) | 
|  | 372 | return error; | 
|  | 373 | } | 
|  | 374 |  | 
|  | 375 | /* | 
|  | 376 | * If we make a private mapping writable we increase our commit; | 
|  | 377 | * but (without finer accounting) cannot reduce our commit if we | 
|  | 378 | * make it unwritable again. hugetlb mapping were accounted for | 
|  | 379 | * even if read-only so there is no need to account for them here | 
|  | 380 | */ | 
|  | 381 | if (newflags & VM_WRITE) { | 
|  | 382 | /* Check space limits when area turns into data. */ | 
|  | 383 | if (!may_expand_vm(mm, newflags, nrpages) && | 
|  | 384 | may_expand_vm(mm, oldflags, nrpages)) | 
|  | 385 | return -ENOMEM; | 
|  | 386 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB| | 
|  | 387 | VM_SHARED|VM_NORESERVE))) { | 
|  | 388 | charged = nrpages; | 
|  | 389 | if (security_vm_enough_memory_mm(mm, charged)) | 
|  | 390 | return -ENOMEM; | 
|  | 391 | newflags |= VM_ACCOUNT; | 
|  | 392 | } | 
|  | 393 | } | 
|  | 394 |  | 
|  | 395 | /* | 
|  | 396 | * First try to merge with previous and/or next vma. | 
|  | 397 | */ | 
|  | 398 | pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); | 
|  | 399 | *pprev = vma_merge(mm, *pprev, start, end, newflags, | 
|  | 400 | vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), | 
|  | 401 | vma->vm_userfaultfd_ctx, vma_get_anon_name(vma)); | 
|  | 402 | if (*pprev) { | 
|  | 403 | vma = *pprev; | 
|  | 404 | VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY); | 
|  | 405 | goto success; | 
|  | 406 | } | 
|  | 407 |  | 
|  | 408 | *pprev = vma; | 
|  | 409 |  | 
|  | 410 | if (start != vma->vm_start) { | 
|  | 411 | error = split_vma(mm, vma, start, 1); | 
|  | 412 | if (error) | 
|  | 413 | goto fail; | 
|  | 414 | } | 
|  | 415 |  | 
|  | 416 | if (end != vma->vm_end) { | 
|  | 417 | error = split_vma(mm, vma, end, 0); | 
|  | 418 | if (error) | 
|  | 419 | goto fail; | 
|  | 420 | } | 
|  | 421 |  | 
|  | 422 | success: | 
|  | 423 | /* | 
|  | 424 | * vm_flags and vm_page_prot are protected by the mmap_sem | 
|  | 425 | * held in write mode. | 
|  | 426 | */ | 
|  | 427 | vma->vm_flags = newflags; | 
|  | 428 | dirty_accountable = vma_wants_writenotify(vma, vma->vm_page_prot); | 
|  | 429 | vma_set_page_prot(vma); | 
|  | 430 |  | 
|  | 431 | change_protection(vma, start, end, vma->vm_page_prot, | 
|  | 432 | dirty_accountable, 0); | 
|  | 433 |  | 
|  | 434 | /* | 
|  | 435 | * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major | 
|  | 436 | * fault on access. | 
|  | 437 | */ | 
|  | 438 | if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED && | 
|  | 439 | (newflags & VM_WRITE)) { | 
|  | 440 | populate_vma_page_range(vma, start, end, NULL); | 
|  | 441 | } | 
|  | 442 |  | 
|  | 443 | vm_stat_account(mm, oldflags, -nrpages); | 
|  | 444 | vm_stat_account(mm, newflags, nrpages); | 
|  | 445 | perf_event_mmap(vma); | 
|  | 446 | return 0; | 
|  | 447 |  | 
|  | 448 | fail: | 
|  | 449 | vm_unacct_memory(charged); | 
|  | 450 | return error; | 
|  | 451 | } | 
|  | 452 |  | 
|  | 453 | /* | 
|  | 454 | * pkey==-1 when doing a legacy mprotect() | 
|  | 455 | */ | 
|  | 456 | static int do_mprotect_pkey(unsigned long start, size_t len, | 
|  | 457 | unsigned long prot, int pkey) | 
|  | 458 | { | 
|  | 459 | unsigned long nstart, end, tmp, reqprot; | 
|  | 460 | struct vm_area_struct *vma, *prev; | 
|  | 461 | int error = -EINVAL; | 
|  | 462 | const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP); | 
|  | 463 | const bool rier = (current->personality & READ_IMPLIES_EXEC) && | 
|  | 464 | (prot & PROT_READ); | 
|  | 465 |  | 
|  | 466 | start = untagged_addr(start); | 
|  | 467 |  | 
|  | 468 | prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP); | 
|  | 469 | if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */ | 
|  | 470 | return -EINVAL; | 
|  | 471 |  | 
|  | 472 | if (start & ~PAGE_MASK) | 
|  | 473 | return -EINVAL; | 
|  | 474 | if (!len) | 
|  | 475 | return 0; | 
|  | 476 | len = PAGE_ALIGN(len); | 
|  | 477 | end = start + len; | 
|  | 478 | if (end <= start) | 
|  | 479 | return -ENOMEM; | 
|  | 480 | if (!arch_validate_prot(prot, start)) | 
|  | 481 | return -EINVAL; | 
|  | 482 |  | 
|  | 483 | reqprot = prot; | 
|  | 484 |  | 
|  | 485 | if (down_write_killable(¤t->mm->mmap_sem)) | 
|  | 486 | return -EINTR; | 
|  | 487 |  | 
|  | 488 | /* | 
|  | 489 | * If userspace did not allocate the pkey, do not let | 
|  | 490 | * them use it here. | 
|  | 491 | */ | 
|  | 492 | error = -EINVAL; | 
|  | 493 | if ((pkey != -1) && !mm_pkey_is_allocated(current->mm, pkey)) | 
|  | 494 | goto out; | 
|  | 495 |  | 
|  | 496 | vma = find_vma(current->mm, start); | 
|  | 497 | error = -ENOMEM; | 
|  | 498 | if (!vma) | 
|  | 499 | goto out; | 
|  | 500 | prev = vma->vm_prev; | 
|  | 501 | if (unlikely(grows & PROT_GROWSDOWN)) { | 
|  | 502 | if (vma->vm_start >= end) | 
|  | 503 | goto out; | 
|  | 504 | start = vma->vm_start; | 
|  | 505 | error = -EINVAL; | 
|  | 506 | if (!(vma->vm_flags & VM_GROWSDOWN)) | 
|  | 507 | goto out; | 
|  | 508 | } else { | 
|  | 509 | if (vma->vm_start > start) | 
|  | 510 | goto out; | 
|  | 511 | if (unlikely(grows & PROT_GROWSUP)) { | 
|  | 512 | end = vma->vm_end; | 
|  | 513 | error = -EINVAL; | 
|  | 514 | if (!(vma->vm_flags & VM_GROWSUP)) | 
|  | 515 | goto out; | 
|  | 516 | } | 
|  | 517 | } | 
|  | 518 | if (start > vma->vm_start) | 
|  | 519 | prev = vma; | 
|  | 520 |  | 
|  | 521 | for (nstart = start ; ; ) { | 
|  | 522 | unsigned long mask_off_old_flags; | 
|  | 523 | unsigned long newflags; | 
|  | 524 | int new_vma_pkey; | 
|  | 525 |  | 
|  | 526 | /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ | 
|  | 527 |  | 
|  | 528 | /* Does the application expect PROT_READ to imply PROT_EXEC */ | 
|  | 529 | if (rier && (vma->vm_flags & VM_MAYEXEC)) | 
|  | 530 | prot |= PROT_EXEC; | 
|  | 531 |  | 
|  | 532 | /* | 
|  | 533 | * Each mprotect() call explicitly passes r/w/x permissions. | 
|  | 534 | * If a permission is not passed to mprotect(), it must be | 
|  | 535 | * cleared from the VMA. | 
|  | 536 | */ | 
|  | 537 | mask_off_old_flags = VM_READ | VM_WRITE | VM_EXEC | | 
|  | 538 | VM_FLAGS_CLEAR; | 
|  | 539 |  | 
|  | 540 | new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey); | 
|  | 541 | newflags = calc_vm_prot_bits(prot, new_vma_pkey); | 
|  | 542 | newflags |= (vma->vm_flags & ~mask_off_old_flags); | 
|  | 543 |  | 
|  | 544 | /* newflags >> 4 shift VM_MAY% in place of VM_% */ | 
|  | 545 | if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) { | 
|  | 546 | error = -EACCES; | 
|  | 547 | goto out; | 
|  | 548 | } | 
|  | 549 |  | 
|  | 550 | error = security_file_mprotect(vma, reqprot, prot); | 
|  | 551 | if (error) | 
|  | 552 | goto out; | 
|  | 553 |  | 
|  | 554 | tmp = vma->vm_end; | 
|  | 555 | if (tmp > end) | 
|  | 556 | tmp = end; | 
|  | 557 | error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); | 
|  | 558 | if (error) | 
|  | 559 | goto out; | 
|  | 560 | nstart = tmp; | 
|  | 561 |  | 
|  | 562 | if (nstart < prev->vm_end) | 
|  | 563 | nstart = prev->vm_end; | 
|  | 564 | if (nstart >= end) | 
|  | 565 | goto out; | 
|  | 566 |  | 
|  | 567 | vma = prev->vm_next; | 
|  | 568 | if (!vma || vma->vm_start != nstart) { | 
|  | 569 | error = -ENOMEM; | 
|  | 570 | goto out; | 
|  | 571 | } | 
|  | 572 | prot = reqprot; | 
|  | 573 | } | 
|  | 574 | out: | 
|  | 575 | up_write(¤t->mm->mmap_sem); | 
|  | 576 | return error; | 
|  | 577 | } | 
|  | 578 |  | 
|  | 579 | SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, | 
|  | 580 | unsigned long, prot) | 
|  | 581 | { | 
|  | 582 | return do_mprotect_pkey(start, len, prot, -1); | 
|  | 583 | } | 
|  | 584 |  | 
|  | 585 | #ifdef CONFIG_ARCH_HAS_PKEYS | 
|  | 586 |  | 
|  | 587 | SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len, | 
|  | 588 | unsigned long, prot, int, pkey) | 
|  | 589 | { | 
|  | 590 | return do_mprotect_pkey(start, len, prot, pkey); | 
|  | 591 | } | 
|  | 592 |  | 
|  | 593 | SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val) | 
|  | 594 | { | 
|  | 595 | int pkey; | 
|  | 596 | int ret; | 
|  | 597 |  | 
|  | 598 | /* No flags supported yet. */ | 
|  | 599 | if (flags) | 
|  | 600 | return -EINVAL; | 
|  | 601 | /* check for unsupported init values */ | 
|  | 602 | if (init_val & ~PKEY_ACCESS_MASK) | 
|  | 603 | return -EINVAL; | 
|  | 604 |  | 
|  | 605 | down_write(¤t->mm->mmap_sem); | 
|  | 606 | pkey = mm_pkey_alloc(current->mm); | 
|  | 607 |  | 
|  | 608 | ret = -ENOSPC; | 
|  | 609 | if (pkey == -1) | 
|  | 610 | goto out; | 
|  | 611 |  | 
|  | 612 | ret = arch_set_user_pkey_access(current, pkey, init_val); | 
|  | 613 | if (ret) { | 
|  | 614 | mm_pkey_free(current->mm, pkey); | 
|  | 615 | goto out; | 
|  | 616 | } | 
|  | 617 | ret = pkey; | 
|  | 618 | out: | 
|  | 619 | up_write(¤t->mm->mmap_sem); | 
|  | 620 | return ret; | 
|  | 621 | } | 
|  | 622 |  | 
|  | 623 | SYSCALL_DEFINE1(pkey_free, int, pkey) | 
|  | 624 | { | 
|  | 625 | int ret; | 
|  | 626 |  | 
|  | 627 | down_write(¤t->mm->mmap_sem); | 
|  | 628 | ret = mm_pkey_free(current->mm, pkey); | 
|  | 629 | up_write(¤t->mm->mmap_sem); | 
|  | 630 |  | 
|  | 631 | /* | 
|  | 632 | * We could provie warnings or errors if any VMA still | 
|  | 633 | * has the pkey set here. | 
|  | 634 | */ | 
|  | 635 | return ret; | 
|  | 636 | } | 
|  | 637 |  | 
|  | 638 | #endif /* CONFIG_ARCH_HAS_PKEYS */ |