// SPDX-License-Identifier: GPL-2.0
/*
 * mm/mremap.c
 *
 * (C) Copyright 1996 Linus Torvalds
 *
 * Address space accounting code <alan@lxorguk.ukuu.org.uk>
 * (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/swapops.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/mm-arch-hooks.h>
#include <linux/userfaultfd_k.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include "internal.h"

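/*
 * get_old_pud()/get_old_pmd() walk the existing page-table hierarchy
 * (pgd -> p4d -> pud -> pmd) for a source address. They return NULL if any
 * level is absent or bad, in which case the caller simply skips over that
 * extent: there is nothing to move.
 */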
static pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, addr);
	if (pgd_none_or_clear_bad(pgd))
		return NULL;

	p4d = p4d_offset(pgd, addr);
	if (p4d_none_or_clear_bad(p4d))
		return NULL;

	pud = pud_offset(p4d, addr);
	if (pud_none_or_clear_bad(pud))
		return NULL;

	return pud;
}

static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = get_old_pud(mm, addr);
	if (!pud)
		return NULL;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;

	return pmd;
}

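/*
 * Allocate the page-table levels needed to hold entries at the destination
 * address. Unlike the get_old_*() helpers above, these may allocate memory
 * and can therefore fail under memory pressure, in which case the caller
 * bails out of the rest of the move.
 */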
static pud_t *alloc_new_pud(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return NULL;

	return pud_alloc(mm, p4d, addr);
}

static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = alloc_new_pud(mm, vma, addr);
	if (!pud)
		return NULL;

	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return NULL;

	VM_BUG_ON(pmd_trans_huge(*pmd));

	return pmd;
}

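/*
 * The rmap locks are taken in a fixed order: the file's i_mmap_rwsem first,
 * then the anon_vma lock, and released in the reverse order. Holding both
 * keeps rmap walkers from observing a half-moved range.
 */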
static void take_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->vm_file)
		i_mmap_lock_write(vma->vm_file->f_mapping);
	if (vma->anon_vma)
		anon_vma_lock_write(vma->anon_vma);
}

static void drop_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->anon_vma)
		anon_vma_unlock_write(vma->anon_vma);
	if (vma->vm_file)
		i_mmap_unlock_write(vma->vm_file->f_mapping);
}

static pte_t move_soft_dirty_pte(pte_t pte)
{
	/*
	 * Set the soft-dirty bit so we can notice
	 * in userspace that the ptes were moved.
	 */
#ifdef CONFIG_MEM_SOFT_DIRTY
	if (pte_present(pte))
		pte = pte_mksoft_dirty(pte);
	else if (is_swap_pte(pte))
		pte = pte_swp_mksoft_dirty(pte);
#endif
	return pte;
}

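/*
 * Move the ptes covering [old_addr, old_end) within a single pmd over to
 * the destination pmd. Runs with both pte locks held; a TLB flush is
 * forced before the locks are dropped if any present pte was moved.
 */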
static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
		unsigned long old_addr, unsigned long old_end,
		struct vm_area_struct *new_vma, pmd_t *new_pmd,
		unsigned long new_addr, bool need_rmap_locks)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *old_pte, *new_pte, pte;
	spinlock_t *old_ptl, *new_ptl;
	bool force_flush = false;
	unsigned long len = old_end - old_addr;

	/*
	 * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
	 * locks to ensure that rmap will always observe either the old or the
	 * new ptes. This is the easiest way to avoid races with
	 * truncate_pagecache(), page migration, etc...
	 *
	 * When need_rmap_locks is false, we use other ways to avoid
	 * such races:
	 *
	 * - During exec() shift_arg_pages(), we use a specially tagged vma
	 *   which rmap call sites look for using is_vma_temporary_stack().
	 *
	 * - During mremap(), new_vma is often known to be placed after vma
	 *   in rmap traversal order. This ensures rmap will always observe
	 *   either the old pte, or the new pte, or both (the page table locks
	 *   serialize access to individual ptes, but only rmap traversal
	 *   order guarantees that we won't miss both the old and new ptes).
	 */
	if (need_rmap_locks)
		take_rmap_locks(vma);

	/*
	 * We don't have to worry about the ordering of src and dst
	 * pte locks because exclusive mmap_sem prevents deadlock.
	 */
	old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
	new_pte = pte_offset_map(new_pmd, new_addr);
	new_ptl = pte_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
	flush_tlb_batched_pending(vma->vm_mm);
	arch_enter_lazy_mmu_mode();

	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				   new_pte++, new_addr += PAGE_SIZE) {
		if (pte_none(*old_pte))
			continue;

		pte = ptep_get_and_clear(mm, old_addr, old_pte);
		/*
		 * If we are remapping a valid PTE, make sure
		 * to flush TLB before we drop the PTL for the
		 * PTE.
		 *
		 * NOTE! Both old and new PTL matter: the old one
		 * for racing with page_mkclean(), the new one to
		 * make sure the physical page stays valid until
		 * the TLB entry for the old mapping has been
		 * flushed.
		 */
		if (pte_present(pte))
			force_flush = true;
		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
		pte = move_soft_dirty_pte(pte);
		set_pte_at(mm, new_addr, new_pte, pte);
	}

	arch_leave_lazy_mmu_mode();
	if (force_flush)
		flush_tlb_range(vma, old_end - len, old_end);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	pte_unmap(new_pte - 1);
	pte_unmap_unlock(old_pte - 1, old_ptl);
	if (need_rmap_locks)
		drop_rmap_locks(vma);
}

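/*
 * Move a whole pmd entry (i.e. the pointer to a page table covering
 * PMD_SIZE of address space) in one shot instead of moving its ptes one
 * by one. Both addresses must be PMD-aligned and the destination pmd must
 * be empty. move_normal_pud() below is the analogous optimization one
 * level up.
 */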
#ifdef CONFIG_HAVE_MOVE_PMD
static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
		  unsigned long new_addr, unsigned long old_end,
		  pmd_t *old_pmd, pmd_t *new_pmd)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pmd_t pmd;

	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
	    || old_end - old_addr < PMD_SIZE)
		return false;

	/*
	 * The destination pmd shouldn't be established, free_pgtables()
	 * should have released it.
	 */
	if (WARN_ON(!pmd_none(*new_pmd)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_sem prevents deadlock.
	 */
	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
	new_ptl = pmd_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pmd */
	pmd = *old_pmd;
	pmd_clear(old_pmd);

	VM_BUG_ON(!pmd_none(*new_pmd));

	/* Set the new pmd */
	set_pmd_at(mm, new_addr, new_pmd, pmd);
	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#else
static inline bool move_normal_pmd(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long new_addr,
		unsigned long old_end, pmd_t *old_pmd, pmd_t *new_pmd)
{
	return false;
}
#endif

#ifdef CONFIG_HAVE_MOVE_PUD
static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
		  unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pud_t pud;

	/*
	 * The destination pud shouldn't be established, free_pgtables()
	 * should have released it.
	 */
	if (WARN_ON_ONCE(!pud_none(*new_pud)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_sem prevents deadlock.
	 */
	old_ptl = pud_lock(vma->vm_mm, old_pud);
	new_ptl = pud_lockptr(mm, new_pud);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pud */
	pud = *old_pud;
	pud_clear(old_pud);

	VM_BUG_ON(!pud_none(*new_pud));

	/* Set the new pud */
	set_pud_at(mm, new_addr, new_pud, pud);
	flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#else
static inline bool move_normal_pud(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long new_addr, pud_t *old_pud,
		pud_t *new_pud)
{
	return false;
}
#endif

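/*
 * Selects which page-table level move_pgt_entry() operates on: a normal
 * pmd or pud entry (a pointer to a lower-level table), or a transparent
 * huge pmd mapping.
 */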
enum pgt_entry {
	NORMAL_PMD,
	HPAGE_PMD,
	NORMAL_PUD,
};

/*
 * Returns an extent of the corresponding size for the specified pgt_entry
 * if it is valid. Otherwise returns a smaller extent bounded by the end of
 * the source and destination pgt_entry.
 */
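/*
 * Illustrative arithmetic, assuming a 2 MiB PMD_SIZE: for old_addr ==
 * 0x1ff000, old_end == 0x600000 and new_addr == 0x3ff000, the source
 * boundary gives next == 0x200000, so extent == 0x1000 -- both addresses
 * sit one page below a PMD boundary, and only that page can be moved
 * before realigning.
 */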
static unsigned long get_extent(enum pgt_entry entry, unsigned long old_addr,
			unsigned long old_end, unsigned long new_addr)
{
	unsigned long next, extent, mask, size;

	switch (entry) {
	case HPAGE_PMD:
	case NORMAL_PMD:
		mask = PMD_MASK;
		size = PMD_SIZE;
		break;
	case NORMAL_PUD:
		mask = PUD_MASK;
		size = PUD_SIZE;
		break;
	default:
		BUILD_BUG();
		break;
	}

	next = (old_addr + size) & mask;
	/* even if next overflowed, extent below will be ok */
	extent = next - old_addr;
	if (extent > old_end - old_addr)
		extent = old_end - old_addr;
	next = (new_addr + size) & mask;
	if (extent > next - new_addr)
		extent = next - new_addr;
	return extent;
}

/*
 * Attempts to speed up the move by moving the entry at the level
 * corresponding to pgt_entry. Returns true if the move was successful,
 * else false.
 */
static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
			unsigned long old_addr, unsigned long new_addr,
			unsigned long old_end, void *old_entry,
			void *new_entry, bool need_rmap_locks)
{
	bool moved = false;

	/* See comment in move_ptes() */
	if (need_rmap_locks)
		take_rmap_locks(vma);

	switch (entry) {
	case NORMAL_PMD:
		moved = move_normal_pmd(vma, old_addr, new_addr, old_end,
					old_entry, new_entry);
		break;
	case NORMAL_PUD:
		moved = move_normal_pud(vma, old_addr, new_addr, old_entry,
					new_entry);
		break;
	case HPAGE_PMD:
		moved = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
			move_huge_pmd(vma, old_addr, new_addr, old_end,
				      old_entry, new_entry);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	if (need_rmap_locks)
		drop_rmap_locks(vma);

	return moved;
}

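/*
 * Core loop of the move: walks the source range in the largest extents the
 * alignment allows, trying a PUD-level move first, then a PMD-level (huge
 * or normal) move, and finally falling back to copying ptes one at a time.
 * Returns how many bytes were actually moved, which may be less than len
 * if a destination page table could not be allocated.
 */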
unsigned long move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len,
		bool need_rmap_locks)
{
	unsigned long extent, old_end;
	struct mmu_notifier_range range;
	pmd_t *old_pmd, *new_pmd;

	if (!len)
		return 0;

	old_end = old_addr + len;
	flush_cache_range(vma, old_addr, old_end);

	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
				old_addr, old_end);
	mmu_notifier_invalidate_range_start(&range);

	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
		cond_resched();
		/*
		 * If the extent is PUD-sized, try to speed up the move by
		 * moving at the PUD level if possible.
		 */
		extent = get_extent(NORMAL_PUD, old_addr, old_end, new_addr);
		if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
			pud_t *old_pud, *new_pud;

			old_pud = get_old_pud(vma->vm_mm, old_addr);
			if (!old_pud)
				continue;
			new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
			if (!new_pud)
				break;
			if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
					   old_end, old_pud, new_pud, true))
				continue;
		}

		extent = get_extent(NORMAL_PMD, old_addr, old_end, new_addr);
		old_pmd = get_old_pmd(vma->vm_mm, old_addr);
		if (!old_pmd)
			continue;
		new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
		if (!new_pmd)
			break;
		if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) ||
		    pmd_devmap(*old_pmd)) {
			if (extent == HPAGE_PMD_SIZE &&
			    move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr,
					   old_end, old_pmd, new_pmd,
					   need_rmap_locks))
				continue;
			split_huge_pmd(vma, old_pmd, old_addr);
			if (pmd_trans_unstable(old_pmd))
				continue;
		} else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) &&
			   extent == PMD_SIZE) {
			/*
			 * If the extent is PMD-sized, try to speed up the
			 * move by moving at the PMD level if possible.
			 */
			if (move_pgt_entry(NORMAL_PMD, vma, old_addr, new_addr,
					   old_end, old_pmd, new_pmd, true))
				continue;
		}

		if (pte_alloc(new_vma->vm_mm, new_pmd))
			break;
		move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
			  new_pmd, new_addr, need_rmap_locks);
	}

	mmu_notifier_invalidate_range_end(&range);

	return len + old_addr - old_end;	/* how much done */
}

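/*
 * Moves the vma itself: sets up new_vma via copy_vma(), moves the page
 * tables across, and on success unmaps the old range (unless
 * MREMAP_DONTUNMAP was requested) while keeping the memory accounting
 * straight. On failure the already-moved entries are moved back.
 */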
static unsigned long move_vma(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long old_len,
		unsigned long new_len, unsigned long new_addr,
		bool *locked, unsigned long flags,
		struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma;
	unsigned long vm_flags = vma->vm_flags;
	unsigned long new_pgoff;
	unsigned long moved_len;
	unsigned long excess = 0;
	unsigned long hiwater_vm;
	int split = 0;
	int err;
	bool need_rmap_locks;

	/*
	 * We'd prefer to avoid failure later on in do_munmap,
	 * which may split one vma into three before unmapping.
	 */
	if (mm->map_count >= sysctl_max_map_count - 3)
		return -ENOMEM;

	/*
	 * Advise KSM to break any KSM pages in the area to be moved:
	 * it would be confusing if they were to turn up at the new
	 * location, where they happen to coincide with different KSM
	 * pages recently unmapped. But leave vma->vm_flags as it was,
	 * so KSM can come around to merge on vma and new_vma afterwards.
	 */
	err = ksm_madvise(vma, old_addr, old_addr + old_len,
			  MADV_UNMERGEABLE, &vm_flags);
	if (err)
		return err;

	if (unlikely(flags & MREMAP_DONTUNMAP && vm_flags & VM_ACCOUNT)) {
		if (security_vm_enough_memory_mm(mm, new_len >> PAGE_SHIFT))
			return -ENOMEM;
	}

	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
			   &need_rmap_locks);
	if (!new_vma) {
		if (unlikely(flags & MREMAP_DONTUNMAP && vm_flags & VM_ACCOUNT))
			vm_unacct_memory(new_len >> PAGE_SHIFT);
		return -ENOMEM;
	}

	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
				     need_rmap_locks);
	if (moved_len < old_len) {
		err = -ENOMEM;
	} else if (vma->vm_ops && vma->vm_ops->mremap) {
		err = vma->vm_ops->mremap(new_vma);
	}

	if (unlikely(err)) {
		/*
		 * On error, move entries back from the new area to the old,
		 * which will succeed since the page tables are still there,
		 * and then proceed to unmap the new area instead of the old.
		 */
		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
				 true);
		vma = new_vma;
		old_len = new_len;
		old_addr = new_addr;
		new_addr = err;
	} else {
		mremap_userfaultfd_prep(new_vma, uf);
		arch_remap(mm, old_addr, old_addr + old_len,
			   new_addr, new_addr + new_len);
	}

	/* Conceal VM_ACCOUNT so the old reservation is not undone */
	if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) {
		vma->vm_flags &= ~VM_ACCOUNT;
		excess = vma->vm_end - vma->vm_start - old_len;
		if (old_addr > vma->vm_start &&
		    old_addr + old_len < vma->vm_end)
			split = 1;
	}

	/*
	 * If we failed to move page tables we still do total_vm increment
	 * since do_munmap() will decrement it by old_len == new_len.
	 *
	 * Since total_vm is about to be raised artificially high for a
	 * moment, we need to restore high watermark afterwards: if stats
	 * are taken meanwhile, total_vm and hiwater_vm appear too high.
	 * If this were a serious issue, we'd add a flag to do_munmap().
	 */
	hiwater_vm = mm->hiwater_vm;
	vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);

	/* Tell the pfn tracking code that the pfnmap has moved from this vma */
	if (unlikely(vma->vm_flags & VM_PFNMAP))
		untrack_pfn_moved(vma);

	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
		/* We always clear VM_LOCKED[ONFAULT] on the old vma */
		vma->vm_flags &= VM_LOCKED_CLEAR_MASK;

		/* Because we won't unmap we don't need to touch locked_vm */
		return new_addr;
	}

	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
		/* OOM: unable to split vma, just get accounts right */
		if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP))
			vm_acct_memory(new_len >> PAGE_SHIFT);
		excess = 0;
	}

	if (vm_flags & VM_LOCKED) {
		mm->locked_vm += new_len >> PAGE_SHIFT;
		*locked = true;
	}

	mm->hiwater_vm = hiwater_vm;

	/* Restore VM_ACCOUNT if one or two pieces of vma left */
	if (excess) {
		vma->vm_flags |= VM_ACCOUNT;
		if (split)
			vma->vm_next->vm_flags |= VM_ACCOUNT;
	}

	return new_addr;
}

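/*
 * Looks up and validates the vma at addr for a resize/move from old_len to
 * new_len: rejects spans across vma boundaries, hugetlb vmas, pgoff
 * overflow, and mlock/vm limit violations, and charges the grown part
 * against the commit limit for VM_ACCOUNT vmas (returned via *p).
 */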
static struct vm_area_struct *vma_to_resize(unsigned long addr,
	unsigned long old_len, unsigned long new_len, unsigned long flags,
	unsigned long *p)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = find_vma(mm, addr);
	unsigned long pgoff;

	if (!vma || vma->vm_start > addr)
		return ERR_PTR(-EFAULT);

	/*
	 * !old_len is a special case where an attempt is made to 'duplicate'
	 * a mapping. This makes no sense for private mappings as it will
	 * instead create a fresh/new mapping unrelated to the original. This
	 * is contrary to the basic idea of mremap which creates new mappings
	 * based on the original. There are no known use cases for this
	 * behavior. As a result, fail such attempts.
	 */
	if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
		pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", current->comm, current->pid);
		return ERR_PTR(-EINVAL);
	}

	if ((flags & MREMAP_DONTUNMAP) &&
			(vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)))
		return ERR_PTR(-EINVAL);

	if (is_vm_hugetlb_page(vma))
		return ERR_PTR(-EINVAL);

	/* We can't remap across vm area boundaries */
	if (old_len > vma->vm_end - addr)
		return ERR_PTR(-EFAULT);

	if (new_len == old_len)
		return vma;

	/* Need to be careful about a growing mapping */
	pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
	pgoff += vma->vm_pgoff;
	if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
		return ERR_PTR(-EINVAL);

	if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
		return ERR_PTR(-EFAULT);

	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = mm->locked_vm << PAGE_SHIFT;
		lock_limit = rlimit(RLIMIT_MEMLOCK);
		locked += new_len - old_len;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return ERR_PTR(-EAGAIN);
	}

	if (!may_expand_vm(mm, vma->vm_flags,
				(new_len - old_len) >> PAGE_SHIFT))
		return ERR_PTR(-ENOMEM);

	if (vma->vm_flags & VM_ACCOUNT) {
		unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
		if (security_vm_enough_memory_mm(mm, charged))
			return ERR_PTR(-ENOMEM);
		*p = charged;
	}

	return vma;
}

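/*
 * Handles the MREMAP_FIXED/MREMAP_DONTUNMAP path: validates the requested
 * destination, unmaps whatever currently occupies it (for MREMAP_FIXED),
 * shrinks the source if needed, then reserves the target with
 * get_unmapped_area() and hands off to move_vma().
 */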
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
		unsigned long new_addr, unsigned long new_len, bool *locked,
		unsigned long flags, struct vm_userfaultfd_ctx *uf,
		struct list_head *uf_unmap_early,
		struct list_head *uf_unmap)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;
	unsigned long map_flags = 0;

	if (offset_in_page(new_addr))
		goto out;

	if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
		goto out;

	/* Ensure the old/new locations do not overlap */
	if (addr + old_len > new_addr && new_addr + new_len > addr)
		goto out;

	/*
	 * move_vma() needs us to stay 4 maps below the threshold, otherwise
	 * it will bail out at the very beginning.
	 * That is a problem if we have already unmapped the regions here
	 * (new_addr, and old_addr), because userspace will not know the
	 * state of the vma's after it gets -ENOMEM.
	 * So, to avoid such a scenario we can pre-compute whether the whole
	 * operation has a good chance of succeeding map-wise.
	 * The worst-case scenario is when both vma's (new_addr and old_addr)
	 * get split in 3 before unmapping them.
	 * That means 2 more maps (1 for each) to the ones we already hold.
	 * Check whether the current map count plus 2 still leaves us 4 maps
	 * below the threshold, otherwise return -ENOMEM here to be safe.
	 */
	if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
		return -ENOMEM;

	if (flags & MREMAP_FIXED) {
		ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
		if (ret)
			goto out;
	}

	if (old_len >= new_len) {
		ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
		if (ret && old_len != new_len)
			goto out;
		old_len = new_len;
	}

	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	/* MREMAP_DONTUNMAP expands by old_len since old_len == new_len */
	if (flags & MREMAP_DONTUNMAP &&
		!may_expand_vm(mm, vma->vm_flags, old_len >> PAGE_SHIFT)) {
		ret = -ENOMEM;
		goto out;
	}

	if (flags & MREMAP_FIXED)
		map_flags |= MAP_FIXED;

	if (vma->vm_flags & VM_MAYSHARE)
		map_flags |= MAP_SHARED;

	ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
				((addr - vma->vm_start) >> PAGE_SHIFT),
				map_flags);
	if (offset_in_page(ret))
		goto out1;

	/* We got a new mapping */
	if (!(flags & MREMAP_FIXED))
		new_addr = ret;

	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
		       uf_unmap);

	if (!(offset_in_page(ret)))
		goto out;

out1:
	vm_unacct_memory(charged);

out:
	return ret;
}

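/*
 * Returns 1 if the vma can be grown in place by delta bytes: the extended
 * end must not overflow, must not run into the next vma, and the
 * architecture must accept the enlarged range as a fixed mapping.
 */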
static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
{
	unsigned long end = vma->vm_end + delta;
	if (end < vma->vm_end) /* overflow */
		return 0;
	if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
		return 0;
	if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
			      0, MAP_FIXED) & ~PAGE_MASK)
		return 0;
	return 1;
}

/*
 * Expand (or shrink) an existing mapping, potentially moving it at the
 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
 * This option implies MREMAP_MAYMOVE.
 */
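/*
 * Illustrative userspace usage, not part of this file's logic: growing an
 * anonymous mapping in place if possible, or moving it otherwise.
 *
 *	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	void *q = mremap(p, 4096, 2 * 4096, MREMAP_MAYMOVE);
 *	if (q == MAP_FAILED)
 *		perror("mremap");
 */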
SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
		unsigned long, new_len, unsigned long, flags,
		unsigned long, new_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;
	bool locked = false;
	bool downgraded = false;
	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
	LIST_HEAD(uf_unmap_early);
	LIST_HEAD(uf_unmap);

	/*
	 * There is a deliberate asymmetry here: we strip the pointer tag
	 * from the old address but leave the new address alone. This is
	 * for consistency with mmap(), where we prevent the creation of
	 * aliasing mappings in userspace by leaving the tag bits of the
	 * mapping address intact. A non-zero tag will cause the subsequent
	 * range checks to reject the address as invalid.
	 *
	 * See Documentation/arm64/tagged-address-abi.rst for more information.
	 */
	addr = untagged_addr(addr);

	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
		return ret;

	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
		return ret;

	/*
	 * MREMAP_DONTUNMAP is always a move and it does not allow resizing
	 * in the process.
	 */
	if (flags & MREMAP_DONTUNMAP &&
			(!(flags & MREMAP_MAYMOVE) || old_len != new_len))
		return ret;

	if (offset_in_page(addr))
		return ret;

	old_len = PAGE_ALIGN(old_len);
	new_len = PAGE_ALIGN(new_len);

	/*
	 * We allow a zero old-len as a special case
	 * for the DOS-emu "duplicate shm area" thing. But
	 * a zero new-len is nonsensical.
	 */
	if (!new_len)
		return ret;

	if (down_write_killable(&current->mm->mmap_sem))
		return -EINTR;

	if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
		ret = mremap_to(addr, old_len, new_addr, new_len,
				&locked, flags, &uf, &uf_unmap_early,
				&uf_unmap);
		goto out;
	}

	/*
	 * Always allow a shrinking remap: that just unmaps
	 * the unnecessary pages.
	 * __do_munmap does all the needed commit accounting, and
	 * downgrades mmap_sem to read if so directed.
	 */
	if (old_len >= new_len) {
		int retval;

		retval = __do_munmap(mm, addr+new_len, old_len - new_len,
				     &uf_unmap, true);
		if (retval < 0 && old_len != new_len) {
			ret = retval;
			goto out;
		/* Returning 1 indicates mmap_sem has been downgraded to read. */
		} else if (retval == 1)
			downgraded = true;
		ret = addr;
		goto out;
	}

	/*
	 * Ok, we need to grow..
	 */
	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto out;
	}

	/* old_len exactly to the end of the area.. */
	if (old_len == vma->vm_end - addr) {
		/* can we just expand the current mapping? */
		if (vma_expandable(vma, new_len - old_len)) {
			int pages = (new_len - old_len) >> PAGE_SHIFT;

			if (vma_adjust(vma, vma->vm_start, addr + new_len,
				       vma->vm_pgoff, NULL)) {
				ret = -ENOMEM;
				goto out;
			}

			vm_stat_account(mm, vma->vm_flags, pages);
			if (vma->vm_flags & VM_LOCKED) {
				mm->locked_vm += pages;
				locked = true;
				new_addr = addr;
			}
			ret = addr;
			goto out;
		}
	}

	/*
	 * We weren't able to just expand or shrink the area,
	 * we need to create a new one and move it..
	 */
	ret = -ENOMEM;
	if (flags & MREMAP_MAYMOVE) {
		unsigned long map_flags = 0;
		if (vma->vm_flags & VM_MAYSHARE)
			map_flags |= MAP_SHARED;

		new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
					vma->vm_pgoff +
					((addr - vma->vm_start) >> PAGE_SHIFT),
					map_flags);
		if (offset_in_page(new_addr)) {
			ret = new_addr;
			goto out;
		}

		ret = move_vma(vma, addr, old_len, new_len, new_addr,
			       &locked, flags, &uf, &uf_unmap);
	}
out:
	if (offset_in_page(ret)) {
		vm_unacct_memory(charged);
		locked = false;
	}
	if (downgraded)
		up_read(&current->mm->mmap_sem);
	else
		up_write(&current->mm->mmap_sem);
	if (locked && new_len > old_len)
		mm_populate(new_addr + old_len, new_len - old_len);
	userfaultfd_unmap_complete(mm, &uf_unmap_early);
	mremap_userfaultfd_complete(&uf, addr, ret, old_len);
	userfaultfd_unmap_complete(mm, &uf_unmap);
	return ret;
}
949}