Blame - ap/os/linux/linux-3.4.x/mm/rmap.c - T106_DC

blob: 57f503b185a906c74f4eed0a836e369d207a98c6 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* mm/rmap.c - physical to virtual reverse mappings
				3	*
				4	* Copyright 2001, Rik van Riel <riel@conectiva.com.br>
				5	* Released under the General Public License (GPL).
				6	*
				7	* Simple, low overhead reverse mapping scheme.
				8	* Please try to keep this thing as modular as possible.
				9	*
				10	* Provides methods for unmapping each kind of mapped page:
				11	* the anon methods track anonymous pages, and
				12	* the file methods track pages belonging to an inode.
				13	*
				14	* Original design by Rik van Riel <riel@conectiva.com.br> 2001
				15	* File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004
				16	* Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004
				17	* Contributions by Hugh Dickins 2003, 2004
				18	*/
				19
				20	/*
				21	* Lock ordering in mm:
				22	*
				23	* inode->i_mutex (while writing or truncating, not reading or faulting)
				24	* mm->mmap_sem
				25	* page->flags PG_locked (lock_page)
				26	* mapping->i_mmap_mutex
				27	* anon_vma->mutex
				28	* mm->page_table_lock or pte_lock
				29	* zone->lru_lock (in mark_page_accessed, isolate_lru_page)
				30	* swap_lock (in swap_duplicate, swap_info_get)
				31	* mmlist_lock (in mmput, drain_mmlist and others)
				32	* mapping->private_lock (in __set_page_dirty_buffers)
				33	* inode->i_lock (in set_page_dirty's __mark_inode_dirty)
				34	* bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
				35	* sb_lock (within inode_lock in fs/fs-writeback.c)
				36	* mapping->tree_lock (widely used, in set_page_dirty,
				37	* in arch-dependent flush_dcache_mmap_lock,
				38	* within bdi.wb->list_lock in __sync_single_inode)
				39	*
				40	* anon_vma->mutex,mapping->i_mutex (memory_failure, collect_procs_anon)
				41	* ->tasklist_lock
				42	* pte map lock
				43	*/
				44
				45	#include <linux/mm.h>
				46	#include <linux/pagemap.h>
				47	#include <linux/swap.h>
				48	#include <linux/swapops.h>
				49	#include <linux/slab.h>
				50	#include <linux/init.h>
				51	#include <linux/ksm.h>
				52	#include <linux/rmap.h>
				53	#include <linux/rcupdate.h>
				54	#include <linux/export.h>
				55	#include <linux/memcontrol.h>
				56	#include <linux/mmu_notifier.h>
				57	#include <linux/migrate.h>
				58	#include <linux/hugetlb.h>
				59	#include <linux/backing-dev.h>
				60
				61	#include <asm/tlbflush.h>
				62
				63	#include "internal.h"
				64
				65	static struct kmem_cache *anon_vma_cachep;
				66	static struct kmem_cache *anon_vma_chain_cachep;
				67
				68	static inline struct anon_vma *anon_vma_alloc(void)
				69	{
				70	struct anon_vma *anon_vma;
				71
				72	anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
				73	if (anon_vma) {
				74	atomic_set(&anon_vma->refcount, 1);
				75	anon_vma->degree = 1; /* Reference for first vma */
				76	anon_vma->parent = anon_vma;
				77	/*
				78	* Initialise the anon_vma root to point to itself. If called
				79	* from fork, the root will be reset to the parents anon_vma.
				80	*/
				81	anon_vma->root = anon_vma;
				82	}
				83
				84	return anon_vma;
				85	}
				86
				87	static inline void anon_vma_free(struct anon_vma *anon_vma)
				88	{
				89	VM_BUG_ON(atomic_read(&anon_vma->refcount));
				90
				91	/*
				92	* Synchronize against page_lock_anon_vma() such that
				93	* we can safely hold the lock without the anon_vma getting
				94	* freed.
				95	*
				96	* Relies on the full mb implied by the atomic_dec_and_test() from
				97	* put_anon_vma() against the acquire barrier implied by
				98	* mutex_trylock() from page_lock_anon_vma(). This orders:
				99	*
				100	* page_lock_anon_vma() VS put_anon_vma()
				101	* mutex_trylock() atomic_dec_and_test()
				102	* LOCK MB
				103	* atomic_read() mutex_is_locked()
				104	*
				105	* LOCK should suffice since the actual taking of the lock must
				106	* happen _before_ what follows.
				107	*/
				108	might_sleep();
				109	if (mutex_is_locked(&anon_vma->root->mutex)) {
				110	anon_vma_lock(anon_vma);
				111	anon_vma_unlock(anon_vma);
				112	}
				113
				114	kmem_cache_free(anon_vma_cachep, anon_vma);
				115	}
				116
				117	static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
				118	{
				119	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
				120	}
				121
				122	static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
				123	{
				124	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
				125	}
				126
				127	static void anon_vma_chain_link(struct vm_area_struct *vma,
				128	struct anon_vma_chain *avc,
				129	struct anon_vma *anon_vma)
				130	{
				131	avc->vma = vma;
				132	avc->anon_vma = anon_vma;
				133	list_add(&avc->same_vma, &vma->anon_vma_chain);
				134
				135	/*
				136	* It's critical to add new vmas to the tail of the anon_vma,
				137	* see comment in huge_memory.c:__split_huge_page().
				138	*/
				139	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
				140	}
				141
				142	/**
				143	* anon_vma_prepare - attach an anon_vma to a memory region
				144	* @vma: the memory region in question
				145	*
				146	* This makes sure the memory mapping described by 'vma' has
				147	* an 'anon_vma' attached to it, so that we can associate the
				148	* anonymous pages mapped into it with that anon_vma.
				149	*
				150	* The common case will be that we already have one, but if
				151	* not we either need to find an adjacent mapping that we
				152	* can re-use the anon_vma from (very common when the only
				153	* reason for splitting a vma has been mprotect()), or we
				154	* allocate a new one.
				155	*
				156	* Anon-vma allocations are very subtle, because we may have
				157	* optimistically looked up an anon_vma in page_lock_anon_vma()
				158	* and that may actually touch the spinlock even in the newly
				159	* allocated vma (it depends on RCU to make sure that the
				160	* anon_vma isn't actually destroyed).
				161	*
				162	* As a result, we need to do proper anon_vma locking even
				163	* for the new allocation. At the same time, we do not want
				164	* to do any locking for the common case of already having
				165	* an anon_vma.
				166	*
				167	* This must be called with the mmap_sem held for reading.
				168	*/
				169	int anon_vma_prepare(struct vm_area_struct *vma)
				170	{
				171	struct anon_vma *anon_vma = vma->anon_vma;
				172	struct anon_vma_chain *avc;
				173
				174	might_sleep();
				175	if (unlikely(!anon_vma)) {
				176	struct mm_struct *mm = vma->vm_mm;
				177	struct anon_vma *allocated;
				178
				179	avc = anon_vma_chain_alloc(GFP_KERNEL);
				180	if (!avc)
				181	goto out_enomem;
				182
				183	anon_vma = find_mergeable_anon_vma(vma);
				184	allocated = NULL;
				185	if (!anon_vma) {
				186	anon_vma = anon_vma_alloc();
				187	if (unlikely(!anon_vma))
				188	goto out_enomem_free_avc;
				189	allocated = anon_vma;
				190	}
				191
				192	anon_vma_lock(anon_vma);
				193	/* page_table_lock to protect against threads */
				194	spin_lock(&mm->page_table_lock);
				195	if (likely(!vma->anon_vma)) {
				196	vma->anon_vma = anon_vma;
				197	anon_vma_chain_link(vma, avc, anon_vma);
				198	/* vma reference or self-parent link for new root */
				199	anon_vma->degree++;
				200	allocated = NULL;
				201	avc = NULL;
				202	}
				203	spin_unlock(&mm->page_table_lock);
				204	anon_vma_unlock(anon_vma);
				205
				206	if (unlikely(allocated))
				207	put_anon_vma(allocated);
				208	if (unlikely(avc))
				209	anon_vma_chain_free(avc);
				210	}
				211	return 0;
				212
				213	out_enomem_free_avc:
				214	anon_vma_chain_free(avc);
				215	out_enomem:
				216	return -ENOMEM;
				217	}
				218
				219	/*
				220	* This is a useful helper function for locking the anon_vma root as
				221	* we traverse the vma->anon_vma_chain, looping over anon_vma's that
				222	* have the same vma.
				223	*
				224	* Such anon_vma's should have the same root, so you'd expect to see
				225	* just a single mutex_lock for the whole traversal.
				226	*/
				227	static inline struct anon_vma lock_anon_vma_root(struct anon_vma root, struct anon_vma *anon_vma)
				228	{
				229	struct anon_vma *new_root = anon_vma->root;
				230	if (new_root != root) {
				231	if (WARN_ON_ONCE(root))
				232	mutex_unlock(&root->mutex);
				233	root = new_root;
				234	mutex_lock(&root->mutex);
				235	}
				236	return root;
				237	}
				238
				239	static inline void unlock_anon_vma_root(struct anon_vma *root)
				240	{
				241	if (root)
				242	mutex_unlock(&root->mutex);
				243	}
				244
				245	/*
				246	* Attach the anon_vmas from src to dst.
				247	* Returns 0 on success, -ENOMEM on failure.
				248	*
				249	* If dst->anon_vma is NULL this function tries to find and reuse existing
				250	* anon_vma which has no vmas and only one child anon_vma. This prevents
				251	* degradation of anon_vma hierarchy to endless linear chain in case of
				252	* constantly forking task. On the other hand, an anon_vma with more than one
				253	* child isn't reused even if there was no alive vma, thus rmap walker has a
				254	* good chance of avoiding scanning the whole hierarchy when it searches where
				255	* page is mapped.
				256	*/
				257	int anon_vma_clone(struct vm_area_struct dst, struct vm_area_struct src)
				258	{
				259	struct anon_vma_chain avc, pavc;
				260	struct anon_vma *root = NULL;
				261
				262	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
				263	struct anon_vma *anon_vma;
				264
				265	avc = anon_vma_chain_alloc(GFP_NOWAIT \| __GFP_NOWARN);
				266	if (unlikely(!avc)) {
				267	unlock_anon_vma_root(root);
				268	root = NULL;
				269	avc = anon_vma_chain_alloc(GFP_KERNEL);
				270	if (!avc)
				271	goto enomem_failure;
				272	}
				273	anon_vma = pavc->anon_vma;
				274	root = lock_anon_vma_root(root, anon_vma);
				275	anon_vma_chain_link(dst, avc, anon_vma);
				276
				277	/*
				278	* Reuse existing anon_vma if its degree lower than two,
				279	* that means it has no vma and only one anon_vma child.
				280	*
				281	* Do not chose parent anon_vma, otherwise first child
				282	* will always reuse it. Root anon_vma is never reused:
				283	* it has self-parent reference and at least one child.
				284	*/
				285	if (!dst->anon_vma && anon_vma != src->anon_vma &&
				286	anon_vma->degree < 2)
				287	dst->anon_vma = anon_vma;
				288	}
				289	if (dst->anon_vma)
				290	dst->anon_vma->degree++;
				291	unlock_anon_vma_root(root);
				292	return 0;
				293
				294	enomem_failure:
				295	/*
				296	* dst->anon_vma is dropped here otherwise its degree can be incorrectly
				297	* decremented in unlink_anon_vmas().
				298	* We can safely do this because callers of anon_vma_clone() don't care
				299	* about dst->anon_vma if anon_vma_clone() failed.
				300	*/
				301	dst->anon_vma = NULL;
				302	unlink_anon_vmas(dst);
				303	return -ENOMEM;
				304	}
				305
				306	/*
				307	* Some rmap walk that needs to find all ptes/hugepmds without false
				308	* negatives (like migrate and split_huge_page) running concurrent
				309	* with operations that copy or move pagetables (like mremap() and
				310	* fork()) to be safe. They depend on the anon_vma "same_anon_vma"
				311	* list to be in a certain order: the dst_vma must be placed after the
				312	* src_vma in the list. This is always guaranteed by fork() but
				313	* mremap() needs to call this function to enforce it in case the
				314	* dst_vma isn't newly allocated and chained with the anon_vma_clone()
				315	* function but just an extension of a pre-existing vma through
				316	* vma_merge.
				317	*
				318	* NOTE: the same_anon_vma list can still be changed by other
				319	* processes while mremap runs because mremap doesn't hold the
				320	* anon_vma mutex to prevent modifications to the list while it
				321	* runs. All we need to enforce is that the relative order of this
				322	* process vmas isn't changing (we don't care about other vmas
				323	* order). Each vma corresponds to an anon_vma_chain structure so
				324	* there's no risk that other processes calling anon_vma_moveto_tail()
				325	* and changing the same_anon_vma list under mremap() will screw with
				326	* the relative order of this process vmas in the list, because we
				327	* they can't alter the order of any vma that belongs to this
				328	* process. And there can't be another anon_vma_moveto_tail() running
				329	* concurrently with mremap() coming from this process because we hold
				330	* the mmap_sem for the whole mremap(). fork() ordering dependency
				331	* also shouldn't be affected because fork() only cares that the
				332	* parent vmas are placed in the list before the child vmas and
				333	* anon_vma_moveto_tail() won't reorder vmas from either the fork()
				334	* parent or child.
				335	*/
				336	void anon_vma_moveto_tail(struct vm_area_struct *dst)
				337	{
				338	struct anon_vma_chain *pavc;
				339	struct anon_vma *root = NULL;
				340
				341	list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {
				342	struct anon_vma *anon_vma = pavc->anon_vma;
				343	VM_BUG_ON(pavc->vma != dst);
				344	root = lock_anon_vma_root(root, anon_vma);
				345	list_del(&pavc->same_anon_vma);
				346	list_add_tail(&pavc->same_anon_vma, &anon_vma->head);
				347	}
				348	unlock_anon_vma_root(root);
				349	}
				350
				351	/*
				352	* Attach vma to its own anon_vma, as well as to the anon_vmas that
				353	* the corresponding VMA in the parent process is attached to.
				354	* Returns 0 on success, non-zero on failure.
				355	*/
				356	int anon_vma_fork(struct vm_area_struct vma, struct vm_area_struct pvma)
				357	{
				358	struct anon_vma_chain *avc;
				359	struct anon_vma *anon_vma;
				360
				361	/* Don't bother if the parent process has no anon_vma here. */
				362	if (!pvma->anon_vma)
				363	return 0;
				364
				365	/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
				366	vma->anon_vma = NULL;
				367
				368	/*
				369	* First, attach the new VMA to the parent VMA's anon_vmas,
				370	* so rmap can find non-COWed pages in child processes.
				371	*/
				372	if (anon_vma_clone(vma, pvma))
				373	return -ENOMEM;
				374
				375	/* An existing anon_vma has been reused, all done then. */
				376	if (vma->anon_vma)
				377	return 0;
				378
				379	/* Then add our own anon_vma. */
				380	anon_vma = anon_vma_alloc();
				381	if (!anon_vma)
				382	goto out_error;
				383	avc = anon_vma_chain_alloc(GFP_KERNEL);
				384	if (!avc)
				385	goto out_error_free_anon_vma;
				386
				387	/*
				388	* The root anon_vma's spinlock is the lock actually used when we
				389	* lock any of the anon_vmas in this anon_vma tree.
				390	*/
				391	anon_vma->root = pvma->anon_vma->root;
				392	anon_vma->parent = pvma->anon_vma;
				393	/*
				394	* With refcounts, an anon_vma can stay around longer than the
				395	* process it belongs to. The root anon_vma needs to be pinned until
				396	* this anon_vma is freed, because the lock lives in the root.
				397	*/
				398	get_anon_vma(anon_vma->root);
				399	/* Mark this anon_vma as the one where our new (COWed) pages go. */
				400	vma->anon_vma = anon_vma;
				401	anon_vma_lock(anon_vma);
				402	anon_vma_chain_link(vma, avc, anon_vma);
				403	anon_vma->parent->degree++;
				404	anon_vma_unlock(anon_vma);
				405
				406	return 0;
				407
				408	out_error_free_anon_vma:
				409	put_anon_vma(anon_vma);
				410	out_error:
				411	unlink_anon_vmas(vma);
				412	return -ENOMEM;
				413	}
				414
				415	void unlink_anon_vmas(struct vm_area_struct *vma)
				416	{
				417	struct anon_vma_chain avc, next;
				418	struct anon_vma *root = NULL;
				419
				420	/*
				421	* Unlink each anon_vma chained to the VMA. This list is ordered
				422	* from newest to oldest, ensuring the root anon_vma gets freed last.
				423	*/
				424	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
				425	struct anon_vma *anon_vma = avc->anon_vma;
				426
				427	root = lock_anon_vma_root(root, anon_vma);
				428	list_del(&avc->same_anon_vma);
				429
				430	/*
				431	* Leave empty anon_vmas on the list - we'll need
				432	* to free them outside the lock.
				433	*/
				434	if (list_empty(&anon_vma->head)) {
				435	anon_vma->parent->degree--;
				436	continue;
				437	}
				438
				439	list_del(&avc->same_vma);
				440	anon_vma_chain_free(avc);
				441	}
				442	if (vma->anon_vma)
				443	vma->anon_vma->degree--;
				444	unlock_anon_vma_root(root);
				445
				446	/*
				447	* Iterate the list once more, it now only contains empty and unlinked
				448	* anon_vmas, destroy them. Could not do before due to __put_anon_vma()
				449	* needing to acquire the anon_vma->root->mutex.
				450	*/
				451	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
				452	struct anon_vma *anon_vma = avc->anon_vma;
				453
				454	BUG_ON(anon_vma->degree);
				455	put_anon_vma(anon_vma);
				456
				457	list_del(&avc->same_vma);
				458	anon_vma_chain_free(avc);
				459	}
				460	}
				461
				462	static void anon_vma_ctor(void *data)
				463	{
				464	struct anon_vma *anon_vma = data;
				465
				466	mutex_init(&anon_vma->mutex);
				467	atomic_set(&anon_vma->refcount, 0);
				468	INIT_LIST_HEAD(&anon_vma->head);
				469	}
				470
				471	void __init anon_vma_init(void)
				472	{
				473	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
				474	0, SLAB_DESTROY_BY_RCU\|SLAB_PANIC, anon_vma_ctor);
				475	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
				476	}
				477
				478	/*
				479	* Getting a lock on a stable anon_vma from a page off the LRU is tricky!
				480	*
				481	* Since there is no serialization what so ever against page_remove_rmap()
				482	* the best this function can do is return a locked anon_vma that might
				483	* have been relevant to this page.
				484	*
				485	* The page might have been remapped to a different anon_vma or the anon_vma
				486	* returned may already be freed (and even reused).
				487	*
				488	* In case it was remapped to a different anon_vma, the new anon_vma will be a
				489	* child of the old anon_vma, and the anon_vma lifetime rules will therefore
				490	* ensure that any anon_vma obtained from the page will still be valid for as
				491	* long as we observe page_mapped() [ hence all those page_mapped() tests ].
				492	*
				493	* All users of this function must be very careful when walking the anon_vma
				494	* chain and verify that the page in question is indeed mapped in it
				495	* [ something equivalent to page_mapped_in_vma() ].
				496	*
				497	* Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
				498	* that the anon_vma pointer from page->mapping is valid if there is a
				499	* mapcount, we can dereference the anon_vma after observing those.
				500	*/
				501	struct anon_vma page_get_anon_vma(struct page page)
				502	{
				503	struct anon_vma *anon_vma = NULL;
				504	unsigned long anon_mapping;
				505
				506	rcu_read_lock();
				507	anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
				508	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
				509	goto out;
				510	if (!page_mapped(page))
				511	goto out;
				512
				513	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
				514	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
				515	anon_vma = NULL;
				516	goto out;
				517	}
				518
				519	/*
				520	* If this page is still mapped, then its anon_vma cannot have been
				521	* freed. But if it has been unmapped, we have no security against the
				522	* anon_vma structure being freed and reused (for another anon_vma:
				523	* SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
				524	* above cannot corrupt).
				525	*/
				526	if (!page_mapped(page)) {
				527	rcu_read_unlock();
				528	put_anon_vma(anon_vma);
				529	return NULL;
				530	}
				531	out:
				532	rcu_read_unlock();
				533
				534	return anon_vma;
				535	}
				536
				537	/*
				538	* Similar to page_get_anon_vma() except it locks the anon_vma.
				539	*
				540	* Its a little more complex as it tries to keep the fast path to a single
				541	* atomic op -- the trylock. If we fail the trylock, we fall back to getting a
				542	* reference like with page_get_anon_vma() and then block on the mutex.
				543	*/
				544	struct anon_vma page_lock_anon_vma(struct page page)
				545	{
				546	struct anon_vma *anon_vma = NULL;
				547	struct anon_vma *root_anon_vma;
				548	unsigned long anon_mapping;
				549
				550	rcu_read_lock();
				551	anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
				552	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
				553	goto out;
				554	if (!page_mapped(page))
				555	goto out;
				556
				557	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
				558	root_anon_vma = ACCESS_ONCE(anon_vma->root);
				559	if (mutex_trylock(&root_anon_vma->mutex)) {
				560	/*
				561	* If the page is still mapped, then this anon_vma is still
				562	* its anon_vma, and holding the mutex ensures that it will
				563	* not go away, see anon_vma_free().
				564	*/
				565	if (!page_mapped(page)) {
				566	mutex_unlock(&root_anon_vma->mutex);
				567	anon_vma = NULL;
				568	}
				569	goto out;
				570	}
				571
				572	/* trylock failed, we got to sleep */
				573	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
				574	anon_vma = NULL;
				575	goto out;
				576	}
				577
				578	if (!page_mapped(page)) {
				579	rcu_read_unlock();
				580	put_anon_vma(anon_vma);
				581	return NULL;
				582	}
				583
				584	/* we pinned the anon_vma, its safe to sleep */
				585	rcu_read_unlock();
				586	anon_vma_lock(anon_vma);
				587
				588	if (atomic_dec_and_test(&anon_vma->refcount)) {
				589	/*
				590	* Oops, we held the last refcount, release the lock
				591	* and bail -- can't simply use put_anon_vma() because
				592	* we'll deadlock on the anon_vma_lock() recursion.
				593	*/
				594	anon_vma_unlock(anon_vma);
				595	__put_anon_vma(anon_vma);
				596	anon_vma = NULL;
				597	}
				598
				599	return anon_vma;
				600
				601	out:
				602	rcu_read_unlock();
				603	return anon_vma;
				604	}
				605
				606	void page_unlock_anon_vma(struct anon_vma *anon_vma)
				607	{
				608	anon_vma_unlock(anon_vma);
				609	}
				610
				611	/*
				612	* At what user virtual address is page expected in @vma?
				613	* Returns virtual address or -EFAULT if page's index/offset is not
				614	* within the range mapped the @vma.
				615	*/
				616	inline unsigned long
				617	vma_address(struct page page, struct vm_area_struct vma)
				618	{
				619	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
				620	unsigned long address;
				621
				622	if (unlikely(is_vm_hugetlb_page(vma)))
				623	pgoff = page->index << huge_page_order(page_hstate(page));
				624	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
				625	if (unlikely(address < vma->vm_start \|\| address >= vma->vm_end)) {
				626	/* page should be within @vma mapping range */
				627	return -EFAULT;
				628	}
				629	return address;
				630	}
				631
				632	/*
				633	* At what user virtual address is page expected in vma?
				634	* Caller should check the page is actually part of the vma.
				635	*/
				636	unsigned long page_address_in_vma(struct page page, struct vm_area_struct vma)
				637	{
				638	if (PageAnon(page)) {
				639	struct anon_vma *page__anon_vma = page_anon_vma(page);
				640	/*
				641	* Note: swapoff's unuse_vma() is more efficient with this
				642	* check, and needs it to match anon_vma when KSM is active.
				643	*/
				644	if (!vma->anon_vma \|\| !page__anon_vma \|\|
				645	vma->anon_vma->root != page__anon_vma->root)
				646	return -EFAULT;
				647	} else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
				648	if (!vma->vm_file \|\|
				649	vma->vm_file->f_mapping != page->mapping)
				650	return -EFAULT;
				651	} else
				652	return -EFAULT;
				653	return vma_address(page, vma);
				654	}
				655
				656	/*
				657	* Check that @page is mapped at @address into @mm.
				658	*
				659	* If @sync is false, page_check_address may perform a racy check to avoid
				660	* the page table lock when the pte is not present (helpful when reclaiming
				661	* highly shared pages).
				662	*
				663	* On success returns with pte mapped and locked.
				664	*/
				665	pte_t __page_check_address(struct page page, struct mm_struct *mm,
				666	unsigned long address, spinlock_t **ptlp, int sync)
				667	{
				668	pgd_t *pgd;
				669	pud_t *pud;
				670	pmd_t *pmd;
				671	pte_t *pte;
				672	spinlock_t *ptl;
				673
				674	if (unlikely(PageHuge(page))) {
				675	/* when pud is not present, pte will be NULL */
				676	pte = huge_pte_offset(mm, address);
				677	if (!pte)
				678	return NULL;
				679
				680	ptl = &mm->page_table_lock;
				681	goto check;
				682	}
				683
				684	pgd = pgd_offset(mm, address);
				685	if (!pgd_present(*pgd))
				686	return NULL;
				687
				688	pud = pud_offset(pgd, address);
				689	if (!pud_present(*pud))
				690	return NULL;
				691
				692	pmd = pmd_offset(pud, address);
				693	if (!pmd_present(*pmd))
				694	return NULL;
				695	if (pmd_trans_huge(*pmd))
				696	return NULL;
				697
				698	pte = pte_offset_map(pmd, address);
				699	/* Make a quick check before getting the lock */
				700	if (!sync && !pte_present(*pte)) {
				701	pte_unmap(pte);
				702	return NULL;
				703	}
				704
				705	ptl = pte_lockptr(mm, pmd);
				706	check:
				707	spin_lock(ptl);
				708	if (pte_present(pte) && page_to_pfn(page) == pte_pfn(pte)) {
				709	*ptlp = ptl;
				710	return pte;
				711	}
				712	pte_unmap_unlock(pte, ptl);
				713	return NULL;
				714	}
				715
				716	/**
				717	* page_mapped_in_vma - check whether a page is really mapped in a VMA
				718	* @page: the page to test
				719	* @vma: the VMA to test
				720	*
				721	* Returns 1 if the page is mapped into the page tables of the VMA, 0
				722	* if the page is not mapped into the page tables of this VMA. Only
				723	* valid for normal file or anonymous VMAs.
				724	*/
				725	int page_mapped_in_vma(struct page page, struct vm_area_struct vma)
				726	{
				727	unsigned long address;
				728	pte_t *pte;
				729	spinlock_t *ptl;
				730
				731	address = vma_address(page, vma);
				732	if (address == -EFAULT) /* out of vma range */
				733	return 0;
				734	pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
				735	if (!pte) /* the page is not in this mm */
				736	return 0;
				737	pte_unmap_unlock(pte, ptl);
				738
				739	return 1;
				740	}
				741
				742	/*
				743	* Subfunctions of page_referenced: page_referenced_one called
				744	* repeatedly from either page_referenced_anon or page_referenced_file.
				745	*/
				746	int page_referenced_one(struct page page, struct vm_area_struct vma,
				747	unsigned long address, unsigned int *mapcount,
				748	unsigned long *vm_flags)
				749	{
				750	struct mm_struct *mm = vma->vm_mm;
				751	int referenced = 0;
				752
				753	if (unlikely(PageTransHuge(page))) {
				754	pmd_t *pmd;
				755
				756	spin_lock(&mm->page_table_lock);
				757	/*
				758	* rmap might return false positives; we must filter
				759	* these out using page_check_address_pmd().
				760	*/
				761	pmd = page_check_address_pmd(page, mm, address,
				762	PAGE_CHECK_ADDRESS_PMD_FLAG);
				763	if (!pmd) {
				764	spin_unlock(&mm->page_table_lock);
				765	goto out;
				766	}
				767
				768	if (vma->vm_flags & VM_LOCKED) {
				769	spin_unlock(&mm->page_table_lock);
				770	mapcount = 0; / break early from loop */
				771	*vm_flags \|= VM_LOCKED;
				772	goto out;
				773	}
				774
				775	/* go ahead even if the pmd is pmd_trans_splitting() */
				776	if (pmdp_clear_flush_young_notify(vma, address, pmd))
				777	referenced++;
				778	spin_unlock(&mm->page_table_lock);
				779	} else {
				780	pte_t *pte;
				781	spinlock_t *ptl;
				782
				783	/*
				784	* rmap might return false positives; we must filter
				785	* these out using page_check_address().
				786	*/
				787	pte = page_check_address(page, mm, address, &ptl, 0);
				788	if (!pte)
				789	goto out;
				790
				791	if (vma->vm_flags & VM_LOCKED) {
				792	pte_unmap_unlock(pte, ptl);
				793	mapcount = 0; / break early from loop */
				794	*vm_flags \|= VM_LOCKED;
				795	goto out;
				796	}
				797
				798	if (ptep_clear_flush_young_notify(vma, address, pte)) {
				799	/*
				800	* Don't treat a reference through a sequentially read
				801	* mapping as such. If the page has been used in
				802	* another mapping, we will catch it; if this other
				803	* mapping is already gone, the unmap path will have
				804	* set PG_referenced or activated the page.
				805	*/
				806	if (likely(!VM_SequentialReadHint(vma)))
				807	referenced++;
				808	}
				809	pte_unmap_unlock(pte, ptl);
				810	}
				811
				812	/* Pretend the page is referenced if the task has the
				813	swap token and is in the middle of a page fault. */
				814	if (mm != current->mm && has_swap_token(mm) &&
				815	rwsem_is_locked(&mm->mmap_sem))
				816	referenced++;
				817
				818	(*mapcount)--;
				819
				820	if (referenced)
				821	*vm_flags \|= vma->vm_flags;
				822	out:
				823	return referenced;
				824	}
				825
				826	static int page_referenced_anon(struct page *page,
				827	struct mem_cgroup *memcg,
				828	unsigned long *vm_flags)
				829	{
				830	unsigned int mapcount;
				831	struct anon_vma *anon_vma;
				832	struct anon_vma_chain *avc;
				833	int referenced = 0;
				834
				835	anon_vma = page_lock_anon_vma(page);
				836	if (!anon_vma)
				837	return referenced;
				838
				839	mapcount = page_mapcount(page);
				840	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
				841	struct vm_area_struct *vma = avc->vma;
				842	unsigned long address = vma_address(page, vma);
				843	if (address == -EFAULT)
				844	continue;
				845	/*
				846	* If we are reclaiming on behalf of a cgroup, skip
				847	* counting on behalf of references from different
				848	* cgroups
				849	*/
				850	if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
				851	continue;
				852	referenced += page_referenced_one(page, vma, address,
				853	&mapcount, vm_flags);
				854	if (!mapcount)
				855	break;
				856	}
				857
				858	page_unlock_anon_vma(anon_vma);
				859	return referenced;
				860	}
				861
				862	/**
				863	* page_referenced_file - referenced check for object-based rmap
				864	* @page: the page we're checking references on.
				865	* @memcg: target memory control group
				866	* @vm_flags: collect encountered vma->vm_flags who actually referenced the page
				867	*
				868	* For an object-based mapped page, find all the places it is mapped and
				869	* check/clear the referenced flag. This is done by following the page->mapping
				870	* pointer, then walking the chain of vmas it holds. It returns the number
				871	* of references it found.
				872	*
				873	* This function is only called from page_referenced for object-based pages.
				874	*/
				875	static int page_referenced_file(struct page *page,
				876	struct mem_cgroup *memcg,
				877	unsigned long *vm_flags)
				878	{
				879	unsigned int mapcount;
				880	struct address_space *mapping = page->mapping;
				881	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
				882	struct vm_area_struct *vma;
				883	struct prio_tree_iter iter;
				884	int referenced = 0;
				885
				886	/*
				887	* The caller's checks on page->mapping and !PageAnon have made
				888	* sure that this is a file page: the check for page->mapping
				889	* excludes the case just before it gets set on an anon page.
				890	*/
				891	BUG_ON(PageAnon(page));
				892
				893	/*
				894	* The page lock not only makes sure that page->mapping cannot
				895	* suddenly be NULLified by truncation, it makes sure that the
				896	* structure at mapping cannot be freed and reused yet,
				897	* so we can safely take mapping->i_mmap_mutex.
				898	*/
				899	BUG_ON(!PageLocked(page));
				900
				901	mutex_lock(&mapping->i_mmap_mutex);
				902
				903	/*
				904	* i_mmap_mutex does not stabilize mapcount at all, but mapcount
				905	* is more likely to be accurate if we note it after spinning.
				906	*/
				907	mapcount = page_mapcount(page);
				908
				909	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
				910	unsigned long address = vma_address(page, vma);
				911	if (address == -EFAULT)
				912	continue;
				913	/*
				914	* If we are reclaiming on behalf of a cgroup, skip
				915	* counting on behalf of references from different
				916	* cgroups
				917	*/
				918	if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
				919	continue;
				920	referenced += page_referenced_one(page, vma, address,
				921	&mapcount, vm_flags);
				922	if (!mapcount)
				923	break;
				924	}
				925
				926	mutex_unlock(&mapping->i_mmap_mutex);
				927	return referenced;
				928	}
				929
				930	/**
				931	* page_referenced - test if the page was referenced
				932	* @page: the page to test
				933	* @is_locked: caller holds lock on the page
				934	* @memcg: target memory cgroup
				935	* @vm_flags: collect encountered vma->vm_flags who actually referenced the page
				936	*
				937	* Quick test_and_clear_referenced for all mappings to a page,
				938	* returns the number of ptes which referenced the page.
				939	*/
				940	int page_referenced(struct page *page,
				941	int is_locked,
				942	struct mem_cgroup *memcg,
				943	unsigned long *vm_flags)
				944	{
				945	int referenced = 0;
				946	int we_locked = 0;
				947
				948	*vm_flags = 0;
				949	if (page_mapped(page) && page_rmapping(page)) {
				950	if (!is_locked && (!PageAnon(page) \|\| PageKsm(page))) {
				951	we_locked = trylock_page(page);
				952	if (!we_locked) {
				953	referenced++;
				954	goto out;
				955	}
				956	}
				957	if (unlikely(PageKsm(page)))
				958	referenced += page_referenced_ksm(page, memcg,
				959	vm_flags);
				960	else if (PageAnon(page))
				961	referenced += page_referenced_anon(page, memcg,
				962	vm_flags);
				963	else if (page->mapping)
				964	referenced += page_referenced_file(page, memcg,
				965	vm_flags);
				966	if (we_locked)
				967	unlock_page(page);
				968
				969	if (page_test_and_clear_young(page_to_pfn(page)))
				970	referenced++;
				971	}
				972	out:
				973	return referenced;
				974	}
				975
				976	static int page_mkclean_one(struct page page, struct vm_area_struct vma,
				977	unsigned long address)
				978	{
				979	struct mm_struct *mm = vma->vm_mm;
				980	pte_t *pte;
				981	spinlock_t *ptl;
				982	int ret = 0;
				983
				984	pte = page_check_address(page, mm, address, &ptl, 1);
				985	if (!pte)
				986	goto out;
				987
				988	if (pte_dirty(pte) \|\| pte_write(pte)) {
				989	pte_t entry;
				990
				991	flush_cache_page(vma, address, pte_pfn(*pte));
				992	entry = ptep_clear_flush_notify(vma, address, pte);
				993	entry = pte_wrprotect(entry);
				994	entry = pte_mkclean(entry);
				995	set_pte_at(mm, address, pte, entry);
				996	ret = 1;
				997	}
				998
				999	pte_unmap_unlock(pte, ptl);
				1000	out:
				1001	return ret;
				1002	}
				1003
				1004	static int page_mkclean_file(struct address_space mapping, struct page page)
				1005	{
				1006	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
				1007	struct vm_area_struct *vma;
				1008	struct prio_tree_iter iter;
				1009	int ret = 0;
				1010
				1011	BUG_ON(PageAnon(page));
				1012
				1013	mutex_lock(&mapping->i_mmap_mutex);
				1014	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
				1015	if (vma->vm_flags & VM_SHARED) {
				1016	unsigned long address = vma_address(page, vma);
				1017	if (address == -EFAULT)
				1018	continue;
				1019	ret += page_mkclean_one(page, vma, address);
				1020	}
				1021	}
				1022	mutex_unlock(&mapping->i_mmap_mutex);
				1023	return ret;
				1024	}
				1025
				1026	int page_mkclean(struct page *page)
				1027	{
				1028	int ret = 0;
				1029
				1030	BUG_ON(!PageLocked(page));
				1031
				1032	if (page_mapped(page)) {
				1033	struct address_space *mapping = page_mapping(page);
				1034	if (mapping)
				1035	ret = page_mkclean_file(mapping, page);
				1036	}
				1037
				1038	return ret;
				1039	}
				1040	EXPORT_SYMBOL_GPL(page_mkclean);
				1041
				1042	/**
				1043	* page_move_anon_rmap - move a page to our anon_vma
				1044	* @page: the page to move to our anon_vma
				1045	* @vma: the vma the page belongs to
				1046	* @address: the user virtual address mapped
				1047	*
				1048	* When a page belongs exclusively to one process after a COW event,
				1049	* that page can be moved into the anon_vma that belongs to just that
				1050	* process, so the rmap code will not search the parent or sibling
				1051	* processes.
				1052	*/
				1053	void page_move_anon_rmap(struct page *page,
				1054	struct vm_area_struct *vma, unsigned long address)
				1055	{
				1056	struct anon_vma *anon_vma = vma->anon_vma;
				1057
				1058	VM_BUG_ON(!PageLocked(page));
				1059	VM_BUG_ON(!anon_vma);
				1060	VM_BUG_ON(page->index != linear_page_index(vma, address));
				1061
				1062	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
				1063	page->mapping = (struct address_space *) anon_vma;
				1064	}
				1065
				1066	/**
				1067	* __page_set_anon_rmap - set up new anonymous rmap
				1068	* @page: Page to add to rmap
				1069	* @vma: VM area to add page to.
				1070	* @address: User virtual address of the mapping
				1071	* @exclusive: the page is exclusively owned by the current process
				1072	*/
				1073	static void __page_set_anon_rmap(struct page *page,
				1074	struct vm_area_struct *vma, unsigned long address, int exclusive)
				1075	{
				1076	struct anon_vma *anon_vma = vma->anon_vma;
				1077
				1078	BUG_ON(!anon_vma);
				1079
				1080	if (PageAnon(page))
				1081	return;
				1082
				1083	/*
				1084	* If the page isn't exclusively mapped into this vma,
				1085	* we must use the _oldest_ possible anon_vma for the
				1086	* page mapping!
				1087	*/
				1088	if (!exclusive)
				1089	anon_vma = anon_vma->root;
				1090
				1091	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
				1092	page->mapping = (struct address_space *) anon_vma;
				1093	page->index = linear_page_index(vma, address);
				1094	}
				1095
				1096	/**
				1097	* __page_check_anon_rmap - sanity check anonymous rmap addition
				1098	* @page: the page to add the mapping to
				1099	* @vma: the vm area in which the mapping is added
				1100	* @address: the user virtual address mapped
				1101	*/
				1102	static void __page_check_anon_rmap(struct page *page,
				1103	struct vm_area_struct *vma, unsigned long address)
				1104	{
				1105	#ifdef CONFIG_DEBUG_VM
				1106	/*
				1107	* The page's anon-rmap details (mapping and index) are guaranteed to
				1108	* be set up correctly at this point.
				1109	*
				1110	* We have exclusion against page_add_anon_rmap because the caller
				1111	* always holds the page locked, except if called from page_dup_rmap,
				1112	* in which case the page is already known to be setup.
				1113	*
				1114	* We have exclusion against page_add_new_anon_rmap because those pages
				1115	* are initially only visible via the pagetables, and the pte is locked
				1116	* over the call to page_add_new_anon_rmap.
				1117	*/
				1118	BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
				1119	BUG_ON(page->index != linear_page_index(vma, address));
				1120	#endif
				1121	}
				1122
				1123	/**
				1124	* page_add_anon_rmap - add pte mapping to an anonymous page
				1125	* @page: the page to add the mapping to
				1126	* @vma: the vm area in which the mapping is added
				1127	* @address: the user virtual address mapped
				1128	*
				1129	* The caller needs to hold the pte lock, and the page must be locked in
				1130	* the anon_vma case: to serialize mapping,index checking after setting,
				1131	* and to ensure that PageAnon is not being upgraded racily to PageKsm
				1132	* (but PageKsm is never downgraded to PageAnon).
				1133	*/
				1134	void page_add_anon_rmap(struct page *page,
				1135	struct vm_area_struct *vma, unsigned long address)
				1136	{
				1137	do_page_add_anon_rmap(page, vma, address, 0);
				1138	}
				1139
				1140	/*
				1141	* Special version of the above for do_swap_page, which often runs
				1142	* into pages that are exclusively owned by the current process.
				1143	* Everybody else should continue to use page_add_anon_rmap above.
				1144	*/
				1145	void do_page_add_anon_rmap(struct page *page,
				1146	struct vm_area_struct *vma, unsigned long address, int exclusive)
				1147	{
				1148	int first = atomic_inc_and_test(&page->_mapcount);
				1149	if (first) {
				1150	if (!PageTransHuge(page))
				1151	__inc_zone_page_state(page, NR_ANON_PAGES);
				1152	else
				1153	__inc_zone_page_state(page,
				1154	NR_ANON_TRANSPARENT_HUGEPAGES);
				1155	}
				1156	if (unlikely(PageKsm(page)))
				1157	return;
				1158
				1159	VM_BUG_ON(!PageLocked(page));
				1160	/* address might be in next vma when migration races vma_adjust */
				1161	if (first)
				1162	__page_set_anon_rmap(page, vma, address, exclusive);
				1163	else
				1164	__page_check_anon_rmap(page, vma, address);
				1165	}
				1166
				1167	/**
				1168	* page_add_new_anon_rmap - add pte mapping to a new anonymous page
				1169	* @page: the page to add the mapping to
				1170	* @vma: the vm area in which the mapping is added
				1171	* @address: the user virtual address mapped
				1172	*
				1173	* Same as page_add_anon_rmap but must only be called on new pages.
				1174	* This means the inc-and-test can be bypassed.
				1175	* Page does not have to be locked.
				1176	*/
				1177	void page_add_new_anon_rmap(struct page *page,
				1178	struct vm_area_struct *vma, unsigned long address)
				1179	{
				1180	VM_BUG_ON(address < vma->vm_start \|\| address >= vma->vm_end);
				1181	SetPageSwapBacked(page);
				1182	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
				1183	if (!PageTransHuge(page))
				1184	__inc_zone_page_state(page, NR_ANON_PAGES);
				1185	else
				1186	__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
				1187	__page_set_anon_rmap(page, vma, address, 1);
				1188	if (page_evictable(page, vma))
				1189	lru_cache_add_lru(page, LRU_ACTIVE_ANON);
				1190	else
				1191	add_page_to_unevictable_list(page);
				1192	}
				1193
				1194	/**
				1195	* page_add_file_rmap - add pte mapping to a file page
				1196	* @page: the page to add the mapping to
				1197	*
				1198	* The caller needs to hold the pte lock.
				1199	*/
				1200	void page_add_file_rmap(struct page *page)
				1201	{
				1202	bool locked;
				1203	unsigned long flags;
				1204
				1205	mem_cgroup_begin_update_page_stat(page, &locked, &flags);
				1206	if (atomic_inc_and_test(&page->_mapcount)) {
				1207	__inc_zone_page_state(page, NR_FILE_MAPPED);
				1208	mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
				1209	}
				1210	mem_cgroup_end_update_page_stat(page, &locked, &flags);
				1211	}
				1212
				1213	/**
				1214	* page_remove_rmap - take down pte mapping from a page
				1215	* @page: page to remove mapping from
				1216	*
				1217	* The caller needs to hold the pte lock.
				1218	*/
				1219	void page_remove_rmap(struct page *page)
				1220	{
				1221	struct address_space *mapping = page_mapping(page);
				1222	bool anon = PageAnon(page);
				1223	bool locked;
				1224	unsigned long flags;
				1225
				1226	/*
				1227	* The anon case has no mem_cgroup page_stat to update; but may
				1228	* uncharge_page() below, where the lock ordering can deadlock if
				1229	* we hold the lock against page_stat move: so avoid it on anon.
				1230	*/
				1231	if (!anon)
				1232	mem_cgroup_begin_update_page_stat(page, &locked, &flags);
				1233
				1234	/* page still mapped by someone else? */
				1235	if (!atomic_add_negative(-1, &page->_mapcount))
				1236	goto out;
				1237
				1238	/*
				1239	* Now that the last pte has gone, s390 must transfer dirty
				1240	* flag from storage key to struct page. We can usually skip
				1241	* this if the page is anon, so about to be freed; but perhaps
				1242	* not if it's in swapcache - there might be another pte slot
				1243	* containing the swap entry, but page not yet written to swap.
				1244	*
				1245	* And we can skip it on file pages, so long as the filesystem
				1246	* participates in dirty tracking; but need to catch shm and tmpfs
				1247	* and ramfs pages which have been modified since creation by read
				1248	* fault.
				1249	*
				1250	* Note that mapping must be decided above, before decrementing
				1251	* mapcount (which luckily provides a barrier): once page is unmapped,
				1252	* it could be truncated and page->mapping reset to NULL at any moment.
				1253	* Note also that we are relying on page_mapping(page) to set mapping
				1254	* to &swapper_space when PageSwapCache(page).
				1255	*/
				1256	if (mapping && !mapping_cap_account_dirty(mapping) &&
				1257	page_test_and_clear_dirty(page_to_pfn(page), 1))
				1258	set_page_dirty(page);
				1259	/*
				1260	* Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
				1261	* and not charged by memcg for now.
				1262	*/
				1263	if (unlikely(PageHuge(page)))
				1264	goto out;
				1265	if (anon) {
				1266	mem_cgroup_uncharge_page(page);
				1267	if (!PageTransHuge(page))
				1268	__dec_zone_page_state(page, NR_ANON_PAGES);
				1269	else
				1270	__dec_zone_page_state(page,
				1271	NR_ANON_TRANSPARENT_HUGEPAGES);
				1272	} else {
				1273	__dec_zone_page_state(page, NR_FILE_MAPPED);
				1274	mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
				1275	}
				1276	/*
				1277	* It would be tidy to reset the PageAnon mapping here,
				1278	* but that might overwrite a racing page_add_anon_rmap
				1279	* which increments mapcount after us but sets mapping
				1280	* before us: so leave the reset to free_hot_cold_page,
				1281	* and remember that it's only reliable while mapped.
				1282	* Leaving it set also helps swapoff to reinstate ptes
				1283	* faster for those pages still in swapcache.
				1284	*/
				1285	out:
				1286	if (!anon)
				1287	mem_cgroup_end_update_page_stat(page, &locked, &flags);
				1288	}
				1289
				1290	/*
				1291	* Subfunctions of try_to_unmap: try_to_unmap_one called
				1292	* repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file.
				1293	*/
				1294	int try_to_unmap_one(struct page page, struct vm_area_struct vma,
				1295	unsigned long address, enum ttu_flags flags)
				1296	{
				1297	struct mm_struct *mm = vma->vm_mm;
				1298	pte_t *pte;
				1299	pte_t pteval;
				1300	spinlock_t *ptl;
				1301	int ret = SWAP_AGAIN;
				1302
				1303	pte = page_check_address(page, mm, address, &ptl, 0);
				1304	if (!pte)
				1305	goto out;
				1306
				1307	/*
				1308	* If the page is mlock()d, we cannot swap it out.
				1309	* If it's recently referenced (perhaps page_referenced
				1310	* skipped over this mm) then we should reactivate it.
				1311	*/
				1312	if (!(flags & TTU_IGNORE_MLOCK)) {
				1313	if (vma->vm_flags & VM_LOCKED)
				1314	goto out_mlock;
				1315
				1316	if (TTU_ACTION(flags) == TTU_MUNLOCK)
				1317	goto out_unmap;
				1318	}
				1319	if (!(flags & TTU_IGNORE_ACCESS)) {
				1320	if (ptep_clear_flush_young_notify(vma, address, pte)) {
				1321	ret = SWAP_FAIL;
				1322	goto out_unmap;
				1323	}
				1324	}
				1325
				1326	/* Nuke the page table entry. */
				1327	flush_cache_page(vma, address, page_to_pfn(page));
				1328	pteval = ptep_clear_flush_notify(vma, address, pte);
				1329
				1330	/* Move the dirty bit to the physical page now the pte is gone. */
				1331	if (pte_dirty(pteval))
				1332	set_page_dirty(page);
				1333
				1334	/* Update high watermark before we lower rss */
				1335	update_hiwater_rss(mm);
				1336
				1337	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
				1338	if (PageAnon(page))
				1339	dec_mm_counter(mm, MM_ANONPAGES);
				1340	else
				1341	dec_mm_counter(mm, MM_FILEPAGES);
				1342	set_pte_at(mm, address, pte,
				1343	swp_entry_to_pte(make_hwpoison_entry(page)));
				1344	} else if (PageAnon(page)) {
				1345	swp_entry_t entry = { .val = page_private(page) };
				1346
				1347	if (PageSwapCache(page)) {
				1348	/*
				1349	* Store the swap location in the pte.
				1350	* See handle_pte_fault() ...
				1351	*/
				1352	if (swap_duplicate(entry) < 0) {
				1353	set_pte_at(mm, address, pte, pteval);
				1354	ret = SWAP_FAIL;
				1355	goto out_unmap;
				1356	}
				1357	if (list_empty(&mm->mmlist)) {
				1358	spin_lock(&mmlist_lock);
				1359	if (list_empty(&mm->mmlist))
				1360	list_add(&mm->mmlist, &init_mm.mmlist);
				1361	spin_unlock(&mmlist_lock);
				1362	}
				1363	dec_mm_counter(mm, MM_ANONPAGES);
				1364	inc_mm_counter(mm, MM_SWAPENTS);
				1365	} else if (IS_ENABLED(CONFIG_MIGRATION)) {
				1366	/*
				1367	* Store the pfn of the page in a special migration
				1368	* pte. do_swap_page() will wait until the migration
				1369	* pte is removed and then restart fault handling.
				1370	*/
				1371	BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
				1372	entry = make_migration_entry(page, pte_write(pteval));
				1373	}
				1374	set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
				1375	BUG_ON(pte_file(*pte));
				1376	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
				1377	(TTU_ACTION(flags) == TTU_MIGRATION)) {
				1378	/* Establish migration entry for a file page */
				1379	swp_entry_t entry;
				1380	entry = make_migration_entry(page, pte_write(pteval));
				1381	set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
				1382	} else
				1383	dec_mm_counter(mm, MM_FILEPAGES);
				1384
				1385	page_remove_rmap(page);
				1386	page_cache_release(page);
				1387
				1388	out_unmap:
				1389	pte_unmap_unlock(pte, ptl);
				1390	out:
				1391	return ret;
				1392
				1393	out_mlock:
				1394	pte_unmap_unlock(pte, ptl);
				1395
				1396
				1397	/*
				1398	* We need mmap_sem locking, Otherwise VM_LOCKED check makes
				1399	* unstable result and race. Plus, We can't wait here because
				1400	* we now hold anon_vma->mutex or mapping->i_mmap_mutex.
				1401	* if trylock failed, the page remain in evictable lru and later
				1402	* vmscan could retry to move the page to unevictable lru if the
				1403	* page is actually mlocked.
				1404	*/
				1405	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
				1406	if (vma->vm_flags & VM_LOCKED) {
				1407	mlock_vma_page(page);
				1408	ret = SWAP_MLOCK;
				1409	}
				1410	up_read(&vma->vm_mm->mmap_sem);
				1411	}
				1412	return ret;
				1413	}
				1414
				1415	/*
				1416	* objrmap doesn't work for nonlinear VMAs because the assumption that
				1417	* offset-into-file correlates with offset-into-virtual-addresses does not hold.
				1418	* Consequently, given a particular page and its ->index, we cannot locate the
				1419	* ptes which are mapping that page without an exhaustive linear search.
				1420	*
				1421	* So what this code does is a mini "virtual scan" of each nonlinear VMA which
				1422	* maps the file to which the target page belongs. The ->vm_private_data field
				1423	* holds the current cursor into that scan. Successive searches will circulate
				1424	* around the vma's virtual address space.
				1425	*
				1426	* So as more replacement pressure is applied to the pages in a nonlinear VMA,
				1427	* more scanning pressure is placed against them as well. Eventually pages
				1428	* will become fully unmapped and are eligible for eviction.
				1429	*
				1430	* For very sparsely populated VMAs this is a little inefficient - chances are
				1431	* there there won't be many ptes located within the scan cluster. In this case
				1432	* maybe we could scan further - to the end of the pte page, perhaps.
				1433	*
				1434	* Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can
				1435	* acquire it without blocking. If vma locked, mlock the pages in the cluster,
				1436	* rather than unmapping them. If we encounter the "check_page" that vmscan is
				1437	* trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
				1438	*/
				1439	#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
				1440	#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
				1441
				1442	static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
				1443	struct vm_area_struct vma, struct page check_page)
				1444	{
				1445	struct mm_struct *mm = vma->vm_mm;
				1446	pgd_t *pgd;
				1447	pud_t *pud;
				1448	pmd_t *pmd;
				1449	pte_t *pte;
				1450	pte_t pteval;
				1451	spinlock_t *ptl;
				1452	struct page *page;
				1453	unsigned long address;
				1454	unsigned long end;
				1455	int ret = SWAP_AGAIN;
				1456	int locked_vma = 0;
				1457
				1458	address = (vma->vm_start + cursor) & CLUSTER_MASK;
				1459	end = address + CLUSTER_SIZE;
				1460	if (address < vma->vm_start)
				1461	address = vma->vm_start;
				1462	if (end > vma->vm_end)
				1463	end = vma->vm_end;
				1464
				1465	pgd = pgd_offset(mm, address);
				1466	if (!pgd_present(*pgd))
				1467	return ret;
				1468
				1469	pud = pud_offset(pgd, address);
				1470	if (!pud_present(*pud))
				1471	return ret;
				1472
				1473	pmd = pmd_offset(pud, address);
				1474	if (!pmd_present(*pmd))
				1475	return ret;
				1476
				1477	/*
				1478	* If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
				1479	* keep the sem while scanning the cluster for mlocking pages.
				1480	*/
				1481	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
				1482	locked_vma = (vma->vm_flags & VM_LOCKED);
				1483	if (!locked_vma)
				1484	up_read(&vma->vm_mm->mmap_sem); /* don't need it */
				1485	}
				1486
				1487	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
				1488
				1489	/* Update high watermark before we lower rss */
				1490	update_hiwater_rss(mm);
				1491
				1492	for (; address < end; pte++, address += PAGE_SIZE) {
				1493	if (!pte_present(*pte))
				1494	continue;
				1495	page = vm_normal_page(vma, address, *pte);
				1496	BUG_ON(!page \|\| PageAnon(page));
				1497
				1498	if (locked_vma) {
				1499	if (page == check_page) {
				1500	/* we know we have check_page locked */
				1501	mlock_vma_page(page);
				1502	ret = SWAP_MLOCK;
				1503	} else if (trylock_page(page)) {
				1504	/*
				1505	* If we can lock the page, perform mlock.
				1506	* Otherwise leave the page alone, it will be
				1507	* eventually encountered again later.
				1508	*/
				1509	mlock_vma_page(page);
				1510	unlock_page(page);
				1511	}
				1512	continue; /* don't unmap */
				1513	}
				1514
				1515	if (ptep_clear_flush_young_notify(vma, address, pte))
				1516	continue;
				1517
				1518	/* Nuke the page table entry. */
				1519	flush_cache_page(vma, address, pte_pfn(*pte));
				1520	pteval = ptep_clear_flush_notify(vma, address, pte);
				1521
				1522	/* If nonlinear, store the file page offset in the pte. */
				1523	if (page->index != linear_page_index(vma, address))
				1524	set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
				1525
				1526	/* Move the dirty bit to the physical page now the pte is gone. */
				1527	if (pte_dirty(pteval))
				1528	set_page_dirty(page);
				1529
				1530	page_remove_rmap(page);
				1531	page_cache_release(page);
				1532	dec_mm_counter(mm, MM_FILEPAGES);
				1533	(*mapcount)--;
				1534	}
				1535	pte_unmap_unlock(pte - 1, ptl);
				1536	if (locked_vma)
				1537	up_read(&vma->vm_mm->mmap_sem);
				1538	return ret;
				1539	}
				1540
				1541	bool is_vma_temporary_stack(struct vm_area_struct *vma)
				1542	{
				1543	int maybe_stack = vma->vm_flags & (VM_GROWSDOWN \| VM_GROWSUP);
				1544
				1545	if (!maybe_stack)
				1546	return false;
				1547
				1548	if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
				1549	VM_STACK_INCOMPLETE_SETUP)
				1550	return true;
				1551
				1552	return false;
				1553	}
				1554
				1555	/**
				1556	* try_to_unmap_anon - unmap or unlock anonymous page using the object-based
				1557	* rmap method
				1558	* @page: the page to unmap/unlock
				1559	* @flags: action and flags
				1560	*
				1561	* Find all the mappings of a page using the mapping pointer and the vma chains
				1562	* contained in the anon_vma struct it points to.
				1563	*
				1564	* This function is only called from try_to_unmap/try_to_munlock for
				1565	* anonymous pages.
				1566	* When called from try_to_munlock(), the mmap_sem of the mm containing the vma
				1567	* where the page was found will be held for write. So, we won't recheck
				1568	* vm_flags for that VMA. That should be OK, because that vma shouldn't be
				1569	* 'LOCKED.
				1570	*/
				1571	static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
				1572	{
				1573	struct anon_vma *anon_vma;
				1574	struct anon_vma_chain *avc;
				1575	int ret = SWAP_AGAIN;
				1576
				1577	anon_vma = page_lock_anon_vma(page);
				1578	if (!anon_vma)
				1579	return ret;
				1580
				1581	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
				1582	struct vm_area_struct *vma = avc->vma;
				1583	unsigned long address;
				1584
				1585	/*
				1586	* During exec, a temporary VMA is setup and later moved.
				1587	* The VMA is moved under the anon_vma lock but not the
				1588	* page tables leading to a race where migration cannot
				1589	* find the migration ptes. Rather than increasing the
				1590	* locking requirements of exec(), migration skips
				1591	* temporary VMAs until after exec() completes.
				1592	*/
				1593	if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
				1594	is_vma_temporary_stack(vma))
				1595	continue;
				1596
				1597	address = vma_address(page, vma);
				1598	if (address == -EFAULT)
				1599	continue;
				1600	ret = try_to_unmap_one(page, vma, address, flags);
				1601	if (ret != SWAP_AGAIN \|\| !page_mapped(page))
				1602	break;
				1603	}
				1604
				1605	page_unlock_anon_vma(anon_vma);
				1606	return ret;
				1607	}
				1608
				1609	/**
				1610	* try_to_unmap_file - unmap/unlock file page using the object-based rmap method
				1611	* @page: the page to unmap/unlock
				1612	* @flags: action and flags
				1613	*
				1614	* Find all the mappings of a page using the mapping pointer and the vma chains
				1615	* contained in the address_space struct it points to.
				1616	*
				1617	* This function is only called from try_to_unmap/try_to_munlock for
				1618	* object-based pages.
				1619	* When called from try_to_munlock(), the mmap_sem of the mm containing the vma
				1620	* where the page was found will be held for write. So, we won't recheck
				1621	* vm_flags for that VMA. That should be OK, because that vma shouldn't be
				1622	* 'LOCKED.
				1623	*/
				1624	static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
				1625	{
				1626	struct address_space *mapping = page->mapping;
				1627	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
				1628	struct vm_area_struct *vma;
				1629	struct prio_tree_iter iter;
				1630	int ret = SWAP_AGAIN;
				1631	unsigned long cursor;
				1632	unsigned long max_nl_cursor = 0;
				1633	unsigned long max_nl_size = 0;
				1634	unsigned int mapcount;
				1635
				1636	mutex_lock(&mapping->i_mmap_mutex);
				1637	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
				1638	unsigned long address = vma_address(page, vma);
				1639	if (address == -EFAULT)
				1640	continue;
				1641	ret = try_to_unmap_one(page, vma, address, flags);
				1642	if (ret != SWAP_AGAIN \|\| !page_mapped(page))
				1643	goto out;
				1644	}
				1645
				1646	if (list_empty(&mapping->i_mmap_nonlinear))
				1647	goto out;
				1648
				1649	/*
				1650	* We don't bother to try to find the munlocked page in nonlinears.
				1651	* It's costly. Instead, later, page reclaim logic may call
				1652	* try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
				1653	*/
				1654	if (TTU_ACTION(flags) == TTU_MUNLOCK)
				1655	goto out;
				1656
				1657	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
				1658	shared.vm_set.list) {
				1659	cursor = (unsigned long) vma->vm_private_data;
				1660	if (cursor > max_nl_cursor)
				1661	max_nl_cursor = cursor;
				1662	cursor = vma->vm_end - vma->vm_start;
				1663	if (cursor > max_nl_size)
				1664	max_nl_size = cursor;
				1665	}
				1666
				1667	if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
				1668	ret = SWAP_FAIL;
				1669	goto out;
				1670	}
				1671
				1672	/*
				1673	* We don't try to search for this page in the nonlinear vmas,
				1674	* and page_referenced wouldn't have found it anyway. Instead
				1675	* just walk the nonlinear vmas trying to age and unmap some.
				1676	* The mapcount of the page we came in with is irrelevant,
				1677	* but even so use it as a guide to how hard we should try?
				1678	*/
				1679	mapcount = page_mapcount(page);
				1680	if (!mapcount)
				1681	goto out;
				1682	cond_resched();
				1683
				1684	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
				1685	if (max_nl_cursor == 0)
				1686	max_nl_cursor = CLUSTER_SIZE;
				1687
				1688	do {
				1689	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
				1690	shared.vm_set.list) {
				1691	cursor = (unsigned long) vma->vm_private_data;
				1692	while ( cursor < max_nl_cursor &&
				1693	cursor < vma->vm_end - vma->vm_start) {
				1694	if (try_to_unmap_cluster(cursor, &mapcount,
				1695	vma, page) == SWAP_MLOCK)
				1696	ret = SWAP_MLOCK;
				1697	cursor += CLUSTER_SIZE;
				1698	vma->vm_private_data = (void *) cursor;
				1699	if ((int)mapcount <= 0)
				1700	goto out;
				1701	}
				1702	vma->vm_private_data = (void *) max_nl_cursor;
				1703	}
				1704	cond_resched();
				1705	max_nl_cursor += CLUSTER_SIZE;
				1706	} while (max_nl_cursor <= max_nl_size);
				1707
				1708	/*
				1709	* Don't loop forever (perhaps all the remaining pages are
				1710	* in locked vmas). Reset cursor on all unreserved nonlinear
				1711	* vmas, now forgetting on which ones it had fallen behind.
				1712	*/
				1713	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
				1714	vma->vm_private_data = NULL;
				1715	out:
				1716	mutex_unlock(&mapping->i_mmap_mutex);
				1717	return ret;
				1718	}
				1719
				1720	/**
				1721	* try_to_unmap - try to remove all page table mappings to a page
				1722	* @page: the page to get unmapped
				1723	* @flags: action and flags
				1724	*
				1725	* Tries to remove all the page table entries which are mapping this
				1726	* page, used in the pageout path. Caller must hold the page lock.
				1727	* Return values are:
				1728	*
				1729	* SWAP_SUCCESS - we succeeded in removing all mappings
				1730	* SWAP_AGAIN - we missed a mapping, try again later
				1731	* SWAP_FAIL - the page is unswappable
				1732	* SWAP_MLOCK - page is mlocked.
				1733	*/
				1734	int try_to_unmap(struct page *page, enum ttu_flags flags)
				1735	{
				1736	int ret;
				1737
				1738	BUG_ON(!PageLocked(page));
				1739	VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
				1740
				1741	if (unlikely(PageKsm(page)))
				1742	ret = try_to_unmap_ksm(page, flags);
				1743	else if (PageAnon(page))
				1744	ret = try_to_unmap_anon(page, flags);
				1745	else
				1746	ret = try_to_unmap_file(page, flags);
				1747	if (ret != SWAP_MLOCK && !page_mapped(page))
				1748	ret = SWAP_SUCCESS;
				1749	return ret;
				1750	}
				1751
				1752	/**
				1753	* try_to_munlock - try to munlock a page
				1754	* @page: the page to be munlocked
				1755	*
				1756	* Called from munlock code. Checks all of the VMAs mapping the page
				1757	* to make sure nobody else has this page mlocked. The page will be
				1758	* returned with PG_mlocked cleared if no other vmas have it mlocked.
				1759	*
				1760	* Return values are:
				1761	*
				1762	* SWAP_AGAIN - no vma is holding page mlocked, or,
				1763	* SWAP_AGAIN - page mapped in mlocked vma -- couldn't acquire mmap sem
				1764	* SWAP_FAIL - page cannot be located at present
				1765	* SWAP_MLOCK - page is now mlocked.
				1766	*/
				1767	int try_to_munlock(struct page *page)
				1768	{
				1769	VM_BUG_ON(!PageLocked(page) \|\| PageLRU(page));
				1770
				1771	if (unlikely(PageKsm(page)))
				1772	return try_to_unmap_ksm(page, TTU_MUNLOCK);
				1773	else if (PageAnon(page))
				1774	return try_to_unmap_anon(page, TTU_MUNLOCK);
				1775	else
				1776	return try_to_unmap_file(page, TTU_MUNLOCK);
				1777	}
				1778
				1779	void __put_anon_vma(struct anon_vma *anon_vma)
				1780	{
				1781	struct anon_vma *root = anon_vma->root;
				1782
				1783	anon_vma_free(anon_vma);
				1784	if (root != anon_vma && atomic_dec_and_test(&root->refcount))
				1785	anon_vma_free(root);
				1786	}
				1787
				1788	#ifdef CONFIG_MIGRATION
				1789	/*
				1790	* rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
				1791	* Called by migrate.c to remove migration ptes, but might be used more later.
				1792	*/
				1793	static int rmap_walk_anon(struct page page, int (rmap_one)(struct page *,
				1794	struct vm_area_struct , unsigned long, void ), void *arg)
				1795	{
				1796	struct anon_vma *anon_vma;
				1797	struct anon_vma_chain *avc;
				1798	int ret = SWAP_AGAIN;
				1799
				1800	/*
				1801	* Note: remove_migration_ptes() cannot use page_lock_anon_vma()
				1802	* because that depends on page_mapped(); but not all its usages
				1803	* are holding mmap_sem. Users without mmap_sem are required to
				1804	* take a reference count to prevent the anon_vma disappearing
				1805	*/
				1806	anon_vma = page_anon_vma(page);
				1807	if (!anon_vma)
				1808	return ret;
				1809	anon_vma_lock(anon_vma);
				1810	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
				1811	struct vm_area_struct *vma = avc->vma;
				1812	unsigned long address = vma_address(page, vma);
				1813	if (address == -EFAULT)
				1814	continue;
				1815	ret = rmap_one(page, vma, address, arg);
				1816	if (ret != SWAP_AGAIN)
				1817	break;
				1818	}
				1819	anon_vma_unlock(anon_vma);
				1820	return ret;
				1821	}
				1822
				1823	static int rmap_walk_file(struct page page, int (rmap_one)(struct page *,
				1824	struct vm_area_struct , unsigned long, void ), void *arg)
				1825	{
				1826	struct address_space *mapping = page->mapping;
				1827	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
				1828	struct vm_area_struct *vma;
				1829	struct prio_tree_iter iter;
				1830	int ret = SWAP_AGAIN;
				1831
				1832	if (!mapping)
				1833	return ret;
				1834	mutex_lock(&mapping->i_mmap_mutex);
				1835	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
				1836	unsigned long address = vma_address(page, vma);
				1837	if (address == -EFAULT)
				1838	continue;
				1839	ret = rmap_one(page, vma, address, arg);
				1840	if (ret != SWAP_AGAIN)
				1841	break;
				1842	}
				1843	/*
				1844	* No nonlinear handling: being always shared, nonlinear vmas
				1845	* never contain migration ptes. Decide what to do about this
				1846	* limitation to linear when we need rmap_walk() on nonlinear.
				1847	*/
				1848	mutex_unlock(&mapping->i_mmap_mutex);
				1849	return ret;
				1850	}
				1851
				/*
				 * rmap_walk - run @rmap_one on every mapping of a locked page
				 * @page: the page whose reverse mappings are walked; must be locked
				 * @rmap_one: callback invoked as rmap_one(page, vma, address, arg)
				 * @arg: opaque cookie handed through to @rmap_one
				 *
				 * Dispatches to the KSM, anonymous or file walker according to the
				 * kind of @page and returns that walker's result.
				 */
				int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
						struct vm_area_struct *, unsigned long, void *), void *arg)
				{
					VM_BUG_ON(!PageLocked(page));

					if (unlikely(PageKsm(page)))
						return rmap_walk_ksm(page, rmap_one, arg);
					if (PageAnon(page))
						return rmap_walk_anon(page, rmap_one, arg);
					return rmap_walk_file(page, rmap_one, arg);
				}
				1864	#endif /* CONFIG_MIGRATION */
				1865
				1866	#ifdef CONFIG_HUGETLB_PAGE
				1867	/*
				1868	* The following three functions are for anonymous (private mapped) hugepages.
				1869	* Unlike common anonymous pages, anonymous hugepages have no accounting code
				1870	* and no lru code, because we handle hugepages differently from common pages.
				1871	*/
				1872	static void __hugepage_set_anon_rmap(struct page *page,
				1873	struct vm_area_struct *vma, unsigned long address, int exclusive)
				1874	{
				1875	struct anon_vma *anon_vma = vma->anon_vma;
				1876
				1877	BUG_ON(!anon_vma);
				1878
				1879	if (PageAnon(page))
				1880	return;
				1881	if (!exclusive)
				1882	anon_vma = anon_vma->root;
				1883
				1884	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
				1885	page->mapping = (struct address_space *) anon_vma;
				1886	page->index = linear_page_index(vma, address);
				1887	}
				1888
				1889	void hugepage_add_anon_rmap(struct page *page,
				1890	struct vm_area_struct *vma, unsigned long address)
				1891	{
				1892	struct anon_vma *anon_vma = vma->anon_vma;
				1893	int first;
				1894
				1895	BUG_ON(!PageLocked(page));
				1896	BUG_ON(!anon_vma);
				1897	/* address might be in next vma when migration races vma_adjust */
				1898	first = atomic_inc_and_test(&page->_mapcount);
				1899	if (first)
				1900	__hugepage_set_anon_rmap(page, vma, address, 0);
				1901	}
				1902
				1903	void hugepage_add_new_anon_rmap(struct page *page,
				1904	struct vm_area_struct *vma, unsigned long address)
				1905	{
				1906	BUG_ON(address < vma->vm_start \|\| address >= vma->vm_end);
				1907	atomic_set(&page->_mapcount, 0);
				1908	__hugepage_set_anon_rmap(page, vma, address, 1);
				1909	}
				1910	#endif /* CONFIG_HUGETLB_PAGE */