Blame - ap/os/linux/linux-3.4.x/mm/shmem.c - T106_DC

blob: 4cb186474353579fd73ff7f0f5c6d7f06facf11e [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame^]	1	/*
				2	* Resizable virtual memory filesystem for Linux.
				3	*
				4	* Copyright (C) 2000 Linus Torvalds.
				5	* 2000 Transmeta Corp.
				6	* 2000-2001 Christoph Rohland
				7	* 2000-2001 SAP AG
				8	* 2002 Red Hat Inc.
				9	* Copyright (C) 2002-2011 Hugh Dickins.
				10	* Copyright (C) 2011 Google Inc.
				11	* Copyright (C) 2002-2005 VERITAS Software Corporation.
				12	* Copyright (C) 2004 Andi Kleen, SuSE Labs
				13	*
				14	* Extended attribute support for tmpfs:
				15	* Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
				16	* Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
				17	*
				18	* tiny-shmem:
				19	* Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com>
				20	*
				21	* This file is released under the GPL.
				22	*/
				23
				24	#include <linux/fs.h>
				25	#include <linux/init.h>
				26	#include <linux/vfs.h>
				27	#include <linux/mount.h>
				28	#include <linux/pagemap.h>
				29	#include <linux/file.h>
				30	#include <linux/mm.h>
				31	#include <linux/export.h>
				32	#include <linux/swap.h>
				33
				34	static struct vfsmount *shm_mnt;
				35
				36	#ifdef CONFIG_SHMEM
				37	/*
				38	* This virtual memory filesystem is heavily based on the ramfs. It
				39	* extends ramfs by the ability to use swap and honor resource limits
				40	* which makes it a completely usable filesystem.
				41	*/
				42
				43	#include <linux/xattr.h>
				44	#include <linux/exportfs.h>
				45	#include <linux/posix_acl.h>
				46	#include <linux/generic_acl.h>
				47	#include <linux/mman.h>
				48	#include <linux/string.h>
				49	#include <linux/slab.h>
				50	#include <linux/backing-dev.h>
				51	#include <linux/shmem_fs.h>
				52	#include <linux/writeback.h>
				53	#include <linux/blkdev.h>
				54	#include <linux/pagevec.h>
				55	#include <linux/percpu_counter.h>
				56	#include <linux/splice.h>
				57	#include <linux/security.h>
				58	#include <linux/swapops.h>
				59	#include <linux/mempolicy.h>
				60	#include <linux/namei.h>
				61	#include <linux/ctype.h>
				62	#include <linux/migrate.h>
				63	#include <linux/highmem.h>
				64	#include <linux/seq_file.h>
				65	#include <linux/magic.h>
				66
				67	#include <asm/uaccess.h>
				68	#include <asm/pgtable.h>
				69
				/* How many 512-byte units fit in one page-cache page. */
				#define BLOCKS_PER_PAGE		(PAGE_CACHE_SIZE / 512)
				/* Bytes -> pages: align @size with PAGE_CACHE_ALIGN, then shift by PAGE_SHIFT. */
				#define VM_ACCT(size)		(PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)

				/* A directory's i_size pretends every entry occupies this many bytes */
				#define BOGO_DIRENT_SIZE	20

				/* Symlinks no longer than this are kmalloc'ed, not kept in a swappable page */
				#define SHORT_SYMLINK_LEN	128
				78
				79	/*
				80	* vmtruncate_range() communicates with shmem_fault via
				81	* inode->i_private (with i_mutex making sure that it has only one user at
				82	* a time): we would prefer not to enlarge the shmem inode just for that.
				83	*/
				84	struct shmem_falloc {
				85	wait_queue_head_t waitq; / faults into hole wait for punch to end */
				86	pgoff_t start; /* start of range currently being fallocated */
				87	pgoff_t next; /* the next page offset to be fallocated */
				88	};
				89
				90	struct shmem_xattr {
				91	struct list_head list; /* anchored by shmem_inode_info->xattr_list */
				92	char name; / xattr name */
				93	size_t size;
				94	char value[0];
				95	};
				96
				/*
				 * How shmem_getpage may satisfy a request: whether it may go beyond
				 * i_size and whether it may allocate a fresh page.
				 */
				enum sgp_type {
					/* stay within i_size; never allocate a page */
					SGP_READ,
					/* stay within i_size; a page may be allocated */
					SGP_CACHE,
					/* as SGP_CACHE, but set new page dirty */
					SGP_DIRTY,
					/* i_size may be exceeded; a page may be allocated */
					SGP_WRITE,
				};
				104
				105	#ifdef CONFIG_TMPFS
				106	static unsigned long shmem_default_max_blocks(void)
				107	{
				108	return totalram_pages / 2;
				109	}
				110
				111	static unsigned long shmem_default_max_inodes(void)
				112	{
				113	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
				114	}
				115	#endif
				116
				117	static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
				118	struct page *pagep, enum sgp_type sgp, gfp_t gfp, int fault_type);
				119
				120	static inline int shmem_getpage(struct inode *inode, pgoff_t index,
				121	struct page *pagep, enum sgp_type sgp, int fault_type)
				122	{
				123	return shmem_getpage_gfp(inode, index, pagep, sgp,
				124	mapping_gfp_mask(inode->i_mapping), fault_type);
				125	}
				126
				127	static inline struct shmem_sb_info SHMEM_SB(struct super_block sb)
				128	{
				129	return sb->s_fs_info;
				130	}
				131
				132	/*
				133	* shmem_file_setup pre-accounts the whole fixed size of a VM object,
				134	* for shared memory and for shared anonymous (/dev/zero) mappings
				135	* (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1),
				136	* consistent with the pre-accounting of private mappings ...
				137	*/
				138	static inline int shmem_acct_size(unsigned long flags, loff_t size)
				139	{
				140	return (flags & VM_NORESERVE) ?
				141	0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
				142	}
				143
				144	static inline void shmem_unacct_size(unsigned long flags, loff_t size)
				145	{
				146	if (!(flags & VM_NORESERVE))
				147	vm_unacct_memory(VM_ACCT(size));
				148	}
				149
				150	/*
				151	* ... whereas tmpfs objects are accounted incrementally as
				152	* pages are allocated, in order to allow huge sparse files.
				153	* shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM,
				154	* so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM.
				155	*/
				156	static inline int shmem_acct_block(unsigned long flags)
				157	{
				158	return (flags & VM_NORESERVE) ?
				159	security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_CACHE_SIZE)) : 0;
				160	}
				161
				162	static inline void shmem_unacct_blocks(unsigned long flags, long pages)
				163	{
				164	if (flags & VM_NORESERVE)
				165	vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
				166	}
				167
				168	static const struct super_operations shmem_ops;
				169	static const struct address_space_operations shmem_aops;
				170	static const struct file_operations shmem_file_operations;
				171	static const struct inode_operations shmem_inode_operations;
				172	static const struct inode_operations shmem_dir_inode_operations;
				173	static const struct inode_operations shmem_special_inode_operations;
				174	static const struct vm_operations_struct shmem_vm_ops;
				175
				176	static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
				177	.ra_pages = 0, /* No readahead */
				178	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK \| BDI_CAP_SWAP_BACKED,
				179	};
				180
				181	static LIST_HEAD(shmem_swaplist);
				182	static DEFINE_MUTEX(shmem_swaplist_mutex);
				183
				184	static int shmem_reserve_inode(struct super_block *sb)
				185	{
				186	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
				187	if (sbinfo->max_inodes) {
				188	spin_lock(&sbinfo->stat_lock);
				189	if (!sbinfo->free_inodes) {
				190	spin_unlock(&sbinfo->stat_lock);
				191	return -ENOSPC;
				192	}
				193	sbinfo->free_inodes--;
				194	spin_unlock(&sbinfo->stat_lock);
				195	}
				196	return 0;
				197	}
				198
				199	static void shmem_free_inode(struct super_block *sb)
				200	{
				201	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
				202	if (sbinfo->max_inodes) {
				203	spin_lock(&sbinfo->stat_lock);
				204	sbinfo->free_inodes++;
				205	spin_unlock(&sbinfo->stat_lock);
				206	}
				207	}
				208
				209	/**
				210	* shmem_recalc_inode - recalculate the block usage of an inode
				211	* @inode: inode to recalc
				212	*
				213	* We have to calculate the free blocks since the mm can drop
				214	* undirtied hole pages behind our back.
				215	*
				216	* But normally info->alloced == inode->i_mapping->nrpages + info->swapped
				217	* So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
				218	*
				219	* It has to be called with the spinlock held.
				220	*/
				221	static void shmem_recalc_inode(struct inode *inode)
				222	{
				223	struct shmem_inode_info *info = SHMEM_I(inode);
				224	long freed;
				225
				226	freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
				227	if (freed > 0) {
				228	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
				229	if (sbinfo->max_blocks)
				230	percpu_counter_add(&sbinfo->used_blocks, -freed);
				231	info->alloced -= freed;
				232	inode->i_blocks -= freed * BLOCKS_PER_PAGE;
				233	shmem_unacct_blocks(info->flags, freed);
				234	}
				235	}
				236
				237	/*
				238	* Replace item expected in radix tree by a new item, while holding tree lock.
				239	*/
				240	static int shmem_radix_tree_replace(struct address_space *mapping,
				241	pgoff_t index, void expected, void replacement)
				242	{
				243	void **pslot;
				244	void *item = NULL;
				245
				246	VM_BUG_ON(!expected);
				247	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
				248	if (pslot)
				249	item = radix_tree_deref_slot_protected(pslot,
				250	&mapping->tree_lock);
				251	if (item != expected)
				252	return -ENOENT;
				253	if (replacement)
				254	radix_tree_replace_slot(pslot, replacement);
				255	else
				256	radix_tree_delete(&mapping->page_tree, index);
				257	return 0;
				258	}
				259
				260	/*
				261	* Like add_to_page_cache_locked, but error if expected item has gone.
				262	*/
				263	static int shmem_add_to_page_cache(struct page *page,
				264	struct address_space *mapping,
				265	pgoff_t index, gfp_t gfp, void *expected)
				266	{
				267	int error = 0;
				268
				269	VM_BUG_ON(!PageLocked(page));
				270	VM_BUG_ON(!PageSwapBacked(page));
				271
				272	if (!expected)
				273	error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
				274	if (!error) {
				275	page_cache_get(page);
				276	page->mapping = mapping;
				277	page->index = index;
				278
				279	spin_lock_irq(&mapping->tree_lock);
				280	if (!expected)
				281	error = radix_tree_insert(&mapping->page_tree,
				282	index, page);
				283	else
				284	error = shmem_radix_tree_replace(mapping, index,
				285	expected, page);
				286	if (!error) {
				287	mapping->nrpages++;
				288	__inc_zone_page_state(page, NR_FILE_PAGES);
				289	#ifdef CONFIG_LIMIT_PAGE_CACHE
				290	__inc_zone_page_state(page, NR_TMPFS_PAGES);
				291	#endif
				292	__inc_zone_page_state(page, NR_SHMEM);
				293	spin_unlock_irq(&mapping->tree_lock);
				294	} else {
				295	page->mapping = NULL;
				296	spin_unlock_irq(&mapping->tree_lock);
				297	page_cache_release(page);
				298	}
				299	if (!expected)
				300	radix_tree_preload_end();
				301	}
				302	if (error)
				303	mem_cgroup_uncharge_cache_page(page);
				304	return error;
				305	}
				306
				307	/*
				308	* Like delete_from_page_cache, but substitutes swap for page.
				309	*/
				310	static void shmem_delete_from_page_cache(struct page page, void radswap)
				311	{
				312	struct address_space *mapping = page->mapping;
				313	int error;
				314	unsigned long flags;
				315
				316	spin_lock_irq(&mapping->tree_lock);
				317	local_irq_save(flags);
				318	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
				319	page->mapping = NULL;
				320	mapping->nrpages--;
				321	__dec_zone_page_state(page, NR_FILE_PAGES);
				322	#ifdef CONFIG_LIMIT_PAGE_CACHE
				323	__dec_zone_page_state(page, NR_TMPFS_PAGES);
				324	#endif
				325	__dec_zone_page_state(page, NR_SHMEM);
				326	local_irq_restore(flags);
				327	spin_unlock_irq(&mapping->tree_lock);
				328	page_cache_release(page);
				329	BUG_ON(error);
				330	}
				331
				332	/*
				333	* Like find_get_pages, but collecting swap entries as well as pages.
				334	*/
				335	static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
				336	pgoff_t start, unsigned int nr_pages,
				337	struct page *pages, pgoff_t indices)
				338	{
				339	unsigned int i;
				340	unsigned int ret;
				341	unsigned int nr_found;
				342
				343	rcu_read_lock();
				344	restart:
				345	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
				346	(void ***)pages, indices, start, nr_pages);
				347	ret = 0;
				348	for (i = 0; i < nr_found; i++) {
				349	struct page *page;
				350	repeat:
				351	page = radix_tree_deref_slot((void **)pages[i]);
				352	if (unlikely(!page))
				353	continue;
				354	if (radix_tree_exception(page)) {
				355	if (radix_tree_deref_retry(page))
				356	goto restart;
				357	/*
				358	* Otherwise, we must be storing a swap entry
				359	* here as an exceptional entry: so return it
				360	* without attempting to raise page count.
				361	*/
				362	goto export;
				363	}
				364	if (!page_cache_get_speculative(page))
				365	goto repeat;
				366
				367	/* Has the page moved? */
				368	if (unlikely(page != ((void *)pages[i]))) {
				369	page_cache_release(page);
				370	goto repeat;
				371	}
				372	export:
				373	indices[ret] = indices[i];
				374	pages[ret] = page;
				375	ret++;
				376	}
				377	if (unlikely(!ret && nr_found))
				378	goto restart;
				379	rcu_read_unlock();
				380	return ret;
				381	}
				382
				383	/*
				384	* Remove swap entry from radix tree, free the swap and its page cache.
				385	*/
				386	static int shmem_free_swap(struct address_space *mapping,
				387	pgoff_t index, void *radswap)
				388	{
				389	int error;
				390
				391	spin_lock_irq(&mapping->tree_lock);
				392	error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
				393	spin_unlock_irq(&mapping->tree_lock);
				394	if (!error)
				395	free_swap_and_cache(radix_to_swp_entry(radswap));
				396	return error;
				397	}
				398
				399	/*
				400	* Pagevec may contain swap entries, so shuffle up pages before releasing.
				401	*/
				402	static void shmem_deswap_pagevec(struct pagevec *pvec)
				403	{
				404	int i, j;
				405
				406	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
				407	struct page *page = pvec->pages[i];
				408	if (!radix_tree_exceptional_entry(page))
				409	pvec->pages[j++] = page;
				410	}
				411	pvec->nr = j;
				412	}
				413
				414	/*
				415	* SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
				416	*/
				417	void shmem_unlock_mapping(struct address_space *mapping)
				418	{
				419	struct pagevec pvec;
				420	pgoff_t indices[PAGEVEC_SIZE];
				421	pgoff_t index = 0;
				422
				423	pagevec_init(&pvec, 0);
				424	/*
				425	* Minor point, but we might as well stop if someone else SHM_LOCKs it.
				426	*/
				427	while (!mapping_unevictable(mapping)) {
				428	/*
				429	* Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
				430	* has finished, if it hits a row of PAGEVEC_SIZE swap entries.
				431	*/
				432	pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
				433	PAGEVEC_SIZE, pvec.pages, indices);
				434	if (!pvec.nr)
				435	break;
				436	index = indices[pvec.nr - 1] + 1;
				437	shmem_deswap_pagevec(&pvec);
				438	check_move_unevictable_pages(pvec.pages, pvec.nr);
				439	pagevec_release(&pvec);
				440	cond_resched();
				441	}
				442	}
				443
				444	/*
				445	* Remove range of pages and swap entries from radix tree, and free them.
				446	*/
				447	void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
				448	{
				449	struct address_space *mapping = inode->i_mapping;
				450	struct shmem_inode_info *info = SHMEM_I(inode);
				451	pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
				452	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
				453	pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
				454	struct pagevec pvec;
				455	pgoff_t indices[PAGEVEC_SIZE];
				456	long nr_swaps_freed = 0;
				457	pgoff_t index;
				458	int i;
				459
				460	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
				461
				462	pagevec_init(&pvec, 0);
				463	index = start;
				464	while (index <= end) {
				465	pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
				466	min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
				467	pvec.pages, indices);
				468	if (!pvec.nr)
				469	break;
				470	mem_cgroup_uncharge_start();
				471	for (i = 0; i < pagevec_count(&pvec); i++) {
				472	struct page *page = pvec.pages[i];
				473
				474	index = indices[i];
				475	if (index > end)
				476	break;
				477
				478	if (radix_tree_exceptional_entry(page)) {
				479	nr_swaps_freed += !shmem_free_swap(mapping,
				480	index, page);
				481	continue;
				482	}
				483
				484	if (!trylock_page(page))
				485	continue;
				486	if (page->mapping == mapping) {
				487	VM_BUG_ON(PageWriteback(page));
				488	truncate_inode_page(mapping, page);
				489	}
				490	unlock_page(page);
				491	}
				492	shmem_deswap_pagevec(&pvec);
				493	pagevec_release(&pvec);
				494	mem_cgroup_uncharge_end();
				495	cond_resched();
				496	index++;
				497	}
				498
				499	if (partial) {
				500	struct page *page = NULL;
				501	shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
				502	if (page) {
				503	zero_user_segment(page, partial, PAGE_CACHE_SIZE);
				504	set_page_dirty(page);
				505	unlock_page(page);
				506	page_cache_release(page);
				507	}
				508	}
				509
				510	index = start;
				511	while (index <= end) {
				512	cond_resched();
				513	pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
				514	min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
				515	pvec.pages, indices);
				516	if (!pvec.nr) {
				517	/* If all gone or hole-punch, we're done */
				518	if (index == start \|\| end != -1)
				519	break;
				520	/* But if truncating, restart to make sure all gone */
				521	index = start;
				522	continue;
				523	}
				524	mem_cgroup_uncharge_start();
				525	for (i = 0; i < pagevec_count(&pvec); i++) {
				526	struct page *page = pvec.pages[i];
				527
				528	index = indices[i];
				529	if (index > end)
				530	break;
				531
				532	if (radix_tree_exceptional_entry(page)) {
				533	if (shmem_free_swap(mapping, index, page)) {
				534	/* Swap was replaced by page: retry */
				535	index--;
				536	break;
				537	}
				538	nr_swaps_freed++;
				539	continue;
				540	}
				541
				542	lock_page(page);
				543	if (page->mapping == mapping) {
				544	VM_BUG_ON(PageWriteback(page));
				545	truncate_inode_page(mapping, page);
				546	} else {
				547	/* Page was replaced by swap: retry */
				548	unlock_page(page);
				549	index--;
				550	break;
				551	}
				552	unlock_page(page);
				553	}
				554	shmem_deswap_pagevec(&pvec);
				555	pagevec_release(&pvec);
				556	mem_cgroup_uncharge_end();
				557	index++;
				558	}
				559
				560	spin_lock(&info->lock);
				561	info->swapped -= nr_swaps_freed;
				562	shmem_recalc_inode(inode);
				563	spin_unlock(&info->lock);
				564
				565	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
				566	}
				567	EXPORT_SYMBOL_GPL(shmem_truncate_range);
				568
				569	static int shmem_setattr(struct dentry dentry, struct iattr attr)
				570	{
				571	struct inode *inode = dentry->d_inode;
				572	int error;
				573
				574	error = inode_change_ok(inode, attr);
				575	if (error)
				576	return error;
				577
				578	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
				579	loff_t oldsize = inode->i_size;
				580	loff_t newsize = attr->ia_size;
				581
				582	if (newsize != oldsize) {
				583	i_size_write(inode, newsize);
				584	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
				585	}
				586	if (newsize < oldsize) {
				587	loff_t holebegin = round_up(newsize, PAGE_SIZE);
				588	unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
				589	shmem_truncate_range(inode, newsize, (loff_t)-1);
				590	/* unmap again to remove racily COWed private pages */
				591	unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
				592	}
				593	}
				594
				595	setattr_copy(inode, attr);
				596	#ifdef CONFIG_TMPFS_POSIX_ACL
				597	if (attr->ia_valid & ATTR_MODE)
				598	error = generic_acl_chmod(inode);
				599	#endif
				600	return error;
				601	}
				602
				603	static void shmem_evict_inode(struct inode *inode)
				604	{
				605	struct shmem_inode_info *info = SHMEM_I(inode);
				606	struct shmem_xattr xattr, nxattr;
				607
				608	if (inode->i_mapping->a_ops == &shmem_aops) {
				609	shmem_unacct_size(info->flags, inode->i_size);
				610	inode->i_size = 0;
				611	shmem_truncate_range(inode, 0, (loff_t)-1);
				612	if (!list_empty(&info->swaplist)) {
				613	mutex_lock(&shmem_swaplist_mutex);
				614	list_del_init(&info->swaplist);
				615	mutex_unlock(&shmem_swaplist_mutex);
				616	}
				617	} else
				618	kfree(info->symlink);
				619
				620	list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) {
				621	kfree(xattr->name);
				622	kfree(xattr);
				623	}
				624	WARN_ON(inode->i_blocks);
				625	shmem_free_inode(inode->i_sb);
				626	end_writeback(inode);
				627	}
				628
				629	/*
				630	* If swap found in inode, free it and move page from swapcache to filecache.
				631	*/
				632	static int shmem_unuse_inode(struct shmem_inode_info *info,
				633	swp_entry_t swap, struct page *page)
				634	{
				635	struct address_space *mapping = info->vfs_inode.i_mapping;
				636	void *radswap;
				637	pgoff_t index;
				638	int error;
				639
				640	radswap = swp_to_radix_entry(swap);
				641	index = radix_tree_locate_item(&mapping->page_tree, radswap);
				642	if (index == -1)
				643	return 0;
				644
				645	/*
				646	* Move _head_ to start search for next from here.
				647	* But be careful: shmem_evict_inode checks list_empty without taking
				648	* mutex, and there's an instant in list_move_tail when info->swaplist
				649	* would appear empty, if it were the only one on shmem_swaplist.
				650	*/
				651	if (shmem_swaplist.next != &info->swaplist)
				652	list_move_tail(&shmem_swaplist, &info->swaplist);
				653
				654	/*
				655	* We rely on shmem_swaplist_mutex, not only to protect the swaplist,
				656	* but also to hold up shmem_evict_inode(): so inode cannot be freed
				657	* beneath us (pagelock doesn't help until the page is in pagecache).
				658	*/
				659	error = shmem_add_to_page_cache(page, mapping, index,
				660	GFP_NOWAIT, radswap);
				661	/* which does mem_cgroup_uncharge_cache_page on error */
				662
				663	if (error != -ENOMEM) {
				664	/*
				665	* Truncation and eviction use free_swap_and_cache(), which
				666	* only does trylock page: if we raced, best clean up here.
				667	*/
				668	delete_from_swap_cache(page);
				669	set_page_dirty(page);
				670	if (!error) {
				671	spin_lock(&info->lock);
				672	info->swapped--;
				673	spin_unlock(&info->lock);
				674	swap_free(swap);
				675	}
				676	error = 1; /* not an error, but entry was found */
				677	}
				678	return error;
				679	}
				680
				681	/*
				682	* Search through swapped inodes to find and replace swap by page.
				683	*/
				684	int shmem_unuse(swp_entry_t swap, struct page *page)
				685	{
				686	struct list_head this, next;
				687	struct shmem_inode_info *info;
				688	int found = 0;
				689	int error;
				690
				691	/*
				692	* Charge page using GFP_KERNEL while we can wait, before taking
				693	* the shmem_swaplist_mutex which might hold up shmem_writepage().
				694	* Charged back to the user (not to caller) when swap account is used.
				695	*/
				696	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
				697	if (error)
				698	goto out;
				699	/* No radix_tree_preload: swap entry keeps a place for page in tree */
				700
				701	mutex_lock(&shmem_swaplist_mutex);
				702	list_for_each_safe(this, next, &shmem_swaplist) {
				703	info = list_entry(this, struct shmem_inode_info, swaplist);
				704	if (info->swapped)
				705	found = shmem_unuse_inode(info, swap, page);
				706	else
				707	list_del_init(&info->swaplist);
				708	cond_resched();
				709	if (found)
				710	break;
				711	}
				712	mutex_unlock(&shmem_swaplist_mutex);
				713
				714	if (!found)
				715	mem_cgroup_uncharge_cache_page(page);
				716	if (found < 0)
				717	error = found;
				718	out:
				719	unlock_page(page);
				720	page_cache_release(page);
				721	return error;
				722	}
				723
				724	/*
				725	* Move the page from the page cache to the swap cache.
				726	*/
				727	static int shmem_writepage(struct page page, struct writeback_control wbc)
				728	{
				729	struct shmem_inode_info *info;
				730	struct address_space *mapping;
				731	struct inode *inode;
				732	swp_entry_t swap;
				733	pgoff_t index;
				734
				735	BUG_ON(!PageLocked(page));
				736	mapping = page->mapping;
				737	index = page->index;
				738	inode = mapping->host;
				739	info = SHMEM_I(inode);
				740	if (info->flags & VM_LOCKED)
				741	goto redirty;
				742	if (!total_swap_pages)
				743	goto redirty;
				744
				745	/*
				746	* shmem_backing_dev_info's capabilities prevent regular writeback or
				747	* sync from ever calling shmem_writepage; but a stacking filesystem
				748	* might use ->writepage of its underlying filesystem, in which case
				749	* tmpfs should write out to swap only in response to memory pressure,
				750	* and not for the writeback threads or sync.
				751	*/
				752	if (!wbc->for_reclaim) {
				753	WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
				754	goto redirty;
				755	}
				756	swap = get_swap_page();
				757	if (!swap.val)
				758	goto redirty;
				759
				760	/*
				761	* Add inode to shmem_unuse()'s list of swapped-out inodes,
				762	* if it's not already there. Do it now before the page is
				763	* moved to swap cache, when its pagelock no longer protects
				764	* the inode from eviction. But don't unlock the mutex until
				765	* we've incremented swapped, because shmem_unuse_inode() will
				766	* prune a !swapped inode from the swaplist under this mutex.
				767	*/
				768	mutex_lock(&shmem_swaplist_mutex);
				769	if (list_empty(&info->swaplist))
				770	list_add_tail(&info->swaplist, &shmem_swaplist);
				771
				772	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
				773	swap_shmem_alloc(swap);
				774	shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
				775
				776	spin_lock(&info->lock);
				777	info->swapped++;
				778	shmem_recalc_inode(inode);
				779	spin_unlock(&info->lock);
				780
				781	mutex_unlock(&shmem_swaplist_mutex);
				782	BUG_ON(page_mapped(page));
				783	swap_writepage(page, wbc);
				784	return 0;
				785	}
				786
				787	mutex_unlock(&shmem_swaplist_mutex);
				788	swapcache_free(swap, NULL);
				789	redirty:
				790	set_page_dirty(page);
				791	if (wbc->for_reclaim)
				792	return AOP_WRITEPAGE_ACTIVATE; /* Return with page locked */
				793	unlock_page(page);
				794	return 0;
				795	}
				796
				797	#ifdef CONFIG_NUMA
				798	#ifdef CONFIG_TMPFS
				799	static void shmem_show_mpol(struct seq_file seq, struct mempolicy mpol)
				800	{
				801	char buffer[64];
				802
				803	if (!mpol \|\| mpol->mode == MPOL_DEFAULT)
				804	return; /* show nothing */
				805
				806	mpol_to_str(buffer, sizeof(buffer), mpol, 1);
				807
				808	seq_printf(seq, ",mpol=%s", buffer);
				809	}
				810
				811	static struct mempolicy shmem_get_sbmpol(struct shmem_sb_info sbinfo)
				812	{
				813	struct mempolicy *mpol = NULL;
				814	if (sbinfo->mpol) {
				815	spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */
				816	mpol = sbinfo->mpol;
				817	mpol_get(mpol);
				818	spin_unlock(&sbinfo->stat_lock);
				819	}
				820	return mpol;
				821	}
				822	#endif /* CONFIG_TMPFS */
				823
				824	static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
				825	struct shmem_inode_info *info, pgoff_t index)
				826	{
				827	struct vm_area_struct pvma;
				828	struct page *page;
				829
				830	/* Create a pseudo vma that just contains the policy */
				831	pvma.vm_start = 0;
				832	pvma.vm_pgoff = index;
				833	pvma.vm_ops = NULL;
				834	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
				835
				836	page = swapin_readahead(swap, gfp, &pvma, 0);
				837
				838	/* Drop reference taken by mpol_shared_policy_lookup() */
				839	mpol_cond_put(pvma.vm_policy);
				840
				841	return page;
				842	}
				843
				844	static struct page *shmem_alloc_page(gfp_t gfp,
				845	struct shmem_inode_info *info, pgoff_t index)
				846	{
				847	struct vm_area_struct pvma;
				848	struct page *page;
				849
				850	/* Create a pseudo vma that just contains the policy */
				851	pvma.vm_start = 0;
				852	pvma.vm_pgoff = index;
				853	pvma.vm_ops = NULL;
				854	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
				855
				856	page = alloc_page_vma(gfp, &pvma, 0);
				857
				858	/* Drop reference taken by mpol_shared_policy_lookup() */
				859	mpol_cond_put(pvma.vm_policy);
				860
				861	return page;
				862	}
				863	#else /* !CONFIG_NUMA */
				864	#ifdef CONFIG_TMPFS
				/* Without CONFIG_NUMA there is no memory policy to show. */
				static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
				{
				}
				868	#endif /* CONFIG_TMPFS */
				869
				870	static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
				871	struct shmem_inode_info *info, pgoff_t index)
				872	{
				873	return swapin_readahead(swap, gfp, NULL, 0);
				874	}
				875
				876	static inline struct page *shmem_alloc_page(gfp_t gfp,
				877	struct shmem_inode_info *info, pgoff_t index)
				878	{
				879	return alloc_page(gfp);
				880	}
				881	#endif /* CONFIG_NUMA */
				882
				883	#if !defined(CONFIG_NUMA) \|\| !defined(CONFIG_TMPFS)
				884	static inline struct mempolicy shmem_get_sbmpol(struct shmem_sb_info sbinfo)
				885	{
				886	return NULL;
				887	}
				888	#endif
				889
				890	/*
				891	* shmem_getpage_gfp - find page in cache, or get from swap, or allocate
				892	*
				893	* If we allocate a new one we do not mark it dirty. That's up to the
				894	* vm. If we swap it in we mark it dirty since we also free the swap
				895	* entry since a page cannot live in both the swap and page cache
				896	*/
				897	static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
				898	struct page *pagep, enum sgp_type sgp, gfp_t gfp, int fault_type)
				899	{
				900	struct address_space *mapping = inode->i_mapping;
				901	struct shmem_inode_info *info;
				902	struct shmem_sb_info *sbinfo;
				903	struct page *page;
				904	swp_entry_t swap;
				905	int error;
				906	int once = 0;
				907
				908	if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
				909	return -EFBIG;
				910	repeat:
				911	swap.val = 0;
				912	page = find_lock_page(mapping, index);
				913	if (radix_tree_exceptional_entry(page)) {
				914	swap = radix_to_swp_entry(page);
				915	page = NULL;
				916	}
				917
				918	if (sgp != SGP_WRITE &&
				919	((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
				920	error = -EINVAL;
				921	goto failed;
				922	}
				923
				924	if (page \|\| (sgp == SGP_READ && !swap.val)) {
				925	/*
				926	* Once we can get the page lock, it must be uptodate:
				927	* if there were an error in reading back from swap,
				928	* the page would not be inserted into the filecache.
				929	*/
				930	BUG_ON(page && !PageUptodate(page));
				931	*pagep = page;
				932	return 0;
				933	}
				934
				935	/*
				936	* Fast cache lookup did not find it:
				937	* bring it back from swap or allocate.
				938	*/
				939	info = SHMEM_I(inode);
				940	sbinfo = SHMEM_SB(inode->i_sb);
				941
				942	if (swap.val) {
				943	/* Look it up and read it in.. */
				944	page = lookup_swap_cache(swap);
				945	if (!page) {
				946	/* here we actually do the io */
				947	if (fault_type)
				948	*fault_type \|= VM_FAULT_MAJOR;
				949	page = shmem_swapin(swap, gfp, info, index);
				950	if (!page) {
				951	error = -ENOMEM;
				952	goto failed;
				953	}
				954	}
				955
				956	/* We have to do this with page locked to prevent races */
				957	lock_page(page);
				958	if (!PageUptodate(page)) {
				959	error = -EIO;
				960	goto failed;
				961	}
				962	wait_on_page_writeback(page);
				963
				964	/* Someone may have already done it for us */
				965	if (page->mapping) {
				966	if (page->mapping == mapping &&
				967	page->index == index)
				968	goto done;
				969	error = -EEXIST;
				970	goto failed;
				971	}
				972
				973	error = mem_cgroup_cache_charge(page, current->mm,
				974	gfp & GFP_RECLAIM_MASK);
				975	if (!error)
				976	error = shmem_add_to_page_cache(page, mapping, index,
				977	gfp, swp_to_radix_entry(swap));
				978	if (error)
				979	goto failed;
				980
				981	spin_lock(&info->lock);
				982	info->swapped--;
				983	shmem_recalc_inode(inode);
				984	spin_unlock(&info->lock);
				985
				986	delete_from_swap_cache(page);
				987	set_page_dirty(page);
				988	swap_free(swap);
				989
				990	} else {
				991	if (shmem_acct_block(info->flags)) {
				992	error = -ENOSPC;
				993	goto failed;
				994	}
				995	if (sbinfo->max_blocks) {
				996	if (percpu_counter_compare(&sbinfo->used_blocks,
				997	sbinfo->max_blocks) >= 0) {
				998	error = -ENOSPC;
				999	goto unacct;
				1000	}
				1001	percpu_counter_inc(&sbinfo->used_blocks);
				1002	}
				1003
				1004	page = shmem_alloc_page(gfp, info, index);
				1005	if (!page) {
				1006	error = -ENOMEM;
				1007	goto decused;
				1008	}
				1009
				1010	SetPageSwapBacked(page);
				1011	__set_page_locked(page);
				1012	error = mem_cgroup_cache_charge(page, current->mm,
				1013	gfp & GFP_RECLAIM_MASK);
				1014	if (!error)
				1015	error = shmem_add_to_page_cache(page, mapping, index,
				1016	gfp, NULL);
				1017	if (error)
				1018	goto decused;
				1019	lru_cache_add_anon(page);
				1020
				1021	spin_lock(&info->lock);
				1022	info->alloced++;
				1023	inode->i_blocks += BLOCKS_PER_PAGE;
				1024	shmem_recalc_inode(inode);
				1025	spin_unlock(&info->lock);
				1026
				1027	clear_highpage(page);
				1028	flush_dcache_page(page);
				1029	SetPageUptodate(page);
				1030	if (sgp == SGP_DIRTY)
				1031	set_page_dirty(page);
				1032	}
				1033	done:
				1034	/* Perhaps the file has been truncated since we checked */
				1035	if (sgp != SGP_WRITE &&
				1036	((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
				1037	error = -EINVAL;
				1038	goto trunc;
				1039	}
				1040	*pagep = page;
				1041	return 0;
				1042
				1043	/*
				1044	* Error recovery.
				1045	*/
				1046	trunc:
				1047	ClearPageDirty(page);
				1048	delete_from_page_cache(page);
				1049	spin_lock(&info->lock);
				1050	info->alloced--;
				1051	inode->i_blocks -= BLOCKS_PER_PAGE;
				1052	spin_unlock(&info->lock);
				1053	decused:
				1054	if (sbinfo->max_blocks)
				1055	percpu_counter_add(&sbinfo->used_blocks, -1);
				1056	unacct:
				1057	shmem_unacct_blocks(info->flags, 1);
				1058	failed:
				1059	if (swap.val && error != -EINVAL) {
				1060	struct page *test = find_get_page(mapping, index);
				1061	if (test && !radix_tree_exceptional_entry(test))
				1062	page_cache_release(test);
				1063	/* Have another try if the entry has changed */
				1064	if (test != swp_to_radix_entry(swap))
				1065	error = -EEXIST;
				1066	}
				1067	if (page) {
				1068	unlock_page(page);
				1069	page_cache_release(page);
				1070	}
				1071	if (error == -ENOSPC && !once++) {
				1072	info = SHMEM_I(inode);
				1073	spin_lock(&info->lock);
				1074	shmem_recalc_inode(inode);
				1075	spin_unlock(&info->lock);
				1076	goto repeat;
				1077	}
				1078	if (error == -EEXIST)
				1079	goto repeat;
				1080	return error;
				1081	}
				1082
				1083	static int shmem_fault(struct vm_area_struct vma, struct vm_fault vmf)
				1084	{
				1085	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
				1086	int error;
				1087	int ret = VM_FAULT_LOCKED;
				1088
				1089	/*
				1090	* Trinity finds that probing a hole which tmpfs is punching can
				1091	* prevent the hole-punch from ever completing: which in turn
				1092	* locks writers out with its hold on i_mutex. So refrain from
				1093	* faulting pages into the hole while it's being punched. Although
				1094	* shmem_truncate_range() does remove the additions, it may be unable to
				1095	* keep up, as each new page needs its own unmap_mapping_range() call,
				1096	* and the i_mmap tree grows ever slower to scan if new vmas are added.
				1097	*
				1098	* It does not matter if we sometimes reach this check just before the
				1099	* hole-punch begins, so that one fault then races with the punch:
				1100	* we just need to make racing faults a rare case.
				1101	*
				1102	* The implementation below would be much simpler if we just used a
				1103	* standard mutex or completion: but we cannot take i_mutex in fault,
				1104	* and bloating every shmem inode for this unlikely case would be sad.
				1105	*/
				1106	if (unlikely(inode->i_private)) {
				1107	struct shmem_falloc *shmem_falloc;
				1108
				1109	spin_lock(&inode->i_lock);
				1110	shmem_falloc = inode->i_private;
				1111	if (shmem_falloc &&
				1112	vmf->pgoff >= shmem_falloc->start &&
				1113	vmf->pgoff < shmem_falloc->next) {
				1114	wait_queue_head_t *shmem_falloc_waitq;
				1115	DEFINE_WAIT(shmem_fault_wait);
				1116
				1117	ret = VM_FAULT_NOPAGE;
				1118	if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
				1119	!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
				1120	/* It's polite to up mmap_sem if we can */
				1121	up_read(&vma->vm_mm->mmap_sem);
				1122	ret = VM_FAULT_RETRY;
				1123	}
				1124
				1125	shmem_falloc_waitq = shmem_falloc->waitq;
				1126	prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
				1127	TASK_UNINTERRUPTIBLE);
				1128	spin_unlock(&inode->i_lock);
				1129	schedule();
				1130
				1131	/*
				1132	* shmem_falloc_waitq points into the vmtruncate_range()
				1133	* stack of the hole-punching task: shmem_falloc_waitq
				1134	* is usually invalid by the time we reach here, but
				1135	* finish_wait() does not dereference it in that case;
				1136	* though i_lock needed lest racing with wake_up_all().
				1137	*/
				1138	spin_lock(&inode->i_lock);
				1139	finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
				1140	spin_unlock(&inode->i_lock);
				1141	return ret;
				1142	}
				1143	spin_unlock(&inode->i_lock);
				1144	}
				1145
				1146	error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
				1147	if (error)
				1148	return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
				1149
				1150	if (ret & VM_FAULT_MAJOR) {
				1151	count_vm_event(PGMAJFAULT);
				1152	mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
				1153	}
				1154	return ret;
				1155	}
				1156
				1157	int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
				1158	{
				1159	/*
				1160	* If the underlying filesystem is not going to provide
				1161	* a way to truncate a range of blocks (punch a hole) -
				1162	* we should return failure right now.
				1163	* Only CONFIG_SHMEM shmem.c ever supported i_op->truncate_range().
				1164	*/
				1165	if (inode->i_op->truncate_range != shmem_truncate_range)
				1166	return -ENOSYS;
				1167
				1168	mutex_lock(&inode->i_mutex);
				1169	{
				1170	struct shmem_falloc shmem_falloc;
				1171	struct address_space *mapping = inode->i_mapping;
				1172	loff_t unmap_start = round_up(lstart, PAGE_SIZE);
				1173	loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
				1174	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
				1175
				1176	shmem_falloc.waitq = &shmem_falloc_waitq;
				1177	shmem_falloc.start = unmap_start >> PAGE_SHIFT;
				1178	shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
				1179	spin_lock(&inode->i_lock);
				1180	inode->i_private = &shmem_falloc;
				1181	spin_unlock(&inode->i_lock);
				1182
				1183	if ((u64)unmap_end > (u64)unmap_start)
				1184	unmap_mapping_range(mapping, unmap_start,
				1185	1 + unmap_end - unmap_start, 0);
				1186	shmem_truncate_range(inode, lstart, lend);
				1187	/* No need to unmap again: hole-punching leaves COWed pages */
				1188
				1189	spin_lock(&inode->i_lock);
				1190	inode->i_private = NULL;
				1191	wake_up_all(&shmem_falloc_waitq);
				1192	spin_unlock(&inode->i_lock);
				1193	}
				1194	mutex_unlock(&inode->i_mutex);
				1195	return 0;
				1196	}
				1197
				1198	#ifdef CONFIG_NUMA
				1199	static int shmem_set_policy(struct vm_area_struct vma, struct mempolicy mpol)
				1200	{
				1201	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
				1202	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
				1203	}
				1204
				1205	static struct mempolicy shmem_get_policy(struct vm_area_struct vma,
				1206	unsigned long addr)
				1207	{
				1208	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
				1209	pgoff_t index;
				1210
				1211	index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
				1212	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
				1213	}
				1214	#endif
				1215
				1216	int shmem_lock(struct file file, int lock, struct user_struct user)
				1217	{
				1218	struct inode *inode = file->f_path.dentry->d_inode;
				1219	struct shmem_inode_info *info = SHMEM_I(inode);
				1220	int retval = -ENOMEM;
				1221
				1222	spin_lock(&info->lock);
				1223	if (lock && !(info->flags & VM_LOCKED)) {
				1224	if (!user_shm_lock(inode->i_size, user))
				1225	goto out_nomem;
				1226	info->flags \|= VM_LOCKED;
				1227	mapping_set_unevictable(file->f_mapping);
				1228	}
				1229	if (!lock && (info->flags & VM_LOCKED) && user) {
				1230	user_shm_unlock(inode->i_size, user);
				1231	info->flags &= ~VM_LOCKED;
				1232	mapping_clear_unevictable(file->f_mapping);
				1233	}
				1234	retval = 0;
				1235
				1236	out_nomem:
				1237	spin_unlock(&info->lock);
				1238	return retval;
				1239	}
				1240
				1241	static int shmem_mmap(struct file file, struct vm_area_struct vma)
				1242	{
				1243	file_accessed(file);
				1244	vma->vm_ops = &shmem_vm_ops;
				1245	vma->vm_flags \|= VM_CAN_NONLINEAR;
				1246	return 0;
				1247	}
				1248
				1249	static struct inode shmem_get_inode(struct super_block sb, const struct inode *dir,
				1250	umode_t mode, dev_t dev, unsigned long flags)
				1251	{
				1252	struct inode *inode;
				1253	struct shmem_inode_info *info;
				1254	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
				1255
				1256	if (shmem_reserve_inode(sb))
				1257	return NULL;
				1258
				1259	inode = new_inode(sb);
				1260	if (inode) {
				1261	inode->i_ino = get_next_ino();
				1262	inode_init_owner(inode, dir, mode);
				1263	inode->i_blocks = 0;
				1264	inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
				1265	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
				1266	inode->i_generation = get_seconds();
				1267	#ifdef CONFIG_LIMIT_PAGE_CACHE
				1268	mapping_set_gfp_mask(inode->i_mapping,
				1269	mapping_gfp_mask(inode->i_mapping) \| __GFP_PAGETMPFS);
				1270	#endif
				1271	info = SHMEM_I(inode);
				1272	memset(info, 0, (char )inode - (char )info);
				1273	spin_lock_init(&info->lock);
				1274	info->flags = flags & VM_NORESERVE;
				1275	INIT_LIST_HEAD(&info->swaplist);
				1276	INIT_LIST_HEAD(&info->xattr_list);
				1277	cache_no_acl(inode);
				1278
				1279	switch (mode & S_IFMT) {
				1280	default:
				1281	inode->i_op = &shmem_special_inode_operations;
				1282	init_special_inode(inode, mode, dev);
				1283	break;
				1284	case S_IFREG:
				1285	inode->i_mapping->a_ops = &shmem_aops;
				1286	inode->i_op = &shmem_inode_operations;
				1287	inode->i_fop = &shmem_file_operations;
				1288	mpol_shared_policy_init(&info->policy,
				1289	shmem_get_sbmpol(sbinfo));
				1290	break;
				1291	case S_IFDIR:
				1292	inc_nlink(inode);
				1293	/* Some things misbehave if size == 0 on a directory */
				1294	inode->i_size = 2 * BOGO_DIRENT_SIZE;
				1295	inode->i_op = &shmem_dir_inode_operations;
				1296	inode->i_fop = &simple_dir_operations;
				1297	break;
				1298	case S_IFLNK:
				1299	/*
				1300	* Must not load anything in the rbtree,
				1301	* mpol_free_shared_policy will not be called.
				1302	*/
				1303	mpol_shared_policy_init(&info->policy, NULL);
				1304	break;
				1305	}
				1306	} else
				1307	shmem_free_inode(sb);
				1308	return inode;
				1309	}
				1310
				1311	#ifdef CONFIG_TMPFS
				1312	static const struct inode_operations shmem_symlink_inode_operations;
				1313	static const struct inode_operations shmem_short_symlink_operations;
				1314
				1315	#ifdef CONFIG_TMPFS_XATTR
				1316	static int shmem_initxattrs(struct inode , const struct xattr , void *);
				1317	#else
				1318	#define shmem_initxattrs NULL
				1319	#endif
				1320
				1321	static int
				1322	shmem_write_begin(struct file file, struct address_space mapping,
				1323	loff_t pos, unsigned len, unsigned flags,
				1324	struct page pagep, void fsdata)
				1325	{
				1326	struct inode *inode = mapping->host;
				1327	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
				1328	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
				1329	}
				1330
				1331	static int
				1332	shmem_write_end(struct file file, struct address_space mapping,
				1333	loff_t pos, unsigned len, unsigned copied,
				1334	struct page page, void fsdata)
				1335	{
				1336	struct inode *inode = mapping->host;
				1337
				1338	if (pos + copied > inode->i_size)
				1339	i_size_write(inode, pos + copied);
				1340
				1341	set_page_dirty(page);
				1342	unlock_page(page);
				1343	page_cache_release(page);
				1344
				1345	return copied;
				1346	}
				1347
				1348	static void do_shmem_file_read(struct file filp, loff_t ppos, read_descriptor_t *desc, read_actor_t actor)
				1349	{
				1350	struct inode *inode = filp->f_path.dentry->d_inode;
				1351	struct address_space *mapping = inode->i_mapping;
				1352	pgoff_t index;
				1353	unsigned long offset;
				1354	enum sgp_type sgp = SGP_READ;
				1355
				1356	/*
				1357	* Might this read be for a stacking filesystem? Then when reading
				1358	* holes of a sparse file, we actually need to allocate those pages,
				1359	* and even mark them dirty, so it cannot exceed the max_blocks limit.
				1360	*/
				1361	if (segment_eq(get_fs(), KERNEL_DS))
				1362	sgp = SGP_DIRTY;
				1363
				1364	index = *ppos >> PAGE_CACHE_SHIFT;
				1365	offset = *ppos & ~PAGE_CACHE_MASK;
				1366
				1367	for (;;) {
				1368	struct page *page = NULL;
				1369	pgoff_t end_index;
				1370	unsigned long nr, ret;
				1371	loff_t i_size = i_size_read(inode);
				1372
				1373	end_index = i_size >> PAGE_CACHE_SHIFT;
				1374	if (index > end_index)
				1375	break;
				1376	if (index == end_index) {
				1377	nr = i_size & ~PAGE_CACHE_MASK;
				1378	if (nr <= offset)
				1379	break;
				1380	}
				1381
				1382	desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
				1383	if (desc->error) {
				1384	if (desc->error == -EINVAL)
				1385	desc->error = 0;
				1386	break;
				1387	}
				1388	if (page)
				1389	unlock_page(page);
				1390
				1391	/*
				1392	* We must evaluate after, since reads (unlike writes)
				1393	* are called without i_mutex protection against truncate
				1394	*/
				1395	nr = PAGE_CACHE_SIZE;
				1396	i_size = i_size_read(inode);
				1397	end_index = i_size >> PAGE_CACHE_SHIFT;
				1398	if (index == end_index) {
				1399	nr = i_size & ~PAGE_CACHE_MASK;
				1400	if (nr <= offset) {
				1401	if (page)
				1402	page_cache_release(page);
				1403	break;
				1404	}
				1405	}
				1406	nr -= offset;
				1407
				1408	if (page) {
				1409	/*
				1410	* If users can be writing to this page using arbitrary
				1411	* virtual addresses, take care about potential aliasing
				1412	* before reading the page on the kernel side.
				1413	*/
				1414	if (mapping_writably_mapped(mapping))
				1415	flush_dcache_page(page);
				1416	/*
				1417	* Mark the page accessed if we read the beginning.
				1418	*/
				1419	if (!offset)
				1420	mark_page_accessed(page);
				1421	} else {
				1422	page = ZERO_PAGE(0);
				1423	page_cache_get(page);
				1424	}
				1425
				1426	/*
				1427	* Ok, we have the page, and it's up-to-date, so
				1428	* now we can copy it to user space...
				1429	*
				1430	* The actor routine returns how many bytes were actually used..
				1431	* NOTE! This may not be the same as how much of a user buffer
				1432	* we filled up (we may be padding etc), so we can only update
				1433	* "pos" here (the actor routine has to update the user buffer
				1434	* pointers and the remaining count).
				1435	*/
				1436	ret = actor(desc, page, offset, nr);
				1437	offset += ret;
				1438	index += offset >> PAGE_CACHE_SHIFT;
				1439	offset &= ~PAGE_CACHE_MASK;
				1440
				1441	page_cache_release(page);
				1442	if (ret != nr \|\| !desc->count)
				1443	break;
				1444
				1445	cond_resched();
				1446	}
				1447
				1448	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
				1449	file_accessed(filp);
				1450	}
				1451
				1452	static ssize_t shmem_file_aio_read(struct kiocb *iocb,
				1453	const struct iovec *iov, unsigned long nr_segs, loff_t pos)
				1454	{
				1455	struct file *filp = iocb->ki_filp;
				1456	ssize_t retval;
				1457	unsigned long seg;
				1458	size_t count;
				1459	loff_t *ppos = &iocb->ki_pos;
				1460
				1461	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
				1462	if (retval)
				1463	return retval;
				1464
				1465	for (seg = 0; seg < nr_segs; seg++) {
				1466	read_descriptor_t desc;
				1467
				1468	desc.written = 0;
				1469	desc.arg.buf = iov[seg].iov_base;
				1470	desc.count = iov[seg].iov_len;
				1471	if (desc.count == 0)
				1472	continue;
				1473	desc.error = 0;
				1474	do_shmem_file_read(filp, ppos, &desc, file_read_actor);
				1475	retval += desc.written;
				1476	if (desc.error) {
				1477	retval = retval ?: desc.error;
				1478	break;
				1479	}
				1480	if (desc.count > 0)
				1481	break;
				1482	}
				1483	return retval;
				1484	}
				1485
				1486	static ssize_t shmem_file_splice_read(struct file in, loff_t ppos,
				1487	struct pipe_inode_info *pipe, size_t len,
				1488	unsigned int flags)
				1489	{
				1490	struct address_space *mapping = in->f_mapping;
				1491	struct inode *inode = mapping->host;
				1492	unsigned int loff, nr_pages, req_pages;
				1493	struct page *pages[PIPE_DEF_BUFFERS];
				1494	struct partial_page partial[PIPE_DEF_BUFFERS];
				1495	struct page *page;
				1496	pgoff_t index, end_index;
				1497	loff_t isize, left;
				1498	int error, page_nr;
				1499	struct splice_pipe_desc spd = {
				1500	.pages = pages,
				1501	.partial = partial,
				1502	.nr_pages_max = PIPE_DEF_BUFFERS,
				1503	.flags = flags,
				1504	.ops = &page_cache_pipe_buf_ops,
				1505	.spd_release = spd_release_page,
				1506	};
				1507
				1508	isize = i_size_read(inode);
				1509	if (unlikely(*ppos >= isize))
				1510	return 0;
				1511
				1512	left = isize - *ppos;
				1513	if (unlikely(left < len))
				1514	len = left;
				1515
				1516	if (splice_grow_spd(pipe, &spd))
				1517	return -ENOMEM;
				1518
				1519	index = *ppos >> PAGE_CACHE_SHIFT;
				1520	loff = *ppos & ~PAGE_CACHE_MASK;
				1521	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
				1522	nr_pages = min(req_pages, pipe->buffers);
				1523
				1524	spd.nr_pages = find_get_pages_contig(mapping, index,
				1525	nr_pages, spd.pages);
				1526	index += spd.nr_pages;
				1527	error = 0;
				1528
				1529	while (spd.nr_pages < nr_pages) {
				1530	error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
				1531	if (error)
				1532	break;
				1533	unlock_page(page);
				1534	spd.pages[spd.nr_pages++] = page;
				1535	index++;
				1536	}
				1537
				1538	index = *ppos >> PAGE_CACHE_SHIFT;
				1539	nr_pages = spd.nr_pages;
				1540	spd.nr_pages = 0;
				1541
				1542	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
				1543	unsigned int this_len;
				1544
				1545	if (!len)
				1546	break;
				1547
				1548	this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
				1549	page = spd.pages[page_nr];
				1550
				1551	if (!PageUptodate(page) \|\| page->mapping != mapping) {
				1552	error = shmem_getpage(inode, index, &page,
				1553	SGP_CACHE, NULL);
				1554	if (error)
				1555	break;
				1556	unlock_page(page);
				1557	page_cache_release(spd.pages[page_nr]);
				1558	spd.pages[page_nr] = page;
				1559	}
				1560
				1561	isize = i_size_read(inode);
				1562	end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
				1563	if (unlikely(!isize \|\| index > end_index))
				1564	break;
				1565
				1566	if (end_index == index) {
				1567	unsigned int plen;
				1568
				1569	plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
				1570	if (plen <= loff)
				1571	break;
				1572
				1573	this_len = min(this_len, plen - loff);
				1574	len = this_len;
				1575	}
				1576
				1577	spd.partial[page_nr].offset = loff;
				1578	spd.partial[page_nr].len = this_len;
				1579	len -= this_len;
				1580	loff = 0;
				1581	spd.nr_pages++;
				1582	index++;
				1583	}
				1584
				1585	while (page_nr < nr_pages)
				1586	page_cache_release(spd.pages[page_nr++]);
				1587
				1588	if (spd.nr_pages)
				1589	error = splice_to_pipe(pipe, &spd);
				1590
				1591	splice_shrink_spd(&spd);
				1592
				1593	if (error > 0) {
				1594	*ppos += error;
				1595	file_accessed(in);
				1596	}
				1597	return error;
				1598	}
				1599
				1600	static int shmem_statfs(struct dentry dentry, struct kstatfs buf)
				1601	{
				1602	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
				1603
				1604	buf->f_type = TMPFS_MAGIC;
				1605	buf->f_bsize = PAGE_CACHE_SIZE;
				1606	buf->f_namelen = NAME_MAX;
				1607	if (sbinfo->max_blocks) {
				1608	buf->f_blocks = sbinfo->max_blocks;
				1609	buf->f_bavail =
				1610	buf->f_bfree = sbinfo->max_blocks -
				1611	percpu_counter_sum(&sbinfo->used_blocks);
				1612	}
				1613	if (sbinfo->max_inodes) {
				1614	buf->f_files = sbinfo->max_inodes;
				1615	buf->f_ffree = sbinfo->free_inodes;
				1616	}
				1617	/* else leave those fields 0 like simple_statfs */
				1618	return 0;
				1619	}
				1620
				1621	/*
				1622	* File creation. Allocate an inode, and we're done..
				1623	*/
				1624	static int
				1625	shmem_mknod(struct inode dir, struct dentry dentry, umode_t mode, dev_t dev)
				1626	{
				1627	struct inode *inode;
				1628	int error = -ENOSPC;
				1629
				1630	inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
				1631	if (inode) {
				1632	error = security_inode_init_security(inode, dir,
				1633	&dentry->d_name,
				1634	shmem_initxattrs, NULL);
				1635	if (error) {
				1636	if (error != -EOPNOTSUPP) {
				1637	iput(inode);
				1638	return error;
				1639	}
				1640	}
				1641	#ifdef CONFIG_TMPFS_POSIX_ACL
				1642	error = generic_acl_init(inode, dir);
				1643	if (error) {
				1644	iput(inode);
				1645	return error;
				1646	}
				1647	#else
				1648	error = 0;
				1649	#endif
				1650	dir->i_size += BOGO_DIRENT_SIZE;
				1651	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
				1652	d_instantiate(dentry, inode);
				1653	dget(dentry); /* Extra count - pin the dentry in core */
				1654	}
				1655	return error;
				1656	}
				1657
				1658	static int shmem_mkdir(struct inode dir, struct dentry dentry, umode_t mode)
				1659	{
				1660	int error;
				1661
				1662	if ((error = shmem_mknod(dir, dentry, mode \| S_IFDIR, 0)))
				1663	return error;
				1664	inc_nlink(dir);
				1665	return 0;
				1666	}
				1667
				1668	static int shmem_create(struct inode dir, struct dentry dentry, umode_t mode,
				1669	struct nameidata *nd)
				1670	{
				1671	return shmem_mknod(dir, dentry, mode \| S_IFREG, 0);
				1672	}
				1673
				1674	/*
				1675	* Link a file..
				1676	*/
				1677	static int shmem_link(struct dentry old_dentry, struct inode dir, struct dentry *dentry)
				1678	{
				1679	struct inode *inode = old_dentry->d_inode;
				1680	int ret;
				1681
				1682	/*
				1683	* No ordinary (disk based) filesystem counts links as inodes;
				1684	* but each new link needs a new dentry, pinning lowmem, and
				1685	* tmpfs dentries cannot be pruned until they are unlinked.
				1686	*/
				1687	ret = shmem_reserve_inode(inode->i_sb);
				1688	if (ret)
				1689	goto out;
				1690
				1691	dir->i_size += BOGO_DIRENT_SIZE;
				1692	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
				1693	inc_nlink(inode);
				1694	ihold(inode); /* New dentry reference */
				1695	dget(dentry); /* Extra pinning count for the created dentry */
				1696	d_instantiate(dentry, inode);
				1697	out:
				1698	return ret;
				1699	}
				1700
				1701	static int shmem_unlink(struct inode dir, struct dentry dentry)
				1702	{
				1703	struct inode *inode = dentry->d_inode;
				1704
				1705	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
				1706	shmem_free_inode(inode->i_sb);
				1707
				1708	dir->i_size -= BOGO_DIRENT_SIZE;
				1709	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
				1710	drop_nlink(inode);
				1711	dput(dentry); /* Undo the count from "create" - this does all the work */
				1712	return 0;
				1713	}
				1714
				1715	static int shmem_rmdir(struct inode dir, struct dentry dentry)
				1716	{
				1717	if (!simple_empty(dentry))
				1718	return -ENOTEMPTY;
				1719
				1720	drop_nlink(dentry->d_inode);
				1721	drop_nlink(dir);
				1722	return shmem_unlink(dir, dentry);
				1723	}
				1724
				1725	/*
				1726	* The VFS layer already does all the dentry stuff for rename,
				1727	* we just have to decrement the usage count for the target if
				1728	* it exists so that the VFS layer correctly free's it when it
				1729	* gets overwritten.
				1730	*/
				1731	static int shmem_rename(struct inode old_dir, struct dentry old_dentry, struct inode new_dir, struct dentry new_dentry)
				1732	{
				1733	struct inode *inode = old_dentry->d_inode;
				1734	int they_are_dirs = S_ISDIR(inode->i_mode);
				1735
				1736	if (!simple_empty(new_dentry))
				1737	return -ENOTEMPTY;
				1738
				1739	if (new_dentry->d_inode) {
				1740	(void) shmem_unlink(new_dir, new_dentry);
				1741	if (they_are_dirs) {
				1742	drop_nlink(new_dentry->d_inode);
				1743	drop_nlink(old_dir);
				1744	}
				1745	} else if (they_are_dirs) {
				1746	drop_nlink(old_dir);
				1747	inc_nlink(new_dir);
				1748	}
				1749
				1750	old_dir->i_size -= BOGO_DIRENT_SIZE;
				1751	new_dir->i_size += BOGO_DIRENT_SIZE;
				1752	old_dir->i_ctime = old_dir->i_mtime =
				1753	new_dir->i_ctime = new_dir->i_mtime =
				1754	inode->i_ctime = CURRENT_TIME;
				1755	return 0;
				1756	}
				1757
				1758	static int shmem_symlink(struct inode dir, struct dentry dentry, const char *symname)
				1759	{
				1760	int error;
				1761	int len;
				1762	struct inode *inode;
				1763	struct page *page;
				1764	char *kaddr;
				1765	struct shmem_inode_info *info;
				1766
				1767	len = strlen(symname) + 1;
				1768	if (len > PAGE_CACHE_SIZE)
				1769	return -ENAMETOOLONG;
				1770
				1771	inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK\|S_IRWXUGO, 0, VM_NORESERVE);
				1772	if (!inode)
				1773	return -ENOSPC;
				1774
				1775	error = security_inode_init_security(inode, dir, &dentry->d_name,
				1776	shmem_initxattrs, NULL);
				1777	if (error) {
				1778	if (error != -EOPNOTSUPP) {
				1779	iput(inode);
				1780	return error;
				1781	}
				1782	error = 0;
				1783	}
				1784
				1785	info = SHMEM_I(inode);
				1786	inode->i_size = len-1;
				1787	if (len <= SHORT_SYMLINK_LEN) {
				1788	info->symlink = kmemdup(symname, len, GFP_KERNEL);
				1789	if (!info->symlink) {
				1790	iput(inode);
				1791	return -ENOMEM;
				1792	}
				1793	inode->i_op = &shmem_short_symlink_operations;
				1794	} else {
				1795	error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
				1796	if (error) {
				1797	iput(inode);
				1798	return error;
				1799	}
				1800	inode->i_mapping->a_ops = &shmem_aops;
				1801	inode->i_op = &shmem_symlink_inode_operations;
				1802	kaddr = kmap_atomic(page);
				1803	memcpy(kaddr, symname, len);
				1804	kunmap_atomic(kaddr);
				1805	set_page_dirty(page);
				1806	unlock_page(page);
				1807	page_cache_release(page);
				1808	}
				1809	dir->i_size += BOGO_DIRENT_SIZE;
				1810	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
				1811	d_instantiate(dentry, inode);
				1812	dget(dentry);
				1813	return 0;
				1814	}
				1815
				1816	static void shmem_follow_short_symlink(struct dentry dentry, struct nameidata *nd)
				1817	{
				1818	nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
				1819	return NULL;
				1820	}
				1821
				1822	static void shmem_follow_link(struct dentry dentry, struct nameidata *nd)
				1823	{
				1824	struct page *page = NULL;
				1825	int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
				1826	nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
				1827	if (page)
				1828	unlock_page(page);
				1829	return page;
				1830	}
				1831
				1832	static void shmem_put_link(struct dentry dentry, struct nameidata nd, void *cookie)
				1833	{
				1834	if (!IS_ERR(nd_get_link(nd))) {
				1835	struct page *page = cookie;
				1836	kunmap(page);
				1837	mark_page_accessed(page);
				1838	page_cache_release(page);
				1839	}
				1840	}
				1841
				1842	#ifdef CONFIG_TMPFS_XATTR
				1843	/*
				1844	* Superblocks without xattr inode operations may get some security.* xattr
				1845	* support from the LSM "for free". As soon as we have any other xattrs
				1846	* like ACLs, we also need to implement the security.* handlers at
				1847	* filesystem level, though.
				1848	*/
				1849
				1850	/*
				1851	* Allocate new xattr and copy in the value; but leave the name to callers.
				1852	*/
				1853	static struct shmem_xattr shmem_xattr_alloc(const void value, size_t size)
				1854	{
				1855	struct shmem_xattr *new_xattr;
				1856	size_t len;
				1857
				1858	/* wrap around? */
				1859	len = sizeof(*new_xattr) + size;
				1860	if (len <= sizeof(*new_xattr))
				1861	return NULL;
				1862
				1863	new_xattr = kmalloc(len, GFP_KERNEL);
				1864	if (!new_xattr)
				1865	return NULL;
				1866
				1867	new_xattr->size = size;
				1868	memcpy(new_xattr->value, value, size);
				1869	return new_xattr;
				1870	}
				1871
				1872	/*
				1873	* Callback for security_inode_init_security() for acquiring xattrs.
				1874	*/
				1875	static int shmem_initxattrs(struct inode *inode,
				1876	const struct xattr *xattr_array,
				1877	void *fs_info)
				1878	{
				1879	struct shmem_inode_info *info = SHMEM_I(inode);
				1880	const struct xattr *xattr;
				1881	struct shmem_xattr *new_xattr;
				1882	size_t len;
				1883
				1884	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
				1885	new_xattr = shmem_xattr_alloc(xattr->value, xattr->value_len);
				1886	if (!new_xattr)
				1887	return -ENOMEM;
				1888
				1889	len = strlen(xattr->name) + 1;
				1890	new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
				1891	GFP_KERNEL);
				1892	if (!new_xattr->name) {
				1893	kfree(new_xattr);
				1894	return -ENOMEM;
				1895	}
				1896
				1897	memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
				1898	XATTR_SECURITY_PREFIX_LEN);
				1899	memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
				1900	xattr->name, len);
				1901
				1902	spin_lock(&info->lock);
				1903	list_add(&new_xattr->list, &info->xattr_list);
				1904	spin_unlock(&info->lock);
				1905	}
				1906
				1907	return 0;
				1908	}
				1909
				1910	static int shmem_xattr_get(struct dentry dentry, const char name,
				1911	void *buffer, size_t size)
				1912	{
				1913	struct shmem_inode_info *info;
				1914	struct shmem_xattr *xattr;
				1915	int ret = -ENODATA;
				1916
				1917	info = SHMEM_I(dentry->d_inode);
				1918
				1919	spin_lock(&info->lock);
				1920	list_for_each_entry(xattr, &info->xattr_list, list) {
				1921	if (strcmp(name, xattr->name))
				1922	continue;
				1923
				1924	ret = xattr->size;
				1925	if (buffer) {
				1926	if (size < xattr->size)
				1927	ret = -ERANGE;
				1928	else
				1929	memcpy(buffer, xattr->value, xattr->size);
				1930	}
				1931	break;
				1932	}
				1933	spin_unlock(&info->lock);
				1934	return ret;
				1935	}
				1936
				1937	static int shmem_xattr_set(struct inode inode, const char name,
				1938	const void *value, size_t size, int flags)
				1939	{
				1940	struct shmem_inode_info *info = SHMEM_I(inode);
				1941	struct shmem_xattr *xattr;
				1942	struct shmem_xattr *new_xattr = NULL;
				1943	int err = 0;
				1944
				1945	/* value == NULL means remove */
				1946	if (value) {
				1947	new_xattr = shmem_xattr_alloc(value, size);
				1948	if (!new_xattr)
				1949	return -ENOMEM;
				1950
				1951	new_xattr->name = kstrdup(name, GFP_KERNEL);
				1952	if (!new_xattr->name) {
				1953	kfree(new_xattr);
				1954	return -ENOMEM;
				1955	}
				1956	}
				1957
				1958	spin_lock(&info->lock);
				1959	list_for_each_entry(xattr, &info->xattr_list, list) {
				1960	if (!strcmp(name, xattr->name)) {
				1961	if (flags & XATTR_CREATE) {
				1962	xattr = new_xattr;
				1963	err = -EEXIST;
				1964	} else if (new_xattr) {
				1965	list_replace(&xattr->list, &new_xattr->list);
				1966	} else {
				1967	list_del(&xattr->list);
				1968	}
				1969	goto out;
				1970	}
				1971	}
				1972	if (flags & XATTR_REPLACE) {
				1973	xattr = new_xattr;
				1974	err = -ENODATA;
				1975	} else {
				1976	list_add(&new_xattr->list, &info->xattr_list);
				1977	xattr = NULL;
				1978	}
				1979	out:
				1980	spin_unlock(&info->lock);
				1981	if (xattr)
				1982	kfree(xattr->name);
				1983	kfree(xattr);
				1984	return err;
				1985	}
				1986
				1987	static const struct xattr_handler *shmem_xattr_handlers[] = {
				1988	#ifdef CONFIG_TMPFS_POSIX_ACL
				1989	&generic_acl_access_handler,
				1990	&generic_acl_default_handler,
				1991	#endif
				1992	NULL
				1993	};
				1994
				1995	static int shmem_xattr_validate(const char *name)
				1996	{
				1997	struct { const char *prefix; size_t len; } arr[] = {
				1998	{ XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
				1999	{ XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
				2000	};
				2001	int i;
				2002
				2003	for (i = 0; i < ARRAY_SIZE(arr); i++) {
				2004	size_t preflen = arr[i].len;
				2005	if (strncmp(name, arr[i].prefix, preflen) == 0) {
				2006	if (!name[preflen])
				2007	return -EINVAL;
				2008	return 0;
				2009	}
				2010	}
				2011	return -EOPNOTSUPP;
				2012	}
				2013
				2014	static ssize_t shmem_getxattr(struct dentry dentry, const char name,
				2015	void *buffer, size_t size)
				2016	{
				2017	int err;
				2018
				2019	/*
				2020	* If this is a request for a synthetic attribute in the system.*
				2021	* namespace use the generic infrastructure to resolve a handler
				2022	* for it via sb->s_xattr.
				2023	*/
				2024	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
				2025	return generic_getxattr(dentry, name, buffer, size);
				2026
				2027	err = shmem_xattr_validate(name);
				2028	if (err)
				2029	return err;
				2030
				2031	return shmem_xattr_get(dentry, name, buffer, size);
				2032	}
				2033
				2034	static int shmem_setxattr(struct dentry dentry, const char name,
				2035	const void *value, size_t size, int flags)
				2036	{
				2037	int err;
				2038
				2039	/*
				2040	* If this is a request for a synthetic attribute in the system.*
				2041	* namespace use the generic infrastructure to resolve a handler
				2042	* for it via sb->s_xattr.
				2043	*/
				2044	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
				2045	return generic_setxattr(dentry, name, value, size, flags);
				2046
				2047	err = shmem_xattr_validate(name);
				2048	if (err)
				2049	return err;
				2050
				2051	if (size == 0)
				2052	value = ""; /* empty EA, do not remove */
				2053
				2054	return shmem_xattr_set(dentry->d_inode, name, value, size, flags);
				2055
				2056	}
				2057
				2058	static int shmem_removexattr(struct dentry dentry, const char name)
				2059	{
				2060	int err;
				2061
				2062	/*
				2063	* If this is a request for a synthetic attribute in the system.*
				2064	* namespace use the generic infrastructure to resolve a handler
				2065	* for it via sb->s_xattr.
				2066	*/
				2067	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
				2068	return generic_removexattr(dentry, name);
				2069
				2070	err = shmem_xattr_validate(name);
				2071	if (err)
				2072	return err;
				2073
				2074	return shmem_xattr_set(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
				2075	}
				2076
				2077	static bool xattr_is_trusted(const char *name)
				2078	{
				2079	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
				2080	}
				2081
				2082	static ssize_t shmem_listxattr(struct dentry dentry, char buffer, size_t size)
				2083	{
				2084	bool trusted = capable(CAP_SYS_ADMIN);
				2085	struct shmem_xattr *xattr;
				2086	struct shmem_inode_info *info;
				2087	size_t used = 0;
				2088
				2089	info = SHMEM_I(dentry->d_inode);
				2090
				2091	spin_lock(&info->lock);
				2092	list_for_each_entry(xattr, &info->xattr_list, list) {
				2093	size_t len;
				2094
				2095	/* skip "trusted." attributes for unprivileged callers */
				2096	if (!trusted && xattr_is_trusted(xattr->name))
				2097	continue;
				2098
				2099	len = strlen(xattr->name) + 1;
				2100	used += len;
				2101	if (buffer) {
				2102	if (size < used) {
				2103	used = -ERANGE;
				2104	break;
				2105	}
				2106	memcpy(buffer, xattr->name, len);
				2107	buffer += len;
				2108	}
				2109	}
				2110	spin_unlock(&info->lock);
				2111
				2112	return used;
				2113	}
				2114	#endif /* CONFIG_TMPFS_XATTR */
				2115
				2116	static const struct inode_operations shmem_short_symlink_operations = {
				2117	.readlink = generic_readlink,
				2118	.follow_link = shmem_follow_short_symlink,
				2119	#ifdef CONFIG_TMPFS_XATTR
				2120	.setxattr = shmem_setxattr,
				2121	.getxattr = shmem_getxattr,
				2122	.listxattr = shmem_listxattr,
				2123	.removexattr = shmem_removexattr,
				2124	#endif
				2125	};
				2126
				2127	static const struct inode_operations shmem_symlink_inode_operations = {
				2128	.readlink = generic_readlink,
				2129	.follow_link = shmem_follow_link,
				2130	.put_link = shmem_put_link,
				2131	#ifdef CONFIG_TMPFS_XATTR
				2132	.setxattr = shmem_setxattr,
				2133	.getxattr = shmem_getxattr,
				2134	.listxattr = shmem_listxattr,
				2135	.removexattr = shmem_removexattr,
				2136	#endif
				2137	};
				2138
				2139	static struct dentry shmem_get_parent(struct dentry child)
				2140	{
				2141	return ERR_PTR(-ESTALE);
				2142	}
				2143
				2144	static int shmem_match(struct inode ino, void vfh)
				2145	{
				2146	__u32 *fh = vfh;
				2147	__u64 inum = fh[2];
				2148	inum = (inum << 32) \| fh[1];
				2149	return ino->i_ino == inum && fh[0] == ino->i_generation;
				2150	}
				2151
				2152	static struct dentry shmem_fh_to_dentry(struct super_block sb,
				2153	struct fid *fid, int fh_len, int fh_type)
				2154	{
				2155	struct inode *inode;
				2156	struct dentry *dentry = NULL;
				2157	u64 inum;
				2158
				2159	if (fh_len < 3)
				2160	return NULL;
				2161
				2162	inum = fid->raw[2];
				2163	inum = (inum << 32) \| fid->raw[1];
				2164
				2165	inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
				2166	shmem_match, fid->raw);
				2167	if (inode) {
				2168	dentry = d_find_alias(inode);
				2169	iput(inode);
				2170	}
				2171
				2172	return dentry;
				2173	}
				2174
				2175	static int shmem_encode_fh(struct dentry dentry, __u32 fh, int *len,
				2176	int connectable)
				2177	{
				2178	struct inode *inode = dentry->d_inode;
				2179
				2180	if (*len < 3) {
				2181	*len = 3;
				2182	return 255;
				2183	}
				2184
				2185	if (inode_unhashed(inode)) {
				2186	/* Unfortunately insert_inode_hash is not idempotent,
				2187	* so as we hash inodes here rather than at creation
				2188	* time, we need a lock to ensure we only try
				2189	* to do it once
				2190	*/
				2191	static DEFINE_SPINLOCK(lock);
				2192	spin_lock(&lock);
				2193	if (inode_unhashed(inode))
				2194	__insert_inode_hash(inode,
				2195	inode->i_ino + inode->i_generation);
				2196	spin_unlock(&lock);
				2197	}
				2198
				2199	fh[0] = inode->i_generation;
				2200	fh[1] = inode->i_ino;
				2201	fh[2] = ((__u64)inode->i_ino) >> 32;
				2202
				2203	*len = 3;
				2204	return 1;
				2205	}
				2206
				2207	static const struct export_operations shmem_export_ops = {
				2208	.get_parent = shmem_get_parent,
				2209	.encode_fh = shmem_encode_fh,
				2210	.fh_to_dentry = shmem_fh_to_dentry,
				2211	};
				2212
				2213	static int shmem_parse_options(char options, struct shmem_sb_info sbinfo,
				2214	bool remount)
				2215	{
				2216	char this_char, value, *rest;
				2217
				2218	while (options != NULL) {
				2219	this_char = options;
				2220	for (;;) {
				2221	/*
				2222	* NUL-terminate this option: unfortunately,
				2223	* mount options form a comma-separated list,
				2224	* but mpol's nodelist may also contain commas.
				2225	*/
				2226	options = strchr(options, ',');
				2227	if (options == NULL)
				2228	break;
				2229	options++;
				2230	if (!isdigit(*options)) {
				2231	options[-1] = '\0';
				2232	break;
				2233	}
				2234	}
				2235	if (!*this_char)
				2236	continue;
				2237	if ((value = strchr(this_char,'=')) != NULL) {
				2238	*value++ = 0;
				2239	} else {
				2240	printk(KERN_ERR
				2241	"tmpfs: No value for mount option '%s'\n",
				2242	this_char);
				2243	return 1;
				2244	}
				2245
				2246	if (!strcmp(this_char,"size")) {
				2247	unsigned long long size;
				2248	size = memparse(value,&rest);
				2249	if (*rest == '%') {
				2250	size <<= PAGE_SHIFT;
				2251	size *= totalram_pages;
				2252	do_div(size, 100);
				2253	rest++;
				2254	}
				2255	if (*rest)
				2256	goto bad_val;
				2257	sbinfo->max_blocks =
				2258	DIV_ROUND_UP(size, PAGE_CACHE_SIZE);
				2259	} else if (!strcmp(this_char,"nr_blocks")) {
				2260	sbinfo->max_blocks = memparse(value, &rest);
				2261	if (*rest)
				2262	goto bad_val;
				2263	} else if (!strcmp(this_char,"nr_inodes")) {
				2264	sbinfo->max_inodes = memparse(value, &rest);
				2265	if (*rest)
				2266	goto bad_val;
				2267	} else if (!strcmp(this_char,"mode")) {
				2268	if (remount)
				2269	continue;
				2270	sbinfo->mode = simple_strtoul(value, &rest, 8) & 07777;
				2271	if (*rest)
				2272	goto bad_val;
				2273	} else if (!strcmp(this_char,"uid")) {
				2274	if (remount)
				2275	continue;
				2276	sbinfo->uid = simple_strtoul(value, &rest, 0);
				2277	if (*rest)
				2278	goto bad_val;
				2279	} else if (!strcmp(this_char,"gid")) {
				2280	if (remount)
				2281	continue;
				2282	sbinfo->gid = simple_strtoul(value, &rest, 0);
				2283	if (*rest)
				2284	goto bad_val;
				2285	} else if (!strcmp(this_char,"mpol")) {
				2286	if (mpol_parse_str(value, &sbinfo->mpol, 1))
				2287	goto bad_val;
				2288	} else {
				2289	printk(KERN_ERR "tmpfs: Bad mount option %s\n",
				2290	this_char);
				2291	return 1;
				2292	}
				2293	}
				2294	return 0;
				2295
				2296	bad_val:
				2297	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
				2298	value, this_char);
				2299	return 1;
				2300
				2301	}
				2302
				2303	static int shmem_remount_fs(struct super_block sb, int flags, char *data)
				2304	{
				2305	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
				2306	struct shmem_sb_info config = *sbinfo;
				2307	unsigned long inodes;
				2308	int error = -EINVAL;
				2309
				2310	config.mpol = NULL;
				2311	if (shmem_parse_options(data, &config, true))
				2312	return error;
				2313
				2314	spin_lock(&sbinfo->stat_lock);
				2315	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
				2316	if (percpu_counter_compare(&sbinfo->used_blocks, config.max_blocks) > 0)
				2317	goto out;
				2318	if (config.max_inodes < inodes)
				2319	goto out;
				2320	/*
				2321	* Those tests disallow limited->unlimited while any are in use;
				2322	* but we must separately disallow unlimited->limited, because
				2323	* in that case we have no record of how much is already in use.
				2324	*/
				2325	if (config.max_blocks && !sbinfo->max_blocks)
				2326	goto out;
				2327	if (config.max_inodes && !sbinfo->max_inodes)
				2328	goto out;
				2329
				2330	error = 0;
				2331	sbinfo->max_blocks = config.max_blocks;
				2332	sbinfo->max_inodes = config.max_inodes;
				2333	sbinfo->free_inodes = config.max_inodes - inodes;
				2334
				2335	/*
				2336	* Preserve previous mempolicy unless mpol remount option was specified.
				2337	*/
				2338	if (config.mpol) {
				2339	mpol_put(sbinfo->mpol);
				2340	sbinfo->mpol = config.mpol; /* transfers initial ref */
				2341	}
				2342	out:
				2343	spin_unlock(&sbinfo->stat_lock);
				2344	return error;
				2345	}
				2346
				2347	static int shmem_show_options(struct seq_file seq, struct dentry root)
				2348	{
				2349	struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb);
				2350
				2351	if (sbinfo->max_blocks != shmem_default_max_blocks())
				2352	seq_printf(seq, ",size=%luk",
				2353	sbinfo->max_blocks << (PAGE_CACHE_SHIFT - 10));
				2354	if (sbinfo->max_inodes != shmem_default_max_inodes())
				2355	seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes);
				2356	if (sbinfo->mode != (S_IRWXUGO \| S_ISVTX))
				2357	seq_printf(seq, ",mode=%03ho", sbinfo->mode);
				2358	if (sbinfo->uid != 0)
				2359	seq_printf(seq, ",uid=%u", sbinfo->uid);
				2360	if (sbinfo->gid != 0)
				2361	seq_printf(seq, ",gid=%u", sbinfo->gid);
				2362	shmem_show_mpol(seq, sbinfo->mpol);
				2363	return 0;
				2364	}
				2365	#endif /* CONFIG_TMPFS */
				2366
				2367	static void shmem_put_super(struct super_block *sb)
				2368	{
				2369	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
				2370
				2371	percpu_counter_destroy(&sbinfo->used_blocks);
				2372	kfree(sbinfo);
				2373	sb->s_fs_info = NULL;
				2374	}
				2375
				2376	int shmem_fill_super(struct super_block sb, void data, int silent)
				2377	{
				2378	struct inode *inode;
				2379	struct shmem_sb_info *sbinfo;
				2380	int err = -ENOMEM;
				2381
				2382	/* Round up to L1_CACHE_BYTES to resist false sharing */
				2383	sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
				2384	L1_CACHE_BYTES), GFP_KERNEL);
				2385	if (!sbinfo)
				2386	return -ENOMEM;
				2387
				2388	sbinfo->mode = S_IRWXUGO \| S_ISVTX;
				2389	sbinfo->uid = current_fsuid();
				2390	sbinfo->gid = current_fsgid();
				2391	sb->s_fs_info = sbinfo;
				2392
				2393	#ifdef CONFIG_TMPFS
				2394	/*
				2395	* Per default we only allow half of the physical ram per
				2396	* tmpfs instance, limiting inodes to one per page of lowmem;
				2397	* but the internal instance is left unlimited.
				2398	*/
				2399	if (!(sb->s_flags & MS_NOUSER)) {
				2400	sbinfo->max_blocks = shmem_default_max_blocks();
				2401	sbinfo->max_inodes = shmem_default_max_inodes();
				2402	if (shmem_parse_options(data, sbinfo, false)) {
				2403	err = -EINVAL;
				2404	goto failed;
				2405	}
				2406	}
				2407	sb->s_export_op = &shmem_export_ops;
				2408	#else
				2409	sb->s_flags \|= MS_NOUSER;
				2410	#endif
				2411
				2412	spin_lock_init(&sbinfo->stat_lock);
				2413	if (percpu_counter_init(&sbinfo->used_blocks, 0))
				2414	goto failed;
				2415	sbinfo->free_inodes = sbinfo->max_inodes;
				2416
				2417	sb->s_maxbytes = MAX_LFS_FILESIZE;
				2418	sb->s_blocksize = PAGE_CACHE_SIZE;
				2419	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
				2420	sb->s_magic = TMPFS_MAGIC;
				2421	sb->s_op = &shmem_ops;
				2422	sb->s_time_gran = 1;
				2423	#ifdef CONFIG_TMPFS_XATTR
				2424	sb->s_xattr = shmem_xattr_handlers;
				2425	#endif
				2426	#ifdef CONFIG_TMPFS_POSIX_ACL
				2427	sb->s_flags \|= MS_POSIXACL;
				2428	#endif
				2429
				2430	inode = shmem_get_inode(sb, NULL, S_IFDIR \| sbinfo->mode, 0, VM_NORESERVE);
				2431	if (!inode)
				2432	goto failed;
				2433	inode->i_uid = sbinfo->uid;
				2434	inode->i_gid = sbinfo->gid;
				2435	sb->s_root = d_make_root(inode);
				2436	if (!sb->s_root)
				2437	goto failed;
				2438	return 0;
				2439
				2440	failed:
				2441	shmem_put_super(sb);
				2442	return err;
				2443	}
				2444
				2445	static struct kmem_cache *shmem_inode_cachep;
				2446
				2447	static struct inode shmem_alloc_inode(struct super_block sb)
				2448	{
				2449	struct shmem_inode_info *info;
				2450	info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL);
				2451	if (!info)
				2452	return NULL;
				2453	return &info->vfs_inode;
				2454	}
				2455
				2456	static void shmem_destroy_callback(struct rcu_head *head)
				2457	{
				2458	struct inode *inode = container_of(head, struct inode, i_rcu);
				2459	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
				2460	}
				2461
				2462	static void shmem_destroy_inode(struct inode *inode)
				2463	{
				2464	if (S_ISREG(inode->i_mode))
				2465	mpol_free_shared_policy(&SHMEM_I(inode)->policy);
				2466	call_rcu(&inode->i_rcu, shmem_destroy_callback);
				2467	}
				2468
				2469	static void shmem_init_inode(void *foo)
				2470	{
				2471	struct shmem_inode_info *info = foo;
				2472	inode_init_once(&info->vfs_inode);
				2473	}
				2474
				2475	static int shmem_init_inodecache(void)
				2476	{
				2477	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
				2478	sizeof(struct shmem_inode_info),
				2479	0, SLAB_PANIC, shmem_init_inode);
				2480	return 0;
				2481	}
				2482
				2483	static void shmem_destroy_inodecache(void)
				2484	{
				2485	kmem_cache_destroy(shmem_inode_cachep);
				2486	}
				2487
				2488	static const struct address_space_operations shmem_aops = {
				2489	.writepage = shmem_writepage,
				2490	.set_page_dirty = __set_page_dirty_no_writeback,
				2491	#ifdef CONFIG_TMPFS
				2492	.write_begin = shmem_write_begin,
				2493	.write_end = shmem_write_end,
				2494	#endif
				2495	.migratepage = migrate_page,
				2496	.error_remove_page = generic_error_remove_page,
				2497	};
				2498
				2499	static const struct file_operations shmem_file_operations = {
				2500	.mmap = shmem_mmap,
				2501	#ifdef CONFIG_TMPFS
				2502	.llseek = generic_file_llseek,
				2503	.read = do_sync_read,
				2504	.write = do_sync_write,
				2505	.aio_read = shmem_file_aio_read,
				2506	.aio_write = generic_file_aio_write,
				2507	.fsync = noop_fsync,
				2508	.splice_read = shmem_file_splice_read,
				2509	.splice_write = generic_file_splice_write,
				2510	#endif
				2511	};
				2512
				2513	static const struct inode_operations shmem_inode_operations = {
				2514	.setattr = shmem_setattr,
				2515	.truncate_range = shmem_truncate_range,
				2516	#ifdef CONFIG_TMPFS_XATTR
				2517	.setxattr = shmem_setxattr,
				2518	.getxattr = shmem_getxattr,
				2519	.listxattr = shmem_listxattr,
				2520	.removexattr = shmem_removexattr,
				2521	#endif
				2522	};
				2523
				2524	static const struct inode_operations shmem_dir_inode_operations = {
				2525	#ifdef CONFIG_TMPFS
				2526	.create = shmem_create,
				2527	.lookup = simple_lookup,
				2528	.link = shmem_link,
				2529	.unlink = shmem_unlink,
				2530	.symlink = shmem_symlink,
				2531	.mkdir = shmem_mkdir,
				2532	.rmdir = shmem_rmdir,
				2533	.mknod = shmem_mknod,
				2534	.rename = shmem_rename,
				2535	#endif
				2536	#ifdef CONFIG_TMPFS_XATTR
				2537	.setxattr = shmem_setxattr,
				2538	.getxattr = shmem_getxattr,
				2539	.listxattr = shmem_listxattr,
				2540	.removexattr = shmem_removexattr,
				2541	#endif
				2542	#ifdef CONFIG_TMPFS_POSIX_ACL
				2543	.setattr = shmem_setattr,
				2544	#endif
				2545	};
				2546
				2547	static const struct inode_operations shmem_special_inode_operations = {
				2548	#ifdef CONFIG_TMPFS_XATTR
				2549	.setxattr = shmem_setxattr,
				2550	.getxattr = shmem_getxattr,
				2551	.listxattr = shmem_listxattr,
				2552	.removexattr = shmem_removexattr,
				2553	#endif
				2554	#ifdef CONFIG_TMPFS_POSIX_ACL
				2555	.setattr = shmem_setattr,
				2556	#endif
				2557	};
				2558
				2559	static const struct super_operations shmem_ops = {
				2560	.alloc_inode = shmem_alloc_inode,
				2561	.destroy_inode = shmem_destroy_inode,
				2562	#ifdef CONFIG_TMPFS
				2563	.statfs = shmem_statfs,
				2564	.remount_fs = shmem_remount_fs,
				2565	.show_options = shmem_show_options,
				2566	#endif
				2567	.evict_inode = shmem_evict_inode,
				2568	.drop_inode = generic_delete_inode,
				2569	.put_super = shmem_put_super,
				2570	};
				2571
				2572	static const struct vm_operations_struct shmem_vm_ops = {
				2573	.fault = shmem_fault,
				2574	#ifdef CONFIG_NUMA
				2575	.set_policy = shmem_set_policy,
				2576	.get_policy = shmem_get_policy,
				2577	#endif
				2578	};
				2579
				2580	static struct dentry shmem_mount(struct file_system_type fs_type,
				2581	int flags, const char dev_name, void data)
				2582	{
				2583	return mount_nodev(fs_type, flags, data, shmem_fill_super);
				2584	}
				2585
				2586	static struct file_system_type shmem_fs_type = {
				2587	.owner = THIS_MODULE,
				2588	.name = "tmpfs",
				2589	.mount = shmem_mount,
				2590	.kill_sb = kill_litter_super,
				2591	};
				2592
				2593	int __init shmem_init(void)
				2594	{
				2595	int error;
				2596
				2597	error = bdi_init(&shmem_backing_dev_info);
				2598	if (error)
				2599	goto out4;
				2600
				2601	error = shmem_init_inodecache();
				2602	if (error)
				2603	goto out3;
				2604
				2605	error = register_filesystem(&shmem_fs_type);
				2606	if (error) {
				2607	printk(KERN_ERR "Could not register tmpfs\n");
				2608	goto out2;
				2609	}
				2610
				2611	shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER,
				2612	shmem_fs_type.name, NULL);
				2613	if (IS_ERR(shm_mnt)) {
				2614	error = PTR_ERR(shm_mnt);
				2615	printk(KERN_ERR "Could not kern_mount tmpfs\n");
				2616	goto out1;
				2617	}
				2618	return 0;
				2619
				2620	out1:
				2621	unregister_filesystem(&shmem_fs_type);
				2622	out2:
				2623	shmem_destroy_inodecache();
				2624	out3:
				2625	bdi_destroy(&shmem_backing_dev_info);
				2626	out4:
				2627	shm_mnt = ERR_PTR(error);
				2628	return error;
				2629	}
				2630
				2631	#else /* !CONFIG_SHMEM */
				2632
				2633	/*
				2634	* tiny-shmem: simple shmemfs and tmpfs using ramfs code
				2635	*
				2636	* This is intended for small systems where the benefits of the full
				2637	* shmem code (swap-backed and resource-limited) are outweighed by
				2638	* their complexity. On systems without swap this code should be
				2639	* effectively equivalent, but much lighter weight.
				2640	*/
				2641
				2642	#include <linux/ramfs.h>
				2643
				2644	static struct file_system_type shmem_fs_type = {
				2645	.name = "tmpfs",
				2646	.mount = ramfs_mount,
				2647	.kill_sb = kill_litter_super,
				2648	};
				2649
				2650	int __init shmem_init(void)
				2651	{
				2652	BUG_ON(register_filesystem(&shmem_fs_type) != 0);
				2653
				2654	shm_mnt = kern_mount(&shmem_fs_type);
				2655	BUG_ON(IS_ERR(shm_mnt));
				2656
				2657	return 0;
				2658	}
				2659
				2660	int shmem_unuse(swp_entry_t swap, struct page *page)
				2661	{
				2662	return 0;
				2663	}
				2664
				/* tiny-shmem stub: ignores all of its arguments and returns 0. */
				int shmem_lock(struct file *file, int lock, struct user_struct *user)
				{
					return 0;
				}
				2669
				/* tiny-shmem stub: intentionally empty. */
				void shmem_unlock_mapping(struct address_space *mapping)
				{
				}
				2673
				2674	void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
				2675	{
				2676	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
				2677	}
				2678	EXPORT_SYMBOL_GPL(shmem_truncate_range);
				2679
				2680	int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
				2681	{
				2682	/* Only CONFIG_SHMEM shmem.c ever supported i_op->truncate_range(). */
				2683	return -ENOSYS;
				2684	}
				2685
				/*
				 * tiny-shmem: map the names used by the common code below onto their
				 * generic/ramfs counterparts; size accounting becomes a no-op.
				 */
				#define shmem_vm_ops				generic_file_vm_ops
				#define shmem_file_operations			ramfs_file_operations
				#define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
				#define shmem_acct_size(flags, size)		0
				#define shmem_unacct_size(flags, size)		do {} while (0)
				2691
				2692	#endif /* CONFIG_SHMEM */
				2693
				2694	/* common code */
				2695
				2696	/**
				2697	* shmem_file_setup - get an unlinked file living in tmpfs
				2698	* @name: name for dentry (to be seen in /proc/<pid>/maps
				2699	* @size: size to be set for the file
				2700	* @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
				2701	*/
				2702	struct file shmem_file_setup(const char name, loff_t size, unsigned long flags)
				2703	{
				2704	int error;
				2705	struct file *file;
				2706	struct inode *inode;
				2707	struct path path;
				2708	struct dentry *root;
				2709	struct qstr this;
				2710
				2711	if (IS_ERR(shm_mnt))
				2712	return (void *)shm_mnt;
				2713
				2714	if (size < 0 \|\| size > MAX_LFS_FILESIZE)
				2715	return ERR_PTR(-EINVAL);
				2716
				2717	if (shmem_acct_size(flags, size))
				2718	return ERR_PTR(-ENOMEM);
				2719
				2720	error = -ENOMEM;
				2721	this.name = name;
				2722	this.len = strlen(name);
				2723	this.hash = 0; /* will go */
				2724	root = shm_mnt->mnt_root;
				2725	path.dentry = d_alloc(root, &this);
				2726	if (!path.dentry)
				2727	goto put_memory;
				2728	path.mnt = mntget(shm_mnt);
				2729
				2730	error = -ENOSPC;
				2731	inode = shmem_get_inode(root->d_sb, NULL, S_IFREG \| S_IRWXUGO, 0, flags);
				2732	if (!inode)
				2733	goto put_dentry;
				2734
				2735	d_instantiate(path.dentry, inode);
				2736	inode->i_size = size;
				2737	clear_nlink(inode); /* It is unlinked */
				2738	#ifndef CONFIG_MMU
				2739	error = ramfs_nommu_expand_for_mapping(inode, size);
				2740	if (error)
				2741	goto put_dentry;
				2742	#endif
				2743
				2744	error = -ENFILE;
				2745	file = alloc_file(&path, FMODE_WRITE \| FMODE_READ,
				2746	&shmem_file_operations);
				2747	if (!file)
				2748	goto put_dentry;
				2749
				2750	return file;
				2751
				2752	put_dentry:
				2753	path_put(&path);
				2754	put_memory:
				2755	shmem_unacct_size(flags, size);
				2756	return ERR_PTR(error);
				2757	}
				2758	EXPORT_SYMBOL_GPL(shmem_file_setup);
				2759
				2760	void shmem_set_file(struct vm_area_struct vma, struct file file)
				2761	{
				2762	if (vma->vm_file)
				2763	fput(vma->vm_file);
				2764	vma->vm_file = file;
				2765	vma->vm_ops = &shmem_vm_ops;
				2766	vma->vm_flags \|= VM_CAN_NONLINEAR;
				2767	}
				2768
				2769	/**
				2770	* shmem_zero_setup - setup a shared anonymous mapping
				2771	* @vma: the vma to be mmapped is prepared by do_mmap_pgoff
				2772	*/
				2773	int shmem_zero_setup(struct vm_area_struct *vma)
				2774	{
				2775	struct file *file;
				2776	loff_t size = vma->vm_end - vma->vm_start;
				2777
				2778	file = shmem_file_setup("dev/zero", size, vma->vm_flags);
				2779	if (IS_ERR(file))
				2780	return PTR_ERR(file);
				2781
				2782	shmem_set_file(vma, file);
				2783	return 0;
				2784	}
				2785
				2786	/**
				2787	* shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
				2788	* @mapping: the page's address_space
				2789	* @index: the page index
				2790	* @gfp: the page allocator flags to use if allocating
				2791	*
				2792	* This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
				2793	* with any new page allocations done using the specified allocation flags.
				2794	* But read_cache_page_gfp() uses the ->readpage() method: which does not
				2795	* suit tmpfs, since it may have pages in swapcache, and needs to find those
				2796	* for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
				2797	*
				2798	* i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY \| __GFP_NOWARN in
				2799	* with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily.
				2800	*/
				2801	struct page shmem_read_mapping_page_gfp(struct address_space mapping,
				2802	pgoff_t index, gfp_t gfp)
				2803	{
				2804	#ifdef CONFIG_SHMEM
				2805	struct inode *inode = mapping->host;
				2806	struct page *page;
				2807	int error;
				2808
				2809	BUG_ON(mapping->a_ops != &shmem_aops);
				2810	error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
				2811	if (error)
				2812	page = ERR_PTR(error);
				2813	else
				2814	unlock_page(page);
				2815	return page;
				2816	#else
				2817	/*
				2818	* The tiny !SHMEM case uses ramfs without swap
				2819	*/
				2820	return read_cache_page_gfp(mapping, index, gfp);
				2821	#endif
				2822	}
				2823	EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);