Blame - src/kernel/linux/v4.14/fs/nfsd/vfs.c - T103

blob: a64065ad8851d1bfa539d29b47355424a8f80b48 [file] [log] [blame]

rjw	1f88458	2022-01-06 17:20:42 +0800	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* File operations used by nfsd. Some of these have been ripped from
				4	* other parts of the kernel because they weren't exported, others
				5	* are partial duplicates with added or changed functionality.
				6	*
				7	* Note that several functions dget() the dentry upon which they want
				8	* to act, most notably those that create directory entries. Response
				9	* dentries are dput()'d if necessary in the release callback.
				10	* So if you notice code paths that apparently fail to dput() the
				11	* dentry, don't worry--they have been taken care of.
				12	*
				13	* Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
				14	* Zerocopy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
				15	*/
				16
				17	#include <linux/fs.h>
				18	#include <linux/file.h>
				19	#include <linux/splice.h>
				20	#include <linux/falloc.h>
				21	#include <linux/fcntl.h>
				22	#include <linux/namei.h>
				23	#include <linux/delay.h>
				24	#include <linux/fsnotify.h>
				25	#include <linux/posix_acl_xattr.h>
				26	#include <linux/xattr.h>
				27	#include <linux/jhash.h>
				28	#include <linux/ima.h>
				29	#include <linux/slab.h>
				30	#include <linux/uaccess.h>
				31	#include <linux/exportfs.h>
				32	#include <linux/writeback.h>
				33	#include <linux/security.h>
				34
				35	#ifdef CONFIG_NFSD_V3
				36	#include "xdr3.h"
				37	#endif /* CONFIG_NFSD_V3 */
				38
				39	#ifdef CONFIG_NFSD_V4
				40	#include "../internal.h"
				41	#include "acl.h"
				42	#include "idmap.h"
				43	#endif /* CONFIG_NFSD_V4 */
				44
				45	#include "nfsd.h"
				46	#include "vfs.h"
				47	#include "trace.h"
				48
				/*
				 * Debug facility selected for this file.
				 * NOTE(review): presumably consumed by the dprintk() calls below -
				 * confirm against the nfsd/sunrpc debug headers.
				 */
				#define NFSDDBG_FACILITY		NFSDDBG_FILEOP
				50
				51
				52	/*
				53	* This is a cache of readahead params that help us choose the proper
				54	* readahead strategy. Initially, we set all readahead parameters to 0
				55	* and let the VFS handle things.
				56	* If you increase the number of cached files very much, you'll need to
				57	* add a hash table here.
				58	*/
				59	struct raparms {
				60	struct raparms *p_next;
				61	unsigned int p_count;
				62	ino_t p_ino;
				63	dev_t p_dev;
				64	int p_set;
				65	struct file_ra_state p_ra;
				66	unsigned int p_hindex;
				67	};
				68
				69	struct raparm_hbucket {
				70	struct raparms *pb_head;
				71	spinlock_t pb_lock;
				72	} ____cacheline_aligned_in_smp;
				73
				74	#define RAPARM_HASH_BITS 4
				75	#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
				76	#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
				77	static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
				78
				79	/*
				80	* Called from nfsd_lookup and encode_dirent. Check if we have crossed
				81	* a mount point.
				82	* Returns -EAGAIN or -ETIMEDOUT leaving dpp and expp unchanged,
				83	* or nfs_ok having possibly changed dpp and expp
				84	*/
				85	int
				86	nfsd_cross_mnt(struct svc_rqst rqstp, struct dentry *dpp,
				87	struct svc_export **expp)
				88	{
				89	struct svc_export exp = expp, *exp2 = NULL;
				90	struct dentry dentry = dpp;
				91	struct path path = {.mnt = mntget(exp->ex_path.mnt),
				92	.dentry = dget(dentry)};
				93	int err = 0;
				94
				95	err = follow_down(&path);
				96	if (err < 0)
				97	goto out;
				98	if (path.mnt == exp->ex_path.mnt && path.dentry == dentry &&
				99	nfsd_mountpoint(dentry, exp) == 2) {
				100	/* This is only a mountpoint in some other namespace */
				101	path_put(&path);
				102	goto out;
				103	}
				104
				105	exp2 = rqst_exp_get_by_name(rqstp, &path);
				106	if (IS_ERR(exp2)) {
				107	err = PTR_ERR(exp2);
				108	/*
				109	* We normally allow NFS clients to continue
				110	* "underneath" a mountpoint that is not exported.
				111	* The exception is V4ROOT, where no traversal is ever
				112	* allowed without an explicit export of the new
				113	* directory.
				114	*/
				115	if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
				116	err = 0;
				117	path_put(&path);
				118	goto out;
				119	}
				120	if (nfsd_v4client(rqstp) \|\|
				121	(exp->ex_flags & NFSEXP_CROSSMOUNT) \|\| EX_NOHIDE(exp2)) {
				122	/* successfully crossed mount point */
				123	/*
				124	* This is subtle: path.dentry is not on path.mnt
				125	* at this point. The only reason we are safe is that
				126	* original mnt is pinned down by exp, so we should
				127	* put path before putting exp
				128	*/
				129	*dpp = path.dentry;
				130	path.dentry = dentry;
				131	*expp = exp2;
				132	exp2 = exp;
				133	}
				134	path_put(&path);
				135	exp_put(exp2);
				136	out:
				137	return err;
				138	}
				139
				140	static void follow_to_parent(struct path *path)
				141	{
				142	struct dentry *dp;
				143
				144	while (path->dentry == path->mnt->mnt_root && follow_up(path))
				145	;
				146	dp = dget_parent(path->dentry);
				147	dput(path->dentry);
				148	path->dentry = dp;
				149	}
				150
				151	static int nfsd_lookup_parent(struct svc_rqst rqstp, struct dentry dparent, struct svc_export exp, struct dentry dentryp)
				152	{
				153	struct svc_export *exp2;
				154	struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
				155	.dentry = dget(dparent)};
				156
				157	follow_to_parent(&path);
				158
				159	exp2 = rqst_exp_parent(rqstp, &path);
				160	if (PTR_ERR(exp2) == -ENOENT) {
				161	*dentryp = dget(dparent);
				162	} else if (IS_ERR(exp2)) {
				163	path_put(&path);
				164	return PTR_ERR(exp2);
				165	} else {
				166	*dentryp = dget(path.dentry);
				167	exp_put(*exp);
				168	*exp = exp2;
				169	}
				170	path_put(&path);
				171	return 0;
				172	}
				173
				174	/*
				175	* For nfsd purposes, we treat V4ROOT exports as though there was an
				176	* export at every directory.
				177	* We return:
				178	* '1' if this dentry must be an export point,
				179	* '2' if it might be, if there is really a mount here, and
				180	* '0' if there is no chance of an export point here.
				181	*/
				182	int nfsd_mountpoint(struct dentry dentry, struct svc_export exp)
				183	{
				184	if (!d_inode(dentry))
				185	return 0;
				186	if (exp->ex_flags & NFSEXP_V4ROOT)
				187	return 1;
				188	if (nfsd4_is_junction(dentry))
				189	return 1;
				190	if (d_mountpoint(dentry))
				191	/*
				192	* Might only be a mountpoint in a different namespace,
				193	* but we need to check.
				194	*/
				195	return 2;
				196	return 0;
				197	}
				198
				199	__be32
				200	nfsd_lookup_dentry(struct svc_rqst rqstp, struct svc_fh fhp,
				201	const char *name, unsigned int len,
				202	struct svc_export exp_ret, struct dentry dentry_ret)
				203	{
				204	struct svc_export *exp;
				205	struct dentry *dparent;
				206	struct dentry *dentry;
				207	int host_err;
				208
				209	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
				210
				211	dparent = fhp->fh_dentry;
				212	exp = exp_get(fhp->fh_export);
				213
				214	/* Lookup the name, but don't follow links */
				215	if (isdotent(name, len)) {
				216	if (len==1)
				217	dentry = dget(dparent);
				218	else if (dparent != exp->ex_path.dentry)
				219	dentry = dget_parent(dparent);
				220	else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
				221	dentry = dget(dparent); /* .. == . just like at / */
				222	else {
				223	/* checking mountpoint crossing is very different when stepping up */
				224	host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
				225	if (host_err)
				226	goto out_nfserr;
				227	}
				228	} else {
				229	/*
				230	* In the nfsd4_open() case, this may be held across
				231	* subsequent open and delegation acquisition which may
				232	* need to take the child's i_mutex:
				233	*/
				234	fh_lock_nested(fhp, I_MUTEX_PARENT);
				235	dentry = lookup_one_len(name, dparent, len);
				236	host_err = PTR_ERR(dentry);
				237	if (IS_ERR(dentry))
				238	goto out_nfserr;
				239	if (nfsd_mountpoint(dentry, exp)) {
				240	/*
				241	* We don't need the i_mutex after all. It's
				242	* still possible we could open this (regular
				243	* files can be mountpoints too), but the
				244	* i_mutex is just there to prevent renames of
				245	* something that we might be about to delegate,
				246	* and a mountpoint won't be renamed:
				247	*/
				248	fh_unlock(fhp);
				249	if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
				250	dput(dentry);
				251	goto out_nfserr;
				252	}
				253	}
				254	}
				255	*dentry_ret = dentry;
				256	*exp_ret = exp;
				257	return 0;
				258
				259	out_nfserr:
				260	exp_put(exp);
				261	return nfserrno(host_err);
				262	}
				263
				264	/*
				265	* Look up one component of a pathname.
				266	* N.B. After this call _both_ fhp and resfh need an fh_put
				267	*
				268	* If the lookup would cross a mountpoint, and the mounted filesystem
				269	* is exported to the client with NFSEXP_NOHIDE, then the lookup is
				270	* accepted as it stands and the mounted directory is
				271	* returned. Otherwise the covered directory is returned.
				272	* NOTE: this mountpoint crossing is not supported properly by all
				273	* clients and is explicitly disallowed for NFSv3
				274	* NeilBrown <neilb@cse.unsw.edu.au>
				275	*/
				276	__be32
				277	nfsd_lookup(struct svc_rqst rqstp, struct svc_fh fhp, const char *name,
				278	unsigned int len, struct svc_fh *resfh)
				279	{
				280	struct svc_export *exp;
				281	struct dentry *dentry;
				282	__be32 err;
				283
				284	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
				285	if (err)
				286	return err;
				287	err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
				288	if (err)
				289	return err;
				290	err = check_nfsd_access(exp, rqstp);
				291	if (err)
				292	goto out;
				293	/*
				294	* Note: we compose the file handle now, but as the
				295	* dentry may be negative, it may need to be updated.
				296	*/
				297	err = fh_compose(resfh, exp, dentry, fhp);
				298	if (!err && d_really_is_negative(dentry))
				299	err = nfserr_noent;
				300	out:
				301	dput(dentry);
				302	exp_put(exp);
				303	return err;
				304	}
				305
				306	/*
				307	* Commit metadata changes to stable storage.
				308	*/
				309	static int
				310	commit_metadata(struct svc_fh *fhp)
				311	{
				312	struct inode *inode = d_inode(fhp->fh_dentry);
				313	const struct export_operations *export_ops = inode->i_sb->s_export_op;
				314
				315	if (!EX_ISSYNC(fhp->fh_export))
				316	return 0;
				317
				318	if (export_ops->commit_metadata)
				319	return export_ops->commit_metadata(inode);
				320	return sync_inode_metadata(inode, 1);
				321	}
				322
				323	/*
				324	* Go over the attributes and take care of the small differences between
				325	* NFS semantics and what Linux expects.
				326	*/
				327	static void
				328	nfsd_sanitize_attrs(struct inode inode, struct iattr iap)
				329	{
				330	/* sanitize the mode change */
				331	if (iap->ia_valid & ATTR_MODE) {
				332	iap->ia_mode &= S_IALLUGO;
				333	iap->ia_mode \|= (inode->i_mode & ~S_IALLUGO);
				334	}
				335
				336	/* Revoke setuid/setgid on chown */
				337	if (!S_ISDIR(inode->i_mode) &&
				338	((iap->ia_valid & ATTR_UID) \|\| (iap->ia_valid & ATTR_GID))) {
				339	iap->ia_valid \|= ATTR_KILL_PRIV;
				340	if (iap->ia_valid & ATTR_MODE) {
				341	/* we're setting mode too, just clear the sid bits /
				342	iap->ia_mode &= ~S_ISUID;
				343	if (iap->ia_mode & S_IXGRP)
				344	iap->ia_mode &= ~S_ISGID;
				345	} else {
				346	/* set ATTR_KILL_* bits and let VFS handle it */
				347	iap->ia_valid \|= (ATTR_KILL_SUID \| ATTR_KILL_SGID);
				348	}
				349	}
				350	}
				351
				352	static __be32
				353	nfsd_get_write_access(struct svc_rqst rqstp, struct svc_fh fhp,
				354	struct iattr *iap)
				355	{
				356	struct inode *inode = d_inode(fhp->fh_dentry);
				357	int host_err;
				358
				359	if (iap->ia_size < inode->i_size) {
				360	__be32 err;
				361
				362	err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
				363	NFSD_MAY_TRUNC \| NFSD_MAY_OWNER_OVERRIDE);
				364	if (err)
				365	return err;
				366	}
				367
				368	host_err = get_write_access(inode);
				369	if (host_err)
				370	goto out_nfserrno;
				371
				372	host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
				373	if (host_err)
				374	goto out_put_write_access;
				375	return 0;
				376
				377	out_put_write_access:
				378	put_write_access(inode);
				379	out_nfserrno:
				380	return nfserrno(host_err);
				381	}
				382
				383	/*
				384	* Set various file attributes. After this call fhp needs an fh_put.
				385	*/
				386	__be32
				387	nfsd_setattr(struct svc_rqst rqstp, struct svc_fh fhp, struct iattr *iap,
				388	int check_guard, time_t guardtime)
				389	{
				390	struct dentry *dentry;
				391	struct inode *inode;
				392	int accmode = NFSD_MAY_SATTR;
				393	umode_t ftype = 0;
				394	__be32 err;
				395	int host_err;
				396	bool get_write_count;
				397	bool size_change = (iap->ia_valid & ATTR_SIZE);
				398
				399	if (iap->ia_valid & ATTR_SIZE) {
				400	accmode \|= NFSD_MAY_WRITE\|NFSD_MAY_OWNER_OVERRIDE;
				401	ftype = S_IFREG;
				402	}
				403
				404	/*
				405	* If utimes(2) and friends are called with times not NULL, we should
				406	* not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
				407	* will return EACCESS, when the caller's effective UID does not match
				408	* the owner of the file, and the caller is not privileged. In this
				409	* situation, we should return EPERM(notify_change will return this).
				410	*/
				411	if (iap->ia_valid & (ATTR_ATIME \| ATTR_MTIME)) {
				412	accmode \|= NFSD_MAY_OWNER_OVERRIDE;
				413	if (!(iap->ia_valid & (ATTR_ATIME_SET \| ATTR_MTIME_SET)))
				414	accmode \|= NFSD_MAY_WRITE;
				415	}
				416
				417	/* Callers that do fh_verify should do the fh_want_write: */
				418	get_write_count = !fhp->fh_dentry;
				419
				420	/* Get inode */
				421	err = fh_verify(rqstp, fhp, ftype, accmode);
				422	if (err)
				423	return err;
				424	if (get_write_count) {
				425	host_err = fh_want_write(fhp);
				426	if (host_err)
				427	goto out;
				428	}
				429
				430	dentry = fhp->fh_dentry;
				431	inode = d_inode(dentry);
				432
				433	/* Ignore any mode updates on symlinks */
				434	if (S_ISLNK(inode->i_mode))
				435	iap->ia_valid &= ~ATTR_MODE;
				436
				437	if (!iap->ia_valid)
				438	return 0;
				439
				440	nfsd_sanitize_attrs(inode, iap);
				441
				442	if (check_guard && guardtime != inode->i_ctime.tv_sec)
				443	return nfserr_notsync;
				444
				445	/*
				446	* The size case is special, it changes the file in addition to the
				447	* attributes, and file systems don't expect it to be mixed with
				448	* "random" attribute changes. We thus split out the size change
				449	* into a separate call to ->setattr, and do the rest as a separate
				450	* setattr call.
				451	*/
				452	if (size_change) {
				453	err = nfsd_get_write_access(rqstp, fhp, iap);
				454	if (err)
				455	return err;
				456	}
				457
				458	fh_lock(fhp);
				459	if (size_change) {
				460	/*
				461	* RFC5661, Section 18.30.4:
				462	* Changing the size of a file with SETATTR indirectly
				463	* changes the time_modify and change attributes.
				464	*
				465	* (and similar for the older RFCs)
				466	*/
				467	struct iattr size_attr = {
				468	.ia_valid = ATTR_SIZE \| ATTR_CTIME \| ATTR_MTIME,
				469	.ia_size = iap->ia_size,
				470	};
				471
				472	host_err = notify_change(dentry, &size_attr, NULL);
				473	if (host_err)
				474	goto out_unlock;
				475	iap->ia_valid &= ~ATTR_SIZE;
				476
				477	/*
				478	* Avoid the additional setattr call below if the only other
				479	* attribute that the client sends is the mtime, as we update
				480	* it as part of the size change above.
				481	*/
				482	if ((iap->ia_valid & ~ATTR_MTIME) == 0)
				483	goto out_unlock;
				484	}
				485
				486	iap->ia_valid \|= ATTR_CTIME;
				487	host_err = notify_change(dentry, iap, NULL);
				488
				489	out_unlock:
				490	fh_unlock(fhp);
				491	if (size_change)
				492	put_write_access(inode);
				493	out:
				494	if (!host_err)
				495	host_err = commit_metadata(fhp);
				496	return nfserrno(host_err);
				497	}
				498
				499	#if defined(CONFIG_NFSD_V4)
				500	/*
				501	* NFS junction information is stored in an extended attribute.
				502	*/
				503	#define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs"
				504
				505	/**
				506	* nfsd4_is_junction - Test if an object could be an NFS junction
				507	*
				508	* @dentry: object to test
				509	*
				510	* Returns 1 if "dentry" appears to contain NFS junction information.
				511	* Otherwise 0 is returned.
				512	*/
				513	int nfsd4_is_junction(struct dentry *dentry)
				514	{
				515	struct inode *inode = d_inode(dentry);
				516
				517	if (inode == NULL)
				518	return 0;
				519	if (inode->i_mode & S_IXUGO)
				520	return 0;
				521	if (!(inode->i_mode & S_ISVTX))
				522	return 0;
				523	if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
				524	return 0;
				525	return 1;
				526	}
				527	#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
				528	__be32 nfsd4_set_nfs4_label(struct svc_rqst rqstp, struct svc_fh fhp,
				529	struct xdr_netobj *label)
				530	{
				531	__be32 error;
				532	int host_error;
				533	struct dentry *dentry;
				534
				535	error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
				536	if (error)
				537	return error;
				538
				539	dentry = fhp->fh_dentry;
				540
				541	inode_lock(d_inode(dentry));
				542	host_error = security_inode_setsecctx(dentry, label->data, label->len);
				543	inode_unlock(d_inode(dentry));
				544	return nfserrno(host_error);
				545	}
				546	#else
				547	__be32 nfsd4_set_nfs4_label(struct svc_rqst rqstp, struct svc_fh fhp,
				548	struct xdr_netobj *label)
				549	{
				550	return nfserr_notsupp;
				551	}
				552	#endif
				553
				554	__be32 nfsd4_clone_file_range(struct file src, u64 src_pos, struct file dst,
				555	u64 dst_pos, u64 count)
				556	{
				557	return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
				558	count));
				559	}
				560
				561	ssize_t nfsd_copy_file_range(struct file src, u64 src_pos, struct file dst,
				562	u64 dst_pos, u64 count)
				563	{
				564
				565	/*
				566	* Limit copy to 4MB to prevent indefinitely blocking an nfsd
				567	* thread and client rpc slot. The choice of 4MB is somewhat
				568	* arbitrary. We might instead base this on r/wsize, or make it
				569	* tunable, or use a time instead of a byte limit, or implement
				570	* asynchronous copy. In theory a client could also recognize a
				571	* limit like this and pipeline multiple COPY requests.
				572	*/
				573	count = min_t(u64, count, 1 << 22);
				574	return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
				575	}
				576
				577	__be32 nfsd4_vfs_fallocate(struct svc_rqst rqstp, struct svc_fh fhp,
				578	struct file *file, loff_t offset, loff_t len,
				579	int flags)
				580	{
				581	int error;
				582
				583	if (!S_ISREG(file_inode(file)->i_mode))
				584	return nfserr_inval;
				585
				586	error = vfs_fallocate(file, flags, offset, len);
				587	if (!error)
				588	error = commit_metadata(fhp);
				589
				590	return nfserrno(error);
				591	}
				592	#endif /* defined(CONFIG_NFSD_V4) */
				593
				594	#ifdef CONFIG_NFSD_V3
				595	/*
				596	* Check server access rights to a file system object
				597	*/
				598	struct accessmap {
				599	u32 access;
				600	int how;
				601	};
				602	static struct accessmap nfs3_regaccess[] = {
				603	{ NFS3_ACCESS_READ, NFSD_MAY_READ },
				604	{ NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
				605	{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE\|NFSD_MAY_TRUNC },
				606	{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
				607
				608	{ 0, 0 }
				609	};
				610
				611	static struct accessmap nfs3_diraccess[] = {
				612	{ NFS3_ACCESS_READ, NFSD_MAY_READ },
				613	{ NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC },
				614	{ NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC\|NFSD_MAY_WRITE\|NFSD_MAY_TRUNC},
				615	{ NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC\|NFSD_MAY_WRITE },
				616	{ NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
				617
				618	{ 0, 0 }
				619	};
				620
				621	static struct accessmap nfs3_anyaccess[] = {
				622	/* Some clients - Solaris 2.6 at least, make an access call
				623	* to the server to check for access for things like /dev/null
				624	* (which really, the server doesn't care about). So
				625	* We provide simple access checking for them, looking
				626	* mainly at mode bits, and we make sure to ignore read-only
				627	* filesystem checks
				628	*/
				629	{ NFS3_ACCESS_READ, NFSD_MAY_READ },
				630	{ NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
				631	{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE\|NFSD_MAY_LOCAL_ACCESS },
				632	{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE\|NFSD_MAY_LOCAL_ACCESS },
				633
				634	{ 0, 0 }
				635	};
				636
				637	__be32
				638	nfsd_access(struct svc_rqst rqstp, struct svc_fh fhp, u32 access, u32 supported)
				639	{
				640	struct accessmap *map;
				641	struct svc_export *export;
				642	struct dentry *dentry;
				643	u32 query, result = 0, sresult = 0;
				644	__be32 error;
				645
				646	error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
				647	if (error)
				648	goto out;
				649
				650	export = fhp->fh_export;
				651	dentry = fhp->fh_dentry;
				652
				653	if (d_is_reg(dentry))
				654	map = nfs3_regaccess;
				655	else if (d_is_dir(dentry))
				656	map = nfs3_diraccess;
				657	else
				658	map = nfs3_anyaccess;
				659
				660
				661	query = *access;
				662	for (; map->access; map++) {
				663	if (map->access & query) {
				664	__be32 err2;
				665
				666	sresult \|= map->access;
				667
				668	err2 = nfsd_permission(rqstp, export, dentry, map->how);
				669	switch (err2) {
				670	case nfs_ok:
				671	result \|= map->access;
				672	break;
				673
				674	/* the following error codes just mean the access was not allowed,
				675	* rather than an error occurred */
				676	case nfserr_rofs:
				677	case nfserr_acces:
				678	case nfserr_perm:
				679	/* simply don't "or" in the access bit. */
				680	break;
				681	default:
				682	error = err2;
				683	goto out;
				684	}
				685	}
				686	}
				687	*access = result;
				688	if (supported)
				689	*supported = sresult;
				690
				691	out:
				692	return error;
				693	}
				694	#endif /* CONFIG_NFSD_V3 */
				695
				696	static int nfsd_open_break_lease(struct inode *inode, int access)
				697	{
				698	unsigned int mode;
				699
				700	if (access & NFSD_MAY_NOT_BREAK_LEASE)
				701	return 0;
				702	mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
				703	return break_lease(inode, mode \| O_NONBLOCK);
				704	}
				705
				706	/*
				707	* Open an existing file or directory.
				708	* The may_flags argument indicates the type of open (read/write/lock)
				709	* and additional flags.
				710	* N.B. After this call fhp needs an fh_put
				711	*/
				712	__be32
				713	nfsd_open(struct svc_rqst rqstp, struct svc_fh fhp, umode_t type,
				714	int may_flags, struct file **filp)
				715	{
				716	struct path path;
				717	struct inode *inode;
				718	struct file *file;
				719	int flags = O_RDONLY\|O_LARGEFILE;
				720	__be32 err;
				721	int host_err = 0;
				722
				723	validate_process_creds();
				724
				725	/*
				726	* If we get here, then the client has already done an "open",
				727	* and (hopefully) checked permission - so allow OWNER_OVERRIDE
				728	* in case a chmod has now revoked permission.
				729	*
				730	* Arguably we should also allow the owner override for
				731	* directories, but we never have and it doesn't seem to have
				732	* caused anyone a problem. If we were to change this, note
				733	* also that our filldir callbacks would need a variant of
				734	* lookup_one_len that doesn't check permissions.
				735	*/
				736	if (type == S_IFREG)
				737	may_flags \|= NFSD_MAY_OWNER_OVERRIDE;
				738	err = fh_verify(rqstp, fhp, type, may_flags);
				739	if (err)
				740	goto out;
				741
				742	path.mnt = fhp->fh_export->ex_path.mnt;
				743	path.dentry = fhp->fh_dentry;
				744	inode = d_inode(path.dentry);
				745
				746	/* Disallow write access to files with the append-only bit set
				747	* or any access when mandatory locking enabled
				748	*/
				749	err = nfserr_perm;
				750	if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
				751	goto out;
				752	/*
				753	* We must ignore files (but only files) which might have mandatory
				754	* locks on them because there is no way to know if the accesser has
				755	* the lock.
				756	*/
				757	if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
				758	goto out;
				759
				760	if (!inode->i_fop)
				761	goto out;
				762
				763	host_err = nfsd_open_break_lease(inode, may_flags);
				764	if (host_err) /* NOMEM or WOULDBLOCK */
				765	goto out_nfserr;
				766
				767	if (may_flags & NFSD_MAY_WRITE) {
				768	if (may_flags & NFSD_MAY_READ)
				769	flags = O_RDWR\|O_LARGEFILE;
				770	else
				771	flags = O_WRONLY\|O_LARGEFILE;
				772	}
				773
				774	file = dentry_open(&path, flags, current_cred());
				775	if (IS_ERR(file)) {
				776	host_err = PTR_ERR(file);
				777	goto out_nfserr;
				778	}
				779
				780	host_err = ima_file_check(file, may_flags, 0);
				781	if (host_err) {
				782	fput(file);
				783	goto out_nfserr;
				784	}
				785
				786	if (may_flags & NFSD_MAY_64BIT_COOKIE)
				787	file->f_mode \|= FMODE_64BITHASH;
				788	else
				789	file->f_mode \|= FMODE_32BITHASH;
				790
				791	*filp = file;
				792	out_nfserr:
				793	err = nfserrno(host_err);
				794	out:
				795	validate_process_creds();
				796	return err;
				797	}
				798
				799	struct raparms *
				800	nfsd_init_raparms(struct file *file)
				801	{
				802	struct inode *inode = file_inode(file);
				803	dev_t dev = inode->i_sb->s_dev;
				804	ino_t ino = inode->i_ino;
				805	struct raparms ra, rap, *frap = NULL;
				806	int depth = 0;
				807	unsigned int hash;
				808	struct raparm_hbucket *rab;
				809
				810	hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
				811	rab = &raparm_hash[hash];
				812
				813	spin_lock(&rab->pb_lock);
				814	for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
				815	if (ra->p_ino == ino && ra->p_dev == dev)
				816	goto found;
				817	depth++;
				818	if (ra->p_count == 0)
				819	frap = rap;
				820	}
				821	depth = nfsdstats.ra_size;
				822	if (!frap) {
				823	spin_unlock(&rab->pb_lock);
				824	return NULL;
				825	}
				826	rap = frap;
				827	ra = *frap;
				828	ra->p_dev = dev;
				829	ra->p_ino = ino;
				830	ra->p_set = 0;
				831	ra->p_hindex = hash;
				832	found:
				833	if (rap != &rab->pb_head) {
				834	*rap = ra->p_next;
				835	ra->p_next = rab->pb_head;
				836	rab->pb_head = ra;
				837	}
				838	ra->p_count++;
				839	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
				840	spin_unlock(&rab->pb_lock);
				841
				842	if (ra->p_set)
				843	file->f_ra = ra->p_ra;
				844	return ra;
				845	}
				846
				847	void nfsd_put_raparams(struct file file, struct raparms ra)
				848	{
				849	struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
				850
				851	spin_lock(&rab->pb_lock);
				852	ra->p_ra = file->f_ra;
				853	ra->p_set = 1;
				854	ra->p_count--;
				855	spin_unlock(&rab->pb_lock);
				856	}
				857
				858	/*
				859	* Grab and keep cached pages associated with a file in the svc_rqst
				860	* so that they can be passed to the network sendmsg/sendpage routines
				861	* directly. They will be released after the sending has completed.
				862	*/
				863	static int
				864	nfsd_splice_actor(struct pipe_inode_info pipe, struct pipe_buffer buf,
				865	struct splice_desc *sd)
				866	{
				867	struct svc_rqst *rqstp = sd->u.data;
				868	struct page **pp = rqstp->rq_next_page;
				869	struct page *page = buf->page;
				870	size_t size;
				871
				872	size = sd->len;
				873
				874	if (rqstp->rq_res.page_len == 0) {
				875	get_page(page);
				876	put_page(*rqstp->rq_next_page);
				877	*(rqstp->rq_next_page++) = page;
				878	rqstp->rq_res.page_base = buf->offset;
				879	rqstp->rq_res.page_len = size;
				880	} else if (page != pp[-1]) {
				881	get_page(page);
				882	if (*rqstp->rq_next_page)
				883	put_page(*rqstp->rq_next_page);
				884	*(rqstp->rq_next_page++) = page;
				885	rqstp->rq_res.page_len += size;
				886	} else
				887	rqstp->rq_res.page_len += size;
				888
				889	return size;
				890	}
				891
				892	static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
				893	struct splice_desc *sd)
				894	{
				895	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
				896	}
				897
				898	static __be32
				899	nfsd_finish_read(struct file file, unsigned long count, int host_err)
				900	{
				901	if (host_err >= 0) {
				902	nfsdstats.io_read += host_err;
				903	*count = host_err;
				904	fsnotify_access(file);
				905	return 0;
				906	} else
				907	return nfserrno(host_err);
				908	}
				909
				910	__be32 nfsd_splice_read(struct svc_rqst *rqstp,
				911	struct file file, loff_t offset, unsigned long count)
				912	{
				913	struct splice_desc sd = {
				914	.len = 0,
				915	.total_len = *count,
				916	.pos = offset,
				917	.u.data = rqstp,
				918	};
				919	int host_err;
				920
				921	rqstp->rq_next_page = rqstp->rq_respages + 1;
				922	host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
				923	return nfsd_finish_read(file, count, host_err);
				924	}
				925
				926	__be32 nfsd_readv(struct file file, loff_t offset, struct kvec vec, int vlen,
				927	unsigned long *count)
				928	{
				929	struct iov_iter iter;
				930	int host_err;
				931
				932	iov_iter_kvec(&iter, READ \| ITER_KVEC, vec, vlen, *count);
				933	host_err = vfs_iter_read(file, &iter, &offset, 0);
				934
				935	return nfsd_finish_read(file, count, host_err);
				936	}
				937
				938	/*
				939	* Gathered writes: If another process is currently writing to the file,
				940	* there's a high chance this is another nfsd (triggered by a bulk write
				941	* from a client's biod). Rather than syncing the file with each write
				942	* request, we sleep for 10 msec.
				943	*
				944	* I don't know if this roughly approximates C. Juszak's idea of
				945	* gathered writes, but it's a nice and simple solution (IMHO), and it
				946	* seems to work:-)
				947	*
				948	* Note: we do this only in the NFSv2 case, since v3 and higher have a
				949	* better tool (separate unstable writes and commits) for solving this
				950	* problem.
				951	*/
				952	static int wait_for_concurrent_writes(struct file *file)
				953	{
				954	struct inode *inode = file_inode(file);
				955	static ino_t last_ino;
				956	static dev_t last_dev;
				957	int err = 0;
				958
				959	if (atomic_read(&inode->i_writecount) > 1
				960	\|\| (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
				961	dprintk("nfsd: write defer %d\n", task_pid_nr(current));
				962	msleep(10);
				963	dprintk("nfsd: write resume %d\n", task_pid_nr(current));
				964	}
				965
				966	if (inode->i_state & I_DIRTY) {
				967	dprintk("nfsd: write sync %d\n", task_pid_nr(current));
				968	err = vfs_fsync(file, 0);
				969	}
				970	last_ino = inode->i_ino;
				971	last_dev = inode->i_sb->s_dev;
				972	return err;
				973	}
				974
				975	__be32
				976	nfsd_vfs_write(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
				977	loff_t offset, struct kvec *vec, int vlen,
				978	unsigned long *cnt, int stable)
				979	{
				980	struct svc_export *exp;
				981	struct iov_iter iter;
				982	__be32 err = 0;
				983	int host_err;
				984	int use_wgather;
				985	loff_t pos = offset;
				986	unsigned int pflags = current->flags;
				987	rwf_t flags = 0;
				988
				989	if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
				990	/*
				991	* We want less throttling in balance_dirty_pages()
				992	* and shrink_inactive_list() so that nfs to
				993	* localhost doesn't cause nfsd to lock up due to all
				994	* the client's dirty pages or its congested queue.
				995	*/
				996	current->flags \|= PF_LESS_THROTTLE;
				997
				998	exp = fhp->fh_export;
				999	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
				1000
				1001	if (!EX_ISSYNC(exp))
				1002	stable = NFS_UNSTABLE;
				1003
				1004	if (stable && !use_wgather)
				1005	flags \|= RWF_SYNC;
				1006
				1007	iov_iter_kvec(&iter, WRITE \| ITER_KVEC, vec, vlen, *cnt);
				1008	host_err = vfs_iter_write(file, &iter, &pos, flags);
				1009	if (host_err < 0)
				1010	goto out_nfserr;
				1011	*cnt = host_err;
				1012	nfsdstats.io_write += host_err;
				1013	fsnotify_modify(file);
				1014
				1015	if (stable && use_wgather)
				1016	host_err = wait_for_concurrent_writes(file);
				1017
				1018	out_nfserr:
				1019	dprintk("nfsd: write complete host_err=%d\n", host_err);
				1020	if (host_err >= 0)
				1021	err = 0;
				1022	else
				1023	err = nfserrno(host_err);
				1024	if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
				1025	current_restore_flags(pflags, PF_LESS_THROTTLE);
				1026	return err;
				1027	}
				1028
				1029	/*
				1030	* Read data from a file. count must contain the requested read count
				1031	* on entry. On return, *count contains the number of bytes actually read.
				1032	* N.B. After this call fhp needs an fh_put
				1033	*/
				1034	__be32 nfsd_read(struct svc_rqst rqstp, struct svc_fh fhp,
				1035	loff_t offset, struct kvec vec, int vlen, unsigned long count)
				1036	{
				1037	struct file *file;
				1038	struct raparms *ra;
				1039	__be32 err;
				1040
				1041	trace_read_start(rqstp, fhp, offset, vlen);
				1042	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
				1043	if (err)
				1044	return err;
				1045
				1046	ra = nfsd_init_raparms(file);
				1047
				1048	trace_read_opened(rqstp, fhp, offset, vlen);
				1049
				1050	if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
				1051	err = nfsd_splice_read(rqstp, file, offset, count);
				1052	else
				1053	err = nfsd_readv(file, offset, vec, vlen, count);
				1054
				1055	trace_read_io_done(rqstp, fhp, offset, vlen);
				1056
				1057	if (ra)
				1058	nfsd_put_raparams(file, ra);
				1059	fput(file);
				1060
				1061	trace_read_done(rqstp, fhp, offset, vlen);
				1062
				1063	return err;
				1064	}
				1065
				1066	/*
				1067	* Write data to a file.
				1068	* The stable flag requests synchronous writes.
				1069	* N.B. After this call fhp needs an fh_put
				1070	*/
				1071	__be32
				1072	nfsd_write(struct svc_rqst rqstp, struct svc_fh fhp, loff_t offset,
				1073	struct kvec vec, int vlen, unsigned long cnt, int stable)
				1074	{
				1075	struct file *file = NULL;
				1076	__be32 err = 0;
				1077
				1078	trace_write_start(rqstp, fhp, offset, vlen);
				1079
				1080	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
				1081	if (err)
				1082	goto out;
				1083
				1084	trace_write_opened(rqstp, fhp, offset, vlen);
				1085	err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
				1086	trace_write_io_done(rqstp, fhp, offset, vlen);
				1087	fput(file);
				1088	out:
				1089	trace_write_done(rqstp, fhp, offset, vlen);
				1090	return err;
				1091	}
				1092
				1093	#ifdef CONFIG_NFSD_V3
				1094	/*
				1095	* Commit all pending writes to stable storage.
				1096	*
				1097	* Note: we only guarantee that data that lies within the range specified
				1098	* by the 'offset' and 'count' parameters will be synced.
				1099	*
				1100	* Unfortunately we cannot lock the file to make sure we return full WCC
				1101	* data to the client, as locking happens lower down in the filesystem.
				1102	*/
				1103	__be32
				1104	nfsd_commit(struct svc_rqst rqstp, struct svc_fh fhp,
				1105	loff_t offset, unsigned long count)
				1106	{
				1107	struct file *file;
				1108	loff_t end = LLONG_MAX;
				1109	__be32 err = nfserr_inval;
				1110
				1111	if (offset < 0)
				1112	goto out;
				1113	if (count != 0) {
				1114	end = offset + (loff_t)count - 1;
				1115	if (end < offset)
				1116	goto out;
				1117	}
				1118
				1119	err = nfsd_open(rqstp, fhp, S_IFREG,
				1120	NFSD_MAY_WRITE\|NFSD_MAY_NOT_BREAK_LEASE, &file);
				1121	if (err)
				1122	goto out;
				1123	if (EX_ISSYNC(fhp->fh_export)) {
				1124	int err2 = vfs_fsync_range(file, offset, end, 0);
				1125
				1126	if (err2 != -EINVAL)
				1127	err = nfserrno(err2);
				1128	else
				1129	err = nfserr_notsupp;
				1130	}
				1131
				1132	fput(file);
				1133	out:
				1134	return err;
				1135	}
				1136	#endif /* CONFIG_NFSD_V3 */
				1137
				1138	static __be32
				1139	nfsd_create_setattr(struct svc_rqst rqstp, struct svc_fh resfhp,
				1140	struct iattr *iap)
				1141	{
				1142	/*
				1143	* Mode has already been set earlier in create:
				1144	*/
				1145	iap->ia_valid &= ~ATTR_MODE;
				1146	/*
				1147	* Setting uid/gid works only for root. Irix appears to
				1148	* send along the gid on create when it tries to implement
				1149	* setgid directories via NFS:
				1150	*/
				1151	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
				1152	iap->ia_valid &= ~(ATTR_UID\|ATTR_GID);
				1153	if (iap->ia_valid)
				1154	return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
				1155	/* Callers expect file metadata to be committed here */
				1156	return nfserrno(commit_metadata(resfhp));
				1157	}
				1158
				1159	/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
				1160	* setting size to 0 may fail for some specific file systems by the permission
				1161	* checking which requires WRITE permission but the mode is 000.
				1162	* we ignore the resizing(to 0) on the just new created file, since the size is
				1163	* 0 after file created.
				1164	*
				1165	* call this only after vfs_create() is called.
				1166	* */
				1167	static void
				1168	nfsd_check_ignore_resizing(struct iattr *iap)
				1169	{
				1170	if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
				1171	iap->ia_valid &= ~ATTR_SIZE;
				1172	}
				1173
				1174	/* The parent directory should already be locked: */
				1175	__be32
				1176	nfsd_create_locked(struct svc_rqst rqstp, struct svc_fh fhp,
				1177	char fname, int flen, struct iattr iap,
				1178	int type, dev_t rdev, struct svc_fh *resfhp)
				1179	{
				1180	struct dentry dentry, dchild;
				1181	struct inode *dirp;
				1182	__be32 err;
				1183	__be32 err2;
				1184	int host_err;
				1185
				1186	dentry = fhp->fh_dentry;
				1187	dirp = d_inode(dentry);
				1188
				1189	dchild = dget(resfhp->fh_dentry);
				1190	if (!fhp->fh_locked) {
				1191	WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n",
				1192	dentry);
				1193	err = nfserr_io;
				1194	goto out;
				1195	}
				1196
				1197	err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE);
				1198	if (err)
				1199	goto out;
				1200
				1201	if (!(iap->ia_valid & ATTR_MODE))
				1202	iap->ia_mode = 0;
				1203	iap->ia_mode = (iap->ia_mode & S_IALLUGO) \| type;
				1204
				1205	if (!IS_POSIXACL(dirp))
				1206	iap->ia_mode &= ~current_umask();
				1207
				1208	err = 0;
				1209	host_err = 0;
				1210	switch (type) {
				1211	case S_IFREG:
				1212	host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
				1213	if (!host_err)
				1214	nfsd_check_ignore_resizing(iap);
				1215	break;
				1216	case S_IFDIR:
				1217	host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
				1218	break;
				1219	case S_IFCHR:
				1220	case S_IFBLK:
				1221	case S_IFIFO:
				1222	case S_IFSOCK:
				1223	host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
				1224	break;
				1225	default:
				1226	printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
				1227	type);
				1228	host_err = -EINVAL;
				1229	}
				1230	if (host_err < 0)
				1231	goto out_nfserr;
				1232
				1233	err = nfsd_create_setattr(rqstp, resfhp, iap);
				1234
				1235	/*
				1236	* nfsd_create_setattr already committed the child. Transactional
				1237	* filesystems had a chance to commit changes for both parent and
				1238	* child simultaneously making the following commit_metadata a
				1239	* noop.
				1240	*/
				1241	err2 = nfserrno(commit_metadata(fhp));
				1242	if (err2)
				1243	err = err2;
				1244	/*
				1245	* Update the file handle to get the new inode info.
				1246	*/
				1247	if (!err)
				1248	err = fh_update(resfhp);
				1249	out:
				1250	dput(dchild);
				1251	return err;
				1252
				1253	out_nfserr:
				1254	err = nfserrno(host_err);
				1255	goto out;
				1256	}
				1257
				1258	/*
				1259	* Create a filesystem object (regular, directory, special).
				1260	* Note that the parent directory is left locked.
				1261	*
				1262	* N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
				1263	*/
				1264	__be32
				1265	nfsd_create(struct svc_rqst rqstp, struct svc_fh fhp,
				1266	char fname, int flen, struct iattr iap,
				1267	int type, dev_t rdev, struct svc_fh *resfhp)
				1268	{
				1269	struct dentry dentry, dchild = NULL;
				1270	struct inode *dirp;
				1271	__be32 err;
				1272	int host_err;
				1273
				1274	if (isdotent(fname, flen))
				1275	return nfserr_exist;
				1276
				1277	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_NOP);
				1278	if (err)
				1279	return err;
				1280
				1281	dentry = fhp->fh_dentry;
				1282	dirp = d_inode(dentry);
				1283
				1284	host_err = fh_want_write(fhp);
				1285	if (host_err)
				1286	return nfserrno(host_err);
				1287
				1288	fh_lock_nested(fhp, I_MUTEX_PARENT);
				1289	dchild = lookup_one_len(fname, dentry, flen);
				1290	host_err = PTR_ERR(dchild);
				1291	if (IS_ERR(dchild))
				1292	return nfserrno(host_err);
				1293	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
				1294	/*
				1295	* We unconditionally drop our ref to dchild as fh_compose will have
				1296	* already grabbed its own ref for it.
				1297	*/
				1298	dput(dchild);
				1299	if (err)
				1300	return err;
				1301	return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
				1302	rdev, resfhp);
				1303	}
				1304
				1305	#ifdef CONFIG_NFSD_V3
				1306
				1307	/*
				1308	* NFSv3 and NFSv4 version of nfsd_create
				1309	*/
				1310	__be32
				1311	do_nfsd_create(struct svc_rqst rqstp, struct svc_fh fhp,
				1312	char fname, int flen, struct iattr iap,
				1313	struct svc_fh resfhp, int createmode, u32 verifier,
				1314	bool truncp, bool created)
				1315	{
				1316	struct dentry dentry, dchild = NULL;
				1317	struct inode *dirp;
				1318	__be32 err;
				1319	int host_err;
				1320	__u32 v_mtime=0, v_atime=0;
				1321
				1322	err = nfserr_perm;
				1323	if (!flen)
				1324	goto out;
				1325	err = nfserr_exist;
				1326	if (isdotent(fname, flen))
				1327	goto out;
				1328	if (!(iap->ia_valid & ATTR_MODE))
				1329	iap->ia_mode = 0;
				1330	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
				1331	if (err)
				1332	goto out;
				1333
				1334	dentry = fhp->fh_dentry;
				1335	dirp = d_inode(dentry);
				1336
				1337	host_err = fh_want_write(fhp);
				1338	if (host_err)
				1339	goto out_nfserr;
				1340
				1341	fh_lock_nested(fhp, I_MUTEX_PARENT);
				1342
				1343	/*
				1344	* Compose the response file handle.
				1345	*/
				1346	dchild = lookup_one_len(fname, dentry, flen);
				1347	host_err = PTR_ERR(dchild);
				1348	if (IS_ERR(dchild))
				1349	goto out_nfserr;
				1350
				1351	/* If file doesn't exist, check for permissions to create one */
				1352	if (d_really_is_negative(dchild)) {
				1353	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
				1354	if (err)
				1355	goto out;
				1356	}
				1357
				1358	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
				1359	if (err)
				1360	goto out;
				1361
				1362	if (nfsd_create_is_exclusive(createmode)) {
				1363	/* solaris7 gets confused (bugid 4218508) if these have
				1364	* the high bit set, so just clear the high bits. If this is
				1365	* ever changed to use different attrs for storing the
				1366	* verifier, then do_open_lookup() will also need to be fixed
				1367	* accordingly.
				1368	*/
				1369	v_mtime = verifier[0]&0x7fffffff;
				1370	v_atime = verifier[1]&0x7fffffff;
				1371	}
				1372
				1373	if (d_really_is_positive(dchild)) {
				1374	err = 0;
				1375
				1376	switch (createmode) {
				1377	case NFS3_CREATE_UNCHECKED:
				1378	if (! d_is_reg(dchild))
				1379	goto out;
				1380	else if (truncp) {
				1381	/* in nfsv4, we need to treat this case a little
				1382	* differently. we don't want to truncate the
				1383	* file now; this would be wrong if the OPEN
				1384	* fails for some other reason. furthermore,
				1385	* if the size is nonzero, we should ignore it
				1386	* according to spec!
				1387	*/
				1388	*truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
				1389	}
				1390	else {
				1391	iap->ia_valid &= ATTR_SIZE;
				1392	goto set_attr;
				1393	}
				1394	break;
				1395	case NFS3_CREATE_EXCLUSIVE:
				1396	if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
				1397	&& d_inode(dchild)->i_atime.tv_sec == v_atime
				1398	&& d_inode(dchild)->i_size == 0 ) {
				1399	if (created)
				1400	*created = 1;
				1401	break;
				1402	}
				1403	case NFS4_CREATE_EXCLUSIVE4_1:
				1404	if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
				1405	&& d_inode(dchild)->i_atime.tv_sec == v_atime
				1406	&& d_inode(dchild)->i_size == 0 ) {
				1407	if (created)
				1408	*created = 1;
				1409	goto set_attr;
				1410	}
				1411	/* fallthru */
				1412	case NFS3_CREATE_GUARDED:
				1413	err = nfserr_exist;
				1414	}
				1415	fh_drop_write(fhp);
				1416	goto out;
				1417	}
				1418
				1419	if (!IS_POSIXACL(dirp))
				1420	iap->ia_mode &= ~current_umask();
				1421
				1422	host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
				1423	if (host_err < 0) {
				1424	fh_drop_write(fhp);
				1425	goto out_nfserr;
				1426	}
				1427	if (created)
				1428	*created = 1;
				1429
				1430	nfsd_check_ignore_resizing(iap);
				1431
				1432	if (nfsd_create_is_exclusive(createmode)) {
				1433	/* Cram the verifier into atime/mtime */
				1434	iap->ia_valid = ATTR_MTIME\|ATTR_ATIME
				1435	\| ATTR_MTIME_SET\|ATTR_ATIME_SET;
				1436	/* XXX someone who knows this better please fix it for nsec */
				1437	iap->ia_mtime.tv_sec = v_mtime;
				1438	iap->ia_atime.tv_sec = v_atime;
				1439	iap->ia_mtime.tv_nsec = 0;
				1440	iap->ia_atime.tv_nsec = 0;
				1441	}
				1442
				1443	set_attr:
				1444	err = nfsd_create_setattr(rqstp, resfhp, iap);
				1445
				1446	/*
				1447	* nfsd_create_setattr already committed the child
				1448	* (and possibly also the parent).
				1449	*/
				1450	if (!err)
				1451	err = nfserrno(commit_metadata(fhp));
				1452
				1453	/*
				1454	* Update the filehandle to get the new inode info.
				1455	*/
				1456	if (!err)
				1457	err = fh_update(resfhp);
				1458
				1459	out:
				1460	fh_unlock(fhp);
				1461	if (dchild && !IS_ERR(dchild))
				1462	dput(dchild);
				1463	fh_drop_write(fhp);
				1464	return err;
				1465
				1466	out_nfserr:
				1467	err = nfserrno(host_err);
				1468	goto out;
				1469	}
				1470	#endif /* CONFIG_NFSD_V3 */
				1471
				1472	/*
				1473	* Read a symlink. On entry, *lenp must contain the maximum path length that
				1474	* fits into the buffer. On return, it contains the true length.
				1475	* N.B. After this call fhp needs an fh_put
				1476	*/
				1477	__be32
				1478	nfsd_readlink(struct svc_rqst rqstp, struct svc_fh fhp, char buf, int lenp)
				1479	{
				1480	__be32 err;
				1481	const char *link;
				1482	struct path path;
				1483	DEFINE_DELAYED_CALL(done);
				1484	int len;
				1485
				1486	err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
				1487	if (unlikely(err))
				1488	return err;
				1489
				1490	path.mnt = fhp->fh_export->ex_path.mnt;
				1491	path.dentry = fhp->fh_dentry;
				1492
				1493	if (unlikely(!d_is_symlink(path.dentry)))
				1494	return nfserr_inval;
				1495
				1496	touch_atime(&path);
				1497
				1498	link = vfs_get_link(path.dentry, &done);
				1499	if (IS_ERR(link))
				1500	return nfserrno(PTR_ERR(link));
				1501
				1502	len = strlen(link);
				1503	if (len < *lenp)
				1504	*lenp = len;
				1505	memcpy(buf, link, *lenp);
				1506	do_delayed_call(&done);
				1507	return 0;
				1508	}
				1509
				1510	/*
				1511	* Create a symlink and look up its inode
				1512	* N.B. After this call _both_ fhp and resfhp need an fh_put
				1513	*/
				1514	__be32
				1515	nfsd_symlink(struct svc_rqst rqstp, struct svc_fh fhp,
				1516	char *fname, int flen,
				1517	char *path,
				1518	struct svc_fh *resfhp)
				1519	{
				1520	struct dentry dentry, dnew;
				1521	__be32 err, cerr;
				1522	int host_err;
				1523
				1524	err = nfserr_noent;
				1525	if (!flen \|\| path[0] == '\0')
				1526	goto out;
				1527	err = nfserr_exist;
				1528	if (isdotent(fname, flen))
				1529	goto out;
				1530
				1531	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
				1532	if (err)
				1533	goto out;
				1534
				1535	host_err = fh_want_write(fhp);
				1536	if (host_err)
				1537	goto out_nfserr;
				1538
				1539	fh_lock(fhp);
				1540	dentry = fhp->fh_dentry;
				1541	dnew = lookup_one_len(fname, dentry, flen);
				1542	host_err = PTR_ERR(dnew);
				1543	if (IS_ERR(dnew))
				1544	goto out_nfserr;
				1545
				1546	host_err = vfs_symlink(d_inode(dentry), dnew, path);
				1547	err = nfserrno(host_err);
				1548	if (!err)
				1549	err = nfserrno(commit_metadata(fhp));
				1550	fh_unlock(fhp);
				1551
				1552	fh_drop_write(fhp);
				1553
				1554	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
				1555	dput(dnew);
				1556	if (err==0) err = cerr;
				1557	out:
				1558	return err;
				1559
				1560	out_nfserr:
				1561	err = nfserrno(host_err);
				1562	goto out;
				1563	}
				1564
				1565	/*
				1566	* Create a hardlink
				1567	* N.B. After this call _both_ ffhp and tfhp need an fh_put
				1568	*/
				1569	__be32
				1570	nfsd_link(struct svc_rqst rqstp, struct svc_fh ffhp,
				1571	char name, int len, struct svc_fh tfhp)
				1572	{
				1573	struct dentry ddir, dnew, *dold;
				1574	struct inode *dirp;
				1575	__be32 err;
				1576	int host_err;
				1577
				1578	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
				1579	if (err)
				1580	goto out;
				1581	err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP);
				1582	if (err)
				1583	goto out;
				1584	err = nfserr_isdir;
				1585	if (d_is_dir(tfhp->fh_dentry))
				1586	goto out;
				1587	err = nfserr_perm;
				1588	if (!len)
				1589	goto out;
				1590	err = nfserr_exist;
				1591	if (isdotent(name, len))
				1592	goto out;
				1593
				1594	host_err = fh_want_write(tfhp);
				1595	if (host_err) {
				1596	err = nfserrno(host_err);
				1597	goto out;
				1598	}
				1599
				1600	fh_lock_nested(ffhp, I_MUTEX_PARENT);
				1601	ddir = ffhp->fh_dentry;
				1602	dirp = d_inode(ddir);
				1603
				1604	dnew = lookup_one_len(name, ddir, len);
				1605	host_err = PTR_ERR(dnew);
				1606	if (IS_ERR(dnew))
				1607	goto out_nfserr;
				1608
				1609	dold = tfhp->fh_dentry;
				1610
				1611	err = nfserr_noent;
				1612	if (d_really_is_negative(dold))
				1613	goto out_dput;
				1614	host_err = vfs_link(dold, dirp, dnew, NULL);
				1615	if (!host_err) {
				1616	err = nfserrno(commit_metadata(ffhp));
				1617	if (!err)
				1618	err = nfserrno(commit_metadata(tfhp));
				1619	} else {
				1620	if (host_err == -EXDEV && rqstp->rq_vers == 2)
				1621	err = nfserr_acces;
				1622	else
				1623	err = nfserrno(host_err);
				1624	}
				1625	out_dput:
				1626	dput(dnew);
				1627	out_unlock:
				1628	fh_unlock(ffhp);
				1629	fh_drop_write(tfhp);
				1630	out:
				1631	return err;
				1632
				1633	out_nfserr:
				1634	err = nfserrno(host_err);
				1635	goto out_unlock;
				1636	}
				1637
				1638	/*
				1639	* Rename a file
				1640	* N.B. After this call _both_ ffhp and tfhp need an fh_put
				1641	*/
				1642	__be32
				1643	nfsd_rename(struct svc_rqst rqstp, struct svc_fh ffhp, char *fname, int flen,
				1644	struct svc_fh tfhp, char tname, int tlen)
				1645	{
				1646	struct dentry fdentry, tdentry, odentry, ndentry, *trap;
				1647	struct inode fdir, tdir;
				1648	__be32 err;
				1649	int host_err;
				1650
				1651	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
				1652	if (err)
				1653	goto out;
				1654	err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
				1655	if (err)
				1656	goto out;
				1657
				1658	fdentry = ffhp->fh_dentry;
				1659	fdir = d_inode(fdentry);
				1660
				1661	tdentry = tfhp->fh_dentry;
				1662	tdir = d_inode(tdentry);
				1663
				1664	err = nfserr_perm;
				1665	if (!flen \|\| isdotent(fname, flen) \|\| !tlen \|\| isdotent(tname, tlen))
				1666	goto out;
				1667
				1668	host_err = fh_want_write(ffhp);
				1669	if (host_err) {
				1670	err = nfserrno(host_err);
				1671	goto out;
				1672	}
				1673
				1674	/* cannot use fh_lock as we need deadlock protective ordering
				1675	* so do it by hand */
				1676	trap = lock_rename(tdentry, fdentry);
				1677	ffhp->fh_locked = tfhp->fh_locked = true;
				1678	fill_pre_wcc(ffhp);
				1679	fill_pre_wcc(tfhp);
				1680
				1681	odentry = lookup_one_len(fname, fdentry, flen);
				1682	host_err = PTR_ERR(odentry);
				1683	if (IS_ERR(odentry))
				1684	goto out_nfserr;
				1685
				1686	host_err = -ENOENT;
				1687	if (d_really_is_negative(odentry))
				1688	goto out_dput_old;
				1689	host_err = -EINVAL;
				1690	if (odentry == trap)
				1691	goto out_dput_old;
				1692
				1693	ndentry = lookup_one_len(tname, tdentry, tlen);
				1694	host_err = PTR_ERR(ndentry);
				1695	if (IS_ERR(ndentry))
				1696	goto out_dput_old;
				1697	host_err = -ENOTEMPTY;
				1698	if (ndentry == trap)
				1699	goto out_dput_new;
				1700
				1701	host_err = -EXDEV;
				1702	if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
				1703	goto out_dput_new;
				1704	if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
				1705	goto out_dput_new;
				1706
				1707	host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
				1708	if (!host_err) {
				1709	host_err = commit_metadata(tfhp);
				1710	if (!host_err)
				1711	host_err = commit_metadata(ffhp);
				1712	}
				1713	out_dput_new:
				1714	dput(ndentry);
				1715	out_dput_old:
				1716	dput(odentry);
				1717	out_nfserr:
				1718	err = nfserrno(host_err);
				1719	/*
				1720	* We cannot rely on fh_unlock on the two filehandles,
				1721	* as that would do the wrong thing if the two directories
				1722	* were the same, so again we do it by hand.
				1723	*/
				1724	fill_post_wcc(ffhp);
				1725	fill_post_wcc(tfhp);
				1726	unlock_rename(tdentry, fdentry);
				1727	ffhp->fh_locked = tfhp->fh_locked = false;
				1728	fh_drop_write(ffhp);
				1729
				1730	out:
				1731	return err;
				1732	}
				1733
				1734	/*
				1735	* Unlink a file or directory
				1736	* N.B. After this call fhp needs an fh_put
				1737	*/
				1738	__be32
				1739	nfsd_unlink(struct svc_rqst rqstp, struct svc_fh fhp, int type,
				1740	char *fname, int flen)
				1741	{
				1742	struct dentry dentry, rdentry;
				1743	struct inode *dirp;
				1744	__be32 err;
				1745	int host_err;
				1746
				1747	err = nfserr_acces;
				1748	if (!flen \|\| isdotent(fname, flen))
				1749	goto out;
				1750	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
				1751	if (err)
				1752	goto out;
				1753
				1754	host_err = fh_want_write(fhp);
				1755	if (host_err)
				1756	goto out_nfserr;
				1757
				1758	fh_lock_nested(fhp, I_MUTEX_PARENT);
				1759	dentry = fhp->fh_dentry;
				1760	dirp = d_inode(dentry);
				1761
				1762	rdentry = lookup_one_len(fname, dentry, flen);
				1763	host_err = PTR_ERR(rdentry);
				1764	if (IS_ERR(rdentry))
				1765	goto out_nfserr;
				1766
				1767	if (d_really_is_negative(rdentry)) {
				1768	dput(rdentry);
				1769	err = nfserr_noent;
				1770	goto out;
				1771	}
				1772
				1773	if (!type)
				1774	type = d_inode(rdentry)->i_mode & S_IFMT;
				1775
				1776	if (type != S_IFDIR)
				1777	host_err = vfs_unlink(dirp, rdentry, NULL);
				1778	else
				1779	host_err = vfs_rmdir(dirp, rdentry);
				1780	if (!host_err)
				1781	host_err = commit_metadata(fhp);
				1782	dput(rdentry);
				1783
				1784	out_nfserr:
				1785	err = nfserrno(host_err);
				1786	out:
				1787	return err;
				1788	}
				1789
				1790	/*
				1791	* We do this buffering because we must not call back into the file
				1792	* system's ->lookup() method from the filldir callback. That may well
				1793	* deadlock a number of file systems.
				1794	*
				1795	* This is based heavily on the implementation of same in XFS.
				1796	*/
				1797	struct buffered_dirent { /* one directory entry as stored by nfsd_buffered_filldir() */
				1798	u64 ino; /* inode number passed to the filldir callback */
				1799	loff_t offset; /* offset passed to the filldir callback */
				1800	int namlen; /* number of bytes stored in name[] */
				1801	unsigned int d_type; /* d_type passed to the filldir callback */
				1802	char name[]; /* namlen bytes copied with memcpy(); no NUL is appended */
				1803	};
				1804
				1805	struct readdir_data { /* state shared between nfsd_buffered_readdir() and its filldir callback */
				1806	struct dir_context ctx; /* handed to iterate_dir(); the callback recovers this struct via container_of() */
				1807	char *dirent; /* one-page buffer holding packed struct buffered_dirent records */
				1808	size_t used; /* bytes of the buffer filled so far */
				1809	int full; /* set to 1 when the next record would not fit in PAGE_SIZE */
				1810	};
				1811
				1812	static int nfsd_buffered_filldir(struct dir_context ctx, const char name,
				1813	int namlen, loff_t offset, u64 ino,
				1814	unsigned int d_type)
				1815	{
				1816	struct readdir_data *buf =
				1817	container_of(ctx, struct readdir_data, ctx);
				1818	struct buffered_dirent de = (void )(buf->dirent + buf->used);
				1819	unsigned int reclen;
				1820
				1821	reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
				1822	if (buf->used + reclen > PAGE_SIZE) {
				1823	buf->full = 1;
				1824	return -EINVAL;
				1825	}
				1826
				1827	de->namlen = namlen;
				1828	de->offset = offset;
				1829	de->ino = ino;
				1830	de->d_type = d_type;
				1831	memcpy(de->name, name, namlen);
				1832	buf->used += reclen;
				1833
				1834	return 0;
				1835	}
				1836
				1837	static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
				1838	struct readdir_cd cdp, loff_t offsetp)
				1839	{
				1840	struct buffered_dirent *de;
				1841	int host_err;
				1842	int size;
				1843	loff_t offset;
				1844	struct readdir_data buf = {
				1845	.ctx.actor = nfsd_buffered_filldir,
				1846	.dirent = (void *)__get_free_page(GFP_KERNEL)
				1847	};
				1848
				1849	if (!buf.dirent)
				1850	return nfserrno(-ENOMEM);
				1851
				1852	offset = *offsetp;
				1853
				1854	while (1) {
				1855	unsigned int reclen;
				1856
				1857	cdp->err = nfserr_eof; /* will be cleared on successful read */
				1858	buf.used = 0;
				1859	buf.full = 0;
				1860
				1861	host_err = iterate_dir(file, &buf.ctx);
				1862	if (buf.full)
				1863	host_err = 0;
				1864
				1865	if (host_err < 0)
				1866	break;
				1867
				1868	size = buf.used;
				1869
				1870	if (!size)
				1871	break;
				1872
				1873	de = (struct buffered_dirent *)buf.dirent;
				1874	while (size > 0) {
				1875	offset = de->offset;
				1876
				1877	if (func(cdp, de->name, de->namlen, de->offset,
				1878	de->ino, de->d_type))
				1879	break;
				1880
				1881	if (cdp->err != nfs_ok)
				1882	break;
				1883
				1884	reclen = ALIGN(sizeof(*de) + de->namlen,
				1885	sizeof(u64));
				1886	size -= reclen;
				1887	de = (struct buffered_dirent )((char )de + reclen);
				1888	}
				1889	if (size > 0) /* We bailed out early */
				1890	break;
				1891
				1892	offset = vfs_llseek(file, 0, SEEK_CUR);
				1893	}
				1894
				1895	free_page((unsigned long)(buf.dirent));
				1896
				1897	if (host_err)
				1898	return nfserrno(host_err);
				1899
				1900	*offsetp = offset;
				1901	return cdp->err;
				1902	}
				1903
				1904	/*
				1905	* Read entries from a directory.
				1906	* The NFSv3/4 verifier we ignore for now.
				1907	*/
				1908	__be32
				1909	nfsd_readdir(struct svc_rqst rqstp, struct svc_fh fhp, loff_t *offsetp,
				1910	struct readdir_cd *cdp, nfsd_filldir_t func)
				1911	{
				1912	__be32 err;
				1913	struct file *file;
				1914	loff_t offset = *offsetp;
				1915	int may_flags = NFSD_MAY_READ;
				1916
				1917	/* NFSv2 only supports 32 bit cookies */
				1918	if (rqstp->rq_vers > 2)
				1919	may_flags \|= NFSD_MAY_64BIT_COOKIE;
				1920
				1921	err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
				1922	if (err)
				1923	goto out;
				1924
				1925	offset = vfs_llseek(file, offset, SEEK_SET);
				1926	if (offset < 0) {
				1927	err = nfserrno((int)offset);
				1928	goto out_close;
				1929	}
				1930
				1931	err = nfsd_buffered_readdir(file, func, cdp, offsetp);
				1932
				1933	if (err == nfserr_eof \|\| err == nfserr_toosmall)
				1934	err = nfs_ok; /* can still be found in ->err */
				1935	out_close:
				1936	fput(file);
				1937	out:
				1938	return err;
				1939	}
				1940
				1941	/*
				1942	* Get file system stats
				1943	* N.B. After this call fhp needs an fh_put
				1944	*/
				1945	__be32
				1946	nfsd_statfs(struct svc_rqst rqstp, struct svc_fh fhp, struct kstatfs *stat, int access)
				1947	{
				1948	__be32 err;
				1949
				1950	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP \| access);
				1951	if (!err) {
				1952	struct path path = {
				1953	.mnt = fhp->fh_export->ex_path.mnt,
				1954	.dentry = fhp->fh_dentry,
				1955	};
				1956	if (vfs_statfs(&path, stat))
				1957	err = nfserr_io;
				1958	}
				1959	return err;
				1960	}
				1961
				1962	static int exp_rdonly(struct svc_rqst rqstp, struct svc_export exp)
				1963	{
				1964	return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
				1965	}
				1966
				1967	/*
				1968	* Check for a user's access permissions to this inode.
				1969	*/
				1970	__be32
				1971	nfsd_permission(struct svc_rqst rqstp, struct svc_export exp,
				1972	struct dentry *dentry, int acc)
				1973	{
				1974	struct inode *inode = d_inode(dentry);
				1975	int err;
				1976
				1977	if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
				1978	return 0;
				1979	#if 0
				1980	dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
				1981	acc,
				1982	(acc & NFSD_MAY_READ)? " read" : "",
				1983	(acc & NFSD_MAY_WRITE)? " write" : "",
				1984	(acc & NFSD_MAY_EXEC)? " exec" : "",
				1985	(acc & NFSD_MAY_SATTR)? " sattr" : "",
				1986	(acc & NFSD_MAY_TRUNC)? " trunc" : "",
				1987	(acc & NFSD_MAY_LOCK)? " lock" : "",
				1988	(acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
				1989	inode->i_mode,
				1990	IS_IMMUTABLE(inode)? " immut" : "",
				1991	IS_APPEND(inode)? " append" : "",
				1992	__mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
				1993	dprintk(" owner %d/%d user %d/%d\n",
				1994	inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid());
				1995	#endif
				1996
				1997	/* Normally we reject any write/sattr etc access on a read-only file
				1998	* system. But if it is IRIX doing check on write-access for a
				1999	* device special file, we ignore rofs.
				2000	*/
				2001	if (!(acc & NFSD_MAY_LOCAL_ACCESS))
				2002	if (acc & (NFSD_MAY_WRITE \| NFSD_MAY_SATTR \| NFSD_MAY_TRUNC)) {
				2003	if (exp_rdonly(rqstp, exp) \|\|
				2004	__mnt_is_readonly(exp->ex_path.mnt))
				2005	return nfserr_rofs;
				2006	if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
				2007	return nfserr_perm;
				2008	}
				2009	if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
				2010	return nfserr_perm;
				2011
				2012	if (acc & NFSD_MAY_LOCK) {
				2013	/* If we cannot rely on authentication in NLM requests,
				2014	* just allow locks, otherwise require read permission, or
				2015	* ownership
				2016	*/
				2017	if (exp->ex_flags & NFSEXP_NOAUTHNLM)
				2018	return 0;
				2019	else
				2020	acc = NFSD_MAY_READ \| NFSD_MAY_OWNER_OVERRIDE;
				2021	}
				2022	/*
				2023	* The file owner always gets access permission for accesses that
				2024	* would normally be checked at open time. This is to make
				2025	* file access work even when the client has done a fchmod(fd, 0).
				2026	*
				2027	* However, `cp foo bar' should fail nevertheless when bar is
				2028	* readonly. A sensible way to do this might be to reject all
				2029	* attempts to truncate a read-only file, because a creat() call
				2030	* always implies file truncation.
				2031	* ... but this isn't really fair. A process may reasonably call
				2032	* ftruncate on an open file descriptor on a file with perm 000.
				2033	* We must trust the client to do permission checking - using "ACCESS"
				2034	* with NFSv3.
				2035	*/
				2036	if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
				2037	uid_eq(inode->i_uid, current_fsuid()))
				2038	return 0;
				2039
				2040	/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
				2041	err = inode_permission(inode, acc & (MAY_READ\|MAY_WRITE\|MAY_EXEC));
				2042
				2043	/* Allow read access to binaries even when mode 111 */
				2044	if (err == -EACCES && S_ISREG(inode->i_mode) &&
				2045	(acc == (NFSD_MAY_READ \| NFSD_MAY_OWNER_OVERRIDE) \|\|
				2046	acc == (NFSD_MAY_READ \| NFSD_MAY_READ_IF_EXEC)))
				2047	err = inode_permission(inode, MAY_EXEC);
				2048
				2049	return err? nfserrno(err) : 0;
				2050	}
				2051
				2052	void
				2053	nfsd_racache_shutdown(void)
				2054	{
				2055	struct raparms raparm, last_raparm;
				2056	unsigned int i;
				2057
				2058	dprintk("nfsd: freeing readahead buffers.\n");
				2059
				2060	for (i = 0; i < RAPARM_HASH_SIZE; i++) {
				2061	raparm = raparm_hash[i].pb_head;
				2062	while(raparm) {
				2063	last_raparm = raparm;
				2064	raparm = raparm->p_next;
				2065	kfree(last_raparm);
				2066	}
				2067	raparm_hash[i].pb_head = NULL;
				2068	}
				2069	}
				2070	/*
				2071	* Initialize readahead param cache
				2072	*/
				2073	int
				2074	nfsd_racache_init(int cache_size)
				2075	{
				2076	int i;
				2077	int j = 0;
				2078	int nperbucket;
				2079	struct raparms **raparm = NULL;
				2080
				2081
				2082	if (raparm_hash[0].pb_head)
				2083	return 0;
				2084	nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
				2085	nperbucket = max(2, nperbucket);
				2086	cache_size = nperbucket * RAPARM_HASH_SIZE;
				2087
				2088	dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
				2089
				2090	for (i = 0; i < RAPARM_HASH_SIZE; i++) {
				2091	spin_lock_init(&raparm_hash[i].pb_lock);
				2092
				2093	raparm = &raparm_hash[i].pb_head;
				2094	for (j = 0; j < nperbucket; j++) {
				2095	*raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
				2096	if (!*raparm)
				2097	goto out_nomem;
				2098	raparm = &(*raparm)->p_next;
				2099	}
				2100	*raparm = NULL;
				2101	}
				2102
				2103	nfsdstats.ra_size = cache_size;
				2104	return 0;
				2105
				2106	out_nomem:
				2107	dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
				2108	nfsd_racache_shutdown();
				2109	return -ENOMEM;
				2110	}