Blame - src/kernel/linux/v4.19/fs/nfsd/vfs.c - T800

blob: 4fe8db3149506c59e098812de14ccebab7acdf61 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* File operations used by nfsd. Some of these have been ripped from
				4	* other parts of the kernel because they weren't exported, others
				5	* are partial duplicates with added or changed functionality.
				6	*
				7	* Note that several functions dget() the dentry upon which they want
				8	* to act, most notably those that create directory entries. Response
				9	* dentry's are dput()'d if necessary in the release callback.
				10	* So if you notice code paths that apparently fail to dput() the
				11	* dentry, don't worry--they have been taken care of.
				12	*
				13	* Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
				14	* Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
				15	*/
				16
				17	#include <linux/fs.h>
				18	#include <linux/file.h>
				19	#include <linux/splice.h>
				20	#include <linux/falloc.h>
				21	#include <linux/fcntl.h>
				22	#include <linux/namei.h>
				23	#include <linux/delay.h>
				24	#include <linux/fsnotify.h>
				25	#include <linux/posix_acl_xattr.h>
				26	#include <linux/xattr.h>
				27	#include <linux/jhash.h>
				28	#include <linux/ima.h>
				29	#include <linux/slab.h>
				30	#include <linux/uaccess.h>
				31	#include <linux/exportfs.h>
				32	#include <linux/writeback.h>
				33	#include <linux/security.h>
				34
				35	#ifdef CONFIG_NFSD_V3
				36	#include "xdr3.h"
				37	#endif /* CONFIG_NFSD_V3 */
				38
				39	#ifdef CONFIG_NFSD_V4
				40	#include "../internal.h"
				41	#include "acl.h"
				42	#include "idmap.h"
				43	#endif /* CONFIG_NFSD_V4 */
				44
				45	#include "nfsd.h"
				46	#include "vfs.h"
				47	#include "trace.h"
				48
				49	#define NFSDDBG_FACILITY NFSDDBG_FILEOP
				50
				51
				52	/*
				53	* This is a cache of readahead params that help us choose the proper
				54	* readahead strategy. Initially, we set all readahead parameters to 0
				55	* and let the VFS handle things.
				56	* If you increase the number of cached files very much, you'll need to
				57	* add a hash table here.
				58	*/
				59	struct raparms {
				60	struct raparms *p_next;
				61	unsigned int p_count;
				62	ino_t p_ino;
				63	dev_t p_dev;
				64	int p_set;
				65	struct file_ra_state p_ra;
				66	unsigned int p_hindex;
				67	};
				68
				69	struct raparm_hbucket {
				70	struct raparms *pb_head;
				71	spinlock_t pb_lock;
				72	} ____cacheline_aligned_in_smp;
				73
				74	#define RAPARM_HASH_BITS 4
				75	#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
				76	#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
				77	static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
				78
				79	/*
				80	* Called from nfsd_lookup and encode_dirent. Check if we have crossed
				81	* a mount point.
				82	* Returns -EAGAIN or -ETIMEDOUT leaving dpp and expp unchanged,
				83	* or nfs_ok having possibly changed dpp and expp
				84	*/
				85	int
				86	nfsd_cross_mnt(struct svc_rqst rqstp, struct dentry *dpp,
				87	struct svc_export **expp)
				88	{
				89	struct svc_export exp = expp, *exp2 = NULL;
				90	struct dentry dentry = dpp;
				91	struct path path = {.mnt = mntget(exp->ex_path.mnt),
				92	.dentry = dget(dentry)};
				93	int err = 0;
				94
				95	err = follow_down(&path);
				96	if (err < 0)
				97	goto out;
				98	if (path.mnt == exp->ex_path.mnt && path.dentry == dentry &&
				99	nfsd_mountpoint(dentry, exp) == 2) {
				100	/* This is only a mountpoint in some other namespace */
				101	path_put(&path);
				102	goto out;
				103	}
				104
				105	exp2 = rqst_exp_get_by_name(rqstp, &path);
				106	if (IS_ERR(exp2)) {
				107	err = PTR_ERR(exp2);
				108	/*
				109	* We normally allow NFS clients to continue
				110	* "underneath" a mountpoint that is not exported.
				111	* The exception is V4ROOT, where no traversal is ever
				112	* allowed without an explicit export of the new
				113	* directory.
				114	*/
				115	if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
				116	err = 0;
				117	path_put(&path);
				118	goto out;
				119	}
				120	if (nfsd_v4client(rqstp) \|\|
				121	(exp->ex_flags & NFSEXP_CROSSMOUNT) \|\| EX_NOHIDE(exp2)) {
				122	/* successfully crossed mount point */
				123	/*
				124	* This is subtle: path.dentry is not on path.mnt
				125	* at this point. The only reason we are safe is that
				126	* original mnt is pinned down by exp, so we should
				127	* put path before putting exp
				128	*/
				129	*dpp = path.dentry;
				130	path.dentry = dentry;
				131	*expp = exp2;
				132	exp2 = exp;
				133	}
				134	path_put(&path);
				135	exp_put(exp2);
				136	out:
				137	return err;
				138	}
				139
				140	static void follow_to_parent(struct path *path)
				141	{
				142	struct dentry *dp;
				143
				144	while (path->dentry == path->mnt->mnt_root && follow_up(path))
				145	;
				146	dp = dget_parent(path->dentry);
				147	dput(path->dentry);
				148	path->dentry = dp;
				149	}
				150
				151	static int nfsd_lookup_parent(struct svc_rqst rqstp, struct dentry dparent, struct svc_export exp, struct dentry dentryp)
				152	{
				153	struct svc_export *exp2;
				154	struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
				155	.dentry = dget(dparent)};
				156
				157	follow_to_parent(&path);
				158
				159	exp2 = rqst_exp_parent(rqstp, &path);
				160	if (PTR_ERR(exp2) == -ENOENT) {
				161	*dentryp = dget(dparent);
				162	} else if (IS_ERR(exp2)) {
				163	path_put(&path);
				164	return PTR_ERR(exp2);
				165	} else {
				166	*dentryp = dget(path.dentry);
				167	exp_put(*exp);
				168	*exp = exp2;
				169	}
				170	path_put(&path);
				171	return 0;
				172	}
				173
				174	/*
				175	* For nfsd purposes, we treat V4ROOT exports as though there was an
				176	* export at every directory.
				177	* We return:
				178	* '1' if this dentry must be an export point,
				179	* '2' if it might be, if there is really a mount here, and
				180	* '0' if there is no chance of an export point here.
				181	*/
				182	int nfsd_mountpoint(struct dentry dentry, struct svc_export exp)
				183	{
				184	if (!d_inode(dentry))
				185	return 0;
				186	if (exp->ex_flags & NFSEXP_V4ROOT)
				187	return 1;
				188	if (nfsd4_is_junction(dentry))
				189	return 1;
				190	if (d_mountpoint(dentry))
				191	/*
				192	* Might only be a mountpoint in a different namespace,
				193	* but we need to check.
				194	*/
				195	return 2;
				196	return 0;
				197	}
				198
				199	__be32
				200	nfsd_lookup_dentry(struct svc_rqst rqstp, struct svc_fh fhp,
				201	const char *name, unsigned int len,
				202	struct svc_export exp_ret, struct dentry dentry_ret)
				203	{
				204	struct svc_export *exp;
				205	struct dentry *dparent;
				206	struct dentry *dentry;
				207	int host_err;
				208
				209	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
				210
				211	dparent = fhp->fh_dentry;
				212	exp = exp_get(fhp->fh_export);
				213
				214	/* Lookup the name, but don't follow links */
				215	if (isdotent(name, len)) {
				216	if (len==1)
				217	dentry = dget(dparent);
				218	else if (dparent != exp->ex_path.dentry)
				219	dentry = dget_parent(dparent);
				220	else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
				221	dentry = dget(dparent); /* .. == . just like at / */
				222	else {
				223	/* checking mountpoint crossing is very different when stepping up */
				224	host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
				225	if (host_err)
				226	goto out_nfserr;
				227	}
				228	} else {
				229	/*
				230	* In the nfsd4_open() case, this may be held across
				231	* subsequent open and delegation acquisition which may
				232	* need to take the child's i_mutex:
				233	*/
				234	fh_lock_nested(fhp, I_MUTEX_PARENT);
				235	dentry = lookup_one_len(name, dparent, len);
				236	host_err = PTR_ERR(dentry);
				237	if (IS_ERR(dentry))
				238	goto out_nfserr;
				239	if (nfsd_mountpoint(dentry, exp)) {
				240	/*
				241	* We don't need the i_mutex after all. It's
				242	* still possible we could open this (regular
				243	* files can be mountpoints too), but the
				244	* i_mutex is just there to prevent renames of
				245	* something that we might be about to delegate,
				246	* and a mountpoint won't be renamed:
				247	*/
				248	fh_unlock(fhp);
				249	if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
				250	dput(dentry);
				251	goto out_nfserr;
				252	}
				253	}
				254	}
				255	*dentry_ret = dentry;
				256	*exp_ret = exp;
				257	return 0;
				258
				259	out_nfserr:
				260	exp_put(exp);
				261	return nfserrno(host_err);
				262	}
				263
				264	/*
				265	* Look up one component of a pathname.
				266	* N.B. After this call _both_ fhp and resfh need an fh_put
				267	*
				268	* If the lookup would cross a mountpoint, and the mounted filesystem
				269	* is exported to the client with NFSEXP_NOHIDE, then the lookup is
				270	* accepted as it stands and the mounted directory is
				271	* returned. Otherwise the covered directory is returned.
				272	* NOTE: this mountpoint crossing is not supported properly by all
				273	* clients and is explicitly disallowed for NFSv3
				274	* NeilBrown <neilb@cse.unsw.edu.au>
				275	*/
				276	__be32
				277	nfsd_lookup(struct svc_rqst rqstp, struct svc_fh fhp, const char *name,
				278	unsigned int len, struct svc_fh *resfh)
				279	{
				280	struct svc_export *exp;
				281	struct dentry *dentry;
				282	__be32 err;
				283
				284	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
				285	if (err)
				286	return err;
				287	err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
				288	if (err)
				289	return err;
				290	err = check_nfsd_access(exp, rqstp);
				291	if (err)
				292	goto out;
				293	/*
				294	* Note: we compose the file handle now, but as the
				295	* dentry may be negative, it may need to be updated.
				296	*/
				297	err = fh_compose(resfh, exp, dentry, fhp);
				298	if (!err && d_really_is_negative(dentry))
				299	err = nfserr_noent;
				300	out:
				301	dput(dentry);
				302	exp_put(exp);
				303	return err;
				304	}
				305
				306	/*
				307	* Commit metadata changes to stable storage.
				308	*/
				309	static int
				310	commit_metadata(struct svc_fh *fhp)
				311	{
				312	struct inode *inode = d_inode(fhp->fh_dentry);
				313	const struct export_operations *export_ops = inode->i_sb->s_export_op;
				314
				315	if (!EX_ISSYNC(fhp->fh_export))
				316	return 0;
				317
				318	if (export_ops->commit_metadata)
				319	return export_ops->commit_metadata(inode);
				320	return sync_inode_metadata(inode, 1);
				321	}
				322
				323	/*
				324	* Go over the attributes and take care of the small differences between
				325	* NFS semantics and what Linux expects.
				326	*/
				327	static void
				328	nfsd_sanitize_attrs(struct inode inode, struct iattr iap)
				329	{
				330	/* sanitize the mode change */
				331	if (iap->ia_valid & ATTR_MODE) {
				332	iap->ia_mode &= S_IALLUGO;
				333	iap->ia_mode \|= (inode->i_mode & ~S_IALLUGO);
				334	}
				335
				336	/* Revoke setuid/setgid on chown */
				337	if (!S_ISDIR(inode->i_mode) &&
				338	((iap->ia_valid & ATTR_UID) \|\| (iap->ia_valid & ATTR_GID))) {
				339	iap->ia_valid \|= ATTR_KILL_PRIV;
				340	if (iap->ia_valid & ATTR_MODE) {
				341	/* we're setting mode too, just clear the sid bits /
				342	iap->ia_mode &= ~S_ISUID;
				343	if (iap->ia_mode & S_IXGRP)
				344	iap->ia_mode &= ~S_ISGID;
				345	} else {
				346	/* set ATTR_KILL_* bits and let VFS handle it */
				347	iap->ia_valid \|= (ATTR_KILL_SUID \| ATTR_KILL_SGID);
				348	}
				349	}
				350	}
				351
				352	static __be32
				353	nfsd_get_write_access(struct svc_rqst rqstp, struct svc_fh fhp,
				354	struct iattr *iap)
				355	{
				356	struct inode *inode = d_inode(fhp->fh_dentry);
				357	int host_err;
				358
				359	if (iap->ia_size < inode->i_size) {
				360	__be32 err;
				361
				362	err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
				363	NFSD_MAY_TRUNC \| NFSD_MAY_OWNER_OVERRIDE);
				364	if (err)
				365	return err;
				366	}
				367
				368	host_err = get_write_access(inode);
				369	if (host_err)
				370	goto out_nfserrno;
				371
				372	host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
				373	if (host_err)
				374	goto out_put_write_access;
				375	return 0;
				376
				377	out_put_write_access:
				378	put_write_access(inode);
				379	out_nfserrno:
				380	return nfserrno(host_err);
				381	}
				382
				383	/*
				384	* Set various file attributes. After this call fhp needs an fh_put.
				385	*/
				386	__be32
				387	nfsd_setattr(struct svc_rqst rqstp, struct svc_fh fhp, struct iattr *iap,
				388	int check_guard, time_t guardtime)
				389	{
				390	struct dentry *dentry;
				391	struct inode *inode;
				392	int accmode = NFSD_MAY_SATTR;
				393	umode_t ftype = 0;
				394	__be32 err;
				395	int host_err;
				396	bool get_write_count;
				397	bool size_change = (iap->ia_valid & ATTR_SIZE);
				398
				399	if (iap->ia_valid & ATTR_SIZE) {
				400	accmode \|= NFSD_MAY_WRITE\|NFSD_MAY_OWNER_OVERRIDE;
				401	ftype = S_IFREG;
				402	}
				403
				404	/*
				405	* If utimes(2) and friends are called with times not NULL, we should
				406	* not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
				407	* will return EACCESS, when the caller's effective UID does not match
				408	* the owner of the file, and the caller is not privileged. In this
				409	* situation, we should return EPERM(notify_change will return this).
				410	*/
				411	if (iap->ia_valid & (ATTR_ATIME \| ATTR_MTIME)) {
				412	accmode \|= NFSD_MAY_OWNER_OVERRIDE;
				413	if (!(iap->ia_valid & (ATTR_ATIME_SET \| ATTR_MTIME_SET)))
				414	accmode \|= NFSD_MAY_WRITE;
				415	}
				416
				417	/* Callers that do fh_verify should do the fh_want_write: */
				418	get_write_count = !fhp->fh_dentry;
				419
				420	/* Get inode */
				421	err = fh_verify(rqstp, fhp, ftype, accmode);
				422	if (err)
				423	return err;
				424	if (get_write_count) {
				425	host_err = fh_want_write(fhp);
				426	if (host_err)
				427	goto out;
				428	}
				429
				430	dentry = fhp->fh_dentry;
				431	inode = d_inode(dentry);
				432
				433	/* Ignore any mode updates on symlinks */
				434	if (S_ISLNK(inode->i_mode))
				435	iap->ia_valid &= ~ATTR_MODE;
				436
				437	if (!iap->ia_valid)
				438	return 0;
				439
				440	nfsd_sanitize_attrs(inode, iap);
				441
				442	if (check_guard && guardtime != inode->i_ctime.tv_sec)
				443	return nfserr_notsync;
				444
				445	/*
				446	* The size case is special, it changes the file in addition to the
				447	* attributes, and file systems don't expect it to be mixed with
				448	* "random" attribute changes. We thus split out the size change
				449	* into a separate call to ->setattr, and do the rest as a separate
				450	* setattr call.
				451	*/
				452	if (size_change) {
				453	err = nfsd_get_write_access(rqstp, fhp, iap);
				454	if (err)
				455	return err;
				456	}
				457
				458	fh_lock(fhp);
				459	if (size_change) {
				460	/*
				461	* RFC5661, Section 18.30.4:
				462	* Changing the size of a file with SETATTR indirectly
				463	* changes the time_modify and change attributes.
				464	*
				465	* (and similar for the older RFCs)
				466	*/
				467	struct iattr size_attr = {
				468	.ia_valid = ATTR_SIZE \| ATTR_CTIME \| ATTR_MTIME,
				469	.ia_size = iap->ia_size,
				470	};
				471
				472	host_err = notify_change(dentry, &size_attr, NULL);
				473	if (host_err)
				474	goto out_unlock;
				475	iap->ia_valid &= ~ATTR_SIZE;
				476
				477	/*
				478	* Avoid the additional setattr call below if the only other
				479	* attribute that the client sends is the mtime, as we update
				480	* it as part of the size change above.
				481	*/
				482	if ((iap->ia_valid & ~ATTR_MTIME) == 0)
				483	goto out_unlock;
				484	}
				485
				486	iap->ia_valid \|= ATTR_CTIME;
				487	host_err = notify_change(dentry, iap, NULL);
				488
				489	out_unlock:
				490	fh_unlock(fhp);
				491	if (size_change)
				492	put_write_access(inode);
				493	out:
				494	if (!host_err)
				495	host_err = commit_metadata(fhp);
				496	return nfserrno(host_err);
				497	}
				498
				499	#if defined(CONFIG_NFSD_V4)
				500	/*
				501	* NFS junction information is stored in an extended attribute.
				502	*/
				503	#define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs"
				504
				505	/**
				506	* nfsd4_is_junction - Test if an object could be an NFS junction
				507	*
				508	* @dentry: object to test
				509	*
				510	* Returns 1 if "dentry" appears to contain NFS junction information.
				511	* Otherwise 0 is returned.
				512	*/
				513	int nfsd4_is_junction(struct dentry *dentry)
				514	{
				515	struct inode *inode = d_inode(dentry);
				516
				517	if (inode == NULL)
				518	return 0;
				519	if (inode->i_mode & S_IXUGO)
				520	return 0;
				521	if (!(inode->i_mode & S_ISVTX))
				522	return 0;
				523	if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
				524	return 0;
				525	return 1;
				526	}
				527	#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
				528	__be32 nfsd4_set_nfs4_label(struct svc_rqst rqstp, struct svc_fh fhp,
				529	struct xdr_netobj *label)
				530	{
				531	__be32 error;
				532	int host_error;
				533	struct dentry *dentry;
				534
				535	error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
				536	if (error)
				537	return error;
				538
				539	dentry = fhp->fh_dentry;
				540
				541	inode_lock(d_inode(dentry));
				542	host_error = security_inode_setsecctx(dentry, label->data, label->len);
				543	inode_unlock(d_inode(dentry));
				544	return nfserrno(host_error);
				545	}
				546	#else
				547	__be32 nfsd4_set_nfs4_label(struct svc_rqst rqstp, struct svc_fh fhp,
				548	struct xdr_netobj *label)
				549	{
				550	return nfserr_notsupp;
				551	}
				552	#endif
				553
				554	__be32 nfsd4_clone_file_range(struct file src, u64 src_pos, struct file dst,
				555	u64 dst_pos, u64 count)
				556	{
				557	return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
				558	count));
				559	}
				560
				561	ssize_t nfsd_copy_file_range(struct file src, u64 src_pos, struct file dst,
				562	u64 dst_pos, u64 count)
				563	{
				564
				565	/*
				566	* Limit copy to 4MB to prevent indefinitely blocking an nfsd
				567	* thread and client rpc slot. The choice of 4MB is somewhat
				568	* arbitrary. We might instead base this on r/wsize, or make it
				569	* tunable, or use a time instead of a byte limit, or implement
				570	* asynchronous copy. In theory a client could also recognize a
				571	* limit like this and pipeline multiple COPY requests.
				572	*/
				573	count = min_t(u64, count, 1 << 22);
				574	return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
				575	}
				576
				577	__be32 nfsd4_vfs_fallocate(struct svc_rqst rqstp, struct svc_fh fhp,
				578	struct file *file, loff_t offset, loff_t len,
				579	int flags)
				580	{
				581	int error;
				582
				583	if (!S_ISREG(file_inode(file)->i_mode))
				584	return nfserr_inval;
				585
				586	error = vfs_fallocate(file, flags, offset, len);
				587	if (!error)
				588	error = commit_metadata(fhp);
				589
				590	return nfserrno(error);
				591	}
				592	#endif /* defined(CONFIG_NFSD_V4) */
				593
				594	#ifdef CONFIG_NFSD_V3
				595	/*
				596	* Check server access rights to a file system object
				597	*/
				598	struct accessmap {
				599	u32 access;
				600	int how;
				601	};
				602	static struct accessmap nfs3_regaccess[] = {
				603	{ NFS3_ACCESS_READ, NFSD_MAY_READ },
				604	{ NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
				605	{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE\|NFSD_MAY_TRUNC },
				606	{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
				607
				608	{ 0, 0 }
				609	};
				610
				611	static struct accessmap nfs3_diraccess[] = {
				612	{ NFS3_ACCESS_READ, NFSD_MAY_READ },
				613	{ NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC },
				614	{ NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC\|NFSD_MAY_WRITE\|NFSD_MAY_TRUNC},
				615	{ NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC\|NFSD_MAY_WRITE },
				616	{ NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
				617
				618	{ 0, 0 }
				619	};
				620
				621	static struct accessmap nfs3_anyaccess[] = {
				622	/* Some clients - Solaris 2.6 at least, make an access call
				623	* to the server to check for access for things like /dev/null
				624	* (which really, the server doesn't care about). So
				625	* We provide simple access checking for them, looking
				626	* mainly at mode bits, and we make sure to ignore read-only
				627	* filesystem checks
				628	*/
				629	{ NFS3_ACCESS_READ, NFSD_MAY_READ },
				630	{ NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
				631	{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE\|NFSD_MAY_LOCAL_ACCESS },
				632	{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE\|NFSD_MAY_LOCAL_ACCESS },
				633
				634	{ 0, 0 }
				635	};
				636
				637	__be32
				638	nfsd_access(struct svc_rqst rqstp, struct svc_fh fhp, u32 access, u32 supported)
				639	{
				640	struct accessmap *map;
				641	struct svc_export *export;
				642	struct dentry *dentry;
				643	u32 query, result = 0, sresult = 0;
				644	__be32 error;
				645
				646	error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
				647	if (error)
				648	goto out;
				649
				650	export = fhp->fh_export;
				651	dentry = fhp->fh_dentry;
				652
				653	if (d_is_reg(dentry))
				654	map = nfs3_regaccess;
				655	else if (d_is_dir(dentry))
				656	map = nfs3_diraccess;
				657	else
				658	map = nfs3_anyaccess;
				659
				660
				661	query = *access;
				662	for (; map->access; map++) {
				663	if (map->access & query) {
				664	__be32 err2;
				665
				666	sresult \|= map->access;
				667
				668	err2 = nfsd_permission(rqstp, export, dentry, map->how);
				669	switch (err2) {
				670	case nfs_ok:
				671	result \|= map->access;
				672	break;
				673
				674	/* the following error codes just mean the access was not allowed,
				675	* rather than an error occurred */
				676	case nfserr_rofs:
				677	case nfserr_acces:
				678	case nfserr_perm:
				679	/* simply don't "or" in the access bit. */
				680	break;
				681	default:
				682	error = err2;
				683	goto out;
				684	}
				685	}
				686	}
				687	*access = result;
				688	if (supported)
				689	*supported = sresult;
				690
				691	out:
				692	return error;
				693	}
				694	#endif /* CONFIG_NFSD_V3 */
				695
				696	static int nfsd_open_break_lease(struct inode *inode, int access)
				697	{
				698	unsigned int mode;
				699
				700	if (access & NFSD_MAY_NOT_BREAK_LEASE)
				701	return 0;
				702	mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
				703	return break_lease(inode, mode \| O_NONBLOCK);
				704	}
				705
				706	/*
				707	* Open an existing file or directory.
				708	* The may_flags argument indicates the type of open (read/write/lock)
				709	* and additional flags.
				710	* N.B. After this call fhp needs an fh_put
				711	*/
				712	__be32
				713	nfsd_open(struct svc_rqst rqstp, struct svc_fh fhp, umode_t type,
				714	int may_flags, struct file **filp)
				715	{
				716	struct path path;
				717	struct inode *inode;
				718	struct file *file;
				719	int flags = O_RDONLY\|O_LARGEFILE;
				720	__be32 err;
				721	int host_err = 0;
				722
				723	validate_process_creds();
				724
				725	/*
				726	* If we get here, then the client has already done an "open",
				727	* and (hopefully) checked permission - so allow OWNER_OVERRIDE
				728	* in case a chmod has now revoked permission.
				729	*
				730	* Arguably we should also allow the owner override for
				731	* directories, but we never have and it doesn't seem to have
				732	* caused anyone a problem. If we were to change this, note
				733	* also that our filldir callbacks would need a variant of
				734	* lookup_one_len that doesn't check permissions.
				735	*/
				736	if (type == S_IFREG)
				737	may_flags \|= NFSD_MAY_OWNER_OVERRIDE;
				738	err = fh_verify(rqstp, fhp, type, may_flags);
				739	if (err)
				740	goto out;
				741
				742	path.mnt = fhp->fh_export->ex_path.mnt;
				743	path.dentry = fhp->fh_dentry;
				744	inode = d_inode(path.dentry);
				745
				746	/* Disallow write access to files with the append-only bit set
				747	* or any access when mandatory locking enabled
				748	*/
				749	err = nfserr_perm;
				750	if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
				751	goto out;
				752	/*
				753	* We must ignore files (but only files) which might have mandatory
				754	* locks on them because there is no way to know if the accesser has
				755	* the lock.
				756	*/
				757	if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
				758	goto out;
				759
				760	if (!inode->i_fop)
				761	goto out;
				762
				763	host_err = nfsd_open_break_lease(inode, may_flags);
				764	if (host_err) /* NOMEM or WOULDBLOCK */
				765	goto out_nfserr;
				766
				767	if (may_flags & NFSD_MAY_WRITE) {
				768	if (may_flags & NFSD_MAY_READ)
				769	flags = O_RDWR\|O_LARGEFILE;
				770	else
				771	flags = O_WRONLY\|O_LARGEFILE;
				772	}
				773
				774	file = dentry_open(&path, flags, current_cred());
				775	if (IS_ERR(file)) {
				776	host_err = PTR_ERR(file);
				777	goto out_nfserr;
				778	}
				779
				780	host_err = ima_file_check(file, may_flags);
				781	if (host_err) {
				782	fput(file);
				783	goto out_nfserr;
				784	}
				785
				786	if (may_flags & NFSD_MAY_64BIT_COOKIE)
				787	file->f_mode \|= FMODE_64BITHASH;
				788	else
				789	file->f_mode \|= FMODE_32BITHASH;
				790
				791	*filp = file;
				792	out_nfserr:
				793	err = nfserrno(host_err);
				794	out:
				795	validate_process_creds();
				796	return err;
				797	}
				798
				799	struct raparms *
				800	nfsd_init_raparms(struct file *file)
				801	{
				802	struct inode *inode = file_inode(file);
				803	dev_t dev = inode->i_sb->s_dev;
				804	ino_t ino = inode->i_ino;
				805	struct raparms ra, rap, *frap = NULL;
				806	int depth = 0;
				807	unsigned int hash;
				808	struct raparm_hbucket *rab;
				809
				810	hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
				811	rab = &raparm_hash[hash];
				812
				813	spin_lock(&rab->pb_lock);
				814	for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
				815	if (ra->p_ino == ino && ra->p_dev == dev)
				816	goto found;
				817	depth++;
				818	if (ra->p_count == 0)
				819	frap = rap;
				820	}
				821	depth = nfsdstats.ra_size;
				822	if (!frap) {
				823	spin_unlock(&rab->pb_lock);
				824	return NULL;
				825	}
				826	rap = frap;
				827	ra = *frap;
				828	ra->p_dev = dev;
				829	ra->p_ino = ino;
				830	ra->p_set = 0;
				831	ra->p_hindex = hash;
				832	found:
				833	if (rap != &rab->pb_head) {
				834	*rap = ra->p_next;
				835	ra->p_next = rab->pb_head;
				836	rab->pb_head = ra;
				837	}
				838	ra->p_count++;
				839	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
				840	spin_unlock(&rab->pb_lock);
				841
				842	if (ra->p_set)
				843	file->f_ra = ra->p_ra;
				844	return ra;
				845	}
				846
				847	void nfsd_put_raparams(struct file file, struct raparms ra)
				848	{
				849	struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
				850
				851	spin_lock(&rab->pb_lock);
				852	ra->p_ra = file->f_ra;
				853	ra->p_set = 1;
				854	ra->p_count--;
				855	spin_unlock(&rab->pb_lock);
				856	}
				857
				858	/*
				859	* Grab and keep cached pages associated with a file in the svc_rqst
				860	* so that they can be passed to the network sendmsg/sendpage routines
				861	* directly. They will be released after the sending has completed.
				862	*/
				863	static int
				864	nfsd_splice_actor(struct pipe_inode_info pipe, struct pipe_buffer buf,
				865	struct splice_desc *sd)
				866	{
				867	struct svc_rqst *rqstp = sd->u.data;
				868	struct page **pp = rqstp->rq_next_page;
				869	struct page *page = buf->page;
				870	size_t size;
				871
				872	size = sd->len;
				873
				874	if (rqstp->rq_res.page_len == 0) {
				875	get_page(page);
				876	put_page(*rqstp->rq_next_page);
				877	*(rqstp->rq_next_page++) = page;
				878	rqstp->rq_res.page_base = buf->offset;
				879	rqstp->rq_res.page_len = size;
				880	} else if (page != pp[-1]) {
				881	get_page(page);
				882	if (*rqstp->rq_next_page)
				883	put_page(*rqstp->rq_next_page);
				884	*(rqstp->rq_next_page++) = page;
				885	rqstp->rq_res.page_len += size;
				886	} else
				887	rqstp->rq_res.page_len += size;
				888
				889	return size;
				890	}
				891
				892	static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
				893	struct splice_desc *sd)
				894	{
				895	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
				896	}
				897
				898	static __be32 nfsd_finish_read(struct svc_rqst rqstp, struct svc_fh fhp,
				899	struct file *file, loff_t offset,
				900	unsigned long *count, int host_err)
				901	{
				902	if (host_err >= 0) {
				903	nfsdstats.io_read += host_err;
				904	*count = host_err;
				905	fsnotify_access(file);
				906	trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
				907	return 0;
				908	} else {
				909	trace_nfsd_read_err(rqstp, fhp, offset, host_err);
				910	return nfserrno(host_err);
				911	}
				912	}
				913
				914	__be32 nfsd_splice_read(struct svc_rqst rqstp, struct svc_fh fhp,
				915	struct file file, loff_t offset, unsigned long count)
				916	{
				917	struct splice_desc sd = {
				918	.len = 0,
				919	.total_len = *count,
				920	.pos = offset,
				921	.u.data = rqstp,
				922	};
				923	int host_err;
				924
				925	trace_nfsd_read_splice(rqstp, fhp, offset, *count);
				926	rqstp->rq_next_page = rqstp->rq_respages + 1;
				927	host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
				928	return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
				929	}
				930
				931	__be32 nfsd_readv(struct svc_rqst rqstp, struct svc_fh fhp,
				932	struct file *file, loff_t offset,
				933	struct kvec vec, int vlen, unsigned long count)
				934	{
				935	struct iov_iter iter;
				936	int host_err;
				937
				938	trace_nfsd_read_vector(rqstp, fhp, offset, *count);
				939	iov_iter_kvec(&iter, READ \| ITER_KVEC, vec, vlen, *count);
				940	host_err = vfs_iter_read(file, &iter, &offset, 0);
				941	return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
				942	}
				943
				944	/*
				945	* Gathered writes: If another process is currently writing to the file,
				946	* there's a high chance this is another nfsd (triggered by a bulk write
				947	* from a client's biod). Rather than syncing the file with each write
				948	* request, we sleep for 10 msec.
				949	*
				950	* I don't know if this roughly approximates C. Juszak's idea of
				951	* gathered writes, but it's a nice and simple solution (IMHO), and it
				952	* seems to work:-)
				953	*
				954	* Note: we do this only in the NFSv2 case, since v3 and higher have a
				955	* better tool (separate unstable writes and commits) for solving this
				956	* problem.
				957	*/
				958	static int wait_for_concurrent_writes(struct file *file)
				959	{
				960	struct inode *inode = file_inode(file);
				961	static ino_t last_ino;
				962	static dev_t last_dev;
				963	int err = 0;
				964
				965	if (atomic_read(&inode->i_writecount) > 1
				966	\|\| (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
				967	dprintk("nfsd: write defer %d\n", task_pid_nr(current));
				968	msleep(10);
				969	dprintk("nfsd: write resume %d\n", task_pid_nr(current));
				970	}
				971
				972	if (inode->i_state & I_DIRTY) {
				973	dprintk("nfsd: write sync %d\n", task_pid_nr(current));
				974	err = vfs_fsync(file, 0);
				975	}
				976	last_ino = inode->i_ino;
				977	last_dev = inode->i_sb->s_dev;
				978	return err;
				979	}
				980
				981	__be32
				982	nfsd_vfs_write(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
				983	loff_t offset, struct kvec *vec, int vlen,
				984	unsigned long *cnt, int stable)
				985	{
				986	struct svc_export *exp;
				987	struct iov_iter iter;
				988	__be32 nfserr;
				989	int host_err;
				990	int use_wgather;
				991	loff_t pos = offset;
				992	unsigned int pflags = current->flags;
				993	rwf_t flags = 0;
				994
				995	trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
				996
				997	if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
				998	/*
				999	* We want less throttling in balance_dirty_pages()
				1000	* and shrink_inactive_list() so that nfs to
				1001	* localhost doesn't cause nfsd to lock up due to all
				1002	* the client's dirty pages or its congested queue.
				1003	*/
				1004	current->flags \|= PF_LESS_THROTTLE;
				1005
				1006	exp = fhp->fh_export;
				1007	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
				1008
				1009	if (!EX_ISSYNC(exp))
				1010	stable = NFS_UNSTABLE;
				1011
				1012	if (stable && !use_wgather)
				1013	flags \|= RWF_SYNC;
				1014
				1015	iov_iter_kvec(&iter, WRITE \| ITER_KVEC, vec, vlen, *cnt);
				1016	host_err = vfs_iter_write(file, &iter, &pos, flags);
				1017	if (host_err < 0)
				1018	goto out_nfserr;
				1019	nfsdstats.io_write += *cnt;
				1020	fsnotify_modify(file);
				1021
				1022	if (stable && use_wgather)
				1023	host_err = wait_for_concurrent_writes(file);
				1024
				1025	out_nfserr:
				1026	if (host_err >= 0) {
				1027	trace_nfsd_write_io_done(rqstp, fhp, offset, *cnt);
				1028	nfserr = nfs_ok;
				1029	} else {
				1030	trace_nfsd_write_err(rqstp, fhp, offset, host_err);
				1031	nfserr = nfserrno(host_err);
				1032	}
				1033	if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
				1034	current_restore_flags(pflags, PF_LESS_THROTTLE);
				1035	return nfserr;
				1036	}
				1037
				1038	/*
				1039	* Read data from a file. count must contain the requested read count
				1040	* on entry. On return, *count contains the number of bytes actually read.
				1041	* N.B. After this call fhp needs an fh_put
				1042	*/
				1043	__be32 nfsd_read(struct svc_rqst rqstp, struct svc_fh fhp,
				1044	loff_t offset, struct kvec vec, int vlen, unsigned long count)
				1045	{
				1046	struct file *file;
				1047	struct raparms *ra;
				1048	__be32 err;
				1049
				1050	trace_nfsd_read_start(rqstp, fhp, offset, *count);
				1051	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
				1052	if (err)
				1053	return err;
				1054
				1055	ra = nfsd_init_raparms(file);
				1056
				1057	if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
				1058	err = nfsd_splice_read(rqstp, fhp, file, offset, count);
				1059	else
				1060	err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
				1061
				1062	if (ra)
				1063	nfsd_put_raparams(file, ra);
				1064	fput(file);
				1065
				1066	trace_nfsd_read_done(rqstp, fhp, offset, *count);
				1067
				1068	return err;
				1069	}
				1070
				1071	/*
				1072	* Write data to a file.
				1073	* The stable flag requests synchronous writes.
				1074	* N.B. After this call fhp needs an fh_put
				1075	*/
				1076	__be32
				1077	nfsd_write(struct svc_rqst rqstp, struct svc_fh fhp, loff_t offset,
				1078	struct kvec vec, int vlen, unsigned long cnt, int stable)
				1079	{
				1080	struct file *file = NULL;
				1081	__be32 err = 0;
				1082
				1083	trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
				1084
				1085	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
				1086	if (err)
				1087	goto out;
				1088
				1089	err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
				1090	fput(file);
				1091	out:
				1092	trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
				1093	return err;
				1094	}
				1095
				1096	#ifdef CONFIG_NFSD_V3
				1097	/*
				1098	* Commit all pending writes to stable storage.
				1099	*
				1100	* Note: we only guarantee that data that lies within the range specified
				1101	* by the 'offset' and 'count' parameters will be synced.
				1102	*
				1103	* Unfortunately we cannot lock the file to make sure we return full WCC
				1104	* data to the client, as locking happens lower down in the filesystem.
				1105	*/
				1106	__be32
				1107	nfsd_commit(struct svc_rqst rqstp, struct svc_fh fhp,
				1108	loff_t offset, unsigned long count)
				1109	{
				1110	struct file *file;
				1111	loff_t end = LLONG_MAX;
				1112	__be32 err = nfserr_inval;
				1113
				1114	if (offset < 0)
				1115	goto out;
				1116	if (count != 0) {
				1117	end = offset + (loff_t)count - 1;
				1118	if (end < offset)
				1119	goto out;
				1120	}
				1121
				1122	err = nfsd_open(rqstp, fhp, S_IFREG,
				1123	NFSD_MAY_WRITE\|NFSD_MAY_NOT_BREAK_LEASE, &file);
				1124	if (err)
				1125	goto out;
				1126	if (EX_ISSYNC(fhp->fh_export)) {
				1127	int err2 = vfs_fsync_range(file, offset, end, 0);
				1128
				1129	if (err2 != -EINVAL)
				1130	err = nfserrno(err2);
				1131	else
				1132	err = nfserr_notsupp;
				1133	}
				1134
				1135	fput(file);
				1136	out:
				1137	return err;
				1138	}
				1139	#endif /* CONFIG_NFSD_V3 */
				1140
				1141	static __be32
				1142	nfsd_create_setattr(struct svc_rqst rqstp, struct svc_fh resfhp,
				1143	struct iattr *iap)
				1144	{
				1145	/*
				1146	* Mode has already been set earlier in create:
				1147	*/
				1148	iap->ia_valid &= ~ATTR_MODE;
				1149	/*
				1150	* Setting uid/gid works only for root. Irix appears to
				1151	* send along the gid on create when it tries to implement
				1152	* setgid directories via NFS:
				1153	*/
				1154	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
				1155	iap->ia_valid &= ~(ATTR_UID\|ATTR_GID);
				1156	if (iap->ia_valid)
				1157	return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
				1158	/* Callers expect file metadata to be committed here */
				1159	return nfserrno(commit_metadata(resfhp));
				1160	}
				1161
				1162	/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
				1163	* setting size to 0 may fail for some specific file systems by the permission
				1164	* checking which requires WRITE permission but the mode is 000.
				1165	* we ignore the resizing(to 0) on the just new created file, since the size is
				1166	* 0 after file created.
				1167	*
				1168	* call this only after vfs_create() is called.
				1169	* */
				1170	static void
				1171	nfsd_check_ignore_resizing(struct iattr *iap)
				1172	{
				1173	if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
				1174	iap->ia_valid &= ~ATTR_SIZE;
				1175	}
				1176
				1177	/* The parent directory should already be locked: */
				1178	__be32
				1179	nfsd_create_locked(struct svc_rqst rqstp, struct svc_fh fhp,
				1180	char fname, int flen, struct iattr iap,
				1181	int type, dev_t rdev, struct svc_fh *resfhp)
				1182	{
				1183	struct dentry dentry, dchild;
				1184	struct inode *dirp;
				1185	__be32 err;
				1186	__be32 err2;
				1187	int host_err;
				1188
				1189	dentry = fhp->fh_dentry;
				1190	dirp = d_inode(dentry);
				1191
				1192	dchild = dget(resfhp->fh_dentry);
				1193	if (!fhp->fh_locked) {
				1194	WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n",
				1195	dentry);
				1196	err = nfserr_io;
				1197	goto out;
				1198	}
				1199
				1200	err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE);
				1201	if (err)
				1202	goto out;
				1203
				1204	if (!(iap->ia_valid & ATTR_MODE))
				1205	iap->ia_mode = 0;
				1206	iap->ia_mode = (iap->ia_mode & S_IALLUGO) \| type;
				1207
				1208	err = 0;
				1209	host_err = 0;
				1210	switch (type) {
				1211	case S_IFREG:
				1212	host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
				1213	if (!host_err)
				1214	nfsd_check_ignore_resizing(iap);
				1215	break;
				1216	case S_IFDIR:
				1217	host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
				1218	if (!host_err && unlikely(d_unhashed(dchild))) {
				1219	struct dentry *d;
				1220	d = lookup_one_len(dchild->d_name.name,
				1221	dchild->d_parent,
				1222	dchild->d_name.len);
				1223	if (IS_ERR(d)) {
				1224	host_err = PTR_ERR(d);
				1225	break;
				1226	}
				1227	if (unlikely(d_is_negative(d))) {
				1228	dput(d);
				1229	err = nfserr_serverfault;
				1230	goto out;
				1231	}
				1232	dput(resfhp->fh_dentry);
				1233	resfhp->fh_dentry = dget(d);
				1234	err = fh_update(resfhp);
				1235	dput(dchild);
				1236	dchild = d;
				1237	if (err)
				1238	goto out;
				1239	}
				1240	break;
				1241	case S_IFCHR:
				1242	case S_IFBLK:
				1243	case S_IFIFO:
				1244	case S_IFSOCK:
				1245	host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
				1246	break;
				1247	default:
				1248	printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
				1249	type);
				1250	host_err = -EINVAL;
				1251	}
				1252	if (host_err < 0)
				1253	goto out_nfserr;
				1254
				1255	err = nfsd_create_setattr(rqstp, resfhp, iap);
				1256
				1257	/*
				1258	* nfsd_create_setattr already committed the child. Transactional
				1259	* filesystems had a chance to commit changes for both parent and
				1260	* child simultaneously making the following commit_metadata a
				1261	* noop.
				1262	*/
				1263	err2 = nfserrno(commit_metadata(fhp));
				1264	if (err2)
				1265	err = err2;
				1266	/*
				1267	* Update the file handle to get the new inode info.
				1268	*/
				1269	if (!err)
				1270	err = fh_update(resfhp);
				1271	out:
				1272	dput(dchild);
				1273	return err;
				1274
				1275	out_nfserr:
				1276	err = nfserrno(host_err);
				1277	goto out;
				1278	}
				1279
				1280	/*
				1281	* Create a filesystem object (regular, directory, special).
				1282	* Note that the parent directory is left locked.
				1283	*
				1284	* N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
				1285	*/
				1286	__be32
				1287	nfsd_create(struct svc_rqst rqstp, struct svc_fh fhp,
				1288	char fname, int flen, struct iattr iap,
				1289	int type, dev_t rdev, struct svc_fh *resfhp)
				1290	{
				1291	struct dentry dentry, dchild = NULL;
				1292	struct inode *dirp;
				1293	__be32 err;
				1294	int host_err;
				1295
				1296	if (isdotent(fname, flen))
				1297	return nfserr_exist;
				1298
				1299	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_NOP);
				1300	if (err)
				1301	return err;
				1302
				1303	dentry = fhp->fh_dentry;
				1304	dirp = d_inode(dentry);
				1305
				1306	host_err = fh_want_write(fhp);
				1307	if (host_err)
				1308	return nfserrno(host_err);
				1309
				1310	fh_lock_nested(fhp, I_MUTEX_PARENT);
				1311	dchild = lookup_one_len(fname, dentry, flen);
				1312	host_err = PTR_ERR(dchild);
				1313	if (IS_ERR(dchild))
				1314	return nfserrno(host_err);
				1315	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
				1316	/*
				1317	* We unconditionally drop our ref to dchild as fh_compose will have
				1318	* already grabbed its own ref for it.
				1319	*/
				1320	dput(dchild);
				1321	if (err)
				1322	return err;
				1323	return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
				1324	rdev, resfhp);
				1325	}
				1326
				1327	#ifdef CONFIG_NFSD_V3
				1328
				1329	/*
				1330	* NFSv3 and NFSv4 version of nfsd_create
				1331	*/
				1332	__be32
				1333	do_nfsd_create(struct svc_rqst rqstp, struct svc_fh fhp,
				1334	char fname, int flen, struct iattr iap,
				1335	struct svc_fh resfhp, int createmode, u32 verifier,
				1336	bool truncp, bool created)
				1337	{
				1338	struct dentry dentry, dchild = NULL;
				1339	struct inode *dirp;
				1340	__be32 err;
				1341	int host_err;
				1342	__u32 v_mtime=0, v_atime=0;
				1343
				1344	err = nfserr_perm;
				1345	if (!flen)
				1346	goto out;
				1347	err = nfserr_exist;
				1348	if (isdotent(fname, flen))
				1349	goto out;
				1350	if (!(iap->ia_valid & ATTR_MODE))
				1351	iap->ia_mode = 0;
				1352	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
				1353	if (err)
				1354	goto out;
				1355
				1356	dentry = fhp->fh_dentry;
				1357	dirp = d_inode(dentry);
				1358
				1359	host_err = fh_want_write(fhp);
				1360	if (host_err)
				1361	goto out_nfserr;
				1362
				1363	fh_lock_nested(fhp, I_MUTEX_PARENT);
				1364
				1365	/*
				1366	* Compose the response file handle.
				1367	*/
				1368	dchild = lookup_one_len(fname, dentry, flen);
				1369	host_err = PTR_ERR(dchild);
				1370	if (IS_ERR(dchild))
				1371	goto out_nfserr;
				1372
				1373	/* If file doesn't exist, check for permissions to create one */
				1374	if (d_really_is_negative(dchild)) {
				1375	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
				1376	if (err)
				1377	goto out;
				1378	}
				1379
				1380	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
				1381	if (err)
				1382	goto out;
				1383
				1384	if (nfsd_create_is_exclusive(createmode)) {
				1385	/* solaris7 gets confused (bugid 4218508) if these have
				1386	* the high bit set, so just clear the high bits. If this is
				1387	* ever changed to use different attrs for storing the
				1388	* verifier, then do_open_lookup() will also need to be fixed
				1389	* accordingly.
				1390	*/
				1391	v_mtime = verifier[0]&0x7fffffff;
				1392	v_atime = verifier[1]&0x7fffffff;
				1393	}
				1394
				1395	if (d_really_is_positive(dchild)) {
				1396	err = 0;
				1397
				1398	switch (createmode) {
				1399	case NFS3_CREATE_UNCHECKED:
				1400	if (! d_is_reg(dchild))
				1401	goto out;
				1402	else if (truncp) {
				1403	/* in nfsv4, we need to treat this case a little
				1404	* differently. we don't want to truncate the
				1405	* file now; this would be wrong if the OPEN
				1406	* fails for some other reason. furthermore,
				1407	* if the size is nonzero, we should ignore it
				1408	* according to spec!
				1409	*/
				1410	*truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
				1411	}
				1412	else {
				1413	iap->ia_valid &= ATTR_SIZE;
				1414	goto set_attr;
				1415	}
				1416	break;
				1417	case NFS3_CREATE_EXCLUSIVE:
				1418	if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
				1419	&& d_inode(dchild)->i_atime.tv_sec == v_atime
				1420	&& d_inode(dchild)->i_size == 0 ) {
				1421	if (created)
				1422	*created = 1;
				1423	break;
				1424	}
				1425	case NFS4_CREATE_EXCLUSIVE4_1:
				1426	if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
				1427	&& d_inode(dchild)->i_atime.tv_sec == v_atime
				1428	&& d_inode(dchild)->i_size == 0 ) {
				1429	if (created)
				1430	*created = 1;
				1431	goto set_attr;
				1432	}
				1433	/* fallthru */
				1434	case NFS3_CREATE_GUARDED:
				1435	err = nfserr_exist;
				1436	}
				1437	fh_drop_write(fhp);
				1438	goto out;
				1439	}
				1440
				1441	host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
				1442	if (host_err < 0) {
				1443	fh_drop_write(fhp);
				1444	goto out_nfserr;
				1445	}
				1446	if (created)
				1447	*created = 1;
				1448
				1449	nfsd_check_ignore_resizing(iap);
				1450
				1451	if (nfsd_create_is_exclusive(createmode)) {
				1452	/* Cram the verifier into atime/mtime */
				1453	iap->ia_valid = ATTR_MTIME\|ATTR_ATIME
				1454	\| ATTR_MTIME_SET\|ATTR_ATIME_SET;
				1455	/* XXX someone who knows this better please fix it for nsec */
				1456	iap->ia_mtime.tv_sec = v_mtime;
				1457	iap->ia_atime.tv_sec = v_atime;
				1458	iap->ia_mtime.tv_nsec = 0;
				1459	iap->ia_atime.tv_nsec = 0;
				1460	}
				1461
				1462	set_attr:
				1463	err = nfsd_create_setattr(rqstp, resfhp, iap);
				1464
				1465	/*
				1466	* nfsd_create_setattr already committed the child
				1467	* (and possibly also the parent).
				1468	*/
				1469	if (!err)
				1470	err = nfserrno(commit_metadata(fhp));
				1471
				1472	/*
				1473	* Update the filehandle to get the new inode info.
				1474	*/
				1475	if (!err)
				1476	err = fh_update(resfhp);
				1477
				1478	out:
				1479	fh_unlock(fhp);
				1480	if (dchild && !IS_ERR(dchild))
				1481	dput(dchild);
				1482	fh_drop_write(fhp);
				1483	return err;
				1484
				1485	out_nfserr:
				1486	err = nfserrno(host_err);
				1487	goto out;
				1488	}
				1489	#endif /* CONFIG_NFSD_V3 */
				1490
				1491	/*
				1492	* Read a symlink. On entry, *lenp must contain the maximum path length that
				1493	* fits into the buffer. On return, it contains the true length.
				1494	* N.B. After this call fhp needs an fh_put
				1495	*/
				1496	__be32
				1497	nfsd_readlink(struct svc_rqst rqstp, struct svc_fh fhp, char buf, int lenp)
				1498	{
				1499	__be32 err;
				1500	const char *link;
				1501	struct path path;
				1502	DEFINE_DELAYED_CALL(done);
				1503	int len;
				1504
				1505	err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
				1506	if (unlikely(err))
				1507	return err;
				1508
				1509	path.mnt = fhp->fh_export->ex_path.mnt;
				1510	path.dentry = fhp->fh_dentry;
				1511
				1512	if (unlikely(!d_is_symlink(path.dentry)))
				1513	return nfserr_inval;
				1514
				1515	touch_atime(&path);
				1516
				1517	link = vfs_get_link(path.dentry, &done);
				1518	if (IS_ERR(link))
				1519	return nfserrno(PTR_ERR(link));
				1520
				1521	len = strlen(link);
				1522	if (len < *lenp)
				1523	*lenp = len;
				1524	memcpy(buf, link, *lenp);
				1525	do_delayed_call(&done);
				1526	return 0;
				1527	}
				1528
				1529	/*
				1530	* Create a symlink and look up its inode
				1531	* N.B. After this call _both_ fhp and resfhp need an fh_put
				1532	*/
				1533	__be32
				1534	nfsd_symlink(struct svc_rqst rqstp, struct svc_fh fhp,
				1535	char *fname, int flen,
				1536	char *path,
				1537	struct svc_fh *resfhp)
				1538	{
				1539	struct dentry dentry, dnew;
				1540	__be32 err, cerr;
				1541	int host_err;
				1542
				1543	err = nfserr_noent;
				1544	if (!flen \|\| path[0] == '\0')
				1545	goto out;
				1546	err = nfserr_exist;
				1547	if (isdotent(fname, flen))
				1548	goto out;
				1549
				1550	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
				1551	if (err)
				1552	goto out;
				1553
				1554	host_err = fh_want_write(fhp);
				1555	if (host_err)
				1556	goto out_nfserr;
				1557
				1558	fh_lock(fhp);
				1559	dentry = fhp->fh_dentry;
				1560	dnew = lookup_one_len(fname, dentry, flen);
				1561	host_err = PTR_ERR(dnew);
				1562	if (IS_ERR(dnew))
				1563	goto out_nfserr;
				1564
				1565	host_err = vfs_symlink(d_inode(dentry), dnew, path);
				1566	err = nfserrno(host_err);
				1567	if (!err)
				1568	err = nfserrno(commit_metadata(fhp));
				1569	fh_unlock(fhp);
				1570
				1571	fh_drop_write(fhp);
				1572
				1573	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
				1574	dput(dnew);
				1575	if (err==0) err = cerr;
				1576	out:
				1577	return err;
				1578
				1579	out_nfserr:
				1580	err = nfserrno(host_err);
				1581	goto out;
				1582	}
				1583
				1584	/*
				1585	* Create a hardlink
				1586	* N.B. After this call _both_ ffhp and tfhp need an fh_put
				1587	*/
				1588	__be32
				1589	nfsd_link(struct svc_rqst rqstp, struct svc_fh ffhp,
				1590	char name, int len, struct svc_fh tfhp)
				1591	{
				1592	struct dentry ddir, dnew, *dold;
				1593	struct inode *dirp;
				1594	__be32 err;
				1595	int host_err;
				1596
				1597	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
				1598	if (err)
				1599	goto out;
				1600	err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP);
				1601	if (err)
				1602	goto out;
				1603	err = nfserr_isdir;
				1604	if (d_is_dir(tfhp->fh_dentry))
				1605	goto out;
				1606	err = nfserr_perm;
				1607	if (!len)
				1608	goto out;
				1609	err = nfserr_exist;
				1610	if (isdotent(name, len))
				1611	goto out;
				1612
				1613	host_err = fh_want_write(tfhp);
				1614	if (host_err) {
				1615	err = nfserrno(host_err);
				1616	goto out;
				1617	}
				1618
				1619	fh_lock_nested(ffhp, I_MUTEX_PARENT);
				1620	ddir = ffhp->fh_dentry;
				1621	dirp = d_inode(ddir);
				1622
				1623	dnew = lookup_one_len(name, ddir, len);
				1624	host_err = PTR_ERR(dnew);
				1625	if (IS_ERR(dnew))
				1626	goto out_nfserr;
				1627
				1628	dold = tfhp->fh_dentry;
				1629
				1630	err = nfserr_noent;
				1631	if (d_really_is_negative(dold))
				1632	goto out_dput;
				1633	host_err = vfs_link(dold, dirp, dnew, NULL);
				1634	if (!host_err) {
				1635	err = nfserrno(commit_metadata(ffhp));
				1636	if (!err)
				1637	err = nfserrno(commit_metadata(tfhp));
				1638	} else {
				1639	if (host_err == -EXDEV && rqstp->rq_vers == 2)
				1640	err = nfserr_acces;
				1641	else
				1642	err = nfserrno(host_err);
				1643	}
				1644	out_dput:
				1645	dput(dnew);
				1646	out_unlock:
				1647	fh_unlock(ffhp);
				1648	fh_drop_write(tfhp);
				1649	out:
				1650	return err;
				1651
				1652	out_nfserr:
				1653	err = nfserrno(host_err);
				1654	goto out_unlock;
				1655	}
				1656
				1657	/*
				1658	* Rename a file
				1659	* N.B. After this call _both_ ffhp and tfhp need an fh_put
				1660	*/
				1661	__be32
				1662	nfsd_rename(struct svc_rqst rqstp, struct svc_fh ffhp, char *fname, int flen,
				1663	struct svc_fh tfhp, char tname, int tlen)
				1664	{
				1665	struct dentry fdentry, tdentry, odentry, ndentry, *trap;
				1666	struct inode fdir, tdir;
				1667	__be32 err;
				1668	int host_err;
				1669
				1670	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
				1671	if (err)
				1672	goto out;
				1673	err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
				1674	if (err)
				1675	goto out;
				1676
				1677	fdentry = ffhp->fh_dentry;
				1678	fdir = d_inode(fdentry);
				1679
				1680	tdentry = tfhp->fh_dentry;
				1681	tdir = d_inode(tdentry);
				1682
				1683	err = nfserr_perm;
				1684	if (!flen \|\| isdotent(fname, flen) \|\| !tlen \|\| isdotent(tname, tlen))
				1685	goto out;
				1686
				1687	host_err = fh_want_write(ffhp);
				1688	if (host_err) {
				1689	err = nfserrno(host_err);
				1690	goto out;
				1691	}
				1692
				1693	/* cannot use fh_lock as we need deadlock protective ordering
				1694	* so do it by hand */
				1695	trap = lock_rename(tdentry, fdentry);
				1696	ffhp->fh_locked = tfhp->fh_locked = true;
				1697	fill_pre_wcc(ffhp);
				1698	fill_pre_wcc(tfhp);
				1699
				1700	odentry = lookup_one_len(fname, fdentry, flen);
				1701	host_err = PTR_ERR(odentry);
				1702	if (IS_ERR(odentry))
				1703	goto out_nfserr;
				1704
				1705	host_err = -ENOENT;
				1706	if (d_really_is_negative(odentry))
				1707	goto out_dput_old;
				1708	host_err = -EINVAL;
				1709	if (odentry == trap)
				1710	goto out_dput_old;
				1711
				1712	ndentry = lookup_one_len(tname, tdentry, tlen);
				1713	host_err = PTR_ERR(ndentry);
				1714	if (IS_ERR(ndentry))
				1715	goto out_dput_old;
				1716	host_err = -ENOTEMPTY;
				1717	if (ndentry == trap)
				1718	goto out_dput_new;
				1719
				1720	host_err = -EXDEV;
				1721	if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
				1722	goto out_dput_new;
				1723	if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
				1724	goto out_dput_new;
				1725
				1726	host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
				1727	if (!host_err) {
				1728	host_err = commit_metadata(tfhp);
				1729	if (!host_err)
				1730	host_err = commit_metadata(ffhp);
				1731	}
				1732	out_dput_new:
				1733	dput(ndentry);
				1734	out_dput_old:
				1735	dput(odentry);
				1736	out_nfserr:
				1737	err = nfserrno(host_err);
				1738	/*
				1739	* We cannot rely on fh_unlock on the two filehandles,
				1740	* as that would do the wrong thing if the two directories
				1741	* were the same, so again we do it by hand.
				1742	*/
				1743	fill_post_wcc(ffhp);
				1744	fill_post_wcc(tfhp);
				1745	unlock_rename(tdentry, fdentry);
				1746	ffhp->fh_locked = tfhp->fh_locked = false;
				1747	fh_drop_write(ffhp);
				1748
				1749	out:
				1750	return err;
				1751	}
				1752
				1753	/*
				1754	* Unlink a file or directory
				1755	* N.B. After this call fhp needs an fh_put
				1756	*/
				1757	__be32
				1758	nfsd_unlink(struct svc_rqst rqstp, struct svc_fh fhp, int type,
				1759	char *fname, int flen)
				1760	{
				1761	struct dentry dentry, rdentry;
				1762	struct inode *dirp;
				1763	__be32 err;
				1764	int host_err;
				1765
				1766	err = nfserr_acces;
				1767	if (!flen \|\| isdotent(fname, flen))
				1768	goto out;
				1769	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
				1770	if (err)
				1771	goto out;
				1772
				1773	host_err = fh_want_write(fhp);
				1774	if (host_err)
				1775	goto out_nfserr;
				1776
				1777	fh_lock_nested(fhp, I_MUTEX_PARENT);
				1778	dentry = fhp->fh_dentry;
				1779	dirp = d_inode(dentry);
				1780
				1781	rdentry = lookup_one_len(fname, dentry, flen);
				1782	host_err = PTR_ERR(rdentry);
				1783	if (IS_ERR(rdentry))
				1784	goto out_nfserr;
				1785
				1786	if (d_really_is_negative(rdentry)) {
				1787	dput(rdentry);
				1788	err = nfserr_noent;
				1789	goto out;
				1790	}
				1791
				1792	if (!type)
				1793	type = d_inode(rdentry)->i_mode & S_IFMT;
				1794
				1795	if (type != S_IFDIR)
				1796	host_err = vfs_unlink(dirp, rdentry, NULL);
				1797	else
				1798	host_err = vfs_rmdir(dirp, rdentry);
				1799	if (!host_err)
				1800	host_err = commit_metadata(fhp);
				1801	dput(rdentry);
				1802
				1803	out_nfserr:
				1804	err = nfserrno(host_err);
				1805	out:
				1806	return err;
				1807	}
				1808
				1809	/*
				1810	* We do this buffering because we must not call back into the file
				1811	* system's ->lookup() method from the filldir callback. That may well
				1812	* deadlock a number of file systems.
				1813	*
				1814	* This is based heavily on the implementation of same in XFS.
				1815	*/
				1816	struct buffered_dirent {
				1817	u64 ino;
				1818	loff_t offset;
				1819	int namlen;
				1820	unsigned int d_type;
				1821	char name[];
				1822	};
				1823
				1824	struct readdir_data {
				1825	struct dir_context ctx;
				1826	char *dirent;
				1827	size_t used;
				1828	int full;
				1829	};
				1830
				1831	static int nfsd_buffered_filldir(struct dir_context ctx, const char name,
				1832	int namlen, loff_t offset, u64 ino,
				1833	unsigned int d_type)
				1834	{
				1835	struct readdir_data *buf =
				1836	container_of(ctx, struct readdir_data, ctx);
				1837	struct buffered_dirent de = (void )(buf->dirent + buf->used);
				1838	unsigned int reclen;
				1839
				1840	reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
				1841	if (buf->used + reclen > PAGE_SIZE) {
				1842	buf->full = 1;
				1843	return -EINVAL;
				1844	}
				1845
				1846	de->namlen = namlen;
				1847	de->offset = offset;
				1848	de->ino = ino;
				1849	de->d_type = d_type;
				1850	memcpy(de->name, name, namlen);
				1851	buf->used += reclen;
				1852
				1853	return 0;
				1854	}
				1855
				1856	static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
				1857	struct readdir_cd cdp, loff_t offsetp)
				1858	{
				1859	struct buffered_dirent *de;
				1860	int host_err;
				1861	int size;
				1862	loff_t offset;
				1863	struct readdir_data buf = {
				1864	.ctx.actor = nfsd_buffered_filldir,
				1865	.dirent = (void *)__get_free_page(GFP_KERNEL)
				1866	};
				1867
				1868	if (!buf.dirent)
				1869	return nfserrno(-ENOMEM);
				1870
				1871	offset = *offsetp;
				1872
				1873	while (1) {
				1874	unsigned int reclen;
				1875
				1876	cdp->err = nfserr_eof; /* will be cleared on successful read */
				1877	buf.used = 0;
				1878	buf.full = 0;
				1879
				1880	host_err = iterate_dir(file, &buf.ctx);
				1881	if (buf.full)
				1882	host_err = 0;
				1883
				1884	if (host_err < 0)
				1885	break;
				1886
				1887	size = buf.used;
				1888
				1889	if (!size)
				1890	break;
				1891
				1892	de = (struct buffered_dirent *)buf.dirent;
				1893	while (size > 0) {
				1894	offset = de->offset;
				1895
				1896	if (func(cdp, de->name, de->namlen, de->offset,
				1897	de->ino, de->d_type))
				1898	break;
				1899
				1900	if (cdp->err != nfs_ok)
				1901	break;
				1902
				1903	reclen = ALIGN(sizeof(*de) + de->namlen,
				1904	sizeof(u64));
				1905	size -= reclen;
				1906	de = (struct buffered_dirent )((char )de + reclen);
				1907	}
				1908	if (size > 0) /* We bailed out early */
				1909	break;
				1910
				1911	offset = vfs_llseek(file, 0, SEEK_CUR);
				1912	}
				1913
				1914	free_page((unsigned long)(buf.dirent));
				1915
				1916	if (host_err)
				1917	return nfserrno(host_err);
				1918
				1919	*offsetp = offset;
				1920	return cdp->err;
				1921	}
				1922
				1923	/*
				1924	* Read entries from a directory.
				1925	* The NFSv3/4 verifier we ignore for now.
				1926	*/
				1927	__be32
				1928	nfsd_readdir(struct svc_rqst rqstp, struct svc_fh fhp, loff_t *offsetp,
				1929	struct readdir_cd *cdp, nfsd_filldir_t func)
				1930	{
				1931	__be32 err;
				1932	struct file *file;
				1933	loff_t offset = *offsetp;
				1934	int may_flags = NFSD_MAY_READ;
				1935
				1936	/* NFSv2 only supports 32 bit cookies */
				1937	if (rqstp->rq_vers > 2)
				1938	may_flags \|= NFSD_MAY_64BIT_COOKIE;
				1939
				1940	err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
				1941	if (err)
				1942	goto out;
				1943
				1944	offset = vfs_llseek(file, offset, SEEK_SET);
				1945	if (offset < 0) {
				1946	err = nfserrno((int)offset);
				1947	goto out_close;
				1948	}
				1949
				1950	err = nfsd_buffered_readdir(file, func, cdp, offsetp);
				1951
				1952	if (err == nfserr_eof \|\| err == nfserr_toosmall)
				1953	err = nfs_ok; /* can still be found in ->err */
				1954	out_close:
				1955	fput(file);
				1956	out:
				1957	return err;
				1958	}
				1959
				1960	/*
				1961	* Get file system stats
				1962	* N.B. After this call fhp needs an fh_put
				1963	*/
				1964	__be32
				1965	nfsd_statfs(struct svc_rqst rqstp, struct svc_fh fhp, struct kstatfs *stat, int access)
				1966	{
				1967	__be32 err;
				1968
				1969	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP \| access);
				1970	if (!err) {
				1971	struct path path = {
				1972	.mnt = fhp->fh_export->ex_path.mnt,
				1973	.dentry = fhp->fh_dentry,
				1974	};
				1975	if (vfs_statfs(&path, stat))
				1976	err = nfserr_io;
				1977	}
				1978	return err;
				1979	}
				1980
				1981	static int exp_rdonly(struct svc_rqst rqstp, struct svc_export exp)
				1982	{
				1983	return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
				1984	}
				1985
				1986	/*
				1987	* Check for a user's access permissions to this inode.
				1988	*/
				1989	__be32
				1990	nfsd_permission(struct svc_rqst rqstp, struct svc_export exp,
				1991	struct dentry *dentry, int acc)
				1992	{
				1993	struct inode *inode = d_inode(dentry);
				1994	int err;
				1995
				1996	if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
				1997	return 0;
				1998	#if 0
				1999	dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
				2000	acc,
				2001	(acc & NFSD_MAY_READ)? " read" : "",
				2002	(acc & NFSD_MAY_WRITE)? " write" : "",
				2003	(acc & NFSD_MAY_EXEC)? " exec" : "",
				2004	(acc & NFSD_MAY_SATTR)? " sattr" : "",
				2005	(acc & NFSD_MAY_TRUNC)? " trunc" : "",
				2006	(acc & NFSD_MAY_LOCK)? " lock" : "",
				2007	(acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
				2008	inode->i_mode,
				2009	IS_IMMUTABLE(inode)? " immut" : "",
				2010	IS_APPEND(inode)? " append" : "",
				2011	__mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
				2012	dprintk(" owner %d/%d user %d/%d\n",
				2013	inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid());
				2014	#endif
				2015
				2016	/* Normally we reject any write/sattr etc access on a read-only file
				2017	* system. But if it is IRIX doing check on write-access for a
				2018	* device special file, we ignore rofs.
				2019	*/
				2020	if (!(acc & NFSD_MAY_LOCAL_ACCESS))
				2021	if (acc & (NFSD_MAY_WRITE \| NFSD_MAY_SATTR \| NFSD_MAY_TRUNC)) {
				2022	if (exp_rdonly(rqstp, exp) \|\|
				2023	__mnt_is_readonly(exp->ex_path.mnt))
				2024	return nfserr_rofs;
				2025	if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
				2026	return nfserr_perm;
				2027	}
				2028	if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
				2029	return nfserr_perm;
				2030
				2031	if (acc & NFSD_MAY_LOCK) {
				2032	/* If we cannot rely on authentication in NLM requests,
				2033	* just allow locks, otherwise require read permission, or
				2034	* ownership
				2035	*/
				2036	if (exp->ex_flags & NFSEXP_NOAUTHNLM)
				2037	return 0;
				2038	else
				2039	acc = NFSD_MAY_READ \| NFSD_MAY_OWNER_OVERRIDE;
				2040	}
				2041	/*
				2042	* The file owner always gets access permission for accesses that
				2043	* would normally be checked at open time. This is to make
				2044	* file access work even when the client has done a fchmod(fd, 0).
				2045	*
				2046	* However, `cp foo bar' should fail nevertheless when bar is
				2047	* readonly. A sensible way to do this might be to reject all
				2048	* attempts to truncate a read-only file, because a creat() call
				2049	* always implies file truncation.
				2050	* ... but this isn't really fair. A process may reasonably call
				2051	* ftruncate on an open file descriptor on a file with perm 000.
				2052	* We must trust the client to do permission checking - using "ACCESS"
				2053	* with NFSv3.
				2054	*/
				2055	if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
				2056	uid_eq(inode->i_uid, current_fsuid()))
				2057	return 0;
				2058
				2059	/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
				2060	err = inode_permission(inode, acc & (MAY_READ\|MAY_WRITE\|MAY_EXEC));
				2061
				2062	/* Allow read access to binaries even when mode 111 */
				2063	if (err == -EACCES && S_ISREG(inode->i_mode) &&
				2064	(acc == (NFSD_MAY_READ \| NFSD_MAY_OWNER_OVERRIDE) \|\|
				2065	acc == (NFSD_MAY_READ \| NFSD_MAY_READ_IF_EXEC)))
				2066	err = inode_permission(inode, MAY_EXEC);
				2067
				2068	return err? nfserrno(err) : 0;
				2069	}
				2070
				2071	void
				2072	nfsd_racache_shutdown(void)
				2073	{
				2074	struct raparms raparm, last_raparm;
				2075	unsigned int i;
				2076
				2077	dprintk("nfsd: freeing readahead buffers.\n");
				2078
				2079	for (i = 0; i < RAPARM_HASH_SIZE; i++) {
				2080	raparm = raparm_hash[i].pb_head;
				2081	while(raparm) {
				2082	last_raparm = raparm;
				2083	raparm = raparm->p_next;
				2084	kfree(last_raparm);
				2085	}
				2086	raparm_hash[i].pb_head = NULL;
				2087	}
				2088	}
				2089	/*
				2090	* Initialize readahead param cache
				2091	*/
				2092	int
				2093	nfsd_racache_init(int cache_size)
				2094	{
				2095	int i;
				2096	int j = 0;
				2097	int nperbucket;
				2098	struct raparms **raparm = NULL;
				2099
				2100
				2101	if (raparm_hash[0].pb_head)
				2102	return 0;
				2103	nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
				2104	nperbucket = max(2, nperbucket);
				2105	cache_size = nperbucket * RAPARM_HASH_SIZE;
				2106
				2107	dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
				2108
				2109	for (i = 0; i < RAPARM_HASH_SIZE; i++) {
				2110	spin_lock_init(&raparm_hash[i].pb_lock);
				2111
				2112	raparm = &raparm_hash[i].pb_head;
				2113	for (j = 0; j < nperbucket; j++) {
				2114	*raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
				2115	if (!*raparm)
				2116	goto out_nomem;
				2117	raparm = &(*raparm)->p_next;
				2118	}
				2119	*raparm = NULL;
				2120	}
				2121
				2122	nfsdstats.ra_size = cache_size;
				2123	return 0;
				2124
				2125	out_nomem:
				2126	dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
				2127	nfsd_racache_shutdown();
				2128	return -ENOMEM;
				2129	}