Blame - ap/os/linux/linux-3.4.x/fs/nfsd/vfs.c - T106_DC

blob: 36620e65158b487c1e13e17168053ed187146d33 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/*
				2	* File operations used by nfsd. Some of these have been ripped from
				3	* other parts of the kernel because they weren't exported, others
				4	* are partial duplicates with added or changed functionality.
				5	*
				6	* Note that several functions dget() the dentry upon which they want
				7	* to act, most notably those that create directory entries. Response
				8	* dentry's are dput()'d if necessary in the release callback.
				9	* So if you notice code paths that apparently fail to dput() the
				10	* dentry, don't worry--they have been taken care of.
				11	*
				12	* Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
				13	* Zerocopy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
				14	*/
				15
				16	#include <linux/fs.h>
				17	#include <linux/file.h>
				18	#include <linux/splice.h>
				19	#include <linux/fcntl.h>
				20	#include <linux/namei.h>
				21	#include <linux/delay.h>
				22	#include <linux/fsnotify.h>
				23	#include <linux/posix_acl_xattr.h>
				24	#include <linux/xattr.h>
				25	#include <linux/jhash.h>
				26	#include <linux/ima.h>
				27	#include <linux/slab.h>
				28	#include <asm/uaccess.h>
				29	#include <linux/exportfs.h>
				30	#include <linux/writeback.h>
				31
				32	#ifdef CONFIG_NFSD_V3
				33	#include "xdr3.h"
				34	#endif /* CONFIG_NFSD_V3 */
				35
				36	#ifdef CONFIG_NFSD_V4
				37	#include "acl.h"
				38	#include "idmap.h"
				39	#endif /* CONFIG_NFSD_V4 */
				40
				41	#include "nfsd.h"
				42	#include "vfs.h"
				43
				44	#define NFSDDBG_FACILITY NFSDDBG_FILEOP
				45
				46
				47	/*
				48	* This is a cache of readahead params that help us choose the proper
				49	* readahead strategy. Initially, we set all readahead parameters to 0
				50	* and let the VFS handle things.
				51	* If you increase the number of cached files very much, you'll need to
				52	* add a hash table here.
				53	*/
				54	struct raparms {
				55	struct raparms *p_next;
				56	unsigned int p_count;
				57	ino_t p_ino;
				58	dev_t p_dev;
				59	int p_set;
				60	struct file_ra_state p_ra;
				61	unsigned int p_hindex;
				62	};
				63
				64	struct raparm_hbucket {
				65	struct raparms *pb_head;
				66	spinlock_t pb_lock;
				67	} ____cacheline_aligned_in_smp;
				68
				69	#define RAPARM_HASH_BITS 4
				70	#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
				71	#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
				72	static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
				73
				74	/*
				75	* Called from nfsd_lookup and encode_dirent. Check if we have crossed
				76	* a mount point.
				77	* Returns -EAGAIN or -ETIMEDOUT leaving dpp and expp unchanged,
				78	* or nfs_ok having possibly changed dpp and expp
				79	*/
				80	int
				81	nfsd_cross_mnt(struct svc_rqst rqstp, struct dentry *dpp,
				82	struct svc_export **expp)
				83	{
				84	struct svc_export exp = expp, *exp2 = NULL;
				85	struct dentry dentry = dpp;
				86	struct path path = {.mnt = mntget(exp->ex_path.mnt),
				87	.dentry = dget(dentry)};
				88	int err = 0;
				89
				90	err = follow_down(&path);
				91	if (err < 0)
				92	goto out;
				93
				94	exp2 = rqst_exp_get_by_name(rqstp, &path);
				95	if (IS_ERR(exp2)) {
				96	err = PTR_ERR(exp2);
				97	/*
				98	* We normally allow NFS clients to continue
				99	* "underneath" a mountpoint that is not exported.
				100	* The exception is V4ROOT, where no traversal is ever
				101	* allowed without an explicit export of the new
				102	* directory.
				103	*/
				104	if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
				105	err = 0;
				106	path_put(&path);
				107	goto out;
				108	}
				109	if (nfsd_v4client(rqstp) \|\|
				110	(exp->ex_flags & NFSEXP_CROSSMOUNT) \|\| EX_NOHIDE(exp2)) {
				111	/* successfully crossed mount point */
				112	/*
				113	* This is subtle: path.dentry is not on path.mnt
				114	* at this point. The only reason we are safe is that
				115	* original mnt is pinned down by exp, so we should
				116	* put path before putting exp
				117	*/
				118	*dpp = path.dentry;
				119	path.dentry = dentry;
				120	*expp = exp2;
				121	exp2 = exp;
				122	}
				123	path_put(&path);
				124	exp_put(exp2);
				125	out:
				126	return err;
				127	}
				128
				129	static void follow_to_parent(struct path *path)
				130	{
				131	struct dentry *dp;
				132
				133	while (path->dentry == path->mnt->mnt_root && follow_up(path))
				134	;
				135	dp = dget_parent(path->dentry);
				136	dput(path->dentry);
				137	path->dentry = dp;
				138	}
				139
				140	static int nfsd_lookup_parent(struct svc_rqst rqstp, struct dentry dparent, struct svc_export exp, struct dentry dentryp)
				141	{
				142	struct svc_export *exp2;
				143	struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
				144	.dentry = dget(dparent)};
				145
				146	follow_to_parent(&path);
				147
				148	exp2 = rqst_exp_parent(rqstp, &path);
				149	if (PTR_ERR(exp2) == -ENOENT) {
				150	*dentryp = dget(dparent);
				151	} else if (IS_ERR(exp2)) {
				152	path_put(&path);
				153	return PTR_ERR(exp2);
				154	} else {
				155	*dentryp = dget(path.dentry);
				156	exp_put(*exp);
				157	*exp = exp2;
				158	}
				159	path_put(&path);
				160	return 0;
				161	}
				162
				163	/*
				164	* For nfsd purposes, we treat V4ROOT exports as though there was an
				165	* export at every directory.
				166	*/
				167	int nfsd_mountpoint(struct dentry dentry, struct svc_export exp)
				168	{
				169	if (d_mountpoint(dentry))
				170	return 1;
				171	if (nfsd4_is_junction(dentry))
				172	return 1;
				173	if (!(exp->ex_flags & NFSEXP_V4ROOT))
				174	return 0;
				175	return dentry->d_inode != NULL;
				176	}
				177
				178	__be32
				179	nfsd_lookup_dentry(struct svc_rqst rqstp, struct svc_fh fhp,
				180	const char *name, unsigned int len,
				181	struct svc_export exp_ret, struct dentry dentry_ret)
				182	{
				183	struct svc_export *exp;
				184	struct dentry *dparent;
				185	struct dentry *dentry;
				186	int host_err;
				187
				188	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
				189
				190	dparent = fhp->fh_dentry;
				191	exp = fhp->fh_export;
				192	exp_get(exp);
				193
				194	/* Lookup the name, but don't follow links */
				195	if (isdotent(name, len)) {
				196	if (len==1)
				197	dentry = dget(dparent);
				198	else if (dparent != exp->ex_path.dentry)
				199	dentry = dget_parent(dparent);
				200	else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
				201	dentry = dget(dparent); /* .. == . just like at / */
				202	else {
				203	/* checking mountpoint crossing is very different when stepping up */
				204	host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
				205	if (host_err)
				206	goto out_nfserr;
				207	}
				208	} else {
				209	fh_lock(fhp);
				210	dentry = lookup_one_len(name, dparent, len);
				211	host_err = PTR_ERR(dentry);
				212	if (IS_ERR(dentry))
				213	goto out_nfserr;
				214	/*
				215	* check if we have crossed a mount point ...
				216	*/
				217	if (nfsd_mountpoint(dentry, exp)) {
				218	if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
				219	dput(dentry);
				220	goto out_nfserr;
				221	}
				222	}
				223	}
				224	*dentry_ret = dentry;
				225	*exp_ret = exp;
				226	return 0;
				227
				228	out_nfserr:
				229	exp_put(exp);
				230	return nfserrno(host_err);
				231	}
				232
				233	/*
				234	* Look up one component of a pathname.
				235	* N.B. After this call _both_ fhp and resfh need an fh_put
				236	*
				237	* If the lookup would cross a mountpoint, and the mounted filesystem
				238	* is exported to the client with NFSEXP_NOHIDE, then the lookup is
				239	* accepted as it stands and the mounted directory is
				240	* returned. Otherwise the covered directory is returned.
				241	* NOTE: this mountpoint crossing is not supported properly by all
				242	* clients and is explicitly disallowed for NFSv3
				243	* NeilBrown <neilb@cse.unsw.edu.au>
				244	*/
				245	__be32
				246	nfsd_lookup(struct svc_rqst rqstp, struct svc_fh fhp, const char *name,
				247	unsigned int len, struct svc_fh *resfh)
				248	{
				249	struct svc_export *exp;
				250	struct dentry *dentry;
				251	__be32 err;
				252
				253	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
				254	if (err)
				255	return err;
				256	err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
				257	if (err)
				258	return err;
				259	err = check_nfsd_access(exp, rqstp);
				260	if (err)
				261	goto out;
				262	/*
				263	* Note: we compose the file handle now, but as the
				264	* dentry may be negative, it may need to be updated.
				265	*/
				266	err = fh_compose(resfh, exp, dentry, fhp);
				267	if (!err && !dentry->d_inode)
				268	err = nfserr_noent;
				269	out:
				270	dput(dentry);
				271	exp_put(exp);
				272	return err;
				273	}
				274
				275	static int nfsd_break_lease(struct inode *inode)
				276	{
				277	if (!S_ISREG(inode->i_mode))
				278	return 0;
				279	return break_lease(inode, O_WRONLY \| O_NONBLOCK);
				280	}
				281
				282	/*
				283	* Commit metadata changes to stable storage.
				284	*/
				285	static int
				286	commit_metadata(struct svc_fh *fhp)
				287	{
				288	struct inode *inode = fhp->fh_dentry->d_inode;
				289	const struct export_operations *export_ops = inode->i_sb->s_export_op;
				290
				291	if (!EX_ISSYNC(fhp->fh_export))
				292	return 0;
				293
				294	if (export_ops->commit_metadata)
				295	return export_ops->commit_metadata(inode);
				296	return sync_inode_metadata(inode, 1);
				297	}
				298
				299	/*
				300	* Go over the attributes and take care of the small differences between
				301	* NFS semantics and what Linux expects.
				302	*/
				303	static void
				304	nfsd_sanitize_attrs(struct inode inode, struct iattr iap)
				305	{
				306	/*
				307	* NFSv2 does not differentiate between "set-[ac]time-to-now"
				308	* which only requires access, and "set-[ac]time-to-X" which
				309	* requires ownership.
				310	* So if it looks like it might be "set both to the same time which
				311	* is close to now", and if inode_change_ok fails, then we
				312	* convert to "set to now" instead of "set to explicit time"
				313	*
				314	* We only call inode_change_ok as the last test as technically
				315	* it is not an interface that we should be using.
				316	*/
				317	#define BOTH_TIME_SET (ATTR_ATIME_SET \| ATTR_MTIME_SET)
				318	#define MAX_TOUCH_TIME_ERROR (30*60)
				319	if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
				320	iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
				321	/*
				322	* Looks probable.
				323	*
				324	* Now just make sure time is in the right ballpark.
				325	* Solaris, at least, doesn't seem to care what the time
				326	* request is. We require it be within 30 minutes of now.
				327	*/
				328	time_t delta = iap->ia_atime.tv_sec - get_seconds();
				329	if (delta < 0)
				330	delta = -delta;
				331	if (delta < MAX_TOUCH_TIME_ERROR &&
				332	inode_change_ok(inode, iap) != 0) {
				333	/*
				334	* Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
				335	* This will cause notify_change to set these times
				336	* to "now"
				337	*/
				338	iap->ia_valid &= ~BOTH_TIME_SET;
				339	}
				340	}
				341
				342	/* sanitize the mode change */
				343	if (iap->ia_valid & ATTR_MODE) {
				344	iap->ia_mode &= S_IALLUGO;
				345	iap->ia_mode \|= (inode->i_mode & ~S_IALLUGO);
				346	}
				347
				348	/* Revoke setuid/setgid on chown */
				349	if (!S_ISDIR(inode->i_mode) &&
				350	(((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) \|\|
				351	((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) {
				352	iap->ia_valid \|= ATTR_KILL_PRIV;
				353	if (iap->ia_valid & ATTR_MODE) {
				354	/* we're setting mode too, just clear the sid bits /
				355	iap->ia_mode &= ~S_ISUID;
				356	if (iap->ia_mode & S_IXGRP)
				357	iap->ia_mode &= ~S_ISGID;
				358	} else {
				359	/* set ATTR_KILL_* bits and let VFS handle it */
				360	iap->ia_valid \|= (ATTR_KILL_SUID \| ATTR_KILL_SGID);
				361	}
				362	}
				363	}
				364
				365	static __be32
				366	nfsd_get_write_access(struct svc_rqst rqstp, struct svc_fh fhp,
				367	struct iattr *iap)
				368	{
				369	struct inode *inode = fhp->fh_dentry->d_inode;
				370	int host_err;
				371
				372	if (iap->ia_size < inode->i_size) {
				373	__be32 err;
				374
				375	err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
				376	NFSD_MAY_TRUNC \| NFSD_MAY_OWNER_OVERRIDE);
				377	if (err)
				378	return err;
				379	}
				380
				381	host_err = get_write_access(inode);
				382	if (host_err)
				383	goto out_nfserrno;
				384
				385	host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
				386	if (host_err)
				387	goto out_put_write_access;
				388	return 0;
				389
				390	out_put_write_access:
				391	put_write_access(inode);
				392	out_nfserrno:
				393	return nfserrno(host_err);
				394	}
				395
				396	/*
				397	* Set various file attributes. After this call fhp needs an fh_put.
				398	*/
				399	__be32
				400	nfsd_setattr(struct svc_rqst rqstp, struct svc_fh fhp, struct iattr *iap,
				401	int check_guard, time_t guardtime)
				402	{
				403	struct dentry *dentry;
				404	struct inode *inode;
				405	int accmode = NFSD_MAY_SATTR;
				406	umode_t ftype = 0;
				407	__be32 err;
				408	int host_err;
				409	bool get_write_count;
				410	int size_change = 0;
				411
				412	if (iap->ia_valid & (ATTR_ATIME \| ATTR_MTIME \| ATTR_SIZE))
				413	accmode \|= NFSD_MAY_WRITE\|NFSD_MAY_OWNER_OVERRIDE;
				414	if (iap->ia_valid & ATTR_SIZE)
				415	ftype = S_IFREG;
				416
				417	/* Callers that do fh_verify should do the fh_want_write: */
				418	get_write_count = !fhp->fh_dentry;
				419
				420	/* Get inode */
				421	err = fh_verify(rqstp, fhp, ftype, accmode);
				422	if (err)
				423	goto out;
				424	if (get_write_count) {
				425	host_err = fh_want_write(fhp);
				426	if (host_err)
				427	return nfserrno(host_err);
				428	}
				429
				430	dentry = fhp->fh_dentry;
				431	inode = dentry->d_inode;
				432
				433	/* Ignore any mode updates on symlinks */
				434	if (S_ISLNK(inode->i_mode))
				435	iap->ia_valid &= ~ATTR_MODE;
				436
				437	if (!iap->ia_valid)
				438	goto out;
				439
				440	nfsd_sanitize_attrs(inode, iap);
				441
				442	/*
				443	* The size case is special, it changes the file in addition to the
				444	* attributes.
				445	*/
				446	if (iap->ia_valid & ATTR_SIZE) {
				447	err = nfsd_get_write_access(rqstp, fhp, iap);
				448	if (err)
				449	goto out;
				450	size_change = 1;
				451	}
				452
				453	iap->ia_valid \|= ATTR_CTIME;
				454
				455	if (check_guard && guardtime != inode->i_ctime.tv_sec) {
				456	err = nfserr_notsync;
				457	goto out_put_write_access;
				458	}
				459
				460	host_err = nfsd_break_lease(inode);
				461	if (host_err)
				462	goto out_put_write_access_nfserror;
				463
				464	fh_lock(fhp);
				465	host_err = notify_change(dentry, iap);
				466	fh_unlock(fhp);
				467
				468	out_put_write_access_nfserror:
				469	err = nfserrno(host_err);
				470	out_put_write_access:
				471	if (size_change)
				472	put_write_access(inode);
				473	if (!err)
				474	commit_metadata(fhp);
				475	out:
				476	return err;
				477	}
				478
				479	#if defined(CONFIG_NFSD_V2_ACL) \|\| \
				480	defined(CONFIG_NFSD_V3_ACL) \|\| \
				481	defined(CONFIG_NFSD_V4)
				482	static ssize_t nfsd_getxattr(struct dentry dentry, char key, void **buf)
				483	{
				484	ssize_t buflen;
				485	ssize_t ret;
				486
				487	buflen = vfs_getxattr(dentry, key, NULL, 0);
				488	if (buflen <= 0)
				489	return buflen;
				490
				491	*buf = kmalloc(buflen, GFP_KERNEL);
				492	if (!*buf)
				493	return -ENOMEM;
				494
				495	ret = vfs_getxattr(dentry, key, *buf, buflen);
				496	if (ret < 0)
				497	kfree(*buf);
				498	return ret;
				499	}
				500	#endif
				501
				502	#if defined(CONFIG_NFSD_V4)
				503	static int
				504	set_nfsv4_acl_one(struct dentry dentry, struct posix_acl pacl, char *key)
				505	{
				506	int len;
				507	size_t buflen;
				508	char *buf = NULL;
				509	int error = 0;
				510
				511	if (!pacl)
				512	return vfs_setxattr(dentry, key, NULL, 0, 0);
				513
				514	buflen = posix_acl_xattr_size(pacl->a_count);
				515	buf = kmalloc(buflen, GFP_KERNEL);
				516	error = -ENOMEM;
				517	if (buf == NULL)
				518	goto out;
				519
				520	len = posix_acl_to_xattr(pacl, buf, buflen);
				521	if (len < 0) {
				522	error = len;
				523	goto out;
				524	}
				525
				526	error = vfs_setxattr(dentry, key, buf, len, 0);
				527	out:
				528	kfree(buf);
				529	return error;
				530	}
				531
				532	__be32
				533	nfsd4_set_nfs4_acl(struct svc_rqst rqstp, struct svc_fh fhp,
				534	struct nfs4_acl *acl)
				535	{
				536	__be32 error;
				537	int host_error;
				538	struct dentry *dentry;
				539	struct inode *inode;
				540	struct posix_acl pacl = NULL, dpacl = NULL;
				541	unsigned int flags = 0;
				542
				543	/* Get inode */
				544	error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR);
				545	if (error)
				546	return error;
				547
				548	dentry = fhp->fh_dentry;
				549	inode = dentry->d_inode;
				550	if (S_ISDIR(inode->i_mode))
				551	flags = NFS4_ACL_DIR;
				552
				553	host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
				554	if (host_error == -EINVAL) {
				555	return nfserr_attrnotsupp;
				556	} else if (host_error < 0)
				557	goto out_nfserr;
				558
				559	host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
				560	if (host_error < 0)
				561	goto out_release;
				562
				563	if (S_ISDIR(inode->i_mode))
				564	host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
				565
				566	out_release:
				567	posix_acl_release(pacl);
				568	posix_acl_release(dpacl);
				569	out_nfserr:
				570	if (host_error == -EOPNOTSUPP)
				571	return nfserr_attrnotsupp;
				572	else
				573	return nfserrno(host_error);
				574	}
				575
				576	static struct posix_acl *
				577	_get_posix_acl(struct dentry dentry, char key)
				578	{
				579	void *buf = NULL;
				580	struct posix_acl *pacl = NULL;
				581	int buflen;
				582
				583	buflen = nfsd_getxattr(dentry, key, &buf);
				584	if (!buflen)
				585	buflen = -ENODATA;
				586	if (buflen <= 0)
				587	return ERR_PTR(buflen);
				588
				589	pacl = posix_acl_from_xattr(buf, buflen);
				590	kfree(buf);
				591	return pacl;
				592	}
				593
				594	int
				595	nfsd4_get_nfs4_acl(struct svc_rqst rqstp, struct dentry dentry, struct nfs4_acl **acl)
				596	{
				597	struct inode *inode = dentry->d_inode;
				598	int error = 0;
				599	struct posix_acl pacl = NULL, dpacl = NULL;
				600	unsigned int flags = 0;
				601
				602	pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
				603	if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
				604	pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
				605	if (IS_ERR(pacl)) {
				606	error = PTR_ERR(pacl);
				607	pacl = NULL;
				608	goto out;
				609	}
				610
				611	if (S_ISDIR(inode->i_mode)) {
				612	dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
				613	if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
				614	dpacl = NULL;
				615	else if (IS_ERR(dpacl)) {
				616	error = PTR_ERR(dpacl);
				617	dpacl = NULL;
				618	goto out;
				619	}
				620	flags = NFS4_ACL_DIR;
				621	}
				622
				623	*acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags);
				624	if (IS_ERR(*acl)) {
				625	error = PTR_ERR(*acl);
				626	*acl = NULL;
				627	}
				628	out:
				629	posix_acl_release(pacl);
				630	posix_acl_release(dpacl);
				631	return error;
				632	}
				633
				634	/*
				635	* NFS junction information is stored in an extended attribute.
				636	*/
				637	#define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs"
				638
				639	/**
				640	* nfsd4_is_junction - Test if an object could be an NFS junction
				641	*
				642	* @dentry: object to test
				643	*
				644	* Returns 1 if "dentry" appears to contain NFS junction information.
				645	* Otherwise 0 is returned.
				646	*/
				647	int nfsd4_is_junction(struct dentry *dentry)
				648	{
				649	struct inode *inode = dentry->d_inode;
				650
				651	if (inode == NULL)
				652	return 0;
				653	if (inode->i_mode & S_IXUGO)
				654	return 0;
				655	if (!(inode->i_mode & S_ISVTX))
				656	return 0;
				657	if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
				658	return 0;
				659	return 1;
				660	}
				661	#endif /* defined(CONFIG_NFSD_V4) */
				662
				663	#ifdef CONFIG_NFSD_V3
				664	/*
				665	* Check server access rights to a file system object
				666	*/
				667	struct accessmap {
				668	u32 access;
				669	int how;
				670	};
				671	static struct accessmap nfs3_regaccess[] = {
				672	{ NFS3_ACCESS_READ, NFSD_MAY_READ },
				673	{ NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
				674	{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE\|NFSD_MAY_TRUNC },
				675	{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
				676
				677	{ 0, 0 }
				678	};
				679
				680	static struct accessmap nfs3_diraccess[] = {
				681	{ NFS3_ACCESS_READ, NFSD_MAY_READ },
				682	{ NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC },
				683	{ NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC\|NFSD_MAY_WRITE\|NFSD_MAY_TRUNC},
				684	{ NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC\|NFSD_MAY_WRITE },
				685	{ NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
				686
				687	{ 0, 0 }
				688	};
				689
				690	static struct accessmap nfs3_anyaccess[] = {
				691	/* Some clients - Solaris 2.6 at least, make an access call
				692	* to the server to check for access for things like /dev/null
				693	* (which really, the server doesn't care about). So
				694	* We provide simple access checking for them, looking
				695	* mainly at mode bits, and we make sure to ignore read-only
				696	* filesystem checks
				697	*/
				698	{ NFS3_ACCESS_READ, NFSD_MAY_READ },
				699	{ NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
				700	{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE\|NFSD_MAY_LOCAL_ACCESS },
				701	{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE\|NFSD_MAY_LOCAL_ACCESS },
				702
				703	{ 0, 0 }
				704	};
				705
				706	__be32
				707	nfsd_access(struct svc_rqst rqstp, struct svc_fh fhp, u32 access, u32 supported)
				708	{
				709	struct accessmap *map;
				710	struct svc_export *export;
				711	struct dentry *dentry;
				712	u32 query, result = 0, sresult = 0;
				713	__be32 error;
				714
				715	error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
				716	if (error)
				717	goto out;
				718
				719	export = fhp->fh_export;
				720	dentry = fhp->fh_dentry;
				721
				722	if (S_ISREG(dentry->d_inode->i_mode))
				723	map = nfs3_regaccess;
				724	else if (S_ISDIR(dentry->d_inode->i_mode))
				725	map = nfs3_diraccess;
				726	else
				727	map = nfs3_anyaccess;
				728
				729
				730	query = *access;
				731	for (; map->access; map++) {
				732	if (map->access & query) {
				733	__be32 err2;
				734
				735	sresult \|= map->access;
				736
				737	err2 = nfsd_permission(rqstp, export, dentry, map->how);
				738	switch (err2) {
				739	case nfs_ok:
				740	result \|= map->access;
				741	break;
				742
				743	/* the following error codes just mean the access was not allowed,
				744	* rather than an error occurred */
				745	case nfserr_rofs:
				746	case nfserr_acces:
				747	case nfserr_perm:
				748	/* simply don't "or" in the access bit. */
				749	break;
				750	default:
				751	error = err2;
				752	goto out;
				753	}
				754	}
				755	}
				756	*access = result;
				757	if (supported)
				758	*supported = sresult;
				759
				760	out:
				761	return error;
				762	}
				763	#endif /* CONFIG_NFSD_V3 */
				764
				765	static int nfsd_open_break_lease(struct inode *inode, int access)
				766	{
				767	unsigned int mode;
				768
				769	if (access & NFSD_MAY_NOT_BREAK_LEASE)
				770	return 0;
				771	mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
				772	return break_lease(inode, mode \| O_NONBLOCK);
				773	}
				774
				775	/*
				776	* Open an existing file or directory.
				777	* The may_flags argument indicates the type of open (read/write/lock)
				778	* and additional flags.
				779	* N.B. After this call fhp needs an fh_put
				780	*/
				781	__be32
				782	nfsd_open(struct svc_rqst rqstp, struct svc_fh fhp, umode_t type,
				783	int may_flags, struct file **filp)
				784	{
				785	struct dentry *dentry;
				786	struct inode *inode;
				787	int flags = O_RDONLY\|O_LARGEFILE;
				788	__be32 err;
				789	int host_err = 0;
				790
				791	validate_process_creds();
				792
				793	/*
				794	* If we get here, then the client has already done an "open",
				795	* and (hopefully) checked permission - so allow OWNER_OVERRIDE
				796	* in case a chmod has now revoked permission.
				797	*/
				798	err = fh_verify(rqstp, fhp, type, may_flags \| NFSD_MAY_OWNER_OVERRIDE);
				799	if (err)
				800	goto out;
				801
				802	dentry = fhp->fh_dentry;
				803	inode = dentry->d_inode;
				804
				805	/* Disallow write access to files with the append-only bit set
				806	* or any access when mandatory locking enabled
				807	*/
				808	err = nfserr_perm;
				809	if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
				810	goto out;
				811	/*
				812	* We must ignore files (but only files) which might have mandatory
				813	* locks on them because there is no way to know if the accesser has
				814	* the lock.
				815	*/
				816	if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
				817	goto out;
				818
				819	if (!inode->i_fop)
				820	goto out;
				821
				822	host_err = nfsd_open_break_lease(inode, may_flags);
				823	if (host_err) /* NOMEM or WOULDBLOCK */
				824	goto out_nfserr;
				825
				826	if (may_flags & NFSD_MAY_WRITE) {
				827	if (may_flags & NFSD_MAY_READ)
				828	flags = O_RDWR\|O_LARGEFILE;
				829	else
				830	flags = O_WRONLY\|O_LARGEFILE;
				831	}
				832	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
				833	flags, current_cred());
				834	if (IS_ERR(*filp)) {
				835	host_err = PTR_ERR(*filp);
				836	*filp = NULL;
				837	} else {
				838	host_err = ima_file_check(*filp, may_flags);
				839
				840	if (may_flags & NFSD_MAY_64BIT_COOKIE)
				841	(*filp)->f_mode \|= FMODE_64BITHASH;
				842	else
				843	(*filp)->f_mode \|= FMODE_32BITHASH;
				844	}
				845
				846	out_nfserr:
				847	err = nfserrno(host_err);
				848	out:
				849	validate_process_creds();
				850	return err;
				851	}
				852
				853	/*
				854	* Close a file.
				855	*/
				856	void
				857	nfsd_close(struct file *filp)
				858	{
				859	fput(filp);
				860	}
				861
				862	/*
				863	* Obtain the readahead parameters for the file
				864	* specified by (dev, ino).
				865	*/
				866
				867	static inline struct raparms *
				868	nfsd_get_raparms(dev_t dev, ino_t ino)
				869	{
				870	struct raparms ra, rap, *frap = NULL;
				871	int depth = 0;
				872	unsigned int hash;
				873	struct raparm_hbucket *rab;
				874
				875	hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
				876	rab = &raparm_hash[hash];
				877
				878	spin_lock(&rab->pb_lock);
				879	for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
				880	if (ra->p_ino == ino && ra->p_dev == dev)
				881	goto found;
				882	depth++;
				883	if (ra->p_count == 0)
				884	frap = rap;
				885	}
				886	depth = nfsdstats.ra_size;
				887	if (!frap) {
				888	spin_unlock(&rab->pb_lock);
				889	return NULL;
				890	}
				891	rap = frap;
				892	ra = *frap;
				893	ra->p_dev = dev;
				894	ra->p_ino = ino;
				895	ra->p_set = 0;
				896	ra->p_hindex = hash;
				897	found:
				898	if (rap != &rab->pb_head) {
				899	*rap = ra->p_next;
				900	ra->p_next = rab->pb_head;
				901	rab->pb_head = ra;
				902	}
				903	ra->p_count++;
				904	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
				905	spin_unlock(&rab->pb_lock);
				906	return ra;
				907	}
				908
				909	/*
				910	* Grab and keep cached pages associated with a file in the svc_rqst
				911	* so that they can be passed to the network sendmsg/sendpage routines
				912	* directly. They will be released after the sending has completed.
				913	*/
				914	static int
				915	nfsd_splice_actor(struct pipe_inode_info pipe, struct pipe_buffer buf,
				916	struct splice_desc *sd)
				917	{
				918	struct svc_rqst *rqstp = sd->u.data;
				919	struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
				920	struct page *page = buf->page;
				921	size_t size;
				922
				923	size = sd->len;
				924
				925	if (rqstp->rq_res.page_len == 0) {
				926	get_page(page);
				927	put_page(*pp);
				928	*pp = page;
				929	rqstp->rq_resused++;
				930	rqstp->rq_res.page_base = buf->offset;
				931	rqstp->rq_res.page_len = size;
				932	} else if (page != pp[-1]) {
				933	get_page(page);
				934	if (*pp)
				935	put_page(*pp);
				936	*pp = page;
				937	rqstp->rq_resused++;
				938	rqstp->rq_res.page_len += size;
				939	} else
				940	rqstp->rq_res.page_len += size;
				941
				942	return size;
				943	}
				944
				945	static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
				946	struct splice_desc *sd)
				947	{
				948	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
				949	}
				950
				951	static __be32
				952	nfsd_vfs_read(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
				953	loff_t offset, struct kvec vec, int vlen, unsigned long count)
				954	{
				955	mm_segment_t oldfs;
				956	__be32 err;
				957	int host_err;
				958
				959	err = nfserr_perm;
				960
				961	if (file->f_op->splice_read && rqstp->rq_splice_ok) {
				962	struct splice_desc sd = {
				963	.len = 0,
				964	.total_len = *count,
				965	.pos = offset,
				966	.u.data = rqstp,
				967	};
				968
				969	rqstp->rq_resused = 1;
				970	host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
				971	} else {
				972	oldfs = get_fs();
				973	set_fs(KERNEL_DS);
				974	host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
				975	set_fs(oldfs);
				976	}
				977
				978	if (host_err >= 0) {
				979	nfsdstats.io_read += host_err;
				980	*count = host_err;
				981	err = 0;
				982	fsnotify_access(file);
				983	} else
				984	err = nfserrno(host_err);
				985	return err;
				986	}
				987
				988	static void kill_suid(struct dentry *dentry)
				989	{
				990	struct iattr ia;
				991	ia.ia_valid = ATTR_KILL_SUID \| ATTR_KILL_SGID \| ATTR_KILL_PRIV;
				992
				993	mutex_lock(&dentry->d_inode->i_mutex);
				994	notify_change(dentry, &ia);
				995	mutex_unlock(&dentry->d_inode->i_mutex);
				996	}
				997
				998	/*
				999	* Gathered writes: If another process is currently writing to the file,
				1000	* there's a high chance this is another nfsd (triggered by a bulk write
				1001	* from a client's biod). Rather than syncing the file with each write
				1002	* request, we sleep for 10 msec.
				1003	*
				1004	* I don't know if this roughly approximates C. Juszak's idea of
				1005	* gathered writes, but it's a nice and simple solution (IMHO), and it
				1006	* seems to work:-)
				1007	*
				1008	* Note: we do this only in the NFSv2 case, since v3 and higher have a
				1009	* better tool (separate unstable writes and commits) for solving this
				1010	* problem.
				1011	*/
				1012	static int wait_for_concurrent_writes(struct file *file)
				1013	{
				1014	struct inode *inode = file->f_path.dentry->d_inode;
				1015	static ino_t last_ino;
				1016	static dev_t last_dev;
				1017	int err = 0;
				1018
				1019	if (atomic_read(&inode->i_writecount) > 1
				1020	\|\| (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
				1021	dprintk("nfsd: write defer %d\n", task_pid_nr(current));
				1022	msleep(10);
				1023	dprintk("nfsd: write resume %d\n", task_pid_nr(current));
				1024	}
				1025
				1026	if (inode->i_state & I_DIRTY) {
				1027	dprintk("nfsd: write sync %d\n", task_pid_nr(current));
				1028	err = vfs_fsync(file, 0);
				1029	}
				1030	last_ino = inode->i_ino;
				1031	last_dev = inode->i_sb->s_dev;
				1032	return err;
				1033	}
				1034
				1035	static __be32
				1036	nfsd_vfs_write(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
				1037	loff_t offset, struct kvec *vec, int vlen,
				1038	unsigned long cnt, int stablep)
				1039	{
				1040	struct svc_export *exp;
				1041	struct dentry *dentry;
				1042	struct inode *inode;
				1043	mm_segment_t oldfs;
				1044	__be32 err = 0;
				1045	int host_err;
				1046	int stable = *stablep;
				1047	int use_wgather;
				1048
				1049	dentry = file->f_path.dentry;
				1050	inode = dentry->d_inode;
				1051	exp = fhp->fh_export;
				1052
				1053	/*
				1054	* Request sync writes if
				1055	* - the sync export option has been set, or
				1056	* - the client requested O_SYNC behavior (NFSv3 feature).
				1057	* - The file system doesn't support fsync().
				1058	* When NFSv2 gathered writes have been configured for this volume,
				1059	* flushing the data to disk is handled separately below.
				1060	*/
				1061	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
				1062
				1063	if (!file->f_op->fsync) {/* COMMIT3 cannot work */
				1064	stable = 2;
				1065	stablep = 2; / FILE_SYNC */
				1066	}
				1067
				1068	if (!EX_ISSYNC(exp))
				1069	stable = 0;
				1070	if (stable && !use_wgather) {
				1071	spin_lock(&file->f_lock);
				1072	file->f_flags \|= O_SYNC;
				1073	spin_unlock(&file->f_lock);
				1074	}
				1075
				1076	/* Write the data. */
				1077	oldfs = get_fs(); set_fs(KERNEL_DS);
				1078	host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
				1079	set_fs(oldfs);
				1080	if (host_err < 0)
				1081	goto out_nfserr;
				1082	*cnt = host_err;
				1083	nfsdstats.io_write += host_err;
				1084	fsnotify_modify(file);
				1085
				1086	/* clear setuid/setgid flag after write */
				1087	if (inode->i_mode & (S_ISUID \| S_ISGID))
				1088	kill_suid(dentry);
				1089
				1090	if (stable && use_wgather)
				1091	host_err = wait_for_concurrent_writes(file);
				1092
				1093	out_nfserr:
				1094	dprintk("nfsd: write complete host_err=%d\n", host_err);
				1095	if (host_err >= 0)
				1096	err = 0;
				1097	else
				1098	err = nfserrno(host_err);
				1099	return err;
				1100	}
				1101
				1102	/*
				1103	* Read data from a file. count must contain the requested read count
				1104	* on entry. On return, *count contains the number of bytes actually read.
				1105	* N.B. After this call fhp needs an fh_put
				1106	*/
				1107	__be32 nfsd_read(struct svc_rqst rqstp, struct svc_fh fhp,
				1108	loff_t offset, struct kvec vec, int vlen, unsigned long count)
				1109	{
				1110	struct file *file;
				1111	struct inode *inode;
				1112	struct raparms *ra;
				1113	__be32 err;
				1114
				1115	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
				1116	if (err)
				1117	return err;
				1118
				1119	inode = file->f_path.dentry->d_inode;
				1120
				1121	/* Get readahead parameters */
				1122	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
				1123
				1124	if (ra && ra->p_set)
				1125	file->f_ra = ra->p_ra;
				1126
				1127	err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
				1128
				1129	/* Write back readahead params */
				1130	if (ra) {
				1131	struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
				1132	spin_lock(&rab->pb_lock);
				1133	ra->p_ra = file->f_ra;
				1134	ra->p_set = 1;
				1135	ra->p_count--;
				1136	spin_unlock(&rab->pb_lock);
				1137	}
				1138
				1139	nfsd_close(file);
				1140	return err;
				1141	}
				1142
				1143	/* As above, but use the provided file descriptor. */
				1144	__be32
				1145	nfsd_read_file(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
				1146	loff_t offset, struct kvec *vec, int vlen,
				1147	unsigned long *count)
				1148	{
				1149	__be32 err;
				1150
				1151	if (file) {
				1152	err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
				1153	NFSD_MAY_READ\|NFSD_MAY_OWNER_OVERRIDE);
				1154	if (err)
				1155	goto out;
				1156	err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
				1157	} else /* Note file may still be NULL in NFSv4 special stateid case: */
				1158	err = nfsd_read(rqstp, fhp, offset, vec, vlen, count);
				1159	out:
				1160	return err;
				1161	}
				1162
				1163	/*
				1164	* Write data to a file.
				1165	* The stable flag requests synchronous writes.
				1166	* N.B. After this call fhp needs an fh_put
				1167	*/
				1168	__be32
				1169	nfsd_write(struct svc_rqst rqstp, struct svc_fh fhp, struct file *file,
				1170	loff_t offset, struct kvec vec, int vlen, unsigned long cnt,
				1171	int *stablep)
				1172	{
				1173	__be32 err = 0;
				1174
				1175	if (file) {
				1176	err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
				1177	NFSD_MAY_WRITE\|NFSD_MAY_OWNER_OVERRIDE);
				1178	if (err)
				1179	goto out;
				1180	err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
				1181	stablep);
				1182	} else {
				1183	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
				1184	if (err)
				1185	goto out;
				1186
				1187	if (cnt)
				1188	err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
				1189	cnt, stablep);
				1190	nfsd_close(file);
				1191	}
				1192	out:
				1193	return err;
				1194	}
				1195
				#ifdef CONFIG_NFSD_V3
				/*
				 * Commit pending writes to stable storage.
				 *
				 * Only data inside the range described by @offset and @count is
				 * guaranteed to be synced; a @count of 0 means "up to LLONG_MAX".
				 * A negative @offset or a range whose end wraps below @offset yields
				 * nfserr_inval.
				 *
				 * Unfortunately we cannot lock the file to make sure we return full WCC
				 * data to the client, as locking happens lower down in the filesystem.
				 */
				__be32
				nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
						loff_t offset, unsigned long count)
				{
					struct file	*file;
					loff_t		end = LLONG_MAX;
					__be32		err;

					if (offset < 0)
						return nfserr_inval;
					if (count != 0) {
						end = offset + (loff_t)count - 1;
						if (end < offset)
							return nfserr_inval;
					}

					err = nfsd_open(rqstp, fhp, S_IFREG,
							NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
					if (err)
						return err;

					/* The range is only synced for exports with the sync option. */
					if (EX_ISSYNC(fhp->fh_export)) {
						int sync_err = vfs_fsync_range(file, offset, end, 0);

						/* -EINVAL from the sync is reported as "not supported". */
						if (sync_err == -EINVAL)
							err = nfserr_notsupp;
						else
							err = nfserrno(sync_err);
					}

					nfsd_close(file);
					return err;
				}
				#endif /* CONFIG_NFSD_V3 */
				1240
				1241	static __be32
				1242	nfsd_create_setattr(struct svc_rqst rqstp, struct svc_fh resfhp,
				1243	struct iattr *iap)
				1244	{
				1245	/*
				1246	* Mode has already been set earlier in create:
				1247	*/
				1248	iap->ia_valid &= ~ATTR_MODE;
				1249	/*
				1250	* Setting uid/gid works only for root. Irix appears to
				1251	* send along the gid on create when it tries to implement
				1252	* setgid directories via NFS:
				1253	*/
				1254	if (current_fsuid() != 0)
				1255	iap->ia_valid &= ~(ATTR_UID\|ATTR_GID);
				1256	if (iap->ia_valid)
				1257	return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
				1258	return 0;
				1259	}
				1260
				1261	/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
				1262	* setting size to 0 may fail for some specific file systems by the permission
				1263	* checking which requires WRITE permission but the mode is 000.
				1264	* we ignore the resizing(to 0) on the just new created file, since the size is
				1265	* 0 after file created.
				1266	*
				1267	* call this only after vfs_create() is called.
				1268	* */
				1269	static void
				1270	nfsd_check_ignore_resizing(struct iattr *iap)
				1271	{
				1272	if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
				1273	iap->ia_valid &= ~ATTR_SIZE;
				1274	}
				1275
				1276	/*
				1277	* Create a file (regular, directory, device, fifo); UNIX sockets
				1278	* not yet implemented.
				1279	* If the response fh has been verified, the parent directory should
				1280	* already be locked. Note that the parent directory is left locked.
				1281	*
				1282	* N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
				1283	*/
				1284	__be32
				1285	nfsd_create(struct svc_rqst rqstp, struct svc_fh fhp,
				1286	char fname, int flen, struct iattr iap,
				1287	int type, dev_t rdev, struct svc_fh *resfhp)
				1288	{
				1289	struct dentry dentry, dchild = NULL;
				1290	struct inode *dirp;
				1291	__be32 err;
				1292	__be32 err2;
				1293	int host_err;
				1294
				1295	err = nfserr_perm;
				1296	if (!flen)
				1297	goto out;
				1298	err = nfserr_exist;
				1299	if (isdotent(fname, flen))
				1300	goto out;
				1301
				1302	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
				1303	if (err)
				1304	goto out;
				1305
				1306	dentry = fhp->fh_dentry;
				1307	dirp = dentry->d_inode;
				1308
				1309	err = nfserr_notdir;
				1310	if (!dirp->i_op->lookup)
				1311	goto out;
				1312	/*
				1313	* Check whether the response file handle has been verified yet.
				1314	* If it has, the parent directory should already be locked.
				1315	*/
				1316	if (!resfhp->fh_dentry) {
				1317	/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
				1318	fh_lock_nested(fhp, I_MUTEX_PARENT);
				1319	dchild = lookup_one_len(fname, dentry, flen);
				1320	host_err = PTR_ERR(dchild);
				1321	if (IS_ERR(dchild))
				1322	goto out_nfserr;
				1323	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
				1324	if (err)
				1325	goto out;
				1326	} else {
				1327	/* called from nfsd_proc_create */
				1328	dchild = dget(resfhp->fh_dentry);
				1329	if (!fhp->fh_locked) {
				1330	/* not actually possible */
				1331	printk(KERN_ERR
				1332	"nfsd_create: parent %s/%s not locked!\n",
				1333	dentry->d_parent->d_name.name,
				1334	dentry->d_name.name);
				1335	err = nfserr_io;
				1336	goto out;
				1337	}
				1338	}
				1339	/*
				1340	* Make sure the child dentry is still negative ...
				1341	*/
				1342	err = nfserr_exist;
				1343	if (dchild->d_inode) {
				1344	dprintk("nfsd_create: dentry %s/%s not negative!\n",
				1345	dentry->d_name.name, dchild->d_name.name);
				1346	goto out;
				1347	}
				1348
				1349	if (!(iap->ia_valid & ATTR_MODE))
				1350	iap->ia_mode = 0;
				1351	iap->ia_mode = (iap->ia_mode & S_IALLUGO) \| type;
				1352
				1353	err = nfserr_inval;
				1354	if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
				1355	printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
				1356	type);
				1357	goto out;
				1358	}
				1359
				1360	host_err = fh_want_write(fhp);
				1361	if (host_err)
				1362	goto out_nfserr;
				1363
				1364	/*
				1365	* Get the dir op function pointer.
				1366	*/
				1367	err = 0;
				1368	switch (type) {
				1369	case S_IFREG:
				1370	host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
				1371	if (!host_err)
				1372	nfsd_check_ignore_resizing(iap);
				1373	break;
				1374	case S_IFDIR:
				1375	host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
				1376	break;
				1377	case S_IFCHR:
				1378	case S_IFBLK:
				1379	case S_IFIFO:
				1380	case S_IFSOCK:
				1381	host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
				1382	break;
				1383	}
				1384	if (host_err < 0) {
				1385	fh_drop_write(fhp);
				1386	goto out_nfserr;
				1387	}
				1388
				1389	err = nfsd_create_setattr(rqstp, resfhp, iap);
				1390
				1391	/*
				1392	* nfsd_setattr already committed the child. Transactional filesystems
				1393	* had a chance to commit changes for both parent and child
				1394	* simultaneously making the following commit_metadata a noop.
				1395	*/
				1396	err2 = nfserrno(commit_metadata(fhp));
				1397	if (err2)
				1398	err = err2;
				1399	fh_drop_write(fhp);
				1400	/*
				1401	* Update the file handle to get the new inode info.
				1402	*/
				1403	if (!err)
				1404	err = fh_update(resfhp);
				1405	out:
				1406	if (dchild && !IS_ERR(dchild))
				1407	dput(dchild);
				1408	return err;
				1409
				1410	out_nfserr:
				1411	err = nfserrno(host_err);
				1412	goto out;
				1413	}
				1414
				1415	#ifdef CONFIG_NFSD_V3
				1416
				1417	static inline int nfsd_create_is_exclusive(int createmode)
				1418	{
				1419	return createmode == NFS3_CREATE_EXCLUSIVE
				1420	\|\| createmode == NFS4_CREATE_EXCLUSIVE4_1;
				1421	}
				1422
				1423	/*
				1424	* NFSv3 and NFSv4 version of nfsd_create
				1425	*/
				1426	__be32
				1427	do_nfsd_create(struct svc_rqst rqstp, struct svc_fh fhp,
				1428	char fname, int flen, struct iattr iap,
				1429	struct svc_fh resfhp, int createmode, u32 verifier,
				1430	bool truncp, bool created)
				1431	{
				1432	struct dentry dentry, dchild = NULL;
				1433	struct inode *dirp;
				1434	__be32 err;
				1435	int host_err;
				1436	__u32 v_mtime=0, v_atime=0;
				1437
				1438	err = nfserr_perm;
				1439	if (!flen)
				1440	goto out;
				1441	err = nfserr_exist;
				1442	if (isdotent(fname, flen))
				1443	goto out;
				1444	if (!(iap->ia_valid & ATTR_MODE))
				1445	iap->ia_mode = 0;
				1446	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
				1447	if (err)
				1448	goto out;
				1449
				1450	dentry = fhp->fh_dentry;
				1451	dirp = dentry->d_inode;
				1452
				1453	/* Get all the sanity checks out of the way before
				1454	* we lock the parent. */
				1455	err = nfserr_notdir;
				1456	if (!dirp->i_op->lookup)
				1457	goto out;
				1458	fh_lock_nested(fhp, I_MUTEX_PARENT);
				1459
				1460	/*
				1461	* Compose the response file handle.
				1462	*/
				1463	dchild = lookup_one_len(fname, dentry, flen);
				1464	host_err = PTR_ERR(dchild);
				1465	if (IS_ERR(dchild))
				1466	goto out_nfserr;
				1467
				1468	/* If file doesn't exist, check for permissions to create one */
				1469	if (!dchild->d_inode) {
				1470	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
				1471	if (err)
				1472	goto out;
				1473	}
				1474
				1475	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
				1476	if (err)
				1477	goto out;
				1478
				1479	if (nfsd_create_is_exclusive(createmode)) {
				1480	/* solaris7 gets confused (bugid 4218508) if these have
				1481	* the high bit set, so just clear the high bits. If this is
				1482	* ever changed to use different attrs for storing the
				1483	* verifier, then do_open_lookup() will also need to be fixed
				1484	* accordingly.
				1485	*/
				1486	v_mtime = verifier[0]&0x7fffffff;
				1487	v_atime = verifier[1]&0x7fffffff;
				1488	}
				1489
				1490	host_err = fh_want_write(fhp);
				1491	if (host_err)
				1492	goto out_nfserr;
				1493	if (dchild->d_inode) {
				1494	err = 0;
				1495
				1496	switch (createmode) {
				1497	case NFS3_CREATE_UNCHECKED:
				1498	if (! S_ISREG(dchild->d_inode->i_mode))
				1499	goto out;
				1500	else if (truncp) {
				1501	/* in nfsv4, we need to treat this case a little
				1502	* differently. we don't want to truncate the
				1503	* file now; this would be wrong if the OPEN
				1504	* fails for some other reason. furthermore,
				1505	* if the size is nonzero, we should ignore it
				1506	* according to spec!
				1507	*/
				1508	*truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
				1509	}
				1510	else {
				1511	iap->ia_valid &= ATTR_SIZE;
				1512	goto set_attr;
				1513	}
				1514	break;
				1515	case NFS3_CREATE_EXCLUSIVE:
				1516	if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
				1517	&& dchild->d_inode->i_atime.tv_sec == v_atime
				1518	&& dchild->d_inode->i_size == 0 ) {
				1519	if (created)
				1520	*created = 1;
				1521	break;
				1522	}
				1523	case NFS4_CREATE_EXCLUSIVE4_1:
				1524	if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
				1525	&& dchild->d_inode->i_atime.tv_sec == v_atime
				1526	&& dchild->d_inode->i_size == 0 ) {
				1527	if (created)
				1528	*created = 1;
				1529	goto set_attr;
				1530	}
				1531	/* fallthru */
				1532	case NFS3_CREATE_GUARDED:
				1533	err = nfserr_exist;
				1534	}
				1535	fh_drop_write(fhp);
				1536	goto out;
				1537	}
				1538
				1539	host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
				1540	if (host_err < 0) {
				1541	fh_drop_write(fhp);
				1542	goto out_nfserr;
				1543	}
				1544	if (created)
				1545	*created = 1;
				1546
				1547	nfsd_check_ignore_resizing(iap);
				1548
				1549	if (nfsd_create_is_exclusive(createmode)) {
				1550	/* Cram the verifier into atime/mtime */
				1551	iap->ia_valid = ATTR_MTIME\|ATTR_ATIME
				1552	\| ATTR_MTIME_SET\|ATTR_ATIME_SET;
				1553	/* XXX someone who knows this better please fix it for nsec */
				1554	iap->ia_mtime.tv_sec = v_mtime;
				1555	iap->ia_atime.tv_sec = v_atime;
				1556	iap->ia_mtime.tv_nsec = 0;
				1557	iap->ia_atime.tv_nsec = 0;
				1558	}
				1559
				1560	set_attr:
				1561	err = nfsd_create_setattr(rqstp, resfhp, iap);
				1562
				1563	/*
				1564	* nfsd_setattr already committed the child (and possibly also the parent).
				1565	*/
				1566	if (!err)
				1567	err = nfserrno(commit_metadata(fhp));
				1568
				1569	fh_drop_write(fhp);
				1570	/*
				1571	* Update the filehandle to get the new inode info.
				1572	*/
				1573	if (!err)
				1574	err = fh_update(resfhp);
				1575
				1576	out:
				1577	fh_unlock(fhp);
				1578	if (dchild && !IS_ERR(dchild))
				1579	dput(dchild);
				1580	return err;
				1581
				1582	out_nfserr:
				1583	err = nfserrno(host_err);
				1584	goto out;
				1585	}
				1586	#endif /* CONFIG_NFSD_V3 */
				1587
				1588	/*
				1589	* Read a symlink. On entry, *lenp must contain the maximum path length that
				1590	* fits into the buffer. On return, it contains the true length.
				1591	* N.B. After this call fhp needs an fh_put
				1592	*/
				1593	__be32
				1594	nfsd_readlink(struct svc_rqst rqstp, struct svc_fh fhp, char buf, int lenp)
				1595	{
				1596	struct inode *inode;
				1597	mm_segment_t oldfs;
				1598	__be32 err;
				1599	int host_err;
				1600	struct path path;
				1601
				1602	err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
				1603	if (err)
				1604	goto out;
				1605
				1606	path.mnt = fhp->fh_export->ex_path.mnt;
				1607	path.dentry = fhp->fh_dentry;
				1608	inode = path.dentry->d_inode;
				1609
				1610	err = nfserr_inval;
				1611	if (!inode->i_op->readlink)
				1612	goto out;
				1613
				1614	touch_atime(&path);
				1615	/* N.B. Why does this call need a get_fs()??
				1616	* Remove the set_fs and watch the fireworks:-) --okir
				1617	*/
				1618
				1619	oldfs = get_fs(); set_fs(KERNEL_DS);
				1620	host_err = inode->i_op->readlink(path.dentry, buf, *lenp);
				1621	set_fs(oldfs);
				1622
				1623	if (host_err < 0)
				1624	goto out_nfserr;
				1625	*lenp = host_err;
				1626	err = 0;
				1627	out:
				1628	return err;
				1629
				1630	out_nfserr:
				1631	err = nfserrno(host_err);
				1632	goto out;
				1633	}
				1634
				1635	/*
				1636	* Create a symlink and look up its inode
				1637	* N.B. After this call _both_ fhp and resfhp need an fh_put
				1638	*/
				1639	__be32
				1640	nfsd_symlink(struct svc_rqst rqstp, struct svc_fh fhp,
				1641	char *fname, int flen,
				1642	char *path, int plen,
				1643	struct svc_fh *resfhp,
				1644	struct iattr *iap)
				1645	{
				1646	struct dentry dentry, dnew;
				1647	__be32 err, cerr;
				1648	int host_err;
				1649
				1650	err = nfserr_noent;
				1651	if (!flen \|\| !plen)
				1652	goto out;
				1653	err = nfserr_exist;
				1654	if (isdotent(fname, flen))
				1655	goto out;
				1656
				1657	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
				1658	if (err)
				1659	goto out;
				1660	fh_lock(fhp);
				1661	dentry = fhp->fh_dentry;
				1662	dnew = lookup_one_len(fname, dentry, flen);
				1663	host_err = PTR_ERR(dnew);
				1664	if (IS_ERR(dnew))
				1665	goto out_nfserr;
				1666
				1667	host_err = fh_want_write(fhp);
				1668	if (host_err)
				1669	goto out_nfserr;
				1670
				1671	if (unlikely(path[plen] != 0)) {
				1672	char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
				1673	if (path_alloced == NULL)
				1674	host_err = -ENOMEM;
				1675	else {
				1676	strncpy(path_alloced, path, plen);
				1677	path_alloced[plen] = 0;
				1678	host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
				1679	kfree(path_alloced);
				1680	}
				1681	} else
				1682	host_err = vfs_symlink(dentry->d_inode, dnew, path);
				1683	err = nfserrno(host_err);
				1684	if (!err)
				1685	err = nfserrno(commit_metadata(fhp));
				1686	fh_unlock(fhp);
				1687
				1688	fh_drop_write(fhp);
				1689
				1690	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
				1691	dput(dnew);
				1692	if (err==0) err = cerr;
				1693	out:
				1694	return err;
				1695
				1696	out_nfserr:
				1697	err = nfserrno(host_err);
				1698	goto out;
				1699	}
				1700
				1701	/*
				1702	* Create a hardlink
				1703	* N.B. After this call _both_ ffhp and tfhp need an fh_put
				1704	*/
				1705	__be32
				1706	nfsd_link(struct svc_rqst rqstp, struct svc_fh ffhp,
				1707	char name, int len, struct svc_fh tfhp)
				1708	{
				1709	struct dentry ddir, dnew, *dold;
				1710	struct inode *dirp;
				1711	__be32 err;
				1712	int host_err;
				1713
				1714	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
				1715	if (err)
				1716	goto out;
				1717	err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP);
				1718	if (err)
				1719	goto out;
				1720	err = nfserr_isdir;
				1721	if (S_ISDIR(tfhp->fh_dentry->d_inode->i_mode))
				1722	goto out;
				1723	err = nfserr_perm;
				1724	if (!len)
				1725	goto out;
				1726	err = nfserr_exist;
				1727	if (isdotent(name, len))
				1728	goto out;
				1729
				1730	fh_lock_nested(ffhp, I_MUTEX_PARENT);
				1731	ddir = ffhp->fh_dentry;
				1732	dirp = ddir->d_inode;
				1733
				1734	dnew = lookup_one_len(name, ddir, len);
				1735	host_err = PTR_ERR(dnew);
				1736	if (IS_ERR(dnew))
				1737	goto out_nfserr;
				1738
				1739	dold = tfhp->fh_dentry;
				1740
				1741	host_err = fh_want_write(tfhp);
				1742	if (host_err) {
				1743	err = nfserrno(host_err);
				1744	goto out_dput;
				1745	}
				1746	err = nfserr_noent;
				1747	if (!dold->d_inode)
				1748	goto out_drop_write;
				1749	host_err = nfsd_break_lease(dold->d_inode);
				1750	if (host_err) {
				1751	err = nfserrno(host_err);
				1752	goto out_drop_write;
				1753	}
				1754	host_err = vfs_link(dold, dirp, dnew);
				1755	if (!host_err) {
				1756	err = nfserrno(commit_metadata(ffhp));
				1757	if (!err)
				1758	err = nfserrno(commit_metadata(tfhp));
				1759	} else {
				1760	if (host_err == -EXDEV && rqstp->rq_vers == 2)
				1761	err = nfserr_acces;
				1762	else
				1763	err = nfserrno(host_err);
				1764	}
				1765	out_drop_write:
				1766	fh_drop_write(tfhp);
				1767	out_dput:
				1768	dput(dnew);
				1769	out_unlock:
				1770	fh_unlock(ffhp);
				1771	out:
				1772	return err;
				1773
				1774	out_nfserr:
				1775	err = nfserrno(host_err);
				1776	goto out_unlock;
				1777	}
				1778
				1779	/*
				1780	* Rename a file
				1781	* N.B. After this call _both_ ffhp and tfhp need an fh_put
				1782	*/
				1783	__be32
				1784	nfsd_rename(struct svc_rqst rqstp, struct svc_fh ffhp, char *fname, int flen,
				1785	struct svc_fh tfhp, char tname, int tlen)
				1786	{
				1787	struct dentry fdentry, tdentry, odentry, ndentry, *trap;
				1788	struct inode fdir, tdir;
				1789	__be32 err;
				1790	int host_err;
				1791
				1792	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
				1793	if (err)
				1794	goto out;
				1795	err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
				1796	if (err)
				1797	goto out;
				1798
				1799	fdentry = ffhp->fh_dentry;
				1800	fdir = fdentry->d_inode;
				1801
				1802	tdentry = tfhp->fh_dentry;
				1803	tdir = tdentry->d_inode;
				1804
				1805	err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
				1806	if (ffhp->fh_export != tfhp->fh_export)
				1807	goto out;
				1808
				1809	err = nfserr_perm;
				1810	if (!flen \|\| isdotent(fname, flen) \|\| !tlen \|\| isdotent(tname, tlen))
				1811	goto out;
				1812
				1813	/* cannot use fh_lock as we need deadlock protective ordering
				1814	* so do it by hand */
				1815	trap = lock_rename(tdentry, fdentry);
				1816	ffhp->fh_locked = tfhp->fh_locked = 1;
				1817	fill_pre_wcc(ffhp);
				1818	fill_pre_wcc(tfhp);
				1819
				1820	odentry = lookup_one_len(fname, fdentry, flen);
				1821	host_err = PTR_ERR(odentry);
				1822	if (IS_ERR(odentry))
				1823	goto out_nfserr;
				1824
				1825	host_err = -ENOENT;
				1826	if (!odentry->d_inode)
				1827	goto out_dput_old;
				1828	host_err = -EINVAL;
				1829	if (odentry == trap)
				1830	goto out_dput_old;
				1831
				1832	ndentry = lookup_one_len(tname, tdentry, tlen);
				1833	host_err = PTR_ERR(ndentry);
				1834	if (IS_ERR(ndentry))
				1835	goto out_dput_old;
				1836	host_err = -ENOTEMPTY;
				1837	if (ndentry == trap)
				1838	goto out_dput_new;
				1839
				1840	host_err = -EXDEV;
				1841	if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
				1842	goto out_dput_new;
				1843	host_err = fh_want_write(ffhp);
				1844	if (host_err)
				1845	goto out_dput_new;
				1846
				1847	host_err = nfsd_break_lease(odentry->d_inode);
				1848	if (host_err)
				1849	goto out_drop_write;
				1850	if (ndentry->d_inode) {
				1851	host_err = nfsd_break_lease(ndentry->d_inode);
				1852	if (host_err)
				1853	goto out_drop_write;
				1854	}
				1855	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
				1856	if (!host_err) {
				1857	host_err = commit_metadata(tfhp);
				1858	if (!host_err)
				1859	host_err = commit_metadata(ffhp);
				1860	}
				1861	out_drop_write:
				1862	fh_drop_write(ffhp);
				1863	out_dput_new:
				1864	dput(ndentry);
				1865	out_dput_old:
				1866	dput(odentry);
				1867	out_nfserr:
				1868	err = nfserrno(host_err);
				1869
				1870	/* we cannot reply on fh_unlock on the two filehandles,
				1871	* as that would do the wrong thing if the two directories
				1872	* were the same, so again we do it by hand
				1873	*/
				1874	fill_post_wcc(ffhp);
				1875	fill_post_wcc(tfhp);
				1876	unlock_rename(tdentry, fdentry);
				1877	ffhp->fh_locked = tfhp->fh_locked = 0;
				1878
				1879	out:
				1880	return err;
				1881	}
				1882
				1883	/*
				1884	* Unlink a file or directory
				1885	* N.B. After this call fhp needs an fh_put
				1886	*/
				1887	__be32
				1888	nfsd_unlink(struct svc_rqst rqstp, struct svc_fh fhp, int type,
				1889	char *fname, int flen)
				1890	{
				1891	struct dentry dentry, rdentry;
				1892	struct inode *dirp;
				1893	__be32 err;
				1894	int host_err;
				1895
				1896	err = nfserr_acces;
				1897	if (!flen \|\| isdotent(fname, flen))
				1898	goto out;
				1899	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
				1900	if (err)
				1901	goto out;
				1902
				1903	fh_lock_nested(fhp, I_MUTEX_PARENT);
				1904	dentry = fhp->fh_dentry;
				1905	dirp = dentry->d_inode;
				1906
				1907	rdentry = lookup_one_len(fname, dentry, flen);
				1908	host_err = PTR_ERR(rdentry);
				1909	if (IS_ERR(rdentry))
				1910	goto out_nfserr;
				1911
				1912	if (!rdentry->d_inode) {
				1913	dput(rdentry);
				1914	err = nfserr_noent;
				1915	goto out;
				1916	}
				1917
				1918	if (!type)
				1919	type = rdentry->d_inode->i_mode & S_IFMT;
				1920
				1921	host_err = fh_want_write(fhp);
				1922	if (host_err)
				1923	goto out_put;
				1924
				1925	host_err = nfsd_break_lease(rdentry->d_inode);
				1926	if (host_err)
				1927	goto out_drop_write;
				1928	if (type != S_IFDIR)
				1929	host_err = vfs_unlink(dirp, rdentry);
				1930	else
				1931	host_err = vfs_rmdir(dirp, rdentry);
				1932	if (!host_err)
				1933	host_err = commit_metadata(fhp);
				1934	out_drop_write:
				1935	fh_drop_write(fhp);
				1936	out_put:
				1937	dput(rdentry);
				1938
				1939	out_nfserr:
				1940	err = nfserrno(host_err);
				1941	out:
				1942	return err;
				1943	}
				1944
				1945	/*
				1946	* We do this buffering because we must not call back into the file
				1947	* system's ->lookup() method from the filldir callback. That may well
				1948	* deadlock a number of file systems.
				1949	*
				1950	* This is based heavily on the implementation of same in XFS.
				1951	*/
				1952	struct buffered_dirent {	/* one directory entry as queued by nfsd_buffered_filldir() */
				1953	u64 ino;	/* inode number handed to the filldir callback */
				1954	loff_t offset;	/* directory offset reported for this entry */
				1955	int namlen;	/* number of bytes stored in name[] */
				1956	unsigned int d_type;	/* d_type value handed to the filldir callback */
				1957	char name[];	/* namlen bytes of name; no terminator is written */
				1958	};
				1959
				1960	struct readdir_data {	/* state shared by nfsd_buffered_readdir() and its filldir */
				1961	char *dirent;	/* start of the one-page entry buffer */
				1962	size_t used;	/* bytes of the buffer filled so far */
				1963	int full;	/* set to 1 when an entry did not fit into the page */
				1964	};
				1965
				1966	static int nfsd_buffered_filldir(void __buf, const char name, int namlen,
				1967	loff_t offset, u64 ino, unsigned int d_type)
				1968	{
				1969	struct readdir_data *buf = __buf;
				1970	struct buffered_dirent de = (void )(buf->dirent + buf->used);
				1971	unsigned int reclen;
				1972
				1973	reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
				1974	if (buf->used + reclen > PAGE_SIZE) {
				1975	buf->full = 1;
				1976	return -EINVAL;
				1977	}
				1978
				1979	de->namlen = namlen;
				1980	de->offset = offset;
				1981	de->ino = ino;
				1982	de->d_type = d_type;
				1983	memcpy(de->name, name, namlen);
				1984	buf->used += reclen;
				1985
				1986	return 0;
				1987	}
				1988
				1989	static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
				1990	struct readdir_cd cdp, loff_t offsetp)
				1991	{
				1992	struct readdir_data buf;
				1993	struct buffered_dirent *de;
				1994	int host_err;
				1995	int size;
				1996	loff_t offset;
				1997
				1998	buf.dirent = (void *)__get_free_page(GFP_KERNEL);
				1999	if (!buf.dirent)
				2000	return nfserrno(-ENOMEM);
				2001
				2002	offset = *offsetp;
				2003
				2004	while (1) {
				2005	struct inode *dir_inode = file->f_path.dentry->d_inode;
				2006	unsigned int reclen;
				2007
				2008	cdp->err = nfserr_eof; /* will be cleared on successful read */
				2009	buf.used = 0;
				2010	buf.full = 0;
				2011
				2012	host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf);
				2013	if (buf.full)
				2014	host_err = 0;
				2015
				2016	if (host_err < 0)
				2017	break;
				2018
				2019	size = buf.used;
				2020
				2021	if (!size)
				2022	break;
				2023
				2024	/*
				2025	* Various filldir functions may end up calling back into
				2026	* lookup_one_len() and the file system's ->lookup() method.
				2027	* These expect i_mutex to be held, as it would within readdir.
				2028	*/
				2029	host_err = mutex_lock_killable(&dir_inode->i_mutex);
				2030	if (host_err)
				2031	break;
				2032
				2033	de = (struct buffered_dirent *)buf.dirent;
				2034	while (size > 0) {
				2035	offset = de->offset;
				2036
				2037	if (func(cdp, de->name, de->namlen, de->offset,
				2038	de->ino, de->d_type))
				2039	break;
				2040
				2041	if (cdp->err != nfs_ok)
				2042	break;
				2043
				2044	reclen = ALIGN(sizeof(*de) + de->namlen,
				2045	sizeof(u64));
				2046	size -= reclen;
				2047	de = (struct buffered_dirent )((char )de + reclen);
				2048	}
				2049	mutex_unlock(&dir_inode->i_mutex);
				2050	if (size > 0) /* We bailed out early */
				2051	break;
				2052
				2053	offset = vfs_llseek(file, 0, SEEK_CUR);
				2054	}
				2055
				2056	free_page((unsigned long)(buf.dirent));
				2057
				2058	if (host_err)
				2059	return nfserrno(host_err);
				2060
				2061	*offsetp = offset;
				2062	return cdp->err;
				2063	}
				2064
				2065	/*
				2066	* Read entries from a directory.
				2067	* The NFSv3/4 verifier we ignore for now.
				2068	*/
				2069	__be32
				2070	nfsd_readdir(struct svc_rqst rqstp, struct svc_fh fhp, loff_t *offsetp,
				2071	struct readdir_cd *cdp, filldir_t func)
				2072	{
				2073	__be32 err;
				2074	struct file *file;
				2075	loff_t offset = *offsetp;
				2076	int may_flags = NFSD_MAY_READ;
				2077
				2078	/* NFSv2 only supports 32 bit cookies */
				2079	if (rqstp->rq_vers > 2)
				2080	may_flags \|= NFSD_MAY_64BIT_COOKIE;
				2081
				2082	err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
				2083	if (err)
				2084	goto out;
				2085
				2086	offset = vfs_llseek(file, offset, 0);
				2087	if (offset < 0) {
				2088	err = nfserrno((int)offset);
				2089	goto out_close;
				2090	}
				2091
				2092	err = nfsd_buffered_readdir(file, func, cdp, offsetp);
				2093
				2094	if (err == nfserr_eof \|\| err == nfserr_toosmall)
				2095	err = nfs_ok; /* can still be found in ->err */
				2096	out_close:
				2097	nfsd_close(file);
				2098	out:
				2099	return err;
				2100	}
				2101
				2102	/*
				2103	* Get file system stats
				2104	* N.B. After this call fhp needs an fh_put
				2105	*/
				2106	__be32
				2107	nfsd_statfs(struct svc_rqst rqstp, struct svc_fh fhp, struct kstatfs *stat, int access)
				2108	{
				2109	__be32 err;
				2110
				2111	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP \| access);
				2112	if (!err) {
				2113	struct path path = {
				2114	.mnt = fhp->fh_export->ex_path.mnt,
				2115	.dentry = fhp->fh_dentry,
				2116	};
				2117	if (vfs_statfs(&path, stat))
				2118	err = nfserr_io;
				2119	}
				2120	return err;
				2121	}
				2122
				2123	static int exp_rdonly(struct svc_rqst rqstp, struct svc_export exp)
				2124	{
				2125	return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
				2126	}
				2127
				2128	/*
				2129	* Check for a user's access permissions to this inode.
				2130	*/
				2131	__be32
				2132	nfsd_permission(struct svc_rqst rqstp, struct svc_export exp,
				2133	struct dentry *dentry, int acc)
				2134	{
				2135	struct inode *inode = dentry->d_inode;
				2136	int err;
				2137
				2138	if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
				2139	return 0;
				2140	#if 0
				2141	dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
				2142	acc,
				2143	(acc & NFSD_MAY_READ)? " read" : "",
				2144	(acc & NFSD_MAY_WRITE)? " write" : "",
				2145	(acc & NFSD_MAY_EXEC)? " exec" : "",
				2146	(acc & NFSD_MAY_SATTR)? " sattr" : "",
				2147	(acc & NFSD_MAY_TRUNC)? " trunc" : "",
				2148	(acc & NFSD_MAY_LOCK)? " lock" : "",
				2149	(acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
				2150	inode->i_mode,
				2151	IS_IMMUTABLE(inode)? " immut" : "",
				2152	IS_APPEND(inode)? " append" : "",
				2153	__mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
				2154	dprintk(" owner %d/%d user %d/%d\n",
				2155	inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid());
				2156	#endif
				2157
				2158	/* Normally we reject any write/sattr etc access on a read-only file
				2159	* system. But if it is IRIX doing check on write-access for a
				2160	* device special file, we ignore rofs.
				2161	*/
				2162	if (!(acc & NFSD_MAY_LOCAL_ACCESS))
				2163	if (acc & (NFSD_MAY_WRITE \| NFSD_MAY_SATTR \| NFSD_MAY_TRUNC)) {
				2164	if (exp_rdonly(rqstp, exp) \|\|
				2165	__mnt_is_readonly(exp->ex_path.mnt))
				2166	return nfserr_rofs;
				2167	if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
				2168	return nfserr_perm;
				2169	}
				2170	if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
				2171	return nfserr_perm;
				2172
				2173	if (acc & NFSD_MAY_LOCK) {
				2174	/* If we cannot rely on authentication in NLM requests,
				2175	* just allow locks, otherwise require read permission, or
				2176	* ownership
				2177	*/
				2178	if (exp->ex_flags & NFSEXP_NOAUTHNLM)
				2179	return 0;
				2180	else
				2181	acc = NFSD_MAY_READ \| NFSD_MAY_OWNER_OVERRIDE;
				2182	}
				2183	/*
				2184	* The file owner always gets access permission for accesses that
				2185	* would normally be checked at open time. This is to make
				2186	* file access work even when the client has done a fchmod(fd, 0).
				2187	*
				2188	* However, `cp foo bar' should fail nevertheless when bar is
				2189	* readonly. A sensible way to do this might be to reject all
				2190	* attempts to truncate a read-only file, because a creat() call
				2191	* always implies file truncation.
				2192	* ... but this isn't really fair. A process may reasonably call
				2193	* ftruncate on an open file descriptor on a file with perm 000.
				2194	* We must trust the client to do permission checking - using "ACCESS"
				2195	* with NFSv3.
				2196	*/
				2197	if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
				2198	inode->i_uid == current_fsuid())
				2199	return 0;
				2200
				2201	/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
				2202	err = inode_permission(inode, acc & (MAY_READ\|MAY_WRITE\|MAY_EXEC));
				2203
				2204	/* Allow read access to binaries even when mode 111 */
				2205	if (err == -EACCES && S_ISREG(inode->i_mode) &&
				2206	(acc == (NFSD_MAY_READ \| NFSD_MAY_OWNER_OVERRIDE) \|\|
				2207	acc == (NFSD_MAY_READ \| NFSD_MAY_READ_IF_EXEC)))
				2208	err = inode_permission(inode, MAY_EXEC);
				2209
				2210	return err? nfserrno(err) : 0;
				2211	}
				2212
				2213	void
				2214	nfsd_racache_shutdown(void)
				2215	{
				2216	struct raparms raparm, last_raparm;
				2217	unsigned int i;
				2218
				2219	dprintk("nfsd: freeing readahead buffers.\n");
				2220
				2221	for (i = 0; i < RAPARM_HASH_SIZE; i++) {
				2222	raparm = raparm_hash[i].pb_head;
				2223	while(raparm) {
				2224	last_raparm = raparm;
				2225	raparm = raparm->p_next;
				2226	kfree(last_raparm);
				2227	}
				2228	raparm_hash[i].pb_head = NULL;
				2229	}
				2230	}
				2231	/*
				2232	* Initialize readahead param cache
				2233	*/
				2234	int
				2235	nfsd_racache_init(int cache_size)
				2236	{
				2237	int i;
				2238	int j = 0;
				2239	int nperbucket;
				2240	struct raparms **raparm = NULL;
				2241
				2242
				2243	if (raparm_hash[0].pb_head)
				2244	return 0;
				2245	nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
				2246	if (nperbucket < 2)
				2247	nperbucket = 2;
				2248	cache_size = nperbucket * RAPARM_HASH_SIZE;
				2249
				2250	dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
				2251
				2252	for (i = 0; i < RAPARM_HASH_SIZE; i++) {
				2253	spin_lock_init(&raparm_hash[i].pb_lock);
				2254
				2255	raparm = &raparm_hash[i].pb_head;
				2256	for (j = 0; j < nperbucket; j++) {
				2257	*raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
				2258	if (!*raparm)
				2259	goto out_nomem;
				2260	raparm = &(*raparm)->p_next;
				2261	}
				2262	*raparm = NULL;
				2263	}
				2264
				2265	nfsdstats.ra_size = cache_size;
				2266	return 0;
				2267
				2268	out_nomem:
				2269	dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
				2270	nfsd_racache_shutdown();
				2271	return -ENOMEM;
				2272	}
				2273
				#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
				/*
				 * Fetch the POSIX ACL of the given @type for the file behind @fhp.
				 *
				 * @type selects the extended attribute that is read:
				 * ACL_TYPE_ACCESS -> POSIX_ACL_XATTR_ACCESS,
				 * ACL_TYPE_DEFAULT -> POSIX_ACL_XATTR_DEFAULT.
				 *
				 * Returns whatever posix_acl_from_xattr() produces for the attribute
				 * value, ERR_PTR(-EOPNOTSUPP) when the inode is not IS_POSIXACL() or
				 * @type is neither of the two above, or ERR_PTR() of the negative size
				 * reported by nfsd_getxattr().
				 */
				struct posix_acl *
				nfsd_get_posix_acl(struct svc_fh *fhp, int type)
				{
					struct inode *inode = fhp->fh_dentry->d_inode;
					char *name;
					void *value = NULL;
					ssize_t size;
					struct posix_acl *acl;

					if (!IS_POSIXACL(inode))
						return ERR_PTR(-EOPNOTSUPP);

					switch (type) {
					case ACL_TYPE_ACCESS:
						name = POSIX_ACL_XATTR_ACCESS;
						break;
					case ACL_TYPE_DEFAULT:
						name = POSIX_ACL_XATTR_DEFAULT;
						break;
					default:
						return ERR_PTR(-EOPNOTSUPP);
					}

					size = nfsd_getxattr(fhp->fh_dentry, name, &value);
					if (size < 0)
						return ERR_PTR(size);

					acl = posix_acl_from_xattr(value, size);
					/* The raw attribute buffer is no longer needed once converted. */
					kfree(value);
					return acl;
				}

				/*
				 * Store @acl as the POSIX ACL of the given @type on the file behind @fhp.
				 *
				 * A non-NULL @acl with at least one entry is converted with
				 * posix_acl_to_xattr() and written with vfs_setxattr().  A NULL or empty
				 * @acl removes the attribute instead; -ENODATA from the removal counts
				 * as success, and removing a default ACL from a non-directory is a
				 * no-op that succeeds.  The change is bracketed by fh_want_write() and
				 * fh_drop_write().
				 *
				 * Returns 0 on success, -EOPNOTSUPP when the inode is not IS_POSIXACL(),
				 * lacks a setxattr or removexattr inode operation, or @type is unknown,
				 * -ENOMEM when the conversion buffer cannot be allocated, or the
				 * negative error of whichever step failed.
				 */
				int
				nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
				{
					struct inode *inode = fhp->fh_dentry->d_inode;
					char *name;
					void *value = NULL;
					size_t size;
					int error;

					if (!IS_POSIXACL(inode) ||
					    !inode->i_op->setxattr || !inode->i_op->removexattr)
						return -EOPNOTSUPP;
					switch(type) {
					case ACL_TYPE_ACCESS:
						name = POSIX_ACL_XATTR_ACCESS;
						break;
					case ACL_TYPE_DEFAULT:
						name = POSIX_ACL_XATTR_DEFAULT;
						break;
					default:
						return -EOPNOTSUPP;
					}

					if (acl && acl->a_count) {
						size = posix_acl_xattr_size(acl->a_count);
						value = kmalloc(size, GFP_KERNEL);
						if (!value)
							return -ENOMEM;
						error = posix_acl_to_xattr(acl, value, size);
						if (error < 0)
							goto getout;
						/* A non-negative result is the number of bytes to store. */
						size = error;
					} else
						size = 0;

					error = fh_want_write(fhp);
					if (error)
						goto getout;
					if (size)
						error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
					else {
						if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT)
							error = 0;
						else {
							error = vfs_removexattr(fhp->fh_dentry, name);
							/* Nothing to remove is not a failure. */
							if (error == -ENODATA)
								error = 0;
						}
					}
					fh_drop_write(fhp);

				getout:
					/* value is still NULL when no ACL was converted; kfree() copes. */
					kfree(value);
					return error;
				}
				#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */