Blame - src/kernel/linux/v4.19/fs/nfs/write.c - T800

blob: 117ffd90419e28348dca0c5a7f02808ab11ec678 [file] [log] [blame]

xj	b04a402	2021-11-25 15:01:52 +0800	[diff] [blame]	1	/*
				2	* linux/fs/nfs/write.c
				3	*
				4	* Write file data over NFS.
				5	*
				6	* Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
				7	*/
				8
				9	#include <linux/types.h>
				10	#include <linux/slab.h>
				11	#include <linux/mm.h>
				12	#include <linux/pagemap.h>
				13	#include <linux/file.h>
				14	#include <linux/writeback.h>
				15	#include <linux/swap.h>
				16	#include <linux/migrate.h>
				17
				18	#include <linux/sunrpc/clnt.h>
				19	#include <linux/nfs_fs.h>
				20	#include <linux/nfs_mount.h>
				21	#include <linux/nfs_page.h>
				22	#include <linux/backing-dev.h>
				23	#include <linux/export.h>
				24	#include <linux/freezer.h>
				25	#include <linux/wait.h>
				26	#include <linux/iversion.h>
				27
				28	#include <linux/uaccess.h>
				29
				30	#include "delegation.h"
				31	#include "internal.h"
				32	#include "iostat.h"
				33	#include "nfs4_fs.h"
				34	#include "fscache.h"
				35	#include "pnfs.h"
				36
				37	#include "nfstrace.h"
				38
				39	#define NFSDBG_FACILITY NFSDBG_PAGECACHE
				40
				41	#define MIN_POOL_WRITE (32)
				42	#define MIN_POOL_COMMIT (4)
				43
				44	struct nfs_io_completion {
				45	void (complete)(void data);
				46	void *data;
				47	struct kref refcount;
				48	};
				49
				50	/*
				51	* Local function declarations
				52	*/
				53	static void nfs_redirty_request(struct nfs_page *req);
				54	static const struct rpc_call_ops nfs_commit_ops;
				55	static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
				56	static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
				57	static const struct nfs_rw_ops nfs_rw_write_ops;
				58	static void nfs_clear_request_commit(struct nfs_page *req);
				59	static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				60	struct inode *inode);
				61	static struct nfs_page *
				62	nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
				63	struct page *page);
				64
				65	static struct kmem_cache *nfs_wdata_cachep;
				66	static mempool_t *nfs_wdata_mempool;
				67	static struct kmem_cache *nfs_cdata_cachep;
				68	static mempool_t *nfs_commit_mempool;
				69
				70	struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
				71	{
				72	struct nfs_commit_data *p;
				73
				74	if (never_fail)
				75	p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
				76	else {
				77	/* It is OK to do some reclaim, not no safe to wait
				78	* for anything to be returned to the pool.
				79	* mempool_alloc() cannot handle that particular combination,
				80	* so we need two separate attempts.
				81	*/
				82	p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
				83	if (!p)
				84	p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO \|
				85	__GFP_NOWARN \| __GFP_NORETRY);
				86	if (!p)
				87	return NULL;
				88	}
				89
				90	memset(p, 0, sizeof(*p));
				91	INIT_LIST_HEAD(&p->pages);
				92	return p;
				93	}
				94	EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
				95
				96	void nfs_commit_free(struct nfs_commit_data *p)
				97	{
				98	mempool_free(p, nfs_commit_mempool);
				99	}
				100	EXPORT_SYMBOL_GPL(nfs_commit_free);
				101
				102	static struct nfs_pgio_header *nfs_writehdr_alloc(void)
				103	{
				104	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
				105
				106	memset(p, 0, sizeof(*p));
				107	p->rw_mode = FMODE_WRITE;
				108	return p;
				109	}
				110
				111	static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
				112	{
				113	mempool_free(hdr, nfs_wdata_mempool);
				114	}
				115
				116	static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
				117	{
				118	return kmalloc(sizeof(struct nfs_io_completion), gfp_flags);
				119	}
				120
				121	static void nfs_io_completion_init(struct nfs_io_completion *ioc,
				122	void (complete)(void ), void *data)
				123	{
				124	ioc->complete = complete;
				125	ioc->data = data;
				126	kref_init(&ioc->refcount);
				127	}
				128
				129	static void nfs_io_completion_release(struct kref *kref)
				130	{
				131	struct nfs_io_completion *ioc = container_of(kref,
				132	struct nfs_io_completion, refcount);
				133	ioc->complete(ioc->data);
				134	kfree(ioc);
				135	}
				136
				137	static void nfs_io_completion_get(struct nfs_io_completion *ioc)
				138	{
				139	if (ioc != NULL)
				140	kref_get(&ioc->refcount);
				141	}
				142
				143	static void nfs_io_completion_put(struct nfs_io_completion *ioc)
				144	{
				145	if (ioc != NULL)
				146	kref_put(&ioc->refcount, nfs_io_completion_release);
				147	}
				148
				149	static struct nfs_page *
				150	nfs_page_private_request(struct page *page)
				151	{
				152	if (!PagePrivate(page))
				153	return NULL;
				154	return (struct nfs_page *)page_private(page);
				155	}
				156
				157	/*
				158	* nfs_page_find_head_request_locked - find head request associated with @page
				159	*
				160	* must be called while holding the inode lock.
				161	*
				162	* returns matching head request with reference held, or NULL if not found.
				163	*/
				164	static struct nfs_page *
				165	nfs_page_find_private_request(struct page *page)
				166	{
				167	struct address_space *mapping = page_file_mapping(page);
				168	struct nfs_page *req;
				169
				170	if (!PagePrivate(page))
				171	return NULL;
				172	spin_lock(&mapping->private_lock);
				173	req = nfs_page_private_request(page);
				174	if (req) {
				175	WARN_ON_ONCE(req->wb_head != req);
				176	kref_get(&req->wb_kref);
				177	}
				178	spin_unlock(&mapping->private_lock);
				179	return req;
				180	}
				181
				182	static struct nfs_page *
				183	nfs_page_find_swap_request(struct page *page)
				184	{
				185	struct inode *inode = page_file_mapping(page)->host;
				186	struct nfs_inode *nfsi = NFS_I(inode);
				187	struct nfs_page *req = NULL;
				188	if (!PageSwapCache(page))
				189	return NULL;
				190	mutex_lock(&nfsi->commit_mutex);
				191	if (PageSwapCache(page)) {
				192	req = nfs_page_search_commits_for_head_request_locked(nfsi,
				193	page);
				194	if (req) {
				195	WARN_ON_ONCE(req->wb_head != req);
				196	kref_get(&req->wb_kref);
				197	}
				198	}
				199	mutex_unlock(&nfsi->commit_mutex);
				200	return req;
				201	}
				202
				203	/*
				204	* nfs_page_find_head_request - find head request associated with @page
				205	*
				206	* returns matching head request with reference held, or NULL if not found.
				207	*/
				208	static struct nfs_page nfs_page_find_head_request(struct page page)
				209	{
				210	struct nfs_page *req;
				211
				212	req = nfs_page_find_private_request(page);
				213	if (!req)
				214	req = nfs_page_find_swap_request(page);
				215	return req;
				216	}
				217
				218	/* Adjust the file length if we're writing beyond the end */
				219	static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
				220	{
				221	struct inode *inode = page_file_mapping(page)->host;
				222	loff_t end, i_size;
				223	pgoff_t end_index;
				224
				225	spin_lock(&inode->i_lock);
				226	i_size = i_size_read(inode);
				227	end_index = (i_size - 1) >> PAGE_SHIFT;
				228	if (i_size > 0 && page_index(page) < end_index)
				229	goto out;
				230	end = page_file_offset(page) + ((loff_t)offset+count);
				231	if (i_size >= end)
				232	goto out;
				233	i_size_write(inode, end);
				234	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
				235	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
				236	out:
				237	spin_unlock(&inode->i_lock);
				238	}
				239
				240	/* A writeback failed: mark the page as bad, and invalidate the page cache */
				241	static void nfs_set_pageerror(struct address_space *mapping)
				242	{
				243	nfs_zap_mapping(mapping->host, mapping);
				244	}
				245
				246	/*
				247	* nfs_page_group_search_locked
				248	* @head - head request of page group
				249	* @page_offset - offset into page
				250	*
				251	* Search page group with head @head to find a request that contains the
				252	* page offset @page_offset.
				253	*
				254	* Returns a pointer to the first matching nfs request, or NULL if no
				255	* match is found.
				256	*
				257	* Must be called with the page group lock held
				258	*/
				259	static struct nfs_page *
				260	nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
				261	{
				262	struct nfs_page *req;
				263
				264	req = head;
				265	do {
				266	if (page_offset >= req->wb_pgbase &&
				267	page_offset < (req->wb_pgbase + req->wb_bytes))
				268	return req;
				269
				270	req = req->wb_this_page;
				271	} while (req != head);
				272
				273	return NULL;
				274	}
				275
				276	/*
				277	* nfs_page_group_covers_page
				278	* @head - head request of page group
				279	*
				280	* Return true if the page group with head @head covers the whole page,
				281	* returns false otherwise
				282	*/
				283	static bool nfs_page_group_covers_page(struct nfs_page *req)
				284	{
				285	struct nfs_page *tmp;
				286	unsigned int pos = 0;
				287	unsigned int len = nfs_page_length(req->wb_page);
				288
				289	nfs_page_group_lock(req);
				290
				291	for (;;) {
				292	tmp = nfs_page_group_search_locked(req->wb_head, pos);
				293	if (!tmp)
				294	break;
				295	pos = tmp->wb_pgbase + tmp->wb_bytes;
				296	}
				297
				298	nfs_page_group_unlock(req);
				299	return pos >= len;
				300	}
				301
				302	/* We can set the PG_uptodate flag if we see that a write request
				303	* covers the full page.
				304	*/
				305	static void nfs_mark_uptodate(struct nfs_page *req)
				306	{
				307	if (PageUptodate(req->wb_page))
				308	return;
				309	if (!nfs_page_group_covers_page(req))
				310	return;
				311	SetPageUptodate(req->wb_page);
				312	}
				313
				314	static int wb_priority(struct writeback_control *wbc)
				315	{
				316	int ret = 0;
				317
				318	if (wbc->sync_mode == WB_SYNC_ALL)
				319	ret = FLUSH_COND_STABLE;
				320	return ret;
				321	}
				322
				323	/*
				324	* NFS congestion control
				325	*/
				326
				327	int nfs_congestion_kb;
				328
				329	#define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10))
				330	#define NFS_CONGESTION_OFF_THRESH \
				331	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
				332
				333	static void nfs_set_page_writeback(struct page *page)
				334	{
				335	struct inode *inode = page_file_mapping(page)->host;
				336	struct nfs_server *nfss = NFS_SERVER(inode);
				337	int ret = test_set_page_writeback(page);
				338
				339	WARN_ON_ONCE(ret != 0);
				340
				341	if (atomic_long_inc_return(&nfss->writeback) >
				342	NFS_CONGESTION_ON_THRESH)
				343	set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
				344	}
				345
				346	static void nfs_end_page_writeback(struct nfs_page *req)
				347	{
				348	struct inode *inode = page_file_mapping(req->wb_page)->host;
				349	struct nfs_server *nfss = NFS_SERVER(inode);
				350	bool is_done;
				351
				352	is_done = nfs_page_group_sync_on_bit(req, PG_WB_END);
				353	nfs_unlock_request(req);
				354	if (!is_done)
				355	return;
				356
				357	end_page_writeback(req->wb_page);
				358	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
				359	clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
				360	}
				361
				362	/*
				363	* nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
				364	*
				365	* this is a helper function for nfs_lock_and_join_requests
				366	*
				367	* @inode - inode associated with request page group, must be holding inode lock
				368	* @head - head request of page group, must be holding head lock
				369	* @req - request that couldn't lock and needs to wait on the req bit lock
				370	*
				371	* NOTE: this must be called holding page_group bit lock
				372	* which will be released before returning.
				373	*
				374	* returns 0 on success, < 0 on error.
				375	*/
				376	static void
				377	nfs_unroll_locks(struct inode inode, struct nfs_page head,
				378	struct nfs_page *req)
				379	{
				380	struct nfs_page *tmp;
				381
				382	/* relinquish all the locks successfully grabbed this run */
				383	for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
				384	if (!kref_read(&tmp->wb_kref))
				385	continue;
				386	nfs_unlock_and_release_request(tmp);
				387	}
				388	}
				389
				390	/*
				391	* nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
				392	*
				393	* @destroy_list - request list (using wb_this_page) terminated by @old_head
				394	* @old_head - the old head of the list
				395	*
				396	* All subrequests must be locked and removed from all lists, so at this point
				397	* they are only "active" in this function, and possibly in nfs_wait_on_request
				398	* with a reference held by some other context.
				399	*/
				400	static void
				401	nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
				402	struct nfs_page *old_head,
				403	struct inode *inode)
				404	{
				405	while (destroy_list) {
				406	struct nfs_page *subreq = destroy_list;
				407
				408	destroy_list = (subreq->wb_this_page == old_head) ?
				409	NULL : subreq->wb_this_page;
				410
				411	WARN_ON_ONCE(old_head != subreq->wb_head);
				412
				413	/* make sure old group is not used */
				414	subreq->wb_this_page = subreq;
				415
				416	clear_bit(PG_REMOVE, &subreq->wb_flags);
				417
				418	/* Note: races with nfs_page_group_destroy() */
				419	if (!kref_read(&subreq->wb_kref)) {
				420	/* Check if we raced with nfs_page_group_destroy() */
				421	if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags))
				422	nfs_free_request(subreq);
				423	continue;
				424	}
				425
				426	subreq->wb_head = subreq;
				427
				428	if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
				429	nfs_release_request(subreq);
				430	atomic_long_dec(&NFS_I(inode)->nrequests);
				431	}
				432
				433	/* subreq is now totally disconnected from page group or any
				434	* write / commit lists. last chance to wake any waiters */
				435	nfs_unlock_and_release_request(subreq);
				436	}
				437	}
				438
				439	/*
				440	* nfs_lock_and_join_requests - join all subreqs to the head req and return
				441	* a locked reference, cancelling any pending
				442	* operations for this page.
				443	*
				444	* @page - the page used to lookup the "page group" of nfs_page structures
				445	*
				446	* This function joins all sub requests to the head request by first
				447	* locking all requests in the group, cancelling any pending operations
				448	* and finally updating the head request to cover the whole range covered by
				449	* the (former) group. All subrequests are removed from any write or commit
				450	* lists, unlinked from the group and destroyed.
				451	*
				452	* Returns a locked, referenced pointer to the head request - which after
				453	* this call is guaranteed to be the only request associated with the page.
				454	* Returns NULL if no requests are found for @page, or a ERR_PTR if an
				455	* error was encountered.
				456	*/
				457	static struct nfs_page *
				458	nfs_lock_and_join_requests(struct page *page)
				459	{
				460	struct inode *inode = page_file_mapping(page)->host;
				461	struct nfs_page head, subreq;
				462	struct nfs_page *destroy_list = NULL;
				463	unsigned int total_bytes;
				464	int ret;
				465
				466	try_again:
				467	/*
				468	* A reference is taken only on the head request which acts as a
				469	* reference to the whole page group - the group will not be destroyed
				470	* until the head reference is released.
				471	*/
				472	head = nfs_page_find_head_request(page);
				473	if (!head)
				474	return NULL;
				475
				476	/* lock the page head first in order to avoid an ABBA inefficiency */
				477	if (!nfs_lock_request(head)) {
				478	ret = nfs_wait_on_request(head);
				479	nfs_release_request(head);
				480	if (ret < 0)
				481	return ERR_PTR(ret);
				482	goto try_again;
				483	}
				484
				485	/* Ensure that nobody removed the request before we locked it */
				486	if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {
				487	nfs_unlock_and_release_request(head);
				488	goto try_again;
				489	}
				490
				491	ret = nfs_page_group_lock(head);
				492	if (ret < 0)
				493	goto release_request;
				494
				495	/* lock each request in the page group */
				496	total_bytes = head->wb_bytes;
				497	for (subreq = head->wb_this_page; subreq != head;
				498	subreq = subreq->wb_this_page) {
				499
				500	if (!kref_get_unless_zero(&subreq->wb_kref)) {
				501	if (subreq->wb_offset == head->wb_offset + total_bytes)
				502	total_bytes += subreq->wb_bytes;
				503	continue;
				504	}
				505
				506	while (!nfs_lock_request(subreq)) {
				507	/*
				508	* Unlock page to allow nfs_page_group_sync_on_bit()
				509	* to succeed
				510	*/
				511	nfs_page_group_unlock(head);
				512	ret = nfs_wait_on_request(subreq);
				513	if (!ret)
				514	ret = nfs_page_group_lock(head);
				515	if (ret < 0) {
				516	nfs_unroll_locks(inode, head, subreq);
				517	nfs_release_request(subreq);
				518	goto release_request;
				519	}
				520	}
				521	/*
				522	* Subrequests are always contiguous, non overlapping
				523	* and in order - but may be repeated (mirrored writes).
				524	*/
				525	if (subreq->wb_offset == (head->wb_offset + total_bytes)) {
				526	/* keep track of how many bytes this group covers */
				527	total_bytes += subreq->wb_bytes;
				528	} else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset \|\|
				529	((subreq->wb_offset + subreq->wb_bytes) >
				530	(head->wb_offset + total_bytes)))) {
				531	nfs_page_group_unlock(head);
				532	nfs_unroll_locks(inode, head, subreq);
				533	nfs_unlock_and_release_request(subreq);
				534	ret = -EIO;
				535	goto release_request;
				536	}
				537	}
				538
				539	/* Now that all requests are locked, make sure they aren't on any list.
				540	* Commit list removal accounting is done after locks are dropped */
				541	subreq = head;
				542	do {
				543	nfs_clear_request_commit(subreq);
				544	subreq = subreq->wb_this_page;
				545	} while (subreq != head);
				546
				547	/* unlink subrequests from head, destroy them later */
				548	if (head->wb_this_page != head) {
				549	/* destroy list will be terminated by head */
				550	destroy_list = head->wb_this_page;
				551	head->wb_this_page = head;
				552
				553	/* change head request to cover whole range that
				554	* the former page group covered */
				555	head->wb_bytes = total_bytes;
				556	}
				557
				558	/* Postpone destruction of this request */
				559	if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) {
				560	set_bit(PG_INODE_REF, &head->wb_flags);
				561	kref_get(&head->wb_kref);
				562	atomic_long_inc(&NFS_I(inode)->nrequests);
				563	}
				564
				565	nfs_page_group_unlock(head);
				566
				567	nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
				568
				569	/* Did we lose a race with nfs_inode_remove_request()? */
				570	if (!(PagePrivate(page) \|\| PageSwapCache(page))) {
				571	nfs_unlock_and_release_request(head);
				572	return NULL;
				573	}
				574
				575	/* still holds ref on head from nfs_page_find_head_request
				576	* and still has lock on head from lock loop */
				577	return head;
				578
				579	release_request:
				580	nfs_unlock_and_release_request(head);
				581	return ERR_PTR(ret);
				582	}
				583
				584	static void nfs_write_error_remove_page(struct nfs_page *req)
				585	{
				586	nfs_end_page_writeback(req);
				587	generic_error_remove_page(page_file_mapping(req->wb_page),
				588	req->wb_page);
				589	nfs_release_request(req);
				590	}
				591
				592	static bool
				593	nfs_error_is_fatal_on_server(int err)
				594	{
				595	switch (err) {
				596	case 0:
				597	case -ERESTARTSYS:
				598	case -EINTR:
				599	return false;
				600	}
				601	return nfs_error_is_fatal(err);
				602	}
				603
				604	/*
				605	* Find an associated nfs write request, and prepare to flush it out
				606	* May return an error if the user signalled nfs_wait_on_request().
				607	*/
				608	static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				609	struct page *page)
				610	{
				611	struct nfs_page *req;
				612	int ret = 0;
				613
				614	req = nfs_lock_and_join_requests(page);
				615	if (!req)
				616	goto out;
				617	ret = PTR_ERR(req);
				618	if (IS_ERR(req))
				619	goto out;
				620
				621	nfs_set_page_writeback(page);
				622	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
				623
				624	ret = req->wb_context->error;
				625	/* If there is a fatal error that covers this write, just exit */
				626	if (nfs_error_is_fatal_on_server(ret))
				627	goto out_launder;
				628
				629	ret = 0;
				630	if (!nfs_pageio_add_request(pgio, req)) {
				631	ret = pgio->pg_error;
				632	/*
				633	* Remove the problematic req upon fatal errors on the server
				634	*/
				635	if (nfs_error_is_fatal(ret)) {
				636	nfs_context_set_write_error(req->wb_context, ret);
				637	if (nfs_error_is_fatal_on_server(ret))
				638	goto out_launder;
				639	} else
				640	ret = -EAGAIN;
				641	nfs_redirty_request(req);
				642	} else
				643	nfs_add_stats(page_file_mapping(page)->host,
				644	NFSIOS_WRITEPAGES, 1);
				645	out:
				646	return ret;
				647	out_launder:
				648	nfs_write_error_remove_page(req);
				649	return ret;
				650	}
				651
				652	static int nfs_do_writepage(struct page page, struct writeback_control wbc,
				653	struct nfs_pageio_descriptor *pgio)
				654	{
				655	int ret;
				656
				657	nfs_pageio_cond_complete(pgio, page_index(page));
				658	ret = nfs_page_async_flush(pgio, page);
				659	if (ret == -EAGAIN) {
				660	redirty_page_for_writepage(wbc, page);
				661	ret = 0;
				662	}
				663	return ret;
				664	}
				665
				666	/*
				667	* Write an mmapped page to the server.
				668	*/
				669	static int nfs_writepage_locked(struct page *page,
				670	struct writeback_control *wbc)
				671	{
				672	struct nfs_pageio_descriptor pgio;
				673	struct inode *inode = page_file_mapping(page)->host;
				674	int err;
				675
				676	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
				677	nfs_pageio_init_write(&pgio, inode, 0,
				678	false, &nfs_async_write_completion_ops);
				679	err = nfs_do_writepage(page, wbc, &pgio);
				680	nfs_pageio_complete(&pgio);
				681	if (err < 0)
				682	return err;
				683	if (pgio.pg_error < 0)
				684	return pgio.pg_error;
				685	return 0;
				686	}
				687
				688	int nfs_writepage(struct page page, struct writeback_control wbc)
				689	{
				690	int ret;
				691
				692	ret = nfs_writepage_locked(page, wbc);
				693	unlock_page(page);
				694	return ret;
				695	}
				696
				697	static int nfs_writepages_callback(struct page page, struct writeback_control wbc, void *data)
				698	{
				699	int ret;
				700
				701	ret = nfs_do_writepage(page, wbc, data);
				702	unlock_page(page);
				703	return ret;
				704	}
				705
				706	static void nfs_io_completion_commit(void *inode)
				707	{
				708	nfs_commit_inode(inode, 0);
				709	}
				710
				711	int nfs_writepages(struct address_space mapping, struct writeback_control wbc)
				712	{
				713	struct inode *inode = mapping->host;
				714	struct nfs_pageio_descriptor pgio;
				715	struct nfs_io_completion *ioc = nfs_io_completion_alloc(GFP_NOFS);
				716	int err;
				717
				718	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
				719
				720	if (ioc)
				721	nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
				722
				723	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
				724	&nfs_async_write_completion_ops);
				725	pgio.pg_io_completion = ioc;
				726	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
				727	nfs_pageio_complete(&pgio);
				728	nfs_io_completion_put(ioc);
				729
				730	if (err < 0)
				731	goto out_err;
				732	err = pgio.pg_error;
				733	if (err < 0)
				734	goto out_err;
				735	return 0;
				736	out_err:
				737	return err;
				738	}
				739
				740	/*
				741	* Insert a write request into an inode
				742	*/
				743	static void nfs_inode_add_request(struct inode inode, struct nfs_page req)
				744	{
				745	struct address_space *mapping = page_file_mapping(req->wb_page);
				746	struct nfs_inode *nfsi = NFS_I(inode);
				747
				748	WARN_ON_ONCE(req->wb_this_page != req);
				749
				750	/* Lock the request! */
				751	nfs_lock_request(req);
				752
				753	/*
				754	* Swap-space should not get truncated. Hence no need to plug the race
				755	* with invalidate/truncate.
				756	*/
				757	spin_lock(&mapping->private_lock);
				758	if (!nfs_have_writebacks(inode) &&
				759	NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
				760	inode_inc_iversion_raw(inode);
				761	if (likely(!PageSwapCache(req->wb_page))) {
				762	set_bit(PG_MAPPED, &req->wb_flags);
				763	SetPagePrivate(req->wb_page);
				764	set_page_private(req->wb_page, (unsigned long)req);
				765	}
				766	spin_unlock(&mapping->private_lock);
				767	atomic_long_inc(&nfsi->nrequests);
				768	/* this a head request for a page group - mark it as having an
				769	* extra reference so sub groups can follow suit.
				770	* This flag also informs pgio layer when to bump nrequests when
				771	* adding subrequests. */
				772	WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
				773	kref_get(&req->wb_kref);
				774	}
				775
				776	/*
				777	* Remove a write request from an inode
				778	*/
				779	static void nfs_inode_remove_request(struct nfs_page *req)
				780	{
				781	struct address_space *mapping = page_file_mapping(req->wb_page);
				782	struct inode *inode = mapping->host;
				783	struct nfs_inode *nfsi = NFS_I(inode);
				784	struct nfs_page *head;
				785
				786	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
				787	head = req->wb_head;
				788
				789	spin_lock(&mapping->private_lock);
				790	if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
				791	set_page_private(head->wb_page, 0);
				792	ClearPagePrivate(head->wb_page);
				793	clear_bit(PG_MAPPED, &head->wb_flags);
				794	}
				795	spin_unlock(&mapping->private_lock);
				796	}
				797
				798	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
				799	nfs_release_request(req);
				800	atomic_long_dec(&nfsi->nrequests);
				801	}
				802	}
				803
				804	static void
				805	nfs_mark_request_dirty(struct nfs_page *req)
				806	{
				807	if (req->wb_page)
				808	__set_page_dirty_nobuffers(req->wb_page);
				809	}
				810
				811	/*
				812	* nfs_page_search_commits_for_head_request_locked
				813	*
				814	* Search through commit lists on @inode for the head request for @page.
				815	* Must be called while holding the inode (which is cinfo) lock.
				816	*
				817	* Returns the head request if found, or NULL if not found.
				818	*/
				819	static struct nfs_page *
				820	nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
				821	struct page *page)
				822	{
				823	struct nfs_page freq, t;
				824	struct nfs_commit_info cinfo;
				825	struct inode *inode = &nfsi->vfs_inode;
				826
				827	nfs_init_cinfo_from_inode(&cinfo, inode);
				828
				829	/* search through pnfs commit lists */
				830	freq = pnfs_search_commit_reqs(inode, &cinfo, page);
				831	if (freq)
				832	return freq->wb_head;
				833
				834	/* Linearly search the commit list for the correct request */
				835	list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
				836	if (freq->wb_page == page)
				837	return freq->wb_head;
				838	}
				839
				840	return NULL;
				841	}
				842
				843	/**
				844	* nfs_request_add_commit_list_locked - add request to a commit list
				845	* @req: pointer to a struct nfs_page
				846	* @dst: commit list head
				847	* @cinfo: holds list lock and accounting info
				848	*
				849	* This sets the PG_CLEAN bit, updates the cinfo count of
				850	* number of outstanding requests requiring a commit as well as
				851	* the MM page stats.
				852	*
				853	* The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the
				854	* nfs_page lock.
				855	*/
				856	void
				857	nfs_request_add_commit_list_locked(struct nfs_page req, struct list_head dst,
				858	struct nfs_commit_info *cinfo)
				859	{
				860	set_bit(PG_CLEAN, &req->wb_flags);
				861	nfs_list_add_request(req, dst);
				862	atomic_long_inc(&cinfo->mds->ncommit);
				863	}
				864	EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
				865
				866	/**
				867	* nfs_request_add_commit_list - add request to a commit list
				868	* @req: pointer to a struct nfs_page
				869	* @dst: commit list head
				870	* @cinfo: holds list lock and accounting info
				871	*
				872	* This sets the PG_CLEAN bit, updates the cinfo count of
				873	* number of outstanding requests requiring a commit as well as
				874	* the MM page stats.
				875	*
				876	* The caller must _not_ hold the cinfo->lock, but must be
				877	* holding the nfs_page lock.
				878	*/
				879	void
				880	nfs_request_add_commit_list(struct nfs_page req, struct nfs_commit_info cinfo)
				881	{
				882	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
				883	nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
				884	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
				885	if (req->wb_page)
				886	nfs_mark_page_unstable(req->wb_page, cinfo);
				887	}
				888	EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
				889
				890	/**
				891	* nfs_request_remove_commit_list - Remove request from a commit list
				892	* @req: pointer to a nfs_page
				893	* @cinfo: holds list lock and accounting info
				894	*
				895	* This clears the PG_CLEAN bit, and updates the cinfo's count of
				896	* number of outstanding requests requiring a commit
				897	* It does not update the MM page stats.
				898	*
				899	* The caller _must_ hold the cinfo->lock and the nfs_page lock.
				900	*/
				901	void
				902	nfs_request_remove_commit_list(struct nfs_page *req,
				903	struct nfs_commit_info *cinfo)
				904	{
				905	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
				906	return;
				907	nfs_list_remove_request(req);
				908	atomic_long_dec(&cinfo->mds->ncommit);
				909	}
				910	EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
				911
				912	static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				913	struct inode *inode)
				914	{
				915	cinfo->inode = inode;
				916	cinfo->mds = &NFS_I(inode)->commit_info;
				917	cinfo->ds = pnfs_get_ds_info(inode);
				918	cinfo->dreq = NULL;
				919	cinfo->completion_ops = &nfs_commit_completion_ops;
				920	}
				921
				922	void nfs_init_cinfo(struct nfs_commit_info *cinfo,
				923	struct inode *inode,
				924	struct nfs_direct_req *dreq)
				925	{
				926	if (dreq)
				927	nfs_init_cinfo_from_dreq(cinfo, dreq);
				928	else
				929	nfs_init_cinfo_from_inode(cinfo, inode);
				930	}
				931	EXPORT_SYMBOL_GPL(nfs_init_cinfo);
				932
				933	/*
				934	* Add a request to the inode's commit list.
				935	*/
				936	void
				937	nfs_mark_request_commit(struct nfs_page req, struct pnfs_layout_segment lseg,
				938	struct nfs_commit_info *cinfo, u32 ds_commit_idx)
				939	{
				940	if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
				941	return;
				942	nfs_request_add_commit_list(req, cinfo);
				943	}
				944
				945	static void
				946	nfs_clear_page_commit(struct page *page)
				947	{
				948	dec_node_page_state(page, NR_UNSTABLE_NFS);
				949	dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
				950	WB_RECLAIMABLE);
				951	}
				952
				953	/* Called holding the request lock on @req */
				954	static void
				955	nfs_clear_request_commit(struct nfs_page *req)
				956	{
				957	if (test_bit(PG_CLEAN, &req->wb_flags)) {
				958	struct inode *inode = d_inode(req->wb_context->dentry);
				959	struct nfs_commit_info cinfo;
				960
				961	nfs_init_cinfo_from_inode(&cinfo, inode);
				962	mutex_lock(&NFS_I(inode)->commit_mutex);
				963	if (!pnfs_clear_request_commit(req, &cinfo)) {
				964	nfs_request_remove_commit_list(req, &cinfo);
				965	}
				966	mutex_unlock(&NFS_I(inode)->commit_mutex);
				967	nfs_clear_page_commit(req->wb_page);
				968	}
				969	}
				970
				971	int nfs_write_need_commit(struct nfs_pgio_header *hdr)
				972	{
				973	if (hdr->verf.committed == NFS_DATA_SYNC)
				974	return hdr->lseg == NULL;
				975	return hdr->verf.committed != NFS_FILE_SYNC;
				976	}
				977
				978	static void nfs_async_write_init(struct nfs_pgio_header *hdr)
				979	{
				980	nfs_io_completion_get(hdr->io_completion);
				981	}
				982
				983	static void nfs_write_completion(struct nfs_pgio_header *hdr)
				984	{
				985	struct nfs_commit_info cinfo;
				986	unsigned long bytes = 0;
				987
				988	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
				989	goto out;
				990	nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
				991	while (!list_empty(&hdr->pages)) {
				992	struct nfs_page *req = nfs_list_entry(hdr->pages.next);
				993
				994	bytes += req->wb_bytes;
				995	nfs_list_remove_request(req);
				996	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
				997	(hdr->good_bytes < bytes)) {
				998	nfs_set_pageerror(page_file_mapping(req->wb_page));
				999	nfs_context_set_write_error(req->wb_context, hdr->error);
				1000	goto remove_req;
				1001	}
				1002	if (nfs_write_need_commit(hdr)) {
				1003	memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
				1004	nfs_mark_request_commit(req, hdr->lseg, &cinfo,
				1005	hdr->pgio_mirror_idx);
				1006	goto next;
				1007	}
				1008	remove_req:
				1009	nfs_inode_remove_request(req);
				1010	next:
				1011	nfs_end_page_writeback(req);
				1012	nfs_release_request(req);
				1013	}
				1014	out:
				1015	nfs_io_completion_put(hdr->io_completion);
				1016	hdr->release(hdr);
				1017	}
				1018
				1019	unsigned long
				1020	nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
				1021	{
				1022	return atomic_long_read(&cinfo->mds->ncommit);
				1023	}
				1024
				1025	/* NFS_I(cinfo->inode)->commit_mutex held by caller */
				1026	int
				1027	nfs_scan_commit_list(struct list_head src, struct list_head dst,
				1028	struct nfs_commit_info *cinfo, int max)
				1029	{
				1030	struct nfs_page req, tmp;
				1031	int ret = 0;
				1032
				1033	restart:
				1034	list_for_each_entry_safe(req, tmp, src, wb_list) {
				1035	kref_get(&req->wb_kref);
				1036	if (!nfs_lock_request(req)) {
				1037	int status;
				1038
				1039	/* Prevent deadlock with nfs_lock_and_join_requests */
				1040	if (!list_empty(dst)) {
				1041	nfs_release_request(req);
				1042	continue;
				1043	}
				1044	/* Ensure we make progress to prevent livelock */
				1045	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
				1046	status = nfs_wait_on_request(req);
				1047	nfs_release_request(req);
				1048	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
				1049	if (status < 0)
				1050	break;
				1051	goto restart;
				1052	}
				1053	nfs_request_remove_commit_list(req, cinfo);
				1054	clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
				1055	nfs_list_add_request(req, dst);
				1056	ret++;
				1057	if ((ret == max) && !cinfo->dreq)
				1058	break;
				1059	cond_resched();
				1060	}
				1061	return ret;
				1062	}
				1063	EXPORT_SYMBOL_GPL(nfs_scan_commit_list);
				1064
				1065	/*
				1066	* nfs_scan_commit - Scan an inode for commit requests
				1067	* @inode: NFS inode to scan
				1068	* @dst: mds destination list
				1069	* @cinfo: mds and ds lists of reqs ready to commit
				1070	*
				1071	* Moves requests from the inode's 'commit' request list.
				1072	* The requests are not checked to ensure that they form a contiguous set.
				1073	*/
				1074	int
				1075	nfs_scan_commit(struct inode inode, struct list_head dst,
				1076	struct nfs_commit_info *cinfo)
				1077	{
				1078	int ret = 0;
				1079
				1080	if (!atomic_long_read(&cinfo->mds->ncommit))
				1081	return 0;
				1082	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
				1083	if (atomic_long_read(&cinfo->mds->ncommit) > 0) {
				1084	const int max = INT_MAX;
				1085
				1086	ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
				1087	cinfo, max);
				1088	ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
				1089	}
				1090	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
				1091	return ret;
				1092	}
				1093
				1094	/*
				1095	* Search for an existing write request, and attempt to update
				1096	* it to reflect a new dirty region on a given page.
				1097	*
				1098	* If the attempt fails, then the existing request is flushed out
				1099	* to disk.
				1100	*/
				1101	static struct nfs_page nfs_try_to_update_request(struct inode inode,
				1102	struct page *page,
				1103	unsigned int offset,
				1104	unsigned int bytes)
				1105	{
				1106	struct nfs_page *req;
				1107	unsigned int rqend;
				1108	unsigned int end;
				1109	int error;
				1110
				1111	end = offset + bytes;
				1112
				1113	req = nfs_lock_and_join_requests(page);
				1114	if (IS_ERR_OR_NULL(req))
				1115	return req;
				1116
				1117	rqend = req->wb_offset + req->wb_bytes;
				1118	/*
				1119	* Tell the caller to flush out the request if
				1120	* the offsets are non-contiguous.
				1121	* Note: nfs_flush_incompatible() will already
				1122	* have flushed out requests having wrong owners.
				1123	*/
				1124	if (offset > rqend \|\| end < req->wb_offset)
				1125	goto out_flushme;
				1126
				1127	/* Okay, the request matches. Update the region */
				1128	if (offset < req->wb_offset) {
				1129	req->wb_offset = offset;
				1130	req->wb_pgbase = offset;
				1131	}
				1132	if (end > rqend)
				1133	req->wb_bytes = end - req->wb_offset;
				1134	else
				1135	req->wb_bytes = rqend - req->wb_offset;
				1136	return req;
				1137	out_flushme:
				1138	/*
				1139	* Note: we mark the request dirty here because
				1140	* nfs_lock_and_join_requests() cannot preserve
				1141	* commit flags, so we have to replay the write.
				1142	*/
				1143	nfs_mark_request_dirty(req);
				1144	nfs_unlock_and_release_request(req);
				1145	error = nfs_wb_page(inode, page);
				1146	return (error < 0) ? ERR_PTR(error) : NULL;
				1147	}
				1148
				1149	/*
				1150	* Try to update an existing write request, or create one if there is none.
				1151	*
				1152	* Note: Should always be called with the Page Lock held to prevent races
				1153	* if we have to add a new request. Also assumes that the caller has
				1154	* already called nfs_flush_incompatible() if necessary.
				1155	*/
				1156	static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
				1157	struct page *page, unsigned int offset, unsigned int bytes)
				1158	{
				1159	struct inode *inode = page_file_mapping(page)->host;
				1160	struct nfs_page *req;
				1161
				1162	req = nfs_try_to_update_request(inode, page, offset, bytes);
				1163	if (req != NULL)
				1164	goto out;
				1165	req = nfs_create_request(ctx, page, NULL, offset, bytes);
				1166	if (IS_ERR(req))
				1167	goto out;
				1168	nfs_inode_add_request(inode, req);
				1169	out:
				1170	return req;
				1171	}
				1172
				1173	static int nfs_writepage_setup(struct nfs_open_context ctx, struct page page,
				1174	unsigned int offset, unsigned int count)
				1175	{
				1176	struct nfs_page *req;
				1177
				1178	req = nfs_setup_write_request(ctx, page, offset, count);
				1179	if (IS_ERR(req))
				1180	return PTR_ERR(req);
				1181	/* Update file length */
				1182	nfs_grow_file(page, offset, count);
				1183	nfs_mark_uptodate(req);
				1184	nfs_mark_request_dirty(req);
				1185	nfs_unlock_and_release_request(req);
				1186	return 0;
				1187	}
				1188
				1189	int nfs_flush_incompatible(struct file file, struct page page)
				1190	{
				1191	struct nfs_open_context *ctx = nfs_file_open_context(file);
				1192	struct nfs_lock_context *l_ctx;
				1193	struct file_lock_context *flctx = file_inode(file)->i_flctx;
				1194	struct nfs_page *req;
				1195	int do_flush, status;
				1196	/*
				1197	* Look for a request corresponding to this page. If there
				1198	* is one, and it belongs to another file, we flush it out
				1199	* before we try to copy anything into the page. Do this
				1200	* due to the lack of an ACCESS-type call in NFSv2.
				1201	* Also do the same if we find a request from an existing
				1202	* dropped page.
				1203	*/
				1204	do {
				1205	req = nfs_page_find_head_request(page);
				1206	if (req == NULL)
				1207	return 0;
				1208	l_ctx = req->wb_lock_context;
				1209	do_flush = req->wb_page != page \|\|
				1210	!nfs_match_open_context(req->wb_context, ctx);
				1211	if (l_ctx && flctx &&
				1212	!(list_empty_careful(&flctx->flc_posix) &&
				1213	list_empty_careful(&flctx->flc_flock))) {
				1214	do_flush \|= l_ctx->lockowner != current->files;
				1215	}
				1216	nfs_release_request(req);
				1217	if (!do_flush)
				1218	return 0;
				1219	status = nfs_wb_page(page_file_mapping(page)->host, page);
				1220	} while (status == 0);
				1221	return status;
				1222	}
				1223
				1224	/*
				1225	* Avoid buffered writes when a open context credential's key would
				1226	* expire soon.
				1227	*
				1228	* Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL.
				1229	*
				1230	* Return 0 and set a credential flag which triggers the inode to flush
				1231	* and performs NFS_FILE_SYNC writes if the key will expired within
				1232	* RPC_KEY_EXPIRE_TIMEO.
				1233	*/
				1234	int
				1235	nfs_key_timeout_notify(struct file filp, struct inode inode)
				1236	{
				1237	struct nfs_open_context *ctx = nfs_file_open_context(filp);
				1238	struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
				1239
				1240	return rpcauth_key_timeout_notify(auth, ctx->cred);
				1241	}
				1242
				1243	/*
				1244	* Test if the open context credential key is marked to expire soon.
				1245	*/
				1246	bool nfs_ctx_key_to_expire(struct nfs_open_context ctx, struct inode inode)
				1247	{
				1248	struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
				1249
				1250	return rpcauth_cred_key_to_expire(auth, ctx->cred);
				1251	}
				1252
				1253	/*
				1254	* If the page cache is marked as unsafe or invalid, then we can't rely on
				1255	* the PageUptodate() flag. In this case, we will need to turn off
				1256	* write optimisations that depend on the page contents being correct.
				1257	*/
				1258	static bool nfs_write_pageuptodate(struct page page, struct inode inode)
				1259	{
				1260	struct nfs_inode *nfsi = NFS_I(inode);
				1261
				1262	if (nfs_have_delegated_attributes(inode))
				1263	goto out;
				1264	if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
				1265	return false;
				1266	smp_rmb();
				1267	if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
				1268	return false;
				1269	out:
				1270	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
				1271	return false;
				1272	return PageUptodate(page) != 0;
				1273	}
				1274
				1275	static bool
				1276	is_whole_file_wrlock(struct file_lock *fl)
				1277	{
				1278	return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
				1279	fl->fl_type == F_WRLCK;
				1280	}
				1281
				1282	/* If we know the page is up to date, and we're not using byte range locks (or
				1283	* if we have the whole file locked for writing), it may be more efficient to
				1284	* extend the write to cover the entire page in order to avoid fragmentation
				1285	* inefficiencies.
				1286	*
				1287	* If the file is opened for synchronous writes then we can just skip the rest
				1288	* of the checks.
				1289	*/
				1290	static int nfs_can_extend_write(struct file file, struct page page, struct inode *inode)
				1291	{
				1292	int ret;
				1293	struct file_lock_context *flctx = inode->i_flctx;
				1294	struct file_lock *fl;
				1295
				1296	if (file->f_flags & O_DSYNC)
				1297	return 0;
				1298	if (!nfs_write_pageuptodate(page, inode))
				1299	return 0;
				1300	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
				1301	return 1;
				1302	if (!flctx \|\| (list_empty_careful(&flctx->flc_flock) &&
				1303	list_empty_careful(&flctx->flc_posix)))
				1304	return 1;
				1305
				1306	/* Check to see if there are whole file write locks */
				1307	ret = 0;
				1308	spin_lock(&flctx->flc_lock);
				1309	if (!list_empty(&flctx->flc_posix)) {
				1310	fl = list_first_entry(&flctx->flc_posix, struct file_lock,
				1311	fl_list);
				1312	if (is_whole_file_wrlock(fl))
				1313	ret = 1;
				1314	} else if (!list_empty(&flctx->flc_flock)) {
				1315	fl = list_first_entry(&flctx->flc_flock, struct file_lock,
				1316	fl_list);
				1317	if (fl->fl_type == F_WRLCK)
				1318	ret = 1;
				1319	}
				1320	spin_unlock(&flctx->flc_lock);
				1321	return ret;
				1322	}
				1323
				1324	/*
				1325	* Update and possibly write a cached page of an NFS file.
				1326	*
				1327	* XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
				1328	* things with a page scheduled for an RPC call (e.g. invalidate it).
				1329	*/
				1330	int nfs_updatepage(struct file file, struct page page,
				1331	unsigned int offset, unsigned int count)
				1332	{
				1333	struct nfs_open_context *ctx = nfs_file_open_context(file);
				1334	struct address_space *mapping = page_file_mapping(page);
				1335	struct inode *inode = mapping->host;
				1336	int status = 0;
				1337
				1338	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
				1339
				1340	dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n",
				1341	file, count, (long long)(page_file_offset(page) + offset));
				1342
				1343	if (!count)
				1344	goto out;
				1345
				1346	if (nfs_can_extend_write(file, page, inode)) {
				1347	count = max(count + offset, nfs_page_length(page));
				1348	offset = 0;
				1349	}
				1350
				1351	status = nfs_writepage_setup(ctx, page, offset, count);
				1352	if (status < 0)
				1353	nfs_set_pageerror(mapping);
				1354	else
				1355	__set_page_dirty_nobuffers(page);
				1356	out:
				1357	dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
				1358	status, (long long)i_size_read(inode));
				1359	return status;
				1360	}
				1361
				1362	static int flush_task_priority(int how)
				1363	{
				1364	switch (how & (FLUSH_HIGHPRI\|FLUSH_LOWPRI)) {
				1365	case FLUSH_HIGHPRI:
				1366	return RPC_PRIORITY_HIGH;
				1367	case FLUSH_LOWPRI:
				1368	return RPC_PRIORITY_LOW;
				1369	}
				1370	return RPC_PRIORITY_NORMAL;
				1371	}
				1372
				1373	static void nfs_initiate_write(struct nfs_pgio_header *hdr,
				1374	struct rpc_message *msg,
				1375	const struct nfs_rpc_ops *rpc_ops,
				1376	struct rpc_task_setup *task_setup_data, int how)
				1377	{
				1378	int priority = flush_task_priority(how);
				1379
				1380	task_setup_data->priority = priority;
				1381	rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
				1382	trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes,
				1383	hdr->args.stable);
				1384	}
				1385
				1386	/* If a nfs_flush_* function fails, it should remove reqs from @head and
				1387	* call this on each, which will prepare them to be retried on next
				1388	* writeback using standard nfs.
				1389	*/
				1390	static void nfs_redirty_request(struct nfs_page *req)
				1391	{
				1392	nfs_mark_request_dirty(req);
				1393	set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
				1394	nfs_end_page_writeback(req);
				1395	nfs_release_request(req);
				1396	}
				1397
				1398	static void nfs_async_write_error(struct list_head *head, int error)
				1399	{
				1400	struct nfs_page *req;
				1401
				1402	while (!list_empty(head)) {
				1403	req = nfs_list_entry(head->next);
				1404	nfs_list_remove_request(req);
				1405	if (nfs_error_is_fatal(error)) {
				1406	nfs_context_set_write_error(req->wb_context, error);
				1407	if (nfs_error_is_fatal_on_server(error)) {
				1408	nfs_write_error_remove_page(req);
				1409	continue;
				1410	}
				1411	}
				1412	nfs_redirty_request(req);
				1413	}
				1414	}
				1415
				1416	static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
				1417	{
				1418	nfs_async_write_error(&hdr->pages, 0);
				1419	filemap_fdatawrite_range(hdr->inode->i_mapping, hdr->args.offset,
				1420	hdr->args.offset + hdr->args.count - 1);
				1421	}
				1422
				1423	static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
				1424	.init_hdr = nfs_async_write_init,
				1425	.error_cleanup = nfs_async_write_error,
				1426	.completion = nfs_write_completion,
				1427	.reschedule_io = nfs_async_write_reschedule_io,
				1428	};
				1429
				1430	void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
				1431	struct inode *inode, int ioflags, bool force_mds,
				1432	const struct nfs_pgio_completion_ops *compl_ops)
				1433	{
				1434	struct nfs_server *server = NFS_SERVER(inode);
				1435	const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
				1436
				1437	#ifdef CONFIG_NFS_V4_1
				1438	if (server->pnfs_curr_ld && !force_mds)
				1439	pg_ops = server->pnfs_curr_ld->pg_write_ops;
				1440	#endif
				1441	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
				1442	server->wsize, ioflags);
				1443	}
				1444	EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
				1445
				1446	void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
				1447	{
				1448	struct nfs_pgio_mirror *mirror;
				1449
				1450	if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
				1451	pgio->pg_ops->pg_cleanup(pgio);
				1452
				1453	pgio->pg_ops = &nfs_pgio_rw_ops;
				1454
				1455	nfs_pageio_stop_mirroring(pgio);
				1456
				1457	mirror = &pgio->pg_mirrors[0];
				1458	mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
				1459	}
				1460	EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
				1461
				1462
				1463	void nfs_commit_prepare(struct rpc_task task, void calldata)
				1464	{
				1465	struct nfs_commit_data *data = calldata;
				1466
				1467	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
				1468	}
				1469
				1470	/*
				1471	* Special version of should_remove_suid() that ignores capabilities.
				1472	*/
				1473	static int nfs_should_remove_suid(const struct inode *inode)
				1474	{
				1475	umode_t mode = inode->i_mode;
				1476	int kill = 0;
				1477
				1478	/* suid always must be killed */
				1479	if (unlikely(mode & S_ISUID))
				1480	kill = ATTR_KILL_SUID;
				1481
				1482	/*
				1483	* sgid without any exec bits is just a mandatory locking mark; leave
				1484	* it alone. If some exec bits are set, it's a real sgid; kill it.
				1485	*/
				1486	if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
				1487	kill \|= ATTR_KILL_SGID;
				1488
				1489	if (unlikely(kill && S_ISREG(mode)))
				1490	return kill;
				1491
				1492	return 0;
				1493	}
				1494
				1495	static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
				1496	struct nfs_fattr *fattr)
				1497	{
				1498	struct nfs_pgio_args *argp = &hdr->args;
				1499	struct nfs_pgio_res *resp = &hdr->res;
				1500	u64 size = argp->offset + resp->count;
				1501
				1502	if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
				1503	fattr->size = size;
				1504	if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) {
				1505	fattr->valid &= ~NFS_ATTR_FATTR_SIZE;
				1506	return;
				1507	}
				1508	if (size != fattr->size)
				1509	return;
				1510	/* Set attribute barrier */
				1511	nfs_fattr_set_barrier(fattr);
				1512	/* ...and update size */
				1513	fattr->valid \|= NFS_ATTR_FATTR_SIZE;
				1514	}
				1515
				1516	void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
				1517	{
				1518	struct nfs_fattr *fattr = &hdr->fattr;
				1519	struct inode *inode = hdr->inode;
				1520
				1521	spin_lock(&inode->i_lock);
				1522	nfs_writeback_check_extend(hdr, fattr);
				1523	nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
				1524	spin_unlock(&inode->i_lock);
				1525	}
				1526	EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);
				1527
				1528	/*
				1529	* This function is called when the WRITE call is complete.
				1530	*/
				1531	static int nfs_writeback_done(struct rpc_task *task,
				1532	struct nfs_pgio_header *hdr,
				1533	struct inode *inode)
				1534	{
				1535	int status;
				1536
				1537	/*
				1538	* ->write_done will attempt to use post-op attributes to detect
				1539	* conflicting writes by other clients. A strict interpretation
				1540	* of close-to-open would allow us to continue caching even if
				1541	* another writer had changed the file, but some applications
				1542	* depend on tighter cache coherency when writing.
				1543	*/
				1544	status = NFS_PROTO(inode)->write_done(task, hdr);
				1545	if (status != 0)
				1546	return status;
				1547
				1548	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
				1549	trace_nfs_writeback_done(inode, task->tk_status,
				1550	hdr->args.offset, hdr->res.verf);
				1551
				1552	if (hdr->res.verf->committed < hdr->args.stable &&
				1553	task->tk_status >= 0) {
				1554	/* We tried a write call, but the server did not
				1555	* commit data to stable storage even though we
				1556	* requested it.
				1557	* Note: There is a known bug in Tru64 < 5.0 in which
				1558	* the server reports NFS_DATA_SYNC, but performs
				1559	* NFS_FILE_SYNC. We therefore implement this checking
				1560	* as a dprintk() in order to avoid filling syslog.
				1561	*/
				1562	static unsigned long complain;
				1563
				1564	/* Note this will print the MDS for a DS write */
				1565	if (time_before(complain, jiffies)) {
				1566	dprintk("NFS: faulty NFS server %s:"
				1567	" (committed = %d) != (stable = %d)\n",
				1568	NFS_SERVER(inode)->nfs_client->cl_hostname,
				1569	hdr->res.verf->committed, hdr->args.stable);
				1570	complain = jiffies + 300 * HZ;
				1571	}
				1572	}
				1573
				1574	/* Deal with the suid/sgid bit corner case */
				1575	if (nfs_should_remove_suid(inode)) {
				1576	spin_lock(&inode->i_lock);
				1577	NFS_I(inode)->cache_validity \|= NFS_INO_INVALID_OTHER;
				1578	spin_unlock(&inode->i_lock);
				1579	}
				1580	return 0;
				1581	}
				1582
				1583	/*
				1584	* This function is called when the WRITE call is complete.
				1585	*/
				1586	static void nfs_writeback_result(struct rpc_task *task,
				1587	struct nfs_pgio_header *hdr)
				1588	{
				1589	struct nfs_pgio_args *argp = &hdr->args;
				1590	struct nfs_pgio_res *resp = &hdr->res;
				1591
				1592	if (resp->count < argp->count) {
				1593	static unsigned long complain;
				1594
				1595	/* This a short write! */
				1596	nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
				1597
				1598	/* Has the server at least made some progress? */
				1599	if (resp->count == 0) {
				1600	if (time_before(complain, jiffies)) {
				1601	printk(KERN_WARNING
				1602	"NFS: Server wrote zero bytes, expected %u.\n",
				1603	argp->count);
				1604	complain = jiffies + 300 * HZ;
				1605	}
				1606	nfs_set_pgio_error(hdr, -EIO, argp->offset);
				1607	task->tk_status = -EIO;
				1608	return;
				1609	}
				1610
				1611	/* For non rpc-based layout drivers, retry-through-MDS */
				1612	if (!task->tk_ops) {
				1613	hdr->pnfs_error = -EAGAIN;
				1614	return;
				1615	}
				1616
				1617	/* Was this an NFSv2 write or an NFSv3 stable write? */
				1618	if (resp->verf->committed != NFS_UNSTABLE) {
				1619	/* Resend from where the server left off */
				1620	hdr->mds_offset += resp->count;
				1621	argp->offset += resp->count;
				1622	argp->pgbase += resp->count;
				1623	argp->count -= resp->count;
				1624	} else {
				1625	/* Resend as a stable write in order to avoid
				1626	* headaches in the case of a server crash.
				1627	*/
				1628	argp->stable = NFS_FILE_SYNC;
				1629	}
				1630	rpc_restart_call_prepare(task);
				1631	}
				1632	}
				1633
				1634	static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
				1635	{
				1636	return wait_var_event_killable(&cinfo->rpcs_out,
				1637	!atomic_read(&cinfo->rpcs_out));
				1638	}
				1639
				1640	static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
				1641	{
				1642	atomic_inc(&cinfo->rpcs_out);
				1643	}
				1644
				1645	static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
				1646	{
				1647	if (atomic_dec_and_test(&cinfo->rpcs_out))
				1648	wake_up_var(&cinfo->rpcs_out);
				1649	}
				1650
				1651	void nfs_commitdata_release(struct nfs_commit_data *data)
				1652	{
				1653	put_nfs_open_context(data->context);
				1654	nfs_commit_free(data);
				1655	}
				1656	EXPORT_SYMBOL_GPL(nfs_commitdata_release);
				1657
				1658	int nfs_initiate_commit(struct rpc_clnt clnt, struct nfs_commit_data data,
				1659	const struct nfs_rpc_ops *nfs_ops,
				1660	const struct rpc_call_ops *call_ops,
				1661	int how, int flags)
				1662	{
				1663	struct rpc_task *task;
				1664	int priority = flush_task_priority(how);
				1665	struct rpc_message msg = {
				1666	.rpc_argp = &data->args,
				1667	.rpc_resp = &data->res,
				1668	.rpc_cred = data->cred,
				1669	};
				1670	struct rpc_task_setup task_setup_data = {
				1671	.task = &data->task,
				1672	.rpc_client = clnt,
				1673	.rpc_message = &msg,
				1674	.callback_ops = call_ops,
				1675	.callback_data = data,
				1676	.workqueue = nfsiod_workqueue,
				1677	.flags = RPC_TASK_ASYNC \| flags,
				1678	.priority = priority,
				1679	};
				1680	/* Set up the initial task struct. */
				1681	nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client);
				1682	trace_nfs_initiate_commit(data);
				1683
				1684	dprintk("NFS: initiated commit call\n");
				1685
				1686	task = rpc_run_task(&task_setup_data);
				1687	if (IS_ERR(task))
				1688	return PTR_ERR(task);
				1689	if (how & FLUSH_SYNC)
				1690	rpc_wait_for_completion_task(task);
				1691	rpc_put_task(task);
				1692	return 0;
				1693	}
				1694	EXPORT_SYMBOL_GPL(nfs_initiate_commit);
				1695
				1696	static loff_t nfs_get_lwb(struct list_head *head)
				1697	{
				1698	loff_t lwb = 0;
				1699	struct nfs_page *req;
				1700
				1701	list_for_each_entry(req, head, wb_list)
				1702	if (lwb < (req_offset(req) + req->wb_bytes))
				1703	lwb = req_offset(req) + req->wb_bytes;
				1704
				1705	return lwb;
				1706	}
				1707
				1708	/*
				1709	* Set up the argument/result storage required for the RPC call.
				1710	*/
				1711	void nfs_init_commit(struct nfs_commit_data *data,
				1712	struct list_head *head,
				1713	struct pnfs_layout_segment *lseg,
				1714	struct nfs_commit_info *cinfo)
				1715	{
				1716	struct nfs_page *first = nfs_list_entry(head->next);
				1717	struct inode *inode = d_inode(first->wb_context->dentry);
				1718
				1719	/* Set up the RPC argument and reply structs
				1720	* NB: take care not to mess about with data->commit et al. */
				1721
				1722	list_splice_init(head, &data->pages);
				1723
				1724	data->inode = inode;
				1725	data->cred = first->wb_context->cred;
				1726	data->lseg = lseg; /* reference transferred */
				1727	/* only set lwb for pnfs commit */
				1728	if (lseg)
				1729	data->lwb = nfs_get_lwb(&data->pages);
				1730	data->mds_ops = &nfs_commit_ops;
				1731	data->completion_ops = cinfo->completion_ops;
				1732	data->dreq = cinfo->dreq;
				1733
				1734	data->args.fh = NFS_FH(data->inode);
				1735	/* Note: we always request a commit of the entire inode */
				1736	data->args.offset = 0;
				1737	data->args.count = 0;
				1738	data->context = get_nfs_open_context(first->wb_context);
				1739	data->res.fattr = &data->fattr;
				1740	data->res.verf = &data->verf;
				1741	nfs_fattr_init(&data->fattr);
				1742	}
				1743	EXPORT_SYMBOL_GPL(nfs_init_commit);
				1744
				1745	void nfs_retry_commit(struct list_head *page_list,
				1746	struct pnfs_layout_segment *lseg,
				1747	struct nfs_commit_info *cinfo,
				1748	u32 ds_commit_idx)
				1749	{
				1750	struct nfs_page *req;
				1751
				1752	while (!list_empty(page_list)) {
				1753	req = nfs_list_entry(page_list->next);
				1754	nfs_list_remove_request(req);
				1755	nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx);
				1756	if (!cinfo->dreq)
				1757	nfs_clear_page_commit(req->wb_page);
				1758	nfs_unlock_and_release_request(req);
				1759	}
				1760	}
				1761	EXPORT_SYMBOL_GPL(nfs_retry_commit);
				1762
				1763	static void
				1764	nfs_commit_resched_write(struct nfs_commit_info *cinfo,
				1765	struct nfs_page *req)
				1766	{
				1767	__set_page_dirty_nobuffers(req->wb_page);
				1768	}
				1769
				1770	/*
				1771	* Commit dirty pages
				1772	*/
				1773	static int
				1774	nfs_commit_list(struct inode inode, struct list_head head, int how,
				1775	struct nfs_commit_info *cinfo)
				1776	{
				1777	struct nfs_commit_data *data;
				1778
				1779	/* another commit raced with us */
				1780	if (list_empty(head))
				1781	return 0;
				1782
				1783	data = nfs_commitdata_alloc(true);
				1784
				1785	/* Set up the argument struct */
				1786	nfs_init_commit(data, head, NULL, cinfo);
				1787	atomic_inc(&cinfo->mds->rpcs_out);
				1788	return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
				1789	data->mds_ops, how, 0);
				1790	}
				1791
				1792	/*
				1793	* COMMIT call returned
				1794	*/
				1795	static void nfs_commit_done(struct rpc_task task, void calldata)
				1796	{
				1797	struct nfs_commit_data *data = calldata;
				1798
				1799	dprintk("NFS: %5u nfs_commit_done (status %d)\n",
				1800	task->tk_pid, task->tk_status);
				1801
				1802	/* Call the NFS version-specific code */
				1803	NFS_PROTO(data->inode)->commit_done(task, data);
				1804	trace_nfs_commit_done(data);
				1805	}
				1806
				1807	static void nfs_commit_release_pages(struct nfs_commit_data *data)
				1808	{
				1809	struct nfs_page *req;
				1810	int status = data->task.tk_status;
				1811	struct nfs_commit_info cinfo;
				1812	struct nfs_server *nfss;
				1813
				1814	while (!list_empty(&data->pages)) {
				1815	req = nfs_list_entry(data->pages.next);
				1816	nfs_list_remove_request(req);
				1817	if (req->wb_page)
				1818	nfs_clear_page_commit(req->wb_page);
				1819
				1820	dprintk("NFS: commit (%s/%llu %d@%lld)",
				1821	req->wb_context->dentry->d_sb->s_id,
				1822	(unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
				1823	req->wb_bytes,
				1824	(long long)req_offset(req));
				1825	if (status < 0) {
				1826	nfs_context_set_write_error(req->wb_context, status);
				1827	if (req->wb_page)
				1828	nfs_inode_remove_request(req);
				1829	dprintk_cont(", error = %d\n", status);
				1830	goto next;
				1831	}
				1832
				1833	/* Okay, COMMIT succeeded, apparently. Check the verifier
				1834	* returned by the server against all stored verfs. */
				1835	if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
				1836	/* We have a match */
				1837	if (req->wb_page)
				1838	nfs_inode_remove_request(req);
				1839	dprintk_cont(" OK\n");
				1840	goto next;
				1841	}
				1842	/* We have a mismatch. Write the page again */
				1843	dprintk_cont(" mismatch\n");
				1844	nfs_mark_request_dirty(req);
				1845	set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
				1846	next:
				1847	nfs_unlock_and_release_request(req);
				1848	/* Latency breaker */
				1849	cond_resched();
				1850	}
				1851	nfss = NFS_SERVER(data->inode);
				1852	if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
				1853	clear_bdi_congested(inode_to_bdi(data->inode), BLK_RW_ASYNC);
				1854
				1855	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
				1856	nfs_commit_end(cinfo.mds);
				1857	}
				1858
				1859	static void nfs_commit_release(void *calldata)
				1860	{
				1861	struct nfs_commit_data *data = calldata;
				1862
				1863	data->completion_ops->completion(data);
				1864	nfs_commitdata_release(calldata);
				1865	}
				1866
				1867	static const struct rpc_call_ops nfs_commit_ops = {
				1868	.rpc_call_prepare = nfs_commit_prepare,
				1869	.rpc_call_done = nfs_commit_done,
				1870	.rpc_release = nfs_commit_release,
				1871	};
				1872
				1873	static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
				1874	.completion = nfs_commit_release_pages,
				1875	.resched_write = nfs_commit_resched_write,
				1876	};
				1877
				1878	int nfs_generic_commit_list(struct inode inode, struct list_head head,
				1879	int how, struct nfs_commit_info *cinfo)
				1880	{
				1881	int status;
				1882
				1883	status = pnfs_commit_list(inode, head, how, cinfo);
				1884	if (status == PNFS_NOT_ATTEMPTED)
				1885	status = nfs_commit_list(inode, head, how, cinfo);
				1886	return status;
				1887	}
				1888
				1889	static int __nfs_commit_inode(struct inode *inode, int how,
				1890	struct writeback_control *wbc)
				1891	{
				1892	LIST_HEAD(head);
				1893	struct nfs_commit_info cinfo;
				1894	int may_wait = how & FLUSH_SYNC;
				1895	int ret, nscan;
				1896
				1897	nfs_init_cinfo_from_inode(&cinfo, inode);
				1898	nfs_commit_begin(cinfo.mds);
				1899	for (;;) {
				1900	ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
				1901	if (ret <= 0)
				1902	break;
				1903	ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
				1904	if (ret < 0)
				1905	break;
				1906	ret = 0;
				1907	if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
				1908	if (nscan < wbc->nr_to_write)
				1909	wbc->nr_to_write -= nscan;
				1910	else
				1911	wbc->nr_to_write = 0;
				1912	}
				1913	if (nscan < INT_MAX)
				1914	break;
				1915	cond_resched();
				1916	}
				1917	nfs_commit_end(cinfo.mds);
				1918	if (ret \|\| !may_wait)
				1919	return ret;
				1920	return wait_on_commit(cinfo.mds);
				1921	}
				1922
				1923	int nfs_commit_inode(struct inode *inode, int how)
				1924	{
				1925	return __nfs_commit_inode(inode, how, NULL);
				1926	}
				1927	EXPORT_SYMBOL_GPL(nfs_commit_inode);
				1928
				1929	int nfs_write_inode(struct inode inode, struct writeback_control wbc)
				1930	{
				1931	struct nfs_inode *nfsi = NFS_I(inode);
				1932	int flags = FLUSH_SYNC;
				1933	int ret = 0;
				1934
				1935	if (wbc->sync_mode == WB_SYNC_NONE) {
				1936	/* no commits means nothing needs to be done */
				1937	if (!atomic_long_read(&nfsi->commit_info.ncommit))
				1938	goto check_requests_outstanding;
				1939
				1940	/* Don't commit yet if this is a non-blocking flush and there
				1941	* are a lot of outstanding writes for this mapping.
				1942	*/
				1943	if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
				1944	goto out_mark_dirty;
				1945
				1946	/* don't wait for the COMMIT response */
				1947	flags = 0;
				1948	}
				1949
				1950	ret = __nfs_commit_inode(inode, flags, wbc);
				1951	if (!ret) {
				1952	if (flags & FLUSH_SYNC)
				1953	return 0;
				1954	} else if (atomic_long_read(&nfsi->commit_info.ncommit))
				1955	goto out_mark_dirty;
				1956
				1957	check_requests_outstanding:
				1958	if (!atomic_read(&nfsi->commit_info.rpcs_out))
				1959	return ret;
				1960	out_mark_dirty:
				1961	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
				1962	return ret;
				1963	}
				1964	EXPORT_SYMBOL_GPL(nfs_write_inode);
				1965
				1966	/*
				1967	* Wrapper for filemap_write_and_wait_range()
				1968	*
				1969	* Needed for pNFS in order to ensure data becomes visible to the
				1970	* client.
				1971	*/
				1972	int nfs_filemap_write_and_wait_range(struct address_space *mapping,
				1973	loff_t lstart, loff_t lend)
				1974	{
				1975	int ret;
				1976
				1977	ret = filemap_write_and_wait_range(mapping, lstart, lend);
				1978	if (ret == 0)
				1979	ret = pnfs_sync_inode(mapping->host, true);
				1980	return ret;
				1981	}
				1982	EXPORT_SYMBOL_GPL(nfs_filemap_write_and_wait_range);
				1983
				1984	/*
				1985	* flush the inode to disk.
				1986	*/
				1987	int nfs_wb_all(struct inode *inode)
				1988	{
				1989	int ret;
				1990
				1991	trace_nfs_writeback_inode_enter(inode);
				1992
				1993	ret = filemap_write_and_wait(inode->i_mapping);
				1994	if (ret)
				1995	goto out;
				1996	ret = nfs_commit_inode(inode, FLUSH_SYNC);
				1997	if (ret < 0)
				1998	goto out;
				1999	pnfs_sync_inode(inode, true);
				2000	ret = 0;
				2001
				2002	out:
				2003	trace_nfs_writeback_inode_exit(inode, ret);
				2004	return ret;
				2005	}
				2006	EXPORT_SYMBOL_GPL(nfs_wb_all);
				2007
				2008	int nfs_wb_page_cancel(struct inode inode, struct page page)
				2009	{
				2010	struct nfs_page *req;
				2011	int ret = 0;
				2012
				2013	wait_on_page_writeback(page);
				2014
				2015	/* blocking call to cancel all requests and join to a single (head)
				2016	* request */
				2017	req = nfs_lock_and_join_requests(page);
				2018
				2019	if (IS_ERR(req)) {
				2020	ret = PTR_ERR(req);
				2021	} else if (req) {
				2022	/* all requests from this page have been cancelled by
				2023	* nfs_lock_and_join_requests, so just remove the head
				2024	* request from the inode / page_private pointer and
				2025	* release it */
				2026	nfs_inode_remove_request(req);
				2027	nfs_unlock_and_release_request(req);
				2028	}
				2029
				2030	return ret;
				2031	}
				2032
				2033	/*
				2034	* Write back all requests on one page - we do this before reading it.
				2035	*/
				2036	int nfs_wb_page(struct inode inode, struct page page)
				2037	{
				2038	loff_t range_start = page_file_offset(page);
				2039	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
				2040	struct writeback_control wbc = {
				2041	.sync_mode = WB_SYNC_ALL,
				2042	.nr_to_write = 0,
				2043	.range_start = range_start,
				2044	.range_end = range_end,
				2045	};
				2046	int ret;
				2047
				2048	trace_nfs_writeback_page_enter(inode);
				2049
				2050	for (;;) {
				2051	wait_on_page_writeback(page);
				2052	if (clear_page_dirty_for_io(page)) {
				2053	ret = nfs_writepage_locked(page, &wbc);
				2054	if (ret < 0)
				2055	goto out_error;
				2056	continue;
				2057	}
				2058	ret = 0;
				2059	if (!PagePrivate(page))
				2060	break;
				2061	ret = nfs_commit_inode(inode, FLUSH_SYNC);
				2062	if (ret < 0)
				2063	goto out_error;
				2064	}
				2065	out_error:
				2066	trace_nfs_writeback_page_exit(inode, ret);
				2067	return ret;
				2068	}
				2069
				2070	#ifdef CONFIG_MIGRATION
				2071	int nfs_migrate_page(struct address_space mapping, struct page newpage,
				2072	struct page *page, enum migrate_mode mode)
				2073	{
				2074	/*
				2075	* If PagePrivate is set, then the page is currently associated with
				2076	* an in-progress read or write request. Don't try to migrate it.
				2077	*
				2078	* FIXME: we could do this in principle, but we'll need a way to ensure
				2079	* that we can safely release the inode reference while holding
				2080	* the page lock.
				2081	*/
				2082	if (PagePrivate(page))
				2083	return -EBUSY;
				2084
				2085	if (!nfs_fscache_release_page(page, GFP_KERNEL))
				2086	return -EBUSY;
				2087
				2088	return migrate_page(mapping, newpage, page, mode);
				2089	}
				2090	#endif
				2091
				2092	int __init nfs_init_writepagecache(void)
				2093	{
				2094	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
				2095	sizeof(struct nfs_pgio_header),
				2096	0, SLAB_HWCACHE_ALIGN,
				2097	NULL);
				2098	if (nfs_wdata_cachep == NULL)
				2099	return -ENOMEM;
				2100
				2101	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
				2102	nfs_wdata_cachep);
				2103	if (nfs_wdata_mempool == NULL)
				2104	goto out_destroy_write_cache;
				2105
				2106	nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
				2107	sizeof(struct nfs_commit_data),
				2108	0, SLAB_HWCACHE_ALIGN,
				2109	NULL);
				2110	if (nfs_cdata_cachep == NULL)
				2111	goto out_destroy_write_mempool;
				2112
				2113	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
				2114	nfs_cdata_cachep);
				2115	if (nfs_commit_mempool == NULL)
				2116	goto out_destroy_commit_cache;
				2117
				2118	/*
				2119	* NFS congestion size, scale with available memory.
				2120	*
				2121	* 64MB: 8192k
				2122	* 128MB: 11585k
				2123	* 256MB: 16384k
				2124	* 512MB: 23170k
				2125	* 1GB: 32768k
				2126	* 2GB: 46340k
				2127	* 4GB: 65536k
				2128	* 8GB: 92681k
				2129	* 16GB: 131072k
				2130	*
				2131	* This allows larger machines to have larger/more transfers.
				2132	* Limit the default to 256M
				2133	*/
				2134	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
				2135	if (nfs_congestion_kb > 256*1024)
				2136	nfs_congestion_kb = 256*1024;
				2137
				2138	return 0;
				2139
				2140	out_destroy_commit_cache:
				2141	kmem_cache_destroy(nfs_cdata_cachep);
				2142	out_destroy_write_mempool:
				2143	mempool_destroy(nfs_wdata_mempool);
				2144	out_destroy_write_cache:
				2145	kmem_cache_destroy(nfs_wdata_cachep);
				2146	return -ENOMEM;
				2147	}
				2148
				2149	void nfs_destroy_writepagecache(void)
				2150	{
				2151	mempool_destroy(nfs_commit_mempool);
				2152	kmem_cache_destroy(nfs_cdata_cachep);
				2153	mempool_destroy(nfs_wdata_mempool);
				2154	kmem_cache_destroy(nfs_wdata_cachep);
				2155	}
				2156
				2157	static const struct nfs_rw_ops nfs_rw_write_ops = {
				2158	.rw_alloc_header = nfs_writehdr_alloc,
				2159	.rw_free_header = nfs_writehdr_free,
				2160	.rw_done = nfs_writeback_done,
				2161	.rw_result = nfs_writeback_result,
				2162	.rw_initiate = nfs_initiate_write,
				2163	};