// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/migrate.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>
#include <linux/export.h>
#include <linux/freezer.h>
#include <linux/wait.h>
#include <linux/iversion.h>

#include <linux/uaccess.h>
#include <linux/sched/mm.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"
#include "pnfs.h"

#include "nfstrace.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

struct nfs_io_completion {
	void (*complete)(void *data);
	void *data;
	struct kref refcount;
};

/*
 * Local function declarations
 */
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
static void nfs_inode_remove_request(struct nfs_page *req);
static void nfs_clear_request_commit(struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode);
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
						struct page *page);

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static struct kmem_cache *nfs_cdata_cachep;
static mempool_t *nfs_commit_mempool;

struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
{
	struct nfs_commit_data *p;

	if (never_fail)
		p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
	else {
		/* It is OK to do some reclaim, but not safe to wait
		 * for anything to be returned to the pool.
		 * mempool_alloc() cannot handle that particular combination,
		 * so we need two separate attempts.
		 */
		p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
		if (!p)
			p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO |
					     __GFP_NOWARN | __GFP_NORETRY);
		if (!p)
			return NULL;
	}

	memset(p, 0, sizeof(*p));
	INIT_LIST_HEAD(&p->pages);
	return p;
}
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);

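/*
 * The two-step allocation in nfs_commitdata_alloc() is a general "try
 * hard, but never deadlock" pattern: a minimal sketch of the same idea
 * for an arbitrary cache (the names below are illustrative only, not
 * part of this file):
 *
 *	struct foo *foo_alloc(mempool_t *pool, struct kmem_cache *cache)
 *	{
 *		struct foo *p = mempool_alloc(pool, GFP_NOWAIT);
 *
 *		if (!p)
 *			p = kmem_cache_alloc(cache, GFP_NOIO |
 *					__GFP_NOWARN | __GFP_NORETRY);
 *		return p;	// may be NULL; the caller must cope
 *	}
 */
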
void nfs_commit_free(struct nfs_commit_data *p)
{
	mempool_free(p, nfs_commit_mempool);
}
EXPORT_SYMBOL_GPL(nfs_commit_free);

static struct nfs_pgio_header *nfs_writehdr_alloc(void)
{
	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL);

	memset(p, 0, sizeof(*p));
	p->rw_mode = FMODE_WRITE;
	return p;
}

static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{
	mempool_free(hdr, nfs_wdata_mempool);
}

static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
{
	return kmalloc(sizeof(struct nfs_io_completion), gfp_flags);
}

static void nfs_io_completion_init(struct nfs_io_completion *ioc,
				   void (*complete)(void *), void *data)
{
	ioc->complete = complete;
	ioc->data = data;
	kref_init(&ioc->refcount);
}

static void nfs_io_completion_release(struct kref *kref)
{
	struct nfs_io_completion *ioc = container_of(kref,
			struct nfs_io_completion, refcount);
	ioc->complete(ioc->data);
	kfree(ioc);
}

static void nfs_io_completion_get(struct nfs_io_completion *ioc)
{
	if (ioc != NULL)
		kref_get(&ioc->refcount);
}

static void nfs_io_completion_put(struct nfs_io_completion *ioc)
{
	if (ioc != NULL)
		kref_put(&ioc->refcount, nfs_io_completion_release);
}

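/*
 * Typical nfs_io_completion lifecycle (an illustrative sketch, mirroring
 * how nfs_writepages() below uses it): the allocator holds the initial
 * reference from kref_init(), each in-flight pgio header takes another
 * via nfs_io_completion_get(), and the final nfs_io_completion_put()
 * fires ->complete() exactly once:
 *
 *	ioc = nfs_io_completion_alloc(GFP_KERNEL);
 *	if (ioc)
 *		nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
 *	...				// headers call _get()/_put()
 *	nfs_io_completion_put(ioc);	// last put invokes the callback
 */
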
static struct nfs_page *
nfs_page_private_request(struct page *page)
{
	if (!PagePrivate(page))
		return NULL;
	return (struct nfs_page *)page_private(page);
}

/*
 * nfs_page_find_private_request - find head request associated with @page
 *
 * Takes and releases the mapping's private_lock internally, so the
 * caller must not already hold it.
 *
 * returns matching head request with reference held, or NULL if not found.
 */
static struct nfs_page *
nfs_page_find_private_request(struct page *page)
{
	struct address_space *mapping = page_file_mapping(page);
	struct nfs_page *req;

	if (!PagePrivate(page))
		return NULL;
	spin_lock(&mapping->private_lock);
	req = nfs_page_private_request(page);
	if (req) {
		WARN_ON_ONCE(req->wb_head != req);
		kref_get(&req->wb_kref);
	}
	spin_unlock(&mapping->private_lock);
	return req;
}

static struct nfs_page *
nfs_page_find_swap_request(struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *req = NULL;
	if (!PageSwapCache(page))
		return NULL;
	mutex_lock(&nfsi->commit_mutex);
	if (PageSwapCache(page)) {
		req = nfs_page_search_commits_for_head_request_locked(nfsi,
			page);
		if (req) {
			WARN_ON_ONCE(req->wb_head != req);
			kref_get(&req->wb_kref);
		}
	}
	mutex_unlock(&nfsi->commit_mutex);
	return req;
}

/*
 * nfs_page_find_head_request - find head request associated with @page
 *
 * returns matching head request with reference held, or NULL if not found.
 */
static struct nfs_page *nfs_page_find_head_request(struct page *page)
{
	struct nfs_page *req;

	req = nfs_page_find_private_request(page);
	if (!req)
		req = nfs_page_find_swap_request(page);
	return req;
}

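/*
 * Note on the lookup order above: ordinary page-cache pages carry their
 * head request in page_private(), found under mapping->private_lock;
 * swap-cache pages have no page_private(), so their head requests are
 * located by searching the commit lists under the inode's commit_mutex.
 */
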
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *inode = page_file_mapping(page)->host;
	loff_t end, i_size;
	pgoff_t end_index;

	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = (i_size - 1) >> PAGE_SHIFT;
	if (i_size > 0 && page_index(page) < end_index)
		goto out;
	end = page_file_offset(page) + ((loff_t)offset+count);
	if (i_size >= end)
		goto out;
	i_size_write(inode, end);
	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
}

/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct address_space *mapping)
{
	struct inode *inode = mapping->host;

	nfs_zap_mapping(mapping->host, mapping);
	/* Force file size revalidation */
	spin_lock(&inode->i_lock);
	NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED |
					NFS_INO_REVAL_PAGECACHE |
					NFS_INO_INVALID_SIZE;
	spin_unlock(&inode->i_lock);
}

static void nfs_mapping_set_error(struct page *page, int error)
{
	SetPageError(page);
	mapping_set_error(page_file_mapping(page), error);
}

/*
 * nfs_page_group_search_locked
 * @head - head request of page group
 * @page_offset - offset into page
 *
 * Search page group with head @head to find a request that contains the
 * page offset @page_offset.
 *
 * Returns a pointer to the first matching nfs request, or NULL if no
 * match is found.
 *
 * Must be called with the page group lock held
 */
static struct nfs_page *
nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
{
	struct nfs_page *req;

	req = head;
	do {
		if (page_offset >= req->wb_pgbase &&
		    page_offset < (req->wb_pgbase + req->wb_bytes))
			return req;

		req = req->wb_this_page;
	} while (req != head);

	return NULL;
}

/*
 * nfs_page_group_covers_page
 * @req - request in the page group whose head is examined
 *
 * Return true if the page group containing @req covers the whole page,
 * false otherwise
 */
static bool nfs_page_group_covers_page(struct nfs_page *req)
{
	struct nfs_page *tmp;
	unsigned int pos = 0;
	unsigned int len = nfs_page_length(req->wb_page);

	nfs_page_group_lock(req);

	for (;;) {
		tmp = nfs_page_group_search_locked(req->wb_head, pos);
		if (!tmp)
			break;
		pos = tmp->wb_pgbase + tmp->wb_bytes;
	}

	nfs_page_group_unlock(req);
	return pos >= len;
}

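/*
 * Worked example of the coverage walk above (hypothetical numbers): for
 * a 4096-byte page covered by two subrequests [0,2048) and [2048,4096),
 * the first lookup at pos 0 returns the first request and advances pos
 * to 2048; the second lookup returns the second request and advances pos
 * to 4096; the third lookup finds nothing, and 4096 >= nfs_page_length()
 * so the group covers the page. A hole (say, nothing at [1024,2048))
 * would stop the walk at pos 1024 and the function would return false.
 */
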
/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct nfs_page *req)
{
	if (PageUptodate(req->wb_page))
		return;
	if (!nfs_page_group_covers_page(req))
		return;
	SetPageUptodate(req->wb_page);
}

static int wb_priority(struct writeback_control *wbc)
{
	int ret = 0;

	if (wbc->sync_mode == WB_SYNC_ALL)
		ret = FLUSH_COND_STABLE;
	return ret;
}

/*
 * NFS congestion control
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

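/*
 * Worked example of the thresholds (assuming PAGE_SHIFT == 12 and
 * nfs_congestion_kb == 16384, i.e. 16MB): ON_THRESH = 16384 >> 2 = 4096
 * pages and OFF_THRESH = 4096 - 1024 = 3072 pages, so the bdi is marked
 * congested once 16MB of pages are under writeback and the mark is
 * cleared again when the total drops below 12MB.
 */
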
static void nfs_set_page_writeback(struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_server *nfss = NFS_SERVER(inode);
	int ret = test_set_page_writeback(page);

	WARN_ON_ONCE(ret != 0);

	if (atomic_long_inc_return(&nfss->writeback) >
			NFS_CONGESTION_ON_THRESH)
		set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
}

static void nfs_end_page_writeback(struct nfs_page *req)
{
	struct inode *inode = page_file_mapping(req->wb_page)->host;
	struct nfs_server *nfss = NFS_SERVER(inode);
	bool is_done;

	is_done = nfs_page_group_sync_on_bit(req, PG_WB_END);
	nfs_unlock_request(req);
	if (!is_done)
		return;

	end_page_writeback(req->wb_page);
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
		clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
}

/*
 * nfs_unroll_locks - unlock all requests locked so far in a page group
 *
 * this is a helper function for nfs_lock_and_join_requests
 *
 * @inode - inode associated with request page group
 * @head - head request of page group, must be holding head lock
 * @req - request up to which (exclusive) the locks should be relinquished
 *
 * NOTE: this must be called holding page_group bit lock
 */
static void
nfs_unroll_locks(struct inode *inode, struct nfs_page *head,
		 struct nfs_page *req)
{
	struct nfs_page *tmp;

	/* relinquish all the locks successfully grabbed this run */
	for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
		if (!kref_read(&tmp->wb_kref))
			continue;
		nfs_unlock_and_release_request(tmp);
	}
}

/*
 * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
 *
 * @destroy_list - request list (using wb_this_page) terminated by @old_head
 * @old_head - the old head of the list
 *
 * All subrequests must be locked and removed from all lists, so at this point
 * they are only "active" in this function, and possibly in nfs_wait_on_request
 * with a reference held by some other context.
 */
static void
nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
				 struct nfs_page *old_head,
				 struct inode *inode)
{
	while (destroy_list) {
		struct nfs_page *subreq = destroy_list;

		destroy_list = (subreq->wb_this_page == old_head) ?
				   NULL : subreq->wb_this_page;

		/* Note: lock subreq in order to change subreq->wb_head */
		nfs_page_set_headlock(subreq);
		WARN_ON_ONCE(old_head != subreq->wb_head);

		/* make sure old group is not used */
		subreq->wb_this_page = subreq;
		subreq->wb_head = subreq;

		clear_bit(PG_REMOVE, &subreq->wb_flags);

		/* Note: races with nfs_page_group_destroy() */
		if (!kref_read(&subreq->wb_kref)) {
			/* Check if we raced with nfs_page_group_destroy() */
			if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) {
				nfs_page_clear_headlock(subreq);
				nfs_free_request(subreq);
			} else
				nfs_page_clear_headlock(subreq);
			continue;
		}
		nfs_page_clear_headlock(subreq);

		nfs_release_request(old_head);

		if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
			nfs_release_request(subreq);
			atomic_long_dec(&NFS_I(inode)->nrequests);
		}

		/* subreq is now totally disconnected from page group or any
		 * write / commit lists. last chance to wake any waiters */
		nfs_unlock_and_release_request(subreq);
	}
}

/*
 * nfs_lock_and_join_requests - join all subreqs to the head req and return
 *                              a locked reference, cancelling any pending
 *                              operations for this page.
 *
 * @page - the page used to lookup the "page group" of nfs_page structures
 *
 * This function joins all sub requests to the head request by first
 * locking all requests in the group, cancelling any pending operations
 * and finally updating the head request to cover the whole range covered by
 * the (former) group. All subrequests are removed from any write or commit
 * lists, unlinked from the group and destroyed.
 *
 * Returns a locked, referenced pointer to the head request - which after
 * this call is guaranteed to be the only request associated with the page.
 * Returns NULL if no requests are found for @page, or an ERR_PTR if an
 * error was encountered.
 */
static struct nfs_page *
nfs_lock_and_join_requests(struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_page *head, *subreq;
	struct nfs_page *destroy_list = NULL;
	unsigned int total_bytes;
	int ret;

try_again:
	/*
	 * A reference is taken only on the head request which acts as a
	 * reference to the whole page group - the group will not be destroyed
	 * until the head reference is released.
	 */
	head = nfs_page_find_head_request(page);
	if (!head)
		return NULL;

	/* lock the page head first in order to avoid an ABBA inefficiency */
	if (!nfs_lock_request(head)) {
		ret = nfs_wait_on_request(head);
		nfs_release_request(head);
		if (ret < 0)
			return ERR_PTR(ret);
		goto try_again;
	}

	/* Ensure that nobody removed the request before we locked it */
	if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {
		nfs_unlock_and_release_request(head);
		goto try_again;
	}

	ret = nfs_page_group_lock(head);
	if (ret < 0)
		goto release_request;

	/* lock each request in the page group */
	total_bytes = head->wb_bytes;
	for (subreq = head->wb_this_page; subreq != head;
			subreq = subreq->wb_this_page) {

		if (!kref_get_unless_zero(&subreq->wb_kref)) {
			if (subreq->wb_offset == head->wb_offset + total_bytes)
				total_bytes += subreq->wb_bytes;
			continue;
		}

		while (!nfs_lock_request(subreq)) {
			/*
			 * Unlock page to allow nfs_page_group_sync_on_bit()
			 * to succeed
			 */
			nfs_page_group_unlock(head);
			ret = nfs_wait_on_request(subreq);
			if (!ret)
				ret = nfs_page_group_lock(head);
			if (ret < 0) {
				nfs_unroll_locks(inode, head, subreq);
				nfs_release_request(subreq);
				goto release_request;
			}
		}
		/*
		 * Subrequests are always contiguous, non overlapping
		 * and in order - but may be repeated (mirrored writes).
		 */
		if (subreq->wb_offset == (head->wb_offset + total_bytes)) {
			/* keep track of how many bytes this group covers */
			total_bytes += subreq->wb_bytes;
		} else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset ||
			    ((subreq->wb_offset + subreq->wb_bytes) >
			     (head->wb_offset + total_bytes)))) {
			nfs_page_group_unlock(head);
			nfs_unroll_locks(inode, head, subreq);
			nfs_unlock_and_release_request(subreq);
			ret = -EIO;
			goto release_request;
		}
	}

	/* Now that all requests are locked, make sure they aren't on any list.
	 * Commit list removal accounting is done after locks are dropped */
	subreq = head;
	do {
		nfs_clear_request_commit(subreq);
		subreq = subreq->wb_this_page;
	} while (subreq != head);

	/* unlink subrequests from head, destroy them later */
	if (head->wb_this_page != head) {
		/* destroy list will be terminated by head */
		destroy_list = head->wb_this_page;
		head->wb_this_page = head;

		/* change head request to cover whole range that
		 * the former page group covered */
		head->wb_bytes = total_bytes;
	}

	/* Postpone destruction of this request */
	if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) {
		set_bit(PG_INODE_REF, &head->wb_flags);
		kref_get(&head->wb_kref);
		atomic_long_inc(&NFS_I(inode)->nrequests);
	}

	nfs_page_group_unlock(head);

	nfs_destroy_unlinked_subrequests(destroy_list, head, inode);

	/* Did we lose a race with nfs_inode_remove_request()? */
	if (!(PagePrivate(page) || PageSwapCache(page))) {
		nfs_unlock_and_release_request(head);
		return NULL;
	}

	/* still holds ref on head from nfs_page_find_head_request
	 * and still has lock on head from lock loop */
	return head;

release_request:
	nfs_unlock_and_release_request(head);
	return ERR_PTR(ret);
}

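/*
 * Callers handle the tristate return value above with the usual
 * IS_ERR_OR_NULL() pattern (an illustrative sketch of the shape used by
 * nfs_page_async_flush() and nfs_try_to_update_request() below):
 *
 *	req = nfs_lock_and_join_requests(page);
 *	if (!req)
 *		return 0;		// nothing to flush
 *	if (IS_ERR(req))
 *		return PTR_ERR(req);	// fatal, or interrupted wait
 *	...				// req is locked and referenced
 *	nfs_unlock_and_release_request(req);
 */
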
static void nfs_write_error(struct nfs_page *req, int error)
{
	nfs_set_pageerror(page_file_mapping(req->wb_page));
	nfs_mapping_set_error(req->wb_page, error);
	nfs_inode_remove_request(req);
	nfs_end_page_writeback(req);
	nfs_release_request(req);
}

/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				struct page *page)
{
	struct nfs_page *req;
	int ret = 0;

	req = nfs_lock_and_join_requests(page);
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	nfs_set_page_writeback(page);
	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));

	/* If there is a fatal error that covers this write, just exit */
	ret = pgio->pg_error;
	if (nfs_error_is_fatal_on_server(ret))
		goto out_launder;

	ret = 0;
	if (!nfs_pageio_add_request(pgio, req)) {
		ret = pgio->pg_error;
		/*
		 * Remove the problematic req upon fatal errors on the server
		 */
		if (nfs_error_is_fatal(ret)) {
			if (nfs_error_is_fatal_on_server(ret))
				goto out_launder;
		} else
			ret = -EAGAIN;
		nfs_redirty_request(req);
		pgio->pg_error = 0;
	} else
		nfs_add_stats(page_file_mapping(page)->host,
				NFSIOS_WRITEPAGES, 1);
out:
	return ret;
out_launder:
	nfs_write_error(req, ret);
	return 0;
}

static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
			    struct nfs_pageio_descriptor *pgio)
{
	int ret;

	nfs_pageio_cond_complete(pgio, page_index(page));
	ret = nfs_page_async_flush(pgio, page);
	if (ret == -EAGAIN) {
		redirty_page_for_writepage(wbc, page);
		ret = AOP_WRITEPAGE_ACTIVATE;
	}
	return ret;
}

/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct page *page,
				struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	struct inode *inode = page_file_mapping(page)->host;
	int err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_pageio_init_write(&pgio, inode, 0,
				false, &nfs_async_write_completion_ops);
	err = nfs_do_writepage(page, wbc, &pgio);
	pgio.pg_error = 0;
	nfs_pageio_complete(&pgio);
	return err;
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_writepage_locked(page, wbc);
	if (ret != AOP_WRITEPAGE_ACTIVATE)
		unlock_page(page);
	return ret;
}

static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(page, wbc, data);
	if (ret != AOP_WRITEPAGE_ACTIVATE)
		unlock_page(page);
	return ret;
}

static void nfs_io_completion_commit(void *inode)
{
	nfs_commit_inode(inode, 0);
}

int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct nfs_pageio_descriptor pgio;
	struct nfs_io_completion *ioc;
	int err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	ioc = nfs_io_completion_alloc(GFP_KERNEL);
	if (ioc)
		nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);

	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
				&nfs_async_write_completion_ops);
	pgio.pg_io_completion = ioc;
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
	pgio.pg_error = 0;
	nfs_pageio_complete(&pgio);
	nfs_io_completion_put(ioc);

	if (err < 0)
		goto out_err;
	return 0;
out_err:
	return err;
}

/*
 * Insert a write request into an inode
 */
static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct address_space *mapping = page_file_mapping(req->wb_page);
	struct nfs_inode *nfsi = NFS_I(inode);

	WARN_ON_ONCE(req->wb_this_page != req);

	/* Lock the request! */
	nfs_lock_request(req);

	/*
	 * Swap-space should not get truncated. Hence no need to plug the race
	 * with invalidate/truncate.
	 */
	spin_lock(&mapping->private_lock);
	if (!nfs_have_writebacks(inode) &&
	    NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
		inode_inc_iversion_raw(inode);
	if (likely(!PageSwapCache(req->wb_page))) {
		set_bit(PG_MAPPED, &req->wb_flags);
		SetPagePrivate(req->wb_page);
		set_page_private(req->wb_page, (unsigned long)req);
	}
	spin_unlock(&mapping->private_lock);
	atomic_long_inc(&nfsi->nrequests);
	/* this is a head request for a page group - mark it as having an
	 * extra reference so sub groups can follow suit.
	 * This flag also informs pgio layer when to bump nrequests when
	 * adding subrequests. */
	WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
	kref_get(&req->wb_kref);
}

/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct address_space *mapping = page_file_mapping(req->wb_page);
	struct inode *inode = mapping->host;
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *head;

	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
		head = req->wb_head;

		spin_lock(&mapping->private_lock);
		if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
			set_page_private(head->wb_page, 0);
			ClearPagePrivate(head->wb_page);
			clear_bit(PG_MAPPED, &head->wb_flags);
		}
		spin_unlock(&mapping->private_lock);
	}

	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
		nfs_release_request(req);
		atomic_long_dec(&nfsi->nrequests);
	}
}

static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	if (req->wb_page)
		__set_page_dirty_nobuffers(req->wb_page);
}

/*
 * nfs_page_search_commits_for_head_request_locked
 *
 * Search through commit lists on @inode for the head request for @page.
 * Must be called while holding NFS_I(inode)->commit_mutex.
 *
 * Returns the head request if found, or NULL if not found.
 */
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
						struct page *page)
{
	struct nfs_page *freq, *t;
	struct nfs_commit_info cinfo;
	struct inode *inode = &nfsi->vfs_inode;

	nfs_init_cinfo_from_inode(&cinfo, inode);

	/* search through pnfs commit lists */
	freq = pnfs_search_commit_reqs(inode, &cinfo, page);
	if (freq)
		return freq->wb_head;

	/* Linearly search the commit list for the correct request */
	list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
		if (freq->wb_page == page)
			return freq->wb_head;
	}

	return NULL;
}

/**
 * nfs_request_add_commit_list_locked - add request to a commit list
 * @req: pointer to a struct nfs_page
 * @dst: commit list head
 * @cinfo: holds list lock and accounting info
 *
 * This sets the PG_CLEAN bit, updates the cinfo count of
 * number of outstanding requests requiring a commit as well as
 * the MM page stats.
 *
 * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the
 * nfs_page lock.
 */
void
nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
				   struct nfs_commit_info *cinfo)
{
	set_bit(PG_CLEAN, &req->wb_flags);
	nfs_list_add_request(req, dst);
	atomic_long_inc(&cinfo->mds->ncommit);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);

/**
 * nfs_request_add_commit_list - add request to a commit list
 * @req: pointer to a struct nfs_page
 * @cinfo: holds list lock and accounting info
 *
 * This sets the PG_CLEAN bit, updates the cinfo count of
 * number of outstanding requests requiring a commit as well as
 * the MM page stats.
 *
 * The caller must _not_ hold the cinfo->lock, but must be
 * holding the nfs_page lock.
 */
void
nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
	nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
	if (req->wb_page)
		nfs_mark_page_unstable(req->wb_page, cinfo);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);

/**
 * nfs_request_remove_commit_list - Remove request from a commit list
 * @req: pointer to a nfs_page
 * @cinfo: holds list lock and accounting info
 *
 * This clears the PG_CLEAN bit, and updates the cinfo's count of
 * the number of outstanding requests requiring a commit.
 * It does not update the MM page stats.
 *
 * The caller _must_ hold NFS_I(cinfo->inode)->commit_mutex and the
 * nfs_page lock.
 */
void
nfs_request_remove_commit_list(struct nfs_page *req,
			       struct nfs_commit_info *cinfo)
{
	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
		return;
	nfs_list_remove_request(req);
	atomic_long_dec(&cinfo->mds->ncommit);
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);

static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode)
{
	cinfo->inode = inode;
	cinfo->mds = &NFS_I(inode)->commit_info;
	cinfo->ds = pnfs_get_ds_info(inode);
	cinfo->dreq = NULL;
	cinfo->completion_ops = &nfs_commit_completion_ops;
}

void nfs_init_cinfo(struct nfs_commit_info *cinfo,
		    struct inode *inode,
		    struct nfs_direct_req *dreq)
{
	if (dreq)
		nfs_init_cinfo_from_dreq(cinfo, dreq);
	else
		nfs_init_cinfo_from_inode(cinfo, inode);
}
EXPORT_SYMBOL_GPL(nfs_init_cinfo);

/*
 * Add a request to the inode's commit list.
 */
void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
			struct nfs_commit_info *cinfo, u32 ds_commit_idx)
{
	if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
		return;
	nfs_request_add_commit_list(req, cinfo);
}

static void
nfs_clear_page_commit(struct page *page)
{
	dec_node_page_state(page, NR_UNSTABLE_NFS);
	dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
		    WB_RECLAIMABLE);
}

/* Called holding the request lock on @req */
static void
nfs_clear_request_commit(struct nfs_page *req)
{
	if (test_bit(PG_CLEAN, &req->wb_flags)) {
		struct nfs_open_context *ctx = nfs_req_openctx(req);
		struct inode *inode = d_inode(ctx->dentry);
		struct nfs_commit_info cinfo;

		nfs_init_cinfo_from_inode(&cinfo, inode);
		mutex_lock(&NFS_I(inode)->commit_mutex);
		if (!pnfs_clear_request_commit(req, &cinfo)) {
			nfs_request_remove_commit_list(req, &cinfo);
		}
		mutex_unlock(&NFS_I(inode)->commit_mutex);
		nfs_clear_page_commit(req->wb_page);
	}
}

int nfs_write_need_commit(struct nfs_pgio_header *hdr)
{
	if (hdr->verf.committed == NFS_DATA_SYNC)
		return hdr->lseg == NULL;
	return hdr->verf.committed != NFS_FILE_SYNC;
}

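/*
 * Truth table for nfs_write_need_commit(), per the NFS stable_how
 * levels: NFS_FILE_SYNC never needs a commit; NFS_DATA_SYNC needs one
 * only when the write went through the MDS (hdr->lseg == NULL);
 * NFS_UNSTABLE always does.
 */
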
static void nfs_async_write_init(struct nfs_pgio_header *hdr)
{
	nfs_io_completion_get(hdr->io_completion);
}

static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
	struct nfs_commit_info cinfo;
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
	nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);

		bytes += req->wb_bytes;
		nfs_list_remove_request(req);
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
		    (hdr->good_bytes < bytes)) {
			nfs_set_pageerror(page_file_mapping(req->wb_page));
			nfs_mapping_set_error(req->wb_page, hdr->error);
			goto remove_req;
		}
		if (nfs_write_need_commit(hdr)) {
			/* Reset wb_nio, since the write was successful. */
			req->wb_nio = 0;
			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
			nfs_mark_request_commit(req, hdr->lseg, &cinfo,
				hdr->pgio_mirror_idx);
			goto next;
		}
remove_req:
		nfs_inode_remove_request(req);
next:
		nfs_end_page_writeback(req);
		nfs_release_request(req);
	}
out:
	nfs_io_completion_put(hdr->io_completion);
	hdr->release(hdr);
}

unsigned long
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
	return atomic_long_read(&cinfo->mds->ncommit);
}

/* NFS_I(cinfo->inode)->commit_mutex held by caller */
int
nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
		     struct nfs_commit_info *cinfo, int max)
{
	struct nfs_page *req, *tmp;
	int ret = 0;

	list_for_each_entry_safe(req, tmp, src, wb_list) {
		kref_get(&req->wb_kref);
		if (!nfs_lock_request(req)) {
			nfs_release_request(req);
			continue;
		}
		nfs_request_remove_commit_list(req, cinfo);
		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
		nfs_list_add_request(req, dst);
		ret++;
		if ((ret == max) && !cinfo->dreq)
			break;
		cond_resched();
	}
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_scan_commit_list);

/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: mds destination list
 * @cinfo: mds and ds lists of reqs ready to commit
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
int
nfs_scan_commit(struct inode *inode, struct list_head *dst,
		struct nfs_commit_info *cinfo)
{
	int ret = 0;

	if (!atomic_long_read(&cinfo->mds->ncommit))
		return 0;
	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
	if (atomic_long_read(&cinfo->mds->ncommit) > 0) {
		const int max = INT_MAX;

		ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
					   cinfo, max);
		ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
	}
	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
	return ret;
}

/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	end = offset + bytes;

	req = nfs_lock_and_join_requests(page);
	if (IS_ERR_OR_NULL(req))
		return req;

	rqend = req->wb_offset + req->wb_bytes;
	/*
	 * Tell the caller to flush out the request if
	 * the offsets are non-contiguous.
	 * Note: nfs_flush_incompatible() will already
	 * have flushed out requests having wrong owners.
	 */
	if (offset > rqend || end < req->wb_offset)
		goto out_flushme;

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
	req->wb_nio = 0;
	return req;
out_flushme:
	/*
	 * Note: we mark the request dirty here because
	 * nfs_lock_and_join_requests() cannot preserve
	 * commit flags, so we have to replay the write.
	 */
	nfs_mark_request_dirty(req);
	nfs_unlock_and_release_request(req);
	error = nfs_wb_page(inode, page);
	return (error < 0) ? ERR_PTR(error) : NULL;
}

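/*
 * The region-update arithmetic above, worked through with hypothetical
 * values: an existing request covering offset 1024, 2048 bytes (rqend ==
 * 3072), merged with a new dirty range offset == 512, bytes == 1024
 * (end == 1536), yields wb_offset = wb_pgbase = 512 and wb_bytes =
 * 3072 - 512 = 2560, i.e. the union [512,3072). A range like
 * [4096,5120) would be non-contiguous (offset > rqend) and take the
 * out_flushme path instead.
 */
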
/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_page *req;

	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_create_request(ctx, page, offset, bytes);
	if (IS_ERR(req))
		goto out;
	nfs_inode_add_request(inode, req);
out:
	return req;
}

static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page *req;

	req = nfs_setup_write_request(ctx, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(req);
	nfs_mark_request_dirty(req);
	nfs_unlock_and_release_request(req);
	return 0;
}

int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_lock_context *l_ctx;
	struct file_lock_context *flctx = file_inode(file)->i_flctx;
	struct nfs_page *req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_page_find_head_request(page);
		if (req == NULL)
			return 0;
		l_ctx = req->wb_lock_context;
		do_flush = req->wb_page != page ||
			!nfs_match_open_context(nfs_req_openctx(req), ctx);
		if (l_ctx && flctx &&
		    !(list_empty_careful(&flctx->flc_posix) &&
		      list_empty_careful(&flctx->flc_flock))) {
			do_flush |= l_ctx->lockowner != current->files;
		}
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_page(page_file_mapping(page)->host, page);
	} while (status == 0);
	return status;
}

/*
 * Avoid buffered writes when an open context credential's key would
 * expire soon.
 *
 * Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL.
 *
 * Return 0 and set a credential flag which triggers the inode to flush
 * and performs NFS_FILE_SYNC writes if the key will expire within
 * RPC_KEY_EXPIRE_TIMEO.
 */
int
nfs_key_timeout_notify(struct file *filp, struct inode *inode)
{
	struct nfs_open_context *ctx = nfs_file_open_context(filp);

	if (nfs_ctx_key_to_expire(ctx, inode) &&
	    !ctx->ll_cred)
		/* Already expired! */
		return -EACCES;
	return 0;
}

/*
 * Test if the open context credential key is marked to expire soon.
 */
bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
{
	struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
	struct rpc_cred *cred = ctx->ll_cred;
	struct auth_cred acred = {
		.cred = ctx->cred,
	};

	if (cred && !cred->cr_ops->crmatch(&acred, cred, 0)) {
		put_rpccred(cred);
		ctx->ll_cred = NULL;
		cred = NULL;
	}
	if (!cred)
		cred = auth->au_ops->lookup_cred(auth, &acred, 0);
	if (!cred || IS_ERR(cred))
		return true;
	ctx->ll_cred = cred;
	return !!(cred->cr_ops->crkey_timeout &&
		  cred->cr_ops->crkey_timeout(cred));
}

/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	if (nfs_have_delegated_attributes(inode))
		goto out;
	if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
		return false;
	smp_rmb();
	if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
		return false;
out:
	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
		return false;
	return PageUptodate(page) != 0;
}

static bool
is_whole_file_wrlock(struct file_lock *fl)
{
	return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
			fl->fl_type == F_WRLCK;
}

/* If we know the page is up to date, and we're not using byte range locks (or
 * if we have the whole file locked for writing), it may be more efficient to
 * extend the write to cover the entire page in order to avoid fragmentation
 * inefficiencies.
 *
 * If the file is opened for synchronous writes then we can just skip the rest
 * of the checks.
 */
static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
{
	int ret;
	struct file_lock_context *flctx = inode->i_flctx;
	struct file_lock *fl;

	if (file->f_flags & O_DSYNC)
		return 0;
	if (!nfs_write_pageuptodate(page, inode))
		return 0;
	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
		return 1;
	if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
		       list_empty_careful(&flctx->flc_posix)))
		return 1;

	/* Check to see if there are whole file write locks */
	ret = 0;
	spin_lock(&flctx->flc_lock);
	if (!list_empty(&flctx->flc_posix)) {
		fl = list_first_entry(&flctx->flc_posix, struct file_lock,
					fl_list);
		if (is_whole_file_wrlock(fl))
			ret = 1;
	} else if (!list_empty(&flctx->flc_flock)) {
		fl = list_first_entry(&flctx->flc_flock, struct file_lock,
					fl_list);
		if (fl->fl_type == F_WRLCK)
			ret = 1;
	}
	spin_unlock(&flctx->flc_lock);
	return ret;
}

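/*
 * Effect of a successful nfs_can_extend_write() in nfs_updatepage()
 * below, with hypothetical numbers: a 100-byte write at offset 200 into
 * an up-to-date 4096-byte page is widened to offset = 0, count =
 * max(200 + 100, nfs_page_length(page)) = 4096, producing one page-sized
 * request instead of a 100-byte fragment.
 */
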
/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct address_space *mapping = page_file_mapping(page);
	struct inode *inode = mapping->host;
	int status = 0;

	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

	dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n",
		file, count, (long long)(page_file_offset(page) + offset));

	if (!count)
		goto out;

	if (nfs_can_extend_write(file, page, inode)) {
		count = max(count + offset, nfs_page_length(page));
		offset = 0;
	}

	status = nfs_writepage_setup(ctx, page, offset, count);
	if (status < 0)
		nfs_set_pageerror(mapping);
	else
		__set_page_dirty_nobuffers(page);
out:
	dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
			status, (long long)i_size_read(inode));
	return status;
}

static int flush_task_priority(int how)
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
	case FLUSH_HIGHPRI:
		return RPC_PRIORITY_HIGH;
	case FLUSH_LOWPRI:
		return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

static void nfs_initiate_write(struct nfs_pgio_header *hdr,
			       struct rpc_message *msg,
			       const struct nfs_rpc_ops *rpc_ops,
			       struct rpc_task_setup *task_setup_data, int how)
{
	int priority = flush_task_priority(how);

	task_setup_data->priority = priority;
	rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
	trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes,
				 hdr->args.stable);
}

/* If an nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on the next
 * writeback using standard nfs.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	/* Bump the transmission count */
	req->wb_nio++;
	nfs_mark_request_dirty(req);
	set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
	nfs_end_page_writeback(req);
	nfs_release_request(req);
}

static void nfs_async_write_error(struct list_head *head, int error)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		if (nfs_error_is_fatal_on_server(error))
			nfs_write_error(req, error);
		else
			nfs_redirty_request(req);
	}
}

static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
{
	nfs_async_write_error(&hdr->pages, 0);
	filemap_fdatawrite_range(hdr->inode->i_mapping, hdr->args.offset,
				 hdr->args.offset + hdr->args.count - 1);
}

static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
	.init_hdr = nfs_async_write_init,
	.error_cleanup = nfs_async_write_error,
	.completion = nfs_write_completion,
	.reschedule_io = nfs_async_write_reschedule_io,
};

void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
			   struct inode *inode, int ioflags, bool force_mds,
			   const struct nfs_pgio_completion_ops *compl_ops)
{
	struct nfs_server *server = NFS_SERVER(inode);
	const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;

#ifdef CONFIG_NFS_V4_1
	if (server->pnfs_curr_ld && !force_mds)
		pg_ops = server->pnfs_curr_ld->pg_write_ops;
#endif
	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
			server->wsize, ioflags);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);

void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
	struct nfs_pgio_mirror *mirror;

	if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
		pgio->pg_ops->pg_cleanup(pgio);

	pgio->pg_ops = &nfs_pgio_rw_ops;

	nfs_pageio_stop_mirroring(pgio);

	mirror = &pgio->pg_mirrors[0];
	mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);


void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_commit_data *data = calldata;

	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}

/*
 * Special version of should_remove_suid() that ignores capabilities.
 */
static int nfs_should_remove_suid(const struct inode *inode)
{
	umode_t mode = inode->i_mode;
	int kill = 0;

	/* suid always must be killed */
	if (unlikely(mode & S_ISUID))
		kill = ATTR_KILL_SUID;

	/*
	 * sgid without any exec bits is just a mandatory locking mark; leave
	 * it alone. If some exec bits are set, it's a real sgid; kill it.
	 */
	if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
		kill |= ATTR_KILL_SGID;

	if (unlikely(kill && S_ISREG(mode)))
		return kill;

	return 0;
}

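/*
 * Worked example for nfs_should_remove_suid(), with a hypothetical mode:
 * a regular file with mode 06755 (S_ISUID | S_ISGID set, group exec set)
 * yields ATTR_KILL_SUID | ATTR_KILL_SGID, while 02644 (setgid, no group
 * exec) is treated as a mandatory-locking mark and returns 0.
 */
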
static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
		struct nfs_fattr *fattr)
{
	struct nfs_pgio_args *argp = &hdr->args;
	struct nfs_pgio_res *resp = &hdr->res;
	u64 size = argp->offset + resp->count;

	if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
		fattr->size = size;
	if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) {
		fattr->valid &= ~NFS_ATTR_FATTR_SIZE;
		return;
	}
	if (size != fattr->size)
		return;
	/* Set attribute barrier */
	nfs_fattr_set_barrier(fattr);
	/* ...and update size */
	fattr->valid |= NFS_ATTR_FATTR_SIZE;
}

void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
{
	struct nfs_fattr *fattr = &hdr->fattr;
	struct inode *inode = hdr->inode;

	spin_lock(&inode->i_lock);
	nfs_writeback_check_extend(hdr, fattr);
	nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);

/*
 * This function is called when the WRITE call is complete.
 */
static int nfs_writeback_done(struct rpc_task *task,
			      struct nfs_pgio_header *hdr,
			      struct inode *inode)
{
	int status;

	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients. A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
	status = NFS_PROTO(inode)->write_done(task, hdr);
	if (status != 0)
		return status;

	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
	trace_nfs_writeback_done(inode, task->tk_status,
				 hdr->args.offset, hdr->res.verf);

	if (hdr->res.verf->committed < hdr->args.stable &&
	    task->tk_status >= 0) {
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long	complain;

		/* Note this will print the MDS for a DS write */
		if (time_before(complain, jiffies)) {
			dprintk("NFS:       faulty NFS server %s:"
				" (committed = %d) != (stable = %d)\n",
				NFS_SERVER(inode)->nfs_client->cl_hostname,
				hdr->res.verf->committed, hdr->args.stable);
			complain = jiffies + 300 * HZ;
		}
	}

	/* Deal with the suid/sgid bit corner case */
	if (nfs_should_remove_suid(inode)) {
		spin_lock(&inode->i_lock);
		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER;
		spin_unlock(&inode->i_lock);
	}
	return 0;
}

/*
 * This function is called when the WRITE call is complete.
 */
static void nfs_writeback_result(struct rpc_task *task,
				 struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_args *argp = &hdr->args;
	struct nfs_pgio_res *resp = &hdr->res;

	if (resp->count < argp->count) {
		static unsigned long	complain;

1612 nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
1613
1614 /* Has the server at least made some progress? */
1615 if (resp->count == 0) {
1616 if (time_before(complain, jiffies)) {
1617 printk(KERN_WARNING
1618 "NFS: Server wrote zero bytes, expected %u.\n",
1619 argp->count);
1620 complain = jiffies + 300 * HZ;
1621 }
1622 nfs_set_pgio_error(hdr, -EIO, argp->offset);
1623 task->tk_status = -EIO;
1624 return;
1625 }
1626
1627 /* For non rpc-based layout drivers, retry-through-MDS */
1628 if (!task->tk_ops) {
1629 hdr->pnfs_error = -EAGAIN;
1630 return;
1631 }
1632
1633 /* Was this an NFSv2 write or an NFSv3 stable write? */
1634 if (resp->verf->committed != NFS_UNSTABLE) {
1635 /* Resend from where the server left off */
1636 hdr->mds_offset += resp->count;
1637 argp->offset += resp->count;
1638 argp->pgbase += resp->count;
1639 argp->count -= resp->count;
1640 } else {
1641 /* Resend as a stable write in order to avoid
1642 * headaches in the case of a server crash.
1643 */
1644 argp->stable = NFS_FILE_SYNC;
1645 }
1646 rpc_restart_call_prepare(task);
1647 }
1648}
1649
1650static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
1651{
1652 return wait_var_event_killable(&cinfo->rpcs_out,
1653 !atomic_read(&cinfo->rpcs_out));
1654}
1655
1656static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
1657{
1658 atomic_inc(&cinfo->rpcs_out);
1659}
1660
1661static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
1662{
1663 if (atomic_dec_and_test(&cinfo->rpcs_out))
1664 wake_up_var(&cinfo->rpcs_out);
1665}
1666
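/*
 * The rpcs_out counter implements a simple completion barrier: a minimal
 * sketch of the protocol, mirroring how __nfs_commit_inode() further
 * down uses it:
 *
 *	nfs_commit_begin(cinfo.mds);	// hold off waiters
 *	...				// queue zero or more COMMIT rpcs;
 *					// each completion drops a reference
 *	nfs_commit_end(cinfo.mds);	// drop our own reference
 *	wait_on_commit(cinfo.mds);	// sleeps until rpcs_out reaches 0
 */
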
1667void nfs_commitdata_release(struct nfs_commit_data *data)
1668{
1669 put_nfs_open_context(data->context);
1670 nfs_commit_free(data);
1671}
1672EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1673
1674int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1675 const struct nfs_rpc_ops *nfs_ops,
1676 const struct rpc_call_ops *call_ops,
1677 int how, int flags)
1678{
1679 struct rpc_task *task;
1680 int priority = flush_task_priority(how);
1681 struct rpc_message msg = {
1682 .rpc_argp = &data->args,
1683 .rpc_resp = &data->res,
1684 .rpc_cred = data->cred,
1685 };
1686 struct rpc_task_setup task_setup_data = {
1687 .task = &data->task,
1688 .rpc_client = clnt,
1689 .rpc_message = &msg,
1690 .callback_ops = call_ops,
1691 .callback_data = data,
1692 .workqueue = nfsiod_workqueue,
1693 .flags = RPC_TASK_ASYNC | flags,
1694 .priority = priority,
1695 };
1696 /* Set up the initial task struct. */
1697 nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client);
1698 trace_nfs_initiate_commit(data);
1699
1700 dprintk("NFS: initiated commit call\n");
1701
1702 task = rpc_run_task(&task_setup_data);
1703 if (IS_ERR(task))
1704 return PTR_ERR(task);
1705 if (how & FLUSH_SYNC)
1706 rpc_wait_for_completion_task(task);
1707 rpc_put_task(task);
1708 return 0;
1709}
1710EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1711
1712static loff_t nfs_get_lwb(struct list_head *head)
1713{
1714 loff_t lwb = 0;
1715 struct nfs_page *req;
1716
1717 list_for_each_entry(req, head, wb_list)
1718 if (lwb < (req_offset(req) + req->wb_bytes))
1719 lwb = req_offset(req) + req->wb_bytes;
1720
1721 return lwb;
1722}
1723
1724/*
1725 * Set up the argument/result storage required for the RPC call.
1726 */
1727void nfs_init_commit(struct nfs_commit_data *data,
1728 struct list_head *head,
1729 struct pnfs_layout_segment *lseg,
1730 struct nfs_commit_info *cinfo)
1731{
1732 struct nfs_page *first = nfs_list_entry(head->next);
1733 struct nfs_open_context *ctx = nfs_req_openctx(first);
1734 struct inode *inode = d_inode(ctx->dentry);
1735
1736 /* Set up the RPC argument and reply structs
1737 * NB: take care not to mess about with data->commit et al. */
1738
1739 list_splice_init(head, &data->pages);
1740
1741 data->inode = inode;
1742 data->cred = ctx->cred;
1743 data->lseg = lseg; /* reference transferred */
1744 /* only set lwb for pnfs commit */
1745 if (lseg)
1746 data->lwb = nfs_get_lwb(&data->pages);
1747 data->mds_ops = &nfs_commit_ops;
1748 data->completion_ops = cinfo->completion_ops;
1749 data->dreq = cinfo->dreq;
1750
1751 data->args.fh = NFS_FH(data->inode);
1752 /* Note: we always request a commit of the entire inode */
1753 data->args.offset = 0;
1754 data->args.count = 0;
1755 data->context = get_nfs_open_context(ctx);
1756 data->res.fattr = &data->fattr;
1757 data->res.verf = &data->verf;
1758 nfs_fattr_init(&data->fattr);
1759}
1760EXPORT_SYMBOL_GPL(nfs_init_commit);
1761
1762void nfs_retry_commit(struct list_head *page_list,
1763 struct pnfs_layout_segment *lseg,
1764 struct nfs_commit_info *cinfo,
1765 u32 ds_commit_idx)
1766{
1767 struct nfs_page *req;
1768
1769 while (!list_empty(page_list)) {
1770 req = nfs_list_entry(page_list->next);
1771 nfs_list_remove_request(req);
1772 nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx);
1773 if (!cinfo->dreq)
1774 nfs_clear_page_commit(req->wb_page);
1775 nfs_unlock_and_release_request(req);
1776 }
1777}
1778EXPORT_SYMBOL_GPL(nfs_retry_commit);
1779
1780static void
1781nfs_commit_resched_write(struct nfs_commit_info *cinfo,
1782 struct nfs_page *req)
1783{
1784 __set_page_dirty_nobuffers(req->wb_page);
1785}
1786
1787/*
1788 * Commit dirty pages
1789 */
1790static int
1791nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1792 struct nfs_commit_info *cinfo)
1793{
1794 struct nfs_commit_data *data;
1795
1796 /* another commit raced with us */
1797 if (list_empty(head))
1798 return 0;
1799
1800 data = nfs_commitdata_alloc(true);
1801
1802 /* Set up the argument struct */
1803 nfs_init_commit(data, head, NULL, cinfo);
1804 atomic_inc(&cinfo->mds->rpcs_out);
1805 return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
1806 data->mds_ops, how, 0);
1807}
1808
1809/*
1810 * COMMIT call returned
1811 */
1812static void nfs_commit_done(struct rpc_task *task, void *calldata)
1813{
1814 struct nfs_commit_data *data = calldata;
1815
1816 dprintk("NFS: %5u nfs_commit_done (status %d)\n",
1817 task->tk_pid, task->tk_status);
1818
1819 /* Call the NFS version-specific code */
1820 NFS_PROTO(data->inode)->commit_done(task, data);
1821 trace_nfs_commit_done(data);
1822}
1823
1824static void nfs_commit_release_pages(struct nfs_commit_data *data)
1825{
1826 const struct nfs_writeverf *verf = data->res.verf;
1827 struct nfs_page *req;
1828 int status = data->task.tk_status;
1829 struct nfs_commit_info cinfo;
1830 struct nfs_server *nfss;
1831
1832 while (!list_empty(&data->pages)) {
1833 req = nfs_list_entry(data->pages.next);
1834 nfs_list_remove_request(req);
1835 if (req->wb_page)
1836 nfs_clear_page_commit(req->wb_page);
1837
1838 dprintk("NFS: commit (%s/%llu %d@%lld)",
1839 nfs_req_openctx(req)->dentry->d_sb->s_id,
1840 (unsigned long long)NFS_FILEID(d_inode(nfs_req_openctx(req)->dentry)),
1841 req->wb_bytes,
1842 (long long)req_offset(req));
1843 if (status < 0) {
1844 if (req->wb_page) {
1845 nfs_mapping_set_error(req->wb_page, status);
1846 nfs_inode_remove_request(req);
1847 }
1848 dprintk_cont(", error = %d\n", status);
1849 goto next;
1850 }
1851
1852 /* Okay, COMMIT succeeded, apparently. Check the verifier
1853 * returned by the server against all stored verfs. */
1854 if (verf->committed > NFS_UNSTABLE &&
1855 !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) {
1856 /* We have a match */
1857 if (req->wb_page)
1858 nfs_inode_remove_request(req);
1859 dprintk_cont(" OK\n");
1860 goto next;
1861 }
1862 /* We have a mismatch. Write the page again */
1863 dprintk_cont(" mismatch\n");
1864 nfs_mark_request_dirty(req);
1865 set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
1866 next:
1867 nfs_unlock_and_release_request(req);
1868 /* Latency breaker */
1869 cond_resched();
1870 }
1871 nfss = NFS_SERVER(data->inode);
1872 if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
1873 clear_bdi_congested(inode_to_bdi(data->inode), BLK_RW_ASYNC);
1874
1875 nfs_init_cinfo(&cinfo, data->inode, data->dreq);
1876 nfs_commit_end(cinfo.mds);
1877}
1878
static void nfs_commit_release(void *calldata)
{
	struct nfs_commit_data *data = calldata;

	data->completion_ops->completion(data);
	nfs_commitdata_release(calldata);
}

static const struct rpc_call_ops nfs_commit_ops = {
	.rpc_call_prepare = nfs_commit_prepare,
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};

static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
	.completion = nfs_commit_release_pages,
	.resched_write = nfs_commit_resched_write,
};

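/*
 * Try to commit through the pNFS layout driver first, and fall back
 * to a COMMIT against the MDS if the layout driver did not attempt it.
 */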
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
			    int how, struct nfs_commit_info *cinfo)
{
	int status;

	status = pnfs_commit_list(inode, head, how, cinfo);
	if (status == PNFS_NOT_ATTEMPTED)
		status = nfs_commit_list(inode, head, how, cinfo);
	return status;
}

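/*
 * Repeatedly scan the inode for requests needing a COMMIT and send
 * them, crediting the scanned requests against wbc->nr_to_write for
 * a non-blocking flush.  With FLUSH_SYNC set in @how, also wait for
 * the outstanding commits to complete.
 */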
static int __nfs_commit_inode(struct inode *inode, int how,
			      struct writeback_control *wbc)
{
	LIST_HEAD(head);
	struct nfs_commit_info cinfo;
	int may_wait = how & FLUSH_SYNC;
	int ret, nscan;

	how &= ~FLUSH_SYNC;
	nfs_init_cinfo_from_inode(&cinfo, inode);
	nfs_commit_begin(cinfo.mds);
	for (;;) {
		ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
		if (ret <= 0)
			break;
		ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
		if (ret < 0)
			break;
		ret = 0;
		if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
			if (nscan < wbc->nr_to_write)
				wbc->nr_to_write -= nscan;
			else
				wbc->nr_to_write = 0;
		}
		if (nscan < INT_MAX)
			break;
		cond_resched();
	}
	nfs_commit_end(cinfo.mds);
	if (ret || !may_wait)
		return ret;
	return wait_on_commit(cinfo.mds);
}

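/*
 * Commit the inode's outstanding unstable writes; set FLUSH_SYNC in
 * @how to wait for the commits to complete.
 */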
int nfs_commit_inode(struct inode *inode, int how)
{
	return __nfs_commit_inode(inode, how, NULL);
}
EXPORT_SYMBOL_GPL(nfs_commit_inode);

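/*
 * ->write_inode() callback for NFS.  Decides whether the unstable
 * writes on this inode should be committed now: a non-blocking flush
 * is deferred while pages are still under writeback, and the inode
 * is marked dirty again whenever commits remain outstanding.
 */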
int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int flags = FLUSH_SYNC;
	int ret = 0;

	if (wbc->sync_mode == WB_SYNC_NONE) {
		/* no commits means nothing needs to be done */
		if (!atomic_long_read(&nfsi->commit_info.ncommit))
			goto check_requests_outstanding;

		/* Don't commit yet if this is a non-blocking flush and
		 * there are still pages under writeback for this mapping.
		 */
		if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
			goto out_mark_dirty;

		/* don't wait for the COMMIT response */
		flags = 0;
	}

	ret = __nfs_commit_inode(inode, flags, wbc);
	if (!ret) {
		if (flags & FLUSH_SYNC)
			return 0;
	} else if (atomic_long_read(&nfsi->commit_info.ncommit))
		goto out_mark_dirty;

check_requests_outstanding:
	if (!atomic_read(&nfsi->commit_info.rpcs_out))
		return ret;
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_write_inode);

/*
 * Wrapper for filemap_write_and_wait_range()
 *
 * Needed for pNFS in order to ensure data becomes visible to the
 * client.
 */
int nfs_filemap_write_and_wait_range(struct address_space *mapping,
				     loff_t lstart, loff_t lend)
{
	int ret;

	ret = filemap_write_and_wait_range(mapping, lstart, lend);
	if (ret == 0)
		ret = pnfs_sync_inode(mapping->host, true);
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_filemap_write_and_wait_range);

/*
 * Flush the inode to disk: write back and wait on all dirty pages,
 * then commit the result to stable storage on the server.
 */
int nfs_wb_all(struct inode *inode)
{
	int ret;

	trace_nfs_writeback_inode_enter(inode);

	ret = filemap_write_and_wait(inode->i_mapping);
	if (ret)
		goto out;
	ret = nfs_commit_inode(inode, FLUSH_SYNC);
	if (ret < 0)
		goto out;
	pnfs_sync_inode(inode, true);
	ret = 0;

out:
	trace_nfs_writeback_inode_exit(inode, ret);
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_wb_all);

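/*
 * Cancel all outstanding write requests attached to a page, e.g.
 * because the page is about to be invalidated.
 */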
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *req;
	int ret = 0;

	wait_on_page_writeback(page);

	/* blocking call to cancel all requests and join to a single (head)
	 * request */
	req = nfs_lock_and_join_requests(page);

	if (IS_ERR(req)) {
		ret = PTR_ERR(req);
	} else if (req) {
		/* all requests from this page have been cancelled by
		 * nfs_lock_and_join_requests, so just remove the head
		 * request from the inode / page_private pointer and
		 * release it */
		nfs_inode_remove_request(req);
		nfs_unlock_and_release_request(req);
	}

	return ret;
}

/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
{
	loff_t range_start = page_file_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret;

	trace_nfs_writeback_page_enter(inode);

	for (;;) {
		wait_on_page_writeback(page);
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
			continue;
		}
		ret = 0;
		if (!PagePrivate(page))
			break;
		ret = nfs_commit_inode(inode, FLUSH_SYNC);
		if (ret < 0)
			goto out_error;
	}
out_error:
	trace_nfs_writeback_page_exit(inode, ret);
	return ret;
}

#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
		     struct page *page, enum migrate_mode mode)
{
	/*
	 * If PagePrivate is set, then the page is currently associated with
	 * an in-progress read or write request. Don't try to migrate it.
	 *
	 * FIXME: we could do this in principle, but we'll need a way to ensure
	 *	  that we can safely release the inode reference while holding
	 *	  the page lock.
	 */
	if (PagePrivate(page))
		return -EBUSY;

	if (!nfs_fscache_release_page(page, GFP_KERNEL))
		return -EBUSY;

	return migrate_page(mapping, newpage, page, mode);
}
#endif

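/*
 * Set up the slab caches and mempools backing write and commit data,
 * and derive the writeback congestion limit from available memory.
 */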
int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_pgio_header),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		goto out_destroy_write_cache;

	nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
					     sizeof(struct nfs_commit_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_cdata_cachep == NULL)
		goto out_destroy_write_mempool;

	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_cdata_cachep);
	if (nfs_commit_mempool == NULL)
		goto out_destroy_commit_cache;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
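	 *
	 * Worked example, assuming 4K pages: with 1GB of RAM,
	 * totalram_pages() is 262144 and int_sqrt(262144) is 512,
	 * so (16 * 512) << (PAGE_SHIFT - 10) = 8192 << 2 = 32768k,
	 * matching the 1GB entry in the table above.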
	 */
	nfs_congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;

	return 0;

out_destroy_commit_cache:
	kmem_cache_destroy(nfs_cdata_cachep);
out_destroy_write_mempool:
	mempool_destroy(nfs_wdata_mempool);
out_destroy_write_cache:
	kmem_cache_destroy(nfs_wdata_cachep);
	return -ENOMEM;
}

void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	kmem_cache_destroy(nfs_cdata_cachep);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
}

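/*
 * Hooks that tie the write path into the generic NFS page I/O engine.
 */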
static const struct nfs_rw_ops nfs_rw_write_ops = {
	.rw_alloc_header	= nfs_writehdr_alloc,
	.rw_free_header		= nfs_writehdr_free,
	.rw_done		= nfs_writeback_done,
	.rw_result		= nfs_writeback_result,
	.rw_initiate		= nfs_initiate_write,
};