| lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * linux/fs/nfs/write.c | 
|  | 3 | * | 
|  | 4 | * Write file data over NFS. | 
|  | 5 | * | 
|  | 6 | * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> | 
|  | 7 | */ | 
|  | 8 |  | 
|  | 9 | #include <linux/types.h> | 
|  | 10 | #include <linux/slab.h> | 
|  | 11 | #include <linux/mm.h> | 
|  | 12 | #include <linux/pagemap.h> | 
|  | 13 | #include <linux/file.h> | 
|  | 14 | #include <linux/writeback.h> | 
|  | 15 | #include <linux/swap.h> | 
|  | 16 | #include <linux/migrate.h> | 
|  | 17 |  | 
|  | 18 | #include <linux/sunrpc/clnt.h> | 
|  | 19 | #include <linux/nfs_fs.h> | 
|  | 20 | #include <linux/nfs_mount.h> | 
|  | 21 | #include <linux/nfs_page.h> | 
|  | 22 | #include <linux/backing-dev.h> | 
|  | 23 | #include <linux/export.h> | 
|  | 24 |  | 
|  | 25 | #include <asm/uaccess.h> | 
|  | 26 |  | 
|  | 27 | #include "delegation.h" | 
|  | 28 | #include "internal.h" | 
|  | 29 | #include "iostat.h" | 
|  | 30 | #include "nfs4_fs.h" | 
|  | 31 | #include "fscache.h" | 
|  | 32 | #include "pnfs.h" | 
|  | 33 |  | 
|  | 34 | #define NFSDBG_FACILITY		NFSDBG_PAGECACHE | 
|  | 35 |  | 
|  | 36 | #define MIN_POOL_WRITE		(32) | 
|  | 37 | #define MIN_POOL_COMMIT		(4) | 
|  | 38 |  | 
/*
 * Local function declarations
 *
 * Forward declarations for helpers and RPC callback tables that are used
 * below; their definitions are not in this part of the file.
 */
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
				  struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_partial_ops;
static const struct rpc_call_ops nfs_write_full_ops;
static const struct rpc_call_ops nfs_commit_ops;

/*
 * nfs_wdata_mempool backs nfs_writedata_alloc()/nfs_writedata_free() and
 * nfs_commit_mempool backs nfs_commitdata_alloc()/nfs_commit_free().
 * NOTE(review): nfs_wdata_cachep is presumably the slab cache behind the
 * pools - confirm at the initialisation site.
 */
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;
|  | 52 |  | 
|  | 53 | struct nfs_write_data *nfs_commitdata_alloc(void) | 
|  | 54 | { | 
|  | 55 | struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); | 
|  | 56 |  | 
|  | 57 | if (p) { | 
|  | 58 | memset(p, 0, sizeof(*p)); | 
|  | 59 | INIT_LIST_HEAD(&p->pages); | 
|  | 60 | } | 
|  | 61 | return p; | 
|  | 62 | } | 
|  | 63 | EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); | 
|  | 64 |  | 
|  | 65 | void nfs_commit_free(struct nfs_write_data *p) | 
|  | 66 | { | 
|  | 67 | if (p && (p->pagevec != &p->page_array[0])) | 
|  | 68 | kfree(p->pagevec); | 
|  | 69 | mempool_free(p, nfs_commit_mempool); | 
|  | 70 | } | 
|  | 71 | EXPORT_SYMBOL_GPL(nfs_commit_free); | 
|  | 72 |  | 
|  | 73 | struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) | 
|  | 74 | { | 
|  | 75 | struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); | 
|  | 76 |  | 
|  | 77 | if (p) { | 
|  | 78 | memset(p, 0, sizeof(*p)); | 
|  | 79 | INIT_LIST_HEAD(&p->pages); | 
|  | 80 | p->npages = pagecount; | 
|  | 81 | if (pagecount <= ARRAY_SIZE(p->page_array)) | 
|  | 82 | p->pagevec = p->page_array; | 
|  | 83 | else { | 
|  | 84 | p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); | 
|  | 85 | if (!p->pagevec) { | 
|  | 86 | mempool_free(p, nfs_wdata_mempool); | 
|  | 87 | p = NULL; | 
|  | 88 | } | 
|  | 89 | } | 
|  | 90 | } | 
|  | 91 | return p; | 
|  | 92 | } | 
|  | 93 |  | 
|  | 94 | void nfs_writedata_free(struct nfs_write_data *p) | 
|  | 95 | { | 
|  | 96 | if (p && (p->pagevec != &p->page_array[0])) | 
|  | 97 | kfree(p->pagevec); | 
|  | 98 | mempool_free(p, nfs_wdata_mempool); | 
|  | 99 | } | 
|  | 100 |  | 
|  | 101 | void nfs_writedata_release(struct nfs_write_data *wdata) | 
|  | 102 | { | 
|  | 103 | put_nfs_open_context(wdata->args.context); | 
|  | 104 | nfs_writedata_free(wdata); | 
|  | 105 | } | 
|  | 106 |  | 
/*
 * Record a write error on an open context.
 *
 * The error code is stored first and NFS_CONTEXT_ERROR_WRITE is set
 * afterwards, with a write barrier in between so the store to ->error is
 * ordered ahead of the flag update.
 * NOTE(review): readers presumably test the flag and then read ->error
 * with a matching read barrier - confirm at the consumer.
 */
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
	ctx->error = error;
	smp_wmb();
	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}
|  | 113 |  | 
|  | 114 | static struct nfs_page *nfs_page_find_request_locked(struct page *page) | 
|  | 115 | { | 
|  | 116 | struct nfs_page *req = NULL; | 
|  | 117 |  | 
|  | 118 | if (PagePrivate(page)) { | 
|  | 119 | req = (struct nfs_page *)page_private(page); | 
|  | 120 | if (req != NULL) | 
|  | 121 | kref_get(&req->wb_kref); | 
|  | 122 | } | 
|  | 123 | return req; | 
|  | 124 | } | 
|  | 125 |  | 
|  | 126 | static struct nfs_page *nfs_page_find_request(struct page *page) | 
|  | 127 | { | 
|  | 128 | struct inode *inode = page->mapping->host; | 
|  | 129 | struct nfs_page *req = NULL; | 
|  | 130 |  | 
|  | 131 | spin_lock(&inode->i_lock); | 
|  | 132 | req = nfs_page_find_request_locked(page); | 
|  | 133 | spin_unlock(&inode->i_lock); | 
|  | 134 | return req; | 
|  | 135 | } | 
|  | 136 |  | 
|  | 137 | /* Adjust the file length if we're writing beyond the end */ | 
|  | 138 | static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) | 
|  | 139 | { | 
|  | 140 | struct inode *inode = page->mapping->host; | 
|  | 141 | loff_t end, i_size; | 
|  | 142 | pgoff_t end_index; | 
|  | 143 |  | 
|  | 144 | spin_lock(&inode->i_lock); | 
|  | 145 | i_size = i_size_read(inode); | 
|  | 146 | end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; | 
|  | 147 | if (i_size > 0 && page->index < end_index) | 
|  | 148 | goto out; | 
|  | 149 | end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); | 
|  | 150 | if (i_size >= end) | 
|  | 151 | goto out; | 
|  | 152 | i_size_write(inode, end); | 
|  | 153 | nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); | 
|  | 154 | out: | 
|  | 155 | spin_unlock(&inode->i_lock); | 
|  | 156 | } | 
|  | 157 |  | 
|  | 158 | /* A writeback failed: mark the page as bad, and invalidate the page cache */ | 
|  | 159 | static void nfs_set_pageerror(struct page *page) | 
|  | 160 | { | 
|  | 161 | SetPageError(page); | 
|  | 162 | nfs_zap_mapping(page->mapping->host, page->mapping); | 
|  | 163 | } | 
|  | 164 |  | 
/*
 * Set PG_uptodate when a write request covers the whole page, i.e. it
 * starts at offset 0 and its length equals nfs_page_length().  Pages that
 * are already up to date are left alone.
 */
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
{
	if (!PageUptodate(page) && base == 0 &&
	    count == nfs_page_length(page))
		SetPageUptodate(page);
}
|  | 178 |  | 
|  | 179 | static int wb_priority(struct writeback_control *wbc) | 
|  | 180 | { | 
|  | 181 | if (wbc->for_reclaim) | 
|  | 182 | return FLUSH_HIGHPRI | FLUSH_STABLE; | 
|  | 183 | if (wbc->for_kupdate || wbc->for_background) | 
|  | 184 | return FLUSH_LOWPRI | FLUSH_COND_STABLE; | 
|  | 185 | return FLUSH_COND_STABLE; | 
|  | 186 | } | 
|  | 187 |  | 
/*
 * NFS congestion control
 *
 * The thresholds below turn nfs_congestion_kb into a count by shifting it
 * right by (PAGE_SHIFT - 10), i.e. kilobytes to pages; they are compared
 * against the per-server ->writeback counter.  The "off" threshold is
 * three quarters of the "on" threshold, so the congested state is cleared
 * at a lower level than the one at which it is set.
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH 	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
|  | 197 |  | 
|  | 198 | static int nfs_set_page_writeback(struct page *page) | 
|  | 199 | { | 
|  | 200 | int ret = test_set_page_writeback(page); | 
|  | 201 |  | 
|  | 202 | if (!ret) { | 
|  | 203 | struct inode *inode = page->mapping->host; | 
|  | 204 | struct nfs_server *nfss = NFS_SERVER(inode); | 
|  | 205 |  | 
|  | 206 | page_cache_get(page); | 
|  | 207 | if (atomic_long_inc_return(&nfss->writeback) > | 
|  | 208 | NFS_CONGESTION_ON_THRESH) { | 
|  | 209 | set_bdi_congested(&nfss->backing_dev_info, | 
|  | 210 | BLK_RW_ASYNC); | 
|  | 211 | } | 
|  | 212 | } | 
|  | 213 | return ret; | 
|  | 214 | } | 
|  | 215 |  | 
|  | 216 | static void nfs_end_page_writeback(struct page *page) | 
|  | 217 | { | 
|  | 218 | struct inode *inode = page->mapping->host; | 
|  | 219 | struct nfs_server *nfss = NFS_SERVER(inode); | 
|  | 220 |  | 
|  | 221 | end_page_writeback(page); | 
|  | 222 | page_cache_release(page); | 
|  | 223 | if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) | 
|  | 224 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | 
|  | 225 | } | 
|  | 226 |  | 
/*
 * Look up the write request attached to @page and lock it.
 *
 * Returns the locked request (still holding the reference taken by
 * nfs_page_find_request_locked()), NULL if the page has no request, or an
 * ERR_PTR: -EAGAIN when @nonblock is set and the request is already
 * locked, otherwise the non-zero value returned by nfs_wait_on_request().
 */
static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int ret;

	spin_lock(&inode->i_lock);
	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			break;
		if (nfs_lock_request_dontget(req))
			break;
		/* Note: If we hold the page lock, as is the case in nfs_writepage,
		 *	 then the call to nfs_lock_request_dontget() will always
		 *	 succeed provided that someone hasn't already marked the
		 *	 request as dirty (in which case we don't care).
		 */
		/* Could not lock: drop i_lock before (possibly) sleeping. */
		spin_unlock(&inode->i_lock);
		if (!nonblock)
			ret = nfs_wait_on_request(req);
		else
			ret = -EAGAIN;
		/* Drop the lookup reference; the next iteration looks up again. */
		nfs_release_request(req);
		if (ret != 0)
			return ERR_PTR(ret);
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
	return req;
}
|  | 258 |  | 
|  | 259 | /* | 
|  | 260 | * Find an associated nfs write request, and prepare to flush it out | 
|  | 261 | * May return an error if the user signalled nfs_wait_on_request(). | 
|  | 262 | */ | 
|  | 263 | static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, | 
|  | 264 | struct page *page, bool nonblock) | 
|  | 265 | { | 
|  | 266 | struct nfs_page *req; | 
|  | 267 | int ret = 0; | 
|  | 268 |  | 
|  | 269 | req = nfs_find_and_lock_request(page, nonblock); | 
|  | 270 | if (!req) | 
|  | 271 | goto out; | 
|  | 272 | ret = PTR_ERR(req); | 
|  | 273 | if (IS_ERR(req)) | 
|  | 274 | goto out; | 
|  | 275 |  | 
|  | 276 | ret = nfs_set_page_writeback(page); | 
|  | 277 | BUG_ON(ret != 0); | 
|  | 278 | BUG_ON(test_bit(PG_CLEAN, &req->wb_flags)); | 
|  | 279 |  | 
|  | 280 | if (!nfs_pageio_add_request(pgio, req)) { | 
|  | 281 | nfs_redirty_request(req); | 
|  | 282 | ret = pgio->pg_error; | 
|  | 283 | } | 
|  | 284 | out: | 
|  | 285 | return ret; | 
|  | 286 | } | 
|  | 287 |  | 
|  | 288 | static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) | 
|  | 289 | { | 
|  | 290 | struct inode *inode = page->mapping->host; | 
|  | 291 | int ret; | 
|  | 292 |  | 
|  | 293 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); | 
|  | 294 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); | 
|  | 295 |  | 
|  | 296 | nfs_pageio_cond_complete(pgio, page->index); | 
|  | 297 | ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); | 
|  | 298 | if (ret == -EAGAIN) { | 
|  | 299 | redirty_page_for_writepage(wbc, page); | 
|  | 300 | ret = 0; | 
|  | 301 | } | 
|  | 302 | return ret; | 
|  | 303 | } | 
|  | 304 |  | 
|  | 305 | /* | 
|  | 306 | * Write an mmapped page to the server. | 
|  | 307 | */ | 
|  | 308 | static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) | 
|  | 309 | { | 
|  | 310 | struct nfs_pageio_descriptor pgio; | 
|  | 311 | int err; | 
|  | 312 |  | 
|  | 313 | nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); | 
|  | 314 | err = nfs_do_writepage(page, wbc, &pgio); | 
|  | 315 | nfs_pageio_complete(&pgio); | 
|  | 316 | if (err < 0) | 
|  | 317 | return err; | 
|  | 318 | if (pgio.pg_error < 0) | 
|  | 319 | return pgio.pg_error; | 
|  | 320 | return 0; | 
|  | 321 | } | 
|  | 322 |  | 
/*
 * Write out @page via nfs_writepage_locked() and unlock it afterwards,
 * whatever the outcome.  Returns the status of the write.
 */
int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int status = nfs_writepage_locked(page, wbc);

	unlock_page(page);
	return status;
}
|  | 331 |  | 
/*
 * Per-page callback passed to write_cache_pages(); @data is the pageio
 * descriptor.  The page is unlocked once it has been handed to
 * nfs_do_writepage().
 */
static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int status = nfs_do_writepage(page, wbc, data);

	unlock_page(page);
	return status;
}
|  | 340 |  | 
|  | 341 | int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) | 
|  | 342 | { | 
|  | 343 | struct inode *inode = mapping->host; | 
|  | 344 | unsigned long *bitlock = &NFS_I(inode)->flags; | 
|  | 345 | struct nfs_pageio_descriptor pgio; | 
|  | 346 | int err; | 
|  | 347 |  | 
|  | 348 | /* Stop dirtying of new pages while we sync */ | 
|  | 349 | err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING, | 
|  | 350 | nfs_wait_bit_killable, TASK_KILLABLE); | 
|  | 351 | if (err) | 
|  | 352 | goto out_err; | 
|  | 353 |  | 
|  | 354 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); | 
|  | 355 |  | 
|  | 356 | nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); | 
|  | 357 | err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); | 
|  | 358 | nfs_pageio_complete(&pgio); | 
|  | 359 |  | 
|  | 360 | clear_bit_unlock(NFS_INO_FLUSHING, bitlock); | 
|  | 361 | smp_mb__after_clear_bit(); | 
|  | 362 | wake_up_bit(bitlock, NFS_INO_FLUSHING); | 
|  | 363 |  | 
|  | 364 | if (err < 0) | 
|  | 365 | goto out_err; | 
|  | 366 | err = pgio.pg_error; | 
|  | 367 | if (err < 0) | 
|  | 368 | goto out_err; | 
|  | 369 | return 0; | 
|  | 370 | out_err: | 
|  | 371 | return err; | 
|  | 372 | } | 
|  | 373 |  | 
/*
 * Insert a write request into an inode
 *
 * The request is locked, then under inode->i_lock it is marked PG_MAPPED
 * and attached to its page via the page-private field, the inode's
 * request count is incremented and a reference is taken on the request.
 * NOTE(review): that reference presumably belongs to the page-private
 * pointer and is dropped in nfs_inode_remove_request() - confirm.
 */
static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	/* Lock the request! */
	nfs_lock_request_dontget(req);

	spin_lock(&inode->i_lock);
	/* First request on the inode while holding a write delegation. */
	if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
		inode->i_version++;
	set_bit(PG_MAPPED, &req->wb_flags);
	SetPagePrivate(req->wb_page);
	set_page_private(req->wb_page, (unsigned long)req);
	nfsi->npages++;
	kref_get(&req->wb_kref);
	spin_unlock(&inode->i_lock);
}
|  | 394 |  | 
/*
 * Remove a write request from an inode
 *
 * The request must be locked (NFS_WBACK_BUSY).  Under inode->i_lock the
 * page-private link and PG_MAPPED are cleared and the inode's request
 * count is decremented; one reference on the request is released after
 * the lock is dropped.
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	BUG_ON (!NFS_WBACK_BUSY(req));

	spin_lock(&inode->i_lock);
	set_page_private(req->wb_page, 0);
	ClearPagePrivate(req->wb_page);
	clear_bit(PG_MAPPED, &req->wb_flags);
	nfsi->npages--;
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
}
|  | 413 |  | 
|  | 414 | static void | 
|  | 415 | nfs_mark_request_dirty(struct nfs_page *req) | 
|  | 416 | { | 
|  | 417 | __set_page_dirty_nobuffers(req->wb_page); | 
|  | 418 | } | 
|  | 419 |  | 
|  | 420 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 
|  | 421 | /** | 
|  | 422 | * nfs_request_add_commit_list - add request to a commit list | 
|  | 423 | * @req: pointer to a struct nfs_page | 
|  | 424 | * @head: commit list head | 
|  | 425 | * | 
|  | 426 | * This sets the PG_CLEAN bit, updates the inode global count of | 
|  | 427 | * number of outstanding requests requiring a commit as well as | 
|  | 428 | * the MM page stats. | 
|  | 429 | * | 
|  | 430 | * The caller must _not_ hold the inode->i_lock, but must be | 
|  | 431 | * holding the nfs_page lock. | 
|  | 432 | */ | 
|  | 433 | void | 
|  | 434 | nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) | 
|  | 435 | { | 
|  | 436 | struct inode *inode = req->wb_context->dentry->d_inode; | 
|  | 437 |  | 
|  | 438 | set_bit(PG_CLEAN, &(req)->wb_flags); | 
|  | 439 | spin_lock(&inode->i_lock); | 
|  | 440 | nfs_list_add_request(req, head); | 
|  | 441 | NFS_I(inode)->ncommit++; | 
|  | 442 | spin_unlock(&inode->i_lock); | 
|  | 443 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | 
|  | 444 | inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); | 
|  | 445 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 
|  | 446 | } | 
|  | 447 | EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); | 
|  | 448 |  | 
|  | 449 | /** | 
|  | 450 | * nfs_request_remove_commit_list - Remove request from a commit list | 
|  | 451 | * @req: pointer to a nfs_page | 
|  | 452 | * | 
|  | 453 | * This clears the PG_CLEAN bit, and updates the inode global count of | 
|  | 454 | * number of outstanding requests requiring a commit | 
|  | 455 | * It does not update the MM page stats. | 
|  | 456 | * | 
|  | 457 | * The caller _must_ hold the inode->i_lock and the nfs_page lock. | 
|  | 458 | */ | 
|  | 459 | void | 
|  | 460 | nfs_request_remove_commit_list(struct nfs_page *req) | 
|  | 461 | { | 
|  | 462 | struct inode *inode = req->wb_context->dentry->d_inode; | 
|  | 463 |  | 
|  | 464 | if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) | 
|  | 465 | return; | 
|  | 466 | nfs_list_remove_request(req); | 
|  | 467 | NFS_I(inode)->ncommit--; | 
|  | 468 | } | 
|  | 469 | EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); | 
|  | 470 |  | 
|  | 471 |  | 
|  | 472 | /* | 
|  | 473 | * Add a request to the inode's commit list. | 
|  | 474 | */ | 
|  | 475 | static void | 
|  | 476 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | 
|  | 477 | { | 
|  | 478 | struct inode *inode = req->wb_context->dentry->d_inode; | 
|  | 479 |  | 
|  | 480 | if (pnfs_mark_request_commit(req, lseg)) | 
|  | 481 | return; | 
|  | 482 | nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); | 
|  | 483 | } | 
|  | 484 |  | 
|  | 485 | static void | 
|  | 486 | nfs_clear_page_commit(struct page *page) | 
|  | 487 | { | 
|  | 488 | dec_zone_page_state(page, NR_UNSTABLE_NFS); | 
|  | 489 | dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); | 
|  | 490 | } | 
|  | 491 |  | 
|  | 492 | static void | 
|  | 493 | nfs_clear_request_commit(struct nfs_page *req) | 
|  | 494 | { | 
|  | 495 | if (test_bit(PG_CLEAN, &req->wb_flags)) { | 
|  | 496 | struct inode *inode = req->wb_context->dentry->d_inode; | 
|  | 497 |  | 
|  | 498 | if (!pnfs_clear_request_commit(req)) { | 
|  | 499 | spin_lock(&inode->i_lock); | 
|  | 500 | nfs_request_remove_commit_list(req); | 
|  | 501 | spin_unlock(&inode->i_lock); | 
|  | 502 | } | 
|  | 503 | nfs_clear_page_commit(req->wb_page); | 
|  | 504 | } | 
|  | 505 | } | 
|  | 506 |  | 
|  | 507 | static inline | 
|  | 508 | int nfs_write_need_commit(struct nfs_write_data *data) | 
|  | 509 | { | 
|  | 510 | if (data->verf.committed == NFS_DATA_SYNC) | 
|  | 511 | return data->lseg == NULL; | 
|  | 512 | else | 
|  | 513 | return data->verf.committed != NFS_FILE_SYNC; | 
|  | 514 | } | 
|  | 515 |  | 
|  | 516 | static inline | 
|  | 517 | int nfs_reschedule_unstable_write(struct nfs_page *req, | 
|  | 518 | struct nfs_write_data *data) | 
|  | 519 | { | 
|  | 520 | if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { | 
|  | 521 | nfs_mark_request_commit(req, data->lseg); | 
|  | 522 | return 1; | 
|  | 523 | } | 
|  | 524 | if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { | 
|  | 525 | nfs_mark_request_dirty(req); | 
|  | 526 | return 1; | 
|  | 527 | } | 
|  | 528 | return 0; | 
|  | 529 | } | 
|  | 530 | #else | 
/*
 * Stubs for builds with neither CONFIG_NFS_V3 nor CONFIG_NFS_V4: there is
 * no commit handling here, so marking/clearing are no-ops and the
 * predicates always answer "no".
 */
static void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
{
}

static void
nfs_clear_request_commit(struct nfs_page *req)
{
}

/* Never requires a commit in this configuration. */
static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return 0;
}

/* Never reschedules in this configuration. */
static inline
int nfs_reschedule_unstable_write(struct nfs_page *req,
				  struct nfs_write_data *data)
{
	return 0;
}
|  | 553 | #endif | 
|  | 554 |  | 
|  | 555 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 
|  | 556 | static int | 
|  | 557 | nfs_need_commit(struct nfs_inode *nfsi) | 
|  | 558 | { | 
|  | 559 | return nfsi->ncommit > 0; | 
|  | 560 | } | 
|  | 561 |  | 
/*
 * Move up to @max lockable requests from @src onto @dst and return how
 * many were moved.  Requests that nfs_lock_request() cannot lock are
 * skipped.
 *
 * i_lock held by caller; it is passed in as @lock so that
 * cond_resched_lock() can release it while rescheduling.
 */
static int
nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
		spinlock_t *lock)
{
	struct nfs_page *req, *tmp;
	int ret = 0;

	list_for_each_entry_safe(req, tmp, src, wb_list) {
		if (!nfs_lock_request(req))
			continue;
		/* If the lock was dropped, the cached next entry is refreshed. */
		if (cond_resched_lock(lock))
			list_safe_reset_next(req, tmp, wb_list);
		nfs_request_remove_commit_list(req);
		nfs_list_add_request(req, dst);
		ret++;
		if (ret == max)
			break;
	}
	return ret;
}
|  | 583 |  | 
|  | 584 | /* | 
|  | 585 | * nfs_scan_commit - Scan an inode for commit requests | 
|  | 586 | * @inode: NFS inode to scan | 
|  | 587 | * @dst: destination list | 
|  | 588 | * | 
|  | 589 | * Moves requests from the inode's 'commit' request list. | 
|  | 590 | * The requests are *not* checked to ensure that they form a contiguous set. | 
|  | 591 | */ | 
|  | 592 | static int | 
|  | 593 | nfs_scan_commit(struct inode *inode, struct list_head *dst) | 
|  | 594 | { | 
|  | 595 | struct nfs_inode *nfsi = NFS_I(inode); | 
|  | 596 | int ret = 0; | 
|  | 597 |  | 
|  | 598 | spin_lock(&inode->i_lock); | 
|  | 599 | if (nfsi->ncommit > 0) { | 
|  | 600 | const int max = INT_MAX; | 
|  | 601 |  | 
|  | 602 | ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, | 
|  | 603 | &inode->i_lock); | 
|  | 604 | ret += pnfs_scan_commit_lists(inode, max - ret, | 
|  | 605 | &inode->i_lock); | 
|  | 606 | } | 
|  | 607 | spin_unlock(&inode->i_lock); | 
|  | 608 | return ret; | 
|  | 609 | } | 
|  | 610 |  | 
|  | 611 | #else | 
/*
 * Stubs for builds with neither CONFIG_NFS_V3 nor CONFIG_NFS_V4: nothing
 * ever needs committing and a scan never finds any request.
 */
static inline int nfs_need_commit(struct nfs_inode *nfsi)
{
	return 0;
}

static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
{
	return 0;
}
|  | 621 | #endif | 
|  | 622 |  | 
/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 *
 * Returns the locked, updated request on success; NULL if the page has
 * no request; or ERR_PTR(error) where error comes from
 * nfs_wait_on_request() or nfs_wb_page().  When the flush via
 * nfs_wb_page() succeeds, error is 0 and ERR_PTR(0) is NULL, so the
 * caller sees "no request".
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	/* Unlocked fast path: no private data means no request. */
	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			goto out_unlock;

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend
		    || end < req->wb_offset)
			goto out_flushme;

		if (nfs_lock_request_dontget(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	/* Length is measured from the (possibly lowered) start offset. */
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	spin_unlock(&inode->i_lock);
	/* The request is being modified, so take it off any commit list. */
	if (req)
		nfs_clear_request_commit(req);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}
|  | 695 |  | 
|  | 696 | /* | 
|  | 697 | * Try to update an existing write request, or create one if there is none. | 
|  | 698 | * | 
|  | 699 | * Note: Should always be called with the Page Lock held to prevent races | 
|  | 700 | * if we have to add a new request. Also assumes that the caller has | 
|  | 701 | * already called nfs_flush_incompatible() if necessary. | 
|  | 702 | */ | 
|  | 703 | static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, | 
|  | 704 | struct page *page, unsigned int offset, unsigned int bytes) | 
|  | 705 | { | 
|  | 706 | struct inode *inode = page->mapping->host; | 
|  | 707 | struct nfs_page	*req; | 
|  | 708 |  | 
|  | 709 | req = nfs_try_to_update_request(inode, page, offset, bytes); | 
|  | 710 | if (req != NULL) | 
|  | 711 | goto out; | 
|  | 712 | req = nfs_create_request(ctx, inode, page, offset, bytes); | 
|  | 713 | if (IS_ERR(req)) | 
|  | 714 | goto out; | 
|  | 715 | nfs_inode_add_request(inode, req); | 
|  | 716 | out: | 
|  | 717 | return req; | 
|  | 718 | } | 
|  | 719 |  | 
|  | 720 | static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, | 
|  | 721 | unsigned int offset, unsigned int count) | 
|  | 722 | { | 
|  | 723 | struct nfs_page	*req; | 
|  | 724 |  | 
|  | 725 | req = nfs_setup_write_request(ctx, page, offset, count); | 
|  | 726 | if (IS_ERR(req)) | 
|  | 727 | return PTR_ERR(req); | 
|  | 728 | /* Update file length */ | 
|  | 729 | nfs_grow_file(page, offset, count); | 
|  | 730 | nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); | 
|  | 731 | nfs_mark_request_dirty(req); | 
|  | 732 | nfs_unlock_request(req); | 
|  | 733 | return 0; | 
|  | 734 | } | 
|  | 735 |  | 
|  | 736 | int nfs_flush_incompatible(struct file *file, struct page *page) | 
|  | 737 | { | 
|  | 738 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 
|  | 739 | struct nfs_page	*req; | 
|  | 740 | int do_flush, status; | 
|  | 741 | /* | 
|  | 742 | * Look for a request corresponding to this page. If there | 
|  | 743 | * is one, and it belongs to another file, we flush it out | 
|  | 744 | * before we try to copy anything into the page. Do this | 
|  | 745 | * due to the lack of an ACCESS-type call in NFSv2. | 
|  | 746 | * Also do the same if we find a request from an existing | 
|  | 747 | * dropped page. | 
|  | 748 | */ | 
|  | 749 | do { | 
|  | 750 | req = nfs_page_find_request(page); | 
|  | 751 | if (req == NULL) | 
|  | 752 | return 0; | 
|  | 753 | do_flush = req->wb_page != page || req->wb_context != ctx || | 
|  | 754 | req->wb_lock_context->lockowner != current->files || | 
|  | 755 | req->wb_lock_context->pid != current->tgid; | 
|  | 756 | nfs_release_request(req); | 
|  | 757 | if (!do_flush) | 
|  | 758 | return 0; | 
|  | 759 | status = nfs_wb_page(page->mapping->host, page); | 
|  | 760 | } while (status == 0); | 
|  | 761 | return status; | 
|  | 762 | } | 
|  | 763 |  | 
|  | 764 | /* | 
|  | 765 | * If the page cache is marked as unsafe or invalid, then we can't rely on | 
|  | 766 | * the PageUptodate() flag. In this case, we will need to turn off | 
|  | 767 | * write optimisations that depend on the page contents being correct. | 
|  | 768 | */ | 
|  | 769 | static int nfs_write_pageuptodate(struct page *page, struct inode *inode) | 
|  | 770 | { | 
|  | 771 | return PageUptodate(page) && | 
|  | 772 | !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); | 
|  | 773 | } | 
|  | 774 |  | 
|  | 775 | /* | 
|  | 776 | * Update and possibly write a cached page of an NFS file. | 
|  | 777 | * | 
|  | 778 | * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad | 
|  | 779 | * things with a page scheduled for an RPC call (e.g. invalidate it). | 
|  | 780 | */ | 
|  | 781 | int nfs_updatepage(struct file *file, struct page *page, | 
|  | 782 | unsigned int offset, unsigned int count) | 
|  | 783 | { | 
|  | 784 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 
|  | 785 | struct inode	*inode = page->mapping->host; | 
|  | 786 | int		status = 0; | 
|  | 787 |  | 
|  | 788 | nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); | 
|  | 789 |  | 
|  | 790 | dprintk("NFS:       nfs_updatepage(%s/%s %d@%lld)\n", | 
|  | 791 | file->f_path.dentry->d_parent->d_name.name, | 
|  | 792 | file->f_path.dentry->d_name.name, count, | 
|  | 793 | (long long)(page_offset(page) + offset)); | 
|  | 794 |  | 
|  | 795 | /* If we're not using byte range locks, and we know the page | 
|  | 796 | * is up to date, it may be more efficient to extend the write | 
|  | 797 | * to cover the entire page in order to avoid fragmentation | 
|  | 798 | * inefficiencies. | 
|  | 799 | */ | 
|  | 800 | if (nfs_write_pageuptodate(page, inode) && | 
|  | 801 | inode->i_flock == NULL && | 
|  | 802 | !(file->f_flags & O_DSYNC)) { | 
|  | 803 | count = max(count + offset, nfs_page_length(page)); | 
|  | 804 | offset = 0; | 
|  | 805 | } | 
|  | 806 |  | 
|  | 807 | status = nfs_writepage_setup(ctx, page, offset, count); | 
|  | 808 | if (status < 0) | 
|  | 809 | nfs_set_pageerror(page); | 
|  | 810 | else | 
|  | 811 | __set_page_dirty_nobuffers(page); | 
|  | 812 |  | 
|  | 813 | dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n", | 
|  | 814 | status, (long long)i_size_read(inode)); | 
|  | 815 | return status; | 
|  | 816 | } | 
|  | 817 |  | 
|  | 818 | static void nfs_writepage_release(struct nfs_page *req, | 
|  | 819 | struct nfs_write_data *data) | 
|  | 820 | { | 
|  | 821 | struct page *page = req->wb_page; | 
|  | 822 |  | 
|  | 823 | if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) | 
|  | 824 | nfs_inode_remove_request(req); | 
|  | 825 | nfs_unlock_request(req); | 
|  | 826 | nfs_end_page_writeback(page); | 
|  | 827 | } | 
|  | 828 |  | 
|  | 829 | static int flush_task_priority(int how) | 
|  | 830 | { | 
|  | 831 | switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { | 
|  | 832 | case FLUSH_HIGHPRI: | 
|  | 833 | return RPC_PRIORITY_HIGH; | 
|  | 834 | case FLUSH_LOWPRI: | 
|  | 835 | return RPC_PRIORITY_LOW; | 
|  | 836 | } | 
|  | 837 | return RPC_PRIORITY_NORMAL; | 
|  | 838 | } | 
|  | 839 |  | 
|  | 840 | int nfs_initiate_write(struct nfs_write_data *data, | 
|  | 841 | struct rpc_clnt *clnt, | 
|  | 842 | const struct rpc_call_ops *call_ops, | 
|  | 843 | int how) | 
|  | 844 | { | 
|  | 845 | struct inode *inode = data->inode; | 
|  | 846 | int priority = flush_task_priority(how); | 
|  | 847 | struct rpc_task *task; | 
|  | 848 | struct rpc_message msg = { | 
|  | 849 | .rpc_argp = &data->args, | 
|  | 850 | .rpc_resp = &data->res, | 
|  | 851 | .rpc_cred = data->cred, | 
|  | 852 | }; | 
|  | 853 | struct rpc_task_setup task_setup_data = { | 
|  | 854 | .rpc_client = clnt, | 
|  | 855 | .task = &data->task, | 
|  | 856 | .rpc_message = &msg, | 
|  | 857 | .callback_ops = call_ops, | 
|  | 858 | .callback_data = data, | 
|  | 859 | .workqueue = nfsiod_workqueue, | 
|  | 860 | .flags = RPC_TASK_ASYNC, | 
|  | 861 | .priority = priority, | 
|  | 862 | }; | 
|  | 863 | int ret = 0; | 
|  | 864 |  | 
|  | 865 | /* Set up the initial task struct.  */ | 
|  | 866 | NFS_PROTO(inode)->write_setup(data, &msg); | 
|  | 867 |  | 
|  | 868 | dprintk("NFS: %5u initiated write call " | 
|  | 869 | "(req %s/%lld, %u bytes @ offset %llu)\n", | 
|  | 870 | data->task.tk_pid, | 
|  | 871 | inode->i_sb->s_id, | 
|  | 872 | (long long)NFS_FILEID(inode), | 
|  | 873 | data->args.count, | 
|  | 874 | (unsigned long long)data->args.offset); | 
|  | 875 |  | 
|  | 876 | task = rpc_run_task(&task_setup_data); | 
|  | 877 | if (IS_ERR(task)) { | 
|  | 878 | ret = PTR_ERR(task); | 
|  | 879 | goto out; | 
|  | 880 | } | 
|  | 881 | if (how & FLUSH_SYNC) { | 
|  | 882 | ret = rpc_wait_for_completion_task(task); | 
|  | 883 | if (ret == 0) | 
|  | 884 | ret = task->tk_status; | 
|  | 885 | } | 
|  | 886 | rpc_put_task(task); | 
|  | 887 | out: | 
|  | 888 | return ret; | 
|  | 889 | } | 
|  | 890 | EXPORT_SYMBOL_GPL(nfs_initiate_write); | 
|  | 891 |  | 
|  | 892 | /* | 
|  | 893 | * Set up the argument/result storage required for the RPC call. | 
|  | 894 | */ | 
|  | 895 | static void nfs_write_rpcsetup(struct nfs_page *req, | 
|  | 896 | struct nfs_write_data *data, | 
|  | 897 | unsigned int count, unsigned int offset, | 
|  | 898 | int how) | 
|  | 899 | { | 
|  | 900 | struct inode *inode = req->wb_context->dentry->d_inode; | 
|  | 901 |  | 
|  | 902 | /* Set up the RPC argument and reply structs | 
|  | 903 | * NB: take care not to mess about with data->commit et al. */ | 
|  | 904 |  | 
|  | 905 | data->req = req; | 
|  | 906 | data->inode = inode = req->wb_context->dentry->d_inode; | 
|  | 907 | data->cred = req->wb_context->cred; | 
|  | 908 |  | 
|  | 909 | data->args.fh     = NFS_FH(inode); | 
|  | 910 | data->args.offset = req_offset(req) + offset; | 
|  | 911 | /* pnfs_set_layoutcommit needs this */ | 
|  | 912 | data->mds_offset = data->args.offset; | 
|  | 913 | data->args.pgbase = req->wb_pgbase + offset; | 
|  | 914 | data->args.pages  = data->pagevec; | 
|  | 915 | data->args.count  = count; | 
|  | 916 | data->args.context = get_nfs_open_context(req->wb_context); | 
|  | 917 | data->args.lock_context = req->wb_lock_context; | 
|  | 918 | data->args.stable  = NFS_UNSTABLE; | 
|  | 919 | switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { | 
|  | 920 | case 0: | 
|  | 921 | break; | 
|  | 922 | case FLUSH_COND_STABLE: | 
|  | 923 | if (nfs_need_commit(NFS_I(inode))) | 
|  | 924 | break; | 
|  | 925 | default: | 
|  | 926 | data->args.stable = NFS_FILE_SYNC; | 
|  | 927 | } | 
|  | 928 |  | 
|  | 929 | data->res.fattr   = &data->fattr; | 
|  | 930 | data->res.count   = count; | 
|  | 931 | data->res.verf    = &data->verf; | 
|  | 932 | nfs_fattr_init(&data->fattr); | 
|  | 933 | } | 
|  | 934 |  | 
|  | 935 | static int nfs_do_write(struct nfs_write_data *data, | 
|  | 936 | const struct rpc_call_ops *call_ops, | 
|  | 937 | int how) | 
|  | 938 | { | 
|  | 939 | struct inode *inode = data->args.context->dentry->d_inode; | 
|  | 940 |  | 
|  | 941 | return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); | 
|  | 942 | } | 
|  | 943 |  | 
|  | 944 | static int nfs_do_multiple_writes(struct list_head *head, | 
|  | 945 | const struct rpc_call_ops *call_ops, | 
|  | 946 | int how) | 
|  | 947 | { | 
|  | 948 | struct nfs_write_data *data; | 
|  | 949 | int ret = 0; | 
|  | 950 |  | 
|  | 951 | while (!list_empty(head)) { | 
|  | 952 | int ret2; | 
|  | 953 |  | 
|  | 954 | data = list_entry(head->next, struct nfs_write_data, list); | 
|  | 955 | list_del_init(&data->list); | 
|  | 956 |  | 
|  | 957 | ret2 = nfs_do_write(data, call_ops, how); | 
|  | 958 | if (ret == 0) | 
|  | 959 | ret = ret2; | 
|  | 960 | } | 
|  | 961 | return ret; | 
|  | 962 | } | 
|  | 963 |  | 
|  | 964 | /* If a nfs_flush_* function fails, it should remove reqs from @head and | 
|  | 965 | * call this on each, which will prepare them to be retried on next | 
|  | 966 | * writeback using standard nfs. | 
|  | 967 | */ | 
|  | 968 | static void nfs_redirty_request(struct nfs_page *req) | 
|  | 969 | { | 
|  | 970 | struct page *page = req->wb_page; | 
|  | 971 |  | 
|  | 972 | nfs_mark_request_dirty(req); | 
|  | 973 | nfs_unlock_request(req); | 
|  | 974 | nfs_end_page_writeback(page); | 
|  | 975 | } | 
|  | 976 |  | 
|  | 977 | /* | 
|  | 978 | * Generate multiple small requests to write out a single | 
|  | 979 | * contiguous dirty area on one page. | 
|  | 980 | */ | 
|  | 981 | static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) | 
|  | 982 | { | 
|  | 983 | struct nfs_page *req = nfs_list_entry(desc->pg_list.next); | 
|  | 984 | struct page *page = req->wb_page; | 
|  | 985 | struct nfs_write_data *data; | 
|  | 986 | size_t wsize = desc->pg_bsize, nbytes; | 
|  | 987 | unsigned int offset; | 
|  | 988 | int requests = 0; | 
|  | 989 | int ret = 0; | 
|  | 990 |  | 
|  | 991 | nfs_list_remove_request(req); | 
|  | 992 |  | 
|  | 993 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | 
|  | 994 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || | 
|  | 995 | desc->pg_count > wsize)) | 
|  | 996 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | 
|  | 997 |  | 
|  | 998 |  | 
|  | 999 | offset = 0; | 
|  | 1000 | nbytes = desc->pg_count; | 
|  | 1001 | do { | 
|  | 1002 | size_t len = min(nbytes, wsize); | 
|  | 1003 |  | 
|  | 1004 | data = nfs_writedata_alloc(1); | 
|  | 1005 | if (!data) | 
|  | 1006 | goto out_bad; | 
|  | 1007 | data->pagevec[0] = page; | 
|  | 1008 | nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); | 
|  | 1009 | list_add(&data->list, res); | 
|  | 1010 | requests++; | 
|  | 1011 | nbytes -= len; | 
|  | 1012 | offset += len; | 
|  | 1013 | } while (nbytes != 0); | 
|  | 1014 | atomic_set(&req->wb_complete, requests); | 
|  | 1015 | desc->pg_rpc_callops = &nfs_write_partial_ops; | 
|  | 1016 | return ret; | 
|  | 1017 |  | 
|  | 1018 | out_bad: | 
|  | 1019 | while (!list_empty(res)) { | 
|  | 1020 | data = list_entry(res->next, struct nfs_write_data, list); | 
|  | 1021 | list_del(&data->list); | 
|  | 1022 | nfs_writedata_release(data); | 
|  | 1023 | } | 
|  | 1024 | nfs_redirty_request(req); | 
|  | 1025 | return -ENOMEM; | 
|  | 1026 | } | 
|  | 1027 |  | 
|  | 1028 | /* | 
|  | 1029 | * Create an RPC task for the given write request and kick it. | 
|  | 1030 | * The page must have been locked by the caller. | 
|  | 1031 | * | 
|  | 1032 | * It may happen that the page we're passed is not marked dirty. | 
|  | 1033 | * This is the case if nfs_updatepage detects a conflicting request | 
|  | 1034 | * that has been written but not committed. | 
|  | 1035 | */ | 
|  | 1036 | static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) | 
|  | 1037 | { | 
|  | 1038 | struct nfs_page		*req; | 
|  | 1039 | struct page		**pages; | 
|  | 1040 | struct nfs_write_data	*data; | 
|  | 1041 | struct list_head *head = &desc->pg_list; | 
|  | 1042 | int ret = 0; | 
|  | 1043 |  | 
|  | 1044 | data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, | 
|  | 1045 | desc->pg_count)); | 
|  | 1046 | if (!data) { | 
|  | 1047 | while (!list_empty(head)) { | 
|  | 1048 | req = nfs_list_entry(head->next); | 
|  | 1049 | nfs_list_remove_request(req); | 
|  | 1050 | nfs_redirty_request(req); | 
|  | 1051 | } | 
|  | 1052 | ret = -ENOMEM; | 
|  | 1053 | goto out; | 
|  | 1054 | } | 
|  | 1055 | pages = data->pagevec; | 
|  | 1056 | while (!list_empty(head)) { | 
|  | 1057 | req = nfs_list_entry(head->next); | 
|  | 1058 | nfs_list_remove_request(req); | 
|  | 1059 | nfs_list_add_request(req, &data->pages); | 
|  | 1060 | *pages++ = req->wb_page; | 
|  | 1061 | } | 
|  | 1062 | req = nfs_list_entry(data->pages.next); | 
|  | 1063 |  | 
|  | 1064 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | 
|  | 1065 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) | 
|  | 1066 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | 
|  | 1067 |  | 
|  | 1068 | /* Set up the argument struct */ | 
|  | 1069 | nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); | 
|  | 1070 | list_add(&data->list, res); | 
|  | 1071 | desc->pg_rpc_callops = &nfs_write_full_ops; | 
|  | 1072 | out: | 
|  | 1073 | return ret; | 
|  | 1074 | } | 
|  | 1075 |  | 
|  | 1076 | int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) | 
|  | 1077 | { | 
|  | 1078 | if (desc->pg_bsize < PAGE_CACHE_SIZE) | 
|  | 1079 | return nfs_flush_multi(desc, head); | 
|  | 1080 | return nfs_flush_one(desc, head); | 
|  | 1081 | } | 
|  | 1082 |  | 
|  | 1083 | static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | 
|  | 1084 | { | 
|  | 1085 | LIST_HEAD(head); | 
|  | 1086 | int ret; | 
|  | 1087 |  | 
|  | 1088 | ret = nfs_generic_flush(desc, &head); | 
|  | 1089 | if (ret == 0) | 
|  | 1090 | ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, | 
|  | 1091 | desc->pg_ioflags); | 
|  | 1092 | return ret; | 
|  | 1093 | } | 
|  | 1094 |  | 
|  | 1095 | static const struct nfs_pageio_ops nfs_pageio_write_ops = { | 
|  | 1096 | .pg_test = nfs_generic_pg_test, | 
|  | 1097 | .pg_doio = nfs_generic_pg_writepages, | 
|  | 1098 | }; | 
|  | 1099 |  | 
|  | 1100 | void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, | 
|  | 1101 | struct inode *inode, int ioflags) | 
|  | 1102 | { | 
|  | 1103 | nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, | 
|  | 1104 | NFS_SERVER(inode)->wsize, ioflags); | 
|  | 1105 | } | 
|  | 1106 |  | 
|  | 1107 | void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) | 
|  | 1108 | { | 
|  | 1109 | pgio->pg_ops = &nfs_pageio_write_ops; | 
|  | 1110 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; | 
|  | 1111 | } | 
|  | 1112 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); | 
|  | 1113 |  | 
/*
 * Initialise @pgio for writing.  pnfs_pageio_init_write() gets the first
 * chance; if it returns zero the nfs_pageio_init_write_mds() setup is
 * used instead.
 */
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
				  struct inode *inode, int ioflags)
{
	if (pnfs_pageio_init_write(pgio, inode, ioflags))
		return;
	nfs_pageio_init_write_mds(pgio, inode, ioflags);
}
|  | 1120 |  | 
|  | 1121 | /* | 
|  | 1122 | * Handle a write reply that flushed part of a page. | 
|  | 1123 | */ | 
|  | 1124 | static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) | 
|  | 1125 | { | 
|  | 1126 | struct nfs_write_data	*data = calldata; | 
|  | 1127 |  | 
|  | 1128 | dprintk("NFS: %5u write(%s/%lld %d@%lld)", | 
|  | 1129 | task->tk_pid, | 
|  | 1130 | data->req->wb_context->dentry->d_inode->i_sb->s_id, | 
|  | 1131 | (long long) | 
|  | 1132 | NFS_FILEID(data->req->wb_context->dentry->d_inode), | 
|  | 1133 | data->req->wb_bytes, (long long)req_offset(data->req)); | 
|  | 1134 |  | 
|  | 1135 | nfs_writeback_done(task, data); | 
|  | 1136 | } | 
|  | 1137 |  | 
|  | 1138 | static void nfs_writeback_release_partial(void *calldata) | 
|  | 1139 | { | 
|  | 1140 | struct nfs_write_data	*data = calldata; | 
|  | 1141 | struct nfs_page		*req = data->req; | 
|  | 1142 | struct page		*page = req->wb_page; | 
|  | 1143 | int status = data->task.tk_status; | 
|  | 1144 |  | 
|  | 1145 | if (status < 0) { | 
|  | 1146 | nfs_set_pageerror(page); | 
|  | 1147 | nfs_context_set_write_error(req->wb_context, status); | 
|  | 1148 | dprintk(", error = %d\n", status); | 
|  | 1149 | goto out; | 
|  | 1150 | } | 
|  | 1151 |  | 
|  | 1152 | if (nfs_write_need_commit(data)) { | 
|  | 1153 | struct inode *inode = page->mapping->host; | 
|  | 1154 |  | 
|  | 1155 | spin_lock(&inode->i_lock); | 
|  | 1156 | if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { | 
|  | 1157 | /* Do nothing we need to resend the writes */ | 
|  | 1158 | } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { | 
|  | 1159 | memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); | 
|  | 1160 | dprintk(" defer commit\n"); | 
|  | 1161 | } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { | 
|  | 1162 | set_bit(PG_NEED_RESCHED, &req->wb_flags); | 
|  | 1163 | clear_bit(PG_NEED_COMMIT, &req->wb_flags); | 
|  | 1164 | dprintk(" server reboot detected\n"); | 
|  | 1165 | } | 
|  | 1166 | spin_unlock(&inode->i_lock); | 
|  | 1167 | } else | 
|  | 1168 | dprintk(" OK\n"); | 
|  | 1169 |  | 
|  | 1170 | out: | 
|  | 1171 | if (atomic_dec_and_test(&req->wb_complete)) | 
|  | 1172 | nfs_writepage_release(req, data); | 
|  | 1173 | nfs_writedata_release(calldata); | 
|  | 1174 | } | 
|  | 1175 |  | 
|  | 1176 | void nfs_write_prepare(struct rpc_task *task, void *calldata) | 
|  | 1177 | { | 
|  | 1178 | struct nfs_write_data *data = calldata; | 
|  | 1179 | NFS_PROTO(data->inode)->write_rpc_prepare(task, data); | 
|  | 1180 | } | 
|  | 1181 |  | 
|  | 1182 | static const struct rpc_call_ops nfs_write_partial_ops = { | 
|  | 1183 | .rpc_call_prepare = nfs_write_prepare, | 
|  | 1184 | .rpc_call_done = nfs_writeback_done_partial, | 
|  | 1185 | .rpc_release = nfs_writeback_release_partial, | 
|  | 1186 | }; | 
|  | 1187 |  | 
/*
 * rpc_call_done handler for a write reply that flushes a whole page;
 * simply defers to nfs_writeback_done().
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *	  writebacks since the page->count is kept > 1 for as long
 *	  as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
	nfs_writeback_done(task, calldata);
}
|  | 1201 |  | 
|  | 1202 | static void nfs_writeback_release_full(void *calldata) | 
|  | 1203 | { | 
|  | 1204 | struct nfs_write_data	*data = calldata; | 
|  | 1205 | int status = data->task.tk_status; | 
|  | 1206 |  | 
|  | 1207 | /* Update attributes as result of writeback. */ | 
|  | 1208 | while (!list_empty(&data->pages)) { | 
|  | 1209 | struct nfs_page *req = nfs_list_entry(data->pages.next); | 
|  | 1210 | struct page *page = req->wb_page; | 
|  | 1211 |  | 
|  | 1212 | nfs_list_remove_request(req); | 
|  | 1213 |  | 
|  | 1214 | dprintk("NFS: %5u write (%s/%lld %d@%lld)", | 
|  | 1215 | data->task.tk_pid, | 
|  | 1216 | req->wb_context->dentry->d_inode->i_sb->s_id, | 
|  | 1217 | (long long)NFS_FILEID(req->wb_context->dentry->d_inode), | 
|  | 1218 | req->wb_bytes, | 
|  | 1219 | (long long)req_offset(req)); | 
|  | 1220 |  | 
|  | 1221 | if (status < 0) { | 
|  | 1222 | nfs_set_pageerror(page); | 
|  | 1223 | nfs_context_set_write_error(req->wb_context, status); | 
|  | 1224 | dprintk(", error = %d\n", status); | 
|  | 1225 | goto remove_request; | 
|  | 1226 | } | 
|  | 1227 |  | 
|  | 1228 | if (nfs_write_need_commit(data)) { | 
|  | 1229 | memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); | 
|  | 1230 | nfs_mark_request_commit(req, data->lseg); | 
|  | 1231 | dprintk(" marked for commit\n"); | 
|  | 1232 | goto next; | 
|  | 1233 | } | 
|  | 1234 | dprintk(" OK\n"); | 
|  | 1235 | remove_request: | 
|  | 1236 | nfs_inode_remove_request(req); | 
|  | 1237 | next: | 
|  | 1238 | nfs_unlock_request(req); | 
|  | 1239 | nfs_end_page_writeback(page); | 
|  | 1240 | } | 
|  | 1241 | nfs_writedata_release(calldata); | 
|  | 1242 | } | 
|  | 1243 |  | 
|  | 1244 | static const struct rpc_call_ops nfs_write_full_ops = { | 
|  | 1245 | .rpc_call_prepare = nfs_write_prepare, | 
|  | 1246 | .rpc_call_done = nfs_writeback_done_full, | 
|  | 1247 | .rpc_release = nfs_writeback_release_full, | 
|  | 1248 | }; | 
|  | 1249 |  | 
|  | 1250 |  | 
|  | 1251 | /* | 
|  | 1252 | * This function is called when the WRITE call is complete. | 
|  | 1253 | */ | 
|  | 1254 | void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | 
|  | 1255 | { | 
|  | 1256 | struct nfs_writeargs	*argp = &data->args; | 
|  | 1257 | struct nfs_writeres	*resp = &data->res; | 
|  | 1258 | int status; | 
|  | 1259 |  | 
|  | 1260 | dprintk("NFS: %5u nfs_writeback_done (status %d)\n", | 
|  | 1261 | task->tk_pid, task->tk_status); | 
|  | 1262 |  | 
|  | 1263 | /* | 
|  | 1264 | * ->write_done will attempt to use post-op attributes to detect | 
|  | 1265 | * conflicting writes by other clients.  A strict interpretation | 
|  | 1266 | * of close-to-open would allow us to continue caching even if | 
|  | 1267 | * another writer had changed the file, but some applications | 
|  | 1268 | * depend on tighter cache coherency when writing. | 
|  | 1269 | */ | 
|  | 1270 | status = NFS_PROTO(data->inode)->write_done(task, data); | 
|  | 1271 | if (status != 0) | 
|  | 1272 | return; | 
|  | 1273 | nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); | 
|  | 1274 |  | 
|  | 1275 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 
|  | 1276 | if (resp->verf->committed < argp->stable && task->tk_status >= 0) { | 
|  | 1277 | /* We tried a write call, but the server did not | 
|  | 1278 | * commit data to stable storage even though we | 
|  | 1279 | * requested it. | 
|  | 1280 | * Note: There is a known bug in Tru64 < 5.0 in which | 
|  | 1281 | *	 the server reports NFS_DATA_SYNC, but performs | 
|  | 1282 | *	 NFS_FILE_SYNC. We therefore implement this checking | 
|  | 1283 | *	 as a dprintk() in order to avoid filling syslog. | 
|  | 1284 | */ | 
|  | 1285 | static unsigned long    complain; | 
|  | 1286 |  | 
|  | 1287 | /* Note this will print the MDS for a DS write */ | 
|  | 1288 | if (time_before(complain, jiffies)) { | 
|  | 1289 | dprintk("NFS:       faulty NFS server %s:" | 
|  | 1290 | " (committed = %d) != (stable = %d)\n", | 
|  | 1291 | NFS_SERVER(data->inode)->nfs_client->cl_hostname, | 
|  | 1292 | resp->verf->committed, argp->stable); | 
|  | 1293 | complain = jiffies + 300 * HZ; | 
|  | 1294 | } | 
|  | 1295 | } | 
|  | 1296 | #endif | 
|  | 1297 | /* Is this a short write? */ | 
|  | 1298 | if (task->tk_status >= 0 && resp->count < argp->count) { | 
|  | 1299 | static unsigned long    complain; | 
|  | 1300 |  | 
|  | 1301 | nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); | 
|  | 1302 |  | 
|  | 1303 | /* Has the server at least made some progress? */ | 
|  | 1304 | if (resp->count != 0) { | 
|  | 1305 | /* Was this an NFSv2 write or an NFSv3 stable write? */ | 
|  | 1306 | if (resp->verf->committed != NFS_UNSTABLE) { | 
|  | 1307 | /* Resend from where the server left off */ | 
|  | 1308 | data->mds_offset += resp->count; | 
|  | 1309 | argp->offset += resp->count; | 
|  | 1310 | argp->pgbase += resp->count; | 
|  | 1311 | argp->count -= resp->count; | 
|  | 1312 | } else { | 
|  | 1313 | /* Resend as a stable write in order to avoid | 
|  | 1314 | * headaches in the case of a server crash. | 
|  | 1315 | */ | 
|  | 1316 | argp->stable = NFS_FILE_SYNC; | 
|  | 1317 | } | 
|  | 1318 | rpc_restart_call_prepare(task); | 
|  | 1319 | return; | 
|  | 1320 | } | 
|  | 1321 | if (time_before(complain, jiffies)) { | 
|  | 1322 | printk(KERN_WARNING | 
|  | 1323 | "NFS: Server wrote zero bytes, expected %u.\n", | 
|  | 1324 | argp->count); | 
|  | 1325 | complain = jiffies + 300 * HZ; | 
|  | 1326 | } | 
|  | 1327 | /* Can't do anything about it except throw an error. */ | 
|  | 1328 | task->tk_status = -EIO; | 
|  | 1329 | } | 
|  | 1330 | return; | 
|  | 1331 | } | 
|  | 1332 |  | 
|  | 1333 |  | 
|  | 1334 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 
|  | 1335 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) | 
|  | 1336 | { | 
|  | 1337 | int ret; | 
|  | 1338 |  | 
|  | 1339 | if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) | 
|  | 1340 | return 1; | 
|  | 1341 | if (!may_wait) | 
|  | 1342 | return 0; | 
|  | 1343 | ret = out_of_line_wait_on_bit_lock(&nfsi->flags, | 
|  | 1344 | NFS_INO_COMMIT, | 
|  | 1345 | nfs_wait_bit_killable, | 
|  | 1346 | TASK_KILLABLE); | 
|  | 1347 | return (ret < 0) ? ret : 1; | 
|  | 1348 | } | 
|  | 1349 |  | 
|  | 1350 | void nfs_commit_clear_lock(struct nfs_inode *nfsi) | 
|  | 1351 | { | 
|  | 1352 | clear_bit(NFS_INO_COMMIT, &nfsi->flags); | 
|  | 1353 | smp_mb__after_clear_bit(); | 
|  | 1354 | wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); | 
|  | 1355 | } | 
|  | 1356 | EXPORT_SYMBOL_GPL(nfs_commit_clear_lock); | 
|  | 1357 |  | 
|  | 1358 | void nfs_commitdata_release(void *data) | 
|  | 1359 | { | 
|  | 1360 | struct nfs_write_data *wdata = data; | 
|  | 1361 |  | 
|  | 1362 | put_nfs_open_context(wdata->args.context); | 
|  | 1363 | nfs_commit_free(wdata); | 
|  | 1364 | } | 
|  | 1365 | EXPORT_SYMBOL_GPL(nfs_commitdata_release); | 
|  | 1366 |  | 
|  | 1367 | int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, | 
|  | 1368 | const struct rpc_call_ops *call_ops, | 
|  | 1369 | int how) | 
|  | 1370 | { | 
|  | 1371 | struct rpc_task *task; | 
|  | 1372 | int priority = flush_task_priority(how); | 
|  | 1373 | struct rpc_message msg = { | 
|  | 1374 | .rpc_argp = &data->args, | 
|  | 1375 | .rpc_resp = &data->res, | 
|  | 1376 | .rpc_cred = data->cred, | 
|  | 1377 | }; | 
|  | 1378 | struct rpc_task_setup task_setup_data = { | 
|  | 1379 | .task = &data->task, | 
|  | 1380 | .rpc_client = clnt, | 
|  | 1381 | .rpc_message = &msg, | 
|  | 1382 | .callback_ops = call_ops, | 
|  | 1383 | .callback_data = data, | 
|  | 1384 | .workqueue = nfsiod_workqueue, | 
|  | 1385 | .flags = RPC_TASK_ASYNC, | 
|  | 1386 | .priority = priority, | 
|  | 1387 | }; | 
|  | 1388 | /* Set up the initial task struct.  */ | 
|  | 1389 | NFS_PROTO(data->inode)->commit_setup(data, &msg); | 
|  | 1390 |  | 
|  | 1391 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | 
|  | 1392 |  | 
|  | 1393 | task = rpc_run_task(&task_setup_data); | 
|  | 1394 | if (IS_ERR(task)) | 
|  | 1395 | return PTR_ERR(task); | 
|  | 1396 | if (how & FLUSH_SYNC) | 
|  | 1397 | rpc_wait_for_completion_task(task); | 
|  | 1398 | rpc_put_task(task); | 
|  | 1399 | return 0; | 
|  | 1400 | } | 
|  | 1401 | EXPORT_SYMBOL_GPL(nfs_initiate_commit); | 
|  | 1402 |  | 
|  | 1403 | /* | 
|  | 1404 | * Set up the argument/result storage required for the RPC call. | 
|  | 1405 | */ | 
|  | 1406 | void nfs_init_commit(struct nfs_write_data *data, | 
|  | 1407 | struct list_head *head, | 
|  | 1408 | struct pnfs_layout_segment *lseg) | 
|  | 1409 | { | 
|  | 1410 | struct nfs_page *first = nfs_list_entry(head->next); | 
|  | 1411 | struct inode *inode = first->wb_context->dentry->d_inode; | 
|  | 1412 |  | 
|  | 1413 | /* Set up the RPC argument and reply structs | 
|  | 1414 | * NB: take care not to mess about with data->commit et al. */ | 
|  | 1415 |  | 
|  | 1416 | list_splice_init(head, &data->pages); | 
|  | 1417 |  | 
|  | 1418 | data->inode	  = inode; | 
|  | 1419 | data->cred	  = first->wb_context->cred; | 
|  | 1420 | data->lseg	  = lseg; /* reference transferred */ | 
|  | 1421 | data->mds_ops     = &nfs_commit_ops; | 
|  | 1422 |  | 
|  | 1423 | data->args.fh     = NFS_FH(data->inode); | 
|  | 1424 | /* Note: we always request a commit of the entire inode */ | 
|  | 1425 | data->args.offset = 0; | 
|  | 1426 | data->args.count  = 0; | 
|  | 1427 | data->args.context = get_nfs_open_context(first->wb_context); | 
|  | 1428 | data->res.count   = 0; | 
|  | 1429 | data->res.fattr   = &data->fattr; | 
|  | 1430 | data->res.verf    = &data->verf; | 
|  | 1431 | nfs_fattr_init(&data->fattr); | 
|  | 1432 | } | 
|  | 1433 | EXPORT_SYMBOL_GPL(nfs_init_commit); | 
|  | 1434 |  | 
|  | 1435 | void nfs_retry_commit(struct list_head *page_list, | 
|  | 1436 | struct pnfs_layout_segment *lseg) | 
|  | 1437 | { | 
|  | 1438 | struct nfs_page *req; | 
|  | 1439 |  | 
|  | 1440 | while (!list_empty(page_list)) { | 
|  | 1441 | req = nfs_list_entry(page_list->next); | 
|  | 1442 | nfs_list_remove_request(req); | 
|  | 1443 | nfs_mark_request_commit(req, lseg); | 
|  | 1444 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | 
|  | 1445 | dec_bdi_stat(req->wb_page->mapping->backing_dev_info, | 
|  | 1446 | BDI_RECLAIMABLE); | 
|  | 1447 | nfs_unlock_request(req); | 
|  | 1448 | } | 
|  | 1449 | } | 
|  | 1450 | EXPORT_SYMBOL_GPL(nfs_retry_commit); | 
|  | 1451 |  | 
|  | 1452 | /* | 
|  | 1453 | * Commit dirty pages | 
|  | 1454 | */ | 
|  | 1455 | static int | 
|  | 1456 | nfs_commit_list(struct inode *inode, struct list_head *head, int how) | 
|  | 1457 | { | 
|  | 1458 | struct nfs_write_data	*data; | 
|  | 1459 |  | 
|  | 1460 | data = nfs_commitdata_alloc(); | 
|  | 1461 |  | 
|  | 1462 | if (!data) | 
|  | 1463 | goto out_bad; | 
|  | 1464 |  | 
|  | 1465 | /* Set up the argument struct */ | 
|  | 1466 | nfs_init_commit(data, head, NULL); | 
|  | 1467 | return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); | 
|  | 1468 | out_bad: | 
|  | 1469 | nfs_retry_commit(head, NULL); | 
|  | 1470 | nfs_commit_clear_lock(NFS_I(inode)); | 
|  | 1471 | return -ENOMEM; | 
|  | 1472 | } | 
|  | 1473 |  | 
|  | 1474 | /* | 
|  | 1475 | * COMMIT call returned | 
|  | 1476 | */ | 
|  | 1477 | static void nfs_commit_done(struct rpc_task *task, void *calldata) | 
|  | 1478 | { | 
|  | 1479 | struct nfs_write_data	*data = calldata; | 
|  | 1480 |  | 
|  | 1481 | dprintk("NFS: %5u nfs_commit_done (status %d)\n", | 
|  | 1482 | task->tk_pid, task->tk_status); | 
|  | 1483 |  | 
|  | 1484 | /* Call the NFS version-specific code */ | 
|  | 1485 | NFS_PROTO(data->inode)->commit_done(task, data); | 
|  | 1486 | } | 
|  | 1487 |  | 
|  | 1488 | void nfs_commit_release_pages(struct nfs_write_data *data) | 
|  | 1489 | { | 
|  | 1490 | struct nfs_page	*req; | 
|  | 1491 | int status = data->task.tk_status; | 
|  | 1492 |  | 
|  | 1493 | while (!list_empty(&data->pages)) { | 
|  | 1494 | req = nfs_list_entry(data->pages.next); | 
|  | 1495 | nfs_list_remove_request(req); | 
|  | 1496 | nfs_clear_page_commit(req->wb_page); | 
|  | 1497 |  | 
|  | 1498 | dprintk("NFS:       commit (%s/%lld %d@%lld)", | 
|  | 1499 | req->wb_context->dentry->d_sb->s_id, | 
|  | 1500 | (long long)NFS_FILEID(req->wb_context->dentry->d_inode), | 
|  | 1501 | req->wb_bytes, | 
|  | 1502 | (long long)req_offset(req)); | 
|  | 1503 | if (status < 0) { | 
|  | 1504 | nfs_context_set_write_error(req->wb_context, status); | 
|  | 1505 | nfs_inode_remove_request(req); | 
|  | 1506 | dprintk(", error = %d\n", status); | 
|  | 1507 | goto next; | 
|  | 1508 | } | 
|  | 1509 |  | 
|  | 1510 | /* Okay, COMMIT succeeded, apparently. Check the verifier | 
|  | 1511 | * returned by the server against all stored verfs. */ | 
|  | 1512 | if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { | 
|  | 1513 | /* We have a match */ | 
|  | 1514 | nfs_inode_remove_request(req); | 
|  | 1515 | dprintk(" OK\n"); | 
|  | 1516 | goto next; | 
|  | 1517 | } | 
|  | 1518 | /* We have a mismatch. Write the page again */ | 
|  | 1519 | dprintk(" mismatch\n"); | 
|  | 1520 | nfs_mark_request_dirty(req); | 
|  | 1521 | next: | 
|  | 1522 | nfs_unlock_request(req); | 
|  | 1523 | } | 
|  | 1524 | } | 
|  | 1525 | EXPORT_SYMBOL_GPL(nfs_commit_release_pages); | 
|  | 1526 |  | 
|  | 1527 | static void nfs_commit_release(void *calldata) | 
|  | 1528 | { | 
|  | 1529 | struct nfs_write_data *data = calldata; | 
|  | 1530 |  | 
|  | 1531 | nfs_commit_release_pages(data); | 
|  | 1532 | nfs_commit_clear_lock(NFS_I(data->inode)); | 
|  | 1533 | nfs_commitdata_release(calldata); | 
|  | 1534 | } | 
|  | 1535 |  | 
|  | 1536 | static const struct rpc_call_ops nfs_commit_ops = { | 
|  | 1537 | .rpc_call_prepare = nfs_write_prepare, | 
|  | 1538 | .rpc_call_done = nfs_commit_done, | 
|  | 1539 | .rpc_release = nfs_commit_release, | 
|  | 1540 | }; | 
|  | 1541 |  | 
|  | 1542 | int nfs_commit_inode(struct inode *inode, int how) | 
|  | 1543 | { | 
|  | 1544 | LIST_HEAD(head); | 
|  | 1545 | int may_wait = how & FLUSH_SYNC; | 
|  | 1546 | int res; | 
|  | 1547 |  | 
|  | 1548 | res = nfs_commit_set_lock(NFS_I(inode), may_wait); | 
|  | 1549 | if (res <= 0) | 
|  | 1550 | goto out_mark_dirty; | 
|  | 1551 | res = nfs_scan_commit(inode, &head); | 
|  | 1552 | if (res) { | 
|  | 1553 | int error; | 
|  | 1554 |  | 
|  | 1555 | error = pnfs_commit_list(inode, &head, how); | 
|  | 1556 | if (error == PNFS_NOT_ATTEMPTED) | 
|  | 1557 | error = nfs_commit_list(inode, &head, how); | 
|  | 1558 | if (error < 0) | 
|  | 1559 | return error; | 
|  | 1560 | if (!may_wait) | 
|  | 1561 | goto out_mark_dirty; | 
|  | 1562 | error = wait_on_bit(&NFS_I(inode)->flags, | 
|  | 1563 | NFS_INO_COMMIT, | 
|  | 1564 | nfs_wait_bit_killable, | 
|  | 1565 | TASK_KILLABLE); | 
|  | 1566 | if (error < 0) | 
|  | 1567 | return error; | 
|  | 1568 | } else | 
|  | 1569 | nfs_commit_clear_lock(NFS_I(inode)); | 
|  | 1570 | return res; | 
|  | 1571 | /* Note: If we exit without ensuring that the commit is complete, | 
|  | 1572 | * we must mark the inode as dirty. Otherwise, future calls to | 
|  | 1573 | * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure | 
|  | 1574 | * that the data is on the disk. | 
|  | 1575 | */ | 
|  | 1576 | out_mark_dirty: | 
|  | 1577 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 
|  | 1578 | return res; | 
|  | 1579 | } | 
|  | 1580 |  | 
|  | 1581 | static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) | 
|  | 1582 | { | 
|  | 1583 | struct nfs_inode *nfsi = NFS_I(inode); | 
|  | 1584 | int flags = FLUSH_SYNC; | 
|  | 1585 | int ret = 0; | 
|  | 1586 |  | 
|  | 1587 | /* no commits means nothing needs to be done */ | 
|  | 1588 | if (!nfsi->ncommit) | 
|  | 1589 | return ret; | 
|  | 1590 |  | 
|  | 1591 | if (wbc->sync_mode == WB_SYNC_NONE) { | 
|  | 1592 | /* Don't commit yet if this is a non-blocking flush and there | 
|  | 1593 | * are a lot of outstanding writes for this mapping. | 
|  | 1594 | */ | 
|  | 1595 | if (nfsi->ncommit <= (nfsi->npages >> 1)) | 
|  | 1596 | goto out_mark_dirty; | 
|  | 1597 |  | 
|  | 1598 | /* don't wait for the COMMIT response */ | 
|  | 1599 | flags = 0; | 
|  | 1600 | } | 
|  | 1601 |  | 
|  | 1602 | ret = nfs_commit_inode(inode, flags); | 
|  | 1603 | if (ret >= 0) { | 
|  | 1604 | if (wbc->sync_mode == WB_SYNC_NONE) { | 
|  | 1605 | if (ret < wbc->nr_to_write) | 
|  | 1606 | wbc->nr_to_write -= ret; | 
|  | 1607 | else | 
|  | 1608 | wbc->nr_to_write = 0; | 
|  | 1609 | } | 
|  | 1610 | return 0; | 
|  | 1611 | } | 
|  | 1612 | out_mark_dirty: | 
|  | 1613 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 
|  | 1614 | return ret; | 
|  | 1615 | } | 
|  | 1616 | #else | 
/*
 * Variant built when neither CONFIG_NFS_V3 nor CONFIG_NFS_V4 is
 * defined: there is nothing to commit, so always report success.
 */
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	return 0;
}
|  | 1621 | #endif | 
|  | 1622 |  | 
|  | 1623 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 
|  | 1624 | { | 
|  | 1625 | int ret; | 
|  | 1626 |  | 
|  | 1627 | ret = nfs_commit_unstable_pages(inode, wbc); | 
|  | 1628 | if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { | 
|  | 1629 | int status; | 
|  | 1630 | bool sync = true; | 
|  | 1631 |  | 
|  | 1632 | if (wbc->sync_mode == WB_SYNC_NONE) | 
|  | 1633 | sync = false; | 
|  | 1634 |  | 
|  | 1635 | status = pnfs_layoutcommit_inode(inode, sync); | 
|  | 1636 | if (status < 0) | 
|  | 1637 | return status; | 
|  | 1638 | } | 
|  | 1639 | return ret; | 
|  | 1640 | } | 
|  | 1641 |  | 
|  | 1642 | /* | 
|  | 1643 | * flush the inode to disk. | 
|  | 1644 | */ | 
|  | 1645 | int nfs_wb_all(struct inode *inode) | 
|  | 1646 | { | 
|  | 1647 | struct writeback_control wbc = { | 
|  | 1648 | .sync_mode = WB_SYNC_ALL, | 
|  | 1649 | .nr_to_write = LONG_MAX, | 
|  | 1650 | .range_start = 0, | 
|  | 1651 | .range_end = LLONG_MAX, | 
|  | 1652 | }; | 
|  | 1653 |  | 
|  | 1654 | return sync_inode(inode, &wbc); | 
|  | 1655 | } | 
|  | 1656 |  | 
|  | 1657 | int nfs_wb_page_cancel(struct inode *inode, struct page *page) | 
|  | 1658 | { | 
|  | 1659 | struct nfs_page *req; | 
|  | 1660 | int ret = 0; | 
|  | 1661 |  | 
|  | 1662 | BUG_ON(!PageLocked(page)); | 
|  | 1663 | for (;;) { | 
|  | 1664 | wait_on_page_writeback(page); | 
|  | 1665 | req = nfs_page_find_request(page); | 
|  | 1666 | if (req == NULL) | 
|  | 1667 | break; | 
|  | 1668 | if (nfs_lock_request_dontget(req)) { | 
|  | 1669 | nfs_clear_request_commit(req); | 
|  | 1670 | nfs_inode_remove_request(req); | 
|  | 1671 | /* | 
|  | 1672 | * In case nfs_inode_remove_request has marked the | 
|  | 1673 | * page as being dirty | 
|  | 1674 | */ | 
|  | 1675 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 
|  | 1676 | nfs_unlock_request(req); | 
|  | 1677 | break; | 
|  | 1678 | } | 
|  | 1679 | ret = nfs_wait_on_request(req); | 
|  | 1680 | nfs_release_request(req); | 
|  | 1681 | if (ret < 0) | 
|  | 1682 | break; | 
|  | 1683 | } | 
|  | 1684 | return ret; | 
|  | 1685 | } | 
|  | 1686 |  | 
|  | 1687 | /* | 
|  | 1688 | * Write back all requests on one page - we do this before reading it. | 
|  | 1689 | */ | 
|  | 1690 | int nfs_wb_page(struct inode *inode, struct page *page) | 
|  | 1691 | { | 
|  | 1692 | loff_t range_start = page_offset(page); | 
|  | 1693 | loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); | 
|  | 1694 | struct writeback_control wbc = { | 
|  | 1695 | .sync_mode = WB_SYNC_ALL, | 
|  | 1696 | .nr_to_write = 0, | 
|  | 1697 | .range_start = range_start, | 
|  | 1698 | .range_end = range_end, | 
|  | 1699 | }; | 
|  | 1700 | int ret; | 
|  | 1701 |  | 
|  | 1702 | for (;;) { | 
|  | 1703 | wait_on_page_writeback(page); | 
|  | 1704 | if (clear_page_dirty_for_io(page)) { | 
|  | 1705 | ret = nfs_writepage_locked(page, &wbc); | 
|  | 1706 | if (ret < 0) | 
|  | 1707 | goto out_error; | 
|  | 1708 | continue; | 
|  | 1709 | } | 
|  | 1710 | if (!PagePrivate(page)) | 
|  | 1711 | break; | 
|  | 1712 | ret = nfs_commit_inode(inode, FLUSH_SYNC); | 
|  | 1713 | if (ret < 0) | 
|  | 1714 | goto out_error; | 
|  | 1715 | } | 
|  | 1716 | return 0; | 
|  | 1717 | out_error: | 
|  | 1718 | return ret; | 
|  | 1719 | } | 
|  | 1720 |  | 
|  | 1721 | #ifdef CONFIG_MIGRATION | 
|  | 1722 | int nfs_migrate_page(struct address_space *mapping, struct page *newpage, | 
|  | 1723 | struct page *page, enum migrate_mode mode) | 
|  | 1724 | { | 
|  | 1725 | /* | 
|  | 1726 | * If PagePrivate is set, then the page is currently associated with | 
|  | 1727 | * an in-progress read or write request. Don't try to migrate it. | 
|  | 1728 | * | 
|  | 1729 | * FIXME: we could do this in principle, but we'll need a way to ensure | 
|  | 1730 | *        that we can safely release the inode reference while holding | 
|  | 1731 | *        the page lock. | 
|  | 1732 | */ | 
|  | 1733 | if (PagePrivate(page)) | 
|  | 1734 | return -EBUSY; | 
|  | 1735 |  | 
|  | 1736 | nfs_fscache_release_page(page, GFP_KERNEL); | 
|  | 1737 |  | 
|  | 1738 | return migrate_page(mapping, newpage, page, mode); | 
|  | 1739 | } | 
|  | 1740 | #endif | 
|  | 1741 |  | 
|  | 1742 | int __init nfs_init_writepagecache(void) | 
|  | 1743 | { | 
|  | 1744 | nfs_wdata_cachep = kmem_cache_create("nfs_write_data", | 
|  | 1745 | sizeof(struct nfs_write_data), | 
|  | 1746 | 0, SLAB_HWCACHE_ALIGN, | 
|  | 1747 | NULL); | 
|  | 1748 | if (nfs_wdata_cachep == NULL) | 
|  | 1749 | return -ENOMEM; | 
|  | 1750 |  | 
|  | 1751 | nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, | 
|  | 1752 | nfs_wdata_cachep); | 
|  | 1753 | if (nfs_wdata_mempool == NULL) | 
|  | 1754 | goto out_destroy_write_cache; | 
|  | 1755 |  | 
|  | 1756 | nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, | 
|  | 1757 | nfs_wdata_cachep); | 
|  | 1758 | if (nfs_commit_mempool == NULL) | 
|  | 1759 | goto out_destroy_write_mempool; | 
|  | 1760 |  | 
|  | 1761 | /* | 
|  | 1762 | * NFS congestion size, scale with available memory. | 
|  | 1763 | * | 
|  | 1764 | *  64MB:    8192k | 
|  | 1765 | * 128MB:   11585k | 
|  | 1766 | * 256MB:   16384k | 
|  | 1767 | * 512MB:   23170k | 
|  | 1768 | *   1GB:   32768k | 
|  | 1769 | *   2GB:   46340k | 
|  | 1770 | *   4GB:   65536k | 
|  | 1771 | *   8GB:   92681k | 
|  | 1772 | *  16GB:  131072k | 
|  | 1773 | * | 
|  | 1774 | * This allows larger machines to have larger/more transfers. | 
|  | 1775 | * Limit the default to 256M | 
|  | 1776 | */ | 
|  | 1777 | nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | 
|  | 1778 | if (nfs_congestion_kb > 256*1024) | 
|  | 1779 | nfs_congestion_kb = 256*1024; | 
|  | 1780 |  | 
|  | 1781 | return 0; | 
|  | 1782 |  | 
|  | 1783 | out_destroy_write_mempool: | 
|  | 1784 | mempool_destroy(nfs_wdata_mempool); | 
|  | 1785 | out_destroy_write_cache: | 
|  | 1786 | kmem_cache_destroy(nfs_wdata_cachep); | 
|  | 1787 | return -ENOMEM; | 
|  | 1788 | } | 
|  | 1789 |  | 
|  | 1790 | void nfs_destroy_writepagecache(void) | 
|  | 1791 | { | 
|  | 1792 | mempool_destroy(nfs_commit_mempool); | 
|  | 1793 | mempool_destroy(nfs_wdata_mempool); | 
|  | 1794 | kmem_cache_destroy(nfs_wdata_cachep); | 
|  | 1795 | } | 
|  | 1796 |  |