/*
 *  pNFS functions to call and manage layout drivers.
 *
 *  Copyright (c) 2002 [year of first publication]
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include <linux/sort.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
#include "nfs4trace.h"
#include "delegation.h"
#include "nfs42.h"
#include "nfs4_fs.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)

/* Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 */
static LIST_HEAD(pnfs_modules_tbl);

static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
		struct list_head *free_me,
		const struct pnfs_layout_range *range,
		u32 seq);
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
		struct list_head *tmp_list);

/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
		if (local->id == id)
			goto out;
	local = NULL;
out:
	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
	return local;
}

static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	spin_lock(&pnfs_spinlock);
	local = find_pnfs_driver_locked(id);
	if (local != NULL && !try_module_get(local->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		local = NULL;
	}
	spin_unlock(&pnfs_spinlock);
	return local;
}

void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
	if (nfss->pnfs_curr_ld) {
		if (nfss->pnfs_curr_ld->clear_layoutdriver)
			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
		/* Decrement the MDS count. Purge the deviceid cache if zero */
		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
			nfs4_deviceid_purge_client(nfss->nfs_client);
		module_put(nfss->pnfs_curr_ld->owner);
	}
	nfss->pnfs_curr_ld = NULL;
}

/*
 * When the server sends a list of layout types, we choose one in the order
 * given in the list below.
 *
 * FIXME: should this list be configurable in some fashion? module param?
 *	  mount option? something else?
 */
static const u32 ld_prefs[] = {
	LAYOUT_SCSI,
	LAYOUT_BLOCK_VOLUME,
	LAYOUT_OSD2_OBJECTS,
	LAYOUT_FLEX_FILES,
	LAYOUT_NFSV4_1_FILES,
	0
};

static int
ld_cmp(const void *e1, const void *e2)
{
	u32 ld1 = *((u32 *)e1);
	u32 ld2 = *((u32 *)e2);
	int i;

	for (i = 0; ld_prefs[i] != 0; i++) {
		if (ld1 == ld_prefs[i])
			return -1;

		if (ld2 == ld_prefs[i])
			return 1;
	}
	return 0;
}
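
/*
 * Ordering example (explanatory): ld_cmp() is a sort() comparator that
 * orders layout types by their position in ld_prefs[], most preferred
 * first.  Sorting a server-provided list such as
 * { LAYOUT_NFSV4_1_FILES, LAYOUT_FLEX_FILES, LAYOUT_SCSI } yields
 * { LAYOUT_SCSI, LAYOUT_FLEX_FILES, LAYOUT_NFSV4_1_FILES }, so
 * set_pnfs_layoutdriver() below tries SCSI first.  Types not listed in
 * ld_prefs[] compare equal to each other and sort after all listed
 * types, in unspecified relative order.
 */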

/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @fsinfo: fsinfo received from the MDS; fsinfo->layouttype is the array
 * of layout types supported by the MDS.
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
		      struct nfs_fsinfo *fsinfo)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;
	u32 id;
	int i;

	if (fsinfo->nlayouttypes == 0)
		goto out_no_driver;
	if (!(server->nfs_client->cl_exchange_flags &
		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n",
			__func__, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}

	sort(fsinfo->layouttype, fsinfo->nlayouttypes,
		sizeof(*fsinfo->layouttype), ld_cmp, NULL);

	for (i = 0; i < fsinfo->nlayouttypes; i++) {
		id = fsinfo->layouttype[i];
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX,
					id);
			ld_type = find_pnfs_driver(id);
		}
		if (ld_type)
			break;
	}

	if (!ld_type) {
		dprintk("%s: No pNFS module found!\n", __func__);
		goto out_no_driver;
	}

	server->pnfs_curr_ld = ld_type;
	if (ld_type->set_layoutdriver
	    && ld_type->set_layoutdriver(server, mntfh)) {
		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
			"driver %u.\n", __func__, id);
		module_put(ld_type->owner);
		goto out_no_driver;
	}
	/* Bump the MDS count */
	atomic_inc(&server->nfs_client->cl_mds_count);

	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}
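
/*
 * Module autoloading note (illustrative): the request_module() call in
 * set_pnfs_layoutdriver() builds an alias of the form
 * "<LAYOUT_NFSV4_1_MODULE_PREFIX>-<id>", so a layout type of 1
 * (LAYOUT_NFSV4_1_FILES) would request something like "nfs-layouttype4-1",
 * assuming the usual expansion of the prefix.  Layout driver modules are
 * expected to declare a matching MODULE_ALIAS so kmod can load them on
 * demand.
 */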

int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	int status = -EINVAL;
	struct pnfs_layoutdriver_type *tmp;

	if (ld_type->id == 0) {
		printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
		return status;
	}
	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
		printk(KERN_ERR "NFS: %s Layout driver must provide "
			"alloc_lseg and free_lseg.\n", __func__);
		return status;
	}

	spin_lock(&pnfs_spinlock);
	tmp = find_pnfs_driver_locked(ld_type->id);
	if (!tmp) {
		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
		status = 0;
		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
			ld_type->name);
	} else {
		printk(KERN_ERR "NFS: %s Module with id %u already loaded!\n",
			__func__, ld_type->id);
	}
	spin_unlock(&pnfs_spinlock);

	return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
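
/*
 * Registration sketch (illustrative, not a real driver): a layout driver
 * module typically registers itself from its module_init hook, e.g.
 *
 *	static struct pnfs_layoutdriver_type mydriver_type = {
 *		.id		= LAYOUT_NFSV4_1_FILES,
 *		.name		= "mydriver",
 *		.owner		= THIS_MODULE,
 *		.alloc_lseg	= mydriver_alloc_lseg,
 *		.free_lseg	= mydriver_free_lseg,
 *		...
 *	};
 *
 * with pnfs_register_layoutdriver(&mydriver_type) at module init and
 * pnfs_unregister_layoutdriver(&mydriver_type) at module exit.  The
 * "mydriver" names are hypothetical; alloc_lseg/free_lseg are mandatory,
 * as enforced above.
 */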

void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);

/*
 * pNFS client layout cache
 */

/* Need to hold i_lock if caller does not already hold reference */
void
pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
{
	refcount_inc(&lo->plh_refcount);
}

static struct pnfs_layout_hdr *
pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;

	return ld->alloc_layout_hdr(ino, gfp_flags);
}

static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

	if (!list_empty(&lo->plh_layouts)) {
		struct nfs_client *clp = server->nfs_client;

		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	put_rpccred(lo->plh_lc_cred);
	return ld->free_layout_hdr(lo);
}

static void
pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs_inode *nfsi = NFS_I(lo->plh_inode);

	dprintk("%s: freeing layout cache %p\n", __func__, lo);
	nfsi->layout = NULL;
	/* Reset MDS Threshold I/O counters */
	nfsi->write_io = 0;
	nfsi->read_io = 0;
}

void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode;

	if (!lo)
		return;
	inode = lo->plh_inode;
	pnfs_layoutreturn_before_put_layout_hdr(lo);

	if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
		if (!list_empty(&lo->plh_segs))
			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
		pnfs_detach_layout_hdr(lo);
		spin_unlock(&inode->i_lock);
		pnfs_free_layout_hdr(lo);
	}
}

static void
pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
			 u32 seq)
{
	if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode)
		iomode = IOMODE_ANY;
	lo->plh_return_iomode = iomode;
	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
	if (seq != 0) {
		WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq);
		lo->plh_return_seq = seq;
	}
}

static void
pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
{
	struct pnfs_layout_segment *lseg;

	lo->plh_return_iomode = 0;
	lo->plh_return_seq = 0;
	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
		if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
			continue;
		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
	}
}

static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
{
	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
	clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags);
	smp_mb__after_atomic();
	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
}
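
/*
 * Ordering note (explanatory): clear_bit_unlock() provides release
 * semantics for NFS_LAYOUT_RETURN, and the smp_mb__after_atomic() above
 * makes the flag updates visible before wake_up_bit() looks for waiters.
 * This pairs with the wait_on_bit(..., NFS_LAYOUT_RETURN, ...) callers
 * later in this file (e.g. _pnfs_return_layout() and pnfs_roc()), which
 * sleep until an in-flight LAYOUTRETURN completes.
 */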

static void
pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg,
		struct list_head *free_me)
{
	clear_bit(NFS_LSEG_ROC, &lseg->pls_flags);
	clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags))
		pnfs_lseg_dec_and_remove_zero(lseg, free_me);
	if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
		pnfs_lseg_dec_and_remove_zero(lseg, free_me);
}

/*
 * Update the seqid of a layout stateid
 */
bool nfs4_layoutreturn_refresh_stateid(nfs4_stateid *dst,
		struct pnfs_layout_range *dst_range,
		struct inode *inode)
{
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_range range = {
		.iomode = IOMODE_ANY,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	bool ret = false;
	LIST_HEAD(head);
	int err;

	spin_lock(&inode->i_lock);
	lo = NFS_I(inode)->layout;
	if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) {
		err = pnfs_mark_matching_lsegs_return(lo, &head, &range, 0);
		if (err != -EBUSY) {
			dst->seqid = lo->plh_stateid.seqid;
			*dst_range = range;
			ret = true;
		}
	}
	spin_unlock(&inode->i_lock);
	pnfs_free_lseg_list(&head);
	return ret;
}

/*
 * Mark a pnfs_layout_hdr and all associated layout segments as invalid
 *
 * In order to continue using the pnfs_layout_hdr, a full recovery
 * is required.
 * Note that caller must hold inode->i_lock.
 */
int
pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
		struct list_head *lseg_list)
{
	struct pnfs_layout_range range = {
		.iomode = IOMODE_ANY,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	struct pnfs_layout_segment *lseg, *next;

	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		pnfs_clear_lseg_state(lseg, lseg_list);
	pnfs_clear_layoutreturn_info(lo);
	pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
	    !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
		pnfs_clear_layoutreturn_waitbit(lo);
	return !list_empty(&lo->plh_segs);
}

static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
	return iomode == IOMODE_RW ?
		NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
}

static void
pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
	lo->plh_retry_timestamp = jiffies;
	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
		refcount_inc(&lo->plh_refcount);
}

static void
pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
	if (test_and_clear_bit(fail_bit, &lo->plh_flags))
		refcount_dec(&lo->plh_refcount);
}

static void
pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
{
	struct inode *inode = lo->plh_inode;
	struct pnfs_layout_range range = {
		.iomode = iomode,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	LIST_HEAD(head);

	spin_lock(&inode->i_lock);
	pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
	pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0);
	spin_unlock(&inode->i_lock);
	pnfs_free_lseg_list(&head);
	dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
		iomode == IOMODE_RW ? "RW" : "READ");
}

static bool
pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
{
	unsigned long start, end;
	int fail_bit = pnfs_iomode_to_fail_bit(iomode);

	if (test_bit(fail_bit, &lo->plh_flags) == 0)
		return false;
	end = jiffies;
	start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
	if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
		/* It is time to retry the failed layoutgets */
		pnfs_layout_clear_fail_bit(lo, fail_bit);
		return false;
	}
	return true;
}
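
/*
 * Retry-window example (explanatory): pnfs_layout_set_fail_bit() stamps
 * plh_retry_timestamp with the current jiffies.  With
 * PNFS_LAYOUTGET_RETRY_TIMEOUT at 120*HZ, pnfs_layout_io_test_failed()
 * keeps reporting failure for roughly two minutes; once the timestamp
 * falls outside [jiffies - 120*HZ, jiffies], the fail bit is cleared and
 * LAYOUTGET may be retried for that iomode.
 */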

static void
pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg,
		const struct pnfs_layout_range *range,
		const nfs4_stateid *stateid)
{
	INIT_LIST_HEAD(&lseg->pls_list);
	INIT_LIST_HEAD(&lseg->pls_lc_list);
	refcount_set(&lseg->pls_refcount, 1);
	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
	lseg->pls_layout = lo;
	lseg->pls_range = *range;
	lseg->pls_seq = be32_to_cpu(stateid->seqid);
}

static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
{
	if (lseg != NULL) {
		struct inode *inode = lseg->pls_layout->plh_inode;

		NFS_SERVER(inode)->pnfs_curr_ld->free_lseg(lseg);
	}
}

static void
pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_segment *lseg)
{
	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	list_del_init(&lseg->pls_list);
	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
	refcount_dec(&lo->plh_refcount);
	if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
		return;
	if (list_empty(&lo->plh_segs) &&
	    !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) &&
	    !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
		if (atomic_read(&lo->plh_outstanding) == 0)
			set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
	}
}

static bool
pnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_segment *lseg)
{
	if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
	    pnfs_layout_is_valid(lo)) {
		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
		list_move_tail(&lseg->pls_list, &lo->plh_return_segs);
		return true;
	}
	return false;
}

void
pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo;
	struct inode *inode;

	if (!lseg)
		return;

	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
		refcount_read(&lseg->pls_refcount),
		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));

	lo = lseg->pls_layout;
	inode = lo->plh_inode;

	if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
			spin_unlock(&inode->i_lock);
			return;
		}
		pnfs_get_layout_hdr(lo);
		pnfs_layout_remove_lseg(lo, lseg);
		if (pnfs_cache_lseg_for_layoutreturn(lo, lseg))
			lseg = NULL;
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg(lseg);
		pnfs_put_layout_hdr(lo);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg);

/*
 * is l2 fully contained in l1?
 *   start1                             end1
 *   [----------------------------------)
 *           start2           end2
 *           [----------------)
 */
static bool
pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
		const struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = pnfs_end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = pnfs_end_offset(start2, l2->length);

	return (start1 <= start2) && (end1 >= end2);
}
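
/*
 * Containment example (explanatory): ranges are half-open byte intervals
 * [offset, offset + length).  With l1 = {offset 0, length 1048576} and
 * l2 = {offset 4096, length 8192}, end1 = 1048576 and end2 = 12288, so
 * l2 is contained in l1.  A length of NFS4_MAX_UINT64 means "to EOF",
 * which pnfs_end_offset() is expected to clamp on overflow so that the
 * whole-file range contains every other range.
 */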

static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
		struct list_head *tmp_list)
{
	if (!refcount_dec_and_test(&lseg->pls_refcount))
		return false;
	pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
	list_add(&lseg->pls_list, tmp_list);
	return true;
}

/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
		struct list_head *tmp_list)
{
	int rv = 0;

	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
		/* Remove the reference keeping the lseg in the
		 * list.  It will now be removed when all
		 * outstanding io is finished.
		 */
		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
			refcount_read(&lseg->pls_refcount));
		if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
			rv = 1;
	}
	return rv;
}

/*
 * Compare 2 layout stateid sequence ids, to see which is newer,
 * taking into account wraparound issues.
 */
static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
{
	return (s32)(s1 - s2) > 0;
}
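
/*
 * Wraparound example (explanatory): the signed reinterpretation makes
 * the comparison robust across a 32-bit seqid rollover.  For instance,
 * pnfs_seqid_is_newer(2, 0xfffffffe) is true because
 * (s32)(2 - 0xfffffffe) == 4 > 0, whereas a plain "s1 > s2" would get
 * this case wrong.  This is the same serial-number arithmetic used for
 * TCP sequence numbers (RFC 1982 style).
 */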

static bool
pnfs_should_free_range(const struct pnfs_layout_range *lseg_range,
		const struct pnfs_layout_range *recall_range)
{
	return (recall_range->iomode == IOMODE_ANY ||
		lseg_range->iomode == recall_range->iomode) &&
	       pnfs_lseg_range_intersecting(lseg_range, recall_range);
}

static bool
pnfs_match_lseg_recall(const struct pnfs_layout_segment *lseg,
		const struct pnfs_layout_range *recall_range,
		u32 seq)
{
	if (seq != 0 && pnfs_seqid_is_newer(lseg->pls_seq, seq))
		return false;
	if (recall_range == NULL)
		return true;
	return pnfs_should_free_range(&lseg->pls_range, recall_range);
}

/**
 * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later
 * @lo: layout header containing the lsegs
 * @tmp_list: list head where doomed lsegs should go
 * @recall_range: optional recall range argument to match (may be NULL)
 * @seq: only invalidate lsegs obtained prior to this sequence (may be 0)
 *
 * Walk the list of lsegs in the layout header, and tear down any that should
 * be destroyed. If "recall_range" is specified then the segment must match
 * that range. If "seq" is non-zero, then only match segments that were handed
 * out at or before that sequence.
 *
 * Returns number of matching invalid lsegs remaining in list after scanning
 * it and purging them.
 */
int
pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
		struct list_head *tmp_list,
		const struct pnfs_layout_range *recall_range,
		u32 seq)
{
	struct pnfs_layout_segment *lseg, *next;
	int remaining = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	if (list_empty(&lo->plh_segs))
		return 0;
	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (pnfs_match_lseg_recall(lseg, recall_range, seq)) {
			dprintk("%s: freeing lseg %p iomode %d seq %u "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode, lseg->pls_seq,
				lseg->pls_range.offset, lseg->pls_range.length);
			if (!mark_lseg_invalid(lseg, tmp_list))
				remaining++;
		}
	dprintk("%s:Return %i\n", __func__, remaining);
	return remaining;
}

static void
pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
		struct list_head *free_me,
		const struct pnfs_layout_range *range,
		u32 seq)
{
	struct pnfs_layout_segment *lseg, *next;

	list_for_each_entry_safe(lseg, next, &lo->plh_return_segs, pls_list) {
		if (pnfs_match_lseg_recall(lseg, range, seq))
			list_move_tail(&lseg->pls_list, free_me);
	}
}

/* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
	struct pnfs_layout_segment *lseg, *tmp;

	if (list_empty(free_me))
		return;

	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
		list_del(&lseg->pls_list);
		pnfs_free_lseg(lseg);
	}
}

void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		pnfs_get_layout_hdr(lo);
		pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
		spin_unlock(&nfsi->vfs_inode.i_lock);
		pnfs_free_lseg_list(&tmp_list);
		nfs_commit_inode(&nfsi->vfs_inode, 0);
		pnfs_put_layout_hdr(lo);
	} else
		spin_unlock(&nfsi->vfs_inode.i_lock);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);

static bool
pnfs_layout_add_bulk_destroy_list(struct inode *inode,
		struct list_head *layout_list)
{
	struct pnfs_layout_hdr *lo;
	bool ret = false;

	spin_lock(&inode->i_lock);
	lo = NFS_I(inode)->layout;
	if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
		pnfs_get_layout_hdr(lo);
		list_add(&lo->plh_bulk_destroy, layout_list);
		ret = true;
	}
	spin_unlock(&inode->i_lock);
	return ret;
}

/* Caller must hold rcu_read_lock and clp->cl_lock */
static int
pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
		struct nfs_server *server,
		struct list_head *layout_list)
{
	struct pnfs_layout_hdr *lo, *next;
	struct inode *inode;

	list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
		if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
			continue;
		inode = igrab(lo->plh_inode);
		if (inode == NULL)
			continue;
		list_del_init(&lo->plh_layouts);
		if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
			continue;
		rcu_read_unlock();
		spin_unlock(&clp->cl_lock);
		iput(inode);
		spin_lock(&clp->cl_lock);
		rcu_read_lock();
		return -EAGAIN;
	}
	return 0;
}

static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
		bool is_bulk_recall)
{
	struct pnfs_layout_hdr *lo;
	struct inode *inode;
	LIST_HEAD(lseg_list);
	int ret = 0;

	while (!list_empty(layout_list)) {
		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
				plh_bulk_destroy);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->plh_inode->i_ino);
		inode = lo->plh_inode;

		pnfs_layoutcommit_inode(inode, false);

		spin_lock(&inode->i_lock);
		list_del_init(&lo->plh_bulk_destroy);
		if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
			if (is_bulk_recall)
				set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
			ret = -EAGAIN;
		}
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg_list(&lseg_list);
		/* Free all lsegs that are attached to commit buckets */
		nfs_commit_inode(inode, 0);
		pnfs_put_layout_hdr(lo);
		iput(inode);
	}
	return ret;
}

int
pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
		struct nfs_fsid *fsid,
		bool is_recall)
{
	struct nfs_server *server;
	LIST_HEAD(layout_list);

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
restart:
	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
		if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
			continue;
		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
							     server,
							     &layout_list) != 0)
			goto restart;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);

	if (list_empty(&layout_list))
		return 0;
	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

int
pnfs_destroy_layouts_byclid(struct nfs_client *clp,
		bool is_recall)
{
	struct nfs_server *server;
	LIST_HEAD(layout_list);

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
restart:
	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
							     server,
							     &layout_list) != 0)
			goto restart;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);

	if (list_empty(&layout_list))
		return 0;
	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	nfs4_deviceid_mark_client_invalid(clp);
	nfs4_deviceid_purge_client(clp);

	pnfs_destroy_layouts_byclid(clp, false);
}

/* Update lo->plh_stateid with 'new' if it is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
			bool update_barrier)
{
	u32 oldseq, newseq, new_barrier = 0;

	oldseq = be32_to_cpu(lo->plh_stateid.seqid);
	newseq = be32_to_cpu(new->seqid);

	if (!pnfs_layout_is_valid(lo)) {
		nfs4_stateid_copy(&lo->plh_stateid, new);
		lo->plh_barrier = newseq;
		pnfs_clear_layoutreturn_info(lo);
		clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
		return;
	}
	if (pnfs_seqid_is_newer(newseq, oldseq)) {
		nfs4_stateid_copy(&lo->plh_stateid, new);
		/*
		 * Because of wraparound, we want to keep the barrier
		 * "close" to the current seqids.
		 */
		new_barrier = newseq - atomic_read(&lo->plh_outstanding);
	}
	if (update_barrier)
		new_barrier = be32_to_cpu(new->seqid);
	else if (new_barrier == 0)
		return;
	if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
		lo->plh_barrier = new_barrier;
}
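
/*
 * Barrier example (explanatory): pnfs_layout_stateid_blocked() below
 * rejects any incoming stateid whose seqid does not sort after
 * plh_barrier.  Suppose the current seqid is 10 and two LAYOUTGETs are
 * outstanding: the barrier is kept at 10 - 2 = 8, so a delayed reply
 * carrying seqid 9 is still accepted, while anything at or below 8 is
 * treated as stale.  A LAYOUTRETURN reply (update_barrier == true)
 * forces the barrier up to its own seqid.
 */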

static bool
pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
		const nfs4_stateid *stateid)
{
	u32 seqid = be32_to_cpu(stateid->seqid);

	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
}

/* Return true if LAYOUTGET is currently blocked for this layout */
static bool
pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
{
	return lo->plh_block_lgets ||
		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
}

static struct nfs_server *
pnfs_find_server(struct inode *inode, struct nfs_open_context *ctx)
{
	struct nfs_server *server;

	if (inode) {
		server = NFS_SERVER(inode);
	} else {
		struct dentry *parent_dir = dget_parent(ctx->dentry);

		server = NFS_SERVER(parent_dir->d_inode);
		dput(parent_dir);
	}
	return server;
}

static void nfs4_free_pages(struct page **pages, size_t size)
{
	int i;

	if (!pages)
		return;

	for (i = 0; i < size; i++) {
		if (!pages[i])
			break;
		__free_page(pages[i]);
	}
	kfree(pages);
}

static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
{
	struct page **pages;
	int i;

	pages = kcalloc(size, sizeof(struct page *), gfp_flags);
	if (!pages) {
		dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
		return NULL;
	}

	for (i = 0; i < size; i++) {
		pages[i] = alloc_page(gfp_flags);
		if (!pages[i]) {
			dprintk("%s: failed to allocate page\n", __func__);
			nfs4_free_pages(pages, size);
			return NULL;
		}
	}

	return pages;
}

static struct nfs4_layoutget *
pnfs_alloc_init_layoutget_args(struct inode *ino,
		struct nfs_open_context *ctx,
		const nfs4_stateid *stateid,
		const struct pnfs_layout_range *range,
		gfp_t gfp_flags)
{
	struct nfs_server *server = pnfs_find_server(ino, ctx);
	size_t max_pages = max_response_pages(server);
	struct nfs4_layoutget *lgp;

	dprintk("--> %s\n", __func__);

	lgp = kzalloc(sizeof(*lgp), gfp_flags);
	if (lgp == NULL)
		return NULL;

	lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
	if (!lgp->args.layout.pages) {
		kfree(lgp);
		return NULL;
	}
	lgp->args.layout.pglen = max_pages * PAGE_SIZE;
	lgp->res.layoutp = &lgp->args.layout;

	/* Don't confuse uninitialised result and success */
	lgp->res.status = -NFS4ERR_DELAY;

	lgp->args.minlength = PAGE_SIZE;
	if (lgp->args.minlength > range->length)
		lgp->args.minlength = range->length;
	if (ino) {
		loff_t i_size = i_size_read(ino);

		if (range->iomode == IOMODE_READ) {
			if (range->offset >= i_size)
				lgp->args.minlength = 0;
			else if (i_size - range->offset < lgp->args.minlength)
				lgp->args.minlength = i_size - range->offset;
		}
	}
	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
	pnfs_copy_range(&lgp->args.range, range);
	lgp->args.type = server->pnfs_curr_ld->id;
	lgp->args.inode = ino;
	lgp->args.ctx = get_nfs_open_context(ctx);
	nfs4_stateid_copy(&lgp->args.stateid, stateid);
	lgp->gfp_flags = gfp_flags;
	lgp->cred = get_rpccred(ctx->cred);
	return lgp;
}

void pnfs_layoutget_free(struct nfs4_layoutget *lgp)
{
	size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE;

	nfs4_free_pages(lgp->args.layout.pages, max_pages);
	if (lgp->args.inode)
		pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout);
	put_rpccred(lgp->cred);
	put_nfs_open_context(lgp->args.ctx);
	kfree(lgp);
}

static void pnfs_clear_layoutcommit(struct inode *inode,
		struct list_head *head)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct pnfs_layout_segment *lseg, *tmp;

	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		return;
	list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
		if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
			continue;
		pnfs_lseg_dec_and_remove_zero(lseg, head);
	}
}

void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
		const nfs4_stateid *arg_stateid,
		const struct pnfs_layout_range *range,
		const nfs4_stateid *stateid)
{
	struct inode *inode = lo->plh_inode;
	LIST_HEAD(freeme);

	spin_lock(&inode->i_lock);
	if (!pnfs_layout_is_valid(lo) || !arg_stateid ||
	    !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
		goto out_unlock;
	if (stateid) {
		u32 seq = be32_to_cpu(arg_stateid->seqid);

		pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
		pnfs_free_returned_lsegs(lo, &freeme, range, seq);
		pnfs_set_layout_stateid(lo, stateid, true);
	} else
		pnfs_mark_layout_stateid_invalid(lo, &freeme);
out_unlock:
	pnfs_clear_layoutreturn_waitbit(lo);
	spin_unlock(&inode->i_lock);
	pnfs_free_lseg_list(&freeme);
}

static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
		nfs4_stateid *stateid,
		enum pnfs_iomode *iomode)
{
	/* Serialise LAYOUTGET/LAYOUTRETURN */
	if (atomic_read(&lo->plh_outstanding) != 0)
		return false;
	if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
		return false;
	set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
	pnfs_get_layout_hdr(lo);
	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) {
		if (stateid != NULL) {
			nfs4_stateid_copy(stateid, &lo->plh_stateid);
			if (lo->plh_return_seq != 0)
				stateid->seqid = cpu_to_be32(lo->plh_return_seq);
		}
		if (iomode != NULL)
			*iomode = lo->plh_return_iomode;
		pnfs_clear_layoutreturn_info(lo);
		return true;
	}
	if (stateid != NULL)
		nfs4_stateid_copy(stateid, &lo->plh_stateid);
	if (iomode != NULL)
		*iomode = IOMODE_ANY;
	return true;
}
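
/*
 * Serialisation note (explanatory): pnfs_prepare_layoutreturn() refuses
 * to start a LAYOUTRETURN while LAYOUTGETs are outstanding, and uses
 * NFS_LAYOUT_RETURN_LOCK as a one-at-a-time gate so only a single
 * LAYOUTRETURN is in flight per layout header.  The matching teardown is
 * pnfs_clear_layoutreturn_waitbit(), reached via
 * pnfs_layoutreturn_free_lsegs() above or the allocation-failure path in
 * pnfs_send_layoutreturn() below, which clears both bits and wakes any
 * waiters.
 */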

static void
pnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args *args,
		struct pnfs_layout_hdr *lo,
		const nfs4_stateid *stateid,
		enum pnfs_iomode iomode)
{
	struct inode *inode = lo->plh_inode;

	args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id;
	args->inode = inode;
	args->range.iomode = iomode;
	args->range.offset = 0;
	args->range.length = NFS4_MAX_UINT64;
	args->layout = lo;
	nfs4_stateid_copy(&args->stateid, stateid);
}

static int
pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
		       enum pnfs_iomode iomode, bool sync)
{
	struct inode *ino = lo->plh_inode;
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
	struct nfs4_layoutreturn *lrp;
	int status = 0;

	lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
	if (unlikely(lrp == NULL)) {
		status = -ENOMEM;
		spin_lock(&ino->i_lock);
		pnfs_clear_layoutreturn_waitbit(lo);
		spin_unlock(&ino->i_lock);
		pnfs_put_layout_hdr(lo);
		goto out;
	}

	pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode);
	lrp->args.ld_private = &lrp->ld_private;
	lrp->clp = NFS_SERVER(ino)->nfs_client;
	lrp->cred = lo->plh_lc_cred;
	if (ld->prepare_layoutreturn)
		ld->prepare_layoutreturn(&lrp->args);

	status = nfs4_proc_layoutreturn(lrp, sync);
out:
	dprintk("<-- %s status: %d\n", __func__, status);
	return status;
}

/* Return true if layoutreturn is needed */
static bool
pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
{
	struct pnfs_layout_segment *s;
	enum pnfs_iomode iomode;
	u32 seq;

	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
		return false;

	seq = lo->plh_return_seq;
	iomode = lo->plh_return_iomode;

	/* Defer layoutreturn until all recalled lsegs are done */
	list_for_each_entry(s, &lo->plh_segs, pls_list) {
		if (seq && pnfs_seqid_is_newer(s->pls_seq, seq))
			continue;
		if (iomode != IOMODE_ANY && s->pls_range.iomode != iomode)
			continue;
		if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
			return false;
	}

	return true;
}

static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
		return;
	spin_lock(&inode->i_lock);
	if (pnfs_layout_need_return(lo)) {
		nfs4_stateid stateid;
		enum pnfs_iomode iomode;
		bool send;

		send = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
		spin_unlock(&inode->i_lock);
		if (send) {
			/* Send an async layoutreturn so we don't deadlock */
			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
		}
	} else
		spin_unlock(&inode->i_lock);
}

/*
 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
 * when the layout segment list is empty.
 *
 * Note that a pnfs_layout_hdr can exist with an empty layout segment
 * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
 * deviceid is marked invalid.
 */
int
_pnfs_return_layout(struct inode *ino)
{
	struct pnfs_layout_hdr *lo = NULL;
	struct nfs_inode *nfsi = NFS_I(ino);
	LIST_HEAD(tmp_list);
	nfs4_stateid stateid;
	int status = 0;
	bool send, valid_layout;

	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);

	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (!lo) {
		spin_unlock(&ino->i_lock);
		dprintk("NFS: %s no layout to return\n", __func__);
		goto out;
	}
	/* Reference matched in nfs4_layoutreturn_release */
	pnfs_get_layout_hdr(lo);
	/* Is there an outstanding layoutreturn ? */
	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
		spin_unlock(&ino->i_lock);
		if (wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
					TASK_UNINTERRUPTIBLE))
			goto out_put_layout_hdr;
		spin_lock(&ino->i_lock);
	}
	valid_layout = pnfs_layout_is_valid(lo);
	pnfs_clear_layoutcommit(ino, &tmp_list);
	pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);

	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
		struct pnfs_layout_range range = {
			.iomode		= IOMODE_ANY,
			.offset		= 0,
			.length		= NFS4_MAX_UINT64,
		};
		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
	}

	/* Don't send a LAYOUTRETURN if list was initially empty */
	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
			!valid_layout) {
		spin_unlock(&ino->i_lock);
		dprintk("NFS: %s no layout segments to return\n", __func__);
		goto out_put_layout_hdr;
	}

	send = pnfs_prepare_layoutreturn(lo, &stateid, NULL);
	spin_unlock(&ino->i_lock);
	if (send)
		status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
out_put_layout_hdr:
	pnfs_free_lseg_list(&tmp_list);
	pnfs_put_layout_hdr(lo);
out:
	dprintk("<-- %s status: %d\n", __func__, status);
	return status;
}

int
pnfs_commit_and_return_layout(struct inode *inode)
{
	struct pnfs_layout_hdr *lo;
	int ret;

	spin_lock(&inode->i_lock);
	lo = NFS_I(inode)->layout;
	if (lo == NULL) {
		spin_unlock(&inode->i_lock);
		return 0;
	}
	pnfs_get_layout_hdr(lo);
	/* Block new layoutgets and read/write to ds */
	lo->plh_block_lgets++;
	spin_unlock(&inode->i_lock);
	filemap_fdatawait(inode->i_mapping);
	ret = pnfs_layoutcommit_inode(inode, true);
	if (ret == 0)
		ret = _pnfs_return_layout(inode);
	spin_lock(&inode->i_lock);
	lo->plh_block_lgets--;
	spin_unlock(&inode->i_lock);
	pnfs_put_layout_hdr(lo);
	return ret;
}

bool pnfs_roc(struct inode *ino,
		struct nfs4_layoutreturn_args *args,
		struct nfs4_layoutreturn_res *res,
		const struct rpc_cred *cred)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct nfs_open_context *ctx;
	struct nfs4_state *state;
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg, *next;
	nfs4_stateid stateid;
	enum pnfs_iomode iomode = 0;
	bool layoutreturn = false, roc = false;
	bool skip_read = false;

	if (!nfs_have_layout(ino))
		return false;
retry:
	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (!lo || !pnfs_layout_is_valid(lo) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		lo = NULL;
		goto out_noroc;
	}
	pnfs_get_layout_hdr(lo);
	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
		spin_unlock(&ino->i_lock);
		wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
				TASK_UNINTERRUPTIBLE);
		pnfs_put_layout_hdr(lo);
		goto retry;
	}

	/* no roc if we hold a delegation */
	if (nfs4_check_delegation(ino, FMODE_READ)) {
		if (nfs4_check_delegation(ino, FMODE_WRITE))
			goto out_noroc;
		skip_read = true;
	}

	list_for_each_entry(ctx, &nfsi->open_files, list) {
		state = ctx->state;
		if (state == NULL)
			continue;
		/* Don't return layout if there is open file state */
		if (state->state & FMODE_WRITE)
			goto out_noroc;
		if (state->state & FMODE_READ)
			skip_read = true;
	}

	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) {
		if (skip_read && lseg->pls_range.iomode == IOMODE_READ)
			continue;
		/* If we are sending layoutreturn, invalidate all valid lsegs */
		if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags))
			continue;
		/*
		 * Note: mark lseg for return so pnfs_layout_remove_lseg
		 * doesn't invalidate the layout for us.
		 */
		set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
		if (!mark_lseg_invalid(lseg, &lo->plh_return_segs))
			continue;
		pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
	}

	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
		goto out_noroc;

	/* ROC in two conditions:
	 * 1. there are ROC lsegs
	 * 2. we don't send layoutreturn
	 */
	/* lo ref dropped in pnfs_roc_release() */
	layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
	/* If the creds don't match, we can't compound the layoutreturn */
	if (!layoutreturn || cred != lo->plh_lc_cred)
		goto out_noroc;

	roc = layoutreturn;
	pnfs_init_layoutreturn_args(args, lo, &stateid, iomode);
	res->lrs_present = 0;
	layoutreturn = false;

out_noroc:
	spin_unlock(&ino->i_lock);
	pnfs_layoutcommit_inode(ino, true);
	if (roc) {
		struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;

		if (ld->prepare_layoutreturn)
			ld->prepare_layoutreturn(args);
		pnfs_put_layout_hdr(lo);
		return true;
	}
	if (layoutreturn)
		pnfs_send_layoutreturn(lo, &stateid, iomode, true);
	pnfs_put_layout_hdr(lo);
	return false;
}
|  | 1419 |  | 
|  | 1420 | void pnfs_roc_release(struct nfs4_layoutreturn_args *args, | 
|  | 1421 | struct nfs4_layoutreturn_res *res, | 
|  | 1422 | int ret) | 
|  | 1423 | { | 
|  | 1424 | struct pnfs_layout_hdr *lo = args->layout; | 
|  | 1425 | const nfs4_stateid *arg_stateid = NULL; | 
|  | 1426 | const nfs4_stateid *res_stateid = NULL; | 
|  | 1427 | struct nfs4_xdr_opaque_data *ld_private = args->ld_private; | 
|  | 1428 |  | 
|  | 1429 | switch (ret) { | 
|  | 1430 | case -NFS4ERR_NOMATCHING_LAYOUT: | 
|  | 1431 | break; | 
|  | 1432 | case 0: | 
|  | 1433 | if (res->lrs_present) | 
|  | 1434 | res_stateid = &res->stateid; | 
|  | 1435 | /* Fallthrough */ | 
|  | 1436 | default: | 
|  | 1437 | arg_stateid = &args->stateid; | 
|  | 1438 | } | 
|  | 1439 | pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, | 
|  | 1440 | res_stateid); | 
|  | 1441 | if (ld_private && ld_private->ops && ld_private->ops->free) | 
|  | 1442 | ld_private->ops->free(ld_private); | 
|  | 1443 | pnfs_put_layout_hdr(lo); | 
|  | 1444 | trace_nfs4_layoutreturn_on_close(args->inode, 0); | 
|  | 1445 | } | 
|  | 1446 |  | 
|  | 1447 | bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) | 
|  | 1448 | { | 
|  | 1449 | struct nfs_inode *nfsi = NFS_I(ino); | 
|  | 1450 | struct pnfs_layout_hdr *lo; | 
|  | 1451 | bool sleep = false; | 
|  | 1452 |  | 
|  | 1453 | /* We might not hold a reference to lo, so we must check it | 
|  | 1454 | * under i_lock */ | 
|  | 1455 | spin_lock(&ino->i_lock); | 
|  | 1456 | lo = nfsi->layout; | 
|  | 1457 | if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { | 
|  | 1458 | rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); | 
|  | 1459 | sleep = true; | 
|  | 1460 | } | 
|  | 1461 | spin_unlock(&ino->i_lock); | 
|  | 1462 | return sleep; | 
|  | 1463 | } | 
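|  |  |  | 
|  |  | /* | 
|  |  |  * Illustration (example only, not compiled): callers use the return | 
|  |  |  * value to decide whether @task was put to sleep on the roc_rpcwaitq | 
|  |  |  * and must bail out to be retried later. A hypothetical fragment from | 
|  |  |  * an rpc_task prepare callback ('inode' and 'task' are assumed locals): | 
|  |  |  */ | 
|  |  | #if 0 | 
|  |  | if (pnfs_wait_on_layoutreturn(inode, task)) | 
|  |  | return; /* task sleeps until the layoutreturn completes */ | 
|  |  | #endif | 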
|  | 1464 |  | 
|  | 1465 | /* | 
|  | 1466 | * Compare two layout segments for sorting into layout cache. | 
|  | 1467 | * We want to preferentially return RW over RO layouts, so ensure those | 
|  | 1468 | * are seen first. | 
|  | 1469 | */ | 
|  | 1470 | static s64 | 
|  | 1471 | pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1, | 
|  | 1472 | const struct pnfs_layout_range *l2) | 
|  | 1473 | { | 
|  | 1474 | s64 d; | 
|  | 1475 |  | 
|  | 1476 | /* high offset > low offset */ | 
|  | 1477 | d = l1->offset - l2->offset; | 
|  | 1478 | if (d) | 
|  | 1479 | return d; | 
|  | 1480 |  | 
|  | 1481 | /* short length > long length */ | 
|  | 1482 | d = l2->length - l1->length; | 
|  | 1483 | if (d) | 
|  | 1484 | return d; | 
|  | 1485 |  | 
|  | 1486 | /* read > read/write */ | 
|  | 1487 | return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); | 
|  | 1488 | } | 
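|  |  |  | 
|  |  | /* | 
|  |  |  * Illustration (example only, not compiled): how the comparator above | 
|  |  |  * orders three invented ranges. example_cmp_order() is a hypothetical | 
|  |  |  * name that exists only for this sketch. | 
|  |  |  */ | 
|  |  | #if 0 | 
|  |  | static void example_cmp_order(void) | 
|  |  | { | 
|  |  | struct pnfs_layout_range a = { .iomode = IOMODE_RW, | 
|  |  | .offset = 0, .length = 4096 }; | 
|  |  | struct pnfs_layout_range b = { .iomode = IOMODE_READ, | 
|  |  | .offset = 0, .length = 4096 }; | 
|  |  | struct pnfs_layout_range c = { .iomode = IOMODE_RW, | 
|  |  | .offset = 4096, .length = 4096 }; | 
|  |  |  | 
|  |  | /* Equal offset and length: RW sorts before READ. */ | 
|  |  | WARN_ON(pnfs_lseg_range_cmp(&a, &b) >= 0); | 
|  |  | /* Lower offset sorts first, regardless of iomode. */ | 
|  |  | WARN_ON(pnfs_lseg_range_cmp(&b, &c) >= 0); | 
|  |  | } | 
|  |  | #endif | 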
|  | 1489 |  | 
|  | 1490 | static bool | 
|  | 1491 | pnfs_lseg_range_is_after(const struct pnfs_layout_range *l1, | 
|  | 1492 | const struct pnfs_layout_range *l2) | 
|  | 1493 | { | 
|  | 1494 | return pnfs_lseg_range_cmp(l1, l2) > 0; | 
|  | 1495 | } | 
|  | 1496 |  | 
|  | 1497 | static bool | 
|  | 1498 | pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg, | 
|  | 1499 | struct pnfs_layout_segment *old) | 
|  | 1500 | { | 
|  | 1501 | return false; | 
|  | 1502 | } | 
|  | 1503 |  | 
|  | 1504 | void | 
|  | 1505 | pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo, | 
|  | 1506 | struct pnfs_layout_segment *lseg, | 
|  | 1507 | bool (*is_after)(const struct pnfs_layout_range *, | 
|  | 1508 | const struct pnfs_layout_range *), | 
|  | 1509 | bool (*do_merge)(struct pnfs_layout_segment *, | 
|  | 1510 | struct pnfs_layout_segment *), | 
|  | 1511 | struct list_head *free_me) | 
|  | 1512 | { | 
|  | 1513 | struct pnfs_layout_segment *lp, *tmp; | 
|  | 1514 |  | 
|  | 1515 | dprintk("%s:Begin\n", __func__); | 
|  | 1516 |  | 
|  | 1517 | list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) { | 
|  | 1518 | if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0) | 
|  | 1519 | continue; | 
|  | 1520 | if (do_merge(lseg, lp)) { | 
|  | 1521 | mark_lseg_invalid(lp, free_me); | 
|  | 1522 | continue; | 
|  | 1523 | } | 
|  | 1524 | if (is_after(&lseg->pls_range, &lp->pls_range)) | 
|  | 1525 | continue; | 
|  | 1526 | list_add_tail(&lseg->pls_list, &lp->pls_list); | 
|  | 1527 | dprintk("%s: inserted lseg %p " | 
|  | 1528 | "iomode %d offset %llu length %llu before " | 
|  | 1529 | "lp %p iomode %d offset %llu length %llu\n", | 
|  | 1530 | __func__, lseg, lseg->pls_range.iomode, | 
|  | 1531 | lseg->pls_range.offset, lseg->pls_range.length, | 
|  | 1532 | lp, lp->pls_range.iomode, lp->pls_range.offset, | 
|  | 1533 | lp->pls_range.length); | 
|  | 1534 | goto out; | 
|  | 1535 | } | 
|  | 1536 | list_add_tail(&lseg->pls_list, &lo->plh_segs); | 
|  | 1537 | dprintk("%s: inserted lseg %p " | 
|  | 1538 | "iomode %d offset %llu length %llu at tail\n", | 
|  | 1539 | __func__, lseg, lseg->pls_range.iomode, | 
|  | 1540 | lseg->pls_range.offset, lseg->pls_range.length); | 
|  | 1541 | out: | 
|  | 1542 | pnfs_get_layout_hdr(lo); | 
|  | 1543 |  | 
|  | 1544 | dprintk("%s:Return\n", __func__); | 
|  | 1545 | } | 
|  | 1546 | EXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg); | 
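|  |  |  | 
|  |  | /* | 
|  |  |  * Illustration (example only, not compiled): a hypothetical layout | 
|  |  |  * driver could implement its ->add_lseg hook by reusing the generic | 
|  |  |  * insert helper with a driver-specific merge test. | 
|  |  |  * example_lseg_can_merge() and example_add_lseg() are invented names | 
|  |  |  * for this sketch. | 
|  |  |  */ | 
|  |  | #if 0 | 
|  |  | static bool example_lseg_can_merge(struct pnfs_layout_segment *lseg, | 
|  |  | struct pnfs_layout_segment *old) | 
|  |  | { | 
|  |  | /* e.g. drop 'old' when the new segment fully covers it */ | 
|  |  | return lseg->pls_range.iomode == old->pls_range.iomode && | 
|  |  | pnfs_lseg_range_contained(&lseg->pls_range, &old->pls_range); | 
|  |  | } | 
|  |  |  | 
|  |  | static void example_add_lseg(struct pnfs_layout_hdr *lo, | 
|  |  | struct pnfs_layout_segment *lseg, | 
|  |  | struct list_head *free_me) | 
|  |  | { | 
|  |  | pnfs_generic_layout_insert_lseg(lo, lseg, | 
|  |  | pnfs_lseg_range_is_after, | 
|  |  | example_lseg_can_merge, | 
|  |  | free_me); | 
|  |  | } | 
|  |  | #endif | 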
|  | 1547 |  | 
|  | 1548 | static void | 
|  | 1549 | pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo, | 
|  | 1550 | struct pnfs_layout_segment *lseg, | 
|  | 1551 | struct list_head *free_me) | 
|  | 1552 | { | 
|  | 1553 | struct inode *inode = lo->plh_inode; | 
|  | 1554 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | 
|  | 1555 |  | 
|  | 1556 | if (ld->add_lseg != NULL) | 
|  | 1557 | ld->add_lseg(lo, lseg, free_me); | 
|  | 1558 | else | 
|  | 1559 | pnfs_generic_layout_insert_lseg(lo, lseg, | 
|  | 1560 | pnfs_lseg_range_is_after, | 
|  | 1561 | pnfs_lseg_no_merge, | 
|  | 1562 | free_me); | 
|  | 1563 | } | 
|  | 1564 |  | 
|  | 1565 | static struct pnfs_layout_hdr * | 
|  | 1566 | alloc_init_layout_hdr(struct inode *ino, | 
|  | 1567 | struct nfs_open_context *ctx, | 
|  | 1568 | gfp_t gfp_flags) | 
|  | 1569 | { | 
|  | 1570 | struct pnfs_layout_hdr *lo; | 
|  | 1571 |  | 
|  | 1572 | lo = pnfs_alloc_layout_hdr(ino, gfp_flags); | 
|  | 1573 | if (!lo) | 
|  | 1574 | return NULL; | 
|  | 1575 | refcount_set(&lo->plh_refcount, 1); | 
|  | 1576 | INIT_LIST_HEAD(&lo->plh_layouts); | 
|  | 1577 | INIT_LIST_HEAD(&lo->plh_segs); | 
|  | 1578 | INIT_LIST_HEAD(&lo->plh_return_segs); | 
|  | 1579 | INIT_LIST_HEAD(&lo->plh_bulk_destroy); | 
|  | 1580 | lo->plh_inode = ino; | 
|  | 1581 | lo->plh_lc_cred = get_rpccred(ctx->cred); | 
|  | 1582 | lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID; | 
|  | 1583 | return lo; | 
|  | 1584 | } | 
|  | 1585 |  | 
|  | 1586 | static struct pnfs_layout_hdr * | 
|  | 1587 | pnfs_find_alloc_layout(struct inode *ino, | 
|  | 1588 | struct nfs_open_context *ctx, | 
|  | 1589 | gfp_t gfp_flags) | 
|  | 1590 | __releases(&ino->i_lock) | 
|  | 1591 | __acquires(&ino->i_lock) | 
|  | 1592 | { | 
|  | 1593 | struct nfs_inode *nfsi = NFS_I(ino); | 
|  | 1594 | struct pnfs_layout_hdr *new = NULL; | 
|  | 1595 |  | 
|  | 1596 | dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); | 
|  | 1597 |  | 
|  | 1598 | if (nfsi->layout != NULL) | 
|  | 1599 | goto out_existing; | 
|  | 1600 | spin_unlock(&ino->i_lock); | 
|  | 1601 | new = alloc_init_layout_hdr(ino, ctx, gfp_flags); | 
|  | 1602 | spin_lock(&ino->i_lock); | 
|  | 1603 |  | 
|  | 1604 | if (likely(nfsi->layout == NULL)) {	/* Won the race? */ | 
|  | 1605 | nfsi->layout = new; | 
|  | 1606 | return new; | 
|  | 1607 | } else if (new != NULL) | 
|  | 1608 | pnfs_free_layout_hdr(new); | 
|  | 1609 | out_existing: | 
|  | 1610 | pnfs_get_layout_hdr(nfsi->layout); | 
|  | 1611 | return nfsi->layout; | 
|  | 1612 | } | 
|  | 1613 |  | 
|  | 1614 | /* | 
|  | 1615 | * iomode matching rules: | 
|  | 1616 | * | 
|  | 1617 | * iomode	lseg	strict_iomode	match | 
|  | 1618 | * ------	------	-------------	----- | 
|  | 1619 | * ANY		READ	N/A		true | 
|  | 1620 | * ANY		RW	N/A		true | 
|  | 1621 | * RW		READ	N/A		false | 
|  | 1622 | * RW		RW	N/A		true | 
|  | 1623 | * READ		READ	N/A		true | 
|  | 1624 | * READ		RW	true		false | 
|  | 1625 | * READ		RW	false		true | 
|  | 1626 | */ | 
|  | 1627 | static bool | 
|  | 1628 | pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, | 
|  | 1629 | const struct pnfs_layout_range *range, | 
|  | 1630 | bool strict_iomode) | 
|  | 1631 | { | 
|  | 1632 | struct pnfs_layout_range range1; | 
|  | 1633 |  | 
|  | 1634 | if ((range->iomode == IOMODE_RW && | 
|  | 1635 | ls_range->iomode != IOMODE_RW) || | 
|  | 1636 | (range->iomode != ls_range->iomode && | 
|  | 1637 | strict_iomode) || | 
|  | 1638 | !pnfs_lseg_range_intersecting(ls_range, range)) | 
|  | 1639 | return false; | 
|  | 1640 |  | 
|  | 1641 | /* range1 covers only the first byte in the range */ | 
|  | 1642 | range1 = *range; | 
|  | 1643 | range1.length = 1; | 
|  | 1644 | return pnfs_lseg_range_contained(ls_range, &range1); | 
|  | 1645 | } | 
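|  |  |  | 
|  |  | /* | 
|  |  |  * Illustration (example only, not compiled): the READ-vs-RW rows of | 
|  |  |  * the table above, as a hypothetical fragment with invented ranges. | 
|  |  |  */ | 
|  |  | #if 0 | 
|  |  | struct pnfs_layout_range lseg_rng = { .iomode = IOMODE_RW, | 
|  |  | .offset = 0, | 
|  |  | .length = NFS4_MAX_UINT64 }; | 
|  |  | struct pnfs_layout_range req = { .iomode = IOMODE_READ, | 
|  |  | .offset = 0, .length = 4096 }; | 
|  |  |  | 
|  |  | /* A READ request may use an RW segment... */ | 
|  |  | WARN_ON(!pnfs_lseg_range_match(&lseg_rng, &req, false)); | 
|  |  | /* ...unless a strict iomode match was requested. */ | 
|  |  | WARN_ON(pnfs_lseg_range_match(&lseg_rng, &req, true)); | 
|  |  | #endif | 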
|  | 1646 |  | 
|  | 1647 | /* | 
|  | 1648 | * lookup range in layout | 
|  | 1649 | */ | 
|  | 1650 | static struct pnfs_layout_segment * | 
|  | 1651 | pnfs_find_lseg(struct pnfs_layout_hdr *lo, | 
|  | 1652 | struct pnfs_layout_range *range, | 
|  | 1653 | bool strict_iomode) | 
|  | 1654 | { | 
|  | 1655 | struct pnfs_layout_segment *lseg, *ret = NULL; | 
|  | 1656 |  | 
|  | 1657 | dprintk("%s:Begin\n", __func__); | 
|  | 1658 |  | 
|  | 1659 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | 
|  | 1660 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | 
|  | 1661 | pnfs_lseg_range_match(&lseg->pls_range, range, | 
|  | 1662 | strict_iomode)) { | 
|  | 1663 | ret = pnfs_get_lseg(lseg); | 
|  | 1664 | break; | 
|  | 1665 | } | 
|  | 1666 | } | 
|  | 1667 |  | 
|  | 1668 | dprintk("%s:Return lseg %p ref %d\n", | 
|  | 1669 | __func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0); | 
|  | 1670 | return ret; | 
|  | 1671 | } | 
|  | 1672 |  | 
|  | 1673 | /* | 
|  | 1674 | * Use mdsthreshold hints set at each OPEN to determine if I/O should go | 
|  | 1675 | * to the MDS or over pNFS | 
|  | 1676 | * | 
|  | 1677 | * The nfs_inode read_io and write_io fields are cumulative counters reset | 
|  | 1678 | * when there are no layout segments. Note that in pnfs_update_layout iomode | 
|  | 1679 | * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a | 
|  | 1680 | * WRITE request. | 
|  | 1681 | * | 
|  | 1682 | * A return of true means use MDS I/O. | 
|  | 1683 | * | 
|  | 1684 | * From rfc 5661: | 
|  | 1685 | * If a file's size is smaller than the file size threshold, data accesses | 
|  | 1686 | * SHOULD be sent to the metadata server.  If an I/O request has a length that | 
|  | 1687 | * is below the I/O size threshold, the I/O SHOULD be sent to the metadata | 
|  | 1688 | * server.  If both file size and I/O size are provided, the client SHOULD | 
|  | 1689 | * reach or exceed both thresholds before sending its read or write | 
|  | 1690 | * requests to the data server. | 
|  | 1691 | */ | 
|  | 1692 | static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, | 
|  | 1693 | struct inode *ino, int iomode) | 
|  | 1694 | { | 
|  | 1695 | struct nfs4_threshold *t = ctx->mdsthreshold; | 
|  | 1696 | struct nfs_inode *nfsi = NFS_I(ino); | 
|  | 1697 | loff_t fsize = i_size_read(ino); | 
|  | 1698 | bool size = false, size_set = false, io = false, io_set = false, ret = false; | 
|  | 1699 |  | 
|  | 1700 | if (t == NULL) | 
|  | 1701 | return ret; | 
|  | 1702 |  | 
|  | 1703 | dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n", | 
|  | 1704 | __func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz); | 
|  | 1705 |  | 
|  | 1706 | switch (iomode) { | 
|  | 1707 | case IOMODE_READ: | 
|  | 1708 | if (t->bm & THRESHOLD_RD) { | 
|  | 1709 | dprintk("%s fsize %llu\n", __func__, fsize); | 
|  | 1710 | size_set = true; | 
|  | 1711 | if (fsize < t->rd_sz) | 
|  | 1712 | size = true; | 
|  | 1713 | } | 
|  | 1714 | if (t->bm & THRESHOLD_RD_IO) { | 
|  | 1715 | dprintk("%s nfsi->read_io %llu\n", __func__, | 
|  | 1716 | nfsi->read_io); | 
|  | 1717 | io_set = true; | 
|  | 1718 | if (nfsi->read_io < t->rd_io_sz) | 
|  | 1719 | io = true; | 
|  | 1720 | } | 
|  | 1721 | break; | 
|  | 1722 | case IOMODE_RW: | 
|  | 1723 | if (t->bm & THRESHOLD_WR) { | 
|  | 1724 | dprintk("%s fsize %llu\n", __func__, fsize); | 
|  | 1725 | size_set = true; | 
|  | 1726 | if (fsize < t->wr_sz) | 
|  | 1727 | size = true; | 
|  | 1728 | } | 
|  | 1729 | if (t->bm & THRESHOLD_WR_IO) { | 
|  | 1730 | dprintk("%s nfsi->write_io %llu\n", __func__, | 
|  | 1731 | nfsi->write_io); | 
|  | 1732 | io_set = true; | 
|  | 1733 | if (nfsi->write_io < t->wr_io_sz) | 
|  | 1734 | io = true; | 
|  | 1735 | } | 
|  | 1736 | break; | 
|  | 1737 | } | 
|  | 1738 | if (size_set && io_set) { | 
|  | 1739 | if (size && io) | 
|  | 1740 | ret = true; | 
|  | 1741 | } else if (size || io) | 
|  | 1742 | ret = true; | 
|  | 1743 |  | 
|  | 1744 | dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret); | 
|  | 1745 | return ret; | 
|  | 1746 | } | 
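|  |  |  | 
|  |  | /* | 
|  |  |  * Worked example for the function above (invented numbers): with | 
|  |  |  * t->bm = THRESHOLD_RD | THRESHOLD_RD_IO, t->rd_sz = 1 MB and | 
|  |  |  * t->rd_io_sz = 64 KB, a READ of a 4 MB file with read_io == 0 gives | 
|  |  |  * size = false (4 MB >= 1 MB) and io = true (0 < 64 KB). Since both | 
|  |  |  * threshold bits are set, MDS I/O requires size && io, so the | 
|  |  |  * function returns false and the I/O goes over pNFS. | 
|  |  |  */ | 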
|  | 1747 |  | 
|  | 1748 | static int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) | 
|  | 1749 | { | 
|  | 1750 | /* | 
|  | 1751 | * Send a layoutcommit first: an outstanding layoutcommit holds lseg | 
|  | 1752 | * references that can hold up the layoutreturn | 
|  | 1753 | */ | 
|  | 1754 | pnfs_layoutcommit_inode(lo->plh_inode, false); | 
|  | 1755 | return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, | 
|  | 1756 | nfs_wait_bit_killable, | 
|  | 1757 | TASK_KILLABLE); | 
|  | 1758 | } | 
|  | 1759 |  | 
|  | 1760 | static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) | 
|  | 1761 | { | 
|  | 1762 | atomic_inc(&lo->plh_outstanding); | 
|  | 1763 | } | 
|  | 1764 |  | 
|  | 1765 | static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) | 
|  | 1766 | { | 
|  | 1767 | if (atomic_dec_and_test(&lo->plh_outstanding)) | 
|  | 1768 | wake_up_var(&lo->plh_outstanding); | 
|  | 1769 | } | 
|  | 1770 |  | 
|  | 1771 | static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) | 
|  | 1772 | { | 
|  | 1773 | unsigned long *bitlock = &lo->plh_flags; | 
|  | 1774 |  | 
|  | 1775 | clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); | 
|  | 1776 | smp_mb__after_atomic(); | 
|  | 1777 | wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); | 
|  | 1778 | } | 
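|  |  |  | 
|  |  | /* | 
|  |  |  * The serialisation handshake for the first LAYOUTGET, sketched | 
|  |  |  * (see pnfs_update_layout() below): | 
|  |  |  * | 
|  |  |  *   winner: test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET) was clear | 
|  |  |  *           -> sends the LAYOUTGET, then calls | 
|  |  |  *              pnfs_clear_first_layoutget() to wake the waiters | 
|  |  |  *   loser:  wait_on_bit(..., NFS_LAYOUT_FIRST_LAYOUTGET, ...) | 
|  |  |  *           -> retries the layout lookup from scratch | 
|  |  |  */ | 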
|  | 1779 |  | 
|  | 1780 | static void _add_to_server_list(struct pnfs_layout_hdr *lo, | 
|  | 1781 | struct nfs_server *server) | 
|  | 1782 | { | 
|  | 1783 | if (list_empty(&lo->plh_layouts)) { | 
|  | 1784 | struct nfs_client *clp = server->nfs_client; | 
|  | 1785 |  | 
|  | 1786 | /* The lo must be on the clp list if there is any | 
|  | 1787 | * chance of a CB_LAYOUTRECALL(FILE) coming in. | 
|  | 1788 | */ | 
|  | 1789 | spin_lock(&clp->cl_lock); | 
|  | 1790 | if (list_empty(&lo->plh_layouts)) | 
|  | 1791 | list_add_tail(&lo->plh_layouts, &server->layouts); | 
|  | 1792 | spin_unlock(&clp->cl_lock); | 
|  | 1793 | } | 
|  | 1794 | } | 
|  | 1795 |  | 
|  | 1796 | /* | 
|  | 1797 | * The layout segment is retrieved from the server if it is not cached. | 
|  | 1798 | * The appropriate layout segment is referenced and returned to the caller. | 
|  | 1799 | */ | 
|  | 1800 | struct pnfs_layout_segment * | 
|  | 1801 | pnfs_update_layout(struct inode *ino, | 
|  | 1802 | struct nfs_open_context *ctx, | 
|  | 1803 | loff_t pos, | 
|  | 1804 | u64 count, | 
|  | 1805 | enum pnfs_iomode iomode, | 
|  | 1806 | bool strict_iomode, | 
|  | 1807 | gfp_t gfp_flags) | 
|  | 1808 | { | 
|  | 1809 | struct pnfs_layout_range arg = { | 
|  | 1810 | .iomode = iomode, | 
|  | 1811 | .offset = pos, | 
|  | 1812 | .length = count, | 
|  | 1813 | }; | 
|  | 1814 | unsigned pg_offset; | 
|  | 1815 | struct nfs_server *server = NFS_SERVER(ino); | 
|  | 1816 | struct nfs_client *clp = server->nfs_client; | 
|  | 1817 | struct pnfs_layout_hdr *lo = NULL; | 
|  | 1818 | struct pnfs_layout_segment *lseg = NULL; | 
|  | 1819 | struct nfs4_layoutget *lgp; | 
|  | 1820 | nfs4_stateid stateid; | 
|  | 1821 | long timeout = 0; | 
|  | 1822 | unsigned long giveup = jiffies + (clp->cl_lease_time << 1); | 
|  | 1823 | bool first; | 
|  | 1824 |  | 
|  | 1825 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) { | 
|  | 1826 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | 
|  | 1827 | PNFS_UPDATE_LAYOUT_NO_PNFS); | 
|  | 1828 | goto out; | 
|  | 1829 | } | 
|  | 1830 |  | 
|  | 1831 | if (pnfs_within_mdsthreshold(ctx, ino, iomode)) { | 
|  | 1832 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | 
|  | 1833 | PNFS_UPDATE_LAYOUT_MDSTHRESH); | 
|  | 1834 | goto out; | 
|  | 1835 | } | 
|  | 1836 |  | 
|  | 1837 | lookup_again: | 
|  | 1838 | lseg = ERR_PTR(nfs4_client_recover_expired_lease(clp)); | 
|  | 1839 | if (IS_ERR(lseg)) | 
|  | 1840 | goto out; | 
|  | 1841 | first = false; | 
|  | 1842 | spin_lock(&ino->i_lock); | 
|  | 1843 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); | 
|  | 1844 | if (lo == NULL) { | 
|  | 1845 | spin_unlock(&ino->i_lock); | 
|  | 1846 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | 
|  | 1847 | PNFS_UPDATE_LAYOUT_NOMEM); | 
|  | 1848 | goto out; | 
|  | 1849 | } | 
|  | 1850 |  | 
|  | 1851 | /* Do we even need to bother with this? */ | 
|  | 1852 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | 
|  | 1853 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | 
|  | 1854 | PNFS_UPDATE_LAYOUT_BULK_RECALL); | 
|  | 1855 | dprintk("%s matches recall, use MDS\n", __func__); | 
|  | 1856 | goto out_unlock; | 
|  | 1857 | } | 
|  | 1858 |  | 
|  | 1859 | /* if LAYOUTGET already failed once we don't try again */ | 
|  | 1860 | if (pnfs_layout_io_test_failed(lo, iomode)) { | 
|  | 1861 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | 
|  | 1862 | PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); | 
|  | 1863 | goto out_unlock; | 
|  | 1864 | } | 
|  | 1865 |  | 
|  | 1866 | /* | 
|  | 1867 | * If the layout segment list is empty, but there are outstanding | 
|  | 1868 | * layoutget calls, then they might be subject to a layoutrecall. | 
|  | 1869 | */ | 
|  | 1870 | if (list_empty(&lo->plh_segs) && | 
|  | 1871 | atomic_read(&lo->plh_outstanding) != 0) { | 
|  | 1872 | spin_unlock(&ino->i_lock); | 
|  | 1873 | lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding, | 
|  | 1874 | !atomic_read(&lo->plh_outstanding))); | 
|  | 1875 | if (IS_ERR(lseg)) | 
|  | 1876 | goto out_put_layout_hdr; | 
|  | 1877 | pnfs_put_layout_hdr(lo); | 
|  | 1878 | goto lookup_again; | 
|  | 1879 | } | 
|  | 1880 |  | 
|  | 1881 | lseg = pnfs_find_lseg(lo, &arg, strict_iomode); | 
|  | 1882 | if (lseg) { | 
|  | 1883 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | 
|  | 1884 | PNFS_UPDATE_LAYOUT_FOUND_CACHED); | 
|  | 1885 | goto out_unlock; | 
|  | 1886 | } | 
|  | 1887 |  | 
|  | 1888 | if (!nfs4_valid_open_stateid(ctx->state)) { | 
|  | 1889 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | 
|  | 1890 | PNFS_UPDATE_LAYOUT_INVALID_OPEN); | 
|  | 1891 | goto out_unlock; | 
|  | 1892 | } | 
|  | 1893 |  | 
|  | 1894 | /* | 
|  | 1895 | * Choose a stateid for the LAYOUTGET. If we don't have a layout | 
|  | 1896 | * stateid, or it has been invalidated, then we must use the open | 
|  | 1897 | * stateid. | 
|  | 1898 | */ | 
|  | 1899 | if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { | 
|  | 1900 |  | 
|  | 1901 | /* | 
|  | 1902 | * The first layoutget for the file. Need to serialize per | 
|  | 1903 | * RFC 5661 Errata 3208. | 
|  | 1904 | */ | 
|  | 1905 | if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, | 
|  | 1906 | &lo->plh_flags)) { | 
|  | 1907 | spin_unlock(&ino->i_lock); | 
|  | 1908 | lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, | 
|  | 1909 | NFS_LAYOUT_FIRST_LAYOUTGET, | 
|  | 1910 | TASK_KILLABLE)); | 
|  | 1911 | if (IS_ERR(lseg)) | 
|  | 1912 | goto out_put_layout_hdr; | 
|  | 1913 | pnfs_put_layout_hdr(lo); | 
|  | 1914 | dprintk("%s retrying\n", __func__); | 
|  | 1915 | goto lookup_again; | 
|  | 1916 | } | 
|  | 1917 |  | 
|  | 1918 | first = true; | 
|  | 1919 | if (nfs4_select_rw_stateid(ctx->state, | 
|  | 1920 | iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ, | 
|  | 1921 | NULL, &stateid, NULL) != 0) { | 
|  | 1922 | trace_pnfs_update_layout(ino, pos, count, | 
|  | 1923 | iomode, lo, lseg, | 
|  | 1924 | PNFS_UPDATE_LAYOUT_INVALID_OPEN); | 
|  | 1925 | goto out_unlock; | 
|  | 1926 | } | 
|  | 1927 | } else { | 
|  | 1928 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); | 
|  | 1929 | } | 
|  | 1930 |  | 
|  | 1931 | /* | 
|  | 1932 | * Because we free lsegs before sending LAYOUTRETURN, we need to wait | 
|  | 1933 | * for LAYOUTRETURN even if first is true. | 
|  | 1934 | */ | 
|  | 1935 | if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { | 
|  | 1936 | spin_unlock(&ino->i_lock); | 
|  | 1937 | dprintk("%s wait for layoutreturn\n", __func__); | 
|  | 1938 | lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); | 
|  | 1939 | if (!IS_ERR(lseg)) { | 
|  | 1940 | if (first) | 
|  | 1941 | pnfs_clear_first_layoutget(lo); | 
|  | 1942 | pnfs_put_layout_hdr(lo); | 
|  | 1943 | dprintk("%s retrying\n", __func__); | 
|  | 1944 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, | 
|  | 1945 | lseg, PNFS_UPDATE_LAYOUT_RETRY); | 
|  | 1946 | goto lookup_again; | 
|  | 1947 | } | 
|  | 1948 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | 
|  | 1949 | PNFS_UPDATE_LAYOUT_RETURN); | 
|  | 1950 | goto out_put_layout_hdr; | 
|  | 1951 | } | 
|  | 1952 |  | 
|  | 1953 | if (pnfs_layoutgets_blocked(lo)) { | 
|  | 1954 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | 
|  | 1955 | PNFS_UPDATE_LAYOUT_BLOCKED); | 
|  | 1956 | goto out_unlock; | 
|  | 1957 | } | 
|  | 1958 | nfs_layoutget_begin(lo); | 
|  | 1959 | spin_unlock(&ino->i_lock); | 
|  | 1960 |  | 
|  | 1961 | _add_to_server_list(lo, server); | 
|  | 1962 |  | 
|  | 1963 | pg_offset = arg.offset & ~PAGE_MASK; | 
|  | 1964 | if (pg_offset) { | 
|  | 1965 | arg.offset -= pg_offset; | 
|  | 1966 | arg.length += pg_offset; | 
|  | 1967 | } | 
|  | 1968 | if (arg.length != NFS4_MAX_UINT64) | 
|  | 1969 | arg.length = PAGE_ALIGN(arg.length); | 
|  | 1970 |  | 
|  | 1971 | lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); | 
|  | 1972 | if (!lgp) { | 
|  | 1973 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, | 
|  | 1974 | PNFS_UPDATE_LAYOUT_NOMEM); | 
|  | 1975 | nfs_layoutget_end(lo); | 
|  | 1976 | goto out_put_layout_hdr; | 
|  | 1977 | } | 
|  | 1978 |  | 
|  | 1979 | lseg = nfs4_proc_layoutget(lgp, &timeout); | 
|  | 1980 | trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, | 
|  | 1981 | PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); | 
|  | 1982 | nfs_layoutget_end(lo); | 
|  | 1983 | if (IS_ERR(lseg)) { | 
|  | 1984 | switch(PTR_ERR(lseg)) { | 
|  | 1985 | case -EBUSY: | 
|  | 1986 | if (time_after(jiffies, giveup)) | 
|  | 1987 | lseg = NULL; | 
|  | 1988 | break; | 
|  | 1989 | case -ERECALLCONFLICT: | 
|  | 1990 | case -EAGAIN: | 
|  | 1991 | break; | 
|  | 1992 | default: | 
|  | 1993 | if (!nfs_error_is_fatal(PTR_ERR(lseg))) { | 
|  | 1994 | pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); | 
|  | 1995 | lseg = NULL; | 
|  | 1996 | } | 
|  | 1997 | goto out_put_layout_hdr; | 
|  | 1998 | } | 
|  | 1999 | if (lseg) { | 
|  | 2000 | if (first) | 
|  | 2001 | pnfs_clear_first_layoutget(lo); | 
|  | 2002 | trace_pnfs_update_layout(ino, pos, count, | 
|  | 2003 | iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); | 
|  | 2004 | pnfs_put_layout_hdr(lo); | 
|  | 2005 | goto lookup_again; | 
|  | 2006 | } | 
|  | 2007 | } else { | 
|  | 2008 | pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); | 
|  | 2009 | } | 
|  | 2010 |  | 
|  | 2011 | out_put_layout_hdr: | 
|  | 2012 | if (first) | 
|  | 2013 | pnfs_clear_first_layoutget(lo); | 
|  | 2014 | pnfs_put_layout_hdr(lo); | 
|  | 2015 | out: | 
|  | 2016 | dprintk("%s: inode %s/%llu pNFS layout segment %s for " | 
|  | 2017 | "(%s, offset: %llu, length: %llu)\n", | 
|  | 2018 | __func__, ino->i_sb->s_id, | 
|  | 2019 | (unsigned long long)NFS_FILEID(ino), | 
|  | 2020 | IS_ERR_OR_NULL(lseg) ? "not found" : "found", | 
|  | 2021 | iomode == IOMODE_RW ? "read/write" : "read-only", | 
|  | 2022 | (unsigned long long)pos, | 
|  | 2023 | (unsigned long long)count); | 
|  | 2024 | return lseg; | 
|  | 2025 | out_unlock: | 
|  | 2026 | spin_unlock(&ino->i_lock); | 
|  | 2027 | goto out_put_layout_hdr; | 
|  | 2028 | } | 
|  | 2029 | EXPORT_SYMBOL_GPL(pnfs_update_layout); | 
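|  |  |  | 
|  |  | /* | 
|  |  |  * Illustration (example only, not compiled): the usual calling pattern | 
|  |  |  * for pnfs_update_layout(). Callers must handle all three outcomes: a | 
|  |  |  * referenced lseg, NULL (fall back to the MDS), or an ERR_PTR. The | 
|  |  |  * locals (inode, ctx, pos, count) and do_io_through_mds() are | 
|  |  |  * hypothetical stand-ins. | 
|  |  |  */ | 
|  |  | #if 0 | 
|  |  | lseg = pnfs_update_layout(inode, ctx, pos, count, | 
|  |  | IOMODE_READ, false, GFP_KERNEL); | 
|  |  | if (IS_ERR(lseg)) | 
|  |  | return PTR_ERR(lseg); | 
|  |  | if (!lseg) | 
|  |  | return do_io_through_mds(); | 
|  |  | /* ... issue pNFS I/O against lseg ... */ | 
|  |  | pnfs_put_lseg(lseg); | 
|  |  | #endif | 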
|  | 2030 |  | 
|  | 2031 | static bool | 
|  | 2032 | pnfs_sanity_check_layout_range(struct pnfs_layout_range *range) | 
|  | 2033 | { | 
|  | 2034 | switch (range->iomode) { | 
|  | 2035 | case IOMODE_READ: | 
|  | 2036 | case IOMODE_RW: | 
|  | 2037 | break; | 
|  | 2038 | default: | 
|  | 2039 | return false; | 
|  | 2040 | } | 
|  | 2041 | if (range->offset == NFS4_MAX_UINT64) | 
|  | 2042 | return false; | 
|  | 2043 | if (range->length == 0) | 
|  | 2044 | return false; | 
|  | 2045 | if (range->length != NFS4_MAX_UINT64 && | 
|  | 2046 | range->length > NFS4_MAX_UINT64 - range->offset) | 
|  | 2047 | return false; | 
|  | 2048 | return true; | 
|  | 2049 | } | 
|  | 2050 |  | 
|  | 2051 | static struct pnfs_layout_hdr * | 
|  | 2052 | _pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx) | 
|  | 2053 | { | 
|  | 2054 | struct pnfs_layout_hdr *lo; | 
|  | 2055 |  | 
|  | 2056 | spin_lock(&ino->i_lock); | 
|  | 2057 | lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL); | 
|  | 2058 | if (!lo) | 
|  | 2059 | goto out_unlock; | 
|  | 2060 | if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) | 
|  | 2061 | goto out_unlock; | 
|  | 2062 | if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) | 
|  | 2063 | goto out_unlock; | 
|  | 2064 | if (pnfs_layoutgets_blocked(lo)) | 
|  | 2065 | goto out_unlock; | 
|  | 2066 | if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags)) | 
|  | 2067 | goto out_unlock; | 
|  | 2068 | nfs_layoutget_begin(lo); | 
|  | 2069 | spin_unlock(&ino->i_lock); | 
|  | 2070 | _add_to_server_list(lo, NFS_SERVER(ino)); | 
|  | 2071 | return lo; | 
|  | 2072 |  | 
|  | 2073 | out_unlock: | 
|  | 2074 | spin_unlock(&ino->i_lock); | 
|  | 2075 | pnfs_put_layout_hdr(lo); | 
|  | 2076 | return NULL; | 
|  | 2077 | } | 
|  | 2078 |  | 
|  | 2079 | extern const nfs4_stateid current_stateid; | 
|  | 2080 |  | 
|  | 2081 | static void _lgopen_prepare_attached(struct nfs4_opendata *data, | 
|  | 2082 | struct nfs_open_context *ctx) | 
|  | 2083 | { | 
|  | 2084 | struct inode *ino = data->dentry->d_inode; | 
|  | 2085 | struct pnfs_layout_range rng = { | 
|  | 2086 | .iomode = (data->o_arg.fmode & FMODE_WRITE) ? | 
|  | 2087 | IOMODE_RW: IOMODE_READ, | 
|  | 2088 | .offset = 0, | 
|  | 2089 | .length = NFS4_MAX_UINT64, | 
|  | 2090 | }; | 
|  | 2091 | struct nfs4_layoutget *lgp; | 
|  | 2092 | struct pnfs_layout_hdr *lo; | 
|  | 2093 |  | 
|  | 2094 | /* Heuristic: don't send layoutget if we have cached data */ | 
|  | 2095 | if (rng.iomode == IOMODE_READ && | 
|  | 2096 | (i_size_read(ino) == 0 || ino->i_mapping->nrpages != 0)) | 
|  | 2097 | return; | 
|  | 2098 |  | 
|  | 2099 | lo = _pnfs_grab_empty_layout(ino, ctx); | 
|  | 2100 | if (!lo) | 
|  | 2101 | return; | 
|  | 2102 | lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, | 
|  | 2103 | &rng, GFP_KERNEL); | 
|  | 2104 | if (!lgp) { | 
|  | 2105 | pnfs_clear_first_layoutget(lo); | 
|  | 2106 | pnfs_put_layout_hdr(lo); | 
|  | 2107 | return; | 
|  | 2108 | } | 
|  | 2109 | data->lgp = lgp; | 
|  | 2110 | data->o_arg.lg_args = &lgp->args; | 
|  | 2111 | data->o_res.lg_res = &lgp->res; | 
|  | 2112 | } | 
|  | 2113 |  | 
|  | 2114 | static void _lgopen_prepare_floating(struct nfs4_opendata *data, | 
|  | 2115 | struct nfs_open_context *ctx) | 
|  | 2116 | { | 
|  | 2117 | struct pnfs_layout_range rng = { | 
|  | 2118 | .iomode = (data->o_arg.fmode & FMODE_WRITE) ? | 
|  | 2119 | IOMODE_RW: IOMODE_READ, | 
|  | 2120 | .offset = 0, | 
|  | 2121 | .length = NFS4_MAX_UINT64, | 
|  | 2122 | }; | 
|  | 2123 | struct nfs4_layoutget *lgp; | 
|  | 2124 |  | 
|  | 2125 | lgp = pnfs_alloc_init_layoutget_args(NULL, ctx, &current_stateid, | 
|  | 2126 | &rng, GFP_KERNEL); | 
|  | 2127 | if (!lgp) | 
|  | 2128 | return; | 
|  | 2129 | data->lgp = lgp; | 
|  | 2130 | data->o_arg.lg_args = &lgp->args; | 
|  | 2131 | data->o_res.lg_res = &lgp->res; | 
|  | 2132 | } | 
|  | 2133 |  | 
|  | 2134 | void pnfs_lgopen_prepare(struct nfs4_opendata *data, | 
|  | 2135 | struct nfs_open_context *ctx) | 
|  | 2136 | { | 
|  | 2137 | struct nfs_server *server = NFS_SERVER(data->dir->d_inode); | 
|  | 2138 |  | 
|  | 2139 | if (!(pnfs_enabled_sb(server) && | 
|  | 2140 | server->pnfs_curr_ld->flags & PNFS_LAYOUTGET_ON_OPEN)) | 
|  | 2141 | return; | 
|  | 2142 | /* Could check on max_ops, but currently hardcoded high enough */ | 
|  | 2143 | if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN)) | 
|  | 2144 | return; | 
|  | 2145 | if (data->state) | 
|  | 2146 | _lgopen_prepare_attached(data, ctx); | 
|  | 2147 | else | 
|  | 2148 | _lgopen_prepare_floating(data, ctx); | 
|  | 2149 | } | 
|  | 2150 |  | 
|  | 2151 | void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp, | 
|  | 2152 | struct nfs_open_context *ctx) | 
|  | 2153 | { | 
|  | 2154 | struct pnfs_layout_hdr *lo; | 
|  | 2155 | struct pnfs_layout_segment *lseg; | 
|  | 2156 | struct nfs_server *srv = NFS_SERVER(ino); | 
|  | 2157 | u32 iomode; | 
|  | 2158 |  | 
|  | 2159 | if (!lgp) | 
|  | 2160 | return; | 
|  | 2161 | dprintk("%s: entered with status %i\n", __func__, lgp->res.status); | 
|  | 2162 | if (lgp->res.status) { | 
|  | 2163 | switch (lgp->res.status) { | 
|  | 2164 | default: | 
|  | 2165 | break; | 
|  | 2166 | /* | 
|  | 2167 | * Halt lgopen attempts if the server doesn't recognise | 
|  | 2168 | * the "current stateid" value, the layout type, or the | 
|  | 2169 | * layoutget operation as being valid. | 
|  | 2170 | * Also if it complains about too many ops in the compound | 
|  | 2171 | * or of the request/reply being too big. | 
|  | 2172 | */ | 
|  | 2173 | case -NFS4ERR_BAD_STATEID: | 
|  | 2174 | case -NFS4ERR_NOTSUPP: | 
|  | 2175 | case -NFS4ERR_REP_TOO_BIG: | 
|  | 2176 | case -NFS4ERR_REP_TOO_BIG_TO_CACHE: | 
|  | 2177 | case -NFS4ERR_REQ_TOO_BIG: | 
|  | 2178 | case -NFS4ERR_TOO_MANY_OPS: | 
|  | 2179 | case -NFS4ERR_UNKNOWN_LAYOUTTYPE: | 
|  | 2180 | srv->caps &= ~NFS_CAP_LGOPEN; | 
|  | 2181 | } | 
|  | 2182 | return; | 
|  | 2183 | } | 
|  | 2184 | if (!lgp->args.inode) { | 
|  | 2185 | lo = _pnfs_grab_empty_layout(ino, ctx); | 
|  | 2186 | if (!lo) | 
|  | 2187 | return; | 
|  | 2188 | lgp->args.inode = ino; | 
|  | 2189 | } else | 
|  | 2190 | lo = NFS_I(lgp->args.inode)->layout; | 
|  | 2191 |  | 
|  | 2192 | lseg = pnfs_layout_process(lgp); | 
|  | 2193 | if (!IS_ERR(lseg)) { | 
|  | 2194 | iomode = lgp->args.range.iomode; | 
|  | 2195 | pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); | 
|  | 2196 | pnfs_put_lseg(lseg); | 
|  | 2197 | } | 
|  | 2198 | } | 
|  | 2199 |  | 
|  | 2200 | void nfs4_lgopen_release(struct nfs4_layoutget *lgp) | 
|  | 2201 | { | 
|  | 2202 | if (lgp != NULL) { | 
|  | 2203 | struct inode *inode = lgp->args.inode; | 
|  | 2204 | if (inode) { | 
|  | 2205 | struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; | 
|  | 2206 | pnfs_clear_first_layoutget(lo); | 
|  | 2207 | nfs_layoutget_end(lo); | 
|  | 2208 | } | 
|  | 2209 | pnfs_layoutget_free(lgp); | 
|  | 2210 | } | 
|  | 2211 | } | 
|  | 2212 |  | 
|  | 2213 | struct pnfs_layout_segment * | 
|  | 2214 | pnfs_layout_process(struct nfs4_layoutget *lgp) | 
|  | 2215 | { | 
|  | 2216 | struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; | 
|  | 2217 | struct nfs4_layoutget_res *res = &lgp->res; | 
|  | 2218 | struct pnfs_layout_segment *lseg; | 
|  | 2219 | struct inode *ino = lo->plh_inode; | 
|  | 2220 | LIST_HEAD(free_me); | 
|  | 2221 |  | 
|  | 2222 | if (!pnfs_sanity_check_layout_range(&res->range)) | 
|  | 2223 | return ERR_PTR(-EINVAL); | 
|  | 2224 |  | 
|  | 2225 | /* Inject layout blob into I/O device driver */ | 
|  | 2226 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); | 
|  | 2227 | if (IS_ERR_OR_NULL(lseg)) { | 
|  | 2228 | if (!lseg) | 
|  | 2229 | lseg = ERR_PTR(-ENOMEM); | 
|  | 2230 |  | 
|  | 2231 | dprintk("%s: Could not allocate layout: error %ld\n", | 
|  | 2232 | __func__, PTR_ERR(lseg)); | 
|  | 2233 | return lseg; | 
|  | 2234 | } | 
|  | 2235 |  | 
|  | 2236 | pnfs_init_lseg(lo, lseg, &res->range, &res->stateid); | 
|  | 2237 |  | 
|  | 2238 | spin_lock(&ino->i_lock); | 
|  | 2239 | if (pnfs_layoutgets_blocked(lo)) { | 
|  | 2240 | dprintk("%s forget reply due to state\n", __func__); | 
|  | 2241 | goto out_forget; | 
|  | 2242 | } | 
|  | 2243 |  | 
|  | 2244 | if (!pnfs_layout_is_valid(lo)) { | 
|  | 2245 | /* We have a completely new layout */ | 
|  | 2246 | pnfs_set_layout_stateid(lo, &res->stateid, true); | 
|  | 2247 | } else if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { | 
|  | 2248 | /* existing state ID, make sure the sequence number matches. */ | 
|  | 2249 | if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { | 
|  | 2250 | dprintk("%s forget reply due to sequence\n", __func__); | 
|  | 2251 | goto out_forget; | 
|  | 2252 | } | 
|  | 2253 | pnfs_set_layout_stateid(lo, &res->stateid, false); | 
|  | 2254 | } else { | 
|  | 2255 | /* | 
|  | 2256 | * We got an entirely new state ID.  Mark all segments for the | 
|  | 2257 | * inode invalid, and retry the layoutget | 
|  | 2258 | */ | 
|  | 2259 | pnfs_mark_layout_stateid_invalid(lo, &free_me); | 
|  | 2260 | goto out_forget; | 
|  | 2261 | } | 
|  | 2262 |  | 
|  | 2263 | pnfs_get_lseg(lseg); | 
|  | 2264 | pnfs_layout_insert_lseg(lo, lseg, &free_me); | 
|  | 2265 |  | 
|  | 2267 | if (res->return_on_close) | 
|  | 2268 | set_bit(NFS_LSEG_ROC, &lseg->pls_flags); | 
|  | 2269 |  | 
|  | 2270 | spin_unlock(&ino->i_lock); | 
|  | 2271 | pnfs_free_lseg_list(&free_me); | 
|  | 2272 | return lseg; | 
|  | 2273 |  | 
|  | 2274 | out_forget: | 
|  | 2275 | spin_unlock(&ino->i_lock); | 
|  | 2276 | lseg->pls_layout = lo; | 
|  | 2277 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); | 
|  | 2278 | return ERR_PTR(-EAGAIN); | 
|  | 2279 | } | 
|  | 2280 |  | 
|  | 2281 | static int | 
|  | 2282 | mark_lseg_invalid_or_return(struct pnfs_layout_segment *lseg, | 
|  | 2283 | struct list_head *tmp_list) | 
|  | 2284 | { | 
|  | 2285 | if (!mark_lseg_invalid(lseg, tmp_list)) | 
|  | 2286 | return 0; | 
|  | 2287 | pnfs_cache_lseg_for_layoutreturn(lseg->pls_layout, lseg); | 
|  | 2288 | return 1; | 
|  | 2289 | } | 
|  | 2290 |  | 
|  | 2291 | /** | 
|  | 2292 | * pnfs_mark_matching_lsegs_return - Free or return matching layout segments | 
|  | 2293 | * @lo: pointer to layout header | 
|  | 2294 | * @tmp_list: list header to be used with pnfs_free_lseg_list() | 
|  | 2295 | * @return_range: describe layout segment ranges to be returned | 
|  | 2296 | * @seq: stateid seqid to match | 
|  | 2297 | * | 
|  | 2298 | * This function is mainly intended for use by layoutrecall. It attempts | 
|  | 2299 | * to free the layout segment immediately, or else to mark it for return | 
|  | 2300 | * as soon as its reference count drops to zero. | 
|  | 2301 | * | 
|  | 2302 | * Returns | 
|  | 2303 | * - 0: a layoutreturn needs to be scheduled. | 
|  | 2304 | * - EBUSY: there are layout segments that are still in use. | 
|  | 2305 | * - ENOENT: there are no layout segments that need to be returned. | 
|  | 2306 | */ | 
|  | 2307 | int | 
|  | 2308 | pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | 
|  | 2309 | struct list_head *tmp_list, | 
|  | 2310 | const struct pnfs_layout_range *return_range, | 
|  | 2311 | u32 seq) | 
|  | 2312 | { | 
|  | 2313 | struct pnfs_layout_segment *lseg, *next; | 
|  | 2314 | int remaining = 0; | 
|  | 2315 |  | 
|  | 2316 | dprintk("%s:Begin lo %p\n", __func__, lo); | 
|  | 2317 |  | 
|  | 2318 | assert_spin_locked(&lo->plh_inode->i_lock); | 
|  | 2319 |  | 
|  | 2320 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) | 
|  | 2321 | if (pnfs_match_lseg_recall(lseg, return_range, seq)) { | 
|  | 2322 | dprintk("%s: marking lseg %p iomode %d " | 
|  | 2323 | "offset %llu length %llu\n", __func__, | 
|  | 2324 | lseg, lseg->pls_range.iomode, | 
|  | 2325 | lseg->pls_range.offset, | 
|  | 2326 | lseg->pls_range.length); | 
|  | 2327 | if (mark_lseg_invalid_or_return(lseg, tmp_list)) | 
|  | 2328 | continue; | 
|  | 2329 | remaining++; | 
|  | 2330 | set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); | 
|  | 2331 | } | 
|  | 2332 |  | 
|  | 2333 | if (remaining) { | 
|  | 2334 | pnfs_set_plh_return_info(lo, return_range->iomode, seq); | 
|  | 2335 | return -EBUSY; | 
|  | 2336 | } | 
|  | 2337 |  | 
|  | 2338 | if (!list_empty(&lo->plh_return_segs)) { | 
|  | 2339 | pnfs_set_plh_return_info(lo, return_range->iomode, seq); | 
|  | 2340 | return 0; | 
|  | 2341 | } | 
|  | 2342 |  | 
|  | 2343 | return -ENOENT; | 
|  | 2344 | } | 
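|  |  |  | 
|  |  | /* | 
|  |  |  * Illustration (example only, not compiled): how a caller reacts to | 
|  |  |  * the three return values documented above; compare | 
|  |  |  * pnfs_error_mark_layout_for_return() below. The caller holds i_lock | 
|  |  |  * (as asserted above); 'lo', 'range' and 'seq' are assumed locals. | 
|  |  |  */ | 
|  |  | #if 0 | 
|  |  | if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, | 
|  |  | &range, seq) != -EBUSY) { | 
|  |  | /* 0 or -ENOENT: no segment is pinned, so a layoutreturn | 
|  |  | * can be prepared immediately. */ | 
|  |  | } else { | 
|  |  | /* -EBUSY: busy segments were marked; the return fires | 
|  |  | * when the last lseg reference is dropped. */ | 
|  |  | } | 
|  |  | #endif | 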
|  | 2345 |  | 
|  | 2346 | void pnfs_error_mark_layout_for_return(struct inode *inode, | 
|  | 2347 | struct pnfs_layout_segment *lseg) | 
|  | 2348 | { | 
|  | 2349 | struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; | 
|  | 2350 | struct pnfs_layout_range range = { | 
|  | 2351 | .iomode = lseg->pls_range.iomode, | 
|  | 2352 | .offset = 0, | 
|  | 2353 | .length = NFS4_MAX_UINT64, | 
|  | 2354 | }; | 
|  | 2355 | bool return_now = false; | 
|  | 2356 |  | 
|  | 2357 | spin_lock(&inode->i_lock); | 
|  | 2358 | if (!pnfs_layout_is_valid(lo)) { | 
|  | 2359 | spin_unlock(&inode->i_lock); | 
|  | 2360 | return; | 
|  | 2361 | } | 
|  | 2362 | pnfs_set_plh_return_info(lo, range.iomode, 0); | 
|  | 2363 | /* | 
|  | 2364 | * mark all matching lsegs so that we are sure to have no live | 
|  | 2365 | * segments at hand when sending layoutreturn. See pnfs_put_lseg() | 
|  | 2366 | * for how it works. | 
|  | 2367 | */ | 
|  | 2368 | if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0) != -EBUSY) { | 
|  | 2369 | nfs4_stateid stateid; | 
|  | 2370 | enum pnfs_iomode iomode; | 
|  | 2371 |  | 
|  | 2372 | return_now = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); | 
|  | 2373 | spin_unlock(&inode->i_lock); | 
|  | 2374 | if (return_now) | 
|  | 2375 | pnfs_send_layoutreturn(lo, &stateid, iomode, false); | 
|  | 2376 | } else { | 
|  | 2377 | spin_unlock(&inode->i_lock); | 
|  | 2378 | nfs_commit_inode(inode, 0); | 
|  | 2379 | } | 
|  | 2380 | } | 
|  | 2381 | EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); | 
|  | 2382 |  | 
|  | 2383 | void | 
|  | 2384 | pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio) | 
|  | 2385 | { | 
|  | 2386 | if (pgio->pg_lseg == NULL || | 
|  | 2387 | test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags)) | 
|  | 2388 | return; | 
|  | 2389 | pnfs_put_lseg(pgio->pg_lseg); | 
|  | 2390 | pgio->pg_lseg = NULL; | 
|  | 2391 | } | 
|  | 2392 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout); | 
|  | 2393 |  | 
|  | 2394 | /* | 
|  | 2395 | * Check for any intersection between the request and the pgio->pg_lseg, | 
|  | 2396 | * and if none, put this pgio->pg_lseg away. | 
|  | 2397 | */ | 
|  | 2398 | static void | 
|  | 2399 | pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 
|  | 2400 | { | 
|  | 2401 | if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) { | 
|  | 2402 | pnfs_put_lseg(pgio->pg_lseg); | 
|  | 2403 | pgio->pg_lseg = NULL; | 
|  | 2404 | } | 
|  | 2405 | } | 
|  | 2406 |  | 
|  | 2407 | void | 
|  | 2408 | pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 
|  | 2409 | { | 
|  | 2410 | u64 rd_size = req->wb_bytes; | 
|  | 2411 |  | 
|  | 2412 | pnfs_generic_pg_check_layout(pgio); | 
|  | 2413 | pnfs_generic_pg_check_range(pgio, req); | 
|  | 2414 | if (pgio->pg_lseg == NULL) { | 
|  | 2415 | if (pgio->pg_dreq == NULL) | 
|  | 2416 | rd_size = i_size_read(pgio->pg_inode) - req_offset(req); | 
|  | 2417 | else | 
|  | 2418 | rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); | 
|  | 2419 |  | 
|  | 2420 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 
|  | 2421 | req->wb_context, | 
|  | 2422 | req_offset(req), | 
|  | 2423 | rd_size, | 
|  | 2424 | IOMODE_READ, | 
|  | 2425 | false, | 
|  | 2426 | GFP_KERNEL); | 
|  | 2427 | if (IS_ERR(pgio->pg_lseg)) { | 
|  | 2428 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | 
|  | 2429 | pgio->pg_lseg = NULL; | 
|  | 2430 | return; | 
|  | 2431 | } | 
|  | 2432 | } | 
|  | 2433 | /* If no lseg, fall back to read through mds */ | 
|  | 2434 | if (pgio->pg_lseg == NULL) | 
|  | 2435 | nfs_pageio_reset_read_mds(pgio); | 
|  | 2437 | } | 
|  | 2438 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); | 
|  | 2439 |  | 
|  | 2440 | void | 
|  | 2441 | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | 
|  | 2442 | struct nfs_page *req, u64 wb_size) | 
|  | 2443 | { | 
|  | 2444 | pnfs_generic_pg_check_layout(pgio); | 
|  | 2445 | pnfs_generic_pg_check_range(pgio, req); | 
|  | 2446 | if (pgio->pg_lseg == NULL) { | 
|  | 2447 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 
|  | 2448 | req->wb_context, | 
|  | 2449 | req_offset(req), | 
|  | 2450 | wb_size, | 
|  | 2451 | IOMODE_RW, | 
|  | 2452 | false, | 
|  | 2453 | GFP_NOFS); | 
|  | 2454 | if (IS_ERR(pgio->pg_lseg)) { | 
|  | 2455 | pgio->pg_error = PTR_ERR(pgio->pg_lseg); | 
|  | 2456 | pgio->pg_lseg = NULL; | 
|  | 2457 | return; | 
|  | 2458 | } | 
|  | 2459 | } | 
|  | 2460 | /* If no lseg, fall back to write through mds */ | 
|  | 2461 | if (pgio->pg_lseg == NULL) | 
|  | 2462 | nfs_pageio_reset_write_mds(pgio); | 
|  | 2463 | } | 
|  | 2464 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); | 
|  | 2465 |  | 
|  | 2466 | void | 
|  | 2467 | pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc) | 
|  | 2468 | { | 
|  | 2469 | if (desc->pg_lseg) { | 
|  | 2470 | pnfs_put_lseg(desc->pg_lseg); | 
|  | 2471 | desc->pg_lseg = NULL; | 
|  | 2472 | } | 
|  | 2473 | } | 
|  | 2474 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); | 
|  | 2475 |  | 
|  | 2476 | /* | 
|  | 2477 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number | 
|  | 2478 | * of bytes (maximum @req->wb_bytes) that can be coalesced. | 
|  | 2479 | */ | 
|  | 2480 | size_t | 
|  | 2481 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, | 
|  | 2482 | struct nfs_page *prev, struct nfs_page *req) | 
|  | 2483 | { | 
|  | 2484 | unsigned int size; | 
|  | 2485 | u64 seg_end, req_start, seg_left; | 
|  | 2486 |  | 
|  | 2487 | size = nfs_generic_pg_test(pgio, prev, req); | 
|  | 2488 | if (!size) | 
|  | 2489 | return 0; | 
|  | 2490 |  | 
|  | 2491 | /* | 
|  | 2492 | * 'size' contains the number of bytes left in the current page (up | 
|  | 2493 | * to the original size asked for in @req->wb_bytes). | 
|  | 2494 | * | 
|  | 2495 | * Calculate how many bytes are left in the layout segment | 
|  | 2496 | * and if there are less bytes than 'size', return that instead. | 
|  | 2497 | * | 
|  | 2498 | * Please also note that 'seg_end' (computed via pnfs_end_offset()) is | 
|  | 2499 | * actually the offset of the first byte that lies outside the | 
|  | 2500 | * pnfs_layout_range. | 
|  | 2501 | */ | 
|  | 2502 | if (pgio->pg_lseg) { | 
|  | 2503 | seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset, | 
|  | 2504 | pgio->pg_lseg->pls_range.length); | 
|  | 2505 | req_start = req_offset(req); | 
|  | 2506 |  | 
|  | 2507 | /* start of request is past the last byte of this segment */ | 
|  | 2508 | if (req_start >= seg_end) | 
|  | 2509 | return 0; | 
|  | 2510 |  | 
|  | 2511 | /* adjust 'size' iff there are fewer bytes left in the | 
|  | 2512 | * segment than what nfs_generic_pg_test returned */ | 
|  | 2513 | seg_left = seg_end - req_start; | 
|  | 2514 | if (seg_left < size) | 
|  | 2515 | size = (unsigned int)seg_left; | 
|  | 2516 | } | 
|  | 2517 |  | 
|  | 2518 | return size; | 
|  | 2519 | } | 
|  | 2520 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | 
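|  |  |  | 
|  |  | /* | 
|  |  |  * Worked example for the function above (invented numbers): a segment | 
|  |  |  * covering [0, 64 KB) gives seg_end = 65536. For a request starting at | 
|  |  |  * req_start = 61440 (60 KB), seg_left = 4096; if nfs_generic_pg_test() | 
|  |  |  * allowed 8192 bytes, the result is clamped to 4096. A request | 
|  |  |  * starting at or beyond 64 KB cannot be coalesced at all (returns 0). | 
|  |  |  */ | 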
|  | 2521 |  | 
|  | 2522 | int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) | 
|  | 2523 | { | 
|  | 2524 | struct nfs_pageio_descriptor pgio; | 
|  | 2525 |  | 
|  | 2526 | /* Resend all requests through the MDS */ | 
|  | 2527 | nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, | 
|  | 2528 | hdr->completion_ops); | 
|  | 2529 | set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); | 
|  | 2530 | return nfs_pageio_resend(&pgio, hdr); | 
|  | 2531 | } | 
|  | 2532 | EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); | 
|  | 2533 |  | 
|  | 2534 | static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) | 
|  | 2535 | { | 
|  | 2537 | dprintk("pnfs write error = %d\n", hdr->pnfs_error); | 
|  | 2538 | if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & | 
|  | 2539 | PNFS_LAYOUTRET_ON_ERROR) { | 
|  | 2540 | pnfs_return_layout(hdr->inode); | 
|  | 2541 | } | 
|  | 2542 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) | 
|  | 2543 | hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr); | 
|  | 2544 | } | 
|  | 2545 |  | 
|  | 2546 | /* | 
|  | 2547 | * Called by non-RPC-based layout drivers | 
|  | 2548 | */ | 
|  | 2549 | void pnfs_ld_write_done(struct nfs_pgio_header *hdr) | 
|  | 2550 | { | 
|  | 2551 | if (likely(!hdr->pnfs_error)) { | 
|  | 2552 | pnfs_set_layoutcommit(hdr->inode, hdr->lseg, | 
|  | 2553 | hdr->mds_offset + hdr->res.count); | 
|  | 2554 | hdr->mds_ops->rpc_call_done(&hdr->task, hdr); | 
|  | 2555 | } | 
|  | 2556 | trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); | 
|  | 2557 | if (unlikely(hdr->pnfs_error)) | 
|  | 2558 | pnfs_ld_handle_write_error(hdr); | 
|  | 2559 | hdr->mds_ops->rpc_release(hdr); | 
|  | 2560 | } | 
|  | 2561 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | 
|  | 2562 |  | 
|  | 2563 | static void | 
|  | 2564 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | 
|  | 2565 | struct nfs_pgio_header *hdr) | 
|  | 2566 | { | 
|  | 2567 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | 
|  | 2568 |  | 
|  | 2569 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | 
|  | 2570 | list_splice_tail_init(&hdr->pages, &mirror->pg_list); | 
|  | 2571 | nfs_pageio_reset_write_mds(desc); | 
|  | 2572 | mirror->pg_recoalesce = 1; | 
|  | 2573 | } | 
|  | 2574 | hdr->completion_ops->completion(hdr); | 
|  | 2575 | } | 
|  | 2576 |  | 
|  | 2577 | static enum pnfs_try_status | 
|  | 2578 | pnfs_try_to_write_data(struct nfs_pgio_header *hdr, | 
|  | 2579 | const struct rpc_call_ops *call_ops, | 
|  | 2580 | struct pnfs_layout_segment *lseg, | 
|  | 2581 | int how) | 
|  | 2582 | { | 
|  | 2583 | struct inode *inode = hdr->inode; | 
|  | 2584 | enum pnfs_try_status trypnfs; | 
|  | 2585 | struct nfs_server *nfss = NFS_SERVER(inode); | 
|  | 2586 |  | 
|  | 2587 | hdr->mds_ops = call_ops; | 
|  | 2588 |  | 
|  | 2589 | dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, | 
|  | 2590 | inode->i_ino, hdr->args.count, hdr->args.offset, how); | 
|  | 2591 | trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how); | 
|  | 2592 | if (trypnfs != PNFS_NOT_ATTEMPTED) | 
|  | 2593 | nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); | 
|  | 2594 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); | 
|  | 2595 | return trypnfs; | 
|  | 2596 | } | 
|  | 2597 |  | 
|  | 2598 | static void | 
|  | 2599 | pnfs_do_write(struct nfs_pageio_descriptor *desc, | 
|  | 2600 | struct nfs_pgio_header *hdr, int how) | 
|  | 2601 | { | 
|  | 2602 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 
|  | 2603 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 
|  | 2604 | enum pnfs_try_status trypnfs; | 
|  | 2605 |  | 
|  | 2606 | trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); | 
|  | 2607 | switch (trypnfs) { | 
|  | 2608 | case PNFS_NOT_ATTEMPTED: | 
|  | 2609 | pnfs_write_through_mds(desc, hdr); | 
|  |  | /* Fallthrough */ | 
|  | 2610 | case PNFS_ATTEMPTED: | 
|  | 2611 | break; | 
|  | 2612 | case PNFS_TRY_AGAIN: | 
|  | 2613 | /* cleanup hdr and prepare to redo pnfs */ | 
|  | 2614 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | 
|  | 2615 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | 
|  | 2616 | list_splice_init(&hdr->pages, &mirror->pg_list); | 
|  | 2617 | mirror->pg_recoalesce = 1; | 
|  | 2618 | } | 
|  | 2619 | hdr->mds_ops->rpc_release(hdr); | 
|  | 2620 | } | 
|  | 2621 | } | 
|  | 2622 |  | 
|  | 2623 | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) | 
|  | 2624 | { | 
|  | 2625 | pnfs_put_lseg(hdr->lseg); | 
|  | 2626 | nfs_pgio_header_free(hdr); | 
|  | 2627 | } | 
|  | 2628 |  | 
|  | 2629 | int | 
|  | 2630 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | 
|  | 2631 | { | 
|  | 2632 | struct nfs_pgio_header *hdr; | 
|  | 2633 | int ret; | 
|  | 2634 |  | 
|  | 2635 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 
|  | 2636 | if (!hdr) { | 
|  | 2637 | desc->pg_error = -ENOMEM; | 
|  | 2638 | return desc->pg_error; | 
|  | 2639 | } | 
|  | 2640 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); | 
|  | 2641 |  | 
|  | 2642 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 
|  | 2643 | ret = nfs_generic_pgio(desc, hdr); | 
|  | 2644 | if (!ret) | 
|  | 2645 | pnfs_do_write(desc, hdr, desc->pg_ioflags); | 
|  | 2646 |  | 
|  | 2647 | return ret; | 
|  | 2648 | } | 
|  | 2649 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); | 
|  | 2650 |  | 
|  | 2651 | int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr) | 
|  | 2652 | { | 
|  | 2653 | struct nfs_pageio_descriptor pgio; | 
|  | 2654 |  | 
|  | 2655 | /* Resend all requests through the MDS */ | 
|  | 2656 | nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops); | 
|  | 2657 | return nfs_pageio_resend(&pgio, hdr); | 
|  | 2658 | } | 
|  | 2659 | EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); | 
|  | 2660 |  | 
|  | 2661 | static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) | 
|  | 2662 | { | 
|  | 2663 | dprintk("pnfs read error = %d\n", hdr->pnfs_error); | 
|  | 2664 | if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & | 
|  | 2665 | PNFS_LAYOUTRET_ON_ERROR) { | 
|  | 2666 | pnfs_return_layout(hdr->inode); | 
|  | 2667 | } | 
|  | 2668 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) | 
|  | 2669 | hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr); | 
|  | 2670 | } | 
|  | 2671 |  | 
|  | 2672 | /* | 
|  | 2673 | * Called by non-RPC-based layout drivers | 
|  | 2674 | */ | 
|  | 2675 | void pnfs_ld_read_done(struct nfs_pgio_header *hdr) | 
|  | 2676 | { | 
|  | 2677 | if (likely(!hdr->pnfs_error)) | 
|  | 2678 | hdr->mds_ops->rpc_call_done(&hdr->task, hdr); | 
|  | 2679 | trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); | 
|  | 2680 | if (unlikely(hdr->pnfs_error)) | 
|  | 2681 | pnfs_ld_handle_read_error(hdr); | 
|  | 2682 | hdr->mds_ops->rpc_release(hdr); | 
|  | 2683 | } | 
|  | 2684 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | 
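|  |  |  | 
|  |  | /* | 
|  |  |  * Illustration (example only, not compiled): a hypothetical non-RPC | 
|  |  |  * layout driver would fill in hdr->res.count or hdr->pnfs_error from | 
|  |  |  * its own transport completion, then hand the header back here. | 
|  |  |  * example_ld_read_complete() is an invented name for this sketch. | 
|  |  |  */ | 
|  |  | #if 0 | 
|  |  | static void example_ld_read_complete(struct nfs_pgio_header *hdr, | 
|  |  | int transport_status) | 
|  |  | { | 
|  |  | if (transport_status < 0) | 
|  |  | hdr->pnfs_error = transport_status; | 
|  |  | else | 
|  |  | hdr->res.count = transport_status; | 
|  |  | pnfs_ld_read_done(hdr); | 
|  |  | } | 
|  |  | #endif | 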
|  | 2685 |  | 
|  | 2686 | static void | 
|  | 2687 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | 
|  | 2688 | struct nfs_pgio_header *hdr) | 
|  | 2689 | { | 
|  | 2690 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | 
|  | 2691 |  | 
|  | 2692 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | 
|  | 2693 | list_splice_tail_init(&hdr->pages, &mirror->pg_list); | 
|  | 2694 | nfs_pageio_reset_read_mds(desc); | 
|  | 2695 | mirror->pg_recoalesce = 1; | 
|  | 2696 | } | 
|  | 2697 | hdr->completion_ops->completion(hdr); | 
|  | 2698 | } | 
|  | 2699 |  | 
|  | 2700 | /* | 
|  | 2701 | * Call the appropriate parallel I/O subsystem read function. | 
|  | 2702 | */ | 
|  | 2703 | static enum pnfs_try_status | 
|  | 2704 | pnfs_try_to_read_data(struct nfs_pgio_header *hdr, | 
|  | 2705 | const struct rpc_call_ops *call_ops, | 
|  | 2706 | struct pnfs_layout_segment *lseg) | 
|  | 2707 | { | 
|  | 2708 | struct inode *inode = hdr->inode; | 
|  | 2709 | struct nfs_server *nfss = NFS_SERVER(inode); | 
|  | 2710 | enum pnfs_try_status trypnfs; | 
|  | 2711 |  | 
|  | 2712 | hdr->mds_ops = call_ops; | 
|  | 2713 |  | 
|  | 2714 | dprintk("%s: Reading ino:%lu %u@%llu\n", | 
|  | 2715 | __func__, inode->i_ino, hdr->args.count, hdr->args.offset); | 
|  | 2716 |  | 
|  | 2717 | trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr); | 
|  | 2718 | if (trypnfs != PNFS_NOT_ATTEMPTED) | 
|  | 2719 | nfs_inc_stats(inode, NFSIOS_PNFS_READ); | 
|  | 2720 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); | 
|  | 2721 | return trypnfs; | 
|  | 2722 | } | 
|  | 2723 |  | 
|  | 2724 | /* Resend all requests through pnfs. */ | 
|  | 2725 | void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) | 
|  | 2726 | { | 
|  | 2727 | struct nfs_pageio_descriptor pgio; | 
|  | 2728 |  | 
|  | 2729 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | 
|  | 2730 | /* Prevent deadlocks with layoutreturn! */ | 
|  | 2731 | pnfs_put_lseg(hdr->lseg); | 
|  | 2732 | hdr->lseg = NULL; | 
|  | 2733 |  | 
|  | 2734 | nfs_pageio_init_read(&pgio, hdr->inode, false, | 
|  | 2735 | hdr->completion_ops); | 
|  | 2736 | hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); | 
|  | 2737 | } | 
|  | 2738 | } | 
|  | 2739 | EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); | 
|  | 2740 |  | 
|  | 2741 | static void | 
|  | 2742 | pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) | 
|  | 2743 | { | 
|  | 2744 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 
|  | 2745 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 
|  | 2746 | enum pnfs_try_status trypnfs; | 
|  | 2747 |  | 
|  | 2748 | trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); | 
|  | 2749 | switch (trypnfs) { | 
|  | 2750 | case PNFS_NOT_ATTEMPTED: | 
|  | 2751 | pnfs_read_through_mds(desc, hdr); | 
|  |  | /* Fallthrough */ | 
|  | 2752 | case PNFS_ATTEMPTED: | 
|  | 2753 | break; | 
|  | 2754 | case PNFS_TRY_AGAIN: | 
|  | 2755 | /* cleanup hdr and prepare to redo pnfs */ | 
|  | 2756 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | 
|  | 2757 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); | 
|  | 2758 | list_splice_init(&hdr->pages, &mirror->pg_list); | 
|  | 2759 | mirror->pg_recoalesce = 1; | 
|  | 2760 | } | 
|  | 2761 | hdr->mds_ops->rpc_release(hdr); | 
|  | 2762 | } | 
|  | 2763 | } | 
|  | 2764 |  | 
|  | 2765 | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) | 
|  | 2766 | { | 
|  | 2767 | pnfs_put_lseg(hdr->lseg); | 
|  | 2768 | nfs_pgio_header_free(hdr); | 
|  | 2769 | } | 
|  | 2770 |  | 
|  | 2771 | int | 
|  | 2772 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | 
|  | 2773 | { | 
|  | 2774 | struct nfs_pgio_header *hdr; | 
|  | 2775 | int ret; | 
|  | 2776 |  | 
|  | 2777 | hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); | 
|  | 2778 | if (!hdr) { | 
|  | 2779 | desc->pg_error = -ENOMEM; | 
|  | 2780 | return desc->pg_error; | 
|  | 2781 | } | 
|  | 2782 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); | 
|  | 2783 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 
|  | 2784 | ret = nfs_generic_pgio(desc, hdr); | 
|  | 2785 | if (!ret) | 
|  | 2786 | pnfs_do_read(desc, hdr); | 
|  | 2787 | return ret; | 
|  | 2788 | } | 
|  | 2789 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); | 
|  | 2790 |  | 
|  | 2791 | static void pnfs_clear_layoutcommitting(struct inode *inode) | 
|  | 2792 | { | 
|  | 2793 | unsigned long *bitlock = &NFS_I(inode)->flags; | 
|  | 2794 |  | 
|  | 2795 | clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); | 
|  | 2796 | smp_mb__after_atomic(); | 
|  | 2797 | wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); | 
|  | 2798 | } | 
|  | 2799 |  | 
|  | 2800 | /* | 
|  | 2801 | * There can be multiple RW segments. | 
|  | 2802 | */ | 
|  | 2803 | static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) | 
|  | 2804 | { | 
|  | 2805 | struct pnfs_layout_segment *lseg; | 
|  | 2806 |  | 
|  | 2807 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { | 
|  | 2808 | if (lseg->pls_range.iomode == IOMODE_RW && | 
|  | 2809 | test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) | 
|  | 2810 | list_add(&lseg->pls_lc_list, listp); | 
|  | 2811 | } | 
|  | 2812 | } | 
|  | 2813 |  | 
|  | 2814 | static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) | 
|  | 2815 | { | 
|  | 2816 | struct pnfs_layout_segment *lseg, *tmp; | 
|  | 2817 |  | 
|  | 2818 | /* Matched by references in pnfs_set_layoutcommit */ | 
|  | 2819 | list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { | 
|  | 2820 | list_del_init(&lseg->pls_lc_list); | 
|  | 2821 | pnfs_put_lseg(lseg); | 
|  | 2822 | } | 
|  | 2823 |  | 
|  | 2824 | pnfs_clear_layoutcommitting(inode); | 
|  | 2825 | } | 
|  | 2826 |  | 
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
        pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
}
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);

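/*
 * Record that @inode needs a LAYOUTCOMMIT and update the last write
 * byte (plh_lwb) from @end_pos.  The first time a segment is marked, a
 * reference is taken on @lseg; it is dropped again from
 * nfs4_layoutcommit_release() via pnfs_list_write_lseg_done().
 */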
void
pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
                      loff_t end_pos)
{
        struct nfs_inode *nfsi = NFS_I(inode);
        bool mark_as_dirty = false;

        spin_lock(&inode->i_lock);
        if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
                nfsi->layout->plh_lwb = end_pos;
                mark_as_dirty = true;
                dprintk("%s: Set layoutcommit for inode %lu\n",
                        __func__, inode->i_ino);
        } else if (end_pos > nfsi->layout->plh_lwb)
                nfsi->layout->plh_lwb = end_pos;
        if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
                /* references matched in nfs4_layoutcommit_release */
                pnfs_get_lseg(lseg);
        }
        spin_unlock(&inode->i_lock);
        dprintk("%s: lseg %p end_pos %llu\n",
                __func__, lseg, nfsi->layout->plh_lwb);

        /* If pnfs_layoutcommit_inode() ran after we dropped the inode
         * lock, the mark_inode_dirty_sync() below is harmless: the next
         * layoutcommit will be a no-op because NFS_INO_LAYOUTCOMMIT is
         * no longer set. */
        if (mark_as_dirty)
                mark_inode_dirty_sync(inode);
}
EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
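
/*
 * Illustrative, based on the files layout driver (not a call site in
 * this file): a layout driver's write completion path records the
 * write window along the lines of
 *
 *      if (hdr->res.verf->committed != NFS_FILE_SYNC)
 *              pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
 *                                    hdr->mds_offset + hdr->res.count);
 *
 * so that a later pnfs_layoutcommit_inode() can tell the MDS how much
 * of the file changed.
 */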

void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
{
        struct nfs_server *nfss = NFS_SERVER(data->args.inode);

        if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
                nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
        pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
}

/*
 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
 * data to disk to allow the server to recover the data if it crashes.
 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
 * is off and a COMMIT is sent to a data server, or when WRITEs to a
 * data server return NFS_DATA_SYNC.
 *
 * Returns 0 if there was nothing to commit, -EAGAIN if a layoutcommit
 * is already in flight and @sync is false, and otherwise an errno or
 * the status of the LAYOUTCOMMIT RPC.
 */
int
pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
        struct nfs4_layoutcommit_data *data;
        struct nfs_inode *nfsi = NFS_I(inode);
        loff_t end_pos;
        int status;

        if (!pnfs_layoutcommit_outstanding(inode))
                return 0;

        dprintk("--> %s inode %lu\n", __func__, inode->i_ino);

        status = -EAGAIN;
        if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
                if (!sync)
                        goto out;
                status = wait_on_bit_lock_action(&nfsi->flags,
                                NFS_INO_LAYOUTCOMMITTING,
                                nfs_wait_bit_killable,
                                TASK_KILLABLE);
                if (status)
                        goto out;
        }

        status = -ENOMEM;
        /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
        data = kzalloc(sizeof(*data), GFP_NOFS);
        if (!data)
                goto clear_layoutcommitting;

        status = 0;
        spin_lock(&inode->i_lock);
        if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
                goto out_unlock;

        INIT_LIST_HEAD(&data->lseg_list);
        pnfs_list_write_lseg(inode, &data->lseg_list);

        end_pos = nfsi->layout->plh_lwb;

        nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
        spin_unlock(&inode->i_lock);

        data->args.inode = inode;
        data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
        nfs_fattr_init(&data->fattr);
        data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
        data->res.fattr = &data->fattr;
        if (end_pos != 0)
                data->args.lastbytewritten = end_pos - 1;
        else
                data->args.lastbytewritten = U64_MAX;
        data->res.server = NFS_SERVER(inode);

        if (ld->prepare_layoutcommit) {
                status = ld->prepare_layoutcommit(&data->args);
                if (status) {
                        put_rpccred(data->cred);
                        spin_lock(&inode->i_lock);
                        set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
                        if (end_pos > nfsi->layout->plh_lwb)
                                nfsi->layout->plh_lwb = end_pos;
                        goto out_unlock;
                }
        }

        status = nfs4_proc_layoutcommit(data, sync);
out:
        if (status)
                mark_inode_dirty_sync(inode);
        dprintk("<-- %s status %d\n", __func__, status);
        return status;
out_unlock:
        spin_unlock(&inode->i_lock);
        kfree(data);
clear_layoutcommitting:
        pnfs_clear_layoutcommitting(inode);
        goto out;
}
EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
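
/*
 * Illustrative (not part of this file): writeback integrity points
 * such as the NFSv4 ->write_inode() method typically end with
 *
 *      return pnfs_layoutcommit_inode(inode,
 *                      wbc->sync_mode == WB_SYNC_ALL);
 *
 * while fsync-style paths use the synchronous form via
 * pnfs_generic_sync() below.
 */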

int
pnfs_generic_sync(struct inode *inode, bool datasync)
{
        return pnfs_layoutcommit_inode(inode, true);
}
EXPORT_SYMBOL_GPL(pnfs_generic_sync);

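/*
 * Allocate the structure that caches the server's mdsthreshold hints.
 * The hints tell the client when I/O should go through the MDS rather
 * than through the pNFS data servers.
 */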
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
        struct nfs4_threshold *thp;

        thp = kzalloc(sizeof(*thp), GFP_NOFS);
        if (!thp) {
                dprintk("%s mdsthreshold allocation failed\n", __func__);
                return NULL;
        }
        return thp;
}

#if IS_ENABLED(CONFIG_NFS_V4_2)
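/*
 * pnfs_report_layoutstat - send a LAYOUTSTATS update for @inode.
 *
 * At most one LAYOUTSTATS RPC is in flight per inode, serialized by the
 * NFS_INO_LAYOUTSTATS bit.  Once the RPC has been dispatched, clearing
 * that bit and dropping the layout header reference are expected to be
 * handled by the completion path behind nfs42_proc_layoutstats_generic().
 */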
int
pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
{
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
        struct nfs_server *server = NFS_SERVER(inode);
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs42_layoutstat_data *data;
        struct pnfs_layout_hdr *hdr;
        int status = 0;

        if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
                goto out;

        if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
                goto out;

        if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
                goto out;

        spin_lock(&inode->i_lock);
        if (!NFS_I(inode)->layout) {
                spin_unlock(&inode->i_lock);
                goto out_clear_layoutstats;
        }
        hdr = NFS_I(inode)->layout;
        pnfs_get_layout_hdr(hdr);
        spin_unlock(&inode->i_lock);

        data = kzalloc(sizeof(*data), gfp_flags);
        if (!data) {
                status = -ENOMEM;
                goto out_put;
        }

        data->args.fh = NFS_FH(inode);
        data->args.inode = inode;
        status = ld->prepare_layoutstats(&data->args);
        if (status)
                goto out_free;

        status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);

out:
        dprintk("%s returns %d\n", __func__, status);
        return status;

out_free:
        kfree(data);
out_put:
        pnfs_put_layout_hdr(hdr);
out_clear_layoutstats:
        smp_mb__before_atomic();
        clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
        smp_mb__after_atomic();
        goto out;
}
EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
#endif
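/*
 * Interval at which the client reports layoutstats.  The value is
 * interpreted by the layout driver; the flexfiles driver, for example,
 * treats it as seconds and falls back to a built-in default when 0.
 */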
unsigned int layoutstats_timer;
module_param(layoutstats_timer, uint, 0644);
EXPORT_SYMBOL_GPL(layoutstats_timer);