| lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | *  pNFS functions to call and manage layout drivers. | 
|  | 3 | * | 
|  | 4 | *  Copyright (c) 2002 [year of first publication] | 
|  | 5 | *  The Regents of the University of Michigan | 
|  | 6 | *  All Rights Reserved | 
|  | 7 | * | 
|  | 8 | *  Dean Hildebrand <dhildebz@umich.edu> | 
|  | 9 | * | 
|  | 10 | *  Permission is granted to use, copy, create derivative works, and | 
|  | 11 | *  redistribute this software and such derivative works for any purpose, | 
|  | 12 | *  so long as the name of the University of Michigan is not used in | 
|  | 13 | *  any advertising or publicity pertaining to the use or distribution | 
|  | 14 | *  of this software without specific, written prior authorization. If | 
|  | 15 | *  the above copyright notice or any other identification of the | 
|  | 16 | *  University of Michigan is included in any copy of any portion of | 
|  | 17 | *  this software, then the disclaimer below must also be included. | 
|  | 18 | * | 
|  | 19 | *  This software is provided as is, without representation or warranty | 
|  | 20 | *  of any kind either express or implied, including without limitation | 
|  | 21 | *  the implied warranties of merchantability, fitness for a particular | 
|  | 22 | *  purpose, or noninfringement.  The Regents of the University of | 
|  | 23 | *  Michigan shall not be liable for any damages, including special, | 
|  | 24 | *  indirect, incidental, or consequential damages, with respect to any | 
|  | 25 | *  claim arising out of or in connection with the use of the software, | 
|  | 26 | *  even if it has been or is hereafter advised of the possibility of | 
|  | 27 | *  such damages. | 
|  | 28 | */ | 
|  | 29 |  | 
|  | 30 | #include <linux/nfs_fs.h> | 
|  | 31 | #include <linux/nfs_page.h> | 
|  | 32 | #include <linux/module.h> | 
|  | 33 | #include "internal.h" | 
|  | 34 | #include "pnfs.h" | 
|  | 35 | #include "iostat.h" | 
|  | 36 |  | 
|  | 37 | #define NFSDBG_FACILITY		NFSDBG_PNFS | 
|  | 38 |  | 
/* Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 *      It is taken around every lookup, insertion and removal on that
 *      list in this file.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 *
 * Entries are struct pnfs_layoutdriver_type objects linked through their
 * pnfs_tblid member; pnfs_register_layoutdriver() adds them and
 * pnfs_unregister_layoutdriver() removes them.
 */
static LIST_HEAD(pnfs_modules_tbl);
|  | 50 |  | 
|  | 51 | /* Return the registered pnfs layout driver module matching given id */ | 
|  | 52 | static struct pnfs_layoutdriver_type * | 
|  | 53 | find_pnfs_driver_locked(u32 id) | 
|  | 54 | { | 
|  | 55 | struct pnfs_layoutdriver_type *local; | 
|  | 56 |  | 
|  | 57 | list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) | 
|  | 58 | if (local->id == id) | 
|  | 59 | goto out; | 
|  | 60 | local = NULL; | 
|  | 61 | out: | 
|  | 62 | dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); | 
|  | 63 | return local; | 
|  | 64 | } | 
|  | 65 |  | 
|  | 66 | static struct pnfs_layoutdriver_type * | 
|  | 67 | find_pnfs_driver(u32 id) | 
|  | 68 | { | 
|  | 69 | struct pnfs_layoutdriver_type *local; | 
|  | 70 |  | 
|  | 71 | spin_lock(&pnfs_spinlock); | 
|  | 72 | local = find_pnfs_driver_locked(id); | 
|  | 73 | spin_unlock(&pnfs_spinlock); | 
|  | 74 | return local; | 
|  | 75 | } | 
|  | 76 |  | 
|  | 77 | void | 
|  | 78 | unset_pnfs_layoutdriver(struct nfs_server *nfss) | 
|  | 79 | { | 
|  | 80 | if (nfss->pnfs_curr_ld) { | 
|  | 81 | if (nfss->pnfs_curr_ld->clear_layoutdriver) | 
|  | 82 | nfss->pnfs_curr_ld->clear_layoutdriver(nfss); | 
|  | 83 | module_put(nfss->pnfs_curr_ld->owner); | 
|  | 84 | } | 
|  | 85 | nfss->pnfs_curr_ld = NULL; | 
|  | 86 | } | 
|  | 87 |  | 
|  | 88 | /* | 
|  | 89 | * Try to set the server's pnfs module to the pnfs layout type specified by id. | 
|  | 90 | * Currently only one pNFS layout driver per filesystem is supported. | 
|  | 91 | * | 
|  | 92 | * @id layout type. Zero (illegal layout type) indicates pNFS not in use. | 
|  | 93 | */ | 
|  | 94 | void | 
|  | 95 | set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, | 
|  | 96 | u32 id) | 
|  | 97 | { | 
|  | 98 | struct pnfs_layoutdriver_type *ld_type = NULL; | 
|  | 99 |  | 
|  | 100 | if (id == 0) | 
|  | 101 | goto out_no_driver; | 
|  | 102 | if (!(server->nfs_client->cl_exchange_flags & | 
|  | 103 | (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { | 
|  | 104 | printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n", | 
|  | 105 | __func__, id, server->nfs_client->cl_exchange_flags); | 
|  | 106 | goto out_no_driver; | 
|  | 107 | } | 
|  | 108 | ld_type = find_pnfs_driver(id); | 
|  | 109 | if (!ld_type) { | 
|  | 110 | request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id); | 
|  | 111 | ld_type = find_pnfs_driver(id); | 
|  | 112 | if (!ld_type) { | 
|  | 113 | dprintk("%s: No pNFS module found for %u.\n", | 
|  | 114 | __func__, id); | 
|  | 115 | goto out_no_driver; | 
|  | 116 | } | 
|  | 117 | } | 
|  | 118 | if (!try_module_get(ld_type->owner)) { | 
|  | 119 | dprintk("%s: Could not grab reference on module\n", __func__); | 
|  | 120 | goto out_no_driver; | 
|  | 121 | } | 
|  | 122 | server->pnfs_curr_ld = ld_type; | 
|  | 123 | if (ld_type->set_layoutdriver | 
|  | 124 | && ld_type->set_layoutdriver(server, mntfh)) { | 
|  | 125 | printk(KERN_ERR "NFS: %s: Error initializing pNFS layout " | 
|  | 126 | "driver %u.\n", __func__, id); | 
|  | 127 | module_put(ld_type->owner); | 
|  | 128 | goto out_no_driver; | 
|  | 129 | } | 
|  | 130 |  | 
|  | 131 | dprintk("%s: pNFS module for %u set\n", __func__, id); | 
|  | 132 | return; | 
|  | 133 |  | 
|  | 134 | out_no_driver: | 
|  | 135 | dprintk("%s: Using NFSv4 I/O\n", __func__); | 
|  | 136 | server->pnfs_curr_ld = NULL; | 
|  | 137 | } | 
|  | 138 |  | 
|  | 139 | int | 
|  | 140 | pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | 
|  | 141 | { | 
|  | 142 | int status = -EINVAL; | 
|  | 143 | struct pnfs_layoutdriver_type *tmp; | 
|  | 144 |  | 
|  | 145 | if (ld_type->id == 0) { | 
|  | 146 | printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__); | 
|  | 147 | return status; | 
|  | 148 | } | 
|  | 149 | if (!ld_type->alloc_lseg || !ld_type->free_lseg) { | 
|  | 150 | printk(KERN_ERR "NFS: %s Layout driver must provide " | 
|  | 151 | "alloc_lseg and free_lseg.\n", __func__); | 
|  | 152 | return status; | 
|  | 153 | } | 
|  | 154 |  | 
|  | 155 | spin_lock(&pnfs_spinlock); | 
|  | 156 | tmp = find_pnfs_driver_locked(ld_type->id); | 
|  | 157 | if (!tmp) { | 
|  | 158 | list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); | 
|  | 159 | status = 0; | 
|  | 160 | dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, | 
|  | 161 | ld_type->name); | 
|  | 162 | } else { | 
|  | 163 | printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n", | 
|  | 164 | __func__, ld_type->id); | 
|  | 165 | } | 
|  | 166 | spin_unlock(&pnfs_spinlock); | 
|  | 167 |  | 
|  | 168 | return status; | 
|  | 169 | } | 
|  | 170 | EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); | 
|  | 171 |  | 
|  | 172 | void | 
|  | 173 | pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | 
|  | 174 | { | 
|  | 175 | dprintk("%s Deregistering id:%u\n", __func__, ld_type->id); | 
|  | 176 | spin_lock(&pnfs_spinlock); | 
|  | 177 | list_del(&ld_type->pnfs_tblid); | 
|  | 178 | spin_unlock(&pnfs_spinlock); | 
|  | 179 | } | 
|  | 180 | EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); | 
|  | 181 |  | 
|  | 182 | /* | 
|  | 183 | * pNFS client layout cache | 
|  | 184 | */ | 
|  | 185 |  | 
|  | 186 | /* Need to hold i_lock if caller does not already hold reference */ | 
|  | 187 | void | 
|  | 188 | get_layout_hdr(struct pnfs_layout_hdr *lo) | 
|  | 189 | { | 
|  | 190 | atomic_inc(&lo->plh_refcount); | 
|  | 191 | } | 
|  | 192 |  | 
|  | 193 | static struct pnfs_layout_hdr * | 
|  | 194 | pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) | 
|  | 195 | { | 
|  | 196 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; | 
|  | 197 | return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : | 
|  | 198 | kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); | 
|  | 199 | } | 
|  | 200 |  | 
|  | 201 | static void | 
|  | 202 | pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) | 
|  | 203 | { | 
|  | 204 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; | 
|  | 205 | put_rpccred(lo->plh_lc_cred); | 
|  | 206 | return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); | 
|  | 207 | } | 
|  | 208 |  | 
|  | 209 | static void | 
|  | 210 | destroy_layout_hdr(struct pnfs_layout_hdr *lo) | 
|  | 211 | { | 
|  | 212 | dprintk("%s: freeing layout cache %p\n", __func__, lo); | 
|  | 213 | BUG_ON(!list_empty(&lo->plh_layouts)); | 
|  | 214 | NFS_I(lo->plh_inode)->layout = NULL; | 
|  | 215 | pnfs_free_layout_hdr(lo); | 
|  | 216 | } | 
|  | 217 |  | 
|  | 218 | static void | 
|  | 219 | put_layout_hdr_locked(struct pnfs_layout_hdr *lo) | 
|  | 220 | { | 
|  | 221 | if (atomic_dec_and_test(&lo->plh_refcount)) | 
|  | 222 | destroy_layout_hdr(lo); | 
|  | 223 | } | 
|  | 224 |  | 
|  | 225 | void | 
|  | 226 | put_layout_hdr(struct pnfs_layout_hdr *lo) | 
|  | 227 | { | 
|  | 228 | struct inode *inode = lo->plh_inode; | 
|  | 229 |  | 
|  | 230 | if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { | 
|  | 231 | destroy_layout_hdr(lo); | 
|  | 232 | spin_unlock(&inode->i_lock); | 
|  | 233 | } | 
|  | 234 | } | 
|  | 235 |  | 
/*
 * Initialise the generic part of a freshly allocated layout segment.
 *
 * @lo:   layout header the segment belongs to
 * @lseg: segment to initialise
 *
 * The segment starts with empty list links, a reference count of one,
 * the NFS_LSEG_VALID flag set and pls_layout pointing at @lo.
 */
static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
	INIT_LIST_HEAD(&lseg->pls_list);
	INIT_LIST_HEAD(&lseg->pls_lc_list);
	atomic_set(&lseg->pls_refcount, 1);
	/* Full barrier between setting the refcount and setting VALID */
	smp_mb();
	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
	lseg->pls_layout = lo;
}
|  | 246 |  | 
|  | 247 | static void free_lseg(struct pnfs_layout_segment *lseg) | 
|  | 248 | { | 
|  | 249 | struct inode *ino = lseg->pls_layout->plh_inode; | 
|  | 250 |  | 
|  | 251 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); | 
|  | 252 | /* Matched by get_layout_hdr in pnfs_insert_layout */ | 
|  | 253 | put_layout_hdr(NFS_I(ino)->layout); | 
|  | 254 | } | 
|  | 255 |  | 
|  | 256 | static void | 
|  | 257 | put_lseg_common(struct pnfs_layout_segment *lseg) | 
|  | 258 | { | 
|  | 259 | struct inode *inode = lseg->pls_layout->plh_inode; | 
|  | 260 |  | 
|  | 261 | WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); | 
|  | 262 | list_del_init(&lseg->pls_list); | 
|  | 263 | if (list_empty(&lseg->pls_layout->plh_segs)) { | 
|  | 264 | set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); | 
|  | 265 | /* Matched by initial refcount set in alloc_init_layout_hdr */ | 
|  | 266 | put_layout_hdr_locked(lseg->pls_layout); | 
|  | 267 | } | 
|  | 268 | rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); | 
|  | 269 | } | 
|  | 270 |  | 
|  | 271 | void | 
|  | 272 | put_lseg(struct pnfs_layout_segment *lseg) | 
|  | 273 | { | 
|  | 274 | struct inode *inode; | 
|  | 275 |  | 
|  | 276 | if (!lseg) | 
|  | 277 | return; | 
|  | 278 |  | 
|  | 279 | dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, | 
|  | 280 | atomic_read(&lseg->pls_refcount), | 
|  | 281 | test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); | 
|  | 282 | inode = lseg->pls_layout->plh_inode; | 
|  | 283 | if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { | 
|  | 284 | LIST_HEAD(free_me); | 
|  | 285 |  | 
|  | 286 | put_lseg_common(lseg); | 
|  | 287 | list_add(&lseg->pls_list, &free_me); | 
|  | 288 | spin_unlock(&inode->i_lock); | 
|  | 289 | pnfs_free_lseg_list(&free_me); | 
|  | 290 | } | 
|  | 291 | } | 
|  | 292 | EXPORT_SYMBOL_GPL(put_lseg); | 
|  | 293 |  | 
|  | 294 | static inline u64 | 
|  | 295 | end_offset(u64 start, u64 len) | 
|  | 296 | { | 
|  | 297 | u64 end; | 
|  | 298 |  | 
|  | 299 | end = start + len; | 
|  | 300 | return end >= start ? end : NFS4_MAX_UINT64; | 
|  | 301 | } | 
|  | 302 |  | 
|  | 303 | /* last octet in a range */ | 
|  | 304 | static inline u64 | 
|  | 305 | last_byte_offset(u64 start, u64 len) | 
|  | 306 | { | 
|  | 307 | u64 end; | 
|  | 308 |  | 
|  | 309 | BUG_ON(!len); | 
|  | 310 | end = start + len; | 
|  | 311 | return end > start ? end - 1 : NFS4_MAX_UINT64; | 
|  | 312 | } | 
|  | 313 |  | 
|  | 314 | /* | 
|  | 315 | * is l2 fully contained in l1? | 
|  | 316 | *   start1                             end1 | 
|  | 317 | *   [----------------------------------) | 
|  | 318 | *           start2           end2 | 
|  | 319 | *           [----------------) | 
|  | 320 | */ | 
|  | 321 | static inline int | 
|  | 322 | lo_seg_contained(struct pnfs_layout_range *l1, | 
|  | 323 | struct pnfs_layout_range *l2) | 
|  | 324 | { | 
|  | 325 | u64 start1 = l1->offset; | 
|  | 326 | u64 end1 = end_offset(start1, l1->length); | 
|  | 327 | u64 start2 = l2->offset; | 
|  | 328 | u64 end2 = end_offset(start2, l2->length); | 
|  | 329 |  | 
|  | 330 | return (start1 <= start2) && (end1 >= end2); | 
|  | 331 | } | 
|  | 332 |  | 
|  | 333 | /* | 
|  | 334 | * is l1 and l2 intersecting? | 
|  | 335 | *   start1                             end1 | 
|  | 336 | *   [----------------------------------) | 
|  | 337 | *                              start2           end2 | 
|  | 338 | *                              [----------------) | 
|  | 339 | */ | 
|  | 340 | static inline int | 
|  | 341 | lo_seg_intersecting(struct pnfs_layout_range *l1, | 
|  | 342 | struct pnfs_layout_range *l2) | 
|  | 343 | { | 
|  | 344 | u64 start1 = l1->offset; | 
|  | 345 | u64 end1 = end_offset(start1, l1->length); | 
|  | 346 | u64 start2 = l2->offset; | 
|  | 347 | u64 end2 = end_offset(start2, l2->length); | 
|  | 348 |  | 
|  | 349 | return (end1 == NFS4_MAX_UINT64 || end1 > start2) && | 
|  | 350 | (end2 == NFS4_MAX_UINT64 || end2 > start1); | 
|  | 351 | } | 
|  | 352 |  | 
|  | 353 | static bool | 
|  | 354 | should_free_lseg(struct pnfs_layout_range *lseg_range, | 
|  | 355 | struct pnfs_layout_range *recall_range) | 
|  | 356 | { | 
|  | 357 | return (recall_range->iomode == IOMODE_ANY || | 
|  | 358 | lseg_range->iomode == recall_range->iomode) && | 
|  | 359 | lo_seg_intersecting(lseg_range, recall_range); | 
|  | 360 | } | 
|  | 361 |  | 
|  | 362 | /* Returns 1 if lseg is removed from list, 0 otherwise */ | 
|  | 363 | static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, | 
|  | 364 | struct list_head *tmp_list) | 
|  | 365 | { | 
|  | 366 | int rv = 0; | 
|  | 367 |  | 
|  | 368 | if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { | 
|  | 369 | /* Remove the reference keeping the lseg in the | 
|  | 370 | * list.  It will now be removed when all | 
|  | 371 | * outstanding io is finished. | 
|  | 372 | */ | 
|  | 373 | dprintk("%s: lseg %p ref %d\n", __func__, lseg, | 
|  | 374 | atomic_read(&lseg->pls_refcount)); | 
|  | 375 | if (atomic_dec_and_test(&lseg->pls_refcount)) { | 
|  | 376 | put_lseg_common(lseg); | 
|  | 377 | list_add(&lseg->pls_list, tmp_list); | 
|  | 378 | rv = 1; | 
|  | 379 | } | 
|  | 380 | } | 
|  | 381 | return rv; | 
|  | 382 | } | 
|  | 383 |  | 
|  | 384 | /* Returns count of number of matching invalid lsegs remaining in list | 
|  | 385 | * after call. | 
|  | 386 | */ | 
|  | 387 | int | 
|  | 388 | mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 
|  | 389 | struct list_head *tmp_list, | 
|  | 390 | struct pnfs_layout_range *recall_range) | 
|  | 391 | { | 
|  | 392 | struct pnfs_layout_segment *lseg, *next; | 
|  | 393 | int invalid = 0, removed = 0; | 
|  | 394 |  | 
|  | 395 | dprintk("%s:Begin lo %p\n", __func__, lo); | 
|  | 396 |  | 
|  | 397 | if (list_empty(&lo->plh_segs)) { | 
|  | 398 | if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) | 
|  | 399 | put_layout_hdr_locked(lo); | 
|  | 400 | return 0; | 
|  | 401 | } | 
|  | 402 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) | 
|  | 403 | if (!recall_range || | 
|  | 404 | should_free_lseg(&lseg->pls_range, recall_range)) { | 
|  | 405 | dprintk("%s: freeing lseg %p iomode %d " | 
|  | 406 | "offset %llu length %llu\n", __func__, | 
|  | 407 | lseg, lseg->pls_range.iomode, lseg->pls_range.offset, | 
|  | 408 | lseg->pls_range.length); | 
|  | 409 | invalid++; | 
|  | 410 | removed += mark_lseg_invalid(lseg, tmp_list); | 
|  | 411 | } | 
|  | 412 | dprintk("%s:Return %i\n", __func__, invalid - removed); | 
|  | 413 | return invalid - removed; | 
|  | 414 | } | 
|  | 415 |  | 
|  | 416 | /* note free_me must contain lsegs from a single layout_hdr */ | 
|  | 417 | void | 
|  | 418 | pnfs_free_lseg_list(struct list_head *free_me) | 
|  | 419 | { | 
|  | 420 | struct pnfs_layout_segment *lseg, *tmp; | 
|  | 421 | struct pnfs_layout_hdr *lo; | 
|  | 422 |  | 
|  | 423 | if (list_empty(free_me)) | 
|  | 424 | return; | 
|  | 425 |  | 
|  | 426 | lo = list_first_entry(free_me, struct pnfs_layout_segment, | 
|  | 427 | pls_list)->pls_layout; | 
|  | 428 |  | 
|  | 429 | if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) { | 
|  | 430 | struct nfs_client *clp; | 
|  | 431 |  | 
|  | 432 | clp = NFS_SERVER(lo->plh_inode)->nfs_client; | 
|  | 433 | spin_lock(&clp->cl_lock); | 
|  | 434 | list_del_init(&lo->plh_layouts); | 
|  | 435 | spin_unlock(&clp->cl_lock); | 
|  | 436 | } | 
|  | 437 | list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { | 
|  | 438 | list_del(&lseg->pls_list); | 
|  | 439 | free_lseg(lseg); | 
|  | 440 | } | 
|  | 441 | } | 
|  | 442 |  | 
|  | 443 | void | 
|  | 444 | pnfs_destroy_layout(struct nfs_inode *nfsi) | 
|  | 445 | { | 
|  | 446 | struct pnfs_layout_hdr *lo; | 
|  | 447 | LIST_HEAD(tmp_list); | 
|  | 448 |  | 
|  | 449 | spin_lock(&nfsi->vfs_inode.i_lock); | 
|  | 450 | lo = nfsi->layout; | 
|  | 451 | if (lo) { | 
|  | 452 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ | 
|  | 453 | mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | 
|  | 454 | } | 
|  | 455 | spin_unlock(&nfsi->vfs_inode.i_lock); | 
|  | 456 | pnfs_free_lseg_list(&tmp_list); | 
|  | 457 | } | 
|  | 458 |  | 
|  | 459 | /* | 
|  | 460 | * Called by the state manger to remove all layouts established under an | 
|  | 461 | * expired lease. | 
|  | 462 | */ | 
|  | 463 | void | 
|  | 464 | pnfs_destroy_all_layouts(struct nfs_client *clp) | 
|  | 465 | { | 
|  | 466 | struct nfs_server *server; | 
|  | 467 | struct pnfs_layout_hdr *lo; | 
|  | 468 | LIST_HEAD(tmp_list); | 
|  | 469 |  | 
|  | 470 | nfs4_deviceid_mark_client_invalid(clp); | 
|  | 471 | nfs4_deviceid_purge_client(clp); | 
|  | 472 |  | 
|  | 473 | spin_lock(&clp->cl_lock); | 
|  | 474 | rcu_read_lock(); | 
|  | 475 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | 
|  | 476 | if (!list_empty(&server->layouts)) | 
|  | 477 | list_splice_init(&server->layouts, &tmp_list); | 
|  | 478 | } | 
|  | 479 | rcu_read_unlock(); | 
|  | 480 | spin_unlock(&clp->cl_lock); | 
|  | 481 |  | 
|  | 482 | while (!list_empty(&tmp_list)) { | 
|  | 483 | lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, | 
|  | 484 | plh_layouts); | 
|  | 485 | dprintk("%s freeing layout for inode %lu\n", __func__, | 
|  | 486 | lo->plh_inode->i_ino); | 
|  | 487 | list_del_init(&lo->plh_layouts); | 
|  | 488 | pnfs_destroy_layout(NFS_I(lo->plh_inode)); | 
|  | 489 | } | 
|  | 490 | } | 
|  | 491 |  | 
|  | 492 | /* update lo->plh_stateid with new if is more recent */ | 
|  | 493 | void | 
|  | 494 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, | 
|  | 495 | bool update_barrier) | 
|  | 496 | { | 
|  | 497 | u32 oldseq, newseq; | 
|  | 498 |  | 
|  | 499 | oldseq = be32_to_cpu(lo->plh_stateid.seqid); | 
|  | 500 | newseq = be32_to_cpu(new->seqid); | 
|  | 501 | if ((int)(newseq - oldseq) > 0) { | 
|  | 502 | nfs4_stateid_copy(&lo->plh_stateid, new); | 
|  | 503 | if (update_barrier) { | 
|  | 504 | u32 new_barrier = be32_to_cpu(new->seqid); | 
|  | 505 |  | 
|  | 506 | if ((int)(new_barrier - lo->plh_barrier)) | 
|  | 507 | lo->plh_barrier = new_barrier; | 
|  | 508 | } else { | 
|  | 509 | /* Because of wraparound, we want to keep the barrier | 
|  | 510 | * "close" to the current seqids.  It needs to be | 
|  | 511 | * within 2**31 to count as "behind", so if it | 
|  | 512 | * gets too near that limit, give us a litle leeway | 
|  | 513 | * and bring it to within 2**30. | 
|  | 514 | * NOTE - and yes, this is all unsigned arithmetic. | 
|  | 515 | */ | 
|  | 516 | if (unlikely((newseq - lo->plh_barrier) > (3 << 29))) | 
|  | 517 | lo->plh_barrier = newseq - (1 << 30); | 
|  | 518 | } | 
|  | 519 | } | 
|  | 520 | } | 
|  | 521 |  | 
|  | 522 | /* lget is set to 1 if called from inside send_layoutget call chain */ | 
|  | 523 | static bool | 
|  | 524 | pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, | 
|  | 525 | int lget) | 
|  | 526 | { | 
|  | 527 | if ((stateid) && | 
|  | 528 | (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) | 
|  | 529 | return true; | 
|  | 530 | return lo->plh_block_lgets || | 
|  | 531 | test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || | 
|  | 532 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 
|  | 533 | (list_empty(&lo->plh_segs) && | 
|  | 534 | (atomic_read(&lo->plh_outstanding) > lget)); | 
|  | 535 | } | 
|  | 536 |  | 
|  | 537 | int | 
|  | 538 | pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | 
|  | 539 | struct nfs4_state *open_state) | 
|  | 540 | { | 
|  | 541 | int status = 0; | 
|  | 542 |  | 
|  | 543 | dprintk("--> %s\n", __func__); | 
|  | 544 | spin_lock(&lo->plh_inode->i_lock); | 
|  | 545 | if (pnfs_layoutgets_blocked(lo, NULL, 1)) { | 
|  | 546 | status = -EAGAIN; | 
|  | 547 | } else if (list_empty(&lo->plh_segs)) { | 
|  | 548 | int seq; | 
|  | 549 |  | 
|  | 550 | do { | 
|  | 551 | seq = read_seqbegin(&open_state->seqlock); | 
|  | 552 | nfs4_stateid_copy(dst, &open_state->stateid); | 
|  | 553 | } while (read_seqretry(&open_state->seqlock, seq)); | 
|  | 554 | } else | 
|  | 555 | nfs4_stateid_copy(dst, &lo->plh_stateid); | 
|  | 556 | spin_unlock(&lo->plh_inode->i_lock); | 
|  | 557 | dprintk("<-- %s\n", __func__); | 
|  | 558 | return status; | 
|  | 559 | } | 
|  | 560 |  | 
|  | 561 | /* | 
|  | 562 | * Get layout from server. | 
|  | 563 | *    for now, assume that whole file layouts are requested. | 
|  | 564 | *    arg->offset: 0 | 
|  | 565 | *    arg->length: all ones | 
|  | 566 | */ | 
|  | 567 | static struct pnfs_layout_segment * | 
|  | 568 | send_layoutget(struct pnfs_layout_hdr *lo, | 
|  | 569 | struct nfs_open_context *ctx, | 
|  | 570 | struct pnfs_layout_range *range, | 
|  | 571 | gfp_t gfp_flags) | 
|  | 572 | { | 
|  | 573 | struct inode *ino = lo->plh_inode; | 
|  | 574 | struct nfs_server *server = NFS_SERVER(ino); | 
|  | 575 | struct nfs4_layoutget *lgp; | 
|  | 576 | struct pnfs_layout_segment *lseg = NULL; | 
|  | 577 |  | 
|  | 578 | dprintk("--> %s\n", __func__); | 
|  | 579 |  | 
|  | 580 | BUG_ON(ctx == NULL); | 
|  | 581 | lgp = kzalloc(sizeof(*lgp), gfp_flags); | 
|  | 582 | if (lgp == NULL) | 
|  | 583 | return NULL; | 
|  | 584 |  | 
|  | 585 | lgp->args.minlength = PAGE_CACHE_SIZE; | 
|  | 586 | if (lgp->args.minlength > range->length) | 
|  | 587 | lgp->args.minlength = range->length; | 
|  | 588 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | 
|  | 589 | lgp->args.range = *range; | 
|  | 590 | lgp->args.type = server->pnfs_curr_ld->id; | 
|  | 591 | lgp->args.inode = ino; | 
|  | 592 | lgp->args.ctx = get_nfs_open_context(ctx); | 
|  | 593 | lgp->lsegpp = &lseg; | 
|  | 594 | lgp->gfp_flags = gfp_flags; | 
|  | 595 |  | 
|  | 596 | /* Synchronously retrieve layout information from server and | 
|  | 597 | * store in lseg. | 
|  | 598 | */ | 
|  | 599 | nfs4_proc_layoutget(lgp, gfp_flags); | 
|  | 600 | if (!lseg) { | 
|  | 601 | /* remember that LAYOUTGET failed and suspend trying */ | 
|  | 602 | set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); | 
|  | 603 | } | 
|  | 604 |  | 
|  | 605 | return lseg; | 
|  | 606 | } | 
|  | 607 |  | 
|  | 608 | /* Initiates a LAYOUTRETURN(FILE) */ | 
|  | 609 | int | 
|  | 610 | _pnfs_return_layout(struct inode *ino) | 
|  | 611 | { | 
|  | 612 | struct pnfs_layout_hdr *lo = NULL; | 
|  | 613 | struct nfs_inode *nfsi = NFS_I(ino); | 
|  | 614 | LIST_HEAD(tmp_list); | 
|  | 615 | struct nfs4_layoutreturn *lrp; | 
|  | 616 | nfs4_stateid stateid; | 
|  | 617 | int status = 0; | 
|  | 618 |  | 
|  | 619 | dprintk("--> %s\n", __func__); | 
|  | 620 |  | 
|  | 621 | spin_lock(&ino->i_lock); | 
|  | 622 | lo = nfsi->layout; | 
|  | 623 | if (!lo) { | 
|  | 624 | spin_unlock(&ino->i_lock); | 
|  | 625 | dprintk("%s: no layout to return\n", __func__); | 
|  | 626 | return status; | 
|  | 627 | } | 
|  | 628 | stateid = nfsi->layout->plh_stateid; | 
|  | 629 | /* Reference matched in nfs4_layoutreturn_release */ | 
|  | 630 | get_layout_hdr(lo); | 
|  | 631 | mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | 
|  | 632 | lo->plh_block_lgets++; | 
|  | 633 | spin_unlock(&ino->i_lock); | 
|  | 634 | pnfs_free_lseg_list(&tmp_list); | 
|  | 635 |  | 
|  | 636 | WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); | 
|  | 637 |  | 
|  | 638 | lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); | 
|  | 639 | if (unlikely(lrp == NULL)) { | 
|  | 640 | status = -ENOMEM; | 
|  | 641 | set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); | 
|  | 642 | set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); | 
|  | 643 | put_layout_hdr(lo); | 
|  | 644 | goto out; | 
|  | 645 | } | 
|  | 646 |  | 
|  | 647 | lrp->args.stateid = stateid; | 
|  | 648 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; | 
|  | 649 | lrp->args.inode = ino; | 
|  | 650 | lrp->args.layout = lo; | 
|  | 651 | lrp->clp = NFS_SERVER(ino)->nfs_client; | 
|  | 652 |  | 
|  | 653 | status = nfs4_proc_layoutreturn(lrp); | 
|  | 654 | out: | 
|  | 655 | dprintk("<-- %s status: %d\n", __func__, status); | 
|  | 656 | return status; | 
|  | 657 | } | 
|  | 658 |  | 
|  | 659 | bool pnfs_roc(struct inode *ino) | 
|  | 660 | { | 
|  | 661 | struct pnfs_layout_hdr *lo; | 
|  | 662 | struct pnfs_layout_segment *lseg, *tmp; | 
|  | 663 | LIST_HEAD(tmp_list); | 
|  | 664 | bool found = false; | 
|  | 665 |  | 
|  | 666 | spin_lock(&ino->i_lock); | 
|  | 667 | lo = NFS_I(ino)->layout; | 
|  | 668 | if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) || | 
|  | 669 | test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) | 
|  | 670 | goto out_nolayout; | 
|  | 671 | list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) | 
|  | 672 | if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { | 
|  | 673 | mark_lseg_invalid(lseg, &tmp_list); | 
|  | 674 | found = true; | 
|  | 675 | } | 
|  | 676 | if (!found) | 
|  | 677 | goto out_nolayout; | 
|  | 678 | lo->plh_block_lgets++; | 
|  | 679 | get_layout_hdr(lo); /* matched in pnfs_roc_release */ | 
|  | 680 | spin_unlock(&ino->i_lock); | 
|  | 681 | pnfs_free_lseg_list(&tmp_list); | 
|  | 682 | return true; | 
|  | 683 |  | 
|  | 684 | out_nolayout: | 
|  | 685 | spin_unlock(&ino->i_lock); | 
|  | 686 | return false; | 
|  | 687 | } | 
|  | 688 |  | 
|  | 689 | void pnfs_roc_release(struct inode *ino) | 
|  | 690 | { | 
|  | 691 | struct pnfs_layout_hdr *lo; | 
|  | 692 |  | 
|  | 693 | spin_lock(&ino->i_lock); | 
|  | 694 | lo = NFS_I(ino)->layout; | 
|  | 695 | lo->plh_block_lgets--; | 
|  | 696 | put_layout_hdr_locked(lo); | 
|  | 697 | spin_unlock(&ino->i_lock); | 
|  | 698 | } | 
|  | 699 |  | 
|  | 700 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) | 
|  | 701 | { | 
|  | 702 | struct pnfs_layout_hdr *lo; | 
|  | 703 |  | 
|  | 704 | spin_lock(&ino->i_lock); | 
|  | 705 | lo = NFS_I(ino)->layout; | 
|  | 706 | if ((int)(barrier - lo->plh_barrier) > 0) | 
|  | 707 | lo->plh_barrier = barrier; | 
|  | 708 | spin_unlock(&ino->i_lock); | 
|  | 709 | } | 
|  | 710 |  | 
|  | 711 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier) | 
|  | 712 | { | 
|  | 713 | struct nfs_inode *nfsi = NFS_I(ino); | 
|  | 714 | struct pnfs_layout_segment *lseg; | 
|  | 715 | bool found = false; | 
|  | 716 |  | 
|  | 717 | spin_lock(&ino->i_lock); | 
|  | 718 | list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) | 
|  | 719 | if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { | 
|  | 720 | found = true; | 
|  | 721 | break; | 
|  | 722 | } | 
|  | 723 | if (!found) { | 
|  | 724 | struct pnfs_layout_hdr *lo = nfsi->layout; | 
|  | 725 | u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); | 
|  | 726 |  | 
|  | 727 | /* Since close does not return a layout stateid for use as | 
|  | 728 | * a barrier, we choose the worst-case barrier. | 
|  | 729 | */ | 
|  | 730 | *barrier = current_seqid + atomic_read(&lo->plh_outstanding); | 
|  | 731 | } | 
|  | 732 | spin_unlock(&ino->i_lock); | 
|  | 733 | return found; | 
|  | 734 | } | 
|  | 735 |  | 
|  | 736 | /* | 
|  | 737 | * Compare two layout segments for sorting into layout cache. | 
|  | 738 | * We want to preferentially return RW over RO layouts, so ensure those | 
|  | 739 | * are seen first. | 
|  | 740 | */ | 
|  | 741 | static s64 | 
|  | 742 | cmp_layout(struct pnfs_layout_range *l1, | 
|  | 743 | struct pnfs_layout_range *l2) | 
|  | 744 | { | 
|  | 745 | s64 d; | 
|  | 746 |  | 
|  | 747 | /* high offset > low offset */ | 
|  | 748 | d = l1->offset - l2->offset; | 
|  | 749 | if (d) | 
|  | 750 | return d; | 
|  | 751 |  | 
|  | 752 | /* short length > long length */ | 
|  | 753 | d = l2->length - l1->length; | 
|  | 754 | if (d) | 
|  | 755 | return d; | 
|  | 756 |  | 
|  | 757 | /* read > read/write */ | 
|  | 758 | return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); | 
|  | 759 | } | 
|  | 760 |  | 
|  | 761 | static void | 
|  | 762 | pnfs_insert_layout(struct pnfs_layout_hdr *lo, | 
|  | 763 | struct pnfs_layout_segment *lseg) | 
|  | 764 | { | 
|  | 765 | struct pnfs_layout_segment *lp; | 
|  | 766 |  | 
|  | 767 | dprintk("%s:Begin\n", __func__); | 
|  | 768 |  | 
|  | 769 | assert_spin_locked(&lo->plh_inode->i_lock); | 
|  | 770 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { | 
|  | 771 | if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) | 
|  | 772 | continue; | 
|  | 773 | list_add_tail(&lseg->pls_list, &lp->pls_list); | 
|  | 774 | dprintk("%s: inserted lseg %p " | 
|  | 775 | "iomode %d offset %llu length %llu before " | 
|  | 776 | "lp %p iomode %d offset %llu length %llu\n", | 
|  | 777 | __func__, lseg, lseg->pls_range.iomode, | 
|  | 778 | lseg->pls_range.offset, lseg->pls_range.length, | 
|  | 779 | lp, lp->pls_range.iomode, lp->pls_range.offset, | 
|  | 780 | lp->pls_range.length); | 
|  | 781 | goto out; | 
|  | 782 | } | 
|  | 783 | list_add_tail(&lseg->pls_list, &lo->plh_segs); | 
|  | 784 | dprintk("%s: inserted lseg %p " | 
|  | 785 | "iomode %d offset %llu length %llu at tail\n", | 
|  | 786 | __func__, lseg, lseg->pls_range.iomode, | 
|  | 787 | lseg->pls_range.offset, lseg->pls_range.length); | 
|  | 788 | out: | 
|  | 789 | get_layout_hdr(lo); | 
|  | 790 |  | 
|  | 791 | dprintk("%s:Return\n", __func__); | 
|  | 792 | } | 
|  | 793 |  | 
|  | 794 | static struct pnfs_layout_hdr * | 
|  | 795 | alloc_init_layout_hdr(struct inode *ino, | 
|  | 796 | struct nfs_open_context *ctx, | 
|  | 797 | gfp_t gfp_flags) | 
|  | 798 | { | 
|  | 799 | struct pnfs_layout_hdr *lo; | 
|  | 800 |  | 
|  | 801 | lo = pnfs_alloc_layout_hdr(ino, gfp_flags); | 
|  | 802 | if (!lo) | 
|  | 803 | return NULL; | 
|  | 804 | atomic_set(&lo->plh_refcount, 1); | 
|  | 805 | INIT_LIST_HEAD(&lo->plh_layouts); | 
|  | 806 | INIT_LIST_HEAD(&lo->plh_segs); | 
|  | 807 | INIT_LIST_HEAD(&lo->plh_bulk_recall); | 
|  | 808 | lo->plh_inode = ino; | 
|  | 809 | lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); | 
|  | 810 | return lo; | 
|  | 811 | } | 
|  | 812 |  | 
|  | 813 | static struct pnfs_layout_hdr * | 
|  | 814 | pnfs_find_alloc_layout(struct inode *ino, | 
|  | 815 | struct nfs_open_context *ctx, | 
|  | 816 | gfp_t gfp_flags) | 
|  | 817 | { | 
|  | 818 | struct nfs_inode *nfsi = NFS_I(ino); | 
|  | 819 | struct pnfs_layout_hdr *new = NULL; | 
|  | 820 |  | 
|  | 821 | dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); | 
|  | 822 |  | 
|  | 823 | assert_spin_locked(&ino->i_lock); | 
|  | 824 | if (nfsi->layout) { | 
|  | 825 | if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags)) | 
|  | 826 | return NULL; | 
|  | 827 | else | 
|  | 828 | return nfsi->layout; | 
|  | 829 | } | 
|  | 830 | spin_unlock(&ino->i_lock); | 
|  | 831 | new = alloc_init_layout_hdr(ino, ctx, gfp_flags); | 
|  | 832 | spin_lock(&ino->i_lock); | 
|  | 833 |  | 
|  | 834 | if (likely(nfsi->layout == NULL))	/* Won the race? */ | 
|  | 835 | nfsi->layout = new; | 
|  | 836 | else | 
|  | 837 | pnfs_free_layout_hdr(new); | 
|  | 838 | return nfsi->layout; | 
|  | 839 | } | 
|  | 840 |  | 
|  | 841 | /* | 
|  | 842 | * iomode matching rules: | 
|  | 843 | * iomode	lseg	match | 
|  | 844 | * -----	-----	----- | 
|  | 845 | * ANY		READ	true | 
|  | 846 | * ANY		RW	true | 
|  | 847 | * RW		READ	false | 
|  | 848 | * RW		RW	true | 
|  | 849 | * READ		READ	true | 
|  | 850 | * READ		RW	true | 
|  | 851 | */ | 
|  | 852 | static int | 
|  | 853 | is_matching_lseg(struct pnfs_layout_range *ls_range, | 
|  | 854 | struct pnfs_layout_range *range) | 
|  | 855 | { | 
|  | 856 | struct pnfs_layout_range range1; | 
|  | 857 |  | 
|  | 858 | if ((range->iomode == IOMODE_RW && | 
|  | 859 | ls_range->iomode != IOMODE_RW) || | 
|  | 860 | !lo_seg_intersecting(ls_range, range)) | 
|  | 861 | return 0; | 
|  | 862 |  | 
|  | 863 | /* range1 covers only the first byte in the range */ | 
|  | 864 | range1 = *range; | 
|  | 865 | range1.length = 1; | 
|  | 866 | return lo_seg_contained(ls_range, &range1); | 
|  | 867 | } | 
|  | 868 |  | 
|  | 869 | /* | 
|  | 870 | * lookup range in layout | 
|  | 871 | */ | 
|  | 872 | static struct pnfs_layout_segment * | 
|  | 873 | pnfs_find_lseg(struct pnfs_layout_hdr *lo, | 
|  | 874 | struct pnfs_layout_range *range) | 
|  | 875 | { | 
|  | 876 | struct pnfs_layout_segment *lseg, *ret = NULL; | 
|  | 877 |  | 
|  | 878 | dprintk("%s:Begin\n", __func__); | 
|  | 879 |  | 
|  | 880 | assert_spin_locked(&lo->plh_inode->i_lock); | 
|  | 881 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | 
|  | 882 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | 
|  | 883 | is_matching_lseg(&lseg->pls_range, range)) { | 
|  | 884 | ret = get_lseg(lseg); | 
|  | 885 | break; | 
|  | 886 | } | 
|  | 887 | if (lseg->pls_range.offset > range->offset) | 
|  | 888 | break; | 
|  | 889 | } | 
|  | 890 |  | 
|  | 891 | dprintk("%s:Return lseg %p ref %d\n", | 
|  | 892 | __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0); | 
|  | 893 | return ret; | 
|  | 894 | } | 
|  | 895 |  | 
|  | 896 | /* | 
|  | 897 | * Layout segment is retreived from the server if not cached. | 
|  | 898 | * The appropriate layout segment is referenced and returned to the caller. | 
|  | 899 | */ | 
|  | 900 | struct pnfs_layout_segment * | 
|  | 901 | pnfs_update_layout(struct inode *ino, | 
|  | 902 | struct nfs_open_context *ctx, | 
|  | 903 | loff_t pos, | 
|  | 904 | u64 count, | 
|  | 905 | enum pnfs_iomode iomode, | 
|  | 906 | gfp_t gfp_flags) | 
|  | 907 | { | 
|  | 908 | struct pnfs_layout_range arg = { | 
|  | 909 | .iomode = iomode, | 
|  | 910 | .offset = pos, | 
|  | 911 | .length = count, | 
|  | 912 | }; | 
|  | 913 | unsigned pg_offset; | 
|  | 914 | struct nfs_inode *nfsi = NFS_I(ino); | 
|  | 915 | struct nfs_server *server = NFS_SERVER(ino); | 
|  | 916 | struct nfs_client *clp = server->nfs_client; | 
|  | 917 | struct pnfs_layout_hdr *lo; | 
|  | 918 | struct pnfs_layout_segment *lseg = NULL; | 
|  | 919 | bool first = false; | 
|  | 920 |  | 
|  | 921 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) | 
|  | 922 | return NULL; | 
|  | 923 | spin_lock(&ino->i_lock); | 
|  | 924 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); | 
|  | 925 | if (lo == NULL) { | 
|  | 926 | dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); | 
|  | 927 | goto out_unlock; | 
|  | 928 | } | 
|  | 929 |  | 
|  | 930 | /* Do we even need to bother with this? */ | 
|  | 931 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | 
|  | 932 | dprintk("%s matches recall, use MDS\n", __func__); | 
|  | 933 | goto out_unlock; | 
|  | 934 | } | 
|  | 935 |  | 
|  | 936 | /* if LAYOUTGET already failed once we don't try again */ | 
|  | 937 | if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) | 
|  | 938 | goto out_unlock; | 
|  | 939 |  | 
|  | 940 | /* Check to see if the layout for the given range already exists */ | 
|  | 941 | lseg = pnfs_find_lseg(lo, &arg); | 
|  | 942 | if (lseg) | 
|  | 943 | goto out_unlock; | 
|  | 944 |  | 
|  | 945 | if (pnfs_layoutgets_blocked(lo, NULL, 0)) | 
|  | 946 | goto out_unlock; | 
|  | 947 | atomic_inc(&lo->plh_outstanding); | 
|  | 948 |  | 
|  | 949 | get_layout_hdr(lo); | 
|  | 950 | if (list_empty(&lo->plh_segs)) | 
|  | 951 | first = true; | 
|  | 952 | spin_unlock(&ino->i_lock); | 
|  | 953 | if (first) { | 
|  | 954 | /* The lo must be on the clp list if there is any | 
|  | 955 | * chance of a CB_LAYOUTRECALL(FILE) coming in. | 
|  | 956 | */ | 
|  | 957 | spin_lock(&clp->cl_lock); | 
|  | 958 | BUG_ON(!list_empty(&lo->plh_layouts)); | 
|  | 959 | list_add_tail(&lo->plh_layouts, &server->layouts); | 
|  | 960 | spin_unlock(&clp->cl_lock); | 
|  | 961 | } | 
|  | 962 |  | 
|  | 963 | pg_offset = arg.offset & ~PAGE_CACHE_MASK; | 
|  | 964 | if (pg_offset) { | 
|  | 965 | arg.offset -= pg_offset; | 
|  | 966 | arg.length += pg_offset; | 
|  | 967 | } | 
|  | 968 | if (arg.length != NFS4_MAX_UINT64) | 
|  | 969 | arg.length = PAGE_CACHE_ALIGN(arg.length); | 
|  | 970 |  | 
|  | 971 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | 
|  | 972 | if (!lseg && first) { | 
|  | 973 | spin_lock(&clp->cl_lock); | 
|  | 974 | list_del_init(&lo->plh_layouts); | 
|  | 975 | spin_unlock(&clp->cl_lock); | 
|  | 976 | } | 
|  | 977 | atomic_dec(&lo->plh_outstanding); | 
|  | 978 | put_layout_hdr(lo); | 
|  | 979 | out: | 
|  | 980 | dprintk("%s end, state 0x%lx lseg %p\n", __func__, | 
|  | 981 | nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); | 
|  | 982 | return lseg; | 
|  | 983 | out_unlock: | 
|  | 984 | spin_unlock(&ino->i_lock); | 
|  | 985 | goto out; | 
|  | 986 | } | 
|  | 987 | EXPORT_SYMBOL_GPL(pnfs_update_layout); | 
|  | 988 |  | 
|  | 989 | int | 
|  | 990 | pnfs_layout_process(struct nfs4_layoutget *lgp) | 
|  | 991 | { | 
|  | 992 | struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; | 
|  | 993 | struct nfs4_layoutget_res *res = &lgp->res; | 
|  | 994 | struct pnfs_layout_segment *lseg; | 
|  | 995 | struct inode *ino = lo->plh_inode; | 
|  | 996 | int status = 0; | 
|  | 997 |  | 
|  | 998 | /* Inject layout blob into I/O device driver */ | 
|  | 999 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); | 
|  | 1000 | if (!lseg || IS_ERR(lseg)) { | 
|  | 1001 | if (!lseg) | 
|  | 1002 | status = -ENOMEM; | 
|  | 1003 | else | 
|  | 1004 | status = PTR_ERR(lseg); | 
|  | 1005 | dprintk("%s: Could not allocate layout: error %d\n", | 
|  | 1006 | __func__, status); | 
|  | 1007 | goto out; | 
|  | 1008 | } | 
|  | 1009 |  | 
|  | 1010 | spin_lock(&ino->i_lock); | 
|  | 1011 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | 
|  | 1012 | dprintk("%s forget reply due to recall\n", __func__); | 
|  | 1013 | goto out_forget_reply; | 
|  | 1014 | } | 
|  | 1015 |  | 
|  | 1016 | if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) { | 
|  | 1017 | dprintk("%s forget reply due to state\n", __func__); | 
|  | 1018 | goto out_forget_reply; | 
|  | 1019 | } | 
|  | 1020 | init_lseg(lo, lseg); | 
|  | 1021 | lseg->pls_range = res->range; | 
|  | 1022 | *lgp->lsegpp = get_lseg(lseg); | 
|  | 1023 | pnfs_insert_layout(lo, lseg); | 
|  | 1024 |  | 
|  | 1025 | if (res->return_on_close) { | 
|  | 1026 | set_bit(NFS_LSEG_ROC, &lseg->pls_flags); | 
|  | 1027 | set_bit(NFS_LAYOUT_ROC, &lo->plh_flags); | 
|  | 1028 | } | 
|  | 1029 |  | 
|  | 1030 | /* Done processing layoutget. Set the layout stateid */ | 
|  | 1031 | pnfs_set_layout_stateid(lo, &res->stateid, false); | 
|  | 1032 | spin_unlock(&ino->i_lock); | 
|  | 1033 | out: | 
|  | 1034 | return status; | 
|  | 1035 |  | 
|  | 1036 | out_forget_reply: | 
|  | 1037 | spin_unlock(&ino->i_lock); | 
|  | 1038 | lseg->pls_layout = lo; | 
|  | 1039 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); | 
|  | 1040 | goto out; | 
|  | 1041 | } | 
|  | 1042 |  | 
|  | 1043 | void | 
|  | 1044 | pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 
|  | 1045 | { | 
|  | 1046 | BUG_ON(pgio->pg_lseg != NULL); | 
|  | 1047 |  | 
|  | 1048 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 
|  | 1049 | req->wb_context, | 
|  | 1050 | req_offset(req), | 
|  | 1051 | req->wb_bytes, | 
|  | 1052 | IOMODE_READ, | 
|  | 1053 | GFP_KERNEL); | 
|  | 1054 | /* If no lseg, fall back to read through mds */ | 
|  | 1055 | if (pgio->pg_lseg == NULL) | 
|  | 1056 | nfs_pageio_reset_read_mds(pgio); | 
|  | 1057 |  | 
|  | 1058 | } | 
|  | 1059 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); | 
|  | 1060 |  | 
|  | 1061 | void | 
|  | 1062 | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 
|  | 1063 | { | 
|  | 1064 | BUG_ON(pgio->pg_lseg != NULL); | 
|  | 1065 |  | 
|  | 1066 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 
|  | 1067 | req->wb_context, | 
|  | 1068 | req_offset(req), | 
|  | 1069 | req->wb_bytes, | 
|  | 1070 | IOMODE_RW, | 
|  | 1071 | GFP_NOFS); | 
|  | 1072 | /* If no lseg, fall back to write through mds */ | 
|  | 1073 | if (pgio->pg_lseg == NULL) | 
|  | 1074 | nfs_pageio_reset_write_mds(pgio); | 
|  | 1075 | } | 
|  | 1076 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); | 
|  | 1077 |  | 
|  | 1078 | bool | 
|  | 1079 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) | 
|  | 1080 | { | 
|  | 1081 | struct nfs_server *server = NFS_SERVER(inode); | 
|  | 1082 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; | 
|  | 1083 |  | 
|  | 1084 | if (ld == NULL) | 
|  | 1085 | return false; | 
|  | 1086 | nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); | 
|  | 1087 | return true; | 
|  | 1088 | } | 
|  | 1089 |  | 
|  | 1090 | bool | 
|  | 1091 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) | 
|  | 1092 | { | 
|  | 1093 | struct nfs_server *server = NFS_SERVER(inode); | 
|  | 1094 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; | 
|  | 1095 |  | 
|  | 1096 | if (ld == NULL) | 
|  | 1097 | return false; | 
|  | 1098 | nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); | 
|  | 1099 | return true; | 
|  | 1100 | } | 
|  | 1101 |  | 
|  | 1102 | bool | 
|  | 1103 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 
|  | 1104 | struct nfs_page *req) | 
|  | 1105 | { | 
|  | 1106 | if (pgio->pg_lseg == NULL) | 
|  | 1107 | return nfs_generic_pg_test(pgio, prev, req); | 
|  | 1108 |  | 
|  | 1109 | /* | 
|  | 1110 | * Test if a nfs_page is fully contained in the pnfs_layout_range. | 
|  | 1111 | * Note that this test makes several assumptions: | 
|  | 1112 | * - that the previous nfs_page in the struct nfs_pageio_descriptor | 
|  | 1113 | *   is known to lie within the range. | 
|  | 1114 | *   - that the nfs_page being tested is known to be contiguous with the | 
|  | 1115 | *   previous nfs_page. | 
|  | 1116 | *   - Layout ranges are page aligned, so we only have to test the | 
|  | 1117 | *   start offset of the request. | 
|  | 1118 | * | 
|  | 1119 | * Please also note that 'end_offset' is actually the offset of the | 
|  | 1120 | * first byte that lies outside the pnfs_layout_range. FIXME? | 
|  | 1121 | * | 
|  | 1122 | */ | 
|  | 1123 | return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, | 
|  | 1124 | pgio->pg_lseg->pls_range.length); | 
|  | 1125 | } | 
|  | 1126 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | 
|  | 1127 |  | 
|  | 1128 | static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) | 
|  | 1129 | { | 
|  | 1130 | struct nfs_pageio_descriptor pgio; | 
|  | 1131 | LIST_HEAD(failed); | 
|  | 1132 |  | 
|  | 1133 | /* Resend all requests through the MDS */ | 
|  | 1134 | nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); | 
|  | 1135 | while (!list_empty(head)) { | 
|  | 1136 | struct nfs_page *req = nfs_list_entry(head->next); | 
|  | 1137 |  | 
|  | 1138 | nfs_list_remove_request(req); | 
|  | 1139 | if (!nfs_pageio_add_request(&pgio, req)) | 
|  | 1140 | nfs_list_add_request(req, &failed); | 
|  | 1141 | } | 
|  | 1142 | nfs_pageio_complete(&pgio); | 
|  | 1143 |  | 
|  | 1144 | if (!list_empty(&failed)) { | 
|  | 1145 | /* For some reason our attempt to resend pages. Mark the | 
|  | 1146 | * overall send request as having failed, and let | 
|  | 1147 | * nfs_writeback_release_full deal with the error. | 
|  | 1148 | */ | 
|  | 1149 | list_move(&failed, head); | 
|  | 1150 | return -EIO; | 
|  | 1151 | } | 
|  | 1152 | return 0; | 
|  | 1153 | } | 
|  | 1154 |  | 
|  | 1155 | /* | 
|  | 1156 | * Called by non rpc-based layout drivers | 
|  | 1157 | */ | 
|  | 1158 | void pnfs_ld_write_done(struct nfs_write_data *data) | 
|  | 1159 | { | 
|  | 1160 | if (likely(!data->pnfs_error)) { | 
|  | 1161 | pnfs_set_layoutcommit(data); | 
|  | 1162 | data->mds_ops->rpc_call_done(&data->task, data); | 
|  | 1163 | } else { | 
|  | 1164 | dprintk("pnfs write error = %d\n", data->pnfs_error); | 
|  | 1165 | if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & | 
|  | 1166 | PNFS_LAYOUTRET_ON_ERROR) { | 
|  | 1167 | /* Don't lo_commit on error, Server will needs to | 
|  | 1168 | * preform a file recovery. | 
|  | 1169 | */ | 
|  | 1170 | clear_bit(NFS_INO_LAYOUTCOMMIT, | 
|  | 1171 | &NFS_I(data->inode)->flags); | 
|  | 1172 | pnfs_return_layout(data->inode); | 
|  | 1173 | } | 
|  | 1174 | data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); | 
|  | 1175 | } | 
|  | 1176 | put_lseg(data->lseg); | 
|  | 1177 | data->mds_ops->rpc_release(data); | 
|  | 1178 | } | 
|  | 1179 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | 
|  | 1180 |  | 
|  | 1181 | static void | 
|  | 1182 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | 
|  | 1183 | struct nfs_write_data *data) | 
|  | 1184 | { | 
|  | 1185 | list_splice_tail_init(&data->pages, &desc->pg_list); | 
|  | 1186 | if (data->req && list_empty(&data->req->wb_list)) | 
|  | 1187 | nfs_list_add_request(data->req, &desc->pg_list); | 
|  | 1188 | nfs_pageio_reset_write_mds(desc); | 
|  | 1189 | desc->pg_recoalesce = 1; | 
|  | 1190 | put_lseg(data->lseg); | 
|  | 1191 | nfs_writedata_release(data); | 
|  | 1192 | } | 
|  | 1193 |  | 
|  | 1194 | static enum pnfs_try_status | 
|  | 1195 | pnfs_try_to_write_data(struct nfs_write_data *wdata, | 
|  | 1196 | const struct rpc_call_ops *call_ops, | 
|  | 1197 | struct pnfs_layout_segment *lseg, | 
|  | 1198 | int how) | 
|  | 1199 | { | 
|  | 1200 | struct inode *inode = wdata->inode; | 
|  | 1201 | enum pnfs_try_status trypnfs; | 
|  | 1202 | struct nfs_server *nfss = NFS_SERVER(inode); | 
|  | 1203 |  | 
|  | 1204 | wdata->mds_ops = call_ops; | 
|  | 1205 | wdata->lseg = get_lseg(lseg); | 
|  | 1206 |  | 
|  | 1207 | dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, | 
|  | 1208 | inode->i_ino, wdata->args.count, wdata->args.offset, how); | 
|  | 1209 |  | 
|  | 1210 | trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); | 
|  | 1211 | if (trypnfs == PNFS_NOT_ATTEMPTED) { | 
|  | 1212 | put_lseg(wdata->lseg); | 
|  | 1213 | wdata->lseg = NULL; | 
|  | 1214 | } else | 
|  | 1215 | nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); | 
|  | 1216 |  | 
|  | 1217 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); | 
|  | 1218 | return trypnfs; | 
|  | 1219 | } | 
|  | 1220 |  | 
|  | 1221 | static void | 
|  | 1222 | pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) | 
|  | 1223 | { | 
|  | 1224 | struct nfs_write_data *data; | 
|  | 1225 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 
|  | 1226 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 
|  | 1227 |  | 
|  | 1228 | desc->pg_lseg = NULL; | 
|  | 1229 | while (!list_empty(head)) { | 
|  | 1230 | enum pnfs_try_status trypnfs; | 
|  | 1231 |  | 
|  | 1232 | data = list_entry(head->next, struct nfs_write_data, list); | 
|  | 1233 | list_del_init(&data->list); | 
|  | 1234 |  | 
|  | 1235 | trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); | 
|  | 1236 | if (trypnfs == PNFS_NOT_ATTEMPTED) | 
|  | 1237 | pnfs_write_through_mds(desc, data); | 
|  | 1238 | } | 
|  | 1239 | put_lseg(lseg); | 
|  | 1240 | } | 
|  | 1241 |  | 
|  | 1242 | int | 
|  | 1243 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | 
|  | 1244 | { | 
|  | 1245 | LIST_HEAD(head); | 
|  | 1246 | int ret; | 
|  | 1247 |  | 
|  | 1248 | ret = nfs_generic_flush(desc, &head); | 
|  | 1249 | if (ret != 0) { | 
|  | 1250 | put_lseg(desc->pg_lseg); | 
|  | 1251 | desc->pg_lseg = NULL; | 
|  | 1252 | return ret; | 
|  | 1253 | } | 
|  | 1254 | pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); | 
|  | 1255 | return 0; | 
|  | 1256 | } | 
|  | 1257 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); | 
|  | 1258 |  | 
|  | 1259 | static void pnfs_ld_handle_read_error(struct nfs_read_data *data) | 
|  | 1260 | { | 
|  | 1261 | struct nfs_pageio_descriptor pgio; | 
|  | 1262 |  | 
|  | 1263 | put_lseg(data->lseg); | 
|  | 1264 | data->lseg = NULL; | 
|  | 1265 | dprintk("pnfs write error = %d\n", data->pnfs_error); | 
|  | 1266 | if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & | 
|  | 1267 | PNFS_LAYOUTRET_ON_ERROR) | 
|  | 1268 | pnfs_return_layout(data->inode); | 
|  | 1269 |  | 
|  | 1270 | nfs_pageio_init_read_mds(&pgio, data->inode); | 
|  | 1271 |  | 
|  | 1272 | while (!list_empty(&data->pages)) { | 
|  | 1273 | struct nfs_page *req = nfs_list_entry(data->pages.next); | 
|  | 1274 |  | 
|  | 1275 | nfs_list_remove_request(req); | 
|  | 1276 | nfs_pageio_add_request(&pgio, req); | 
|  | 1277 | } | 
|  | 1278 | nfs_pageio_complete(&pgio); | 
|  | 1279 | } | 
|  | 1280 |  | 
|  | 1281 | /* | 
|  | 1282 | * Called by non rpc-based layout drivers | 
|  | 1283 | */ | 
|  | 1284 | void pnfs_ld_read_done(struct nfs_read_data *data) | 
|  | 1285 | { | 
|  | 1286 | if (likely(!data->pnfs_error)) { | 
|  | 1287 | __nfs4_read_done_cb(data); | 
|  | 1288 | data->mds_ops->rpc_call_done(&data->task, data); | 
|  | 1289 | } else | 
|  | 1290 | pnfs_ld_handle_read_error(data); | 
|  | 1291 | put_lseg(data->lseg); | 
|  | 1292 | data->mds_ops->rpc_release(data); | 
|  | 1293 | } | 
|  | 1294 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | 
|  | 1295 |  | 
|  | 1296 | static void | 
|  | 1297 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | 
|  | 1298 | struct nfs_read_data *data) | 
|  | 1299 | { | 
|  | 1300 | list_splice_tail_init(&data->pages, &desc->pg_list); | 
|  | 1301 | if (data->req && list_empty(&data->req->wb_list)) | 
|  | 1302 | nfs_list_add_request(data->req, &desc->pg_list); | 
|  | 1303 | nfs_pageio_reset_read_mds(desc); | 
|  | 1304 | desc->pg_recoalesce = 1; | 
|  | 1305 | nfs_readdata_release(data); | 
|  | 1306 | } | 
|  | 1307 |  | 
|  | 1308 | /* | 
|  | 1309 | * Call the appropriate parallel I/O subsystem read function. | 
|  | 1310 | */ | 
|  | 1311 | static enum pnfs_try_status | 
|  | 1312 | pnfs_try_to_read_data(struct nfs_read_data *rdata, | 
|  | 1313 | const struct rpc_call_ops *call_ops, | 
|  | 1314 | struct pnfs_layout_segment *lseg) | 
|  | 1315 | { | 
|  | 1316 | struct inode *inode = rdata->inode; | 
|  | 1317 | struct nfs_server *nfss = NFS_SERVER(inode); | 
|  | 1318 | enum pnfs_try_status trypnfs; | 
|  | 1319 |  | 
|  | 1320 | rdata->mds_ops = call_ops; | 
|  | 1321 | rdata->lseg = get_lseg(lseg); | 
|  | 1322 |  | 
|  | 1323 | dprintk("%s: Reading ino:%lu %u@%llu\n", | 
|  | 1324 | __func__, inode->i_ino, rdata->args.count, rdata->args.offset); | 
|  | 1325 |  | 
|  | 1326 | trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); | 
|  | 1327 | if (trypnfs == PNFS_NOT_ATTEMPTED) { | 
|  | 1328 | put_lseg(rdata->lseg); | 
|  | 1329 | rdata->lseg = NULL; | 
|  | 1330 | } else { | 
|  | 1331 | nfs_inc_stats(inode, NFSIOS_PNFS_READ); | 
|  | 1332 | } | 
|  | 1333 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); | 
|  | 1334 | return trypnfs; | 
|  | 1335 | } | 
|  | 1336 |  | 
|  | 1337 | static void | 
|  | 1338 | pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) | 
|  | 1339 | { | 
|  | 1340 | struct nfs_read_data *data; | 
|  | 1341 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 
|  | 1342 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 
|  | 1343 |  | 
|  | 1344 | desc->pg_lseg = NULL; | 
|  | 1345 | while (!list_empty(head)) { | 
|  | 1346 | enum pnfs_try_status trypnfs; | 
|  | 1347 |  | 
|  | 1348 | data = list_entry(head->next, struct nfs_read_data, list); | 
|  | 1349 | list_del_init(&data->list); | 
|  | 1350 |  | 
|  | 1351 | trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); | 
|  | 1352 | if (trypnfs == PNFS_NOT_ATTEMPTED) | 
|  | 1353 | pnfs_read_through_mds(desc, data); | 
|  | 1354 | } | 
|  | 1355 | put_lseg(lseg); | 
|  | 1356 | } | 
|  | 1357 |  | 
|  | 1358 | int | 
|  | 1359 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | 
|  | 1360 | { | 
|  | 1361 | LIST_HEAD(head); | 
|  | 1362 | int ret; | 
|  | 1363 |  | 
|  | 1364 | ret = nfs_generic_pagein(desc, &head); | 
|  | 1365 | if (ret != 0) { | 
|  | 1366 | put_lseg(desc->pg_lseg); | 
|  | 1367 | desc->pg_lseg = NULL; | 
|  | 1368 | return ret; | 
|  | 1369 | } | 
|  | 1370 | pnfs_do_multiple_reads(desc, &head); | 
|  | 1371 | return 0; | 
|  | 1372 | } | 
|  | 1373 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); | 
|  | 1374 |  | 
|  | 1375 | /* | 
|  | 1376 | * There can be multiple RW segments. | 
|  | 1377 | */ | 
|  | 1378 | static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) | 
|  | 1379 | { | 
|  | 1380 | struct pnfs_layout_segment *lseg; | 
|  | 1381 |  | 
|  | 1382 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { | 
|  | 1383 | if (lseg->pls_range.iomode == IOMODE_RW && | 
|  | 1384 | test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) | 
|  | 1385 | list_add(&lseg->pls_lc_list, listp); | 
|  | 1386 | } | 
|  | 1387 | } | 
|  | 1388 |  | 
|  | 1389 | static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) | 
|  | 1390 | { | 
|  | 1391 | struct pnfs_layout_segment *lseg, *tmp; | 
|  | 1392 | unsigned long *bitlock = &NFS_I(inode)->flags; | 
|  | 1393 |  | 
|  | 1394 | /* Matched by references in pnfs_set_layoutcommit */ | 
|  | 1395 | list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { | 
|  | 1396 | list_del_init(&lseg->pls_lc_list); | 
|  | 1397 | put_lseg(lseg); | 
|  | 1398 | } | 
|  | 1399 |  | 
|  | 1400 | clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); | 
|  | 1401 | smp_mb__after_clear_bit(); | 
|  | 1402 | wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); | 
|  | 1403 | } | 
|  | 1404 |  | 
|  | 1405 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) | 
|  | 1406 | { | 
|  | 1407 | if (lseg->pls_range.iomode == IOMODE_RW) { | 
|  | 1408 | dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); | 
|  | 1409 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | 
|  | 1410 | } else { | 
|  | 1411 | dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); | 
|  | 1412 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | 
|  | 1413 | } | 
|  | 1414 | } | 
|  | 1415 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); | 
|  | 1416 |  | 
|  | 1417 | void | 
|  | 1418 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) | 
|  | 1419 | { | 
|  | 1420 | struct nfs_inode *nfsi = NFS_I(wdata->inode); | 
|  | 1421 | loff_t end_pos = wdata->mds_offset + wdata->res.count; | 
|  | 1422 | bool mark_as_dirty = false; | 
|  | 1423 |  | 
|  | 1424 | spin_lock(&nfsi->vfs_inode.i_lock); | 
|  | 1425 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 
|  | 1426 | mark_as_dirty = true; | 
|  | 1427 | dprintk("%s: Set layoutcommit for inode %lu ", | 
|  | 1428 | __func__, wdata->inode->i_ino); | 
|  | 1429 | } | 
|  | 1430 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { | 
|  | 1431 | /* references matched in nfs4_layoutcommit_release */ | 
|  | 1432 | get_lseg(wdata->lseg); | 
|  | 1433 | } | 
|  | 1434 | if (end_pos > nfsi->layout->plh_lwb) | 
|  | 1435 | nfsi->layout->plh_lwb = end_pos; | 
|  | 1436 | spin_unlock(&nfsi->vfs_inode.i_lock); | 
|  | 1437 | dprintk("%s: lseg %p end_pos %llu\n", | 
|  | 1438 | __func__, wdata->lseg, nfsi->layout->plh_lwb); | 
|  | 1439 |  | 
|  | 1440 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one | 
|  | 1441 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ | 
|  | 1442 | if (mark_as_dirty) | 
|  | 1443 | mark_inode_dirty_sync(wdata->inode); | 
|  | 1444 | } | 
|  | 1445 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); | 
|  | 1446 |  | 
|  | 1447 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | 
|  | 1448 | { | 
|  | 1449 | struct nfs_server *nfss = NFS_SERVER(data->args.inode); | 
|  | 1450 |  | 
|  | 1451 | if (nfss->pnfs_curr_ld->cleanup_layoutcommit) | 
|  | 1452 | nfss->pnfs_curr_ld->cleanup_layoutcommit(data); | 
|  | 1453 | pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); | 
|  | 1454 | } | 
|  | 1455 |  | 
|  | 1456 | /* | 
|  | 1457 | * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and | 
|  | 1458 | * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough | 
|  | 1459 | * data to disk to allow the server to recover the data if it crashes. | 
|  | 1460 | * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag | 
|  | 1461 | * is off, and a COMMIT is sent to a data server, or | 
|  | 1462 | * if WRITEs to a data server return NFS_DATA_SYNC. | 
|  | 1463 | */ | 
|  | 1464 | int | 
|  | 1465 | pnfs_layoutcommit_inode(struct inode *inode, bool sync) | 
|  | 1466 | { | 
|  | 1467 | struct nfs4_layoutcommit_data *data; | 
|  | 1468 | struct nfs_inode *nfsi = NFS_I(inode); | 
|  | 1469 | loff_t end_pos; | 
|  | 1470 | int status = 0; | 
|  | 1471 |  | 
|  | 1472 | dprintk("--> %s inode %lu\n", __func__, inode->i_ino); | 
|  | 1473 |  | 
|  | 1474 | if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) | 
|  | 1475 | return 0; | 
|  | 1476 |  | 
|  | 1477 | /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ | 
|  | 1478 | data = kzalloc(sizeof(*data), GFP_NOFS); | 
|  | 1479 | if (!data) { | 
|  | 1480 | status = -ENOMEM; | 
|  | 1481 | goto out; | 
|  | 1482 | } | 
|  | 1483 |  | 
|  | 1484 | if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) | 
|  | 1485 | goto out_free; | 
|  | 1486 |  | 
|  | 1487 | if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { | 
|  | 1488 | if (!sync) { | 
|  | 1489 | status = -EAGAIN; | 
|  | 1490 | goto out_free; | 
|  | 1491 | } | 
|  | 1492 | status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING, | 
|  | 1493 | nfs_wait_bit_killable, TASK_KILLABLE); | 
|  | 1494 | if (status) | 
|  | 1495 | goto out_free; | 
|  | 1496 | } | 
|  | 1497 |  | 
|  | 1498 | INIT_LIST_HEAD(&data->lseg_list); | 
|  | 1499 | spin_lock(&inode->i_lock); | 
|  | 1500 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 
|  | 1501 | clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags); | 
|  | 1502 | spin_unlock(&inode->i_lock); | 
|  | 1503 | wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING); | 
|  | 1504 | goto out_free; | 
|  | 1505 | } | 
|  | 1506 |  | 
|  | 1507 | pnfs_list_write_lseg(inode, &data->lseg_list); | 
|  | 1508 |  | 
|  | 1509 | end_pos = nfsi->layout->plh_lwb; | 
|  | 1510 | nfsi->layout->plh_lwb = 0; | 
|  | 1511 |  | 
|  | 1512 | nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); | 
|  | 1513 | spin_unlock(&inode->i_lock); | 
|  | 1514 |  | 
|  | 1515 | data->args.inode = inode; | 
|  | 1516 | data->cred = get_rpccred(nfsi->layout->plh_lc_cred); | 
|  | 1517 | nfs_fattr_init(&data->fattr); | 
|  | 1518 | data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; | 
|  | 1519 | data->res.fattr = &data->fattr; | 
|  | 1520 | data->args.lastbytewritten = end_pos - 1; | 
|  | 1521 | data->res.server = NFS_SERVER(inode); | 
|  | 1522 |  | 
|  | 1523 | status = nfs4_proc_layoutcommit(data, sync); | 
|  | 1524 | out: | 
|  | 1525 | if (status) | 
|  | 1526 | mark_inode_dirty_sync(inode); | 
|  | 1527 | dprintk("<-- %s status %d\n", __func__, status); | 
|  | 1528 | return status; | 
|  | 1529 | out_free: | 
|  | 1530 | kfree(data); | 
|  | 1531 | goto out; | 
|  | 1532 | } |