/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "log.h"
#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
#include "aops.h"
#include "trace_gfs2.h"

/* This doesn't need to be that large: the maximum number of 64 bit
 * pointers in a 4k block is 512, so __u16 is fine for that. It
 * saves stack space to keep it small.
 */
struct metapath {
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};

static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);

/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The (optional) page. This is looked up if @page is NULL
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	if (!page || page->index) {
		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);

		if (dsize > gfs2_max_stuffed_size(ip))
			dsize = gfs2_max_stuffed_size(ip);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, BIT(inode->i_blkbits),
				     BIT(BH_Uptodate));

	bh = page_buffers(page);

	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else {
		mark_buffer_dirty(bh);
		gfs2_ordered_add_inode(ip);
	}

	if (release) {
		unlock_page(page);
		put_page(page);
	}

	return 0;
}

/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: The (optional) page. This is looked up if the @page is NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}


/**
 * find_metapath - Find path through the metadata tree
 * @sdp: The superblock
 * @block: The disk block to look up
 * @mp: The metapath to return the result in
 * @height: The pre-calculated height of the metadata tree
 *
 * This routine returns a struct metapath structure that defines a path
 * through the metadata of inode "ip" to get to block "block".
 *
 * Example:
 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
 * filesystem with a blocksize of 4096.
 *
 * find_metapath() would return a struct metapath structure set to:
 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 *
 * That means that in order to get to the block containing the byte at
 * offset 101342453, we would load the indirect block pointed to by pointer
 * 0 in the dinode. We would then load the indirect block pointed to by
 * pointer 48 in that indirect block. We would then load the data block
 * pointed to by pointer 165 in that indirect block.
 *
 *              ----------------------------------------
 *              | Dinode |                             |
 *              |        |                            4|
 *              |        |0 1 2 3 4 5                 9|
 *              |        |                            6|
 *              ----------------------------------------
 *                       |
 *                       |
 *                       V
 *              ----------------------------------------
 *              | Indirect Block                       |
 *              |                                     5|
 *              |            4 4 4 4 4 5 5            1|
 *              |0           5 6 7 8 9 0 1            2|
 *              ----------------------------------------
 *                                 |
 *                                 |
 *                                 V
 *              ----------------------------------------
 *              | Indirect Block                       |
 *              |                 1 1 1 1 1           5|
 *              |                 6 6 6 6 6           1|
 *              |0                3 4 5 6 7           2|
 *              ----------------------------------------
 *                                           |
 *                                           |
 *                                           V
 *              ----------------------------------------
 *              | Data block containing offset         |
 *              |            101342453                 |
 *              |                                      |
 *              |                                      |
 *              ----------------------------------------
 *
 */

static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
			  struct metapath *mp, unsigned int height)
{
	unsigned int i;

	mp->mp_fheight = height;
	for (i = height; i--;)
		mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
}
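
/*
 * Worked example of the arithmetic above, assuming a 4096-byte block
 * size (sd_inptrs == 512): byte offset 101342453 lies in logical block
 * 101342453 >> 12 == 24741, and
 *
 *	24741 == (0 * 512 + 48) * 512 + 165
 *
 * so the do_div() loop peels off mp_list[2] = 165, then mp_list[1] = 48,
 * then mp_list[0] = 0, matching the diagram.
 */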

static inline unsigned int metapath_branch_start(const struct metapath *mp)
{
	if (mp->mp_list[0] == 0)
		return 2;
	return 1;
}

/**
 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 */
static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
{
	struct buffer_head *bh = mp->mp_bh[height];
	if (height == 0)
		return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
	return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
}

/**
 * metapointer - Return pointer to start of metadata in a buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 *
 * Return a pointer to the block number of the next height of the metadata
 * tree given a buffer containing the pointer to the current height of the
 * metadata tree.
 */

static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
{
	__be64 *p = metaptr1(height, mp);
	return p + mp->mp_list[height];
}

static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{
	const struct buffer_head *bh = mp->mp_bh[height];
	return (const __be64 *)(bh->b_data + bh->b_size);
}

static void clone_metapath(struct metapath *clone, struct metapath *mp)
{
	unsigned int hgt;

	*clone = *mp;
	for (hgt = 0; hgt < mp->mp_aheight; hgt++)
		get_bh(clone->mp_bh[hgt]);
}

static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ,
					  REQ_RAHEAD | REQ_META | REQ_PRIO,
					  rabh);
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}

static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		if (!dblock)
			break;
		ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	mp->mp_aheight = x + 1;
	return 0;
}

/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}

/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	return mp->mp_aheight - x - 1;
}

static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
{
	sector_t factor = 1, block = 0;
	int hgt;

	for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
		if (hgt < mp->mp_aheight)
			block += mp->mp_list[hgt] * factor;
		factor *= sdp->sd_inptrs;
	}
	return block;
}
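
/*
 * metapath_to_block() is the inverse of find_metapath(): sticking with
 * the example above (sd_inptrs == 512), mp_list = { 0, 48, 165 } yields
 *
 *	0 * 512 * 512 + 48 * 512 + 165 == 24741
 */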

static void release_metapath(struct metapath *mp)
{
	int i;

	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
		if (mp->mp_bh[i] == NULL)
			break;
		brelse(mp->mp_bh[i]);
		mp->mp_bh[i] = NULL;
	}
}

/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @bh: The metadata block
 * @ptr: Current position in @bh
 * @limit: Max extent length to return
 * @eob: Set to 1 if we hit "end of block"
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
{
	const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		d++;
	} while(be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return ptr - first;
}
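
/*
 * Example: if @ptr points at the on-disk pointer sequence 1000, 1001,
 * 1002, 1005, ..., the loop above stops when 1005 != 1003 and the
 * extent length returned is 3: only the first three block numbers are
 * consecutive on disk.
 */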

enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);

/*
 * gfs2_walk_metadata - walk a tree of indirect blocks
 * @inode: The inode
 * @mp: Starting point of walk
 * @max_len: Maximum number of blocks to walk
 * @walker: Called during the walk
 *
 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 * past the end of metadata, and a negative error code otherwise.
 */

static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
			      u64 max_len, gfs2_metadata_walker walker)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 factor = 1;
	unsigned int hgt;
	int ret;

	/*
	 * The walk starts in the lowest allocated indirect block, which may be
	 * before the position indicated by @mp. Adjust @max_len accordingly
	 * to avoid a short walk.
	 */
	for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
		max_len += mp->mp_list[hgt] * factor;
		mp->mp_list[hgt] = 0;
		factor *= sdp->sd_inptrs;
	}

	for (;;) {
		u16 start = mp->mp_list[hgt];
		enum walker_status status;
		unsigned int ptrs;
		u64 len;

		/* Walk indirect block. */
		ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
		len = ptrs * factor;
		if (len > max_len)
			ptrs = DIV_ROUND_UP_ULL(max_len, factor);
		status = walker(mp, ptrs);
		switch (status) {
		case WALK_STOP:
			return 1;
		case WALK_FOLLOW:
			BUG_ON(mp->mp_aheight == mp->mp_fheight);
			ptrs = mp->mp_list[hgt] - start;
			len = ptrs * factor;
			break;
		case WALK_CONTINUE:
			break;
		}
		if (len >= max_len)
			break;
		max_len -= len;
		if (status == WALK_FOLLOW)
			goto fill_up_metapath;

lower_metapath:
		/* Decrease height of metapath. */
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		mp->mp_list[hgt] = 0;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		if (mp->mp_list[hgt] >= sdp->sd_inptrs) {
			if (!hgt)
				break;
			goto lower_metapath;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			return ret;
		hgt += ret;
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	return 0;
}

static enum walker_status gfs2_hole_walker(struct metapath *mp,
					   unsigned int ptrs)
{
	const __be64 *start, *ptr, *end;
	unsigned int hgt;

	hgt = mp->mp_aheight - 1;
	start = metapointer(hgt, mp);
	end = start + ptrs;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			mp->mp_list[hgt] += ptr - start;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;
			return WALK_FOLLOW;
		}
	}
	return WALK_CONTINUE;
}

/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function modifies @mp.
 *
 * Returns: errno on error
 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
			  struct metapath *mp, struct iomap *iomap)
{
	struct metapath clone;
	u64 hole_size;
	int ret;

	clone_metapath(&clone, mp);
	ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
	if (ret < 0)
		goto out;

	if (ret == 1)
		hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
	else
		hole_size = len;
	iomap->length = hole_size << inode->i_blkbits;
	ret = 0;

out:
	release_metapath(&clone);
	return ret;
}

static inline __be64 *gfs2_indirect_init(struct metapath *mp,
					 struct gfs2_glock *gl, unsigned int i,
					 unsigned offset, u64 bn)
{
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
				 ((i > 1) ? sizeof(struct gfs2_meta_header) :
					    sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
	return ptr;
}

enum alloc_state {
	ALLOC_DATA = 0,
	ALLOC_GROW_DEPTH = 1,
	ALLOC_GROW_HEIGHT = 2,
	/* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
};
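
/*
 * The state machine in gfs2_iomap_alloc() below only ever moves towards
 * ALLOC_DATA:
 *
 *	ALLOC_GROW_HEIGHT -> ALLOC_GROW_DEPTH -> ALLOC_DATA
 *
 * starting at whichever state matches the difference between the height
 * the metapath calls for (mp_fheight) and what is already allocated
 * (mp_aheight).
 */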

/**
 * gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure
 * @flags: iomap flags
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation, asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call) and use the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M. If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * Returns: errno on error
 */

static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			    unsigned flags, struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_add_unrevoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for (i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
		/* Branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
		/* Tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}

#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE

/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *first, *ptr, *end;

	/*
	 * For writes to stuffed files, this function is called twice via
	 * gfs2_iomap_get, before and after unstuffing. The size we return the
	 * first time needs to be large enough to get the reservation and
	 * allocation sizes right. The size we return the second time must
	 * be exact or else gfs2_iomap_alloc won't do the right thing.
	 */

	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
		unsigned int maxsize = mp->mp_fheight > 1 ?
			sdp->sd_inptrs : sdp->sd_diptrs;
		maxsize -= mp->mp_list[mp->mp_fheight - 1];
		if (size > maxsize)
			size = maxsize;
		return size;
	}

	first = metapointer(ip->i_height - 1, mp);
	end = metaend(ip->i_height - 1, mp);
	if (end - first > size)
		end = first + size;
	for (ptr = first; ptr < end; ptr++) {
		if (*ptr)
			break;
	}
	return ptr - first;
}
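
/*
 * Example for the clipped case above: when writing into a hole with
 * mp_list[mp_fheight - 1] == 500 and sd_inptrs == 512, at most
 * 512 - 500 == 12 blocks fit under the current bottom indirect block,
 * so any larger @size is clipped to 12.
 */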

/**
 * gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * Returns: errno
 */
static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			  unsigned flags, struct iomap *iomap,
			  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					/* report a hole */
					iomap->offset = pos;
					iomap->length = length;
					goto do_alloc;
				}
			}
			iomap->length = size;
		}
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, len, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - pos;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out;	/* (see gfs2_file_direct_write) */

		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
	goto out;
}

static int gfs2_write_lock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
	error = gfs2_glock_nq(&ip->i_gh);
	if (error)
		goto out_uninit;
	if (&ip->i_inode == sdp->sd_rindex) {
		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &m_ip->i_gh);
		if (error)
			goto out_unlock;
	}
	return 0;

out_unlock:
	gfs2_glock_dq(&ip->i_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_gh);
	return error;
}

static void gfs2_write_unlock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (&ip->i_inode == sdp->sd_rindex) {
		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

		gfs2_glock_dq_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq_uninit(&ip->i_gh);
}

static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
					   unsigned copied, struct page *page,
					   struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
}

static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	bool unstuff, alloc_required;
	int ret;

	ret = gfs2_write_lock(inode);
	if (ret)
		return ret;

	unstuff = gfs2_is_stuffed(ip) &&
		  pos + length > gfs2_max_stuffed_size(ip);

	ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp);
	if (ret)
		goto out_unlock;

	alloc_required = unstuff || iomap->type == IOMAP_HOLE;

	if (alloc_required || gfs2_is_jdata(ip))
		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
				       &ind_blocks);

	if (alloc_required) {
		struct gfs2_alloc_parms ap = {
			.target = data_blocks + ind_blocks
		};

		ret = gfs2_quota_lock_check(ip, &ap);
		if (ret)
			goto out_unlock;

		ret = gfs2_inplace_reserve(ip, &ap);
		if (ret)
			goto out_qunlock;
	}

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks;
	if (ind_blocks || data_blocks)
		rblocks += RES_STATFS + RES_QUOTA;
	if (inode == sdp->sd_rindex)
		rblocks += 2 * RES_STATFS;
	if (alloc_required)
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);

	ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits);
	if (ret)
		goto out_trans_fail;

	if (unstuff) {
		ret = gfs2_unstuff_dinode(ip, NULL);
		if (ret)
			goto out_trans_end;
		release_metapath(mp);
		ret = gfs2_iomap_get(inode, iomap->offset, iomap->length,
				     flags, iomap, mp);
		if (ret)
			goto out_trans_end;
	}

	if (iomap->type == IOMAP_HOLE) {
		ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
		if (ret) {
			gfs2_trans_end(sdp);
			gfs2_inplace_release(ip);
			punch_hole(ip, iomap->offset, iomap->length);
			goto out_qunlock;
		}
	}
	if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
		iomap->page_done = gfs2_iomap_journaled_page_done;
	return 0;

out_trans_end:
	gfs2_trans_end(sdp);
out_trans_fail:
	if (alloc_required)
		gfs2_inplace_release(ip);
out_qunlock:
	if (alloc_required)
		gfs2_quota_unlock(ip);
out_unlock:
	gfs2_write_unlock(inode);
	return ret;
}

static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	iomap->flags |= IOMAP_F_BUFFER_HEAD;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
		ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
	} else {
		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);

		/*
		 * Silently fall back to buffered I/O for stuffed files or if
		 * we've hit a hole (see gfs2_file_direct_write).
		 */
		if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
		    iomap->type != IOMAP_MAPPED)
			ret = -ENOTBLK;
	}
	if (!ret) {
		get_bh(mp.mp_bh[0]);
		iomap->private = mp.mp_bh[0];
	}
	release_metapath(&mp);
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;
}

static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_trans *tr = current->journal_info;
	struct buffer_head *dibh = iomap->private;

	if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
		goto out;

	if (iomap->type != IOMAP_INLINE) {
		gfs2_ordered_add_inode(ip);

		if (tr->tr_num_buf_new)
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
		else
			gfs2_trans_add_meta(ip->i_gl, dibh);
	}

	if (inode == sdp->sd_rindex) {
		adjust_fs_space(inode);
		sdp->sd_rindex_uptodate = 0;
	}

	gfs2_trans_end(sdp);
	gfs2_inplace_release(ip);

	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
		/* Deallocate blocks that were just allocated. */
		loff_t blockmask = i_blocksize(inode) - 1;
		loff_t end = (pos + length) & ~blockmask;

		pos = (pos + written + blockmask) & ~blockmask;
		if (pos < end) {
			truncate_pagecache_range(inode, pos, end - 1);
			punch_hole(ip, pos, end - pos);
		}
	}

	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
		gfs2_quota_unlock(ip);
	gfs2_write_unlock(inode);

out:
	if (dibh)
		brelse(dibh);
	return 0;
}

const struct iomap_ops gfs2_iomap_ops = {
	.iomap_begin = gfs2_iomap_begin,
	.iomap_end = gfs2_iomap_end,
};
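
/*
 * Roughly how these operations get driven, as a minimal sketch (the call
 * below is the generic iomap entry point; the real call sites live in the
 * read/write paths, e.g. file.c):
 *
 *	written = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
 *
 * The iomap core calls .iomap_begin to map (and, for buffered writes,
 * allocate) one extent at a time, performs the I/O against that extent,
 * and then calls .iomap_end, which finishes the transaction started in
 * gfs2_iomap_begin_write.
 */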

/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if it's ok to alloc blocks to satisfy the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	loff_t length = bh_map->b_size;
	struct metapath mp = { .mp_aheight = 1, };
	struct iomap iomap = { };
	int ret;

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

	if (create) {
		ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
		if (!ret && iomap.type == IOMAP_HOLE)
			ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
		release_metapath(&mp);
	} else {
		ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
		release_metapath(&mp);
	}
	if (ret)
		goto out;

	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
	}
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

out:
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	return ret;
}

/*
 * Deprecated: do not use in new code
 */
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
{
	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
	int ret;
	int create = *new;

	BUG_ON(!extlen);
	BUG_ON(!dblock);
	BUG_ON(!new);

	bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
	ret = gfs2_block_map(inode, lblock, &bh, create);
	*extlen = bh.b_size >> inode->i_blkbits;
	*dblock = bh.b_blocknr;
	if (buffer_new(&bh))
		*new = 1;
	else
		*new = 0;
	return ret;
}

/**
 * gfs2_block_zero_range - Deal with zeroing out data
 *
 * This is partly borrowed from ext3.
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	struct address_space *mapping = inode->i_mapping;
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned long index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize, iblock, pos;
	struct buffer_head *bh;
	struct page *page;
	int err;

	page = find_or_create_page(mapping, index, GFP_NOFS);
	if (!page)
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
		err = 0;
	}

	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else
		gfs2_ordered_add_inode(ip);

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
unlock:
	unlock_page(page);
	put_page(page);
	return err;
}

#define GFS2_JTRUNC_REVOKES 8192

/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}
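
/*
 * Example: with 4096-byte blocks, max_chunk above is
 * 8192 * 4096 bytes == 32MB, so a journaled truncate from 1GB down to
 * zero runs as 32 chunks, each in its own transaction with at most
 * GFS2_JTRUNC_REVOKES revokes.
 */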

static int trunc_start(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;
	int error;

	if (journaled)
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	} else {
		unsigned int blocksize = i_blocksize(inode);
		unsigned int offs = newsize & (blocksize - 1);
		if (offs) {
			error = gfs2_block_zero_range(inode, newsize,
						      blocksize - offs);
			if (error)
				goto out;
		}
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
	}

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

out:
	brelse(dibh);
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}

int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
			 struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
	if (!ret && iomap->type == IOMAP_HOLE)
		ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
	release_metapath(&mp);
	return ret;
}

/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			if (!rgrp_contains_block(rgd, bn)) {
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 0, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rbm.rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		if (bstart + blen == bn) {
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	if (bstart) {
		__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			ip->i_inode.i_mtime = ip->i_inode.i_ctime =
				current_time(&ip->i_inode);
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
			buf_in_tr = false;
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}

static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
{
	if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
		return false;
	return true;
}

/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @mp: starting metapath
 * @h: desired height to search
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		bool keep_end = h < end_aligned;
		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}

enum dealloc_states {
	DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,    /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,       /* process complete */
};
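
/*
 * Typical state transitions in punch_hole() below:
 *
 *	DEALLOC_FILL_MP  -> DEALLOC_MP_FULL:  buffers read in up to the
 *					      current strip height
 *	DEALLOC_MP_FULL  -> DEALLOC_MP_LOWER: this position swept; advance
 *	DEALLOC_MP_LOWER -> DEALLOC_FILL_MP:  a non-null pointer was found,
 *					      refill buffers there
 *	any state        -> DEALLOC_DONE:     dinode swept, or error
 */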

static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{
	struct buffer_head *bh = mp->mp_bh[height];
	__be64 *first;

	first = metaptr1(height, mp);
	*start = first;
	if (mp_eq_to_hgt(mp, start_list, height)) {
		bool keep_start = height < start_aligned;
		*start = first + start_list[height] + keep_start;
	}
	*end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
		bool keep_end = height < end_aligned;
		*end = first + end_list[height] + keep_end;
	}
}

static inline bool walk_done(struct gfs2_sbd *sdp,
			     struct metapath *mp, int height,
			     __u16 *end_list, unsigned int end_aligned)
{
	__u16 end;

	if (end_list) {
		bool keep_end = height < end_aligned;
		if (!mp_eq_to_hgt(mp, end_list, height))
			return false;
		end = end_list[height] + keep_end;
	} else
		end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
	return mp->mp_list[height] >= end;
}

/**
 * punch_hole - deallocate blocks in a file
 * @ip: inode to truncate
 * @offset: the start of the hole
 * @length: the size of the hole (or 0 for truncate)
 *
 * Punch a hole into a file or truncate a file at a given position. This
 * function operates in whole blocks (@offset and @length are rounded
 * accordingly); partially filled blocks must be cleared otherwise.
 *
 * This function works from the bottom up, and from the right to the left. In
 * other words, it strips off the highest layer (data) before stripping any of
 * the metadata. Doing it this way is best in case the operation is interrupted
 * by power failure, etc. The dinode is rewritten in every transaction to
 * guarantee integrity.
 */
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u64 maxsize = sdp->sd_heightsize[ip->i_height];
	struct metapath mp = {};
	struct buffer_head *dibh, *bh;
	struct gfs2_holder rd_gh;
	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
	u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
	__u16 start_list[GFS2_MAX_META_HEIGHT];
	__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
	unsigned int start_aligned, uninitialized_var(end_aligned);
	unsigned int strip_h = ip->i_height - 1;
	u32 btotal = 0;
	int ret, state;
	int mp_h; /* metapath buffers are read in to this height */
	u64 prev_bnr = 0;
	__be64 *start, *end;

	if (offset >= maxsize) {
		/*
		 * The starting point lies beyond the allocated metadata;
		 * there are no blocks to deallocate.
		 */
		return 0;
	}

	/*
	 * The start position of the hole is defined by lblock, start_list, and
	 * start_aligned. The end position of the hole is defined by lend,
	 * end_list, and end_aligned.
	 *
	 * start_aligned and end_aligned define down to which height the start
	 * and end positions are aligned to the metadata tree (i.e., the
	 * position is a multiple of the metadata granularity at the height
	 * above). This determines at which heights additional meta pointers
	 * need to be preserved for the remaining data.
1785 */
1786
1787 if (length) {
1788 u64 end_offset = offset + length;
1789 u64 lend;
1790
1791 /*
1792 * Clip the end at the maximum file size for the given height:
1793 * that's how far the metadata goes; files bigger than that
1794 * will have additional layers of indirection.
1795 */
1796 if (end_offset > maxsize)
1797 end_offset = maxsize;
1798 lend = end_offset >> bsize_shift;
1799
1800 if (lblock >= lend)
1801 return 0;
1802
1803 find_metapath(sdp, lend, &mp, ip->i_height);
1804 end_list = __end_list;
1805 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1806
1807 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1808 if (end_list[mp_h])
1809 break;
1810 }
1811 end_aligned = mp_h;
1812 }
1813
1814 find_metapath(sdp, lblock, &mp, ip->i_height);
1815 memcpy(start_list, mp.mp_list, sizeof(start_list));
1816
1817 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1818 if (start_list[mp_h])
1819 break;
1820 }
1821 start_aligned = mp_h;
1822
1823 ret = gfs2_meta_inode_buffer(ip, &dibh);
1824 if (ret)
1825 return ret;
1826
1827 mp.mp_bh[0] = dibh;
1828 ret = lookup_metapath(ip, &mp);
1829 if (ret)
1830 goto out_metapath;
1831
1832 /* issue read-ahead on metadata */
1833 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1834 metapointer_range(&mp, mp_h, start_list, start_aligned,
1835 end_list, end_aligned, &start, &end);
1836 gfs2_metapath_ra(ip->i_gl, start, end);
1837 }
1838
1839 if (mp.mp_aheight == ip->i_height)
1840 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1841 else
1842 state = DEALLOC_FILL_MP; /* deal with partial metapath */
1843
1844 ret = gfs2_rindex_update(sdp);
1845 if (ret)
1846 goto out_metapath;
1847
1848 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1849 if (ret)
1850 goto out_metapath;
1851 gfs2_holder_mark_uninitialized(&rd_gh);
1852
1853 mp_h = strip_h;
1854
	while (state != DEALLOC_DONE) {
		switch (state) {
		/* Truncate a full metapath at the given strip height.
		 * Note that strip_h == mp_h in order to be in this state. */
		case DEALLOC_MP_FULL:
			bh = mp.mp_bh[mp_h];
			gfs2_assert_withdraw(sdp, bh);
			if (gfs2_assert_withdraw(sdp,
						 prev_bnr != bh->b_blocknr)) {
				printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
				       "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
				       sdp->sd_fsname,
				       (unsigned long long)ip->i_no_addr,
				       prev_bnr, ip->i_height, strip_h, mp_h);
			}
			prev_bnr = bh->b_blocknr;

			if (gfs2_metatype_check(sdp, bh,
						(mp_h ? GFS2_METATYPE_IN :
							GFS2_METATYPE_DI))) {
				ret = -EIO;
				goto out;
			}

			/*
			 * Below, passing end_aligned as 0 gives us the
			 * metapointer range excluding the end point: the end
			 * point is the first metapath we must not deallocate!
			 */

			metapointer_range(&mp, mp_h, start_list, start_aligned,
					  end_list, 0 /* end_aligned */,
					  &start, &end);
			ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
						 start, end,
						 mp_h != ip->i_height - 1,
						 &btotal);

			/* If we hit an error or just swept dinode buffer,
			   just exit. */
			if (ret || !mp_h) {
				state = DEALLOC_DONE;
				break;
			}
			state = DEALLOC_MP_LOWER;
			break;

		/* lower the metapath strip height */
		case DEALLOC_MP_LOWER:
			/* We're done with the current buffer, so release it,
			   unless it's the dinode buffer. Then back up to the
			   previous pointer. */
			if (mp_h) {
				brelse(mp.mp_bh[mp_h]);
				mp.mp_bh[mp_h] = NULL;
			}
			/* If we can't get any lower in height, we've stripped
			   off all we can. Next step is to back up and start
			   stripping the previous level of metadata. */
			if (mp_h == 0) {
				strip_h--;
				memcpy(mp.mp_list, start_list, sizeof(start_list));
				mp_h = strip_h;
				state = DEALLOC_FILL_MP;
				break;
			}
			mp.mp_list[mp_h] = 0;
			mp_h--; /* search one metadata height down */
			mp.mp_list[mp_h]++;
			if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
				break;
			/* Here we've found a part of the metapath that is not
			 * allocated. We need to search at that height for the
			 * next non-null pointer. */
			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
				state = DEALLOC_FILL_MP;
				mp_h++;
			}
			/* No more non-null pointers at this height. Back up
			   to the previous height and try again. */
			break; /* loop around in the same state */

		/* Fill the metapath with buffers to the given height. */
		case DEALLOC_FILL_MP:
			/* Fill the buffers out to the current height. */
			ret = fillup_metapath(ip, &mp, mp_h);
			if (ret < 0)
				goto out;

			/* On the first pass, issue read-ahead on metadata. */
			if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
				unsigned int height = mp.mp_aheight - 1;

				/* No read-ahead for data blocks. */
				if (mp.mp_aheight - 1 == strip_h)
					height--;

				for (; height >= mp.mp_aheight - ret; height--) {
					metapointer_range(&mp, height,
							  start_list, start_aligned,
							  end_list, end_aligned,
							  &start, &end);
					gfs2_metapath_ra(ip->i_gl, start, end);
				}
			}

			/* If buffers found for the entire strip height */
			if (mp.mp_aheight - 1 == strip_h) {
				state = DEALLOC_MP_FULL;
				break;
			}
			if (mp.mp_aheight < ip->i_height) /* We have a partial height */
				mp_h = mp.mp_aheight - 1;

			/* If we find a non-null block pointer, crawl a bit
			   higher up in the metapath and try again, otherwise
			   we need to look lower for a new starting point. */
			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
				mp_h++;
			else
				state = DEALLOC_MP_LOWER;
			break;
		}
	}

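	/* sweep_bh_for_rgrps() counted every freed block in btotal; settle
	 * the statfs and quota accounting for all of them in one place.
	 * The sweep may have left a transaction open; if not, open one. */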
	if (btotal) {
		if (current->journal_info == NULL) {
			ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
					       RES_QUOTA, 0);
			if (ret)
				goto out;
			down_write(&ip->i_rw_mutex);
		}
		gfs2_statfs_change(sdp, 0, +btotal, 0);
		gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
				  ip->i_inode.i_gid);
		ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
		gfs2_trans_add_meta(ip->i_gl, dibh);
		gfs2_dinode_out(ip, dibh->b_data);
		up_write(&ip->i_rw_mutex);
		gfs2_trans_end(sdp);
	}

out:
	if (gfs2_holder_initialized(&rd_gh))
		gfs2_glock_dq_uninit(&rd_gh);
	if (current->journal_info) {
		up_write(&ip->i_rw_mutex);
		gfs2_trans_end(sdp);
		cond_resched();
	}
	gfs2_quota_unhold(ip);
out_metapath:
	release_metapath(&mp);
	return ret;
}

static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

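	/* If the file has shrunk to size zero, reset the metadata tree
	 * height and the allocation goal so the inode can be stuffed
	 * again. */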
	if (!i_size_read(&ip->i_inode)) {
		ip->i_height = 0;
		ip->i_goal = ip->i_no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
		gfs2_ordered_del_inode(ip);
	}
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}

/**
 * do_shrink - make a file smaller
 * @inode: the inode
 * @newsize: the size to make the file
 *
 * Called with an exclusive lock on @inode. The @newsize must
 * be equal to or smaller than the current inode size.
 *
 * Returns: errno
 */

static int do_shrink(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	int error;

	error = trunc_start(inode, newsize);
	if (error < 0)
		return error;
	if (gfs2_is_stuffed(ip))
		return 0;

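	/* The file is unstuffed, so blocks may remain allocated beyond the
	 * new size.  A length of 0 tells punch_hole() to deallocate
	 * everything from @newsize to the end of the file. */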
	error = punch_hole(ip, newsize, 0);
	if (error == 0)
		error = trunc_end(ip);

	return error;
}

void gfs2_trim_blocks(struct inode *inode)
{
	int ret;

	ret = do_shrink(inode, inode->i_size);
	WARN_ON(ret != 0);
}

/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode. This function
 * must not be called with @size any smaller than the current
 * inode size.
 *
 * Although it is not strictly required to unstuff files here,
 * earlier versions of GFS2 had a bug in the stuffed file reading
 * code which would result in a buffer overrun if the size was larger
 * than the max stuffed file size. In order to prevent this from
 * occurring, such files are unstuffed, but in other cases we can
 * just update the inode size directly.
 *
 * Returns: 0 on success, or -ve on error
 */

static int do_grow(struct inode *inode, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_alloc_parms ap = { .target = 1, };
	struct buffer_head *dibh;
	int error;
	int unstuff = 0;

	if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			return error;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto do_grow_qunlock;
		unstuff = 1;
	}

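	/* Reserve journal space for the dinode plus, when unstuffing a
	 * jdata file, the journaled data block, and a quota change unless
	 * quotas are switched off entirely. */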
	error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
				 (unstuff &&
				  gfs2_is_jdata(ip) ? RES_JDATA : 0) +
				 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
				  0 : RES_QUOTA), 0);
	if (error)
		goto do_grow_release;

	if (unstuff) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (error)
			goto do_end_trans;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto do_end_trans;

	i_size_write(inode, size);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

do_end_trans:
	gfs2_trans_end(sdp);
do_grow_release:
	if (unstuff) {
		gfs2_inplace_release(ip);
do_grow_qunlock:
		gfs2_quota_unlock(ip);
	}
	return error;
}

/**
 * gfs2_setattr_size - make a file a given size
 * @inode: the inode
 * @newsize: the size to make the file
 *
 * The file size can grow, shrink, or stay the same size. This
 * is called holding i_rwsem and an exclusive glock on the inode
 * in question.
 *
 * Returns: errno
 */

int gfs2_setattr_size(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret;

	BUG_ON(!S_ISREG(inode->i_mode));

	ret = inode_newsize_ok(inode, newsize);
	if (ret)
		return ret;

	inode_dio_wait(inode);

	ret = gfs2_rsqa_alloc(ip);
	if (ret)
		goto out;

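	/* Growing to the same size still goes through do_grow() so that the
	 * timestamps are updated; only a strictly smaller size shrinks. */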
	if (newsize >= inode->i_size) {
		ret = do_grow(inode, newsize);
		goto out;
	}

	ret = do_shrink(inode, newsize);
out:
	gfs2_rsqa_delete(ip, NULL);
	return ret;
}

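/* Resume a truncate that was interrupted (the dinode still carries the
 * GFS2_DIF_TRUNC_IN_PROG flag, which trunc_end() clears once the tail
 * blocks have been freed). */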
int gfs2_truncatei_resume(struct gfs2_inode *ip)
{
	int error;
	error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
	if (!error)
		error = trunc_end(ip);
	return error;
}

int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	return punch_hole(ip, 0, 0);
}

/**
 * gfs2_free_journal_extents - Free cached journal bmap info
 * @jd: The journal
 *
 */

void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
{
	struct gfs2_journal_extent *jext;

	while (!list_empty(&jd->extent_list)) {
		jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
		list_del(&jext->list);
		kfree(jext);
	}
}

/**
 * gfs2_add_jextent - Add or merge a new extent to extent cache
 * @jd: The journal descriptor
 * @lblock: The logical block at start of new extent
 * @dblock: The physical block at start of new extent
 * @blocks: Size of extent in fs blocks
 *
 * Returns: 0 on success or -ENOMEM
 */

static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
{
	struct gfs2_journal_extent *jext;

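	/* If the new extent is physically contiguous with the last cached
	 * extent, merge it: e.g. (dblock 1000, 8 blocks) followed by a
	 * mapping at dblock 1008 simply grows that extent to 16 blocks. */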
	if (!list_empty(&jd->extent_list)) {
		jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
		if ((jext->dblock + jext->blocks) == dblock) {
			jext->blocks += blocks;
			return 0;
		}
	}

	jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
	if (jext == NULL)
		return -ENOMEM;
	jext->dblock = dblock;
	jext->lblock = lblock;
	jext->blocks = blocks;
	list_add_tail(&jext->list, &jd->extent_list);
	jd->nr_extents++;
	return 0;
}

/**
 * gfs2_map_journal_extents - Cache journal bmap info
 * @sdp: The super block
 * @jd: The journal to map
 *
 * Create a reusable "extent" mapping from all logical
 * blocks to all physical blocks for the given journal.  This will save
 * us time when writing journal blocks.  Most journals will have only one
 * extent that maps all their logical blocks.  That's because mkfs.gfs2
 * lays the journal blocks out sequentially to maximize performance, so
 * a single extent maps the first logical block for the entire file
 * length.  However, gfs2_jadd can run while there is other file
 * activity, so the journals it adds may not be sequential.  Less likely
 * is the case where users create their own journals by mounting the
 * metafs and laying them out by hand, but it is still possible.  Such
 * journals might have several extents.
 *
 * Returns: 0 on success, or error on failure
 */

int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
{
	u64 lblock = 0;
	u64 lblock_stop;
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct buffer_head bh;
	unsigned int shift = sdp->sd_sb.sb_bsize_shift;
	u64 size;
	int rc;

	lblock_stop = i_size_read(jd->jd_inode) >> shift;
	size = (lblock_stop - lblock) << shift;
	jd->nr_extents = 0;
	WARN_ON(!list_empty(&jd->extent_list));

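	/* Map the journal in as few calls as possible: gfs2_block_map() is
	 * offered the full remaining size each time, and on return
	 * bh.b_size says how much of it was mapped contiguously. */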
	do {
		bh.b_state = 0;
		bh.b_blocknr = 0;
		bh.b_size = size;
		rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
		if (rc || !buffer_mapped(&bh))
			goto fail;
		rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
		if (rc)
			goto fail;
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while (size > 0);

	fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
		jd->nr_extents);
	return 0;

fail:
	fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
		rc, jd->jd_jid,
		(unsigned long long)(i_size_read(jd->jd_inode) - size),
		jd->nr_extents);
	fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
		rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
		bh.b_state, (unsigned long long)bh.b_size);
	gfs2_free_journal_extents(jd);
	return rc;
}

/**
 * gfs2_write_alloc_required - figure out if a write will require an allocation
 * @ip: the file being written to
 * @offset: the offset to write to
 * @len: the number of bytes being written
 *
 * Returns: 1 if an alloc is required, 0 otherwise
 */

int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
			      unsigned int len)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head bh;
	unsigned int shift;
	u64 lblock, lblock_stop, size;
	u64 end_of_file;

	if (!len)
		return 0;

	if (gfs2_is_stuffed(ip)) {
		if (offset + len > gfs2_max_stuffed_size(ip))
			return 1;
		return 0;
	}

	shift = sdp->sd_sb.sb_bsize_shift;
	BUG_ON(gfs2_is_dir(ip));
	end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
	lblock = offset >> shift;
	lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
	if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
		return 1;

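	/* The range lies within the current end of file; walk the block
	 * mappings and report an allocation as soon as an unmapped (hole)
	 * block is found. */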
	size = (lblock_stop - lblock) << shift;
	do {
		bh.b_state = 0;
		bh.b_size = size;
		gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
		if (!buffer_mapped(&bh))
			return 1;
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while (size > 0);

	return 0;
}

static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct buffer_head *dibh;
	int error;

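	/* Stuffed data lives in the dinode block directly after the
	 * header, so clamp the range to EOF and zero the bytes in place. */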
	if (offset >= inode->i_size)
		return 0;
	if (offset + length > inode->i_size)
		length = inode->i_size - offset;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;
	gfs2_trans_add_meta(ip->i_gl, dibh);
	memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
	       length);
	brelse(dibh);
	return 0;
}

static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
					 loff_t length)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	int error;

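	/* Truncating journaled pages generates revokes, and one transaction
	 * covers at most GFS2_JTRUNC_REVOKES of them.  Truncate the page
	 * cache in chunks no larger than that, keeping chunks page-aligned
	 * once any initial partial page has been handled. */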
	while (length) {
		struct gfs2_trans *tr;
		loff_t chunk;
		unsigned int offs;

		chunk = length;
		if (chunk > max_chunk)
			chunk = max_chunk;

		offs = offset & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		/* The last argument of truncate_pagecache_range() is the
		 * offset of the last byte to truncate, not a length. */
		truncate_pagecache_range(inode, offset, offset + chunk - 1);
		offset += chunk;
		length -= chunk;

		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}
	return 0;
}

int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	if (gfs2_is_jdata(ip))
		error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
					 GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

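	/* Zero the partial blocks at the edges of the hole first: stuffed
	 * files are zeroed directly in the dinode block, unstuffed files
	 * get their leading and trailing partial blocks zeroed. */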
	if (gfs2_is_stuffed(ip)) {
		error = stuffed_zero_range(inode, offset, length);
		if (error)
			goto out;
	} else {
		unsigned int start_off, end_len, blocksize;

		blocksize = i_blocksize(inode);
		start_off = offset & (blocksize - 1);
		end_len = (offset + length) & (blocksize - 1);
		if (start_off) {
			unsigned int len = length;
			if (length > blocksize - start_off)
				len = blocksize - start_off;
			error = gfs2_block_zero_range(inode, offset, len);
			if (error)
				goto out;
			if (start_off + length < blocksize)
				end_len = 0;
		}
		if (end_len) {
			error = gfs2_block_zero_range(inode,
				offset + length - end_len, end_len);
			if (error)
				goto out;
		}
	}

	if (gfs2_is_jdata(ip)) {
		BUG_ON(!current->journal_info);
		error = gfs2_journaled_truncate_range(inode, offset, length);
		if (error)
			goto out;
	} else
		truncate_pagecache_range(inode, offset, offset + length - 1);

	file_update_time(file);
	mark_inode_dirty(inode);

	if (current->journal_info)
		gfs2_trans_end(sdp);

	if (!gfs2_is_stuffed(ip))
		error = punch_hole(ip, offset, length);

out:
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}