/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "log.h"
#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
#include "aops.h"
#include "trace_gfs2.h"

/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};

static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);

/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The (optional) page. This is looked up if @page is NULL
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	if (!page || page->index) {
		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, BIT(inode->i_blkbits),
				     BIT(BH_Uptodate));

	bh = page_buffers(page);

	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else {
		mark_buffer_dirty(bh);
		gfs2_ordered_add_inode(ip);
	}

	if (release) {
		unlock_page(page);
		put_page(page);
	}

	return 0;
}

/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: The (optional) page. This is looked up if @page is NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/*  Set up the pointer to the new block  */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}


/**
 * find_metapath - Find path through the metadata tree
 * @sdp: The superblock
 * @block: The disk block to look up
 * @mp: The metapath to return the result in
 * @height: The pre-calculated height of the metadata tree
 *
 *   This routine returns a struct metapath structure that defines a path
 *   through the metadata of inode "ip" to get to block "block".
 *
 *   Example:
 *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 *   filesystem with a blocksize of 4096.
 *
 *   find_metapath() would return a struct metapath structure set to:
 *   mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 *
 *   That means that in order to get to the block containing the byte at
 *   offset 101342453, we would load the indirect block pointed to by pointer
 *   0 in the dinode.  We would then load the indirect block pointed to by
 *   pointer 48 in that indirect block.  We would then load the data block
 *   pointed to by pointer 165 in that indirect block.
 *
 *             ----------------------------------------
 *             | Dinode |                             |
 *             |        |                            4|
 *             |        |0 1 2 3 4 5                 9|
 *             |        |                            6|
 *             ----------------------------------------
 *                       |
 *                       |
 *                       V
 *             ----------------------------------------
 *             | Indirect Block                       |
 *             |                                     5|
 *             |            4 4 4 4 4 5 5            1|
 *             |0           5 6 7 8 9 0 1            2|
 *             ----------------------------------------
 *                                |
 *                                |
 *                                V
 *             ----------------------------------------
 *             | Indirect Block                       |
 *             |                         1 1 1 1 1   5|
 *             |                         6 6 6 6 6   1|
 *             |0                        3 4 5 6 7   2|
 *             ----------------------------------------
 *                                           |
 *                                           |
 *                                           V
 *             ----------------------------------------
 *             | Data block containing offset         |
 *             |            101342453                 |
 *             |                                      |
 *             |                                      |
 *             ----------------------------------------
 *
 */

static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
			  struct metapath *mp, unsigned int height)
{
	unsigned int i;

	mp->mp_fheight = height;
	for (i = height; i--;)
		mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
}
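
/*
 * Note that the loop above amounts to writing @block in base sdp->sd_inptrs:
 * mp_list[] ends up holding the digits, most significant first.
 */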

static inline unsigned int metapath_branch_start(const struct metapath *mp)
{
	if (mp->mp_list[0] == 0)
		return 2;
	return 1;
}

/**
 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 */
static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
{
	struct buffer_head *bh = mp->mp_bh[height];
	if (height == 0)
		return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
	return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
}

/**
 * metapointer - Return pointer to start of metadata in a buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 *
 * Return a pointer to the block number of the next height of the metadata
 * tree given a buffer containing the pointer to the current height of the
 * metadata tree.
 */

static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
{
	__be64 *p = metaptr1(height, mp);
	return p + mp->mp_list[height];
}

static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{
	const struct buffer_head *bh = mp->mp_bh[height];
	return (const __be64 *)(bh->b_data + bh->b_size);
}

static void clone_metapath(struct metapath *clone, struct metapath *mp)
{
	unsigned int hgt;

	*clone = *mp;
	for (hgt = 0; hgt < mp->mp_aheight; hgt++)
		get_bh(clone->mp_bh[hgt]);
}

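/*
 * gfs2_metapath_ra - issue read-ahead for a range of indirect pointers
 * @gl: The glock covering the metadata
 * @start: First pointer in the range
 * @end: One past the last pointer in the range
 *
 * Null pointers and buffers which are already up to date (or cannot be
 * locked without waiting) are skipped.
 */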
static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ,
					  REQ_RAHEAD | REQ_META | REQ_PRIO,
					  rabh);
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}

static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		if (!dblock)
			break;
		ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	mp->mp_aheight = x + 1;
	return 0;
}

/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}

/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	return mp->mp_aheight - x - 1;
}

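/*
 * metapath_to_block - compute the logical block number that @mp points at.
 * Indices at heights which have not been allocated (at or above mp_aheight)
 * are ignored.
 */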
static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
{
	sector_t factor = 1, block = 0;
	int hgt;

	for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
		if (hgt < mp->mp_aheight)
			block += mp->mp_list[hgt] * factor;
		factor *= sdp->sd_inptrs;
	}
	return block;
}

static void release_metapath(struct metapath *mp)
{
	int i;

	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
		if (mp->mp_bh[i] == NULL)
			break;
		brelse(mp->mp_bh[i]);
		mp->mp_bh[i] = NULL;
	}
}

/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @bh: The metadata block
 * @ptr: Current position in @bh
 * @limit: Max extent length to return
 * @eob: Set to 1 if we hit "end of block"
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
{
	const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		d++;
	} while (be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return ptr - first;
}

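/*
 * Return values for a gfs2_metadata_walker: WALK_STOP ends the walk,
 * WALK_FOLLOW descends into the indirect block the walker has positioned
 * @mp at, and WALK_CONTINUE moves on past the pointers just examined.
 */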
enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);

/*
 * gfs2_walk_metadata - walk a tree of indirect blocks
 * @inode: The inode
 * @mp: Starting point of walk
 * @max_len: Maximum number of blocks to walk
 * @walker: Called during the walk
 *
 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 * past the end of metadata, and a negative error code otherwise.
 */

static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
		u64 max_len, gfs2_metadata_walker walker)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 factor = 1;
	unsigned int hgt;
	int ret;

	/*
	 * The walk starts in the lowest allocated indirect block, which may be
	 * before the position indicated by @mp.  Adjust @max_len accordingly
	 * to avoid a short walk.
	 */
	for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
		max_len += mp->mp_list[hgt] * factor;
		mp->mp_list[hgt] = 0;
		factor *= sdp->sd_inptrs;
	}

	for (;;) {
		u16 start = mp->mp_list[hgt];
		enum walker_status status;
		unsigned int ptrs;
		u64 len;

		/* Walk indirect block. */
		ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
		len = ptrs * factor;
		if (len > max_len)
			ptrs = DIV_ROUND_UP_ULL(max_len, factor);
		status = walker(mp, ptrs);
		switch (status) {
		case WALK_STOP:
			return 1;
		case WALK_FOLLOW:
			BUG_ON(mp->mp_aheight == mp->mp_fheight);
			ptrs = mp->mp_list[hgt] - start;
			len = ptrs * factor;
			break;
		case WALK_CONTINUE:
			break;
		}
		if (len >= max_len)
			break;
		max_len -= len;
		if (status == WALK_FOLLOW)
			goto fill_up_metapath;

lower_metapath:
		/* Decrease height of metapath. */
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		mp->mp_list[hgt] = 0;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		if (hgt) {
			if (mp->mp_list[hgt] >= sdp->sd_inptrs)
				goto lower_metapath;
		} else {
			if (mp->mp_list[hgt] >= sdp->sd_diptrs)
				break;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			return ret;
		hgt += ret;
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	return 0;
}

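/*
 * gfs2_hole_walker - metadata walker used by gfs2_hole_size
 *
 * Stops at the first non-null pointer, following the metadata tree down to
 * its full height, so that the walk ends at the first allocated block.
 */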
static enum walker_status gfs2_hole_walker(struct metapath *mp,
					   unsigned int ptrs)
{
	const __be64 *start, *ptr, *end;
	unsigned int hgt;

	hgt = mp->mp_aheight - 1;
	start = metapointer(hgt, mp);
	end = start + ptrs;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			mp->mp_list[hgt] += ptr - start;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;
			return WALK_FOLLOW;
		}
	}
	return WALK_CONTINUE;
}

/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function modifies @mp.
 *
 * Returns: errno on error
 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
			  struct metapath *mp, struct iomap *iomap)
{
	struct metapath clone;
	u64 hole_size;
	int ret;

	clone_metapath(&clone, mp);
	ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
	if (ret < 0)
		goto out;

	if (ret == 1)
		hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
	else
		hole_size = len;
	iomap->length = hole_size << inode->i_blkbits;
	ret = 0;

out:
	release_metapath(&clone);
	return ret;
}

static inline __be64 *gfs2_indirect_init(struct metapath *mp,
					 struct gfs2_glock *gl, unsigned int i,
					 unsigned offset, u64 bn)
{
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
		       ((i > 1) ? sizeof(struct gfs2_meta_header) :
				 sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
	return ptr;
}

enum alloc_state {
	ALLOC_DATA = 0,
	ALLOC_GROW_DEPTH = 1,
	ALLOC_GROW_HEIGHT = 2,
	/* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
};

/**
 * gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure
 * @flags: iomap flags
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call) and use the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M.  If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * Returns: errno on error
 */

static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			    unsigned flags, struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_add_unrevoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for (i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
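			/* fall through */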
		/* Branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
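			/* fall through */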
		/* Tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}

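/*
 * The iomap private flag marks an extent that ends at a metadata boundary;
 * gfs2_block_map() translates it into buffer_boundary() on the mapped
 * buffer head.
 */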
#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE

/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *first, *ptr, *end;

	/*
	 * For writes to stuffed files, this function is called twice via
	 * gfs2_iomap_get, before and after unstuffing. The size we return the
	 * first time needs to be large enough to get the reservation and
	 * allocation sizes right.  The size we return the second time must
	 * be exact or else gfs2_iomap_alloc won't do the right thing.
	 */

	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
		unsigned int maxsize = mp->mp_fheight > 1 ?
			sdp->sd_inptrs : sdp->sd_diptrs;
		maxsize -= mp->mp_list[mp->mp_fheight - 1];
		if (size > maxsize)
			size = maxsize;
		return size;
	}

	first = metapointer(ip->i_height - 1, mp);
	end = metaend(ip->i_height - 1, mp);
	if (end - first > size)
		end = first + size;
	for (ptr = first; ptr < end; ptr++) {
		if (*ptr)
			break;
	}
	return ptr - first;
}

/**
 * gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * Returns: errno
 */
static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			  unsigned flags, struct iomap *iomap,
			  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					iomap->offset = pos;
					iomap->length = length;
					goto hole_found;
				}
			}
			iomap->length = size;
		}
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, len, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - iomap->offset;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out;  /* (see gfs2_file_direct_write) */

		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
hole_found:
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	goto out;
}

static int gfs2_write_lock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
	error = gfs2_glock_nq(&ip->i_gh);
	if (error)
		goto out_uninit;
	if (&ip->i_inode == sdp->sd_rindex) {
		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &m_ip->i_gh);
		if (error)
			goto out_unlock;
	}
	return 0;

out_unlock:
	gfs2_glock_dq(&ip->i_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_gh);
	return error;
}

static void gfs2_write_unlock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (&ip->i_inode == sdp->sd_rindex) {
		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

		gfs2_glock_dq_uninit(&m_ip->i_gh);
	}
	gfs2_glock_dq_uninit(&ip->i_gh);
}

static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
				unsigned copied, struct page *page,
				struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
}

static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	bool unstuff, alloc_required;
	int ret;

	ret = gfs2_write_lock(inode);
	if (ret)
		return ret;

	unstuff = gfs2_is_stuffed(ip) &&
		  pos + length > gfs2_max_stuffed_size(ip);

	ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp);
	if (ret)
		goto out_unlock;

	alloc_required = unstuff || iomap->type == IOMAP_HOLE;

	if (alloc_required || gfs2_is_jdata(ip))
		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
				       &ind_blocks);

	if (alloc_required) {
		struct gfs2_alloc_parms ap = {
			.target = data_blocks + ind_blocks
		};

		ret = gfs2_quota_lock_check(ip, &ap);
		if (ret)
			goto out_unlock;

		ret = gfs2_inplace_reserve(ip, &ap);
		if (ret)
			goto out_qunlock;
	}

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks;
	if (ind_blocks || data_blocks)
		rblocks += RES_STATFS + RES_QUOTA;
	if (inode == sdp->sd_rindex)
		rblocks += 2 * RES_STATFS;
	if (alloc_required)
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);

	ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits);
	if (ret)
		goto out_trans_fail;

	if (unstuff) {
		ret = gfs2_unstuff_dinode(ip, NULL);
		if (ret)
			goto out_trans_end;
		release_metapath(mp);
		ret = gfs2_iomap_get(inode, iomap->offset, iomap->length,
				     flags, iomap, mp);
		if (ret)
			goto out_trans_end;
	}

	if (iomap->type == IOMAP_HOLE) {
		ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
		if (ret) {
			gfs2_trans_end(sdp);
			gfs2_inplace_release(ip);
			punch_hole(ip, iomap->offset, iomap->length);
			goto out_qunlock;
		}
	}
	if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
		iomap->page_done = gfs2_iomap_journaled_page_done;
	return 0;

out_trans_end:
	gfs2_trans_end(sdp);
out_trans_fail:
	if (alloc_required)
		gfs2_inplace_release(ip);
out_qunlock:
	if (alloc_required)
		gfs2_quota_unlock(ip);
out_unlock:
	gfs2_write_unlock(inode);
	return ret;
}

static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	iomap->flags |= IOMAP_F_BUFFER_HEAD;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
		ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
	} else {
		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);

		/*
		 * Silently fall back to buffered I/O for stuffed files or if
		 * we've hit a hole (see gfs2_file_direct_write).
		 */
		if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
		    iomap->type != IOMAP_MAPPED)
			ret = -ENOTBLK;
	}
	if (!ret) {
		get_bh(mp.mp_bh[0]);
		iomap->private = mp.mp_bh[0];
	}
	release_metapath(&mp);
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;
}

static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_trans *tr = current->journal_info;
	struct buffer_head *dibh = iomap->private;

	if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
		goto out;

	if (iomap->type != IOMAP_INLINE) {
		gfs2_ordered_add_inode(ip);

		if (tr->tr_num_buf_new)
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
		else
			gfs2_trans_add_meta(ip->i_gl, dibh);
	}

	if (inode == sdp->sd_rindex) {
		adjust_fs_space(inode);
		sdp->sd_rindex_uptodate = 0;
	}

	gfs2_trans_end(sdp);
	gfs2_inplace_release(ip);

	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
		/* Deallocate blocks that were just allocated. */
		loff_t hstart = round_up(pos + written, i_blocksize(inode));
		loff_t hend = iomap->offset + iomap->length;

		if (hstart < hend) {
			truncate_pagecache_range(inode, hstart, hend - 1);
			punch_hole(ip, hstart, hend - hstart);
		}
	}

	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
		gfs2_quota_unlock(ip);
	gfs2_write_unlock(inode);

out:
	if (dibh)
		brelse(dibh);
	return 0;
}

const struct iomap_ops gfs2_iomap_ops = {
	.iomap_begin = gfs2_iomap_begin,
	.iomap_end = gfs2_iomap_end,
};

/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if it's OK to allocate blocks to satisfy the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped.  Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	loff_t length = bh_map->b_size;
	struct metapath mp = { .mp_aheight = 1, };
	struct iomap iomap = { };
	int ret;

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

	if (create) {
		ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
		if (!ret && iomap.type == IOMAP_HOLE)
			ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
		release_metapath(&mp);
	} else {
		ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
		release_metapath(&mp);
	}
	if (ret)
		goto out;

	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
	}
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

out:
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	return ret;
}

/*
 * Deprecated: do not use in new code
 */
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
{
	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
	int ret;
	int create = *new;

	BUG_ON(!extlen);
	BUG_ON(!dblock);
	BUG_ON(!new);

	bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
	ret = gfs2_block_map(inode, lblock, &bh, create);
	*extlen = bh.b_size >> inode->i_blkbits;
	*dblock = bh.b_blocknr;
	if (buffer_new(&bh))
		*new = 1;
	else
		*new = 0;
	return ret;
}

/**
 * gfs2_block_zero_range - Deal with zeroing out data
 *
 * This is partly borrowed from ext3.
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	struct address_space *mapping = inode->i_mapping;
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned long index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize, iblock, pos;
	struct buffer_head *bh;
	struct page *page;
	int err;

	page = find_or_create_page(mapping, index, GFP_NOFS);
	if (!page)
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
		err = 0;
	}

	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else
		gfs2_ordered_add_inode(ip);

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
unlock:
	unlock_page(page);
	put_page(page);
	return err;
}

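/* Maximum number of revokes per transaction when truncating jdata files */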
#define GFS2_JTRUNC_REVOKES 8192

/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}

static int trunc_start(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;
	int error;

	if (journaled)
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	} else {
		unsigned int blocksize = i_blocksize(inode);
		unsigned int offs = newsize & (blocksize - 1);
		if (offs) {
			error = gfs2_block_zero_range(inode, newsize,
						      blocksize - offs);
			if (error)
				goto out;
		}
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
	}

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

out:
	brelse(dibh);
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}

int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
			 struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
	if (!ret && iomap->type == IOMAP_HOLE)
		ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
	release_metapath(&mp);
	return ret;
}

/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			if (!rgrp_contains_block(rgd, bn)) {
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 0, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rbm.rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		if (bstart + blen == bn) {
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	if (bstart) {
		__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			ip->i_inode.i_mtime = ip->i_inode.i_ctime =
				current_time(&ip->i_inode);
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
			buf_in_tr = false;
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}

static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
{
	if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
		return false;
	return true;
}

/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @mp: starting metapath
 * @h: desired height to search
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		bool keep_end = h < end_aligned;
		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}

enum dealloc_states {
	DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,  /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,       /* process complete */
};

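/*
 * metapointer_range - determine the range of pointers to process at @height,
 * clipped against the start and end positions of the hole as described by
 * @start_list / @start_aligned and @end_list / @end_aligned.
 */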
static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{
	struct buffer_head *bh = mp->mp_bh[height];
	__be64 *first;

	first = metaptr1(height, mp);
	*start = first;
	if (mp_eq_to_hgt(mp, start_list, height)) {
		bool keep_start = height < start_aligned;
		*start = first + start_list[height] + keep_start;
	}
	*end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
		bool keep_end = height < end_aligned;
		*end = first + end_list[height] + keep_end;
	}
}

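/*
 * walk_done - check whether the metapath has moved past the end of the
 * range being deallocated at the given @height.
 */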
walk_done(struct gfs2_sbd * sdp,struct metapath * mp,int height,__u16 * end_list,unsigned int end_aligned)1714 static inline bool walk_done(struct gfs2_sbd *sdp,
1715 			     struct metapath *mp, int height,
1716 			     __u16 *end_list, unsigned int end_aligned)
1717 {
1718 	__u16 end;
1719 
1720 	if (end_list) {
1721 		bool keep_end = height < end_aligned;
1722 		if (!mp_eq_to_hgt(mp, end_list, height))
1723 			return false;
1724 		end = end_list[height] + keep_end;
1725 	} else
1726 		end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1727 	return mp->mp_list[height] >= end;
1728 }
1729 
/**
 * punch_hole - deallocate blocks in a file
 * @ip: inode to truncate
 * @offset: the start of the hole
 * @length: the size of the hole (or 0 for truncate)
 *
 * Punch a hole into a file or truncate a file at a given position.  This
 * function operates in whole blocks (@offset and @length are rounded
 * accordingly); partially filled blocks must be cleared otherwise.
 *
 * This function works from the bottom up, and from the right to the left. In
 * other words, it strips off the highest layer (data) before stripping any of
 * the metadata. Doing it this way is best in case the operation is interrupted
 * by power failure, etc.  The dinode is rewritten in every transaction to
 * guarantee integrity.
 */
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u64 maxsize = sdp->sd_heightsize[ip->i_height];
	struct metapath mp = {};
	struct buffer_head *dibh, *bh;
	struct gfs2_holder rd_gh;
	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
	u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
	__u16 start_list[GFS2_MAX_META_HEIGHT];
	__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
	unsigned int start_aligned, end_aligned = 0;
	unsigned int strip_h = ip->i_height - 1;
	u32 btotal = 0;
	int ret, state;
	int mp_h; /* metapath buffers are read in to this height */
	u64 prev_bnr = 0;
	__be64 *start, *end;

	if (offset >= maxsize) {
		/*
		 * The starting point lies beyond the allocated metadata;
		 * there are no blocks to deallocate.
		 */
		return 0;
	}

	/*
	 * The start position of the hole is defined by lblock, start_list, and
	 * start_aligned.  The end position of the hole is defined by lend,
	 * end_list, and end_aligned.
	 *
	 * start_aligned and end_aligned define down to which height the start
	 * and end positions are aligned to the metadata tree (i.e., the
	 * position is a multiple of the metadata granularity at the height
	 * above).  This determines at which heights additional meta pointers
	 * need to be preserved for the remaining data.
	 */
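	/*
	 * For example, when the hole starts in the middle of the file,
	 * blocks before the start position must survive.  At each height at
	 * which the start is unaligned, the subtree under the boundary
	 * pointer still holds live data, so that pointer is skipped
	 * (keep_start in metapointer_range) and stripping begins at the
	 * following pointer.  The end boundary works the same way in mirror
	 * image: the first metapath past the hole must not be touched.
	 */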

	if (length) {
		u64 end_offset = offset + length;
		u64 lend;

		/*
		 * Clip the end at the maximum file size for the given height:
		 * that's how far the metadata goes; files bigger than that
		 * will have additional layers of indirection.
		 */
		if (end_offset > maxsize)
			end_offset = maxsize;
		lend = end_offset >> bsize_shift;

		if (lblock >= lend)
			return 0;

		find_metapath(sdp, lend, &mp, ip->i_height);
		end_list = __end_list;
		memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));

		for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
			if (end_list[mp_h])
				break;
		}
		end_aligned = mp_h;
	}

	find_metapath(sdp, lblock, &mp, ip->i_height);
	memcpy(start_list, mp.mp_list, sizeof(start_list));

	for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
		if (start_list[mp_h])
			break;
	}
	start_aligned = mp_h;

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		return ret;

	mp.mp_bh[0] = dibh;
	ret = lookup_metapath(ip, &mp);
	if (ret)
		goto out_metapath;

	/* issue read-ahead on metadata */
	for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
		metapointer_range(&mp, mp_h, start_list, start_aligned,
				  end_list, end_aligned, &start, &end);
		gfs2_metapath_ra(ip->i_gl, start, end);
	}

	if (mp.mp_aheight == ip->i_height)
		state = DEALLOC_MP_FULL; /* We have a complete metapath */
	else
		state = DEALLOC_FILL_MP; /* deal with partial metapath */

	ret = gfs2_rindex_update(sdp);
	if (ret)
		goto out_metapath;

	ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
	if (ret)
		goto out_metapath;
	gfs2_holder_mark_uninitialized(&rd_gh);

	mp_h = strip_h;

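	/*
	 * The following loop is a state machine (see enum dealloc_states):
	 * DEALLOC_MP_FULL sweeps the pointers of one fully read-in buffer at
	 * the current strip height, DEALLOC_MP_LOWER backs up to the
	 * previous pointer (and eventually lowers strip_h by one), and
	 * DEALLOC_FILL_MP reads buffers back in out to the current height.
	 */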
	while (state != DEALLOC_DONE) {
		switch (state) {
		/* Truncate a full metapath at the given strip height.
		 * Note that strip_h == mp_h in order to be in this state. */
		case DEALLOC_MP_FULL:
			bh = mp.mp_bh[mp_h];
			gfs2_assert_withdraw(sdp, bh);
			if (gfs2_assert_withdraw(sdp,
						 prev_bnr != bh->b_blocknr)) {
				printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
				       "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
				       sdp->sd_fsname,
				       (unsigned long long)ip->i_no_addr,
				       prev_bnr, ip->i_height, strip_h, mp_h);
			}
			prev_bnr = bh->b_blocknr;

			if (gfs2_metatype_check(sdp, bh,
						(mp_h ? GFS2_METATYPE_IN :
							GFS2_METATYPE_DI))) {
				ret = -EIO;
				goto out;
			}

			/*
			 * Below, passing end_aligned as 0 gives us the
			 * metapointer range excluding the end point: the end
			 * point is the first metapath we must not deallocate!
			 */

			metapointer_range(&mp, mp_h, start_list, start_aligned,
					  end_list, 0 /* end_aligned */,
					  &start, &end);
			ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
						 start, end,
						 mp_h != ip->i_height - 1,
						 &btotal);

			/* If we hit an error or have just swept the dinode
			   buffer, exit. */
			if (ret || !mp_h) {
				state = DEALLOC_DONE;
				break;
			}
			state = DEALLOC_MP_LOWER;
			break;

		/* lower the metapath strip height */
		case DEALLOC_MP_LOWER:
			/* We're done with the current buffer, so release it,
			   unless it's the dinode buffer. Then back up to the
			   previous pointer. */
			if (mp_h) {
				brelse(mp.mp_bh[mp_h]);
				mp.mp_bh[mp_h] = NULL;
			}
			/* If we can't get any lower in height, we've stripped
			   off all we can. Next step is to back up and start
			   stripping the previous level of metadata. */
			if (mp_h == 0) {
				strip_h--;
				memcpy(mp.mp_list, start_list, sizeof(start_list));
				mp_h = strip_h;
				state = DEALLOC_FILL_MP;
				break;
			}
			mp.mp_list[mp_h] = 0;
			mp_h--; /* search one metadata height down */
			mp.mp_list[mp_h]++;
			if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
				break;
			/* We're now at the next pointer position at this
			 * height. Search from it for the next non-null
			 * pointer. */
			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
				state = DEALLOC_FILL_MP;
				mp_h++;
			}
			/* Otherwise there are no more non-null pointers at
			   this height. Back up to the previous height and
			   try again. */
			break; /* loop around in the same state */

		/* Fill the metapath with buffers to the given height. */
		case DEALLOC_FILL_MP:
			/* Fill the buffers out to the current height. */
			ret = fillup_metapath(ip, &mp, mp_h);
			if (ret < 0)
				goto out;

			/* On the first pass, issue read-ahead on metadata. */
			if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
				unsigned int height = mp.mp_aheight - 1;

				/* No read-ahead for data blocks. */
				if (mp.mp_aheight - 1 == strip_h)
					height--;

				for (; height >= mp.mp_aheight - ret; height--) {
					metapointer_range(&mp, height,
							  start_list, start_aligned,
							  end_list, end_aligned,
							  &start, &end);
					gfs2_metapath_ra(ip->i_gl, start, end);
				}
			}

			/* If buffers found for the entire strip height */
			if (mp.mp_aheight - 1 == strip_h) {
				state = DEALLOC_MP_FULL;
				break;
			}
			if (mp.mp_aheight < ip->i_height) /* We have a partial height */
				mp_h = mp.mp_aheight - 1;

			/* If we find a non-null block pointer, crawl a bit
			   higher up in the metapath and try again, otherwise
			   we need to look lower for a new starting point. */
			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
				mp_h++;
			else
				state = DEALLOC_MP_LOWER;
			break;
		}
	}

	if (btotal) {
		if (current->journal_info == NULL) {
			ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
					       RES_QUOTA, 0);
			if (ret)
				goto out;
			down_write(&ip->i_rw_mutex);
		}
		gfs2_statfs_change(sdp, 0, +btotal, 0);
		gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
				  ip->i_inode.i_gid);
		ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
		gfs2_trans_add_meta(ip->i_gl, dibh);
		gfs2_dinode_out(ip, dibh->b_data);
		up_write(&ip->i_rw_mutex);
		gfs2_trans_end(sdp);
	}

out:
	if (gfs2_holder_initialized(&rd_gh))
		gfs2_glock_dq_uninit(&rd_gh);
	if (current->journal_info) {
		up_write(&ip->i_rw_mutex);
		gfs2_trans_end(sdp);
		cond_resched();
	}
	gfs2_quota_unhold(ip);
out_metapath:
	release_metapath(&mp);
	return ret;
}

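/**
 * trunc_end - finish off a truncate or hole punch
 * @ip: The GFS2 inode
 *
 * Writes the final state back into the dinode.  If the file is now empty,
 * the inode is returned to its stuffed, zero-height form and the block
 * allocation goal is reset.  In all cases the timestamps are updated and
 * the GFS2_DIF_TRUNC_IN_PROG flag is cleared.
 *
 * Returns: errno
 */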
static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!i_size_read(&ip->i_inode)) {
		ip->i_height = 0;
		ip->i_goal = ip->i_no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
		gfs2_ordered_del_inode(ip);
	}
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}

/**
 * do_shrink - make a file smaller
 * @inode: the inode
 * @newsize: the size to make the file
 *
 * Called with an exclusive lock on @inode. @newsize must be equal to
 * or smaller than the current inode size.
 *
 * Returns: errno
 */

static int do_shrink(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	int error;

	error = trunc_start(inode, newsize);
	if (error < 0)
		return error;
	if (gfs2_is_stuffed(ip))
		return 0;

	error = punch_hole(ip, newsize, 0);
	if (error == 0)
		error = trunc_end(ip);

	return error;
}

void gfs2_trim_blocks(struct inode *inode)
{
	int ret;

	ret = do_shrink(inode, inode->i_size);
	WARN_ON(ret != 0);
}

/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode. This function
 * must not be called with @size any smaller than the current
 * inode size.
 *
 * Although it is not strictly required to unstuff files here,
 * earlier versions of GFS2 have a bug in the stuffed file reading
 * code which will result in a buffer overrun if the size is larger
 * than the max stuffed file size. In order to prevent this from
 * occurring, such files are unstuffed, but in other cases we can
 * just update the inode size directly.
 *
 * Returns: 0 on success, or -ve on error
 */

static int do_grow(struct inode *inode, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_alloc_parms ap = { .target = 1, };
	struct buffer_head *dibh;
	int error;
	int unstuff = 0;

	if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			return error;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto do_grow_qunlock;
		unstuff = 1;
	}

	error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
				 (unstuff &&
				  gfs2_is_jdata(ip) ? RES_JDATA : 0) +
				 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
				  0 : RES_QUOTA), 0);
	if (error)
		goto do_grow_release;

	if (unstuff) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (error)
			goto do_end_trans;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto do_end_trans;

	i_size_write(inode, size);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

do_end_trans:
	gfs2_trans_end(sdp);
do_grow_release:
	if (unstuff) {
		gfs2_inplace_release(ip);
do_grow_qunlock:
		gfs2_quota_unlock(ip);
	}
	return error;
}

/**
 * gfs2_setattr_size - make a file a given size
 * @inode: the inode
 * @newsize: the size to make the file
 *
 * The file size can grow, shrink, or stay the same size. This
 * is called holding i_rwsem and an exclusive glock on the inode
 * in question.
 *
 * Returns: errno
 */

int gfs2_setattr_size(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret;

	BUG_ON(!S_ISREG(inode->i_mode));

	ret = inode_newsize_ok(inode, newsize);
	if (ret)
		return ret;

	inode_dio_wait(inode);

	ret = gfs2_rsqa_alloc(ip);
	if (ret)
		goto out;

	if (newsize >= inode->i_size) {
		ret = do_grow(inode, newsize);
		goto out;
	}

	ret = do_shrink(inode, newsize);
out:
	gfs2_rsqa_delete(ip, NULL);
	return ret;
}

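/**
 * gfs2_truncatei_resume - resume an interrupted truncate
 * @ip: The GFS2 inode
 *
 * Completes a truncate that did not finish (which is what the
 * GFS2_DIF_TRUNC_IN_PROG flag indicates): everything beyond the current
 * file size is deallocated, and trunc_end() writes back the result.
 *
 * Returns: errno
 */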
int gfs2_truncatei_resume(struct gfs2_inode *ip)
{
	int error;
	error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
	if (!error)
		error = trunc_end(ip);
	return error;
}

/* Deallocate all of a file's blocks, including its metadata tree. */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	return punch_hole(ip, 0, 0);
}

/**
 * gfs2_free_journal_extents - Free cached journal bmap info
 * @jd: The journal
 *
 */

void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
{
	struct gfs2_journal_extent *jext;

	while (!list_empty(&jd->extent_list)) {
		jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
		list_del(&jext->list);
		kfree(jext);
	}
}

/**
 * gfs2_add_jextent - Add or merge a new extent to extent cache
 * @jd: The journal descriptor
 * @lblock: The logical block at start of new extent
 * @dblock: The physical block at start of new extent
 * @blocks: Size of extent in fs blocks
 *
 * Returns: 0 on success or -ENOMEM
 */

static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
{
	struct gfs2_journal_extent *jext;

	if (!list_empty(&jd->extent_list)) {
		jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
		if ((jext->dblock + jext->blocks) == dblock) {
			jext->blocks += blocks;
			return 0;
		}
	}

	jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
	if (jext == NULL)
		return -ENOMEM;
	jext->dblock = dblock;
	jext->lblock = lblock;
	jext->blocks = blocks;
	list_add_tail(&jext->list, &jd->extent_list);
	jd->nr_extents++;
	return 0;
}

/**
 * gfs2_map_journal_extents - Cache journal bmap info
 * @sdp: The super block
 * @jd: The journal to map
 *
 * Create a reusable "extent" mapping from all logical
 * blocks to all physical blocks for the given journal.  This will save
 * us time when writing journal blocks.  Most journals will have only one
 * extent that maps all their logical blocks.  That's because mkfs.gfs2
 * arranges the journal blocks sequentially to maximize performance.
 * So the extent would map the first block for the entire file length.
 * However, gfs2_jadd can happen while file activity is happening, so
 * those journals may not be sequential.  Less likely is the case where
 * the users created their own journals by mounting the metafs and
 * laying it out.  But it's still possible.  These journals might have
 * several extents.
 *
 * Returns: 0 on success, or error on failure
 */

int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
{
	u64 lblock = 0;
	u64 lblock_stop;
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct buffer_head bh;
	unsigned int shift = sdp->sd_sb.sb_bsize_shift;
	u64 size;
	int rc;

	lblock_stop = i_size_read(jd->jd_inode) >> shift;
	size = (lblock_stop - lblock) << shift;
	jd->nr_extents = 0;
	WARN_ON(!list_empty(&jd->extent_list));

	do {
		bh.b_state = 0;
		bh.b_blocknr = 0;
		bh.b_size = size;
		rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
		if (rc || !buffer_mapped(&bh))
			goto fail;
		rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
		if (rc)
			goto fail;
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while (size > 0);

	fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
		jd->nr_extents);
	return 0;

fail:
	fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
		rc, jd->jd_jid,
		(unsigned long long)(i_size_read(jd->jd_inode) - size),
		jd->nr_extents);
	fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
		rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
		bh.b_state, (unsigned long long)bh.b_size);
	gfs2_free_journal_extents(jd);
	return rc;
}

/**
 * gfs2_write_alloc_required - figure out if a write will require an allocation
 * @ip: the file being written to
 * @offset: the offset to write to
 * @len: the number of bytes being written
 *
 * Returns: 1 if an alloc is required, 0 otherwise
 */

int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
			      unsigned int len)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head bh;
	unsigned int shift;
	u64 lblock, lblock_stop, size;
	u64 end_of_file;

	if (!len)
		return 0;

	if (gfs2_is_stuffed(ip)) {
		if (offset + len > gfs2_max_stuffed_size(ip))
			return 1;
		return 0;
	}

	shift = sdp->sd_sb.sb_bsize_shift;
	BUG_ON(gfs2_is_dir(ip));
	end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
	lblock = offset >> shift;
	lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
	if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
		return 1;

	size = (lblock_stop - lblock) << shift;
	do {
		bh.b_state = 0;
		bh.b_size = size;
		gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
		if (!buffer_mapped(&bh))
			return 1;
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while (size > 0);

	return 0;
}

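/**
 * stuffed_zero_range - zero out part of a stuffed (inline) file
 * @inode: The inode
 * @offset: Start of the range to zero
 * @length: Length of the range
 *
 * For stuffed inodes, the data lives in the dinode block directly after
 * the dinode header, so the range can be cleared with a simple memset.
 * The range is clipped to the current file size first.
 *
 * Returns: errno
 */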
static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct buffer_head *dibh;
	int error;

	if (offset >= inode->i_size)
		return 0;
	if (offset + length > inode->i_size)
		length = inode->i_size - offset;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;
	gfs2_trans_add_meta(ip->i_gl, dibh);
	memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
	       length);
	brelse(dibh);
	return 0;
}

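/**
 * gfs2_journaled_truncate_range - drop a range from the page cache in chunks
 * @inode: The inode
 * @offset: Start of the range
 * @length: Length of the range
 *
 * Truncating journaled pages generates revokes, and one transaction can
 * only accommodate GFS2_JTRUNC_REVOKES of them.  The range is therefore
 * processed in chunks of at most that many blocks (trimmed toward page
 * boundaries), and the current transaction is ended and a new one started
 * whenever a chunk has dirtied it.
 *
 * Returns: errno
 */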
static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
					 loff_t length)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	int error;

	while (length) {
		struct gfs2_trans *tr;
		loff_t chunk;
		unsigned int offs;

		chunk = length;
		if (chunk > max_chunk)
			chunk = max_chunk;

		offs = offset & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache_range(inode, offset, chunk);
		offset += chunk;
		length -= chunk;

		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}
	return 0;
}

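/**
 * __gfs2_punch_hole - punch a hole into a file
 * @file: The file
 * @offset: Start of the hole
 * @length: Length of the hole
 *
 * Zeroes any partial blocks at the edges of the hole, drops the affected
 * range from the page cache, and then deallocates the whole blocks in
 * between via punch_hole().  Stuffed files are simply zeroed in place.
 *
 * Returns: errno
 */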
int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	if (gfs2_is_jdata(ip))
		error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
					 GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	if (gfs2_is_stuffed(ip)) {
		error = stuffed_zero_range(inode, offset, length);
		if (error)
			goto out;
	} else {
		unsigned int start_off, end_len, blocksize;

		blocksize = i_blocksize(inode);
		start_off = offset & (blocksize - 1);
		end_len = (offset + length) & (blocksize - 1);
		if (start_off) {
			unsigned int len = length;
			if (length > blocksize - start_off)
				len = blocksize - start_off;
			error = gfs2_block_zero_range(inode, offset, len);
			if (error)
				goto out;
			if (start_off + length < blocksize)
				end_len = 0;
		}
		if (end_len) {
			error = gfs2_block_zero_range(inode,
				offset + length - end_len, end_len);
			if (error)
				goto out;
		}
	}

	if (gfs2_is_jdata(ip)) {
		BUG_ON(!current->journal_info);
		gfs2_journaled_truncate_range(inode, offset, length);
	} else
		truncate_pagecache_range(inode, offset, offset + length - 1);

	file_update_time(file);
	mark_inode_dirty(inode);

	if (current->journal_info)
		gfs2_trans_end(sdp);

	if (!gfs2_is_stuffed(ip))
		error = punch_hole(ip, offset, length);

out:
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}