1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * btree.c - NILFS B-tree.
4  *
5  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6  *
7  * Written by Koji Sato.
8  */
9 
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/pagevec.h>
14 #include "nilfs.h"
15 #include "page.h"
16 #include "btnode.h"
17 #include "btree.h"
18 #include "alloc.h"
19 #include "dat.h"
20 
21 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
22 
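/*
 * Allocate a lookup path array covering all B-tree levels, with each
 * level's buffer, sibling buffer, index, and pending pointer requests
 * reset to their initial values.
 */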
23 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
24 {
25 	struct nilfs_btree_path *path;
26 	int level = NILFS_BTREE_LEVEL_DATA;
27 
28 	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
29 	if (path == NULL)
30 		goto out;
31 
32 	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
33 		path[level].bp_bh = NULL;
34 		path[level].bp_sib_bh = NULL;
35 		path[level].bp_index = 0;
36 		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
37 		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
38 		path[level].bp_op = NULL;
39 	}
40 
41 out:
42 	return path;
43 }
44 
45 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
46 {
47 	int level = NILFS_BTREE_LEVEL_DATA;
48 
49 	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
50 		brelse(path[level].bp_bh);
51 
52 	kmem_cache_free(nilfs_btree_path_cache, path);
53 }
54 
55 /*
56  * B-tree node operations
57  */
58 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
59 				     __u64 ptr, struct buffer_head **bhp)
60 {
61 	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
62 	struct address_space *btnc = btnc_inode->i_mapping;
63 	struct buffer_head *bh;
64 
65 	bh = nilfs_btnode_create_block(btnc, ptr);
66 	if (!bh)
67 		return -ENOMEM;
68 
69 	set_buffer_nilfs_volatile(bh);
70 	*bhp = bh;
71 	return 0;
72 }
73 
74 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
75 {
76 	return node->bn_flags;
77 }
78 
79 static void
80 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
81 {
82 	node->bn_flags = flags;
83 }
84 
85 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
86 {
87 	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
88 }
89 
90 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
91 {
92 	return node->bn_level;
93 }
94 
95 static void
96 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
97 {
98 	node->bn_level = level;
99 }
100 
101 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
102 {
103 	return le16_to_cpu(node->bn_nchildren);
104 }
105 
106 static void
107 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
108 {
109 	node->bn_nchildren = cpu_to_le16(nchildren);
110 }
111 
112 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
113 {
114 	return i_blocksize(btree->b_inode);
115 }
116 
117 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
118 {
119 	return btree->b_nchildren_per_block;
120 }
121 
122 static __le64 *
123 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
124 {
125 	return (__le64 *)((char *)(node + 1) +
126 			  (nilfs_btree_node_root(node) ?
127 			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
128 }
129 
130 static __le64 *
131 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
132 {
133 	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
134 }
135 
136 static __u64
137 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
138 {
139 	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
140 }
141 
142 static void
143 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
144 {
145 	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
146 }
147 
148 static __u64
149 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
150 			 int ncmax)
151 {
152 	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
153 }
154 
155 static void
156 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
157 			 int ncmax)
158 {
159 	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
160 }
161 
162 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
163 				  int level, int nchildren, int ncmax,
164 				  const __u64 *keys, const __u64 *ptrs)
165 {
166 	__le64 *dkeys;
167 	__le64 *dptrs;
168 	int i;
169 
170 	nilfs_btree_node_set_flags(node, flags);
171 	nilfs_btree_node_set_level(node, level);
172 	nilfs_btree_node_set_nchildren(node, nchildren);
173 
174 	dkeys = nilfs_btree_node_dkeys(node);
175 	dptrs = nilfs_btree_node_dptrs(node, ncmax);
176 	for (i = 0; i < nchildren; i++) {
177 		dkeys[i] = cpu_to_le64(keys[i]);
178 		dptrs[i] = cpu_to_le64(ptrs[i]);
179 	}
180 }
181 
182 /* Assume that the buffer heads corresponding to left and right are locked. */
183 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
184 				       struct nilfs_btree_node *right,
185 				       int n, int lncmax, int rncmax)
186 {
187 	__le64 *ldkeys, *rdkeys;
188 	__le64 *ldptrs, *rdptrs;
189 	int lnchildren, rnchildren;
190 
191 	ldkeys = nilfs_btree_node_dkeys(left);
192 	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
193 	lnchildren = nilfs_btree_node_get_nchildren(left);
194 
195 	rdkeys = nilfs_btree_node_dkeys(right);
196 	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
197 	rnchildren = nilfs_btree_node_get_nchildren(right);
198 
199 	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
200 	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
201 	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
202 	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
203 
204 	lnchildren += n;
205 	rnchildren -= n;
206 	nilfs_btree_node_set_nchildren(left, lnchildren);
207 	nilfs_btree_node_set_nchildren(right, rnchildren);
208 }
209 
210 /* Assume that the buffer heads corresponding to left and right are locked. */
211 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
212 					struct nilfs_btree_node *right,
213 					int n, int lncmax, int rncmax)
214 {
215 	__le64 *ldkeys, *rdkeys;
216 	__le64 *ldptrs, *rdptrs;
217 	int lnchildren, rnchildren;
218 
219 	ldkeys = nilfs_btree_node_dkeys(left);
220 	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
221 	lnchildren = nilfs_btree_node_get_nchildren(left);
222 
223 	rdkeys = nilfs_btree_node_dkeys(right);
224 	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
225 	rnchildren = nilfs_btree_node_get_nchildren(right);
226 
227 	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
228 	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
229 	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
230 	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
231 
232 	lnchildren -= n;
233 	rnchildren += n;
234 	nilfs_btree_node_set_nchildren(left, lnchildren);
235 	nilfs_btree_node_set_nchildren(right, rnchildren);
236 }
237 
238 /* Assume that the buffer head corresponding to node is locked. */
239 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
240 				    __u64 key, __u64 ptr, int ncmax)
241 {
242 	__le64 *dkeys;
243 	__le64 *dptrs;
244 	int nchildren;
245 
246 	dkeys = nilfs_btree_node_dkeys(node);
247 	dptrs = nilfs_btree_node_dptrs(node, ncmax);
248 	nchildren = nilfs_btree_node_get_nchildren(node);
249 	if (index < nchildren) {
250 		memmove(dkeys + index + 1, dkeys + index,
251 			(nchildren - index) * sizeof(*dkeys));
252 		memmove(dptrs + index + 1, dptrs + index,
253 			(nchildren - index) * sizeof(*dptrs));
254 	}
255 	dkeys[index] = cpu_to_le64(key);
256 	dptrs[index] = cpu_to_le64(ptr);
257 	nchildren++;
258 	nilfs_btree_node_set_nchildren(node, nchildren);
259 }
260 
261 /* Assume that the buffer head corresponding to node is locked. */
262 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
263 				    __u64 *keyp, __u64 *ptrp, int ncmax)
264 {
265 	__u64 key;
266 	__u64 ptr;
267 	__le64 *dkeys;
268 	__le64 *dptrs;
269 	int nchildren;
270 
271 	dkeys = nilfs_btree_node_dkeys(node);
272 	dptrs = nilfs_btree_node_dptrs(node, ncmax);
273 	key = le64_to_cpu(dkeys[index]);
274 	ptr = le64_to_cpu(dptrs[index]);
275 	nchildren = nilfs_btree_node_get_nchildren(node);
276 	if (keyp != NULL)
277 		*keyp = key;
278 	if (ptrp != NULL)
279 		*ptrp = ptr;
280 
281 	if (index < nchildren - 1) {
282 		memmove(dkeys + index, dkeys + index + 1,
283 			(nchildren - index - 1) * sizeof(*dkeys));
284 		memmove(dptrs + index, dptrs + index + 1,
285 			(nchildren - index - 1) * sizeof(*dptrs));
286 	}
287 	nchildren--;
288 	nilfs_btree_node_set_nchildren(node, nchildren);
289 }
290 
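/*
 * Binary-search 'node' for 'key'.  The index of the child to descend to
 * (or the leaf insertion point) is stored in *indexp, and a nonzero
 * value is returned only when an exact match was found.
 */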
291 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
292 				   __u64 key, int *indexp)
293 {
294 	__u64 nkey;
295 	int index, low, high, s;
296 
297 	/* binary search */
298 	low = 0;
299 	high = nilfs_btree_node_get_nchildren(node) - 1;
300 	index = 0;
301 	s = 0;
302 	while (low <= high) {
303 		index = (low + high) / 2;
304 		nkey = nilfs_btree_node_get_key(node, index);
305 		if (nkey == key) {
306 			s = 0;
307 			goto out;
308 		} else if (nkey < key) {
309 			low = index + 1;
310 			s = -1;
311 		} else {
312 			high = index - 1;
313 			s = 1;
314 		}
315 	}
316 
317 	/* adjust index */
318 	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
319 		if (s > 0 && index > 0)
320 			index--;
321 	} else if (s < 0)
322 		index++;
323 
324  out:
325 	*indexp = index;
326 
327 	return s == 0;
328 }
329 
330 /**
331  * nilfs_btree_node_broken - verify consistency of btree node
332  * @node: btree node block to be examined
333  * @size: node size (in bytes)
334  * @inode: host inode of btree
335  * @blocknr: block number
336  *
337  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
338  */
339 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
340 				   size_t size, struct inode *inode,
341 				   sector_t blocknr)
342 {
343 	int level, flags, nchildren;
344 	int ret = 0;
345 
346 	level = nilfs_btree_node_get_level(node);
347 	flags = nilfs_btree_node_get_flags(node);
348 	nchildren = nilfs_btree_node_get_nchildren(node);
349 
350 	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
351 		     level >= NILFS_BTREE_LEVEL_MAX ||
352 		     (flags & NILFS_BTREE_NODE_ROOT) ||
353 		     nchildren < 0 ||
354 		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
355 		nilfs_msg(inode->i_sb, KERN_CRIT,
356 			  "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
357 			  inode->i_ino, (unsigned long long)blocknr, level,
358 			  flags, nchildren);
359 		ret = 1;
360 	}
361 	return ret;
362 }
363 
364 /**
365  * nilfs_btree_root_broken - verify consistency of btree root node
366  * @node: btree root node to be examined
367  * @inode: host inode of btree
368  *
369  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
370  */
371 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
372 				   struct inode *inode)
373 {
374 	int level, flags, nchildren;
375 	int ret = 0;
376 
377 	level = nilfs_btree_node_get_level(node);
378 	flags = nilfs_btree_node_get_flags(node);
379 	nchildren = nilfs_btree_node_get_nchildren(node);
380 
381 	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
382 		     level >= NILFS_BTREE_LEVEL_MAX ||
383 		     nchildren < 0 ||
384 		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
385 		nilfs_msg(inode->i_sb, KERN_CRIT,
386 			  "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
387 			  inode->i_ino, level, flags, nchildren);
388 		ret = 1;
389 	}
390 	return ret;
391 }
392 
393 int nilfs_btree_broken_node_block(struct buffer_head *bh)
394 {
395 	struct inode *inode;
396 	int ret;
397 
398 	if (buffer_nilfs_checked(bh))
399 		return 0;
400 
401 	inode = bh->b_page->mapping->host;
402 	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
403 				      bh->b_size, inode, bh->b_blocknr);
404 	if (likely(!ret))
405 		set_buffer_nilfs_checked(bh);
406 	return ret;
407 }
408 
409 static struct nilfs_btree_node *
410 nilfs_btree_get_root(const struct nilfs_bmap *btree)
411 {
412 	return (struct nilfs_btree_node *)btree->b_u.u_data;
413 }
414 
415 static struct nilfs_btree_node *
416 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
417 {
418 	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
419 }
420 
421 static struct nilfs_btree_node *
422 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
423 {
424 	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
425 }
426 
427 static int nilfs_btree_height(const struct nilfs_bmap *btree)
428 {
429 	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
430 }
431 
432 static struct nilfs_btree_node *
433 nilfs_btree_get_node(const struct nilfs_bmap *btree,
434 		     const struct nilfs_btree_path *path,
435 		     int level, int *ncmaxp)
436 {
437 	struct nilfs_btree_node *node;
438 
439 	if (level == nilfs_btree_height(btree) - 1) {
440 		node = nilfs_btree_get_root(btree);
441 		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
442 	} else {
443 		node = nilfs_btree_get_nonroot_node(path, level);
444 		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
445 	}
446 	return node;
447 }
448 
449 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
450 				struct nilfs_btree_node *node, int level)
451 {
452 	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
453 		dump_stack();
454 		nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
455 			  "btree level mismatch (ino=%lu): %d != %d",
456 			  btree->b_inode->i_ino,
457 			  nilfs_btree_node_get_level(node), level);
458 		return 1;
459 	}
460 	return 0;
461 }
462 
463 struct nilfs_btree_readahead_info {
464 	struct nilfs_btree_node *node;	/* parent node */
465 	int max_ra_blocks;		/* max number of blocks to read ahead */
466 	int index;			/* current index on the parent node */
467 	int ncmax;			/* number of children in the parent node */
468 };
469 
470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
471 				   struct buffer_head **bhp,
472 				   const struct nilfs_btree_readahead_info *ra)
473 {
474 	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
475 	struct address_space *btnc = btnc_inode->i_mapping;
476 	struct buffer_head *bh, *ra_bh;
477 	sector_t submit_ptr = 0;
478 	int ret;
479 
480 	ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
481 					&submit_ptr);
482 	if (ret) {
483 		if (likely(ret == -EEXIST))
484 			goto out_check;
485 		if (ret == -ENOENT) {
486 			/*
487 			 * Block address translation failed due to invalid
488 			 * value of 'ptr'.  In this case, return internal code
489 			 * -EINVAL (broken bmap) to notify bmap layer of fatal
490 			 * metadata corruption.
491 			 */
492 			ret = -EINVAL;
493 		}
494 		return ret;
495 	}
496 
497 	if (ra) {
498 		int i, n;
499 		__u64 ptr2;
500 
501 		/* read ahead sibling nodes */
502 		for (n = ra->max_ra_blocks, i = ra->index + 1;
503 		     n > 0 && i < ra->ncmax; n--, i++) {
504 			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
505 
506 			ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
507 							REQ_OP_READ, REQ_RAHEAD,
508 							&ra_bh, &submit_ptr);
509 			if (likely(!ret || ret == -EEXIST))
510 				brelse(ra_bh);
511 			else if (ret != -EBUSY)
512 				break;
513 			if (!buffer_locked(bh))
514 				goto out_no_wait;
515 		}
516 	}
517 
518 	wait_on_buffer(bh);
519 
520  out_no_wait:
521 	if (!buffer_uptodate(bh)) {
522 		nilfs_msg(btree->b_inode->i_sb, KERN_ERR,
523 			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
524 			  btree->b_inode->i_ino, (unsigned long long)ptr);
525 		brelse(bh);
526 		return -EIO;
527 	}
528 
529  out_check:
530 	if (nilfs_btree_broken_node_block(bh)) {
531 		clear_buffer_uptodate(bh);
532 		brelse(bh);
533 		return -EINVAL;
534 	}
535 
536 	*bhp = bh;
537 	return 0;
538 }
539 
540 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
541 				   struct buffer_head **bhp)
542 {
543 	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
544 }
545 
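/*
 * Walk down from the root to 'minlevel' looking up 'key', filling in the
 * path array on the way.  When 'readahead' is set, sibling node blocks
 * are submitted for read-ahead while fetching the bottom-level node.
 */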
546 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
547 				 struct nilfs_btree_path *path,
548 				 __u64 key, __u64 *ptrp, int minlevel,
549 				 int readahead)
550 {
551 	struct nilfs_btree_node *node;
552 	struct nilfs_btree_readahead_info p, *ra;
553 	__u64 ptr;
554 	int level, index, found, ncmax, ret;
555 
556 	node = nilfs_btree_get_root(btree);
557 	level = nilfs_btree_node_get_level(node);
558 	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
559 		return -ENOENT;
560 
561 	found = nilfs_btree_node_lookup(node, key, &index);
562 	ptr = nilfs_btree_node_get_ptr(node, index,
563 				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
564 	path[level].bp_bh = NULL;
565 	path[level].bp_index = index;
566 
567 	ncmax = nilfs_btree_nchildren_per_block(btree);
568 
569 	while (--level >= minlevel) {
570 		ra = NULL;
571 		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
572 			p.node = nilfs_btree_get_node(btree, path, level + 1,
573 						      &p.ncmax);
574 			p.index = index;
575 			p.max_ra_blocks = 7;
576 			ra = &p;
577 		}
578 		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
579 					      ra);
580 		if (ret < 0)
581 			return ret;
582 
583 		node = nilfs_btree_get_nonroot_node(path, level);
584 		if (nilfs_btree_bad_node(btree, node, level))
585 			return -EINVAL;
586 		if (!found)
587 			found = nilfs_btree_node_lookup(node, key, &index);
588 		else
589 			index = 0;
590 		if (index < ncmax) {
591 			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
592 		} else {
593 			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
594 			/* insert */
595 			ptr = NILFS_BMAP_INVALID_PTR;
596 		}
597 		path[level].bp_index = index;
598 	}
599 	if (!found)
600 		return -ENOENT;
601 
602 	if (ptrp != NULL)
603 		*ptrp = ptr;
604 
605 	return 0;
606 }
607 
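/*
 * Follow the rightmost pointers from the root down to level 1 to find
 * the last (largest) key stored in the tree.
 */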
608 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
609 				      struct nilfs_btree_path *path,
610 				      __u64 *keyp, __u64 *ptrp)
611 {
612 	struct nilfs_btree_node *node;
613 	__u64 ptr;
614 	int index, level, ncmax, ret;
615 
616 	node = nilfs_btree_get_root(btree);
617 	index = nilfs_btree_node_get_nchildren(node) - 1;
618 	if (index < 0)
619 		return -ENOENT;
620 	level = nilfs_btree_node_get_level(node);
621 	ptr = nilfs_btree_node_get_ptr(node, index,
622 				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
623 	path[level].bp_bh = NULL;
624 	path[level].bp_index = index;
625 	ncmax = nilfs_btree_nchildren_per_block(btree);
626 
627 	for (level--; level > 0; level--) {
628 		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
629 		if (ret < 0)
630 			return ret;
631 		node = nilfs_btree_get_nonroot_node(path, level);
632 		if (nilfs_btree_bad_node(btree, node, level))
633 			return -EINVAL;
634 		index = nilfs_btree_node_get_nchildren(node) - 1;
635 		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
636 		path[level].bp_index = index;
637 	}
638 
639 	if (keyp != NULL)
640 		*keyp = nilfs_btree_node_get_key(node, index);
641 	if (ptrp != NULL)
642 		*ptrp = ptr;
643 
644 	return 0;
645 }
646 
647 /**
648  * nilfs_btree_get_next_key - get next valid key from btree path array
649  * @btree: bmap struct of btree
650  * @path: array of nilfs_btree_path struct
651  * @minlevel: start level
652  * @nextkey: place to store the next valid key
653  *
654  * Return Value: If a next key was found, 0 is returned. Otherwise,
655  * -ENOENT is returned.
656  */
657 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
658 				    const struct nilfs_btree_path *path,
659 				    int minlevel, __u64 *nextkey)
660 {
661 	struct nilfs_btree_node *node;
662 	int maxlevel = nilfs_btree_height(btree) - 1;
663 	int index, next_adj, level;
664 
665 	/* Next index is already set to bp_index for leaf nodes. */
666 	next_adj = 0;
667 	for (level = minlevel; level <= maxlevel; level++) {
668 		if (level == maxlevel)
669 			node = nilfs_btree_get_root(btree);
670 		else
671 			node = nilfs_btree_get_nonroot_node(path, level);
672 
673 		index = path[level].bp_index + next_adj;
674 		if (index < nilfs_btree_node_get_nchildren(node)) {
675 			/* Next key is in this node */
676 			*nextkey = nilfs_btree_node_get_key(node, index);
677 			return 0;
678 		}
679 		/* For non-leaf nodes, next index is stored at bp_index + 1. */
680 		next_adj = 1;
681 	}
682 	return -ENOENT;
683 }
684 
685 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
686 			      __u64 key, int level, __u64 *ptrp)
687 {
688 	struct nilfs_btree_path *path;
689 	int ret;
690 
691 	path = nilfs_btree_alloc_path();
692 	if (path == NULL)
693 		return -ENOMEM;
694 
695 	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
696 
697 	nilfs_btree_free_path(path);
698 
699 	return ret;
700 }
701 
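/*
 * Look up 'key' and count how many of the following keys map to
 * consecutive disk blocks, up to 'maxblocks'.  The first block address
 * is stored in *ptrp and the number of contiguous blocks is returned.
 */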
702 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
703 				     __u64 key, __u64 *ptrp,
704 				     unsigned int maxblocks)
705 {
706 	struct nilfs_btree_path *path;
707 	struct nilfs_btree_node *node;
708 	struct inode *dat = NULL;
709 	__u64 ptr, ptr2;
710 	sector_t blocknr;
711 	int level = NILFS_BTREE_LEVEL_NODE_MIN;
712 	int ret, cnt, index, maxlevel, ncmax;
713 	struct nilfs_btree_readahead_info p;
714 
715 	path = nilfs_btree_alloc_path();
716 	if (path == NULL)
717 		return -ENOMEM;
718 
719 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
720 	if (ret < 0)
721 		goto out;
722 
723 	if (NILFS_BMAP_USE_VBN(btree)) {
724 		dat = nilfs_bmap_get_dat(btree);
725 		ret = nilfs_dat_translate(dat, ptr, &blocknr);
726 		if (ret < 0)
727 			goto out;
728 		ptr = blocknr;
729 	}
730 	cnt = 1;
731 	if (cnt == maxblocks)
732 		goto end;
733 
734 	maxlevel = nilfs_btree_height(btree) - 1;
735 	node = nilfs_btree_get_node(btree, path, level, &ncmax);
736 	index = path[level].bp_index + 1;
737 	for (;;) {
738 		while (index < nilfs_btree_node_get_nchildren(node)) {
739 			if (nilfs_btree_node_get_key(node, index) !=
740 			    key + cnt)
741 				goto end;
742 			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
743 			if (dat) {
744 				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
745 				if (ret < 0)
746 					goto out;
747 				ptr2 = blocknr;
748 			}
749 			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
750 				goto end;
751 			index++;
752 			continue;
753 		}
754 		if (level == maxlevel)
755 			break;
756 
757 		/* look-up right sibling node */
758 		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
759 		p.index = path[level + 1].bp_index + 1;
760 		p.max_ra_blocks = 7;
761 		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
762 		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
763 			break;
764 		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
765 		path[level + 1].bp_index = p.index;
766 
767 		brelse(path[level].bp_bh);
768 		path[level].bp_bh = NULL;
769 
770 		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
771 					      &p);
772 		if (ret < 0)
773 			goto out;
774 		node = nilfs_btree_get_nonroot_node(path, level);
775 		ncmax = nilfs_btree_nchildren_per_block(btree);
776 		index = 0;
777 		path[level].bp_index = index;
778 	}
779  end:
780 	*ptrp = ptr;
781 	ret = cnt;
782  out:
783 	nilfs_btree_free_path(path);
784 	return ret;
785 }
786 
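/*
 * Rewrite the separator key for the current child in each ancestor
 * node, walking upward for as long as the child is the leftmost entry
 * of its parent.
 */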
787 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
788 				    struct nilfs_btree_path *path,
789 				    int level, __u64 key)
790 {
791 	if (level < nilfs_btree_height(btree) - 1) {
792 		do {
793 			nilfs_btree_node_set_key(
794 				nilfs_btree_get_nonroot_node(path, level),
795 				path[level].bp_index, key);
796 			if (!buffer_dirty(path[level].bp_bh))
797 				mark_buffer_dirty(path[level].bp_bh);
798 		} while ((path[level].bp_index == 0) &&
799 			 (++level < nilfs_btree_height(btree) - 1));
800 	}
801 
802 	/* root */
803 	if (level == nilfs_btree_height(btree) - 1) {
804 		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
805 					 path[level].bp_index, key);
806 	}
807 }
808 
809 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
810 				  struct nilfs_btree_path *path,
811 				  int level, __u64 *keyp, __u64 *ptrp)
812 {
813 	struct nilfs_btree_node *node;
814 	int ncblk;
815 
816 	if (level < nilfs_btree_height(btree) - 1) {
817 		node = nilfs_btree_get_nonroot_node(path, level);
818 		ncblk = nilfs_btree_nchildren_per_block(btree);
819 		nilfs_btree_node_insert(node, path[level].bp_index,
820 					*keyp, *ptrp, ncblk);
821 		if (!buffer_dirty(path[level].bp_bh))
822 			mark_buffer_dirty(path[level].bp_bh);
823 
824 		if (path[level].bp_index == 0)
825 			nilfs_btree_promote_key(btree, path, level + 1,
826 						nilfs_btree_node_get_key(node,
827 									 0));
828 	} else {
829 		node = nilfs_btree_get_root(btree);
830 		nilfs_btree_node_insert(node, path[level].bp_index,
831 					*keyp, *ptrp,
832 					NILFS_BTREE_ROOT_NCHILDREN_MAX);
833 	}
834 }
835 
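/*
 * Rebalance with the left sibling before inserting: move entries left
 * so both nodes end up roughly half full, adjust the path if the
 * insertion point moved, then perform the insert.
 */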
836 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
837 				   struct nilfs_btree_path *path,
838 				   int level, __u64 *keyp, __u64 *ptrp)
839 {
840 	struct nilfs_btree_node *node, *left;
841 	int nchildren, lnchildren, n, move, ncblk;
842 
843 	node = nilfs_btree_get_nonroot_node(path, level);
844 	left = nilfs_btree_get_sib_node(path, level);
845 	nchildren = nilfs_btree_node_get_nchildren(node);
846 	lnchildren = nilfs_btree_node_get_nchildren(left);
847 	ncblk = nilfs_btree_nchildren_per_block(btree);
848 	move = 0;
849 
850 	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
851 	if (n > path[level].bp_index) {
852 		/* move insert point */
853 		n--;
854 		move = 1;
855 	}
856 
857 	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
858 
859 	if (!buffer_dirty(path[level].bp_bh))
860 		mark_buffer_dirty(path[level].bp_bh);
861 	if (!buffer_dirty(path[level].bp_sib_bh))
862 		mark_buffer_dirty(path[level].bp_sib_bh);
863 
864 	nilfs_btree_promote_key(btree, path, level + 1,
865 				nilfs_btree_node_get_key(node, 0));
866 
867 	if (move) {
868 		brelse(path[level].bp_bh);
869 		path[level].bp_bh = path[level].bp_sib_bh;
870 		path[level].bp_sib_bh = NULL;
871 		path[level].bp_index += lnchildren;
872 		path[level + 1].bp_index--;
873 	} else {
874 		brelse(path[level].bp_sib_bh);
875 		path[level].bp_sib_bh = NULL;
876 		path[level].bp_index -= n;
877 	}
878 
879 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
880 }
881 
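/*
 * Rebalance with the right sibling before inserting: move entries
 * right, update the separator key in the parent, adjust the path if
 * the insertion point moved, then perform the insert.
 */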
882 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
883 				    struct nilfs_btree_path *path,
884 				    int level, __u64 *keyp, __u64 *ptrp)
885 {
886 	struct nilfs_btree_node *node, *right;
887 	int nchildren, rnchildren, n, move, ncblk;
888 
889 	node = nilfs_btree_get_nonroot_node(path, level);
890 	right = nilfs_btree_get_sib_node(path, level);
891 	nchildren = nilfs_btree_node_get_nchildren(node);
892 	rnchildren = nilfs_btree_node_get_nchildren(right);
893 	ncblk = nilfs_btree_nchildren_per_block(btree);
894 	move = 0;
895 
896 	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
897 	if (n > nchildren - path[level].bp_index) {
898 		/* move insert point */
899 		n--;
900 		move = 1;
901 	}
902 
903 	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
904 
905 	if (!buffer_dirty(path[level].bp_bh))
906 		mark_buffer_dirty(path[level].bp_bh);
907 	if (!buffer_dirty(path[level].bp_sib_bh))
908 		mark_buffer_dirty(path[level].bp_sib_bh);
909 
910 	path[level + 1].bp_index++;
911 	nilfs_btree_promote_key(btree, path, level + 1,
912 				nilfs_btree_node_get_key(right, 0));
913 	path[level + 1].bp_index--;
914 
915 	if (move) {
916 		brelse(path[level].bp_bh);
917 		path[level].bp_bh = path[level].bp_sib_bh;
918 		path[level].bp_sib_bh = NULL;
919 		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
920 		path[level + 1].bp_index++;
921 	} else {
922 		brelse(path[level].bp_sib_bh);
923 		path[level].bp_sib_bh = NULL;
924 	}
925 
926 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
927 }
928 
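/*
 * Split a full node: move roughly half of its entries into the new
 * right sibling, insert the new entry on the appropriate side, and
 * pass the sibling's first key and new block pointer up to the parent
 * level.
 */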
929 static void nilfs_btree_split(struct nilfs_bmap *btree,
930 			      struct nilfs_btree_path *path,
931 			      int level, __u64 *keyp, __u64 *ptrp)
932 {
933 	struct nilfs_btree_node *node, *right;
934 	int nchildren, n, move, ncblk;
935 
936 	node = nilfs_btree_get_nonroot_node(path, level);
937 	right = nilfs_btree_get_sib_node(path, level);
938 	nchildren = nilfs_btree_node_get_nchildren(node);
939 	ncblk = nilfs_btree_nchildren_per_block(btree);
940 	move = 0;
941 
942 	n = (nchildren + 1) / 2;
943 	if (n > nchildren - path[level].bp_index) {
944 		n--;
945 		move = 1;
946 	}
947 
948 	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
949 
950 	if (!buffer_dirty(path[level].bp_bh))
951 		mark_buffer_dirty(path[level].bp_bh);
952 	if (!buffer_dirty(path[level].bp_sib_bh))
953 		mark_buffer_dirty(path[level].bp_sib_bh);
954 
955 	if (move) {
956 		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
957 		nilfs_btree_node_insert(right, path[level].bp_index,
958 					*keyp, *ptrp, ncblk);
959 
960 		*keyp = nilfs_btree_node_get_key(right, 0);
961 		*ptrp = path[level].bp_newreq.bpr_ptr;
962 
963 		brelse(path[level].bp_bh);
964 		path[level].bp_bh = path[level].bp_sib_bh;
965 		path[level].bp_sib_bh = NULL;
966 	} else {
967 		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
968 
969 		*keyp = nilfs_btree_node_get_key(right, 0);
970 		*ptrp = path[level].bp_newreq.bpr_ptr;
971 
972 		brelse(path[level].bp_sib_bh);
973 		path[level].bp_sib_bh = NULL;
974 	}
975 
976 	path[level + 1].bp_index++;
977 }
978 
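/*
 * Grow the tree by one level: push all root entries down into a newly
 * allocated child node, raise the root's level, and pass the child's
 * first key and block pointer up for insertion into the root.
 */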
979 static void nilfs_btree_grow(struct nilfs_bmap *btree,
980 			     struct nilfs_btree_path *path,
981 			     int level, __u64 *keyp, __u64 *ptrp)
982 {
983 	struct nilfs_btree_node *root, *child;
984 	int n, ncblk;
985 
986 	root = nilfs_btree_get_root(btree);
987 	child = nilfs_btree_get_sib_node(path, level);
988 	ncblk = nilfs_btree_nchildren_per_block(btree);
989 
990 	n = nilfs_btree_node_get_nchildren(root);
991 
992 	nilfs_btree_node_move_right(root, child, n,
993 				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
994 	nilfs_btree_node_set_level(root, level + 1);
995 
996 	if (!buffer_dirty(path[level].bp_sib_bh))
997 		mark_buffer_dirty(path[level].bp_sib_bh);
998 
999 	path[level].bp_bh = path[level].bp_sib_bh;
1000 	path[level].bp_sib_bh = NULL;
1001 
1002 	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1003 
1004 	*keyp = nilfs_btree_node_get_key(child, 0);
1005 	*ptrp = path[level].bp_newreq.bpr_ptr;
1006 }
1007 
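/*
 * Suggest an allocation hint: the pointer just left of the insertion
 * point at the bottom level if one exists, otherwise the pointer to the
 * bottom-level node itself taken from its parent.
 */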
1008 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1009 				   const struct nilfs_btree_path *path)
1010 {
1011 	struct nilfs_btree_node *node;
1012 	int level, ncmax;
1013 
1014 	if (path == NULL)
1015 		return NILFS_BMAP_INVALID_PTR;
1016 
1017 	/* left sibling */
1018 	level = NILFS_BTREE_LEVEL_NODE_MIN;
1019 	if (path[level].bp_index > 0) {
1020 		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1021 		return nilfs_btree_node_get_ptr(node,
1022 						path[level].bp_index - 1,
1023 						ncmax);
1024 	}
1025 
1026 	/* parent */
1027 	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1028 	if (level <= nilfs_btree_height(btree) - 1) {
1029 		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1030 		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1031 						ncmax);
1032 	}
1033 
1034 	return NILFS_BMAP_INVALID_PTR;
1035 }
1036 
1037 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1038 				       const struct nilfs_btree_path *path,
1039 				       __u64 key)
1040 {
1041 	__u64 ptr;
1042 
1043 	ptr = nilfs_bmap_find_target_seq(btree, key);
1044 	if (ptr != NILFS_BMAP_INVALID_PTR)
1045 		/* sequential access */
1046 		return ptr;
1047 
1048 	ptr = nilfs_btree_find_near(btree, path);
1049 	if (ptr != NILFS_BMAP_INVALID_PTR)
1050 		/* near */
1051 		return ptr;
1052 
1053 	/* block group */
1054 	return nilfs_bmap_find_target_in_group(btree);
1055 }
1056 
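/*
 * Decide, level by level, how the insertion will be carried out (plain
 * insert, carry into a sibling, split, or grow the root), allocating
 * any new block pointers needed and recording the chosen operation in
 * the path so nilfs_btree_commit_insert() can apply it.
 */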
1057 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1058 				      struct nilfs_btree_path *path,
1059 				      int *levelp, __u64 key, __u64 ptr,
1060 				      struct nilfs_bmap_stats *stats)
1061 {
1062 	struct buffer_head *bh;
1063 	struct nilfs_btree_node *node, *parent, *sib;
1064 	__u64 sibptr;
1065 	int pindex, level, ncmax, ncblk, ret;
1066 	struct inode *dat = NULL;
1067 
1068 	stats->bs_nblocks = 0;
1069 	level = NILFS_BTREE_LEVEL_DATA;
1070 
1071 	/* allocate a new ptr for data block */
1072 	if (NILFS_BMAP_USE_VBN(btree)) {
1073 		path[level].bp_newreq.bpr_ptr =
1074 			nilfs_btree_find_target_v(btree, path, key);
1075 		dat = nilfs_bmap_get_dat(btree);
1076 	}
1077 
1078 	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1079 	if (ret < 0)
1080 		goto err_out_data;
1081 
1082 	ncblk = nilfs_btree_nchildren_per_block(btree);
1083 
1084 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1085 	     level < nilfs_btree_height(btree) - 1;
1086 	     level++) {
1087 		node = nilfs_btree_get_nonroot_node(path, level);
1088 		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1089 			path[level].bp_op = nilfs_btree_do_insert;
1090 			stats->bs_nblocks++;
1091 			goto out;
1092 		}
1093 
1094 		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1095 		pindex = path[level + 1].bp_index;
1096 
1097 		/* left sibling */
1098 		if (pindex > 0) {
1099 			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1100 							  ncmax);
1101 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1102 			if (ret < 0)
1103 				goto err_out_child_node;
1104 			sib = (struct nilfs_btree_node *)bh->b_data;
1105 			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1106 				path[level].bp_sib_bh = bh;
1107 				path[level].bp_op = nilfs_btree_carry_left;
1108 				stats->bs_nblocks++;
1109 				goto out;
1110 			} else {
1111 				brelse(bh);
1112 			}
1113 		}
1114 
1115 		/* right sibling */
1116 		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1117 			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1118 							  ncmax);
1119 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1120 			if (ret < 0)
1121 				goto err_out_child_node;
1122 			sib = (struct nilfs_btree_node *)bh->b_data;
1123 			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1124 				path[level].bp_sib_bh = bh;
1125 				path[level].bp_op = nilfs_btree_carry_right;
1126 				stats->bs_nblocks++;
1127 				goto out;
1128 			} else {
1129 				brelse(bh);
1130 			}
1131 		}
1132 
1133 		/* split */
1134 		path[level].bp_newreq.bpr_ptr =
1135 			path[level - 1].bp_newreq.bpr_ptr + 1;
1136 		ret = nilfs_bmap_prepare_alloc_ptr(btree,
1137 						   &path[level].bp_newreq, dat);
1138 		if (ret < 0)
1139 			goto err_out_child_node;
1140 		ret = nilfs_btree_get_new_block(btree,
1141 						path[level].bp_newreq.bpr_ptr,
1142 						&bh);
1143 		if (ret < 0)
1144 			goto err_out_curr_node;
1145 
1146 		stats->bs_nblocks++;
1147 
1148 		sib = (struct nilfs_btree_node *)bh->b_data;
1149 		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1150 		path[level].bp_sib_bh = bh;
1151 		path[level].bp_op = nilfs_btree_split;
1152 	}
1153 
1154 	/* root */
1155 	node = nilfs_btree_get_root(btree);
1156 	if (nilfs_btree_node_get_nchildren(node) <
1157 	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1158 		path[level].bp_op = nilfs_btree_do_insert;
1159 		stats->bs_nblocks++;
1160 		goto out;
1161 	}
1162 
1163 	/* grow */
1164 	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1165 	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1166 	if (ret < 0)
1167 		goto err_out_child_node;
1168 	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1169 					&bh);
1170 	if (ret < 0)
1171 		goto err_out_curr_node;
1172 
1173 	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1174 			      0, level, 0, ncblk, NULL, NULL);
1175 	path[level].bp_sib_bh = bh;
1176 	path[level].bp_op = nilfs_btree_grow;
1177 
1178 	level++;
1179 	path[level].bp_op = nilfs_btree_do_insert;
1180 
1181 	/* a newly-created node block and a data block are added */
1182 	stats->bs_nblocks += 2;
1183 
1184 	/* success */
1185  out:
1186 	*levelp = level;
1187 	return ret;
1188 
1189 	/* error */
1190  err_out_curr_node:
1191 	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1192  err_out_child_node:
1193 	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1194 		nilfs_btnode_delete(path[level].bp_sib_bh);
1195 		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1196 
1197 	}
1198 
1199 	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1200  err_out_data:
1201 	*levelp = level;
1202 	stats->bs_nblocks = 0;
1203 	return ret;
1204 }
1205 
1206 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1207 				      struct nilfs_btree_path *path,
1208 				      int maxlevel, __u64 key, __u64 ptr)
1209 {
1210 	struct inode *dat = NULL;
1211 	int level;
1212 
1213 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1214 	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1215 	if (NILFS_BMAP_USE_VBN(btree)) {
1216 		nilfs_bmap_set_target_v(btree, key, ptr);
1217 		dat = nilfs_bmap_get_dat(btree);
1218 	}
1219 
1220 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1221 		nilfs_bmap_commit_alloc_ptr(btree,
1222 					    &path[level - 1].bp_newreq, dat);
1223 		path[level].bp_op(btree, path, level, &key, &ptr);
1224 	}
1225 
1226 	if (!nilfs_bmap_dirty(btree))
1227 		nilfs_bmap_set_dirty(btree);
1228 }
1229 
1230 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1231 {
1232 	struct nilfs_btree_path *path;
1233 	struct nilfs_bmap_stats stats;
1234 	int level, ret;
1235 
1236 	path = nilfs_btree_alloc_path();
1237 	if (path == NULL)
1238 		return -ENOMEM;
1239 
1240 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1241 				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1242 	if (ret != -ENOENT) {
1243 		if (ret == 0)
1244 			ret = -EEXIST;
1245 		goto out;
1246 	}
1247 
1248 	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1249 	if (ret < 0)
1250 		goto out;
1251 	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1252 	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1253 
1254  out:
1255 	nilfs_btree_free_path(path);
1256 	return ret;
1257 }
1258 
1259 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1260 				  struct nilfs_btree_path *path,
1261 				  int level, __u64 *keyp, __u64 *ptrp)
1262 {
1263 	struct nilfs_btree_node *node;
1264 	int ncblk;
1265 
1266 	if (level < nilfs_btree_height(btree) - 1) {
1267 		node = nilfs_btree_get_nonroot_node(path, level);
1268 		ncblk = nilfs_btree_nchildren_per_block(btree);
1269 		nilfs_btree_node_delete(node, path[level].bp_index,
1270 					keyp, ptrp, ncblk);
1271 		if (!buffer_dirty(path[level].bp_bh))
1272 			mark_buffer_dirty(path[level].bp_bh);
1273 		if (path[level].bp_index == 0)
1274 			nilfs_btree_promote_key(btree, path, level + 1,
1275 				nilfs_btree_node_get_key(node, 0));
1276 	} else {
1277 		node = nilfs_btree_get_root(btree);
1278 		nilfs_btree_node_delete(node, path[level].bp_index,
1279 					keyp, ptrp,
1280 					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1281 	}
1282 }
1283 
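/*
 * After deleting, refill an underflowing node by moving entries in
 * from its left sibling and updating the separator key in the parent.
 */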
1284 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1285 				    struct nilfs_btree_path *path,
1286 				    int level, __u64 *keyp, __u64 *ptrp)
1287 {
1288 	struct nilfs_btree_node *node, *left;
1289 	int nchildren, lnchildren, n, ncblk;
1290 
1291 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1292 
1293 	node = nilfs_btree_get_nonroot_node(path, level);
1294 	left = nilfs_btree_get_sib_node(path, level);
1295 	nchildren = nilfs_btree_node_get_nchildren(node);
1296 	lnchildren = nilfs_btree_node_get_nchildren(left);
1297 	ncblk = nilfs_btree_nchildren_per_block(btree);
1298 
1299 	n = (nchildren + lnchildren) / 2 - nchildren;
1300 
1301 	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1302 
1303 	if (!buffer_dirty(path[level].bp_bh))
1304 		mark_buffer_dirty(path[level].bp_bh);
1305 	if (!buffer_dirty(path[level].bp_sib_bh))
1306 		mark_buffer_dirty(path[level].bp_sib_bh);
1307 
1308 	nilfs_btree_promote_key(btree, path, level + 1,
1309 				nilfs_btree_node_get_key(node, 0));
1310 
1311 	brelse(path[level].bp_sib_bh);
1312 	path[level].bp_sib_bh = NULL;
1313 	path[level].bp_index += n;
1314 }
1315 
1316 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1317 				     struct nilfs_btree_path *path,
1318 				     int level, __u64 *keyp, __u64 *ptrp)
1319 {
1320 	struct nilfs_btree_node *node, *right;
1321 	int nchildren, rnchildren, n, ncblk;
1322 
1323 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1324 
1325 	node = nilfs_btree_get_nonroot_node(path, level);
1326 	right = nilfs_btree_get_sib_node(path, level);
1327 	nchildren = nilfs_btree_node_get_nchildren(node);
1328 	rnchildren = nilfs_btree_node_get_nchildren(right);
1329 	ncblk = nilfs_btree_nchildren_per_block(btree);
1330 
1331 	n = (nchildren + rnchildren) / 2 - nchildren;
1332 
1333 	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1334 
1335 	if (!buffer_dirty(path[level].bp_bh))
1336 		mark_buffer_dirty(path[level].bp_bh);
1337 	if (!buffer_dirty(path[level].bp_sib_bh))
1338 		mark_buffer_dirty(path[level].bp_sib_bh);
1339 
1340 	path[level + 1].bp_index++;
1341 	nilfs_btree_promote_key(btree, path, level + 1,
1342 				nilfs_btree_node_get_key(right, 0));
1343 	path[level + 1].bp_index--;
1344 
1345 	brelse(path[level].bp_sib_bh);
1346 	path[level].bp_sib_bh = NULL;
1347 }
1348 
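/*
 * After deleting, merge the remaining entries of this node into its
 * left sibling and dispose of the now-empty node block.
 */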
1349 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1350 				    struct nilfs_btree_path *path,
1351 				    int level, __u64 *keyp, __u64 *ptrp)
1352 {
1353 	struct nilfs_btree_node *node, *left;
1354 	int n, ncblk;
1355 
1356 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1357 
1358 	node = nilfs_btree_get_nonroot_node(path, level);
1359 	left = nilfs_btree_get_sib_node(path, level);
1360 	ncblk = nilfs_btree_nchildren_per_block(btree);
1361 
1362 	n = nilfs_btree_node_get_nchildren(node);
1363 
1364 	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1365 
1366 	if (!buffer_dirty(path[level].bp_sib_bh))
1367 		mark_buffer_dirty(path[level].bp_sib_bh);
1368 
1369 	nilfs_btnode_delete(path[level].bp_bh);
1370 	path[level].bp_bh = path[level].bp_sib_bh;
1371 	path[level].bp_sib_bh = NULL;
1372 	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1373 }
1374 
1375 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1376 				     struct nilfs_btree_path *path,
1377 				     int level, __u64 *keyp, __u64 *ptrp)
1378 {
1379 	struct nilfs_btree_node *node, *right;
1380 	int n, ncblk;
1381 
1382 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1383 
1384 	node = nilfs_btree_get_nonroot_node(path, level);
1385 	right = nilfs_btree_get_sib_node(path, level);
1386 	ncblk = nilfs_btree_nchildren_per_block(btree);
1387 
1388 	n = nilfs_btree_node_get_nchildren(right);
1389 
1390 	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1391 
1392 	if (!buffer_dirty(path[level].bp_bh))
1393 		mark_buffer_dirty(path[level].bp_bh);
1394 
1395 	nilfs_btnode_delete(path[level].bp_sib_bh);
1396 	path[level].bp_sib_bh = NULL;
1397 	path[level + 1].bp_index++;
1398 }
1399 
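/*
 * Shrink the tree by one level: pull the entries of the root's only
 * remaining child back into the root and delete the child block.
 */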
1400 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1401 			       struct nilfs_btree_path *path,
1402 			       int level, __u64 *keyp, __u64 *ptrp)
1403 {
1404 	struct nilfs_btree_node *root, *child;
1405 	int n, ncblk;
1406 
1407 	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1408 
1409 	root = nilfs_btree_get_root(btree);
1410 	child = nilfs_btree_get_nonroot_node(path, level);
1411 	ncblk = nilfs_btree_nchildren_per_block(btree);
1412 
1413 	nilfs_btree_node_delete(root, 0, NULL, NULL,
1414 				NILFS_BTREE_ROOT_NCHILDREN_MAX);
1415 	nilfs_btree_node_set_level(root, level);
1416 	n = nilfs_btree_node_get_nchildren(child);
1417 	nilfs_btree_node_move_left(root, child, n,
1418 				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1419 
1420 	nilfs_btnode_delete(path[level].bp_bh);
1421 	path[level].bp_bh = NULL;
1422 }
1423 
1424 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1425 			    struct nilfs_btree_path *path,
1426 			    int level, __u64 *keyp, __u64 *ptrp)
1427 {
1428 }
1429 
1430 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1431 				      struct nilfs_btree_path *path,
1432 				      int *levelp,
1433 				      struct nilfs_bmap_stats *stats,
1434 				      struct inode *dat)
1435 {
1436 	struct buffer_head *bh;
1437 	struct nilfs_btree_node *node, *parent, *sib;
1438 	__u64 sibptr;
1439 	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1440 
1441 	ret = 0;
1442 	stats->bs_nblocks = 0;
1443 	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1444 	ncblk = nilfs_btree_nchildren_per_block(btree);
1445 
1446 	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1447 	     level < nilfs_btree_height(btree) - 1;
1448 	     level++) {
1449 		node = nilfs_btree_get_nonroot_node(path, level);
1450 		path[level].bp_oldreq.bpr_ptr =
1451 			nilfs_btree_node_get_ptr(node, dindex, ncblk);
1452 		ret = nilfs_bmap_prepare_end_ptr(btree,
1453 						 &path[level].bp_oldreq, dat);
1454 		if (ret < 0)
1455 			goto err_out_child_node;
1456 
1457 		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1458 			path[level].bp_op = nilfs_btree_do_delete;
1459 			stats->bs_nblocks++;
1460 			goto out;
1461 		}
1462 
1463 		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1464 		pindex = path[level + 1].bp_index;
1465 		dindex = pindex;
1466 
1467 		if (pindex > 0) {
1468 			/* left sibling */
1469 			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1470 							  ncmax);
1471 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1472 			if (ret < 0)
1473 				goto err_out_curr_node;
1474 			sib = (struct nilfs_btree_node *)bh->b_data;
1475 			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1476 				path[level].bp_sib_bh = bh;
1477 				path[level].bp_op = nilfs_btree_borrow_left;
1478 				stats->bs_nblocks++;
1479 				goto out;
1480 			} else {
1481 				path[level].bp_sib_bh = bh;
1482 				path[level].bp_op = nilfs_btree_concat_left;
1483 				stats->bs_nblocks++;
1484 				/* continue; */
1485 			}
1486 		} else if (pindex <
1487 			   nilfs_btree_node_get_nchildren(parent) - 1) {
1488 			/* right sibling */
1489 			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1490 							  ncmax);
1491 			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1492 			if (ret < 0)
1493 				goto err_out_curr_node;
1494 			sib = (struct nilfs_btree_node *)bh->b_data;
1495 			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1496 				path[level].bp_sib_bh = bh;
1497 				path[level].bp_op = nilfs_btree_borrow_right;
1498 				stats->bs_nblocks++;
1499 				goto out;
1500 			} else {
1501 				path[level].bp_sib_bh = bh;
1502 				path[level].bp_op = nilfs_btree_concat_right;
1503 				stats->bs_nblocks++;
1504 				/*
1505 				 * When merging right sibling node
1506 				 * into the current node, pointer to
1507 				 * the right sibling node must be
1508 				 * terminated instead.  The adjustment
1509 				 * below is required for that.
1510 				 */
1511 				dindex = pindex + 1;
1512 				/* continue; */
1513 			}
1514 		} else {
1515 			/* no siblings */
1516 			/* the only child of the root node */
1517 			WARN_ON(level != nilfs_btree_height(btree) - 2);
1518 			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1519 			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1520 				path[level].bp_op = nilfs_btree_shrink;
1521 				stats->bs_nblocks += 2;
1522 				level++;
1523 				path[level].bp_op = nilfs_btree_nop;
1524 				goto shrink_root_child;
1525 			} else {
1526 				path[level].bp_op = nilfs_btree_do_delete;
1527 				stats->bs_nblocks++;
1528 				goto out;
1529 			}
1530 		}
1531 	}
1532 
1533 	/* child of the root node is deleted */
1534 	path[level].bp_op = nilfs_btree_do_delete;
1535 	stats->bs_nblocks++;
1536 
1537 shrink_root_child:
1538 	node = nilfs_btree_get_root(btree);
1539 	path[level].bp_oldreq.bpr_ptr =
1540 		nilfs_btree_node_get_ptr(node, dindex,
1541 					 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1542 
1543 	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1544 	if (ret < 0)
1545 		goto err_out_child_node;
1546 
1547 	/* success */
1548  out:
1549 	*levelp = level;
1550 	return ret;
1551 
1552 	/* error */
1553  err_out_curr_node:
1554 	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1555  err_out_child_node:
1556 	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1557 		brelse(path[level].bp_sib_bh);
1558 		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1559 	}
1560 	*levelp = level;
1561 	stats->bs_nblocks = 0;
1562 	return ret;
1563 }
1564 
1565 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1566 				      struct nilfs_btree_path *path,
1567 				      int maxlevel, struct inode *dat)
1568 {
1569 	int level;
1570 
1571 	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1572 		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1573 		path[level].bp_op(btree, path, level, NULL, NULL);
1574 	}
1575 
1576 	if (!nilfs_bmap_dirty(btree))
1577 		nilfs_bmap_set_dirty(btree);
1578 }
1579 
1580 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1581 
1582 {
1583 	struct nilfs_btree_path *path;
1584 	struct nilfs_bmap_stats stats;
1585 	struct inode *dat;
1586 	int level, ret;
1587 
1588 	path = nilfs_btree_alloc_path();
1589 	if (path == NULL)
1590 		return -ENOMEM;
1591 
1592 	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1593 				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1594 	if (ret < 0)
1595 		goto out;
1596 
1597 
1598 	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1599 
1600 	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1601 	if (ret < 0)
1602 		goto out;
1603 	nilfs_btree_commit_delete(btree, path, level, dat);
1604 	nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1605 
1606 out:
1607 	nilfs_btree_free_path(path);
1608 	return ret;
1609 }
1610 
1611 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1612 				__u64 *keyp)
1613 {
1614 	struct nilfs_btree_path *path;
1615 	const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1616 	int ret;
1617 
1618 	path = nilfs_btree_alloc_path();
1619 	if (!path)
1620 		return -ENOMEM;
1621 
1622 	ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1623 	if (!ret)
1624 		*keyp = start;
1625 	else if (ret == -ENOENT)
1626 		ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1627 
1628 	nilfs_btree_free_path(path);
1629 	return ret;
1630 }
1631 
1632 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1633 {
1634 	struct nilfs_btree_path *path;
1635 	int ret;
1636 
1637 	path = nilfs_btree_alloc_path();
1638 	if (path == NULL)
1639 		return -ENOMEM;
1640 
1641 	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1642 
1643 	nilfs_btree_free_path(path);
1644 
1645 	return ret;
1646 }
1647 
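/*
 * Check whether deleting @key would allow the bmap to shrink back to a
 * direct mapping: only trees of height 2 or 3 with a single leaf qualify,
 * and @key must be the largest key while the next largest one stays below
 * NILFS_BMAP_LARGE_LOW.
 */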
1648 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1649 {
1650 	struct buffer_head *bh;
1651 	struct nilfs_btree_node *root, *node;
1652 	__u64 maxkey, nextmaxkey;
1653 	__u64 ptr;
1654 	int nchildren, ret;
1655 
1656 	root = nilfs_btree_get_root(btree);
1657 	switch (nilfs_btree_height(btree)) {
1658 	case 2:
1659 		bh = NULL;
1660 		node = root;
1661 		break;
1662 	case 3:
1663 		nchildren = nilfs_btree_node_get_nchildren(root);
1664 		if (nchildren > 1)
1665 			return 0;
1666 		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1667 					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1668 		ret = nilfs_btree_get_block(btree, ptr, &bh);
1669 		if (ret < 0)
1670 			return ret;
1671 		node = (struct nilfs_btree_node *)bh->b_data;
1672 		break;
1673 	default:
1674 		return 0;
1675 	}
1676 
1677 	nchildren = nilfs_btree_node_get_nchildren(node);
1678 	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1679 	nextmaxkey = (nchildren > 1) ?
1680 		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1681 	if (bh != NULL)
1682 		brelse(bh);
1683 
1684 	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1685 }
1686 
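/*
 * Copy up to @nitems key/pointer pairs from the single leaf of a small tree
 * (height 2 or 3) into @keys and @ptrs.  Returns the number of pairs copied,
 * or -EINVAL if the tree is too tall to gather.
 */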
1687 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1688 				   __u64 *keys, __u64 *ptrs, int nitems)
1689 {
1690 	struct buffer_head *bh;
1691 	struct nilfs_btree_node *node, *root;
1692 	__le64 *dkeys;
1693 	__le64 *dptrs;
1694 	__u64 ptr;
1695 	int nchildren, ncmax, i, ret;
1696 
1697 	root = nilfs_btree_get_root(btree);
1698 	switch (nilfs_btree_height(btree)) {
1699 	case 2:
1700 		bh = NULL;
1701 		node = root;
1702 		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1703 		break;
1704 	case 3:
1705 		nchildren = nilfs_btree_node_get_nchildren(root);
1706 		WARN_ON(nchildren > 1);
1707 		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1708 					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1709 		ret = nilfs_btree_get_block(btree, ptr, &bh);
1710 		if (ret < 0)
1711 			return ret;
1712 		node = (struct nilfs_btree_node *)bh->b_data;
1713 		ncmax = nilfs_btree_nchildren_per_block(btree);
1714 		break;
1715 	default:
1716 		node = NULL;
1717 		return -EINVAL;
1718 	}
1719 
1720 	nchildren = nilfs_btree_node_get_nchildren(node);
1721 	if (nchildren < nitems)
1722 		nitems = nchildren;
1723 	dkeys = nilfs_btree_node_dkeys(node);
1724 	dptrs = nilfs_btree_node_dptrs(node, ncmax);
1725 	for (i = 0; i < nitems; i++) {
1726 		keys[i] = le64_to_cpu(dkeys[i]);
1727 		ptrs[i] = le64_to_cpu(dptrs[i]);
1728 	}
1729 
1730 	if (bh != NULL)
1731 		brelse(bh);
1732 
1733 	return nitems;
1734 }
1735 
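/*
 * Allocate the pointers needed to build the initial B-tree: @dreq covers the
 * entry being inserted, and when the existing entries will not all fit into
 * the root node, @nreq plus a freshly created btnode buffer (*@bhp) provide a
 * separate level-1 child node.
 */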
1736 static int
1737 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1738 				       union nilfs_bmap_ptr_req *dreq,
1739 				       union nilfs_bmap_ptr_req *nreq,
1740 				       struct buffer_head **bhp,
1741 				       struct nilfs_bmap_stats *stats)
1742 {
1743 	struct buffer_head *bh;
1744 	struct inode *dat = NULL;
1745 	int ret;
1746 
1747 	stats->bs_nblocks = 0;
1748 
1749 	/* for data */
1750 	/* cannot find near ptr */
1751 	if (NILFS_BMAP_USE_VBN(btree)) {
1752 		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1753 		dat = nilfs_bmap_get_dat(btree);
1754 	}
1755 
1756 	ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1757 	if (ret < 0)
1758 		return ret;
1759 
1760 	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1761 	if (ret < 0)
1762 		return ret;
1763 
1764 	*bhp = NULL;
1765 	stats->bs_nblocks++;
1766 	if (nreq != NULL) {
1767 		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1768 		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1769 		if (ret < 0)
1770 			goto err_out_dreq;
1771 
1772 		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1773 		if (ret < 0)
1774 			goto err_out_nreq;
1775 
1776 		*bhp = bh;
1777 		stats->bs_nblocks++;
1778 	}
1779 
1780 	/* success */
1781 	return 0;
1782 
1783 	/* error */
1784  err_out_nreq:
1785 	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1786  err_out_dreq:
1787 	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1788 	stats->bs_nblocks = 0;
1789 	return ret;
1791 }
1792 
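/*
 * Build the new tree in place: with @nreq, the gathered entries plus the new
 * one go into the freshly allocated level-1 node and the root (level 2)
 * points to it; without @nreq everything fits into a level-1 root node.
 */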
1793 static void
1794 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1795 				      __u64 key, __u64 ptr,
1796 				      const __u64 *keys, const __u64 *ptrs,
1797 				      int n,
1798 				      union nilfs_bmap_ptr_req *dreq,
1799 				      union nilfs_bmap_ptr_req *nreq,
1800 				      struct buffer_head *bh)
1801 {
1802 	struct nilfs_btree_node *node;
1803 	struct inode *dat;
1804 	__u64 tmpptr;
1805 	int ncblk;
1806 
1807 	/* free resources */
1808 	if (btree->b_ops->bop_clear != NULL)
1809 		btree->b_ops->bop_clear(btree);
1810 
1811 	/* ptr must be a pointer to a buffer head. */
1812 	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1813 
1814 	/* convert and insert */
1815 	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1816 	__nilfs_btree_init(btree);
1817 	if (nreq != NULL) {
1818 		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1819 		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1820 
1821 		/* create child node at level 1 */
1822 		node = (struct nilfs_btree_node *)bh->b_data;
1823 		ncblk = nilfs_btree_nchildren_per_block(btree);
1824 		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1825 		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1826 		if (!buffer_dirty(bh))
1827 			mark_buffer_dirty(bh);
1828 		if (!nilfs_bmap_dirty(btree))
1829 			nilfs_bmap_set_dirty(btree);
1830 
1831 		brelse(bh);
1832 
1833 		/* create root node at level 2 */
1834 		node = nilfs_btree_get_root(btree);
1835 		tmpptr = nreq->bpr_ptr;
1836 		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1837 				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1838 				      &keys[0], &tmpptr);
1839 	} else {
1840 		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1841 
1842 		/* create root node at level 1 */
1843 		node = nilfs_btree_get_root(btree);
1844 		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1845 				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1846 				      keys, ptrs);
1847 		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1848 					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1849 		if (!nilfs_bmap_dirty(btree))
1850 			nilfs_bmap_set_dirty(btree);
1851 	}
1852 
1853 	if (NILFS_BMAP_USE_VBN(btree))
1854 		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1855 }
1856 
1857 /**
1858  * nilfs_btree_convert_and_insert - convert the bmap to a B-tree and insert a new entry
1859  * @btree: bmap struct being converted
1860  * @key: key of the new entry to be inserted
1861  * @ptr: block pointer to be associated with @key
1862  * @keys: keys of the existing entries to carry into the new tree
1863  * @ptrs: block pointers associated with @keys
1864  * @n: number of entries in @keys and @ptrs
1865  */
1866 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1867 				   __u64 key, __u64 ptr,
1868 				   const __u64 *keys, const __u64 *ptrs, int n)
1869 {
1870 	struct buffer_head *bh = NULL;
1871 	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1872 	struct nilfs_bmap_stats stats;
1873 	int ret;
1874 
1875 	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1876 		di = &dreq;
1877 		ni = NULL;
1878 	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1879 			   nilfs_btree_node_size(btree))) {
1880 		di = &dreq;
1881 		ni = &nreq;
1882 	} else {
1883 		di = NULL;
1884 		ni = NULL;
1885 		BUG();
1886 	}
1887 
1888 	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1889 						     &stats);
1890 	if (ret < 0)
1891 		return ret;
1892 	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1893 					      di, ni, bh);
1894 	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1895 	return 0;
1896 }
1897 
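/*
 * Propagate dirtiness for physical (non-DAT) pointers: simply mark the
 * ancestor node buffers dirty, stopping at the first one that is already
 * dirty or at the root, which lives in the bmap itself.
 */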
1898 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1899 				   struct nilfs_btree_path *path,
1900 				   int level,
1901 				   struct buffer_head *bh)
1902 {
1903 	while ((++level < nilfs_btree_height(btree) - 1) &&
1904 	       !buffer_dirty(path[level].bp_bh))
1905 		mark_buffer_dirty(path[level].bp_bh);
1906 
1907 	return 0;
1908 }
1909 
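/*
 * Prepare to rewrite the node at @level under a new virtual block number:
 * a DAT update ends the old entry and allocates a replacement, and btnode
 * buffers are additionally prepared for a change of their cache key.
 */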
1910 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1911 					struct nilfs_btree_path *path,
1912 					int level, struct inode *dat)
1913 {
1914 	struct nilfs_btree_node *parent;
1915 	int ncmax, ret;
1916 
1917 	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1918 	path[level].bp_oldreq.bpr_ptr =
1919 		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1920 					 ncmax);
1921 	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1922 	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1923 				       &path[level].bp_newreq.bpr_req);
1924 	if (ret < 0)
1925 		return ret;
1926 
1927 	if (buffer_nilfs_node(path[level].bp_bh)) {
1928 		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1929 		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1930 		path[level].bp_ctxt.bh = path[level].bp_bh;
1931 		ret = nilfs_btnode_prepare_change_key(
1932 			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1933 			&path[level].bp_ctxt);
1934 		if (ret < 0) {
1935 			nilfs_dat_abort_update(dat,
1936 					       &path[level].bp_oldreq.bpr_req,
1937 					       &path[level].bp_newreq.bpr_req);
1938 			return ret;
1939 		}
1940 	}
1941 
1942 	return 0;
1943 }
1944 
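/*
 * Commit the prepared DAT update, finish re-keying the btnode cache entry if
 * one was prepared, mark the buffer volatile, and store the new virtual block
 * number in the parent node slot.
 */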
1945 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1946 					struct nilfs_btree_path *path,
1947 					int level, struct inode *dat)
1948 {
1949 	struct nilfs_btree_node *parent;
1950 	int ncmax;
1951 
1952 	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1953 				&path[level].bp_newreq.bpr_req,
1954 				btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1955 
1956 	if (buffer_nilfs_node(path[level].bp_bh)) {
1957 		nilfs_btnode_commit_change_key(
1958 			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1959 			&path[level].bp_ctxt);
1960 		path[level].bp_bh = path[level].bp_ctxt.bh;
1961 	}
1962 	set_buffer_nilfs_volatile(path[level].bp_bh);
1963 
1964 	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1965 	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1966 				 path[level].bp_newreq.bpr_ptr, ncmax);
1967 }
1968 
1969 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1970 				       struct nilfs_btree_path *path,
1971 				       int level, struct inode *dat)
1972 {
1973 	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1974 			       &path[level].bp_newreq.bpr_req);
1975 	if (buffer_nilfs_node(path[level].bp_bh))
1976 		nilfs_btnode_abort_change_key(
1977 			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1978 			&path[level].bp_ctxt);
1979 }
1980 
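/*
 * Starting at @minlevel, prepare virtual block updates for the buffer there
 * (unless it is already volatile) and for every clean ancestor buffer below
 * the root.  The highest level prepared is returned in @maxlevelp; levels
 * already prepared are rolled back on failure.
 */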
1981 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1982 					   struct nilfs_btree_path *path,
1983 					   int minlevel, int *maxlevelp,
1984 					   struct inode *dat)
1985 {
1986 	int level, ret;
1987 
1988 	level = minlevel;
1989 	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1990 		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1991 		if (ret < 0)
1992 			return ret;
1993 	}
1994 	while ((++level < nilfs_btree_height(btree) - 1) &&
1995 	       !buffer_dirty(path[level].bp_bh)) {
1996 
1997 		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1998 		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1999 		if (ret < 0)
2000 			goto out;
2001 	}
2002 
2003 	/* success */
2004 	*maxlevelp = level - 1;
2005 	return 0;
2006 
2007 	/* error */
2008  out:
2009 	while (--level > minlevel)
2010 		nilfs_btree_abort_update_v(btree, path, level, dat);
2011 	if (!buffer_nilfs_volatile(path[level].bp_bh))
2012 		nilfs_btree_abort_update_v(btree, path, level, dat);
2013 	return ret;
2014 }
2015 
2016 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2017 					   struct nilfs_btree_path *path,
2018 					   int minlevel, int maxlevel,
2019 					   struct buffer_head *bh,
2020 					   struct inode *dat)
2021 {
2022 	int level;
2023 
2024 	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2025 		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2026 
2027 	for (level = minlevel + 1; level <= maxlevel; level++)
2028 		nilfs_btree_commit_update_v(btree, path, level, dat);
2029 }
2030 
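/*
 * Propagate the dirty state of @bh when the tree uses virtual block numbers:
 * prepare and commit pointer updates for it and its clean ancestors.  If the
 * buffer itself was already reassigned (volatile), only its current DAT
 * entry is marked dirty for it.
 */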
2031 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2032 				   struct nilfs_btree_path *path,
2033 				   int level, struct buffer_head *bh)
2034 {
2035 	int maxlevel = 0, ret;
2036 	struct nilfs_btree_node *parent;
2037 	struct inode *dat = nilfs_bmap_get_dat(btree);
2038 	__u64 ptr;
2039 	int ncmax;
2040 
2041 	get_bh(bh);
2042 	path[level].bp_bh = bh;
2043 	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2044 					      dat);
2045 	if (ret < 0)
2046 		goto out;
2047 
2048 	if (buffer_nilfs_volatile(path[level].bp_bh)) {
2049 		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2050 		ptr = nilfs_btree_node_get_ptr(parent,
2051 					       path[level + 1].bp_index,
2052 					       ncmax);
2053 		ret = nilfs_dat_mark_dirty(dat, ptr);
2054 		if (ret < 0)
2055 			goto out;
2056 	}
2057 
2058 	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2059 
2060  out:
2061 	brelse(path[level].bp_bh);
2062 	path[level].bp_bh = NULL;
2063 	return ret;
2064 }
2065 
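/*
 * Propagation entry point for a dirty buffer: derive its first key and level,
 * look up its path in the tree, and hand off to the virtual (DAT-backed) or
 * physical variant.
 */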
2066 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2067 				 struct buffer_head *bh)
2068 {
2069 	struct nilfs_btree_path *path;
2070 	struct nilfs_btree_node *node;
2071 	__u64 key;
2072 	int level, ret;
2073 
2074 	WARN_ON(!buffer_dirty(bh));
2075 
2076 	path = nilfs_btree_alloc_path();
2077 	if (path == NULL)
2078 		return -ENOMEM;
2079 
2080 	if (buffer_nilfs_node(bh)) {
2081 		node = (struct nilfs_btree_node *)bh->b_data;
2082 		key = nilfs_btree_node_get_key(node, 0);
2083 		level = nilfs_btree_node_get_level(node);
2084 	} else {
2085 		key = nilfs_bmap_data_get_key(btree, bh);
2086 		level = NILFS_BTREE_LEVEL_DATA;
2087 	}
2088 
2089 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2090 	if (ret < 0) {
2091 		if (unlikely(ret == -ENOENT))
2092 			nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
2093 				  "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2094 				  btree->b_inode->i_ino,
2095 				  (unsigned long long)key, level);
2096 		goto out;
2097 	}
2098 
2099 	ret = NILFS_BMAP_USE_VBN(btree) ?
2100 		nilfs_btree_propagate_v(btree, path, level, bh) :
2101 		nilfs_btree_propagate_p(btree, path, level, bh);
2102 
2103  out:
2104 	nilfs_btree_free_path(path);
2105 
2106 	return ret;
2107 }
2108 
2109 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2110 				    struct buffer_head *bh)
2111 {
2112 	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2113 }
2114 
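/*
 * Validate the node level of @bh and insert the buffer into the matching
 * per-level list, keeping each list sorted by the node's first key.
 */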
2115 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2116 					 struct list_head *lists,
2117 					 struct buffer_head *bh)
2118 {
2119 	struct list_head *head;
2120 	struct buffer_head *cbh;
2121 	struct nilfs_btree_node *node, *cnode;
2122 	__u64 key, ckey;
2123 	int level;
2124 
2125 	get_bh(bh);
2126 	node = (struct nilfs_btree_node *)bh->b_data;
2127 	key = nilfs_btree_node_get_key(node, 0);
2128 	level = nilfs_btree_node_get_level(node);
2129 	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2130 	    level >= NILFS_BTREE_LEVEL_MAX) {
2131 		dump_stack();
2132 		nilfs_msg(btree->b_inode->i_sb, KERN_WARNING,
2133 			  "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2134 			  level, (unsigned long long)key,
2135 			  btree->b_inode->i_ino,
2136 			  (unsigned long long)bh->b_blocknr);
2137 		return;
2138 	}
2139 
2140 	list_for_each(head, &lists[level]) {
2141 		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2142 		cnode = (struct nilfs_btree_node *)cbh->b_data;
2143 		ckey = nilfs_btree_node_get_key(cnode, 0);
2144 		if (key < ckey)
2145 			break;
2146 	}
2147 	list_add_tail(&bh->b_assoc_buffers, head);
2148 }
2149 
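/*
 * Collect all dirty node buffers from the btnode cache, bucketed by level
 * and ordered by key, and splice the buckets onto @listp from the lowest
 * level upward.
 */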
2150 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2151 					     struct list_head *listp)
2152 {
2153 	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2154 	struct address_space *btcache = btnc_inode->i_mapping;
2155 	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2156 	struct pagevec pvec;
2157 	struct buffer_head *bh, *head;
2158 	pgoff_t index = 0;
2159 	int level, i;
2160 
2161 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2162 	     level < NILFS_BTREE_LEVEL_MAX;
2163 	     level++)
2164 		INIT_LIST_HEAD(&lists[level]);
2165 
2166 	pagevec_init(&pvec);
2167 
2168 	while (pagevec_lookup_tag(&pvec, btcache, &index,
2169 					PAGECACHE_TAG_DIRTY)) {
2170 		for (i = 0; i < pagevec_count(&pvec); i++) {
2171 			bh = head = page_buffers(pvec.pages[i]);
2172 			do {
2173 				if (buffer_dirty(bh))
2174 					nilfs_btree_add_dirty_buffer(btree,
2175 								     lists, bh);
2176 			} while ((bh = bh->b_this_page) != head);
2177 		}
2178 		pagevec_release(&pvec);
2179 		cond_resched();
2180 	}
2181 
2182 	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2183 	     level < NILFS_BTREE_LEVEL_MAX;
2184 	     level++)
2185 		list_splice_tail(&lists[level], listp);
2186 }
2187 
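/*
 * Record the real disk location of a block mapped by physical pointers:
 * re-key a btnode buffer to @blocknr, store @blocknr in the parent slot, and
 * fill @binfo with the block offset (key) and level.
 */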
2188 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2189 				struct nilfs_btree_path *path,
2190 				int level,
2191 				struct buffer_head **bh,
2192 				sector_t blocknr,
2193 				union nilfs_binfo *binfo)
2194 {
2195 	struct nilfs_btree_node *parent;
2196 	__u64 key;
2197 	__u64 ptr;
2198 	int ncmax, ret;
2199 
2200 	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2201 	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2202 				       ncmax);
2203 	if (buffer_nilfs_node(*bh)) {
2204 		path[level].bp_ctxt.oldkey = ptr;
2205 		path[level].bp_ctxt.newkey = blocknr;
2206 		path[level].bp_ctxt.bh = *bh;
2207 		ret = nilfs_btnode_prepare_change_key(
2208 			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2209 			&path[level].bp_ctxt);
2210 		if (ret < 0)
2211 			return ret;
2212 		nilfs_btnode_commit_change_key(
2213 			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2214 			&path[level].bp_ctxt);
2215 		*bh = path[level].bp_ctxt.bh;
2216 	}
2217 
2218 	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2219 				 ncmax);
2220 
2221 	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2222 	/* on-disk format */
2223 	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2224 	binfo->bi_dat.bi_level = level;
2225 
2226 	return 0;
2227 }
2228 
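/*
 * Record the real disk location of a block mapped by virtual block numbers:
 * register @blocknr as the new location of the block's virtual number in the
 * DAT and fill @binfo with the virtual block number and key.
 */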
2229 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2230 				struct nilfs_btree_path *path,
2231 				int level,
2232 				struct buffer_head **bh,
2233 				sector_t blocknr,
2234 				union nilfs_binfo *binfo)
2235 {
2236 	struct nilfs_btree_node *parent;
2237 	struct inode *dat = nilfs_bmap_get_dat(btree);
2238 	__u64 key;
2239 	__u64 ptr;
2240 	union nilfs_bmap_ptr_req req;
2241 	int ncmax, ret;
2242 
2243 	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2244 	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2245 				       ncmax);
2246 	req.bpr_ptr = ptr;
2247 	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2248 	if (ret < 0)
2249 		return ret;
2250 	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2251 
2252 	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2253 	/* on-disk format */
2254 	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2255 	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2256 
2257 	return 0;
2258 }
2259 
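/*
 * Assignment entry point: locate the buffer in the tree, then use the
 * virtual or physical variant depending on the bmap's pointer type.
 */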
2260 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2261 			      struct buffer_head **bh,
2262 			      sector_t blocknr,
2263 			      union nilfs_binfo *binfo)
2264 {
2265 	struct nilfs_btree_path *path;
2266 	struct nilfs_btree_node *node;
2267 	__u64 key;
2268 	int level, ret;
2269 
2270 	path = nilfs_btree_alloc_path();
2271 	if (path == NULL)
2272 		return -ENOMEM;
2273 
2274 	if (buffer_nilfs_node(*bh)) {
2275 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2276 		key = nilfs_btree_node_get_key(node, 0);
2277 		level = nilfs_btree_node_get_level(node);
2278 	} else {
2279 		key = nilfs_bmap_data_get_key(btree, *bh);
2280 		level = NILFS_BTREE_LEVEL_DATA;
2281 	}
2282 
2283 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2284 	if (ret < 0) {
2285 		WARN_ON(ret == -ENOENT);
2286 		goto out;
2287 	}
2288 
2289 	ret = NILFS_BMAP_USE_VBN(btree) ?
2290 		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2291 		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2292 
2293  out:
2294 	nilfs_btree_free_path(path);
2295 
2296 	return ret;
2297 }
2298 
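/*
 * GC variant of assign: the buffer's block number holds its virtual block
 * number, so the DAT entry is simply moved to @blocknr and @binfo is filled
 * with the virtual block number and key.
 */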
2299 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2300 				 struct buffer_head **bh,
2301 				 sector_t blocknr,
2302 				 union nilfs_binfo *binfo)
2303 {
2304 	struct nilfs_btree_node *node;
2305 	__u64 key;
2306 	int ret;
2307 
2308 	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2309 			     blocknr);
2310 	if (ret < 0)
2311 		return ret;
2312 
2313 	if (buffer_nilfs_node(*bh)) {
2314 		node = (struct nilfs_btree_node *)(*bh)->b_data;
2315 		key = nilfs_btree_node_get_key(node, 0);
2316 	} else
2317 		key = nilfs_bmap_data_get_key(btree, *bh);
2318 
2319 	/* on-disk format */
2320 	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2321 	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2322 
2323 	return 0;
2324 }
2325 
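/*
 * Mark the block found at @level covering @key dirty so that it is written
 * out with the next segment; the bmap dirty flag is set as well.
 */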
2326 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2327 {
2328 	struct buffer_head *bh;
2329 	struct nilfs_btree_path *path;
2330 	__u64 ptr;
2331 	int ret;
2332 
2333 	path = nilfs_btree_alloc_path();
2334 	if (path == NULL)
2335 		return -ENOMEM;
2336 
2337 	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2338 	if (ret < 0) {
2339 		WARN_ON(ret == -ENOENT);
2340 		goto out;
2341 	}
2342 	ret = nilfs_btree_get_block(btree, ptr, &bh);
2343 	if (ret < 0) {
2344 		WARN_ON(ret == -ENOENT);
2345 		goto out;
2346 	}
2347 
2348 	if (!buffer_dirty(bh))
2349 		mark_buffer_dirty(bh);
2350 	brelse(bh);
2351 	if (!nilfs_bmap_dirty(btree))
2352 		nilfs_bmap_set_dirty(btree);
2353 
2354  out:
2355 	nilfs_btree_free_path(path);
2356 	return ret;
2357 }
2358 
2359 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2360 	.bop_lookup		=	nilfs_btree_lookup,
2361 	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2362 	.bop_insert		=	nilfs_btree_insert,
2363 	.bop_delete		=	nilfs_btree_delete,
2364 	.bop_clear		=	NULL,
2365 
2366 	.bop_propagate		=	nilfs_btree_propagate,
2367 
2368 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2369 
2370 	.bop_assign		=	nilfs_btree_assign,
2371 	.bop_mark		=	nilfs_btree_mark,
2372 
2373 	.bop_seek_key		=	nilfs_btree_seek_key,
2374 	.bop_last_key		=	nilfs_btree_last_key,
2375 
2376 	.bop_check_insert	=	NULL,
2377 	.bop_check_delete	=	nilfs_btree_check_delete,
2378 	.bop_gather_data	=	nilfs_btree_gather_data,
2379 };
2380 
2381 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2382 	.bop_lookup		=	NULL,
2383 	.bop_lookup_contig	=	NULL,
2384 	.bop_insert		=	NULL,
2385 	.bop_delete		=	NULL,
2386 	.bop_clear		=	NULL,
2387 
2388 	.bop_propagate		=	nilfs_btree_propagate_gc,
2389 
2390 	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2391 
2392 	.bop_assign		=	nilfs_btree_assign_gc,
2393 	.bop_mark		=	NULL,
2394 
2395 	.bop_seek_key		=	NULL,
2396 	.bop_last_key		=	NULL,
2397 
2398 	.bop_check_insert	=	NULL,
2399 	.bop_check_delete	=	NULL,
2400 	.bop_gather_data	=	NULL,
2401 };
2402 
2403 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2404 {
2405 	bmap->b_ops = &nilfs_btree_ops;
2406 	bmap->b_nchildren_per_block =
2407 		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2408 }
2409 
2410 int nilfs_btree_init(struct nilfs_bmap *bmap)
2411 {
2412 	int ret = 0;
2413 
2414 	__nilfs_btree_init(bmap);
2415 
2416 	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2417 		ret = -EIO;
2418 	else
2419 		ret = nilfs_attach_btree_node_cache(
2420 			&NILFS_BMAP_I(bmap)->vfs_inode);
2421 
2422 	return ret;
2423 }
2424 
2425 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2426 {
2427 	bmap->b_ops = &nilfs_btree_ops_gc;
2428 	bmap->b_nchildren_per_block =
2429 		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2430 }
2431