1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2007 Oracle.  All rights reserved.
4  */
5 
6 #include <linux/sched.h>
7 #include "ctree.h"
8 #include "disk-io.h"
9 #include "print-tree.h"
10 #include "transaction.h"
11 #include "locking.h"
12 
13 /*
14  * Defrag all the leaves in a given btree.
15  * Read all the leaves and try to get key order to
16  * better reflect disk order
17  */
18 
btrfs_defrag_leaves(struct btrfs_trans_handle * trans,struct btrfs_root * root)19 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
20 			struct btrfs_root *root)
21 {
22 	struct btrfs_path *path = NULL;
23 	struct btrfs_key key;
24 	int ret = 0;
25 	int wret;
26 	int level;
27 	int next_key_ret = 0;
28 	u64 last_ret = 0;
29 
30 	if (root->fs_info->extent_root == root) {
31 		/*
32 		 * there's recursion here right now in the tree locking,
33 		 * we can't defrag the extent root without deadlock
34 		 */
35 		goto out;
36 	}
37 
38 	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
39 		goto out;
40 
41 	path = btrfs_alloc_path();
42 	if (!path)
43 		return -ENOMEM;
44 
45 	level = btrfs_header_level(root->node);
46 
47 	if (level == 0)
48 		goto out;
49 
50 	if (root->defrag_progress.objectid == 0) {
51 		struct extent_buffer *root_node;
52 		u32 nritems;
53 
54 		root_node = btrfs_lock_root_node(root);
55 		btrfs_set_lock_blocking(root_node);
56 		nritems = btrfs_header_nritems(root_node);
57 		root->defrag_max.objectid = 0;
58 		/* from above we know this is not a leaf */
59 		btrfs_node_key_to_cpu(root_node, &root->defrag_max,
60 				      nritems - 1);
61 		btrfs_tree_unlock(root_node);
62 		free_extent_buffer(root_node);
63 		memset(&key, 0, sizeof(key));
64 	} else {
65 		memcpy(&key, &root->defrag_progress, sizeof(key));
66 	}
67 
68 	path->keep_locks = 1;
69 
70 	ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION);
71 	if (ret < 0)
72 		goto out;
73 	if (ret > 0) {
74 		ret = 0;
75 		goto out;
76 	}
77 	btrfs_release_path(path);
78 	/*
79 	 * We don't need a lock on a leaf. btrfs_realloc_node() will lock all
80 	 * leafs from path->nodes[1], so set lowest_level to 1 to avoid later
81 	 * a deadlock (attempting to write lock an already write locked leaf).
82 	 */
83 	path->lowest_level = 1;
84 	wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
85 
86 	if (wret < 0) {
87 		ret = wret;
88 		goto out;
89 	}
90 	if (!path->nodes[1]) {
91 		ret = 0;
92 		goto out;
93 	}
94 	/*
95 	 * The node at level 1 must always be locked when our path has
96 	 * keep_locks set and lowest_level is 1, regardless of the value of
97 	 * path->slots[1].
98 	 */
99 	BUG_ON(path->locks[1] == 0);
100 	ret = btrfs_realloc_node(trans, root,
101 				 path->nodes[1], 0,
102 				 &last_ret,
103 				 &root->defrag_progress);
104 	if (ret) {
105 		WARN_ON(ret == -EAGAIN);
106 		goto out;
107 	}
108 	/*
109 	 * Now that we reallocated the node we can find the next key. Note that
110 	 * btrfs_find_next_key() can release our path and do another search
111 	 * without COWing, this is because even with path->keep_locks = 1,
112 	 * btrfs_search_slot() / ctree.c:unlock_up() does not keeps a lock on a
113 	 * node when path->slots[node_level - 1] does not point to the last
114 	 * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore
115 	 * we search for the next key after reallocating our node.
116 	 */
117 	path->slots[1] = btrfs_header_nritems(path->nodes[1]);
118 	next_key_ret = btrfs_find_next_key(root, path, &key, 1,
119 					   BTRFS_OLDEST_GENERATION);
120 	if (next_key_ret == 0) {
121 		memcpy(&root->defrag_progress, &key, sizeof(key));
122 		ret = -EAGAIN;
123 	}
124 out:
125 	btrfs_free_path(path);
126 	if (ret == -EAGAIN) {
127 		if (root->defrag_max.objectid > root->defrag_progress.objectid)
128 			goto done;
129 		if (root->defrag_max.type > root->defrag_progress.type)
130 			goto done;
131 		if (root->defrag_max.offset > root->defrag_progress.offset)
132 			goto done;
133 		ret = 0;
134 	}
135 done:
136 	if (ret != -EAGAIN) {
137 		memset(&root->defrag_progress, 0,
138 		       sizeof(root->defrag_progress));
139 		root->defrag_trans_start = trans->transid;
140 	}
141 	return ret;
142 }
143