1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * Copyright (C) 2017 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_inode_fork.h"
20 #include "xfs_da_format.h"
21 #include "xfs_da_btree.h"
22 #include "xfs_dir2.h"
23 #include "xfs_dir2_priv.h"
24 #include "xfs_attr_leaf.h"
25 #include "scrub/xfs_scrub.h"
26 #include "scrub/scrub.h"
27 #include "scrub/common.h"
28 #include "scrub/trace.h"
29 #include "scrub/dabtree.h"
30
31 /* Directory/Attribute Btree */
32
33 /*
34 * Check for da btree operation errors. See the section about handling
35 * operational errors in common.c.
36 */
37 bool
xchk_da_process_error(struct xchk_da_btree * ds,int level,int * error)38 xchk_da_process_error(
39 struct xchk_da_btree *ds,
40 int level,
41 int *error)
42 {
43 struct xfs_scrub *sc = ds->sc;
44
45 if (*error == 0)
46 return true;
47
48 switch (*error) {
49 case -EDEADLOCK:
50 /* Used to restart an op with deadlock avoidance. */
51 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
52 break;
53 case -EFSBADCRC:
54 case -EFSCORRUPTED:
55 /* Note the badness but don't abort. */
56 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
57 *error = 0;
58 /* fall through */
59 default:
60 trace_xchk_file_op_error(sc, ds->dargs.whichfork,
61 xfs_dir2_da_to_db(ds->dargs.geo,
62 ds->state->path.blk[level].blkno),
63 *error, __return_address);
64 break;
65 }
66 return false;
67 }
68
69 /*
70 * Check for da btree corruption. See the section about handling
71 * operational errors in common.c.
72 */
73 void
xchk_da_set_corrupt(struct xchk_da_btree * ds,int level)74 xchk_da_set_corrupt(
75 struct xchk_da_btree *ds,
76 int level)
77 {
78 struct xfs_scrub *sc = ds->sc;
79
80 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
81
82 trace_xchk_fblock_error(sc, ds->dargs.whichfork,
83 xfs_dir2_da_to_db(ds->dargs.geo,
84 ds->state->path.blk[level].blkno),
85 __return_address);
86 }
87
88 /* Find an entry at a certain level in a da btree. */
89 STATIC void *
xchk_da_btree_entry(struct xchk_da_btree * ds,int level,int rec)90 xchk_da_btree_entry(
91 struct xchk_da_btree *ds,
92 int level,
93 int rec)
94 {
95 char *ents;
96 struct xfs_da_state_blk *blk;
97 void *baddr;
98
99 /* Dispatch the entry finding function. */
100 blk = &ds->state->path.blk[level];
101 baddr = blk->bp->b_addr;
102 switch (blk->magic) {
103 case XFS_ATTR_LEAF_MAGIC:
104 case XFS_ATTR3_LEAF_MAGIC:
105 ents = (char *)xfs_attr3_leaf_entryp(baddr);
106 return ents + (rec * sizeof(struct xfs_attr_leaf_entry));
107 case XFS_DIR2_LEAFN_MAGIC:
108 case XFS_DIR3_LEAFN_MAGIC:
109 ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
110 return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
111 case XFS_DIR2_LEAF1_MAGIC:
112 case XFS_DIR3_LEAF1_MAGIC:
113 ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
114 return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
115 case XFS_DA_NODE_MAGIC:
116 case XFS_DA3_NODE_MAGIC:
117 ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr);
118 return ents + (rec * sizeof(struct xfs_da_node_entry));
119 }
120
121 return NULL;
122 }
123
124 /* Scrub a da btree hash (key). */
125 int
xchk_da_btree_hash(struct xchk_da_btree * ds,int level,__be32 * hashp)126 xchk_da_btree_hash(
127 struct xchk_da_btree *ds,
128 int level,
129 __be32 *hashp)
130 {
131 struct xfs_da_state_blk *blks;
132 struct xfs_da_node_entry *entry;
133 xfs_dahash_t hash;
134 xfs_dahash_t parent_hash;
135
136 /* Is this hash in order? */
137 hash = be32_to_cpu(*hashp);
138 if (hash < ds->hashes[level])
139 xchk_da_set_corrupt(ds, level);
140 ds->hashes[level] = hash;
141
142 if (level == 0)
143 return 0;
144
145 /* Is this hash no larger than the parent hash? */
146 blks = ds->state->path.blk;
147 entry = xchk_da_btree_entry(ds, level - 1, blks[level - 1].index);
148 parent_hash = be32_to_cpu(entry->hashval);
149 if (parent_hash < hash)
150 xchk_da_set_corrupt(ds, level);
151
152 return 0;
153 }
154
155 /*
156 * Check a da btree pointer. Returns true if it's ok to use this
157 * pointer.
158 */
159 STATIC bool
xchk_da_btree_ptr_ok(struct xchk_da_btree * ds,int level,xfs_dablk_t blkno)160 xchk_da_btree_ptr_ok(
161 struct xchk_da_btree *ds,
162 int level,
163 xfs_dablk_t blkno)
164 {
165 if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
166 xchk_da_set_corrupt(ds, level);
167 return false;
168 }
169
170 return true;
171 }
172
173 /*
174 * The da btree scrubber can handle leaf1 blocks as a degenerate
175 * form of leafn blocks. Since the regular da code doesn't handle
176 * leaf1, we must multiplex the verifiers.
177 */
178 static void
xchk_da_btree_read_verify(struct xfs_buf * bp)179 xchk_da_btree_read_verify(
180 struct xfs_buf *bp)
181 {
182 struct xfs_da_blkinfo *info = bp->b_addr;
183
184 switch (be16_to_cpu(info->magic)) {
185 case XFS_DIR2_LEAF1_MAGIC:
186 case XFS_DIR3_LEAF1_MAGIC:
187 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
188 bp->b_ops->verify_read(bp);
189 return;
190 default:
191 /*
192 * xfs_da3_node_buf_ops already know how to handle
193 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
194 */
195 bp->b_ops = &xfs_da3_node_buf_ops;
196 bp->b_ops->verify_read(bp);
197 return;
198 }
199 }
200 static void
xchk_da_btree_write_verify(struct xfs_buf * bp)201 xchk_da_btree_write_verify(
202 struct xfs_buf *bp)
203 {
204 struct xfs_da_blkinfo *info = bp->b_addr;
205
206 switch (be16_to_cpu(info->magic)) {
207 case XFS_DIR2_LEAF1_MAGIC:
208 case XFS_DIR3_LEAF1_MAGIC:
209 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
210 bp->b_ops->verify_write(bp);
211 return;
212 default:
213 /*
214 * xfs_da3_node_buf_ops already know how to handle
215 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
216 */
217 bp->b_ops = &xfs_da3_node_buf_ops;
218 bp->b_ops->verify_write(bp);
219 return;
220 }
221 }
222 static void *
xchk_da_btree_verify(struct xfs_buf * bp)223 xchk_da_btree_verify(
224 struct xfs_buf *bp)
225 {
226 struct xfs_da_blkinfo *info = bp->b_addr;
227
228 switch (be16_to_cpu(info->magic)) {
229 case XFS_DIR2_LEAF1_MAGIC:
230 case XFS_DIR3_LEAF1_MAGIC:
231 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
232 return bp->b_ops->verify_struct(bp);
233 default:
234 bp->b_ops = &xfs_da3_node_buf_ops;
235 return bp->b_ops->verify_struct(bp);
236 }
237 }
238
239 static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
240 .name = "xchk_da_btree",
241 .verify_read = xchk_da_btree_read_verify,
242 .verify_write = xchk_da_btree_write_verify,
243 .verify_struct = xchk_da_btree_verify,
244 };
245
246 /* Check a block's sibling. */
247 STATIC int
xchk_da_btree_block_check_sibling(struct xchk_da_btree * ds,int level,int direction,xfs_dablk_t sibling)248 xchk_da_btree_block_check_sibling(
249 struct xchk_da_btree *ds,
250 int level,
251 int direction,
252 xfs_dablk_t sibling)
253 {
254 int retval;
255 int error;
256
257 memcpy(&ds->state->altpath, &ds->state->path,
258 sizeof(ds->state->altpath));
259
260 /*
261 * If the pointer is null, we shouldn't be able to move the upper
262 * level pointer anywhere.
263 */
264 if (sibling == 0) {
265 error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
266 direction, false, &retval);
267 if (error == 0 && retval == 0)
268 xchk_da_set_corrupt(ds, level);
269 error = 0;
270 goto out;
271 }
272
273 /* Move the alternate cursor one block in the direction given. */
274 error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
275 direction, false, &retval);
276 if (!xchk_da_process_error(ds, level, &error))
277 return error;
278 if (retval) {
279 xchk_da_set_corrupt(ds, level);
280 return error;
281 }
282 if (ds->state->altpath.blk[level].bp)
283 xchk_buffer_recheck(ds->sc,
284 ds->state->altpath.blk[level].bp);
285
286 /* Compare upper level pointer to sibling pointer. */
287 if (ds->state->altpath.blk[level].blkno != sibling)
288 xchk_da_set_corrupt(ds, level);
289 xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp);
290 out:
291 return error;
292 }
293
294 /* Check a block's sibling pointers. */
295 STATIC int
xchk_da_btree_block_check_siblings(struct xchk_da_btree * ds,int level,struct xfs_da_blkinfo * hdr)296 xchk_da_btree_block_check_siblings(
297 struct xchk_da_btree *ds,
298 int level,
299 struct xfs_da_blkinfo *hdr)
300 {
301 xfs_dablk_t forw;
302 xfs_dablk_t back;
303 int error = 0;
304
305 forw = be32_to_cpu(hdr->forw);
306 back = be32_to_cpu(hdr->back);
307
308 /* Top level blocks should not have sibling pointers. */
309 if (level == 0) {
310 if (forw != 0 || back != 0)
311 xchk_da_set_corrupt(ds, level);
312 return 0;
313 }
314
315 /*
316 * Check back (left) and forw (right) pointers. These functions
317 * absorb error codes for us.
318 */
319 error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
320 if (error)
321 goto out;
322 error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
323
324 out:
325 memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
326 return error;
327 }
328
329 /* Load a dir/attribute block from a btree. */
330 STATIC int
xchk_da_btree_block(struct xchk_da_btree * ds,int level,xfs_dablk_t blkno)331 xchk_da_btree_block(
332 struct xchk_da_btree *ds,
333 int level,
334 xfs_dablk_t blkno)
335 {
336 struct xfs_da_state_blk *blk;
337 struct xfs_da_intnode *node;
338 struct xfs_da_node_entry *btree;
339 struct xfs_da3_blkinfo *hdr3;
340 struct xfs_da_args *dargs = &ds->dargs;
341 struct xfs_inode *ip = ds->dargs.dp;
342 xfs_ino_t owner;
343 int *pmaxrecs;
344 struct xfs_da3_icnode_hdr nodehdr;
345 int error = 0;
346
347 blk = &ds->state->path.blk[level];
348 ds->state->path.active = level + 1;
349
350 /* Release old block. */
351 if (blk->bp) {
352 xfs_trans_brelse(dargs->trans, blk->bp);
353 blk->bp = NULL;
354 }
355
356 /* Check the pointer. */
357 blk->blkno = blkno;
358 if (!xchk_da_btree_ptr_ok(ds, level, blkno))
359 goto out_nobuf;
360
361 /* Read the buffer. */
362 error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
363 &blk->bp, dargs->whichfork,
364 &xchk_da_btree_buf_ops);
365 if (!xchk_da_process_error(ds, level, &error))
366 goto out_nobuf;
367 if (blk->bp)
368 xchk_buffer_recheck(ds->sc, blk->bp);
369
370 /*
371 * We didn't find a dir btree root block, which means that
372 * there's no LEAF1/LEAFN tree (at least not where it's supposed
373 * to be), so jump out now.
374 */
375 if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
376 blk->bp == NULL)
377 goto out_nobuf;
378
379 /* It's /not/ ok for attr trees not to have a da btree. */
380 if (blk->bp == NULL) {
381 xchk_da_set_corrupt(ds, level);
382 goto out_nobuf;
383 }
384
385 hdr3 = blk->bp->b_addr;
386 blk->magic = be16_to_cpu(hdr3->hdr.magic);
387 pmaxrecs = &ds->maxrecs[level];
388
389 /* We only started zeroing the header on v5 filesystems. */
390 if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
391 xchk_da_set_corrupt(ds, level);
392
393 /* Check the owner. */
394 if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
395 owner = be64_to_cpu(hdr3->owner);
396 if (owner != ip->i_ino)
397 xchk_da_set_corrupt(ds, level);
398 }
399
400 /* Check the siblings. */
401 error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
402 if (error)
403 goto out;
404
405 /* Interpret the buffer. */
406 switch (blk->magic) {
407 case XFS_ATTR_LEAF_MAGIC:
408 case XFS_ATTR3_LEAF_MAGIC:
409 xfs_trans_buf_set_type(dargs->trans, blk->bp,
410 XFS_BLFT_ATTR_LEAF_BUF);
411 blk->magic = XFS_ATTR_LEAF_MAGIC;
412 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
413 if (ds->tree_level != 0)
414 xchk_da_set_corrupt(ds, level);
415 break;
416 case XFS_DIR2_LEAFN_MAGIC:
417 case XFS_DIR3_LEAFN_MAGIC:
418 xfs_trans_buf_set_type(dargs->trans, blk->bp,
419 XFS_BLFT_DIR_LEAFN_BUF);
420 blk->magic = XFS_DIR2_LEAFN_MAGIC;
421 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
422 if (ds->tree_level != 0)
423 xchk_da_set_corrupt(ds, level);
424 break;
425 case XFS_DIR2_LEAF1_MAGIC:
426 case XFS_DIR3_LEAF1_MAGIC:
427 xfs_trans_buf_set_type(dargs->trans, blk->bp,
428 XFS_BLFT_DIR_LEAF1_BUF);
429 blk->magic = XFS_DIR2_LEAF1_MAGIC;
430 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
431 if (ds->tree_level != 0)
432 xchk_da_set_corrupt(ds, level);
433 break;
434 case XFS_DA_NODE_MAGIC:
435 case XFS_DA3_NODE_MAGIC:
436 xfs_trans_buf_set_type(dargs->trans, blk->bp,
437 XFS_BLFT_DA_NODE_BUF);
438 blk->magic = XFS_DA_NODE_MAGIC;
439 node = blk->bp->b_addr;
440 ip->d_ops->node_hdr_from_disk(&nodehdr, node);
441 btree = ip->d_ops->node_tree_p(node);
442 *pmaxrecs = nodehdr.count;
443 blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
444 if (level == 0) {
445 if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
446 xchk_da_set_corrupt(ds, level);
447 goto out_freebp;
448 }
449 ds->tree_level = nodehdr.level;
450 } else {
451 if (ds->tree_level != nodehdr.level) {
452 xchk_da_set_corrupt(ds, level);
453 goto out_freebp;
454 }
455 }
456
457 /* XXX: Check hdr3.pad32 once we know how to fix it. */
458 break;
459 default:
460 xchk_da_set_corrupt(ds, level);
461 goto out_freebp;
462 }
463
464 out:
465 return error;
466 out_freebp:
467 xfs_trans_brelse(dargs->trans, blk->bp);
468 blk->bp = NULL;
469 out_nobuf:
470 blk->blkno = 0;
471 return error;
472 }
473
474 /* Visit all nodes and leaves of a da btree. */
475 int
xchk_da_btree(struct xfs_scrub * sc,int whichfork,xchk_da_btree_rec_fn scrub_fn,void * private)476 xchk_da_btree(
477 struct xfs_scrub *sc,
478 int whichfork,
479 xchk_da_btree_rec_fn scrub_fn,
480 void *private)
481 {
482 struct xchk_da_btree ds = {};
483 struct xfs_mount *mp = sc->mp;
484 struct xfs_da_state_blk *blks;
485 struct xfs_da_node_entry *key;
486 void *rec;
487 xfs_dablk_t blkno;
488 int level;
489 int error;
490
491 /* Skip short format data structures; no btree to scan. */
492 if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
493 XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
494 return 0;
495
496 /* Set up initial da state. */
497 ds.dargs.dp = sc->ip;
498 ds.dargs.whichfork = whichfork;
499 ds.dargs.trans = sc->tp;
500 ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
501 ds.state = xfs_da_state_alloc();
502 ds.state->args = &ds.dargs;
503 ds.state->mp = mp;
504 ds.sc = sc;
505 ds.private = private;
506 if (whichfork == XFS_ATTR_FORK) {
507 ds.dargs.geo = mp->m_attr_geo;
508 ds.lowest = 0;
509 ds.highest = 0;
510 } else {
511 ds.dargs.geo = mp->m_dir_geo;
512 ds.lowest = ds.dargs.geo->leafblk;
513 ds.highest = ds.dargs.geo->freeblk;
514 }
515 blkno = ds.lowest;
516 level = 0;
517
518 /* Find the root of the da tree, if present. */
519 blks = ds.state->path.blk;
520 error = xchk_da_btree_block(&ds, level, blkno);
521 if (error)
522 goto out_state;
523 /*
524 * We didn't find a block at ds.lowest, which means that there's
525 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
526 * so jump out now.
527 */
528 if (blks[level].bp == NULL)
529 goto out_state;
530
531 blks[level].index = 0;
532 while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
533 /* Handle leaf block. */
534 if (blks[level].magic != XFS_DA_NODE_MAGIC) {
535 /* End of leaf, pop back towards the root. */
536 if (blks[level].index >= ds.maxrecs[level]) {
537 if (level > 0)
538 blks[level - 1].index++;
539 ds.tree_level++;
540 level--;
541 continue;
542 }
543
544 /* Dispatch record scrubbing. */
545 rec = xchk_da_btree_entry(&ds, level,
546 blks[level].index);
547 error = scrub_fn(&ds, level, rec);
548 if (error)
549 break;
550 if (xchk_should_terminate(sc, &error) ||
551 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
552 break;
553
554 blks[level].index++;
555 continue;
556 }
557
558
559 /* End of node, pop back towards the root. */
560 if (blks[level].index >= ds.maxrecs[level]) {
561 if (level > 0)
562 blks[level - 1].index++;
563 ds.tree_level++;
564 level--;
565 continue;
566 }
567
568 /* Hashes in order for scrub? */
569 key = xchk_da_btree_entry(&ds, level, blks[level].index);
570 error = xchk_da_btree_hash(&ds, level, &key->hashval);
571 if (error)
572 goto out;
573
574 /* Drill another level deeper. */
575 blkno = be32_to_cpu(key->before);
576 level++;
577 ds.tree_level--;
578 error = xchk_da_btree_block(&ds, level, blkno);
579 if (error)
580 goto out;
581 if (blks[level].bp == NULL)
582 goto out;
583
584 blks[level].index = 0;
585 }
586
587 out:
588 /* Release all the buffers we're tracking. */
589 for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
590 if (blks[level].bp == NULL)
591 continue;
592 xfs_trans_brelse(sc->tp, blks[level].bp);
593 blks[level].bp = NULL;
594 }
595
596 out_state:
597 xfs_da_state_free(ds.state);
598 return error;
599 }
600