1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
4  * Copyright (c) 2013 Red Hat, Inc.
5  * All Rights Reserved.
6  */
7 #ifndef __XFS_DA_FORMAT_H__
8 #define __XFS_DA_FORMAT_H__
9 
10 /*
11  * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
12  *
13  * It is used to manage a doubly linked list of all blocks at the same
14  * level in the Btree, and to identify which type of block this is.
15  */
/* Magic numbers stored in xfs_da_blkinfo.magic for non-CRC (v2) blocks. */
#define XFS_DA_NODE_MAGIC	0xfebe	/* magic number: non-leaf blocks */
#define XFS_ATTR_LEAF_MAGIC	0xfbee	/* magic number: attribute leaf blks */
#define XFS_DIR2_LEAF1_MAGIC	0xd2f1	/* magic number: v2 dirlf single blks */
#define XFS_DIR2_LEAFN_MAGIC	0xd2ff	/* magic number: v2 dirlf multi blks */
20 
21 typedef struct xfs_da_blkinfo {
22 	__be32		forw;			/* previous block in list */
23 	__be32		back;			/* following block in list */
24 	__be16		magic;			/* validity check on block */
25 	__be16		pad;			/* unused */
26 } xfs_da_blkinfo_t;
27 
28 /*
29  * CRC enabled directory structure types
30  *
31  * The headers change size for the additional verification information, but
32  * otherwise the tree layouts and contents are unchanged. Hence the da btree
33  * code can use the struct xfs_da_blkinfo for manipulating the tree links and
34  * magic numbers without modification for both v2 and v3 nodes.
35  */
/* Magic numbers stored in xfs_da_blkinfo.magic for CRC-enabled (v3) blocks. */
#define XFS_DA3_NODE_MAGIC	0x3ebe	/* magic number: non-leaf blocks */
#define XFS_ATTR3_LEAF_MAGIC	0x3bee	/* magic number: attribute leaf blks */
#define XFS_DIR3_LEAF1_MAGIC	0x3df1	/* magic number: v3 dirlf single blks */
#define XFS_DIR3_LEAFN_MAGIC	0x3dff	/* magic number: v3 dirlf multi blks */
40 
41 struct xfs_da3_blkinfo {
42 	/*
43 	 * the node link manipulation code relies on the fact that the first
44 	 * element of this structure is the struct xfs_da_blkinfo so it can
45 	 * ignore the differences in the rest of the structures.
46 	 */
47 	struct xfs_da_blkinfo	hdr;
48 	__be32			crc;	/* CRC of block */
49 	__be64			blkno;	/* first block of the buffer */
50 	__be64			lsn;	/* sequence number of last write */
51 	uuid_t			uuid;	/* filesystem we belong to */
52 	__be64			owner;	/* inode that owns the block */
53 };
54 
55 /*
56  * This is the structure of the root and intermediate nodes in the Btree.
 * The leaf block formats are defined further below.
58  *
59  * Entries are not packed.
60  *
61  * Since we have duplicate keys, use a binary search but always follow
 * all matches in the block, not just the first match found.
63  */
64 #define	XFS_DA_NODE_MAXDEPTH	5	/* max depth of Btree */
65 
66 typedef struct xfs_da_node_hdr {
67 	struct xfs_da_blkinfo	info;	/* block type, links, etc. */
68 	__be16			__count; /* count of active entries */
69 	__be16			__level; /* level above leaves (leaf == 0) */
70 } xfs_da_node_hdr_t;
71 
72 struct xfs_da3_node_hdr {
73 	struct xfs_da3_blkinfo	info;	/* block type, links, etc. */
74 	__be16			__count; /* count of active entries */
75 	__be16			__level; /* level above leaves (leaf == 0) */
76 	__be32			__pad32;
77 };
78 
79 #define XFS_DA3_NODE_CRC_OFF	(offsetof(struct xfs_da3_node_hdr, info.crc))
80 
81 typedef struct xfs_da_node_entry {
82 	__be32	hashval;	/* hash value for this descendant */
83 	__be32	before;		/* Btree block before this key */
84 } xfs_da_node_entry_t;
85 
86 typedef struct xfs_da_intnode {
87 	struct xfs_da_node_hdr	hdr;
88 	struct xfs_da_node_entry __btree[];
89 } xfs_da_intnode_t;
90 
91 struct xfs_da3_intnode {
92 	struct xfs_da3_node_hdr	hdr;
93 	struct xfs_da_node_entry __btree[];
94 };
95 
/*
 * In-core version of the node header to abstract the differences in the v2 and
 * v3 disk format of the headers. Callers need to convert to/from disk format as
 * appropriate.
 *
 * The fields use plain host integer types rather than the __beNN types of the
 * on-disk headers.
 */
struct xfs_da3_icnode_hdr {
	uint32_t	forw;	/* in-core copy of blkinfo forw */
	uint32_t	back;	/* in-core copy of blkinfo back */
	uint16_t	magic;	/* in-core copy of blkinfo magic */
	uint16_t	count;	/* count of active entries */
	uint16_t	level;	/* level above leaves (leaf == 0) */
};
108 
109 /*
110  * Directory version 2.
111  *
112  * There are 4 possible formats:
113  *  - shortform - embedded into the inode
114  *  - single block - data with embedded leaf at the end
115  *  - multiple data blocks, single leaf+freeindex block
116  *  - data blocks, node and leaf blocks (btree), freeindex blocks
117  *
118  * Note: many node blocks structures and constants are shared with the attr
119  * code and defined in xfs_da_btree.h.
120  */
121 
/* 32-bit magic numbers for v2 directory data and free index blocks. */
#define XFS_DIR2_BLOCK_MAGIC	0x58443242	/* XD2B: single block dirs */
#define XFS_DIR2_DATA_MAGIC	0x58443244	/* XD2D: multiblock dirs */
#define XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F: free index blocks */
125 
126 /*
127  * Directory Version 3 With CRCs.
128  *
129  * The tree formats are the same as for version 2 directories.  The difference
130  * is in the block header and dirent formats. In many cases the v3 structures
131  * use v2 definitions as they are no different and this makes code sharing much
132  * easier.
133  *
 * Also, the xfs_dir3_*() functions handle both v2 and v3 formats - if the
 * format is v2 then they switch to the existing v2 code, or if the format is
 * v3 they implement the v3 functionality. This means the existing dir2 code is
 * a mix of xfs_dir2/xfs_dir3 calls and functions. The xfs_dir3 functions are
 * called where there is a difference in the formats, otherwise the code is
 * unchanged.
139  *
140  * Where it is possible, the code decides what to do based on the magic numbers
141  * in the blocks rather than feature bits in the superblock. This means the code
 * is as independent of the external XFS code as possible and doesn't require
143  * passing struct xfs_mount pointers into places where it isn't really
144  * necessary.
145  *
146  * Version 3 includes:
147  *
148  *	- a larger block header for CRC and identification purposes and so the
149  *	offsets of all the structures inside the blocks are different.
150  *
151  *	- new magic numbers to be able to detect the v2/v3 types on the fly.
152  */
153 
/* 32-bit magic numbers for v3 (CRC-enabled) directory data and free blocks. */
#define XFS_DIR3_BLOCK_MAGIC	0x58444233	/* XDB3: single block dirs */
#define XFS_DIR3_DATA_MAGIC	0x58444433	/* XDD3: multiblock dirs */
#define XFS_DIR3_FREE_MAGIC	0x58444633	/* XDF3: free index blocks */
157 
158 /*
159  * Dirents in version 3 directories have a file type field. Additions to this
160  * list are an on-disk format change, requiring feature bits. Valid values
161  * are as follows:
162  */
#define XFS_DIR3_FT_UNKNOWN		0
#define XFS_DIR3_FT_REG_FILE		1
#define XFS_DIR3_FT_DIR			2
#define XFS_DIR3_FT_CHRDEV		3
#define XFS_DIR3_FT_BLKDEV		4
#define XFS_DIR3_FT_FIFO		5
#define XFS_DIR3_FT_SOCK		6
#define XFS_DIR3_FT_SYMLINK		7
#define XFS_DIR3_FT_WHT			8

/* One more than the highest file type value defined above. */
#define XFS_DIR3_FT_MAX			9
174 
175 /*
176  * Byte offset in data block and shortform entry.
177  */
178 typedef uint16_t	xfs_dir2_data_off_t;
179 #define	NULLDATAOFF	0xffffU
180 typedef uint		xfs_dir2_data_aoff_t;	/* argument form */
181 
/*
 * Offset in data space of a data entry.
 */
typedef uint32_t	xfs_dir2_dataptr_t;
#define XFS_DIR2_MAX_DATAPTR	((xfs_dir2_dataptr_t)0xffffffff)	/* largest value */
#define XFS_DIR2_NULL_DATAPTR	((xfs_dir2_dataptr_t)0)		/* null value */
188 
189 /*
190  * Byte offset in a directory.
191  */
192 typedef	xfs_off_t	xfs_dir2_off_t;
193 
194 /*
195  * Directory block number (logical dirblk in file)
196  */
197 typedef uint32_t	xfs_dir2_db_t;
198 
199 #define XFS_INO32_SIZE	4
200 #define XFS_INO64_SIZE	8
201 #define XFS_INO64_DIFF	(XFS_INO64_SIZE - XFS_INO32_SIZE)
202 
203 #define	XFS_DIR2_MAX_SHORT_INUM	((xfs_ino_t)0xffffffffULL)
204 
205 /*
206  * Directory layout when stored internal to an inode.
207  *
 * Small directories are packed as tightly as possible so as to fit into the
 * literal area of the inode.  These "shortform" directories consist of a
 * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry
 * structures.  Due to the different inode number storage size and the
 * variable length name field in the xfs_dir2_sf_entry all these structures
 * are variable length, and the accessors in this file should be used to
 * iterate over them.
215  */
216 typedef struct xfs_dir2_sf_hdr {
217 	uint8_t			count;		/* count of entries */
218 	uint8_t			i8count;	/* count of 8-byte inode #s */
219 	uint8_t			parent[8];	/* parent dir inode number */
220 } __packed xfs_dir2_sf_hdr_t;
221 
222 typedef struct xfs_dir2_sf_entry {
223 	__u8			namelen;	/* actual name length */
224 	__u8			offset[2];	/* saved offset */
225 	__u8			name[];		/* name, variable size */
226 	/*
227 	 * A single byte containing the file type field follows the inode
228 	 * number for version 3 directory entries.
229 	 *
230 	 * A 64-bit or 32-bit inode number follows here, at a variable offset
231 	 * after the name.
232 	 */
233 } xfs_dir2_sf_entry_t;
234 
xfs_dir2_sf_hdr_size(int i8count)235 static inline int xfs_dir2_sf_hdr_size(int i8count)
236 {
237 	return sizeof(struct xfs_dir2_sf_hdr) -
238 		(i8count == 0) * XFS_INO64_DIFF;
239 }
240 
241 static inline xfs_dir2_data_aoff_t
xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t * sfep)242 xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
243 {
244 	return get_unaligned_be16(sfep->offset);
245 }
246 
247 static inline void
xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t * sfep,xfs_dir2_data_aoff_t off)248 xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
249 {
250 	put_unaligned_be16(off, sfep->offset);
251 }
252 
253 static inline struct xfs_dir2_sf_entry *
xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr * hdr)254 xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
255 {
256 	return (struct xfs_dir2_sf_entry *)
257 		((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
258 }
259 
260 /*
261  * Data block structures.
262  *
263  * A pure data block looks like the following drawing on disk:
264  *
265  *    +-------------------------------------------------+
266  *    | xfs_dir2_data_hdr_t                             |
267  *    +-------------------------------------------------+
268  *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
269  *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
270  *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
271  *    | ...                                             |
272  *    +-------------------------------------------------+
273  *    | unused space                                    |
274  *    +-------------------------------------------------+
275  *
276  * As all the entries are variable size structures the accessors below should
277  * be used to iterate over them.
278  *
279  * In addition to the pure data blocks for the data and node formats,
280  * most structures are also used for the combined data/freespace "block"
281  * format below.
282  */
283 
#define XFS_DIR2_DATA_ALIGN_LOG	3		/* i.e., 8 bytes */
#define XFS_DIR2_DATA_ALIGN	(1 << XFS_DIR2_DATA_ALIGN_LOG)
#define XFS_DIR2_DATA_FREE_TAG	0xffff		/* freetag of an unused entry */
#define XFS_DIR2_DATA_FD_COUNT	3		/* bestfree slots per data hdr */

/*
 * Directory address space divided into sections,
 * spaces separated by 32GB (2^35 bytes).
 */
#define XFS_DIR2_SPACE_SIZE	(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
#define XFS_DIR2_DATA_SPACE	0
#define XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
296 
297 /*
298  * Describe a free area in the data block.
299  *
300  * The freespace will be formatted as a xfs_dir2_data_unused_t.
301  */
302 typedef struct xfs_dir2_data_free {
303 	__be16			offset;		/* start of freespace */
304 	__be16			length;		/* length of freespace */
305 } xfs_dir2_data_free_t;
306 
307 /*
308  * Header for the data blocks.
309  *
310  * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
311  */
312 typedef struct xfs_dir2_data_hdr {
313 	__be32			magic;		/* XFS_DIR2_DATA_MAGIC or */
314 						/* XFS_DIR2_BLOCK_MAGIC */
315 	xfs_dir2_data_free_t	bestfree[XFS_DIR2_DATA_FD_COUNT];
316 } xfs_dir2_data_hdr_t;
317 
318 /*
319  * define a structure for all the verification fields we are adding to the
320  * directory block structures. This will be used in several structures.
321  * The magic number must be the first entry to align with all the dir2
322  * structures so we determine how to decode them just by the magic number.
323  */
324 struct xfs_dir3_blk_hdr {
325 	__be32			magic;	/* magic number */
326 	__be32			crc;	/* CRC of block */
327 	__be64			blkno;	/* first block of the buffer */
328 	__be64			lsn;	/* sequence number of last write */
329 	uuid_t			uuid;	/* filesystem we belong to */
330 	__be64			owner;	/* inode that owns the block */
331 };
332 
333 struct xfs_dir3_data_hdr {
334 	struct xfs_dir3_blk_hdr	hdr;
335 	xfs_dir2_data_free_t	best_free[XFS_DIR2_DATA_FD_COUNT];
336 	__be32			pad;	/* 64 bit alignment */
337 };
338 
339 #define XFS_DIR3_DATA_CRC_OFF  offsetof(struct xfs_dir3_data_hdr, hdr.crc)
340 
341 /*
342  * Active entry in a data block.
343  *
344  * Aligned to 8 bytes.  After the variable length name field there is a
345  * 2 byte tag field, which can be accessed using xfs_dir3_data_entry_tag_p.
346  *
 * For dir3 structures, there is a file type field between the name and the tag.
348  * This can only be manipulated by helper functions. It is packed hard against
349  * the end of the name so any padding for rounding is between the file type and
350  * the tag.
351  */
352 typedef struct xfs_dir2_data_entry {
353 	__be64			inumber;	/* inode number */
354 	__u8			namelen;	/* name length */
355 	__u8			name[];		/* name bytes, no null */
356      /* __u8			filetype; */	/* type of inode we point to */
357      /*	__be16                  tag; */		/* starting offset of us */
358 } xfs_dir2_data_entry_t;
359 
360 /*
361  * Unused entry in a data block.
362  *
363  * Aligned to 8 bytes.  Tag appears as the last 2 bytes and must be accessed
364  * using xfs_dir2_data_unused_tag_p.
365  */
366 typedef struct xfs_dir2_data_unused {
367 	__be16			freetag;	/* XFS_DIR2_DATA_FREE_TAG */
368 	__be16			length;		/* total free length */
369 						/* variable offset */
370 	__be16			tag;		/* starting offset of us */
371 } xfs_dir2_data_unused_t;
372 
373 /*
374  * Pointer to a freespace's tag word.
375  */
376 static inline __be16 *
xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused * dup)377 xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
378 {
379 	return (__be16 *)((char *)dup +
380 			be16_to_cpu(dup->length) - sizeof(__be16));
381 }
382 
383 /*
384  * Leaf block structures.
385  *
386  * A pure leaf block looks like the following drawing on disk:
387  *
388  *    +---------------------------+
389  *    | xfs_dir2_leaf_hdr_t       |
390  *    +---------------------------+
391  *    | xfs_dir2_leaf_entry_t     |
392  *    | xfs_dir2_leaf_entry_t     |
393  *    | xfs_dir2_leaf_entry_t     |
394  *    | xfs_dir2_leaf_entry_t     |
395  *    | ...                       |
396  *    +---------------------------+
397  *    | xfs_dir2_data_off_t       |
398  *    | xfs_dir2_data_off_t       |
399  *    | xfs_dir2_data_off_t       |
400  *    | ...                       |
401  *    +---------------------------+
402  *    | xfs_dir2_leaf_tail_t      |
403  *    +---------------------------+
404  *
405  * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block
406  * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present
407  * for directories with separate leaf nodes and free space blocks
408  * (magic = XFS_DIR2_LEAFN_MAGIC).
409  *
410  * As all the entries are variable size structures the accessors below should
411  * be used to iterate over them.
412  */
413 
/*
 * Offset of the leaf/node space.  First block in this space
 * is the btree root.
 */
#define XFS_DIR2_LEAF_SPACE	1
#define XFS_DIR2_LEAF_OFFSET	(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
420 
421 /*
422  * Leaf block header.
423  */
424 typedef struct xfs_dir2_leaf_hdr {
425 	xfs_da_blkinfo_t	info;		/* header for da routines */
426 	__be16			count;		/* count of entries */
427 	__be16			stale;		/* count of stale entries */
428 } xfs_dir2_leaf_hdr_t;
429 
430 struct xfs_dir3_leaf_hdr {
431 	struct xfs_da3_blkinfo	info;		/* header for da routines */
432 	__be16			count;		/* count of entries */
433 	__be16			stale;		/* count of stale entries */
434 	__be32			pad;		/* 64 bit alignment */
435 };
436 
/*
 * Leaf header using plain host integer types; it carries the link, magic,
 * count and stale fields that the v2 and v3 on-disk leaf headers share.
 */
struct xfs_dir3_icleaf_hdr {
	uint32_t		forw;
	uint32_t		back;
	uint16_t		magic;
	uint16_t		count;	/* count of entries */
	uint16_t		stale;	/* count of stale entries */
};
444 
445 /*
446  * Leaf block entry.
447  */
448 typedef struct xfs_dir2_leaf_entry {
449 	__be32			hashval;	/* hash value of name */
450 	__be32			address;	/* address of data entry */
451 } xfs_dir2_leaf_entry_t;
452 
453 /*
454  * Leaf block tail.
455  */
456 typedef struct xfs_dir2_leaf_tail {
457 	__be32			bestcount;
458 } xfs_dir2_leaf_tail_t;
459 
460 /*
461  * Leaf block.
462  */
463 typedef struct xfs_dir2_leaf {
464 	xfs_dir2_leaf_hdr_t	hdr;			/* leaf header */
465 	xfs_dir2_leaf_entry_t	__ents[];		/* entries */
466 } xfs_dir2_leaf_t;
467 
468 struct xfs_dir3_leaf {
469 	struct xfs_dir3_leaf_hdr	hdr;		/* leaf header */
470 	struct xfs_dir2_leaf_entry	__ents[];	/* entries */
471 };
472 
473 #define XFS_DIR3_LEAF_CRC_OFF  offsetof(struct xfs_dir3_leaf_hdr, info.crc)
474 
475 /*
476  * Get address of the bests array in the single-leaf block.
477  */
478 static inline __be16 *
xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail * ltp)479 xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
480 {
481 	return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
482 }
483 
484 /*
 * Free space block definitions for the node format.
486  */
487 
/*
 * Offset of the freespace index.
 */
#define XFS_DIR2_FREE_SPACE	2
#define XFS_DIR2_FREE_OFFSET	(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
493 
494 typedef	struct xfs_dir2_free_hdr {
495 	__be32			magic;		/* XFS_DIR2_FREE_MAGIC */
496 	__be32			firstdb;	/* db of first entry */
497 	__be32			nvalid;		/* count of valid entries */
498 	__be32			nused;		/* count of used entries */
499 } xfs_dir2_free_hdr_t;
500 
501 typedef struct xfs_dir2_free {
502 	xfs_dir2_free_hdr_t	hdr;		/* block header */
503 	__be16			bests[];	/* best free counts */
504 						/* unused entries are -1 */
505 } xfs_dir2_free_t;
506 
507 struct xfs_dir3_free_hdr {
508 	struct xfs_dir3_blk_hdr	hdr;
509 	__be32			firstdb;	/* db of first entry */
510 	__be32			nvalid;		/* count of valid entries */
511 	__be32			nused;		/* count of used entries */
512 	__be32			pad;		/* 64 bit alignment */
513 };
514 
515 struct xfs_dir3_free {
516 	struct xfs_dir3_free_hdr hdr;
517 	__be16			bests[];	/* best free counts */
518 						/* unused entries are -1 */
519 };
520 
521 #define XFS_DIR3_FREE_CRC_OFF  offsetof(struct xfs_dir3_free, hdr.hdr.crc)
522 
/*
 * In core version of the free block header, abstracted away from on-disk format
 * differences. Use this in the code, and convert to/from the disk version using
 * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
 */
struct xfs_dir3_icfree_hdr {
	uint32_t	magic;
	uint32_t	firstdb;	/* db of first entry */
	uint32_t	nvalid;		/* count of valid entries */
	uint32_t	nused;		/* count of used entries */
};
535 
536 /*
537  * Single block format.
538  *
539  * The single block format looks like the following drawing on disk:
540  *
541  *    +-------------------------------------------------+
542  *    | xfs_dir2_data_hdr_t                             |
543  *    +-------------------------------------------------+
544  *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
545  *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
 *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
547  *    | ...                                             |
548  *    +-------------------------------------------------+
549  *    | unused space                                    |
550  *    +-------------------------------------------------+
551  *    | ...                                             |
552  *    | xfs_dir2_leaf_entry_t                           |
553  *    | xfs_dir2_leaf_entry_t                           |
554  *    +-------------------------------------------------+
555  *    | xfs_dir2_block_tail_t                           |
556  *    +-------------------------------------------------+
557  *
558  * As all the entries are variable size structures the accessors below should
559  * be used to iterate over them.
560  */
561 
562 typedef struct xfs_dir2_block_tail {
563 	__be32		count;			/* count of leaf entries */
564 	__be32		stale;			/* count of stale lf entries */
565 } xfs_dir2_block_tail_t;
566 
567 /*
568  * Pointer to the leaf entries embedded in a data block (1-block format)
569  */
570 static inline struct xfs_dir2_leaf_entry *
xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail * btp)571 xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
572 {
573 	return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
574 }
575 
576 
577 /*
578  * Attribute storage layout
579  *
580  * Attribute lists are structured around Btrees where all the data
581  * elements are in the leaf nodes.  Attribute names are hashed into an int,
582  * then that int is used as the index into the Btree.  Since the hashval
583  * of an attribute name may not be unique, we may have duplicate keys.  The
584  * internal links in the Btree are logical block offsets into the file.
585  *
586  * Struct leaf_entry's are packed from the top.  Name/values grow from the
587  * bottom but are not packed.  The freemap contains run-length-encoded entries
588  * for the free bytes after the leaf_entry's, but only the N largest such,
589  * smaller runs are dropped.  When the freemap doesn't show enough space
590  * for an allocation, we compact the name/value area and try again.  If we
591  * still don't have enough space, then we have to split the block.  The
592  * name/value structs (both local and remote versions) must be 32bit aligned.
593  *
594  * Since we have duplicate hash keys, for each key that matches, compare
595  * the actual name string.  The root and intermediate node search always
596  * takes the first-in-the-block key match found, so we should only have
597  * to work "forw"ard.  If none matches, continue with the "forw"ard leaf
598  * nodes until the hash key changes or the attribute name is found.
599  *
600  * We store the fact that an attribute is a ROOT/USER/SECURE attribute in
601  * the leaf_entry.  The namespaces are independent only because we also look
602  * at the namespace bit when we are looking for a matching attribute name.
603  *
604  * We also store an "incomplete" bit in the leaf_entry.  It shows that an
605  * attribute is in the middle of being created and should not be shown to
606  * the user if we crash during the time that the bit is set.  We clear the
607  * bit when we have finished setting up the attribute.  We do this because
608  * we cannot create some large attributes inside a single transaction, and we
609  * need some indication that we weren't finished if we crash in the middle.
610  */
611 #define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */
612 
613 /*
614  * Entries are packed toward the top as tight as possible.
615  */
616 typedef struct xfs_attr_shortform {
617 	struct xfs_attr_sf_hdr {	/* constant-structure header block */
618 		__be16	totsize;	/* total bytes in shortform list */
619 		__u8	count;	/* count of active entries */
620 		__u8	padding;
621 	} hdr;
622 	struct xfs_attr_sf_entry {
623 		uint8_t namelen;	/* actual length of name (no NULL) */
624 		uint8_t valuelen;	/* actual length of value (no NULL) */
625 		uint8_t flags;	/* flags bits (see xfs_attr_leaf.h) */
626 		uint8_t nameval[1];	/* name & value bytes concatenated */
627 	} list[1];			/* variable sized array */
628 } xfs_attr_shortform_t;
629 
630 typedef struct xfs_attr_leaf_map {	/* RLE map of free bytes */
631 	__be16	base;			  /* base of free region */
632 	__be16	size;			  /* length of free region */
633 } xfs_attr_leaf_map_t;
634 
635 typedef struct xfs_attr_leaf_hdr {	/* constant-structure header block */
636 	xfs_da_blkinfo_t info;		/* block type, links, etc. */
637 	__be16	count;			/* count of active leaf_entry's */
638 	__be16	usedbytes;		/* num bytes of names/values stored */
639 	__be16	firstused;		/* first used byte in name area */
640 	__u8	holes;			/* != 0 if blk needs compaction */
641 	__u8	pad1;
642 	xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
643 					/* N largest free regions */
644 } xfs_attr_leaf_hdr_t;
645 
646 typedef struct xfs_attr_leaf_entry {	/* sorted on key, not name */
647 	__be32	hashval;		/* hash value of name */
648 	__be16	nameidx;		/* index into buffer of name/value */
649 	__u8	flags;			/* LOCAL/ROOT/SECURE/INCOMPLETE flag */
650 	__u8	pad2;			/* unused pad byte */
651 } xfs_attr_leaf_entry_t;
652 
653 typedef struct xfs_attr_leaf_name_local {
654 	__be16	valuelen;		/* number of bytes in value */
655 	__u8	namelen;		/* length of name bytes */
656 	__u8	nameval[1];		/* name/value bytes */
657 } xfs_attr_leaf_name_local_t;
658 
659 typedef struct xfs_attr_leaf_name_remote {
660 	__be32	valueblk;		/* block number of value bytes */
661 	__be32	valuelen;		/* number of bytes in value */
662 	__u8	namelen;		/* length of name bytes */
663 	__u8	name[1];		/* name bytes */
664 } xfs_attr_leaf_name_remote_t;
665 
666 typedef struct xfs_attr_leafblock {
667 	xfs_attr_leaf_hdr_t	hdr;	/* constant-structure header block */
668 	xfs_attr_leaf_entry_t	entries[1];	/* sorted on key, not name */
669 	/*
670 	 * The rest of the block contains the following structures after the
671 	 * leaf entries, growing from the bottom up. The variables are never
672 	 * referenced and definining them can actually make gcc optimize away
673 	 * accesses to the 'entries' array above index 0 so don't do that.
674 	 *
675 	 * xfs_attr_leaf_name_local_t namelist;
676 	 * xfs_attr_leaf_name_remote_t valuelist;
677 	 */
678 } xfs_attr_leafblock_t;
679 
680 /*
681  * CRC enabled leaf structures. Called "version 3" structures to match the
682  * version number of the directory and dablk structures for this feature, and
683  * attr2 is already taken by the variable inode attribute fork size feature.
684  */
685 struct xfs_attr3_leaf_hdr {
686 	struct xfs_da3_blkinfo	info;
687 	__be16			count;
688 	__be16			usedbytes;
689 	__be16			firstused;
690 	__u8			holes;
691 	__u8			pad1;
692 	struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
693 	__be32			pad2;		/* 64 bit alignment */
694 };
695 
696 #define XFS_ATTR3_LEAF_CRC_OFF	(offsetof(struct xfs_attr3_leaf_hdr, info.crc))
697 
698 struct xfs_attr3_leafblock {
699 	struct xfs_attr3_leaf_hdr	hdr;
700 	struct xfs_attr_leaf_entry	entries[1];
701 
702 	/*
703 	 * The rest of the block contains the following structures after the
704 	 * leaf entries, growing from the bottom up. The variables are never
705 	 * referenced, the locations accessed purely from helper functions.
706 	 *
707 	 * struct xfs_attr_leaf_name_local
708 	 * struct xfs_attr_leaf_name_remote
709 	 */
710 };
711 
712 /*
713  * incore, neutral version of the attribute leaf header
714  */
715 struct xfs_attr3_icleaf_hdr {
716 	uint32_t	forw;
717 	uint32_t	back;
718 	uint16_t	magic;
719 	uint16_t	count;
720 	uint16_t	usedbytes;
721 	/*
722 	 * firstused is 32-bit here instead of 16-bit like the on-disk variant
723 	 * to support maximum fsb size of 64k without overflow issues throughout
724 	 * the attr code. Instead, the overflow condition is handled on
725 	 * conversion to/from disk.
726 	 */
727 	uint32_t	firstused;
728 	__u8		holes;
729 	struct {
730 		uint16_t	base;
731 		uint16_t	size;
732 	} freemap[XFS_ATTR_LEAF_MAPSIZE];
733 };
734 
/*
 * Special value to represent fs block size in the leaf header firstused field.
 * Only used when block size overflows the 2-bytes available on disk.
 */
#define XFS_ATTR3_LEAF_NULLOFF	0
740 
/*
 * Flags used in the leaf_entry[i].flags field.
 * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
 * on the system call, they are "or"ed together for various operations.
 */
#define XFS_ATTR_LOCAL_BIT	0	/* attr is stored locally */
#define XFS_ATTR_ROOT_BIT	1	/* limit access to trusted attrs */
#define XFS_ATTR_SECURE_BIT	2	/* limit access to secure attrs */
#define XFS_ATTR_INCOMPLETE_BIT	7	/* attr in middle of create/delete */
#define XFS_ATTR_LOCAL		(1 << XFS_ATTR_LOCAL_BIT)
#define XFS_ATTR_ROOT		(1 << XFS_ATTR_ROOT_BIT)
#define XFS_ATTR_SECURE		(1 << XFS_ATTR_SECURE_BIT)
#define XFS_ATTR_INCOMPLETE	(1 << XFS_ATTR_INCOMPLETE_BIT)

/*
 * Conversion macros for converting namespace bits from argument flags
 * to ondisk flags.
 *
 * The *_MASK and masking macros keep only the namespace bits; the
 * *_TO_* macros translate each namespace bit individually.
 */
#define XFS_ATTR_NSP_ARGS_MASK		(ATTR_ROOT | ATTR_SECURE)
#define XFS_ATTR_NSP_ONDISK_MASK	(XFS_ATTR_ROOT | XFS_ATTR_SECURE)
#define XFS_ATTR_NSP_ONDISK(flags)	((flags) & XFS_ATTR_NSP_ONDISK_MASK)
#define XFS_ATTR_NSP_ARGS(flags)	((flags) & XFS_ATTR_NSP_ARGS_MASK)
#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x)	(((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\
					 ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0))
#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x)	(((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\
					 ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0))
767 
/*
 * Alignment for namelist and valuelist entries (since they are mixed
 * there can be only one alignment value)
 */
#define XFS_ATTR_LEAF_NAME_ALIGN	((uint)sizeof(xfs_dablk_t))
773 
774 static inline int
xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock * leafp)775 xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
776 {
777 	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
778 		return sizeof(struct xfs_attr3_leaf_hdr);
779 	return sizeof(struct xfs_attr_leaf_hdr);
780 }
781 
782 static inline struct xfs_attr_leaf_entry *
xfs_attr3_leaf_entryp(xfs_attr_leafblock_t * leafp)783 xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp)
784 {
785 	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
786 		return &((struct xfs_attr3_leafblock *)leafp)->entries[0];
787 	return &leafp->entries[0];
788 }
789 
790 /*
791  * Cast typed pointers for "local" and "remote" name/value structs.
792  */
793 static inline char *
xfs_attr3_leaf_name(xfs_attr_leafblock_t * leafp,int idx)794 xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
795 {
796 	struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp);
797 
798 	return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
799 }
800 
801 static inline xfs_attr_leaf_name_remote_t *
xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t * leafp,int idx)802 xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
803 {
804 	return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx);
805 }
806 
807 static inline xfs_attr_leaf_name_local_t *
xfs_attr3_leaf_name_local(xfs_attr_leafblock_t * leafp,int idx)808 xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
809 {
810 	return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx);
811 }
812 
813 /*
814  * Calculate total bytes used (including trailing pad for alignment) for
815  * a "local" name/value structure, a "remote" name/value structure, and
816  * a pointer which might be either.
817  */
xfs_attr_leaf_entsize_remote(int nlen)818 static inline int xfs_attr_leaf_entsize_remote(int nlen)
819 {
820 	return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
821 		XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
822 }
823 
xfs_attr_leaf_entsize_local(int nlen,int vlen)824 static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
825 {
826 	return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) +
827 		XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
828 }
829 
/*
 * Size limit derived from the block size @bsize: half of @bsize plus a
 * quarter of @bsize, each term rounded down separately.
 */
static inline int xfs_attr_leaf_entsize_local_max(int bsize)
{
	return (bsize >> 1) + (bsize >> 2);
}
834 
835 
836 
837 /*
838  * Remote attribute block format definition
839  *
840  * There is one of these headers per filesystem block in a remote attribute.
841  * This is done to ensure there is a 1:1 mapping between the attribute value
842  * length and the number of blocks needed to store the attribute. This makes the
843  * verification of a buffer a little more complex, but greatly simplifies the
844  * allocation, reading and writing of these attributes as we don't have to guess
845  * the number of blocks needed to store the attribute data.
846  */
#define XFS_ATTR3_RMT_MAGIC	0x5841524d	/* XARM: remote attr block */
848 
849 struct xfs_attr3_rmt_hdr {
850 	__be32	rm_magic;
851 	__be32	rm_offset;
852 	__be32	rm_bytes;
853 	__be32	rm_crc;
854 	uuid_t	rm_uuid;
855 	__be64	rm_owner;
856 	__be64	rm_blkno;
857 	__be64	rm_lsn;
858 };
859 
860 #define XFS_ATTR3_RMT_CRC_OFF	offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
861 
/*
 * Bytes of @bufsize left once the remote attribute header is accounted for.
 * The header size is subtracted only when xfs_sb_version_hascrc() is true for
 * the mount's superblock; otherwise the result is @bufsize unchanged.
 */
#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize)	\
	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
			sizeof(struct xfs_attr3_rmt_hdr) : 0))
865 
866 /* Number of bytes in a directory block. */
xfs_dir2_dirblock_bytes(struct xfs_sb * sbp)867 static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
868 {
869 	return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog);
870 }
871 
872 #endif /* __XFS_DA_FORMAT_H__ */
873