1 /*
2  *   Copyright (C) International Business Machines Corp., 2000-2004
3  *
4  *   This program is free software;  you can redistribute it and/or modify
5  *   it under the terms of the GNU General Public License as published by
6  *   the Free Software Foundation; either version 2 of the License, or
7  *   (at your option) any later version.
8  *
9  *   This program is distributed in the hope that it will be useful,
10  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12  *   the GNU General Public License for more details.
13  *
14  *   You should have received a copy of the GNU General Public License
15  *   along with this program;  if not, write to the Free Software
16  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  */
18 
/*
 *	jfs_imap.c: inode allocation map manager
 *
 * Serialization:
 *   Each AG has a simple lock which is used to control the serialization of
 *	the AG level lists.  This lock should be taken first whenever an AG
 *	level list will be modified or accessed.
 *
 *   Each IAG is locked by obtaining the buffer for the IAG page.
 *
 *   There is also an inode lock for the inode map inode.  A read lock needs
 *	to be taken whenever an IAG is read from the map or the global level
 *	information is read.  A write lock needs to be taken whenever the global
 *	level information is modified or an atomic operation needs to be used.
 *
 *	If more than one IAG is read at one time, the read lock may not
 *	be given up until all of the IAGs are read.  Otherwise, a deadlock
 *	may occur when trying to obtain the read lock while another thread
 *	holding the read lock is waiting on the IAG already being held.
 *
 *   The control page of the inode map is read into memory by diMount().
 *	Thereafter it should only be modified in memory and then it will be
 *	written out when the filesystem is unmounted by diUnmount().
 */
43 
44 #include <linux/fs.h>
45 #include <linux/buffer_head.h>
46 #include <linux/pagemap.h>
47 #include <linux/quotaops.h>
48 #include <linux/slab.h>
49 
50 #include "jfs_incore.h"
51 #include "jfs_inode.h"
52 #include "jfs_filsys.h"
53 #include "jfs_dinode.h"
54 #include "jfs_dmap.h"
55 #include "jfs_imap.h"
56 #include "jfs_metapage.h"
57 #include "jfs_superblock.h"
58 #include "jfs_debug.h"
59 
/*
 * imap locks
 *
 * Every macro parameter is parenthesized so that an expression argument
 * (e.g. "base + 1") binds as a whole before "->" / "[]" are applied.
 */
/* iag free list lock */
#define IAGFREE_LOCK_INIT(imap)		mutex_init(&(imap)->im_freelock)
#define IAGFREE_LOCK(imap)		mutex_lock(&(imap)->im_freelock)
#define IAGFREE_UNLOCK(imap)		mutex_unlock(&(imap)->im_freelock)

/* per ag iag list locks */
#define AG_LOCK_INIT(imap,index)	mutex_init(&(imap)->im_aglock[(index)])
#define AG_LOCK(imap,agno)		mutex_lock(&(imap)->im_aglock[(agno)])
#define AG_UNLOCK(imap,agno)		mutex_unlock(&(imap)->im_aglock[(agno)])
72 
73 /*
74  * forward references
75  */
76 static int diAllocAG(struct inomap *, int, bool, struct inode *);
77 static int diAllocAny(struct inomap *, int, bool, struct inode *);
78 static int diAllocBit(struct inomap *, struct iag *, int);
79 static int diAllocExt(struct inomap *, int, struct inode *);
80 static int diAllocIno(struct inomap *, int, struct inode *);
81 static int diFindFree(u32, int);
82 static int diNewExt(struct inomap *, struct iag *, int);
83 static int diNewIAG(struct inomap *, int *, int, struct metapage **);
84 static void duplicateIXtree(struct super_block *, s64, int, s64 *);
85 
86 static int diIAGRead(struct inomap * imap, int, struct metapage **);
87 static int copy_from_dinode(struct dinode *, struct inode *);
88 static void copy_to_dinode(struct dinode *, struct inode *);
89 
90 /*
91  * NAME:	diMount()
92  *
93  * FUNCTION:	initialize the incore inode map control structures for
94  *		a fileset or aggregate init time.
95  *
96  *		the inode map's control structure (dinomap) is
97  *		brought in from disk and placed in virtual memory.
98  *
99  * PARAMETERS:
100  *	ipimap	- pointer to inode map inode for the aggregate or fileset.
101  *
102  * RETURN VALUES:
103  *	0	- success
104  *	-ENOMEM	- insufficient free virtual memory.
105  *	-EIO	- i/o error.
106  */
diMount(struct inode * ipimap)107 int diMount(struct inode *ipimap)
108 {
109 	struct inomap *imap;
110 	struct metapage *mp;
111 	int index;
112 	struct dinomap_disk *dinom_le;
113 
114 	/*
115 	 * allocate/initialize the in-memory inode map control structure
116 	 */
117 	/* allocate the in-memory inode map control structure. */
118 	imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
119 	if (imap == NULL) {
120 		jfs_err("diMount: kmalloc returned NULL!");
121 		return -ENOMEM;
122 	}
123 
124 	/* read the on-disk inode map control structure. */
125 
126 	mp = read_metapage(ipimap,
127 			   IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
128 			   PSIZE, 0);
129 	if (mp == NULL) {
130 		kfree(imap);
131 		return -EIO;
132 	}
133 
134 	/* copy the on-disk version to the in-memory version. */
135 	dinom_le = (struct dinomap_disk *) mp->data;
136 	imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag);
137 	imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag);
138 	atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos));
139 	atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree));
140 	imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext);
141 	imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext);
142 	for (index = 0; index < MAXAG; index++) {
143 		imap->im_agctl[index].inofree =
144 		    le32_to_cpu(dinom_le->in_agctl[index].inofree);
145 		imap->im_agctl[index].extfree =
146 		    le32_to_cpu(dinom_le->in_agctl[index].extfree);
147 		imap->im_agctl[index].numinos =
148 		    le32_to_cpu(dinom_le->in_agctl[index].numinos);
149 		imap->im_agctl[index].numfree =
150 		    le32_to_cpu(dinom_le->in_agctl[index].numfree);
151 	}
152 
153 	/* release the buffer. */
154 	release_metapage(mp);
155 
156 	/*
157 	 * allocate/initialize inode allocation map locks
158 	 */
159 	/* allocate and init iag free list lock */
160 	IAGFREE_LOCK_INIT(imap);
161 
162 	/* allocate and init ag list locks */
163 	for (index = 0; index < MAXAG; index++) {
164 		AG_LOCK_INIT(imap, index);
165 	}
166 
167 	/* bind the inode map inode and inode map control structure
168 	 * to each other.
169 	 */
170 	imap->im_ipimap = ipimap;
171 	JFS_IP(ipimap)->i_imap = imap;
172 
173 	return (0);
174 }
175 
176 
177 /*
178  * NAME:	diUnmount()
179  *
180  * FUNCTION:	write to disk the incore inode map control structures for
181  *		a fileset or aggregate at unmount time.
182  *
183  * PARAMETERS:
184  *	ipimap	- pointer to inode map inode for the aggregate or fileset.
185  *
186  * RETURN VALUES:
187  *	0	- success
188  *	-ENOMEM	- insufficient free virtual memory.
189  *	-EIO	- i/o error.
190  */
diUnmount(struct inode * ipimap,int mounterror)191 int diUnmount(struct inode *ipimap, int mounterror)
192 {
193 	struct inomap *imap = JFS_IP(ipimap)->i_imap;
194 
195 	/*
196 	 * update the on-disk inode map control structure
197 	 */
198 
199 	if (!(mounterror || isReadOnly(ipimap)))
200 		diSync(ipimap);
201 
202 	/*
203 	 * Invalidate the page cache buffers
204 	 */
205 	truncate_inode_pages(ipimap->i_mapping, 0);
206 
207 	/*
208 	 * free in-memory control structure
209 	 */
210 	kfree(imap);
211 	JFS_IP(ipimap)->i_imap = NULL;
212 
213 	return (0);
214 }
215 
216 
217 /*
218  *	diSync()
219  */
diSync(struct inode * ipimap)220 int diSync(struct inode *ipimap)
221 {
222 	struct dinomap_disk *dinom_le;
223 	struct inomap *imp = JFS_IP(ipimap)->i_imap;
224 	struct metapage *mp;
225 	int index;
226 
227 	/*
228 	 * write imap global conrol page
229 	 */
230 	/* read the on-disk inode map control structure */
231 	mp = get_metapage(ipimap,
232 			  IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
233 			  PSIZE, 0);
234 	if (mp == NULL) {
235 		jfs_err("diSync: get_metapage failed!");
236 		return -EIO;
237 	}
238 
239 	/* copy the in-memory version to the on-disk version */
240 	dinom_le = (struct dinomap_disk *) mp->data;
241 	dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag);
242 	dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag);
243 	dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos));
244 	dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree));
245 	dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext);
246 	dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext);
247 	for (index = 0; index < MAXAG; index++) {
248 		dinom_le->in_agctl[index].inofree =
249 		    cpu_to_le32(imp->im_agctl[index].inofree);
250 		dinom_le->in_agctl[index].extfree =
251 		    cpu_to_le32(imp->im_agctl[index].extfree);
252 		dinom_le->in_agctl[index].numinos =
253 		    cpu_to_le32(imp->im_agctl[index].numinos);
254 		dinom_le->in_agctl[index].numfree =
255 		    cpu_to_le32(imp->im_agctl[index].numfree);
256 	}
257 
258 	/* write out the control structure */
259 	write_metapage(mp);
260 
261 	/*
262 	 * write out dirty pages of imap
263 	 */
264 	filemap_write_and_wait(ipimap->i_mapping);
265 
266 	diWriteSpecial(ipimap, 0);
267 
268 	return (0);
269 }
270 
271 
272 /*
273  * NAME:	diRead()
274  *
275  * FUNCTION:	initialize an incore inode from disk.
276  *
277  *		on entry, the specifed incore inode should itself
278  *		specify the disk inode number corresponding to the
279  *		incore inode (i.e. i_number should be initialized).
280  *
281  *		this routine handles incore inode initialization for
282  *		both "special" and "regular" inodes.  special inodes
283  *		are those required early in the mount process and
284  *		require special handling since much of the file system
285  *		is not yet initialized.  these "special" inodes are
286  *		identified by a NULL inode map inode pointer and are
287  *		actually initialized by a call to diReadSpecial().
288  *
289  *		for regular inodes, the iag describing the disk inode
290  *		is read from disk to determine the inode extent address
291  *		for the disk inode.  with the inode extent address in
292  *		hand, the page of the extent that contains the disk
293  *		inode is read and the disk inode is copied to the
294  *		incore inode.
295  *
296  * PARAMETERS:
297  *	ip	-  pointer to incore inode to be initialized from disk.
298  *
299  * RETURN VALUES:
300  *	0	- success
301  *	-EIO	- i/o error.
302  *	-ENOMEM	- insufficient memory
303  *
304  */
diRead(struct inode * ip)305 int diRead(struct inode *ip)
306 {
307 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
308 	int iagno, ino, extno, rc;
309 	struct inode *ipimap;
310 	struct dinode *dp;
311 	struct iag *iagp;
312 	struct metapage *mp;
313 	s64 blkno, agstart;
314 	struct inomap *imap;
315 	int block_offset;
316 	int inodes_left;
317 	unsigned long pageno;
318 	int rel_inode;
319 
320 	jfs_info("diRead: ino = %ld", ip->i_ino);
321 
322 	ipimap = sbi->ipimap;
323 	JFS_IP(ip)->ipimap = ipimap;
324 
325 	/* determine the iag number for this inode (number) */
326 	iagno = INOTOIAG(ip->i_ino);
327 
328 	/* read the iag */
329 	imap = JFS_IP(ipimap)->i_imap;
330 	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
331 	rc = diIAGRead(imap, iagno, &mp);
332 	IREAD_UNLOCK(ipimap);
333 	if (rc) {
334 		jfs_err("diRead: diIAGRead returned %d", rc);
335 		return (rc);
336 	}
337 
338 	iagp = (struct iag *) mp->data;
339 
340 	/* determine inode extent that holds the disk inode */
341 	ino = ip->i_ino & (INOSPERIAG - 1);
342 	extno = ino >> L2INOSPEREXT;
343 
344 	if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) ||
345 	    (addressPXD(&iagp->inoext[extno]) == 0)) {
346 		release_metapage(mp);
347 		return -ESTALE;
348 	}
349 
350 	/* get disk block number of the page within the inode extent
351 	 * that holds the disk inode.
352 	 */
353 	blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);
354 
355 	/* get the ag for the iag */
356 	agstart = le64_to_cpu(iagp->agstart);
357 
358 	release_metapage(mp);
359 
360 	rel_inode = (ino & (INOSPERPAGE - 1));
361 	pageno = blkno >> sbi->l2nbperpage;
362 
363 	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
364 		/*
365 		 * OS/2 didn't always align inode extents on page boundaries
366 		 */
367 		inodes_left =
368 		     (sbi->nbperpage - block_offset) << sbi->l2niperblk;
369 
370 		if (rel_inode < inodes_left)
371 			rel_inode += block_offset << sbi->l2niperblk;
372 		else {
373 			pageno += 1;
374 			rel_inode -= inodes_left;
375 		}
376 	}
377 
378 	/* read the page of disk inode */
379 	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
380 	if (!mp) {
381 		jfs_err("diRead: read_metapage failed");
382 		return -EIO;
383 	}
384 
385 	/* locate the disk inode requested */
386 	dp = (struct dinode *) mp->data;
387 	dp += rel_inode;
388 
389 	if (ip->i_ino != le32_to_cpu(dp->di_number)) {
390 		jfs_error(ip->i_sb, "i_ino != di_number\n");
391 		rc = -EIO;
392 	} else if (le32_to_cpu(dp->di_nlink) == 0)
393 		rc = -ESTALE;
394 	else
395 		/* copy the disk inode to the in-memory inode */
396 		rc = copy_from_dinode(dp, ip);
397 
398 	release_metapage(mp);
399 
400 	/* set the ag for the inode */
401 	JFS_IP(ip)->agstart = agstart;
402 	JFS_IP(ip)->active_ag = -1;
403 
404 	return (rc);
405 }
406 
407 
408 /*
409  * NAME:	diReadSpecial()
410  *
411  * FUNCTION:	initialize a 'special' inode from disk.
412  *
413  *		this routines handles aggregate level inodes.  The
414  *		inode cache cannot differentiate between the
415  *		aggregate inodes and the filesystem inodes, so we
416  *		handle these here.  We don't actually use the aggregate
417  *		inode map, since these inodes are at a fixed location
418  *		and in some cases the aggregate inode map isn't initialized
419  *		yet.
420  *
421  * PARAMETERS:
422  *	sb - filesystem superblock
423  *	inum - aggregate inode number
424  *	secondary - 1 if secondary aggregate inode table
425  *
426  * RETURN VALUES:
427  *	new inode	- success
428  *	NULL		- i/o error.
429  */
diReadSpecial(struct super_block * sb,ino_t inum,int secondary)430 struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
431 {
432 	struct jfs_sb_info *sbi = JFS_SBI(sb);
433 	uint address;
434 	struct dinode *dp;
435 	struct inode *ip;
436 	struct metapage *mp;
437 
438 	ip = new_inode(sb);
439 	if (ip == NULL) {
440 		jfs_err("diReadSpecial: new_inode returned NULL!");
441 		return ip;
442 	}
443 
444 	if (secondary) {
445 		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
446 		JFS_IP(ip)->ipimap = sbi->ipaimap2;
447 	} else {
448 		address = AITBL_OFF >> L2PSIZE;
449 		JFS_IP(ip)->ipimap = sbi->ipaimap;
450 	}
451 
452 	ASSERT(inum < INOSPEREXT);
453 
454 	ip->i_ino = inum;
455 
456 	address += inum >> 3;	/* 8 inodes per 4K page */
457 
458 	/* read the page of fixed disk inode (AIT) in raw mode */
459 	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
460 	if (mp == NULL) {
461 		set_nlink(ip, 1);	/* Don't want iput() deleting it */
462 		iput(ip);
463 		return (NULL);
464 	}
465 
466 	/* get the pointer to the disk inode of interest */
467 	dp = (struct dinode *) (mp->data);
468 	dp += inum % 8;		/* 8 inodes per 4K page */
469 
470 	/* copy on-disk inode to in-memory inode */
471 	if ((copy_from_dinode(dp, ip)) != 0) {
472 		/* handle bad return by returning NULL for ip */
473 		set_nlink(ip, 1);	/* Don't want iput() deleting it */
474 		iput(ip);
475 		/* release the page */
476 		release_metapage(mp);
477 		return (NULL);
478 
479 	}
480 
481 	ip->i_mapping->a_ops = &jfs_metapage_aops;
482 	mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
483 
484 	/* Allocations to metadata inodes should not affect quotas */
485 	ip->i_flags |= S_NOQUOTA;
486 
487 	if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) {
488 		sbi->gengen = le32_to_cpu(dp->di_gengen);
489 		sbi->inostamp = le32_to_cpu(dp->di_inostamp);
490 	}
491 
492 	/* release the page */
493 	release_metapage(mp);
494 
495 	inode_fake_hash(ip);
496 
497 	return (ip);
498 }
499 
500 /*
501  * NAME:	diWriteSpecial()
502  *
503  * FUNCTION:	Write the special inode to disk
504  *
505  * PARAMETERS:
506  *	ip - special inode
507  *	secondary - 1 if secondary aggregate inode table
508  *
509  * RETURN VALUES: none
510  */
511 
diWriteSpecial(struct inode * ip,int secondary)512 void diWriteSpecial(struct inode *ip, int secondary)
513 {
514 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
515 	uint address;
516 	struct dinode *dp;
517 	ino_t inum = ip->i_ino;
518 	struct metapage *mp;
519 
520 	if (secondary)
521 		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
522 	else
523 		address = AITBL_OFF >> L2PSIZE;
524 
525 	ASSERT(inum < INOSPEREXT);
526 
527 	address += inum >> 3;	/* 8 inodes per 4K page */
528 
529 	/* read the page of fixed disk inode (AIT) in raw mode */
530 	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
531 	if (mp == NULL) {
532 		jfs_err("diWriteSpecial: failed to read aggregate inode extent!");
533 		return;
534 	}
535 
536 	/* get the pointer to the disk inode of interest */
537 	dp = (struct dinode *) (mp->data);
538 	dp += inum % 8;		/* 8 inodes per 4K page */
539 
540 	/* copy on-disk inode to in-memory inode */
541 	copy_to_dinode(dp, ip);
542 	memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288);
543 
544 	if (inum == FILESYSTEM_I)
545 		dp->di_gengen = cpu_to_le32(sbi->gengen);
546 
547 	/* write the page */
548 	write_metapage(mp);
549 }
550 
551 /*
552  * NAME:	diFreeSpecial()
553  *
554  * FUNCTION:	Free allocated space for special inode
555  */
diFreeSpecial(struct inode * ip)556 void diFreeSpecial(struct inode *ip)
557 {
558 	if (ip == NULL) {
559 		jfs_err("diFreeSpecial called with NULL ip!");
560 		return;
561 	}
562 	filemap_write_and_wait(ip->i_mapping);
563 	truncate_inode_pages(ip->i_mapping, 0);
564 	iput(ip);
565 }
566 
567 
568 
569 /*
570  * NAME:	diWrite()
571  *
572  * FUNCTION:	write the on-disk inode portion of the in-memory inode
573  *		to its corresponding on-disk inode.
574  *
575  *		on entry, the specifed incore inode should itself
576  *		specify the disk inode number corresponding to the
577  *		incore inode (i.e. i_number should be initialized).
578  *
579  *		the inode contains the inode extent address for the disk
580  *		inode.  with the inode extent address in hand, the
581  *		page of the extent that contains the disk inode is
582  *		read and the disk inode portion of the incore inode
583  *		is copied to the disk inode.
584  *
585  * PARAMETERS:
586  *	tid -  transacation id
587  *	ip  -  pointer to incore inode to be written to the inode extent.
588  *
589  * RETURN VALUES:
590  *	0	- success
591  *	-EIO	- i/o error.
592  */
diWrite(tid_t tid,struct inode * ip)593 int diWrite(tid_t tid, struct inode *ip)
594 {
595 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
596 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
597 	int rc = 0;
598 	s32 ino;
599 	struct dinode *dp;
600 	s64 blkno;
601 	int block_offset;
602 	int inodes_left;
603 	struct metapage *mp;
604 	unsigned long pageno;
605 	int rel_inode;
606 	int dioffset;
607 	struct inode *ipimap;
608 	uint type;
609 	lid_t lid;
610 	struct tlock *ditlck, *tlck;
611 	struct linelock *dilinelock, *ilinelock;
612 	struct lv *lv;
613 	int n;
614 
615 	ipimap = jfs_ip->ipimap;
616 
617 	ino = ip->i_ino & (INOSPERIAG - 1);
618 
619 	if (!addressPXD(&(jfs_ip->ixpxd)) ||
620 	    (lengthPXD(&(jfs_ip->ixpxd)) !=
621 	     JFS_IP(ipimap)->i_imap->im_nbperiext)) {
622 		jfs_error(ip->i_sb, "ixpxd invalid\n");
623 		return -EIO;
624 	}
625 
626 	/*
627 	 * read the page of disk inode containing the specified inode:
628 	 */
629 	/* compute the block address of the page */
630 	blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);
631 
632 	rel_inode = (ino & (INOSPERPAGE - 1));
633 	pageno = blkno >> sbi->l2nbperpage;
634 
635 	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
636 		/*
637 		 * OS/2 didn't always align inode extents on page boundaries
638 		 */
639 		inodes_left =
640 		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;
641 
642 		if (rel_inode < inodes_left)
643 			rel_inode += block_offset << sbi->l2niperblk;
644 		else {
645 			pageno += 1;
646 			rel_inode -= inodes_left;
647 		}
648 	}
649 	/* read the page of disk inode */
650       retry:
651 	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
652 	if (!mp)
653 		return -EIO;
654 
655 	/* get the pointer to the disk inode */
656 	dp = (struct dinode *) mp->data;
657 	dp += rel_inode;
658 
659 	dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;
660 
661 	/*
662 	 * acquire transaction lock on the on-disk inode;
663 	 * N.B. tlock is acquired on ipimap not ip;
664 	 */
665 	if ((ditlck =
666 	     txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
667 		goto retry;
668 	dilinelock = (struct linelock *) & ditlck->lock;
669 
670 	/*
671 	 * copy btree root from in-memory inode to on-disk inode
672 	 *
673 	 * (tlock is taken from inline B+-tree root in in-memory
674 	 * inode when the B+-tree root is updated, which is pointed
675 	 * by jfs_ip->blid as well as being on tx tlock list)
676 	 *
677 	 * further processing of btree root is based on the copy
678 	 * in in-memory inode, where txLog() will log from, and,
679 	 * for xtree root, txUpdateMap() will update map and reset
680 	 * XAD_NEW bit;
681 	 */
682 
683 	if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
684 		/*
685 		 * This is the special xtree inside the directory for storing
686 		 * the directory table
687 		 */
688 		xtpage_t *p, *xp;
689 		xad_t *xad;
690 
691 		jfs_ip->xtlid = 0;
692 		tlck = lid_to_tlock(lid);
693 		assert(tlck->type & tlckXTREE);
694 		tlck->type |= tlckBTROOT;
695 		tlck->mp = mp;
696 		ilinelock = (struct linelock *) & tlck->lock;
697 
698 		/*
699 		 * copy xtree root from inode to dinode:
700 		 */
701 		p = &jfs_ip->i_xtroot;
702 		xp = (xtpage_t *) &dp->di_dirtable;
703 		lv = ilinelock->lv;
704 		for (n = 0; n < ilinelock->index; n++, lv++) {
705 			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
706 			       lv->length << L2XTSLOTSIZE);
707 		}
708 
709 		/* reset on-disk (metadata page) xtree XAD_NEW bit */
710 		xad = &xp->xad[XTENTRYSTART];
711 		for (n = XTENTRYSTART;
712 		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
713 			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
714 				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
715 	}
716 
717 	if ((lid = jfs_ip->blid) == 0)
718 		goto inlineData;
719 	jfs_ip->blid = 0;
720 
721 	tlck = lid_to_tlock(lid);
722 	type = tlck->type;
723 	tlck->type |= tlckBTROOT;
724 	tlck->mp = mp;
725 	ilinelock = (struct linelock *) & tlck->lock;
726 
727 	/*
728 	 *	regular file: 16 byte (XAD slot) granularity
729 	 */
730 	if (type & tlckXTREE) {
731 		xtpage_t *p, *xp;
732 		xad_t *xad;
733 
734 		/*
735 		 * copy xtree root from inode to dinode:
736 		 */
737 		p = &jfs_ip->i_xtroot;
738 		xp = &dp->di_xtroot;
739 		lv = ilinelock->lv;
740 		for (n = 0; n < ilinelock->index; n++, lv++) {
741 			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
742 			       lv->length << L2XTSLOTSIZE);
743 		}
744 
745 		/* reset on-disk (metadata page) xtree XAD_NEW bit */
746 		xad = &xp->xad[XTENTRYSTART];
747 		for (n = XTENTRYSTART;
748 		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
749 			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
750 				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
751 	}
752 	/*
753 	 *	directory: 32 byte (directory entry slot) granularity
754 	 */
755 	else if (type & tlckDTREE) {
756 		dtpage_t *p, *xp;
757 
758 		/*
759 		 * copy dtree root from inode to dinode:
760 		 */
761 		p = (dtpage_t *) &jfs_ip->i_dtroot;
762 		xp = (dtpage_t *) & dp->di_dtroot;
763 		lv = ilinelock->lv;
764 		for (n = 0; n < ilinelock->index; n++, lv++) {
765 			memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
766 			       lv->length << L2DTSLOTSIZE);
767 		}
768 	} else {
769 		jfs_err("diWrite: UFO tlock");
770 	}
771 
772       inlineData:
773 	/*
774 	 * copy inline symlink from in-memory inode to on-disk inode
775 	 */
776 	if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
777 		lv = & dilinelock->lv[dilinelock->index];
778 		lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
779 		lv->length = 2;
780 		memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE);
781 		dilinelock->index++;
782 	}
783 	/*
784 	 * copy inline data from in-memory inode to on-disk inode:
785 	 * 128 byte slot granularity
786 	 */
787 	if (test_cflag(COMMIT_Inlineea, ip)) {
788 		lv = & dilinelock->lv[dilinelock->index];
789 		lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
790 		lv->length = 1;
791 		memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
792 		dilinelock->index++;
793 
794 		clear_cflag(COMMIT_Inlineea, ip);
795 	}
796 
797 	/*
798 	 *	lock/copy inode base: 128 byte slot granularity
799 	 */
800 	lv = & dilinelock->lv[dilinelock->index];
801 	lv->offset = dioffset >> L2INODESLOTSIZE;
802 	copy_to_dinode(dp, ip);
803 	if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
804 		lv->length = 2;
805 		memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
806 	} else
807 		lv->length = 1;
808 	dilinelock->index++;
809 
810 	/* release the buffer holding the updated on-disk inode.
811 	 * the buffer will be later written by commit processing.
812 	 */
813 	write_metapage(mp);
814 
815 	return (rc);
816 }
817 
818 
819 /*
820  * NAME:	diFree(ip)
821  *
822  * FUNCTION:	free a specified inode from the inode working map
823  *		for a fileset or aggregate.
824  *
825  *		if the inode to be freed represents the first (only)
826  *		free inode within the iag, the iag will be placed on
827  *		the ag free inode list.
828  *
829  *		freeing the inode will cause the inode extent to be
830  *		freed if the inode is the only allocated inode within
831  *		the extent.  in this case all the disk resource backing
832  *		up the inode extent will be freed. in addition, the iag
833  *		will be placed on the ag extent free list if the extent
834  *		is the first free extent in the iag.  if freeing the
835  *		extent also means that no free inodes will exist for
836  *		the iag, the iag will also be removed from the ag free
837  *		inode list.
838  *
839  *		the iag describing the inode will be freed if the extent
840  *		is to be freed and it is the only backed extent within
841  *		the iag.  in this case, the iag will be removed from the
842  *		ag free extent list and ag free inode list and placed on
843  *		the inode map's free iag list.
844  *
845  *		a careful update approach is used to provide consistency
846  *		in the face of updates to multiple buffers.  under this
847  *		approach, all required buffers are obtained before making
848  *		any updates and are held until all updates are complete.
849  *
850  * PARAMETERS:
851  *	ip	- inode to be freed.
852  *
853  * RETURN VALUES:
854  *	0	- success
855  *	-EIO	- i/o error.
856  */
diFree(struct inode * ip)857 int diFree(struct inode *ip)
858 {
859 	int rc;
860 	ino_t inum = ip->i_ino;
861 	struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp;
862 	struct metapage *mp, *amp, *bmp, *cmp, *dmp;
863 	int iagno, ino, extno, bitno, sword, agno;
864 	int back, fwd;
865 	u32 bitmap, mask;
866 	struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap;
867 	struct inomap *imap = JFS_IP(ipimap)->i_imap;
868 	pxd_t freepxd;
869 	tid_t tid;
870 	struct inode *iplist[3];
871 	struct tlock *tlck;
872 	struct pxd_lock *pxdlock;
873 
874 	/*
875 	 * This is just to suppress compiler warnings.  The same logic that
876 	 * references these variables is used to initialize them.
877 	 */
878 	aiagp = biagp = ciagp = diagp = NULL;
879 
880 	/* get the iag number containing the inode.
881 	 */
882 	iagno = INOTOIAG(inum);
883 
884 	/* make sure that the iag is contained within
885 	 * the map.
886 	 */
887 	if (iagno >= imap->im_nextiag) {
888 		print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
889 			       imap, 32, 0);
890 		jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n",
891 			  (uint) inum, iagno, imap->im_nextiag);
892 		return -EIO;
893 	}
894 
895 	/* get the allocation group for this ino.
896 	 */
897 	agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb));
898 
899 	/* Lock the AG specific inode map information
900 	 */
901 	AG_LOCK(imap, agno);
902 
903 	/* Obtain read lock in imap inode.  Don't release it until we have
904 	 * read all of the IAG's that we are going to.
905 	 */
906 	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
907 
908 	/* read the iag.
909 	 */
910 	if ((rc = diIAGRead(imap, iagno, &mp))) {
911 		IREAD_UNLOCK(ipimap);
912 		AG_UNLOCK(imap, agno);
913 		return (rc);
914 	}
915 	iagp = (struct iag *) mp->data;
916 
917 	/* get the inode number and extent number of the inode within
918 	 * the iag and the inode number within the extent.
919 	 */
920 	ino = inum & (INOSPERIAG - 1);
921 	extno = ino >> L2INOSPEREXT;
922 	bitno = ino & (INOSPEREXT - 1);
923 	mask = HIGHORDER >> bitno;
924 
925 	if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
926 		jfs_error(ip->i_sb, "wmap shows inode already free\n");
927 	}
928 
929 	if (!addressPXD(&iagp->inoext[extno])) {
930 		release_metapage(mp);
931 		IREAD_UNLOCK(ipimap);
932 		AG_UNLOCK(imap, agno);
933 		jfs_error(ip->i_sb, "invalid inoext\n");
934 		return -EIO;
935 	}
936 
937 	/* compute the bitmap for the extent reflecting the freed inode.
938 	 */
939 	bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask;
940 
941 	if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) {
942 		release_metapage(mp);
943 		IREAD_UNLOCK(ipimap);
944 		AG_UNLOCK(imap, agno);
945 		jfs_error(ip->i_sb, "numfree > numinos\n");
946 		return -EIO;
947 	}
948 	/*
949 	 *	inode extent still has some inodes or below low water mark:
950 	 *	keep the inode extent;
951 	 */
952 	if (bitmap ||
953 	    imap->im_agctl[agno].numfree < 96 ||
954 	    (imap->im_agctl[agno].numfree < 288 &&
955 	     (((imap->im_agctl[agno].numfree * 100) /
956 	       imap->im_agctl[agno].numinos) <= 25))) {
957 		/* if the iag currently has no free inodes (i.e.,
958 		 * the inode being freed is the first free inode of iag),
959 		 * insert the iag at head of the inode free list for the ag.
960 		 */
961 		if (iagp->nfreeinos == 0) {
962 			/* check if there are any iags on the ag inode
963 			 * free list.  if so, read the first one so that
964 			 * we can link the current iag onto the list at
965 			 * the head.
966 			 */
967 			if ((fwd = imap->im_agctl[agno].inofree) >= 0) {
968 				/* read the iag that currently is the head
969 				 * of the list.
970 				 */
971 				if ((rc = diIAGRead(imap, fwd, &amp))) {
972 					IREAD_UNLOCK(ipimap);
973 					AG_UNLOCK(imap, agno);
974 					release_metapage(mp);
975 					return (rc);
976 				}
977 				aiagp = (struct iag *) amp->data;
978 
979 				/* make current head point back to the iag.
980 				 */
981 				aiagp->inofreeback = cpu_to_le32(iagno);
982 
983 				write_metapage(amp);
984 			}
985 
986 			/* iag points forward to current head and iag
987 			 * becomes the new head of the list.
988 			 */
989 			iagp->inofreefwd =
990 			    cpu_to_le32(imap->im_agctl[agno].inofree);
991 			iagp->inofreeback = cpu_to_le32(-1);
992 			imap->im_agctl[agno].inofree = iagno;
993 		}
994 		IREAD_UNLOCK(ipimap);
995 
996 		/* update the free inode summary map for the extent if
997 		 * freeing the inode means the extent will now have free
998 		 * inodes (i.e., the inode being freed is the first free
999 		 * inode of extent),
1000 		 */
1001 		if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
1002 			sword = extno >> L2EXTSPERSUM;
1003 			bitno = extno & (EXTSPERSUM - 1);
1004 			iagp->inosmap[sword] &=
1005 			    cpu_to_le32(~(HIGHORDER >> bitno));
1006 		}
1007 
1008 		/* update the bitmap.
1009 		 */
1010 		iagp->wmap[extno] = cpu_to_le32(bitmap);
1011 
1012 		/* update the free inode counts at the iag, ag and
1013 		 * map level.
1014 		 */
1015 		le32_add_cpu(&iagp->nfreeinos, 1);
1016 		imap->im_agctl[agno].numfree += 1;
1017 		atomic_inc(&imap->im_numfree);
1018 
1019 		/* release the AG inode map lock
1020 		 */
1021 		AG_UNLOCK(imap, agno);
1022 
1023 		/* write the iag */
1024 		write_metapage(mp);
1025 
1026 		return (0);
1027 	}
1028 
1029 
1030 	/*
1031 	 *	inode extent has become free and above low water mark:
1032 	 *	free the inode extent;
1033 	 */
1034 
1035 	/*
1036 	 *	prepare to update iag list(s) (careful update step 1)
1037 	 */
1038 	amp = bmp = cmp = dmp = NULL;
1039 	fwd = back = -1;
1040 
1041 	/* check if the iag currently has no free extents.  if so,
1042 	 * it will be placed on the head of the ag extent free list.
1043 	 */
1044 	if (iagp->nfreeexts == 0) {
1045 		/* check if the ag extent free list has any iags.
1046 		 * if so, read the iag at the head of the list now.
1047 		 * this (head) iag will be updated later to reflect
1048 		 * the addition of the current iag at the head of
1049 		 * the list.
1050 		 */
1051 		if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
1052 			if ((rc = diIAGRead(imap, fwd, &amp)))
1053 				goto error_out;
1054 			aiagp = (struct iag *) amp->data;
1055 		}
1056 	} else {
1057 		/* iag has free extents. check if the addition of a free
1058 		 * extent will cause all extents to be free within this
1059 		 * iag.  if so, the iag will be removed from the ag extent
1060 		 * free list and placed on the inode map's free iag list.
1061 		 */
1062 		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1063 			/* in preparation for removing the iag from the
1064 			 * ag extent free list, read the iags preceding
1065 			 * and following the iag on the ag extent free
1066 			 * list.
1067 			 */
1068 			if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
1069 				if ((rc = diIAGRead(imap, fwd, &amp)))
1070 					goto error_out;
1071 				aiagp = (struct iag *) amp->data;
1072 			}
1073 
1074 			if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
1075 				if ((rc = diIAGRead(imap, back, &bmp)))
1076 					goto error_out;
1077 				biagp = (struct iag *) bmp->data;
1078 			}
1079 		}
1080 	}
1081 
1082 	/* remove the iag from the ag inode free list if freeing
1083 	 * this extent cause the iag to have no free inodes.
1084 	 */
1085 	if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1086 		int inofreeback = le32_to_cpu(iagp->inofreeback);
1087 		int inofreefwd = le32_to_cpu(iagp->inofreefwd);
1088 
1089 		/* in preparation for removing the iag from the
1090 		 * ag inode free list, read the iags preceding
1091 		 * and following the iag on the ag inode free
1092 		 * list.  before reading these iags, we must make
1093 		 * sure that we already don't have them in hand
1094 		 * from up above, since re-reading an iag (buffer)
1095 		 * we are currently holding would cause a deadlock.
1096 		 */
1097 		if (inofreefwd >= 0) {
1098 
1099 			if (inofreefwd == fwd)
1100 				ciagp = (struct iag *) amp->data;
1101 			else if (inofreefwd == back)
1102 				ciagp = (struct iag *) bmp->data;
1103 			else {
1104 				if ((rc =
1105 				     diIAGRead(imap, inofreefwd, &cmp)))
1106 					goto error_out;
1107 				ciagp = (struct iag *) cmp->data;
1108 			}
1109 			assert(ciagp != NULL);
1110 		}
1111 
1112 		if (inofreeback >= 0) {
1113 			if (inofreeback == fwd)
1114 				diagp = (struct iag *) amp->data;
1115 			else if (inofreeback == back)
1116 				diagp = (struct iag *) bmp->data;
1117 			else {
1118 				if ((rc =
1119 				     diIAGRead(imap, inofreeback, &dmp)))
1120 					goto error_out;
1121 				diagp = (struct iag *) dmp->data;
1122 			}
1123 			assert(diagp != NULL);
1124 		}
1125 	}
1126 
1127 	IREAD_UNLOCK(ipimap);
1128 
1129 	/*
1130 	 * invalidate any page of the inode extent freed from buffer cache;
1131 	 */
1132 	freepxd = iagp->inoext[extno];
1133 	invalidate_pxd_metapages(ip, freepxd);
1134 
1135 	/*
1136 	 *	update iag list(s) (careful update step 2)
1137 	 */
1138 	/* add the iag to the ag extent free list if this is the
1139 	 * first free extent for the iag.
1140 	 */
1141 	if (iagp->nfreeexts == 0) {
1142 		if (fwd >= 0)
1143 			aiagp->extfreeback = cpu_to_le32(iagno);
1144 
1145 		iagp->extfreefwd =
1146 		    cpu_to_le32(imap->im_agctl[agno].extfree);
1147 		iagp->extfreeback = cpu_to_le32(-1);
1148 		imap->im_agctl[agno].extfree = iagno;
1149 	} else {
1150 		/* remove the iag from the ag extent list if all extents
1151 		 * are now free and place it on the inode map iag free list.
1152 		 */
1153 		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1154 			if (fwd >= 0)
1155 				aiagp->extfreeback = iagp->extfreeback;
1156 
1157 			if (back >= 0)
1158 				biagp->extfreefwd = iagp->extfreefwd;
1159 			else
1160 				imap->im_agctl[agno].extfree =
1161 				    le32_to_cpu(iagp->extfreefwd);
1162 
1163 			iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
1164 
1165 			IAGFREE_LOCK(imap);
1166 			iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1167 			imap->im_freeiag = iagno;
1168 			IAGFREE_UNLOCK(imap);
1169 		}
1170 	}
1171 
1172 	/* remove the iag from the ag inode free list if freeing
1173 	 * this extent causes the iag to have no free inodes.
1174 	 */
1175 	if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1176 		if ((int) le32_to_cpu(iagp->inofreefwd) >= 0)
1177 			ciagp->inofreeback = iagp->inofreeback;
1178 
1179 		if ((int) le32_to_cpu(iagp->inofreeback) >= 0)
1180 			diagp->inofreefwd = iagp->inofreefwd;
1181 		else
1182 			imap->im_agctl[agno].inofree =
1183 			    le32_to_cpu(iagp->inofreefwd);
1184 
1185 		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
1186 	}
1187 
1188 	/* update the inode extent address and working map
1189 	 * to reflect the free extent.
1190 	 * the permanent map should have been updated already
1191 	 * for the inode being freed.
1192 	 */
1193 	if (iagp->pmap[extno] != 0) {
1194 		jfs_error(ip->i_sb, "the pmap does not show inode free\n");
1195 	}
1196 	iagp->wmap[extno] = 0;
1197 	PXDlength(&iagp->inoext[extno], 0);
1198 	PXDaddress(&iagp->inoext[extno], 0);
1199 
1200 	/* update the free extent and free inode summary maps
1201 	 * to reflect the freed extent.
1202 	 * the inode summary map is marked to indicate no inodes
1203 	 * available for the freed extent.
1204 	 */
1205 	sword = extno >> L2EXTSPERSUM;
1206 	bitno = extno & (EXTSPERSUM - 1);
1207 	mask = HIGHORDER >> bitno;
1208 	iagp->inosmap[sword] |= cpu_to_le32(mask);
1209 	iagp->extsmap[sword] &= cpu_to_le32(~mask);
1210 
1211 	/* update the number of free inodes and number of free extents
1212 	 * for the iag.
1213 	 */
1214 	le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1));
1215 	le32_add_cpu(&iagp->nfreeexts, 1);
1216 
1217 	/* update the number of free inodes and backed inodes
1218 	 * at the ag and inode map level.
1219 	 */
1220 	imap->im_agctl[agno].numfree -= (INOSPEREXT - 1);
1221 	imap->im_agctl[agno].numinos -= INOSPEREXT;
1222 	atomic_sub(INOSPEREXT - 1, &imap->im_numfree);
1223 	atomic_sub(INOSPEREXT, &imap->im_numinos);
1224 
1225 	if (amp)
1226 		write_metapage(amp);
1227 	if (bmp)
1228 		write_metapage(bmp);
1229 	if (cmp)
1230 		write_metapage(cmp);
1231 	if (dmp)
1232 		write_metapage(dmp);
1233 
1234 	/*
1235 	 * start transaction to update block allocation map
1236 	 * for the inode extent freed;
1237 	 *
1238 	 * N.B. AG_LOCK is released and iag will be released below, and
1239 	 * other thread may allocate inode from/reusing the ixad freed
1240 	 * BUT with new/different backing inode extent from the extent
1241 	 * to be freed by the transaction;
1242 	 */
1243 	tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
1244 	mutex_lock(&JFS_IP(ipimap)->commit_mutex);
1245 
1246 	/* acquire tlock of the iag page of the freed ixad
1247 	 * to force the page NOHOMEOK (even though no data is
1248 	 * logged from the iag page) until NOREDOPAGE|FREEXTENT log
1249 	 * for the free of the extent is committed;
1250 	 * write FREEXTENT|NOREDOPAGE log record
1251 	 * N.B. linelock is overlaid as freed extent descriptor;
1252 	 */
1253 	tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE);
1254 	pxdlock = (struct pxd_lock *) & tlck->lock;
1255 	pxdlock->flag = mlckFREEPXD;
1256 	pxdlock->pxd = freepxd;
1257 	pxdlock->index = 1;
1258 
1259 	write_metapage(mp);
1260 
1261 	iplist[0] = ipimap;
1262 
1263 	/*
1264 	 * logredo needs the IAG number and IAG extent index in order
1265 	 * to ensure that the IMap is consistent.  The least disruptive
1266 	 * way to pass these values through  to the transaction manager
1267 	 * is in the iplist array.
1268 	 *
1269 	 * It's not pretty, but it works.
1270 	 */
1271 	iplist[1] = (struct inode *) (size_t)iagno;
1272 	iplist[2] = (struct inode *) (size_t)extno;
1273 
1274 	rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
1275 
1276 	txEnd(tid);
1277 	mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
1278 
1279 	/* unlock the AG inode map information */
1280 	AG_UNLOCK(imap, agno);
1281 
1282 	return (0);
1283 
1284       error_out:
1285 	IREAD_UNLOCK(ipimap);
1286 
1287 	if (amp)
1288 		release_metapage(amp);
1289 	if (bmp)
1290 		release_metapage(bmp);
1291 	if (cmp)
1292 		release_metapage(cmp);
1293 	if (dmp)
1294 		release_metapage(dmp);
1295 
1296 	AG_UNLOCK(imap, agno);
1297 
1298 	release_metapage(mp);
1299 
1300 	return (rc);
1301 }
1302 
1303 /*
1304  * There are several places in the diAlloc* routines where we initialize
1305  * the inode.
1306  */
1307 static inline void
diInitInode(struct inode * ip,int iagno,int ino,int extno,struct iag * iagp)1308 diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1309 {
1310 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
1311 
1312 	ip->i_ino = (iagno << L2INOSPERIAG) + ino;
1313 	jfs_ip->ixpxd = iagp->inoext[extno];
1314 	jfs_ip->agstart = le64_to_cpu(iagp->agstart);
1315 	jfs_ip->active_ag = -1;
1316 }
1317 
1318 
1319 /*
1320  * NAME:	diAlloc(pip,dir,ip)
1321  *
1322  * FUNCTION:	allocate a disk inode from the inode working map
1323  *		for a fileset or aggregate.
1324  *
1325  * PARAMETERS:
1326  *	pip	- pointer to incore inode for the parent inode.
1327  *	dir	- 'true' if the new disk inode is for a directory.
1328  *	ip	- pointer to a new inode
1329  *
1330  * RETURN VALUES:
1331  *	0	- success.
1332  *	-ENOSPC	- insufficient disk resources.
1333  *	-EIO	- i/o error.
1334  */
diAlloc(struct inode * pip,bool dir,struct inode * ip)1335 int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1336 {
1337 	int rc, ino, iagno, addext, extno, bitno, sword;
1338 	int nwords, rem, i, agno;
1339 	u32 mask, inosmap, extsmap;
1340 	struct inode *ipimap;
1341 	struct metapage *mp;
1342 	ino_t inum;
1343 	struct iag *iagp;
1344 	struct inomap *imap;
1345 
1346 	/* get the pointers to the inode map inode and the
1347 	 * corresponding imap control structure.
1348 	 */
1349 	ipimap = JFS_SBI(pip->i_sb)->ipimap;
1350 	imap = JFS_IP(ipimap)->i_imap;
1351 	JFS_IP(ip)->ipimap = ipimap;
1352 	JFS_IP(ip)->fileset = FILESYSTEM_I;
1353 
1354 	/* for a directory, the allocation policy is to start
1355 	 * at the ag level using the preferred ag.
1356 	 */
1357 	if (dir) {
1358 		agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1359 		AG_LOCK(imap, agno);
1360 		goto tryag;
1361 	}
1362 
1363 	/* for files, the policy starts off by trying to allocate from
1364 	 * the same iag containing the parent disk inode:
1365 	 * try to allocate the new disk inode close to the parent disk
1366 	 * inode, using parent disk inode number + 1 as the allocation
1367 	 * hint.  (we use a left-to-right policy to attempt to avoid
1368 	 * moving backward on the disk.)  compute the hint within the
1369 	 * file system and the iag.
1370 	 */
1371 
1372 	/* get the ag number of this iag */
1373 	agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
1374 
1375 	if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
1376 		/*
1377 		 * There is an open file actively growing.  We want to
1378 		 * allocate new inodes from a different ag to avoid
1379 		 * fragmentation problems.
1380 		 */
1381 		agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1382 		AG_LOCK(imap, agno);
1383 		goto tryag;
1384 	}
1385 
1386 	inum = pip->i_ino + 1;
1387 	ino = inum & (INOSPERIAG - 1);
1388 
1389 	/* back off the hint if it is outside of the iag */
1390 	if (ino == 0)
1391 		inum = pip->i_ino;
1392 
1393 	/* lock the AG inode map information */
1394 	AG_LOCK(imap, agno);
1395 
1396 	/* Get read lock on imap inode */
1397 	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
1398 
1399 	/* get the iag number and read the iag */
1400 	iagno = INOTOIAG(inum);
1401 	if ((rc = diIAGRead(imap, iagno, &mp))) {
1402 		IREAD_UNLOCK(ipimap);
1403 		AG_UNLOCK(imap, agno);
1404 		return (rc);
1405 	}
1406 	iagp = (struct iag *) mp->data;
1407 
1408 	/* determine if new inode extent is allowed to be added to the iag.
1409 	 * new inode extent can be added to the iag if the ag
1410 	 * has less than 32 free disk inodes and the iag has free extents.
1411 	 */
1412 	addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
1413 
1414 	/*
1415 	 *	try to allocate from the IAG
1416 	 */
1417 	/* check if the inode may be allocated from the iag
1418 	 * (i.e. the inode has free inodes or new extent can be added).
1419 	 */
1420 	if (iagp->nfreeinos || addext) {
1421 		/* determine the extent number of the hint.
1422 		 */
1423 		extno = ino >> L2INOSPEREXT;
1424 
1425 		/* check if the extent containing the hint has backed
1426 		 * inodes.  if so, try to allocate within this extent.
1427 		 */
1428 		if (addressPXD(&iagp->inoext[extno])) {
1429 			bitno = ino & (INOSPEREXT - 1);
1430 			if ((bitno =
1431 			     diFindFree(le32_to_cpu(iagp->wmap[extno]),
1432 					bitno))
1433 			    < INOSPEREXT) {
1434 				ino = (extno << L2INOSPEREXT) + bitno;
1435 
1436 				/* a free inode (bit) was found within this
1437 				 * extent, so allocate it.
1438 				 */
1439 				rc = diAllocBit(imap, iagp, ino);
1440 				IREAD_UNLOCK(ipimap);
1441 				if (rc) {
1442 					assert(rc == -EIO);
1443 				} else {
1444 					/* set the results of the allocation
1445 					 * and write the iag.
1446 					 */
1447 					diInitInode(ip, iagno, ino, extno,
1448 						    iagp);
1449 					mark_metapage_dirty(mp);
1450 				}
1451 				release_metapage(mp);
1452 
1453 				/* free the AG lock and return.
1454 				 */
1455 				AG_UNLOCK(imap, agno);
1456 				return (rc);
1457 			}
1458 
1459 			if (!addext)
1460 				extno =
1461 				    (extno ==
1462 				     EXTSPERIAG - 1) ? 0 : extno + 1;
1463 		}
1464 
1465 		/*
1466 		 * no free inodes within the extent containing the hint.
1467 		 *
1468 		 * try to allocate from the backed extents following
1469 		 * hint or, if appropriate (i.e. addext is true), allocate
1470 		 * an extent of free inodes at or following the extent
1471 		 * containing the hint.
1472 		 *
1473 		 * the free inode and free extent summary maps are used
1474 		 * here, so determine the starting summary map position
1475 		 * and the number of words we'll have to examine.  again,
1476 		 * the approach is to allocate following the hint, so we
1477 		 * might have to initially ignore prior bits of the summary
1478 		 * map that represent extents prior to the extent containing
1479 		 * the hint and later revisit these bits.
1480 		 */
1481 		bitno = extno & (EXTSPERSUM - 1);
1482 		nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1;
1483 		sword = extno >> L2EXTSPERSUM;
1484 
1485 		/* mask any prior bits for the starting words of the
1486 		 * summary map.
1487 		 */
1488 		mask = (bitno == 0) ? 0 : (ONES << (EXTSPERSUM - bitno));
1489 		inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask;
1490 		extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask;
1491 
1492 		/* scan the free inode and free extent summary maps for
1493 		 * free resources.
1494 		 */
1495 		for (i = 0; i < nwords; i++) {
1496 			/* check if this word of the free inode summary
1497 			 * map describes an extent with free inodes.
1498 			 */
1499 			if (~inosmap) {
1500 				/* an extent with free inodes has been
1501 				 * found. determine the extent number
1502 				 * and the inode number within the extent.
1503 				 */
1504 				rem = diFindFree(inosmap, 0);
1505 				extno = (sword << L2EXTSPERSUM) + rem;
1506 				rem = diFindFree(le32_to_cpu(iagp->wmap[extno]),
1507 						 0);
1508 				if (rem >= INOSPEREXT) {
1509 					IREAD_UNLOCK(ipimap);
1510 					release_metapage(mp);
1511 					AG_UNLOCK(imap, agno);
1512 					jfs_error(ip->i_sb,
1513 						  "can't find free bit in wmap\n");
1514 					return -EIO;
1515 				}
1516 
1517 				/* determine the inode number within the
1518 				 * iag and allocate the inode from the
1519 				 * map.
1520 				 */
1521 				ino = (extno << L2INOSPEREXT) + rem;
1522 				rc = diAllocBit(imap, iagp, ino);
1523 				IREAD_UNLOCK(ipimap);
1524 				if (rc)
1525 					assert(rc == -EIO);
1526 				else {
1527 					/* set the results of the allocation
1528 					 * and write the iag.
1529 					 */
1530 					diInitInode(ip, iagno, ino, extno,
1531 						    iagp);
1532 					mark_metapage_dirty(mp);
1533 				}
1534 				release_metapage(mp);
1535 
1536 				/* free the AG lock and return.
1537 				 */
1538 				AG_UNLOCK(imap, agno);
1539 				return (rc);
1540 
1541 			}
1542 
1543 			/* check if we may allocate an extent of free
1544 			 * inodes and whether this word of the free
1545 			 * extents summary map describes a free extent.
1546 			 */
1547 			if (addext && ~extsmap) {
1548 				/* a free extent has been found.  determine
1549 				 * the extent number.
1550 				 */
1551 				rem = diFindFree(extsmap, 0);
1552 				extno = (sword << L2EXTSPERSUM) + rem;
1553 
1554 				/* allocate an extent of free inodes.
1555 				 */
1556 				if ((rc = diNewExt(imap, iagp, extno))) {
1557 					/* if there is no disk space for a
1558 					 * new extent, try to allocate the
1559 					 * disk inode from somewhere else.
1560 					 */
1561 					if (rc == -ENOSPC)
1562 						break;
1563 
1564 					assert(rc == -EIO);
1565 				} else {
1566 					/* set the results of the allocation
1567 					 * and write the iag.
1568 					 */
1569 					diInitInode(ip, iagno,
1570 						    extno << L2INOSPEREXT,
1571 						    extno, iagp);
1572 					mark_metapage_dirty(mp);
1573 				}
1574 				release_metapage(mp);
1575 				/* free the imap inode & the AG lock & return.
1576 				 */
1577 				IREAD_UNLOCK(ipimap);
1578 				AG_UNLOCK(imap, agno);
1579 				return (rc);
1580 			}
1581 
1582 			/* move on to the next set of summary map words.
1583 			 */
1584 			sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
1585 			inosmap = le32_to_cpu(iagp->inosmap[sword]);
1586 			extsmap = le32_to_cpu(iagp->extsmap[sword]);
1587 		}
1588 	}
1589 	/* unlock imap inode */
1590 	IREAD_UNLOCK(ipimap);
1591 
1592 	/* nothing doing in this iag, so release it. */
1593 	release_metapage(mp);
1594 
1595       tryag:
1596 	/*
1597 	 * try to allocate anywhere within the same AG as the parent inode.
1598 	 */
1599 	rc = diAllocAG(imap, agno, dir, ip);
1600 
1601 	AG_UNLOCK(imap, agno);
1602 
1603 	if (rc != -ENOSPC)
1604 		return (rc);
1605 
1606 	/*
1607 	 * try to allocate in any AG.
1608 	 */
1609 	return (diAllocAny(imap, agno, dir, ip));
1610 }
1611 
1612 
1613 /*
1614  * NAME:	diAllocAG(imap,agno,dir,ip)
1615  *
1616  * FUNCTION:	allocate a disk inode from the allocation group.
1617  *
1618  *		this routine first determines if a new extent of free
1619  *		inodes should be added for the allocation group, with
1620  *		the current request satisfied from this extent. if this
1621  *		is the case, an attempt will be made to do just that.  if
1622  *		this attempt fails or it has been determined that a new
1623  *		extent should not be added, an attempt is made to satisfy
1624  *		the request by allocating an existing (backed) free inode
1625  *		from the allocation group.
1626  *
1627  * PRE CONDITION: Already have the AG lock for this AG.
1628  *
1629  * PARAMETERS:
1630  *	imap	- pointer to inode map control structure.
1631  *	agno	- allocation group to allocate from.
1632  *	dir	- 'true' if the new disk inode is for a directory.
1633  *	ip	- pointer to the new inode to be filled in on successful return
1634  *		  with the disk inode number allocated, its extent address
1635  *		  and the start of the ag.
1636  *
1637  * RETURN VALUES:
1638  *	0	- success.
1639  *	-ENOSPC	- insufficient disk resources.
1640  *	-EIO	- i/o error.
1641  */
1642 static int
diAllocAG(struct inomap * imap,int agno,bool dir,struct inode * ip)1643 diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1644 {
1645 	int rc, addext, numfree, numinos;
1646 
1647 	/* get the number of free and the number of backed disk
1648 	 * inodes currently within the ag.
1649 	 */
1650 	numfree = imap->im_agctl[agno].numfree;
1651 	numinos = imap->im_agctl[agno].numinos;
1652 
1653 	if (numfree > numinos) {
1654 		jfs_error(ip->i_sb, "numfree > numinos\n");
1655 		return -EIO;
1656 	}
1657 
1658 	/* determine if we should allocate a new extent of free inodes
1659 	 * within the ag: for directory inodes, add a new extent
1660 	 * if there are a small number of free inodes or number of free
1661 	 * inodes is a small percentage of the number of backed inodes.
1662 	 */
1663 	if (dir)
1664 		addext = (numfree < 64 ||
1665 			  (numfree < 256
1666 			   && ((numfree * 100) / numinos) <= 20));
1667 	else
1668 		addext = (numfree == 0);
1669 
1670 	/*
1671 	 * try to allocate a new extent of free inodes.
1672 	 */
1673 	if (addext) {
1674 		/* if free space is not available for this new extent, try
1675 		 * below to allocate a free and existing (already backed)
1676 		 * inode from the ag.
1677 		 */
1678 		if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC)
1679 			return (rc);
1680 	}
1681 
1682 	/*
1683 	 * try to allocate an existing free inode from the ag.
1684 	 */
1685 	return (diAllocIno(imap, agno, ip));
1686 }
1687 
1688 
1689 /*
 * NAME:	diAllocAny(imap,agno,dir,ip)
1691  *
1692  * FUNCTION:	allocate a disk inode from any other allocation group.
1693  *
 *		this routine is called when an allocation attempt within
 *		the primary allocation group has failed. it attempts to
 *		allocate an inode from any allocation group other than the
 *		specified primary group.
1698  *
1699  * PARAMETERS:
1700  *	imap	- pointer to inode map control structure.
1701  *	agno	- primary allocation group (to avoid).
1702  *	dir	- 'true' if the new disk inode is for a directory.
1703  *	ip	- pointer to a new inode to be filled in on successful return
1704  *		  with the disk inode number allocated, its extent address
1705  *		  and the start of the ag.
1706  *
1707  * RETURN VALUES:
1708  *	0	- success.
1709  *	-ENOSPC	- insufficient disk resources.
1710  *	-EIO	- i/o error.
1711  */
1712 static int
diAllocAny(struct inomap * imap,int agno,bool dir,struct inode * ip)1713 diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1714 {
1715 	int ag, rc;
1716 	int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
1717 
1718 
1719 	/* try to allocate from the ags following agno up to
1720 	 * the maximum ag number.
1721 	 */
1722 	for (ag = agno + 1; ag <= maxag; ag++) {
1723 		AG_LOCK(imap, ag);
1724 
1725 		rc = diAllocAG(imap, ag, dir, ip);
1726 
1727 		AG_UNLOCK(imap, ag);
1728 
1729 		if (rc != -ENOSPC)
1730 			return (rc);
1731 	}
1732 
1733 	/* try to allocate from the ags in front of agno.
1734 	 */
1735 	for (ag = 0; ag < agno; ag++) {
1736 		AG_LOCK(imap, ag);
1737 
1738 		rc = diAllocAG(imap, ag, dir, ip);
1739 
1740 		AG_UNLOCK(imap, ag);
1741 
1742 		if (rc != -ENOSPC)
1743 			return (rc);
1744 	}
1745 
1746 	/* no free disk inodes.
1747 	 */
1748 	return -ENOSPC;
1749 }
1750 
1751 
1752 /*
1753  * NAME:	diAllocIno(imap,agno,ip)
1754  *
1755  * FUNCTION:	allocate a disk inode from the allocation group's free
1756  *		inode list, returning an error if this free list is
1757  *		empty (i.e. no iags on the list).
1758  *
1759  *		allocation occurs from the first iag on the list using
1760  *		the iag's free inode summary map to find the leftmost
1761  *		free inode in the iag.
1762  *
1763  * PRE CONDITION: Already have AG lock for this AG.
1764  *
1765  * PARAMETERS:
1766  *	imap	- pointer to inode map control structure.
1767  *	agno	- allocation group.
1768  *	ip	- pointer to new inode to be filled in on successful return
1769  *		  with the disk inode number allocated, its extent address
1770  *		  and the start of the ag.
1771  *
1772  * RETURN VALUES:
1773  *	0	- success.
1774  *	-ENOSPC	- insufficient disk resources.
1775  *	-EIO	- i/o error.
1776  */
diAllocIno(struct inomap * imap,int agno,struct inode * ip)1777 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1778 {
1779 	int iagno, ino, rc, rem, extno, sword;
1780 	struct metapage *mp;
1781 	struct iag *iagp;
1782 
1783 	/* check if there are iags on the ag's free inode list.
1784 	 */
1785 	if ((iagno = imap->im_agctl[agno].inofree) < 0)
1786 		return -ENOSPC;
1787 
1788 	/* obtain read lock on imap inode */
1789 	IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1790 
1791 	/* read the iag at the head of the list.
1792 	 */
1793 	if ((rc = diIAGRead(imap, iagno, &mp))) {
1794 		IREAD_UNLOCK(imap->im_ipimap);
1795 		return (rc);
1796 	}
1797 	iagp = (struct iag *) mp->data;
1798 
1799 	/* better be free inodes in this iag if it is on the
1800 	 * list.
1801 	 */
1802 	if (!iagp->nfreeinos) {
1803 		IREAD_UNLOCK(imap->im_ipimap);
1804 		release_metapage(mp);
1805 		jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n");
1806 		return -EIO;
1807 	}
1808 
1809 	/* scan the free inode summary map to find an extent
1810 	 * with free inodes.
1811 	 */
1812 	for (sword = 0;; sword++) {
1813 		if (sword >= SMAPSZ) {
1814 			IREAD_UNLOCK(imap->im_ipimap);
1815 			release_metapage(mp);
1816 			jfs_error(ip->i_sb,
1817 				  "free inode not found in summary map\n");
1818 			return -EIO;
1819 		}
1820 
1821 		if (~iagp->inosmap[sword])
1822 			break;
1823 	}
1824 
1825 	/* found a extent with free inodes. determine
1826 	 * the extent number.
1827 	 */
1828 	rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0);
1829 	if (rem >= EXTSPERSUM) {
1830 		IREAD_UNLOCK(imap->im_ipimap);
1831 		release_metapage(mp);
1832 		jfs_error(ip->i_sb, "no free extent found\n");
1833 		return -EIO;
1834 	}
1835 	extno = (sword << L2EXTSPERSUM) + rem;
1836 
1837 	/* find the first free inode in the extent.
1838 	 */
1839 	rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0);
1840 	if (rem >= INOSPEREXT) {
1841 		IREAD_UNLOCK(imap->im_ipimap);
1842 		release_metapage(mp);
1843 		jfs_error(ip->i_sb, "free inode not found\n");
1844 		return -EIO;
1845 	}
1846 
1847 	/* compute the inode number within the iag.
1848 	 */
1849 	ino = (extno << L2INOSPEREXT) + rem;
1850 
1851 	/* allocate the inode.
1852 	 */
1853 	rc = diAllocBit(imap, iagp, ino);
1854 	IREAD_UNLOCK(imap->im_ipimap);
1855 	if (rc) {
1856 		release_metapage(mp);
1857 		return (rc);
1858 	}
1859 
1860 	/* set the results of the allocation and write the iag.
1861 	 */
1862 	diInitInode(ip, iagno, ino, extno, iagp);
1863 	write_metapage(mp);
1864 
1865 	return (0);
1866 }
1867 
1868 
1869 /*
1870  * NAME:	diAllocExt(imap,agno,ip)
1871  *
1872  * FUNCTION:	add a new extent of free inodes to an iag, allocating
1873  *		an inode from this extent to satisfy the current allocation
1874  *		request.
1875  *
1876  *		this routine first tries to find an existing iag with free
1877  *		extents through the ag free extent list.  if list is not
1878  *		empty, the head of the list will be selected as the home
1879  *		of the new extent of free inodes.  otherwise (the list is
1880  *		empty), a new iag will be allocated for the ag to contain
1881  *		the extent.
1882  *
1883  *		once an iag has been selected, the free extent summary map
1884  *		is used to locate a free extent within the iag and diNewExt()
1885  *		is called to initialize the extent, with initialization
1886  *		including the allocation of the first inode of the extent
1887  *		for the purpose of satisfying this request.
1888  *
1889  * PARAMETERS:
1890  *	imap	- pointer to inode map control structure.
1891  *	agno	- allocation group number.
1892  *	ip	- pointer to new inode to be filled in on successful return
1893  *		  with the disk inode number allocated, its extent address
1894  *		  and the start of the ag.
1895  *
1896  * RETURN VALUES:
1897  *	0	- success.
1898  *	-ENOSPC	- insufficient disk resources.
1899  *	-EIO	- i/o error.
1900  */
diAllocExt(struct inomap * imap,int agno,struct inode * ip)1901 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
1902 {
1903 	int rem, iagno, sword, extno, rc;
1904 	struct metapage *mp;
1905 	struct iag *iagp;
1906 
1907 	/* check if the ag has any iags with free extents.  if not,
1908 	 * allocate a new iag for the ag.
1909 	 */
1910 	if ((iagno = imap->im_agctl[agno].extfree) < 0) {
1911 		/* If successful, diNewIAG will obtain the read lock on the
1912 		 * imap inode.
1913 		 */
1914 		if ((rc = diNewIAG(imap, &iagno, agno, &mp))) {
1915 			return (rc);
1916 		}
1917 		iagp = (struct iag *) mp->data;
1918 
1919 		/* set the ag number if this a brand new iag
1920 		 */
1921 		iagp->agstart =
1922 		    cpu_to_le64(AGTOBLK(agno, imap->im_ipimap));
1923 	} else {
1924 		/* read the iag.
1925 		 */
1926 		IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1927 		if ((rc = diIAGRead(imap, iagno, &mp))) {
1928 			IREAD_UNLOCK(imap->im_ipimap);
1929 			jfs_error(ip->i_sb, "error reading iag\n");
1930 			return rc;
1931 		}
1932 		iagp = (struct iag *) mp->data;
1933 	}
1934 
1935 	/* using the free extent summary map, find a free extent.
1936 	 */
1937 	for (sword = 0;; sword++) {
1938 		if (sword >= SMAPSZ) {
1939 			release_metapage(mp);
1940 			IREAD_UNLOCK(imap->im_ipimap);
1941 			jfs_error(ip->i_sb, "free ext summary map not found\n");
1942 			return -EIO;
1943 		}
1944 		if (~iagp->extsmap[sword])
1945 			break;
1946 	}
1947 
1948 	/* determine the extent number of the free extent.
1949 	 */
1950 	rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0);
1951 	if (rem >= EXTSPERSUM) {
1952 		release_metapage(mp);
1953 		IREAD_UNLOCK(imap->im_ipimap);
1954 		jfs_error(ip->i_sb, "free extent not found\n");
1955 		return -EIO;
1956 	}
1957 	extno = (sword << L2EXTSPERSUM) + rem;
1958 
1959 	/* initialize the new extent.
1960 	 */
1961 	rc = diNewExt(imap, iagp, extno);
1962 	IREAD_UNLOCK(imap->im_ipimap);
1963 	if (rc) {
1964 		/* something bad happened.  if a new iag was allocated,
1965 		 * place it back on the inode map's iag free list, and
1966 		 * clear the ag number information.
1967 		 */
1968 		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
1969 			IAGFREE_LOCK(imap);
1970 			iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1971 			imap->im_freeiag = iagno;
1972 			IAGFREE_UNLOCK(imap);
1973 		}
1974 		write_metapage(mp);
1975 		return (rc);
1976 	}
1977 
1978 	/* set the results of the allocation and write the iag.
1979 	 */
1980 	diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp);
1981 
1982 	write_metapage(mp);
1983 
1984 	return (0);
1985 }
1986 
1987 
1988 /*
1989  * NAME:	diAllocBit(imap,iagp,ino)
1990  *
1991  * FUNCTION:	allocate a backed inode from an iag.
1992  *
1993  *		this routine performs the mechanics of allocating a
1994  *		specified inode from a backed extent.
1995  *
1996  *		if the inode to be allocated represents the last free
1997  *		inode within the iag, the iag will be removed from the
1998  *		ag free inode list.
1999  *
2000  *		a careful update approach is used to provide consistency
2001  *		in the face of updates to multiple buffers.  under this
2002  *		approach, all required buffers are obtained before making
 *		any updates and are held until all updates are complete.
2004  *
2005  * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
2006  *	this AG.  Must have read lock on imap inode.
2007  *
2008  * PARAMETERS:
2009  *	imap	- pointer to inode map control structure.
2010  *	iagp	- pointer to iag.
2011  *	ino	- inode number to be allocated within the iag.
2012  *
2013  * RETURN VALUES:
2014  *	0	- success.
2015  *	-ENOSPC	- insufficient disk resources.
2016  *	-EIO	- i/o error.
2017  */
diAllocBit(struct inomap * imap,struct iag * iagp,int ino)2018 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2019 {
2020 	int extno, bitno, agno, sword, rc;
2021 	struct metapage *amp = NULL, *bmp = NULL;
2022 	struct iag *aiagp = NULL, *biagp = NULL;
2023 	u32 mask;
2024 
2025 	/* check if this is the last free inode within the iag.
2026 	 * if so, it will have to be removed from the ag free
2027 	 * inode list, so get the iags preceding and following
2028 	 * it on the list.
2029 	 */
2030 	if (iagp->nfreeinos == cpu_to_le32(1)) {
2031 		if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) {
2032 			if ((rc =
2033 			     diIAGRead(imap, le32_to_cpu(iagp->inofreefwd),
2034 				       &amp)))
2035 				return (rc);
2036 			aiagp = (struct iag *) amp->data;
2037 		}
2038 
2039 		if ((int) le32_to_cpu(iagp->inofreeback) >= 0) {
2040 			if ((rc =
2041 			     diIAGRead(imap,
2042 				       le32_to_cpu(iagp->inofreeback),
2043 				       &bmp))) {
2044 				if (amp)
2045 					release_metapage(amp);
2046 				return (rc);
2047 			}
2048 			biagp = (struct iag *) bmp->data;
2049 		}
2050 	}
2051 
2052 	/* get the ag number, extent number, inode number within
2053 	 * the extent.
2054 	 */
2055 	agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb));
2056 	extno = ino >> L2INOSPEREXT;
2057 	bitno = ino & (INOSPEREXT - 1);
2058 
2059 	/* compute the mask for setting the map.
2060 	 */
2061 	mask = HIGHORDER >> bitno;
2062 
2063 	/* the inode should be free and backed.
2064 	 */
2065 	if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) ||
2066 	    ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) ||
2067 	    (addressPXD(&iagp->inoext[extno]) == 0)) {
2068 		if (amp)
2069 			release_metapage(amp);
2070 		if (bmp)
2071 			release_metapage(bmp);
2072 
2073 		jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n");
2074 		return -EIO;
2075 	}
2076 
2077 	/* mark the inode as allocated in the working map.
2078 	 */
2079 	iagp->wmap[extno] |= cpu_to_le32(mask);
2080 
2081 	/* check if all inodes within the extent are now
2082 	 * allocated.  if so, update the free inode summary
2083 	 * map to reflect this.
2084 	 */
2085 	if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
2086 		sword = extno >> L2EXTSPERSUM;
2087 		bitno = extno & (EXTSPERSUM - 1);
2088 		iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno);
2089 	}
2090 
2091 	/* if this was the last free inode in the iag, remove the
2092 	 * iag from the ag free inode list.
2093 	 */
2094 	if (iagp->nfreeinos == cpu_to_le32(1)) {
2095 		if (amp) {
2096 			aiagp->inofreeback = iagp->inofreeback;
2097 			write_metapage(amp);
2098 		}
2099 
2100 		if (bmp) {
2101 			biagp->inofreefwd = iagp->inofreefwd;
2102 			write_metapage(bmp);
2103 		} else {
2104 			imap->im_agctl[agno].inofree =
2105 			    le32_to_cpu(iagp->inofreefwd);
2106 		}
2107 		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
2108 	}
2109 
2110 	/* update the free inode count at the iag, ag, inode
2111 	 * map levels.
2112 	 */
2113 	le32_add_cpu(&iagp->nfreeinos, -1);
2114 	imap->im_agctl[agno].numfree -= 1;
2115 	atomic_dec(&imap->im_numfree);
2116 
2117 	return (0);
2118 }
2119 
2120 
2121 /*
2122  * NAME:	diNewExt(imap,iagp,extno)
2123  *
2124  * FUNCTION:	initialize a new extent of inodes for an iag, allocating
2125  *		the first inode of the extent for use for the current
2126  *		allocation request.
2127  *
2128  *		disk resources are allocated for the new extent of inodes
2129  *		and the inodes themselves are initialized to reflect their
2130  *		existence within the extent (i.e. their inode numbers and
2131  *		inode extent addresses are set) and their initial state
2132  *		(mode and link count are set to zero).
2133  *
2134  *		if the iag is new, it is not yet on an ag extent free list
2135  *		but will now be placed on this list.
2136  *
2137  *		if the allocation of the new extent causes the iag to
2138  *		have no free extent, the iag will be removed from the
2139  *		ag extent free list.
2140  *
2141  *		if the iag has no free backed inodes, it will be placed
2142  *		on the ag free inode list, since the addition of the new
2143  *		extent will now cause it to have free inodes.
2144  *
2145  *		a careful update approach is used to provide consistency
2146  *		(i.e. list consistency) in the face of updates to multiple
2147  *		buffers.  under this approach, all required buffers are
2148  *		obtained before making any updates and are held until all
2149  *		updates are complete.
2150  *
2151  * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
2152  *	this AG.  Must have read lock on imap inode.
2153  *
2154  * PARAMETERS:
2155  *	imap	- pointer to inode map control structure.
2156  *	iagp	- pointer to iag.
2157  *	extno	- extent number.
2158  *
2159  * RETURN VALUES:
2160  *	0	- success.
2161  *	-ENOSPC	- insufficient disk resources.
2162  *	-EIO	- i/o error.
2163  */
diNewExt(struct inomap * imap,struct iag * iagp,int extno)2164 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2165 {
2166 	int agno, iagno, fwd, back, freei = 0, sword, rc;
2167 	struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL;
2168 	struct metapage *amp, *bmp, *cmp, *dmp;
2169 	struct inode *ipimap;
2170 	s64 blkno, hint;
2171 	int i, j;
2172 	u32 mask;
2173 	ino_t ino;
2174 	struct dinode *dp;
2175 	struct jfs_sb_info *sbi;
2176 
2177 	/* better have free extents.
2178 	 */
2179 	if (!iagp->nfreeexts) {
2180 		jfs_error(imap->im_ipimap->i_sb, "no free extents\n");
2181 		return -EIO;
2182 	}
2183 
2184 	/* get the inode map inode.
2185 	 */
2186 	ipimap = imap->im_ipimap;
2187 	sbi = JFS_SBI(ipimap->i_sb);
2188 
2189 	amp = bmp = cmp = NULL;
2190 
2191 	/* get the ag and iag numbers for this iag.
2192 	 */
2193 	agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
2194 	iagno = le32_to_cpu(iagp->iagnum);
2195 
2196 	/* check if this is the last free extent within the
2197 	 * iag.  if so, the iag must be removed from the ag
2198 	 * free extent list, so get the iags preceding and
2199 	 * following the iag on this list.
2200 	 */
2201 	if (iagp->nfreeexts == cpu_to_le32(1)) {
2202 		if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
2203 			if ((rc = diIAGRead(imap, fwd, &amp)))
2204 				return (rc);
2205 			aiagp = (struct iag *) amp->data;
2206 		}
2207 
2208 		if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
2209 			if ((rc = diIAGRead(imap, back, &bmp)))
2210 				goto error_out;
2211 			biagp = (struct iag *) bmp->data;
2212 		}
2213 	} else {
2214 		/* the iag has free extents.  if all extents are free
2215 		 * (as is the case for a newly allocated iag), the iag
2216 		 * must be added to the ag free extent list, so get
2217 		 * the iag at the head of the list in preparation for
2218 		 * adding this iag to this list.
2219 		 */
2220 		fwd = back = -1;
2221 		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2222 			if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
2223 				if ((rc = diIAGRead(imap, fwd, &amp)))
2224 					goto error_out;
2225 				aiagp = (struct iag *) amp->data;
2226 			}
2227 		}
2228 	}
2229 
2230 	/* check if the iag has no free inodes.  if so, the iag
2231 	 * will have to be added to the ag free inode list, so get
2232 	 * the iag at the head of the list in preparation for
2233 	 * adding this iag to this list.  in doing this, we must
2234 	 * check if we already have the iag at the head of
2235 	 * the list in hand.
2236 	 */
2237 	if (iagp->nfreeinos == 0) {
2238 		freei = imap->im_agctl[agno].inofree;
2239 
2240 		if (freei >= 0) {
2241 			if (freei == fwd) {
2242 				ciagp = aiagp;
2243 			} else if (freei == back) {
2244 				ciagp = biagp;
2245 			} else {
2246 				if ((rc = diIAGRead(imap, freei, &cmp)))
2247 					goto error_out;
2248 				ciagp = (struct iag *) cmp->data;
2249 			}
2250 			if (ciagp == NULL) {
2251 				jfs_error(imap->im_ipimap->i_sb,
2252 					  "ciagp == NULL\n");
2253 				rc = -EIO;
2254 				goto error_out;
2255 			}
2256 		}
2257 	}
2258 
2259 	/* allocate disk space for the inode extent.
2260 	 */
2261 	if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0))
2262 		hint = ((s64) agno << sbi->bmap->db_agl2size) - 1;
2263 	else
2264 		hint = addressPXD(&iagp->inoext[extno - 1]) +
2265 		    lengthPXD(&iagp->inoext[extno - 1]) - 1;
2266 
2267 	if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno)))
2268 		goto error_out;
2269 
2270 	/* compute the inode number of the first inode within the
2271 	 * extent.
2272 	 */
2273 	ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT);
2274 
2275 	/* initialize the inodes within the newly allocated extent a
2276 	 * page at a time.
2277 	 */
2278 	for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) {
2279 		/* get a buffer for this page of disk inodes.
2280 		 */
2281 		dmp = get_metapage(ipimap, blkno + i, PSIZE, 1);
2282 		if (dmp == NULL) {
2283 			rc = -EIO;
2284 			goto error_out;
2285 		}
2286 		dp = (struct dinode *) dmp->data;
2287 
2288 		/* initialize the inode number, mode, link count and
2289 		 * inode extent address.
2290 		 */
2291 		for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) {
2292 			dp->di_inostamp = cpu_to_le32(sbi->inostamp);
2293 			dp->di_number = cpu_to_le32(ino);
2294 			dp->di_fileset = cpu_to_le32(FILESYSTEM_I);
2295 			dp->di_mode = 0;
2296 			dp->di_nlink = 0;
2297 			PXDaddress(&(dp->di_ixpxd), blkno);
2298 			PXDlength(&(dp->di_ixpxd), imap->im_nbperiext);
2299 		}
2300 		write_metapage(dmp);
2301 	}
2302 
2303 	/* if this is the last free extent within the iag, remove the
2304 	 * iag from the ag free extent list.
2305 	 */
2306 	if (iagp->nfreeexts == cpu_to_le32(1)) {
2307 		if (fwd >= 0)
2308 			aiagp->extfreeback = iagp->extfreeback;
2309 
2310 		if (back >= 0)
2311 			biagp->extfreefwd = iagp->extfreefwd;
2312 		else
2313 			imap->im_agctl[agno].extfree =
2314 			    le32_to_cpu(iagp->extfreefwd);
2315 
2316 		iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
2317 	} else {
2318 		/* if the iag has all free extents (newly allocated iag),
2319 		 * add the iag to the ag free extent list.
2320 		 */
2321 		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2322 			if (fwd >= 0)
2323 				aiagp->extfreeback = cpu_to_le32(iagno);
2324 
2325 			iagp->extfreefwd = cpu_to_le32(fwd);
2326 			iagp->extfreeback = cpu_to_le32(-1);
2327 			imap->im_agctl[agno].extfree = iagno;
2328 		}
2329 	}
2330 
2331 	/* if the iag has no free inodes, add the iag to the
2332 	 * ag free inode list.
2333 	 */
2334 	if (iagp->nfreeinos == 0) {
2335 		if (freei >= 0)
2336 			ciagp->inofreeback = cpu_to_le32(iagno);
2337 
2338 		iagp->inofreefwd =
2339 		    cpu_to_le32(imap->im_agctl[agno].inofree);
2340 		iagp->inofreeback = cpu_to_le32(-1);
2341 		imap->im_agctl[agno].inofree = iagno;
2342 	}
2343 
2344 	/* initialize the extent descriptor of the extent. */
2345 	PXDlength(&iagp->inoext[extno], imap->im_nbperiext);
2346 	PXDaddress(&iagp->inoext[extno], blkno);
2347 
2348 	/* initialize the working and persistent map of the extent.
2349 	 * the working map will be initialized such that
2350 	 * it indicates the first inode of the extent is allocated.
2351 	 */
2352 	iagp->wmap[extno] = cpu_to_le32(HIGHORDER);
2353 	iagp->pmap[extno] = 0;
2354 
2355 	/* update the free inode and free extent summary maps
2356 	 * for the extent to indicate the extent has free inodes
2357 	 * and no longer represents a free extent.
2358 	 */
2359 	sword = extno >> L2EXTSPERSUM;
2360 	mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
2361 	iagp->extsmap[sword] |= cpu_to_le32(mask);
2362 	iagp->inosmap[sword] &= cpu_to_le32(~mask);
2363 
2364 	/* update the free inode and free extent counts for the
2365 	 * iag.
2366 	 */
2367 	le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
2368 	le32_add_cpu(&iagp->nfreeexts, -1);
2369 
2370 	/* update the free and backed inode counts for the ag.
2371 	 */
2372 	imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
2373 	imap->im_agctl[agno].numinos += INOSPEREXT;
2374 
2375 	/* update the free and backed inode counts for the inode map.
2376 	 */
2377 	atomic_add(INOSPEREXT - 1, &imap->im_numfree);
2378 	atomic_add(INOSPEREXT, &imap->im_numinos);
2379 
2380 	/* write the iags.
2381 	 */
2382 	if (amp)
2383 		write_metapage(amp);
2384 	if (bmp)
2385 		write_metapage(bmp);
2386 	if (cmp)
2387 		write_metapage(cmp);
2388 
2389 	return (0);
2390 
2391       error_out:
2392 
2393 	/* release the iags.
2394 	 */
2395 	if (amp)
2396 		release_metapage(amp);
2397 	if (bmp)
2398 		release_metapage(bmp);
2399 	if (cmp)
2400 		release_metapage(cmp);
2401 
2402 	return (rc);
2403 }
2404 
2405 
/*
 * NAME:	diNewIAG(imap,iagnop,agno,mpp)
 *
 * FUNCTION:	allocate a new iag for an allocation group.
 *
 *		first tries to allocate the iag from the inode map
 *		iagfree list:
 *		if the list has free iags, the head of the list is removed
 *		and returned to satisfy the request.
 *		if the inode map's iag free list is empty, the inode map
 *		is extended to hold a new iag. this new iag is initialized
 *		and returned to satisfy the request.
 *
 * PARAMETERS:
 *	imap	- pointer to inode map control structure.
 *	iagnop	- pointer to an iag number set with the number of the
 *		  newly allocated iag upon successful return.
 *	agno	- allocation group number (not referenced by the body).
 *	mpp	- Buffer pointer to be filled in with new IAG's buffer
 *
 * RETURN VALUES:
 *	0	- success.
 *	-ENOSPC	- the next iag number would exceed MAXIAGS - 1.
 *	-EIO	- i/o error, or the inode map size does not match
 *		  im_nextiag.
 *	other	- error returned by dbAlloc() or xtInsert().
 *
 * serialization:
 *	AG lock held on entry/exit;
 *	write lock on the map is held inside;
 *	read lock on the map is held on successful completion;
 *
 * note: new iag transaction:
 * . synchronously write iag;
 * . write log of xtree and inode of imap;
 * . commit;
 * . synchronous write of xtree (right to left, bottom to top);
 * . at start of logredo(): init in-memory imap with one additional iag page;
 * . at end of logredo(): re-read imap inode to determine
 *   new imap size;
 */
static int
diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
{
	int rc;
	int iagno, i, xlen;
	struct inode *ipimap;
	struct super_block *sb;
	struct jfs_sb_info *sbi;
	struct metapage *mp;
	struct iag *iagp;
	s64 xaddr = 0;
	s64 blkno;
	tid_t tid;
	struct inode *iplist[1];

	/* pick up pointers to the inode map and mount inodes */
	ipimap = imap->im_ipimap;
	sb = ipimap->i_sb;
	sbi = JFS_SBI(sb);

	/* acquire the free iag lock */
	IAGFREE_LOCK(imap);

	/* if there are any iags on the inode map free iag list,
	 * allocate the iag from the head of the list.
	 */
	if (imap->im_freeiag >= 0) {
		/* pick up the iag number at the head of the list */
		iagno = imap->im_freeiag;

		/* determine the logical block number of the iag */
		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
	} else {
		/* no free iags. the inode map will have to be extended
		 * to include a new iag.
		 */

		/* acquire inode map lock */
		IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);

		/* the map must hold exactly im_nextiag iag pages plus
		 * one more page (presumably the control page - confirm).
		 */
		if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
			IWRITE_UNLOCK(ipimap);
			IAGFREE_UNLOCK(imap);
			jfs_error(imap->im_ipimap->i_sb,
				  "ipimap->i_size is wrong\n");
			return -EIO;
		}


		/* get the next available iag number */
		iagno = imap->im_nextiag;

		/* make sure that we have not exceeded the maximum inode
		 * number limit.
		 */
		if (iagno > (MAXIAGS - 1)) {
			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			rc = -ENOSPC;
			goto out;
		}

		/*
		 * synchronously append new iag page.
		 */
		/* determine the logical address of iag page to append */
		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);

		/* Allocate extent for new iag page */
		xlen = sbi->nbperpage;
		if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			goto out;
		}

		/*
		 * start transaction of update of the inode map
		 * addressing structure pointing to the new iag page;
		 */
		tid = txBegin(sb, COMMIT_FORCE);
		mutex_lock(&JFS_IP(ipimap)->commit_mutex);

		/* update the inode map addressing structure to point to it */
		if ((rc =
		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
			txEnd(tid);
			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
			/* Free the blocks allocated for the iag since it was
			 * not successfully added to the inode map
			 */
			dbFree(ipimap, xaddr, (s64) xlen);

			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			goto out;
		}

		/* update the inode map's inode to reflect the extension */
		ipimap->i_size += PSIZE;
		inode_add_bytes(ipimap, PSIZE);

		/* assign a buffer for the page */
		mp = get_metapage(ipimap, blkno, PSIZE, 0);
		if (!mp) {
			/*
			 * This is very unlikely since we just created the
			 * extent, but let's try to handle it correctly
			 */
			xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
				   COMMIT_PWMAP);

			txAbort(tid, 0);
			txEnd(tid);
			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);

			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			rc = -EIO;
			goto out;
		}
		iagp = (struct iag *) mp->data;

		/* init the iag: not on any list, no backed inodes,
		 * every extent free.
		 */
		memset(iagp, 0, sizeof(struct iag));
		iagp->iagnum = cpu_to_le32(iagno);
		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
		iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
		iagp->iagfree = cpu_to_le32(-1);
		iagp->nfreeinos = 0;
		iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);

		/* initialize the free inode summary map (free extent
		 * summary map initialization handled by the memset above).
		 */
		for (i = 0; i < SMAPSZ; i++)
			iagp->inosmap[i] = cpu_to_le32(ONES);

		/*
		 * Write and sync the metapage
		 */
		flush_metapage(mp);

		/*
		 * txCommit(COMMIT_FORCE) will synchronously write address
		 * index pages and inode after commit in careful update order
		 * of address index pages (right to left, bottom up);
		 *
		 * NOTE(review): rc from txCommit() is not examined; it is
		 * overwritten by the diIAGRead() call further down.
		 */
		iplist[0] = ipimap;
		rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);

		txEnd(tid);
		mutex_unlock(&JFS_IP(ipimap)->commit_mutex);

		duplicateIXtree(sb, blkno, xlen, &xaddr);

		/* update the next available iag number */
		imap->im_nextiag += 1;

		/* Add the iag to the iag free list so we don't lose the iag
		 * if a failure happens now.
		 */
		imap->im_freeiag = iagno;

		/* Until we have logredo working, we want the imap inode &
		 * control page to be up to date.
		 */
		diSync(ipimap);

		/* release the inode map lock */
		IWRITE_UNLOCK(ipimap);
	}

	/* obtain read lock on map; it stays held on the success path */
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);

	/* read the iag */
	if ((rc = diIAGRead(imap, iagno, &mp))) {
		IREAD_UNLOCK(ipimap);
		rc = -EIO;
		goto out;
	}
	iagp = (struct iag *) mp->data;

	/* remove the iag from the iag free list */
	imap->im_freeiag = le32_to_cpu(iagp->iagfree);
	iagp->iagfree = cpu_to_le32(-1);

	/* set the return iag number and buffer pointer */
	*iagnop = iagno;
	*mpp = mp;

      out:
	/* release the iag free lock */
	IAGFREE_UNLOCK(imap);

	return (rc);
}
2647 
2648 /*
2649  * NAME:	diIAGRead()
2650  *
2651  * FUNCTION:	get the buffer for the specified iag within a fileset
2652  *		or aggregate inode map.
2653  *
2654  * PARAMETERS:
2655  *	imap	- pointer to inode map control structure.
2656  *	iagno	- iag number.
2657  *	bpp	- point to buffer pointer to be filled in on successful
2658  *		  exit.
2659  *
2660  * SERIALIZATION:
2661  *	must have read lock on imap inode
2662  *	(When called by diExtendFS, the filesystem is quiesced, therefore
2663  *	 the read lock is unnecessary.)
2664  *
2665  * RETURN VALUES:
2666  *	0	- success.
2667  *	-EIO	- i/o error.
2668  */
diIAGRead(struct inomap * imap,int iagno,struct metapage ** mpp)2669 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2670 {
2671 	struct inode *ipimap = imap->im_ipimap;
2672 	s64 blkno;
2673 
2674 	/* compute the logical block number of the iag. */
2675 	blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage);
2676 
2677 	/* read the iag. */
2678 	*mpp = read_metapage(ipimap, blkno, PSIZE, 0);
2679 	if (*mpp == NULL) {
2680 		return -EIO;
2681 	}
2682 
2683 	return (0);
2684 }
2685 
2686 /*
2687  * NAME:	diFindFree()
2688  *
2689  * FUNCTION:	find the first free bit in a word starting at
2690  *		the specified bit position.
2691  *
2692  * PARAMETERS:
2693  *	word	- word to be examined.
2694  *	start	- starting bit position.
2695  *
2696  * RETURN VALUES:
2697  *	bit position of first free bit in the word or 32 if
2698  *	no free bits were found.
2699  */
diFindFree(u32 word,int start)2700 static int diFindFree(u32 word, int start)
2701 {
2702 	int bitno;
2703 	assert(start < 32);
2704 	/* scan the word for the first free bit. */
2705 	for (word <<= start, bitno = start; bitno < 32;
2706 	     bitno++, word <<= 1) {
2707 		if ((word & HIGHORDER) == 0)
2708 			break;
2709 	}
2710 	return (bitno);
2711 }
2712 
2713 /*
2714  * NAME:	diUpdatePMap()
2715  *
2716  * FUNCTION: Update the persistent map in an IAG for the allocation or
2717  *	freeing of the specified inode.
2718  *
2719  * PRE CONDITIONS: Working map has already been updated for allocate.
2720  *
2721  * PARAMETERS:
2722  *	ipimap	- Incore inode map inode
2723  *	inum	- Number of inode to mark in permanent map
2724  *	is_free	- If 'true' indicates inode should be marked freed, otherwise
2725  *		  indicates inode should be marked allocated.
2726  *
2727  * RETURN VALUES:
2728  *		0 for success
2729  */
2730 int
diUpdatePMap(struct inode * ipimap,unsigned long inum,bool is_free,struct tblock * tblk)2731 diUpdatePMap(struct inode *ipimap,
2732 	     unsigned long inum, bool is_free, struct tblock * tblk)
2733 {
2734 	int rc;
2735 	struct iag *iagp;
2736 	struct metapage *mp;
2737 	int iagno, ino, extno, bitno;
2738 	struct inomap *imap;
2739 	u32 mask;
2740 	struct jfs_log *log;
2741 	int lsn, difft, diffp;
2742 	unsigned long flags;
2743 
2744 	imap = JFS_IP(ipimap)->i_imap;
2745 	/* get the iag number containing the inode */
2746 	iagno = INOTOIAG(inum);
2747 	/* make sure that the iag is contained within the map */
2748 	if (iagno >= imap->im_nextiag) {
2749 		jfs_error(ipimap->i_sb, "the iag is outside the map\n");
2750 		return -EIO;
2751 	}
2752 	/* read the iag */
2753 	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2754 	rc = diIAGRead(imap, iagno, &mp);
2755 	IREAD_UNLOCK(ipimap);
2756 	if (rc)
2757 		return (rc);
2758 	metapage_wait_for_io(mp);
2759 	iagp = (struct iag *) mp->data;
2760 	/* get the inode number and extent number of the inode within
2761 	 * the iag and the inode number within the extent.
2762 	 */
2763 	ino = inum & (INOSPERIAG - 1);
2764 	extno = ino >> L2INOSPEREXT;
2765 	bitno = ino & (INOSPEREXT - 1);
2766 	mask = HIGHORDER >> bitno;
2767 	/*
2768 	 * mark the inode free in persistent map:
2769 	 */
2770 	if (is_free) {
2771 		/* The inode should have been allocated both in working
2772 		 * map and in persistent map;
2773 		 * the inode will be freed from working map at the release
2774 		 * of last reference release;
2775 		 */
2776 		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2777 			jfs_error(ipimap->i_sb,
2778 				  "inode %ld not marked as allocated in wmap!\n",
2779 				  inum);
2780 		}
2781 		if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) {
2782 			jfs_error(ipimap->i_sb,
2783 				  "inode %ld not marked as allocated in pmap!\n",
2784 				  inum);
2785 		}
2786 		/* update the bitmap for the extent of the freed inode */
2787 		iagp->pmap[extno] &= cpu_to_le32(~mask);
2788 	}
2789 	/*
2790 	 * mark the inode allocated in persistent map:
2791 	 */
2792 	else {
2793 		/* The inode should be already allocated in the working map
2794 		 * and should be free in persistent map;
2795 		 */
2796 		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2797 			release_metapage(mp);
2798 			jfs_error(ipimap->i_sb,
2799 				  "the inode is not allocated in the working map\n");
2800 			return -EIO;
2801 		}
2802 		if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) {
2803 			release_metapage(mp);
2804 			jfs_error(ipimap->i_sb,
2805 				  "the inode is not free in the persistent map\n");
2806 			return -EIO;
2807 		}
2808 		/* update the bitmap for the extent of the allocated inode */
2809 		iagp->pmap[extno] |= cpu_to_le32(mask);
2810 	}
2811 	/*
2812 	 * update iag lsn
2813 	 */
2814 	lsn = tblk->lsn;
2815 	log = JFS_SBI(tblk->sb)->log;
2816 	LOGSYNC_LOCK(log, flags);
2817 	if (mp->lsn != 0) {
2818 		/* inherit older/smaller lsn */
2819 		logdiff(difft, lsn, log);
2820 		logdiff(diffp, mp->lsn, log);
2821 		if (difft < diffp) {
2822 			mp->lsn = lsn;
2823 			/* move mp after tblock in logsync list */
2824 			list_move(&mp->synclist, &tblk->synclist);
2825 		}
2826 		/* inherit younger/larger clsn */
2827 		assert(mp->clsn);
2828 		logdiff(difft, tblk->clsn, log);
2829 		logdiff(diffp, mp->clsn, log);
2830 		if (difft > diffp)
2831 			mp->clsn = tblk->clsn;
2832 	} else {
2833 		mp->log = log;
2834 		mp->lsn = lsn;
2835 		/* insert mp after tblock in logsync list */
2836 		log->count++;
2837 		list_add(&mp->synclist, &tblk->synclist);
2838 		mp->clsn = tblk->clsn;
2839 	}
2840 	LOGSYNC_UNLOCK(log, flags);
2841 	write_metapage(mp);
2842 	return (0);
2843 }
2844 
/*
 *	diExtendFS()
 *
 * function: update imap for extendfs(); the per-AG control information
 *	and the AG free inode / free inode extent lists are rebuilt from
 *	the iag pages of the map.
 *
 * parameters:
 *	ipimap	- inode map inode.
 *	ipbmap	- block map inode; only used to reach the bmap and its
 *		  db_agl2size.
 *
 * return values:
 *	0	- success.
 *	-EIO	- an iag carries an unexpected iagnum, or the recomputed
 *		  inode totals disagree with im_numinos / im_numfree.
 *	other	- the most recent error returned by diIAGRead(), when
 *		  nothing else failed.
 *
 * note: AG size has been increased s.t. each k old contiguous AGs are
 * coalesced into a new AG;
 */
int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
{
	int rc, rcx = 0;
	struct inomap *imap = JFS_IP(ipimap)->i_imap;
	struct iag *iagp = NULL, *hiagp = NULL;
	struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap;
	struct metapage *bp, *hbp;
	int i, n, head;
	int numinos, xnuminos = 0, xnumfree = 0;
	s64 agstart;

	jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d",
		   imap->im_nextiag, atomic_read(&imap->im_numinos),
		   atomic_read(&imap->im_numfree));

	/*
	 *	reconstruct imap
	 *
	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
	 * note: new AG size = old AG size * (2**x).
	 */

	/* init per AG control information im_agctl[] */
	for (i = 0; i < MAXAG; i++) {
		imap->im_agctl[i].inofree = -1;
		imap->im_agctl[i].extfree = -1;
		imap->im_agctl[i].numinos = 0;	/* number of backed inodes */
		imap->im_agctl[i].numfree = 0;	/* number of free backed inodes */
	}

	/*
	 *	process each iag page of the map.
	 *
	 * rebuild AG Free Inode List, AG Free Inode Extent List;
	 */
	for (i = 0; i < imap->im_nextiag; i++) {
		/* an unreadable iag is skipped; the error is remembered and
		 * its inodes are not added to xnuminos / xnumfree.
		 */
		if ((rc = diIAGRead(imap, i, &bp))) {
			rcx = rc;
			continue;
		}
		iagp = (struct iag *) bp->data;
		if (le32_to_cpu(iagp->iagnum) != i) {
			release_metapage(bp);
			jfs_error(ipimap->i_sb, "unexpected value of iagnum\n");
			return -EIO;
		}

		/* leave free iag in the free iag list */
		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
			release_metapage(bp);
			continue;
		}

		/* map the iag to its new AG number and round agstart
		 * down to the start of that AG.
		 *
		 * NOTE(review): n indexes im_agctl[] below without being
		 * checked against MAXAG - confirm agstart is validated
		 * elsewhere.
		 */
		agstart = le64_to_cpu(iagp->agstart);
		n = agstart >> mp->db_agl2size;
		iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size);

		/* compute backed inodes */
		numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
		    << L2INOSPEREXT;
		if (numinos > 0) {
			/* merge AG backed inodes */
			imap->im_agctl[n].numinos += numinos;
			xnuminos += numinos;
		}

		/* if any backed free inodes, insert at AG free inode list */
		if ((int) le32_to_cpu(iagp->nfreeinos) > 0) {
			if ((head = imap->im_agctl[n].inofree) == -1) {
				/* list is empty: this iag is its only member */
				iagp->inofreefwd = cpu_to_le32(-1);
				iagp->inofreeback = cpu_to_le32(-1);
			} else {
				/* link in front of the current head */
				if ((rc = diIAGRead(imap, head, &hbp))) {
					rcx = rc;
					goto nextiag;
				}
				hiagp = (struct iag *) hbp->data;
				hiagp->inofreeback = iagp->iagnum;
				iagp->inofreefwd = cpu_to_le32(head);
				iagp->inofreeback = cpu_to_le32(-1);
				write_metapage(hbp);
			}

			imap->im_agctl[n].inofree =
			    le32_to_cpu(iagp->iagnum);

			/* merge AG backed free inodes */
			imap->im_agctl[n].numfree +=
			    le32_to_cpu(iagp->nfreeinos);
			xnumfree += le32_to_cpu(iagp->nfreeinos);
		}

		/* if any free extents, insert at AG free extent list */
		if (le32_to_cpu(iagp->nfreeexts) > 0) {
			if ((head = imap->im_agctl[n].extfree) == -1) {
				/* list is empty: this iag is its only member */
				iagp->extfreefwd = cpu_to_le32(-1);
				iagp->extfreeback = cpu_to_le32(-1);
			} else {
				/* link in front of the current head */
				if ((rc = diIAGRead(imap, head, &hbp))) {
					rcx = rc;
					goto nextiag;
				}
				hiagp = (struct iag *) hbp->data;
				hiagp->extfreeback = iagp->iagnum;
				iagp->extfreefwd = cpu_to_le32(head);
				iagp->extfreeback = cpu_to_le32(-1);
				write_metapage(hbp);
			}

			imap->im_agctl[n].extfree =
			    le32_to_cpu(iagp->iagnum);
		}

	      nextiag:
		/* the iag is written out whether or not list insertion
		 * completed.
		 */
		write_metapage(bp);
	}

	/* the totals recomputed from the iags must match the counters
	 * kept in the in-memory inode map.
	 */
	if (xnuminos != atomic_read(&imap->im_numinos) ||
	    xnumfree != atomic_read(&imap->im_numfree)) {
		jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n");
		return -EIO;
	}

	return rcx;
}
2979 
2980 
2981 /*
2982  *	duplicateIXtree()
2983  *
2984  * serialization: IWRITE_LOCK held on entry/exit
2985  *
2986  * note: shadow page with regular inode (rel.2);
2987  */
duplicateIXtree(struct super_block * sb,s64 blkno,int xlen,s64 * xaddr)2988 static void duplicateIXtree(struct super_block *sb, s64 blkno,
2989 			    int xlen, s64 *xaddr)
2990 {
2991 	struct jfs_superblock *j_sb;
2992 	struct buffer_head *bh;
2993 	struct inode *ip;
2994 	tid_t tid;
2995 
2996 	/* if AIT2 ipmap2 is bad, do not try to update it */
2997 	if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT)	/* s_flag */
2998 		return;
2999 	ip = diReadSpecial(sb, FILESYSTEM_I, 1);
3000 	if (ip == NULL) {
3001 		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
3002 		if (readSuper(sb, &bh))
3003 			return;
3004 		j_sb = (struct jfs_superblock *)bh->b_data;
3005 		j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT);
3006 
3007 		mark_buffer_dirty(bh);
3008 		sync_dirty_buffer(bh);
3009 		brelse(bh);
3010 		return;
3011 	}
3012 
3013 	/* start transaction */
3014 	tid = txBegin(sb, COMMIT_FORCE);
3015 	/* update the inode map addressing structure to point to it */
3016 	if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) {
3017 		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
3018 		txAbort(tid, 1);
3019 		goto cleanup;
3020 
3021 	}
3022 	/* update the inode map's inode to reflect the extension */
3023 	ip->i_size += PSIZE;
3024 	inode_add_bytes(ip, PSIZE);
3025 	txCommit(tid, 1, &ip, COMMIT_FORCE);
3026       cleanup:
3027 	txEnd(tid);
3028 	diFreeSpecial(ip);
3029 }
3030 
3031 /*
3032  * NAME:	copy_from_dinode()
3033  *
3034  * FUNCTION:	Copies inode info from disk inode to in-memory inode
3035  *
3036  * RETURN VALUES:
3037  *	0	- success
3038  *	-ENOMEM	- insufficient memory
3039  */
copy_from_dinode(struct dinode * dip,struct inode * ip)3040 static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3041 {
3042 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3043 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3044 
3045 	jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3046 	jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
3047 	jfs_set_inode_flags(ip);
3048 
3049 	ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
3050 	if (sbi->umask != -1) {
3051 		ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask);
3052 		/* For directories, add x permission if r is allowed by umask */
3053 		if (S_ISDIR(ip->i_mode)) {
3054 			if (ip->i_mode & 0400)
3055 				ip->i_mode |= 0100;
3056 			if (ip->i_mode & 0040)
3057 				ip->i_mode |= 0010;
3058 			if (ip->i_mode & 0004)
3059 				ip->i_mode |= 0001;
3060 		}
3061 	}
3062 	set_nlink(ip, le32_to_cpu(dip->di_nlink));
3063 
3064 	jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3065 	if (!uid_valid(sbi->uid))
3066 		ip->i_uid = jfs_ip->saved_uid;
3067 	else {
3068 		ip->i_uid = sbi->uid;
3069 	}
3070 
3071 	jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3072 	if (!gid_valid(sbi->gid))
3073 		ip->i_gid = jfs_ip->saved_gid;
3074 	else {
3075 		ip->i_gid = sbi->gid;
3076 	}
3077 
3078 	ip->i_size = le64_to_cpu(dip->di_size);
3079 	ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
3080 	ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
3081 	ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
3082 	ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
3083 	ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
3084 	ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
3085 	ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
3086 	ip->i_generation = le32_to_cpu(dip->di_gen);
3087 
3088 	jfs_ip->ixpxd = dip->di_ixpxd;	/* in-memory pxd's are little-endian */
3089 	jfs_ip->acl = dip->di_acl;	/* as are dxd's */
3090 	jfs_ip->ea = dip->di_ea;
3091 	jfs_ip->next_index = le32_to_cpu(dip->di_next_index);
3092 	jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec);
3093 	jfs_ip->acltype = le32_to_cpu(dip->di_acltype);
3094 
3095 	if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) {
3096 		jfs_ip->dev = le32_to_cpu(dip->di_rdev);
3097 		ip->i_rdev = new_decode_dev(jfs_ip->dev);
3098 	}
3099 
3100 	if (S_ISDIR(ip->i_mode)) {
3101 		memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384);
3102 	} else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) {
3103 		memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288);
3104 	} else
3105 		memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128);
3106 
3107 	/* Zero the in-memory-only stuff */
3108 	jfs_ip->cflag = 0;
3109 	jfs_ip->btindex = 0;
3110 	jfs_ip->btorder = 0;
3111 	jfs_ip->bxflag = 0;
3112 	jfs_ip->blid = 0;
3113 	jfs_ip->atlhead = 0;
3114 	jfs_ip->atltail = 0;
3115 	jfs_ip->xtlid = 0;
3116 	return (0);
3117 }
3118 
3119 /*
3120  * NAME:	copy_to_dinode()
3121  *
3122  * FUNCTION:	Copies inode info from in-memory inode to disk inode
3123  */
copy_to_dinode(struct dinode * dip,struct inode * ip)3124 static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3125 {
3126 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3127 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3128 
3129 	dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
3130 	dip->di_inostamp = cpu_to_le32(sbi->inostamp);
3131 	dip->di_number = cpu_to_le32(ip->i_ino);
3132 	dip->di_gen = cpu_to_le32(ip->i_generation);
3133 	dip->di_size = cpu_to_le64(ip->i_size);
3134 	dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3135 	dip->di_nlink = cpu_to_le32(ip->i_nlink);
3136 	if (!uid_valid(sbi->uid))
3137 		dip->di_uid = cpu_to_le32(i_uid_read(ip));
3138 	else
3139 		dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
3140 						   jfs_ip->saved_uid));
3141 	if (!gid_valid(sbi->gid))
3142 		dip->di_gid = cpu_to_le32(i_gid_read(ip));
3143 	else
3144 		dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3145 						    jfs_ip->saved_gid));
3146 	/*
3147 	 * mode2 is only needed for storing the higher order bits.
3148 	 * Trust i_mode for the lower order ones
3149 	 */
3150 	if (sbi->umask == -1)
3151 		dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) |
3152 					   ip->i_mode);
3153 	else /* Leave the original permissions alone */
3154 		dip->di_mode = cpu_to_le32(jfs_ip->mode2);
3155 
3156 	dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
3157 	dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
3158 	dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
3159 	dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec);
3160 	dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
3161 	dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
3162 	dip->di_ixpxd = jfs_ip->ixpxd;	/* in-memory pxd's are little-endian */
3163 	dip->di_acl = jfs_ip->acl;	/* as are dxd's */
3164 	dip->di_ea = jfs_ip->ea;
3165 	dip->di_next_index = cpu_to_le32(jfs_ip->next_index);
3166 	dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime);
3167 	dip->di_otime.tv_nsec = 0;
3168 	dip->di_acltype = cpu_to_le32(jfs_ip->acltype);
3169 	if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
3170 		dip->di_rdev = cpu_to_le32(jfs_ip->dev);
3171 }
3172