1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 
7 #include "xfs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_sb.h"
13 #include "xfs_mount.h"
14 #include "xfs_da_format.h"
15 #include "xfs_inode.h"
16 #include "xfs_btree.h"
17 #include "xfs_bmap.h"
18 #include "xfs_alloc.h"
19 #include "xfs_error.h"
20 #include "xfs_fsops.h"
21 #include "xfs_trans.h"
22 #include "xfs_buf_item.h"
23 #include "xfs_log.h"
24 #include "xfs_log_priv.h"
25 #include "xfs_da_btree.h"
26 #include "xfs_dir2.h"
27 #include "xfs_extfree_item.h"
28 #include "xfs_mru_cache.h"
29 #include "xfs_inode_item.h"
30 #include "xfs_icache.h"
31 #include "xfs_trace.h"
32 #include "xfs_icreate_item.h"
33 #include "xfs_filestream.h"
34 #include "xfs_quota.h"
35 #include "xfs_sysfs.h"
36 #include "xfs_ondisk.h"
37 #include "xfs_rmap_item.h"
38 #include "xfs_refcount_item.h"
39 #include "xfs_bmap_item.h"
40 #include "xfs_reflink.h"
41 
42 #include <linux/namei.h>
43 #include <linux/dax.h>
44 #include <linux/init.h>
45 #include <linux/slab.h>
46 #include <linux/mount.h>
47 #include <linux/mempool.h>
48 #include <linux/writeback.h>
49 #include <linux/kthread.h>
50 #include <linux/freezer.h>
51 #include <linux/parser.h>
52 
53 static const struct super_operations xfs_super_operations;
54 struct bio_set xfs_ioend_bioset;
55 
56 static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
57 #ifdef DEBUG
58 static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
59 #endif
60 
61 /*
62  * Table driven mount option parser.
63  */
64 enum {
65 	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
66 	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
67 	Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
68 	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
69 	Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
70 	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
71 	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
72 	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
73 	Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
74 };
75 
76 static const match_table_t tokens = {
77 	{Opt_logbufs,	"logbufs=%u"},	/* number of XFS log buffers */
78 	{Opt_logbsize,	"logbsize=%s"},	/* size of XFS log buffers */
79 	{Opt_logdev,	"logdev=%s"},	/* log device */
80 	{Opt_rtdev,	"rtdev=%s"},	/* realtime I/O device */
81 	{Opt_biosize,	"biosize=%u"},	/* log2 of preferred buffered io size */
82 	{Opt_wsync,	"wsync"},	/* safe-mode nfs compatible mount */
83 	{Opt_noalign,	"noalign"},	/* turn off stripe alignment */
84 	{Opt_swalloc,	"swalloc"},	/* turn on stripe width allocation */
85 	{Opt_sunit,	"sunit=%u"},	/* data volume stripe unit */
86 	{Opt_swidth,	"swidth=%u"},	/* data volume stripe width */
87 	{Opt_nouuid,	"nouuid"},	/* ignore filesystem UUID */
88 	{Opt_mtpt,	"mtpt"},	/* filesystem mount point */
89 	{Opt_grpid,	"grpid"},	/* group-ID from parent directory */
90 	{Opt_nogrpid,	"nogrpid"},	/* group-ID from current process */
91 	{Opt_bsdgroups,	"bsdgroups"},	/* group-ID from parent directory */
92 	{Opt_sysvgroups,"sysvgroups"},	/* group-ID from current process */
93 	{Opt_allocsize,	"allocsize=%s"},/* preferred allocation size */
94 	{Opt_norecovery,"norecovery"},	/* don't run XFS recovery */
95 	{Opt_inode64,	"inode64"},	/* inodes can be allocated anywhere */
96 	{Opt_inode32,   "inode32"},	/* inode allocation limited to
97 					 * XFS_MAXINUMBER_32 */
98 	{Opt_ikeep,	"ikeep"},	/* do not free empty inode clusters */
99 	{Opt_noikeep,	"noikeep"},	/* free empty inode clusters */
100 	{Opt_largeio,	"largeio"},	/* report large I/O sizes in stat() */
101 	{Opt_nolargeio,	"nolargeio"},	/* do not report large I/O sizes
102 					 * in stat(). */
103 	{Opt_attr2,	"attr2"},	/* do use attr2 attribute format */
104 	{Opt_noattr2,	"noattr2"},	/* do not use attr2 attribute format */
105 	{Opt_filestreams,"filestreams"},/* use filestreams allocator */
106 	{Opt_quota,	"quota"},	/* disk quotas (user) */
107 	{Opt_noquota,	"noquota"},	/* no quotas */
108 	{Opt_usrquota,	"usrquota"},	/* user quota enabled */
109 	{Opt_grpquota,	"grpquota"},	/* group quota enabled */
110 	{Opt_prjquota,	"prjquota"},	/* project quota enabled */
111 	{Opt_uquota,	"uquota"},	/* user quota (IRIX variant) */
112 	{Opt_gquota,	"gquota"},	/* group quota (IRIX variant) */
113 	{Opt_pquota,	"pquota"},	/* project quota (IRIX variant) */
114 	{Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */
115 	{Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */
116 	{Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */
117 	{Opt_qnoenforce, "qnoenforce"},	/* same as uqnoenforce */
118 	{Opt_discard,	"discard"},	/* Discard unused blocks */
119 	{Opt_nodiscard,	"nodiscard"},	/* Do not discard unused blocks */
120 	{Opt_dax,	"dax"},		/* Enable direct access to bdev pages */
121 	{Opt_err,	NULL},
122 };
123 
124 
125 STATIC int
suffix_kstrtoint(const substring_t * s,unsigned int base,int * res)126 suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
127 {
128 	int	last, shift_left_factor = 0, _res;
129 	char	*value;
130 	int	ret = 0;
131 
132 	value = match_strdup(s);
133 	if (!value)
134 		return -ENOMEM;
135 
136 	last = strlen(value) - 1;
137 	if (value[last] == 'K' || value[last] == 'k') {
138 		shift_left_factor = 10;
139 		value[last] = '\0';
140 	}
141 	if (value[last] == 'M' || value[last] == 'm') {
142 		shift_left_factor = 20;
143 		value[last] = '\0';
144 	}
145 	if (value[last] == 'G' || value[last] == 'g') {
146 		shift_left_factor = 30;
147 		value[last] = '\0';
148 	}
149 
150 	if (kstrtoint(value, base, &_res))
151 		ret = -EINVAL;
152 	kfree(value);
153 	*res = _res << shift_left_factor;
154 	return ret;
155 }
156 
157 /*
158  * This function fills in xfs_mount_t fields based on mount args.
159  * Note: the superblock has _not_ yet been read in.
160  *
161  * Note that this function leaks the various device name allocations on
162  * failure.  The caller takes care of them.
163  *
164  * *sb is const because this is also used to test options on the remount
165  * path, and we don't want this to have any side effects at remount time.
166  * Today this function does not change *sb, but just to future-proof...
167  */
168 STATIC int
xfs_parseargs(struct xfs_mount * mp,char * options)169 xfs_parseargs(
170 	struct xfs_mount	*mp,
171 	char			*options)
172 {
173 	const struct super_block *sb = mp->m_super;
174 	char			*p;
175 	substring_t		args[MAX_OPT_ARGS];
176 	int			dsunit = 0;
177 	int			dswidth = 0;
178 	int			iosize = 0;
179 	uint8_t			iosizelog = 0;
180 
181 	/*
182 	 * set up the mount name first so all the errors will refer to the
183 	 * correct device.
184 	 */
185 	mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
186 	if (!mp->m_fsname)
187 		return -ENOMEM;
188 	mp->m_fsname_len = strlen(mp->m_fsname) + 1;
189 
190 	/*
191 	 * Copy binary VFS mount flags we are interested in.
192 	 */
193 	if (sb_rdonly(sb))
194 		mp->m_flags |= XFS_MOUNT_RDONLY;
195 	if (sb->s_flags & SB_DIRSYNC)
196 		mp->m_flags |= XFS_MOUNT_DIRSYNC;
197 	if (sb->s_flags & SB_SYNCHRONOUS)
198 		mp->m_flags |= XFS_MOUNT_WSYNC;
199 
200 	/*
201 	 * Set some default flags that could be cleared by the mount option
202 	 * parsing.
203 	 */
204 	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
205 
206 	/*
207 	 * These can be overridden by the mount option parsing.
208 	 */
209 	mp->m_logbufs = -1;
210 	mp->m_logbsize = -1;
211 
212 	if (!options)
213 		goto done;
214 
215 	while ((p = strsep(&options, ",")) != NULL) {
216 		int		token;
217 
218 		if (!*p)
219 			continue;
220 
221 		token = match_token(p, tokens, args);
222 		switch (token) {
223 		case Opt_logbufs:
224 			if (match_int(args, &mp->m_logbufs))
225 				return -EINVAL;
226 			break;
227 		case Opt_logbsize:
228 			if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
229 				return -EINVAL;
230 			break;
231 		case Opt_logdev:
232 			kfree(mp->m_logname);
233 			mp->m_logname = match_strdup(args);
234 			if (!mp->m_logname)
235 				return -ENOMEM;
236 			break;
237 		case Opt_mtpt:
238 			xfs_warn(mp, "%s option not allowed on this system", p);
239 			return -EINVAL;
240 		case Opt_rtdev:
241 			kfree(mp->m_rtname);
242 			mp->m_rtname = match_strdup(args);
243 			if (!mp->m_rtname)
244 				return -ENOMEM;
245 			break;
246 		case Opt_allocsize:
247 		case Opt_biosize:
248 			if (suffix_kstrtoint(args, 10, &iosize))
249 				return -EINVAL;
250 			iosizelog = ffs(iosize) - 1;
251 			break;
252 		case Opt_grpid:
253 		case Opt_bsdgroups:
254 			mp->m_flags |= XFS_MOUNT_GRPID;
255 			break;
256 		case Opt_nogrpid:
257 		case Opt_sysvgroups:
258 			mp->m_flags &= ~XFS_MOUNT_GRPID;
259 			break;
260 		case Opt_wsync:
261 			mp->m_flags |= XFS_MOUNT_WSYNC;
262 			break;
263 		case Opt_norecovery:
264 			mp->m_flags |= XFS_MOUNT_NORECOVERY;
265 			break;
266 		case Opt_noalign:
267 			mp->m_flags |= XFS_MOUNT_NOALIGN;
268 			break;
269 		case Opt_swalloc:
270 			mp->m_flags |= XFS_MOUNT_SWALLOC;
271 			break;
272 		case Opt_sunit:
273 			if (match_int(args, &dsunit))
274 				return -EINVAL;
275 			break;
276 		case Opt_swidth:
277 			if (match_int(args, &dswidth))
278 				return -EINVAL;
279 			break;
280 		case Opt_inode32:
281 			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
282 			break;
283 		case Opt_inode64:
284 			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
285 			break;
286 		case Opt_nouuid:
287 			mp->m_flags |= XFS_MOUNT_NOUUID;
288 			break;
289 		case Opt_ikeep:
290 			mp->m_flags |= XFS_MOUNT_IKEEP;
291 			break;
292 		case Opt_noikeep:
293 			mp->m_flags &= ~XFS_MOUNT_IKEEP;
294 			break;
295 		case Opt_largeio:
296 			mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
297 			break;
298 		case Opt_nolargeio:
299 			mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
300 			break;
301 		case Opt_attr2:
302 			mp->m_flags |= XFS_MOUNT_ATTR2;
303 			break;
304 		case Opt_noattr2:
305 			mp->m_flags &= ~XFS_MOUNT_ATTR2;
306 			mp->m_flags |= XFS_MOUNT_NOATTR2;
307 			break;
308 		case Opt_filestreams:
309 			mp->m_flags |= XFS_MOUNT_FILESTREAMS;
310 			break;
311 		case Opt_noquota:
312 			mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
313 			mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
314 			mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
315 			break;
316 		case Opt_quota:
317 		case Opt_uquota:
318 		case Opt_usrquota:
319 			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
320 					 XFS_UQUOTA_ENFD);
321 			break;
322 		case Opt_qnoenforce:
323 		case Opt_uqnoenforce:
324 			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
325 			mp->m_qflags &= ~XFS_UQUOTA_ENFD;
326 			break;
327 		case Opt_pquota:
328 		case Opt_prjquota:
329 			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
330 					 XFS_PQUOTA_ENFD);
331 			break;
332 		case Opt_pqnoenforce:
333 			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
334 			mp->m_qflags &= ~XFS_PQUOTA_ENFD;
335 			break;
336 		case Opt_gquota:
337 		case Opt_grpquota:
338 			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
339 					 XFS_GQUOTA_ENFD);
340 			break;
341 		case Opt_gqnoenforce:
342 			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
343 			mp->m_qflags &= ~XFS_GQUOTA_ENFD;
344 			break;
345 		case Opt_discard:
346 			mp->m_flags |= XFS_MOUNT_DISCARD;
347 			break;
348 		case Opt_nodiscard:
349 			mp->m_flags &= ~XFS_MOUNT_DISCARD;
350 			break;
351 #ifdef CONFIG_FS_DAX
352 		case Opt_dax:
353 			mp->m_flags |= XFS_MOUNT_DAX;
354 			break;
355 #endif
356 		default:
357 			xfs_warn(mp, "unknown mount option [%s].", p);
358 			return -EINVAL;
359 		}
360 	}
361 
362 	/*
363 	 * no recovery flag requires a read-only mount
364 	 */
365 	if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
366 	    !(mp->m_flags & XFS_MOUNT_RDONLY)) {
367 		xfs_warn(mp, "no-recovery mounts must be read-only.");
368 		return -EINVAL;
369 	}
370 
371 	if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
372 		xfs_warn(mp,
373 	"sunit and swidth options incompatible with the noalign option");
374 		return -EINVAL;
375 	}
376 
377 #ifndef CONFIG_XFS_QUOTA
378 	if (XFS_IS_QUOTA_RUNNING(mp)) {
379 		xfs_warn(mp, "quota support not available in this kernel.");
380 		return -EINVAL;
381 	}
382 #endif
383 
384 	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
385 		xfs_warn(mp, "sunit and swidth must be specified together");
386 		return -EINVAL;
387 	}
388 
389 	if (dsunit && (dswidth % dsunit != 0)) {
390 		xfs_warn(mp,
391 	"stripe width (%d) must be a multiple of the stripe unit (%d)",
392 			dswidth, dsunit);
393 		return -EINVAL;
394 	}
395 
396 done:
397 	if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {
398 		/*
399 		 * At this point the superblock has not been read
400 		 * in, therefore we do not know the block size.
401 		 * Before the mount call ends we will convert
402 		 * these to FSBs.
403 		 */
404 		mp->m_dalign = dsunit;
405 		mp->m_swidth = dswidth;
406 	}
407 
408 	if (mp->m_logbufs != -1 &&
409 	    mp->m_logbufs != 0 &&
410 	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
411 	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
412 		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
413 			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
414 		return -EINVAL;
415 	}
416 	if (mp->m_logbsize != -1 &&
417 	    mp->m_logbsize !=  0 &&
418 	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
419 	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
420 	     !is_power_of_2(mp->m_logbsize))) {
421 		xfs_warn(mp,
422 			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
423 			mp->m_logbsize);
424 		return -EINVAL;
425 	}
426 
427 	if (iosizelog) {
428 		if (iosizelog > XFS_MAX_IO_LOG ||
429 		    iosizelog < XFS_MIN_IO_LOG) {
430 			xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
431 				iosizelog, XFS_MIN_IO_LOG,
432 				XFS_MAX_IO_LOG);
433 			return -EINVAL;
434 		}
435 
436 		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
437 		mp->m_readio_log = iosizelog;
438 		mp->m_writeio_log = iosizelog;
439 	}
440 
441 	return 0;
442 }
443 
444 struct proc_xfs_info {
445 	uint64_t	flag;
446 	char		*str;
447 };
448 
449 STATIC int
xfs_showargs(struct xfs_mount * mp,struct seq_file * m)450 xfs_showargs(
451 	struct xfs_mount	*mp,
452 	struct seq_file		*m)
453 {
454 	static struct proc_xfs_info xfs_info_set[] = {
455 		/* the few simple ones we can get from the mount struct */
456 		{ XFS_MOUNT_IKEEP,		",ikeep" },
457 		{ XFS_MOUNT_WSYNC,		",wsync" },
458 		{ XFS_MOUNT_NOALIGN,		",noalign" },
459 		{ XFS_MOUNT_SWALLOC,		",swalloc" },
460 		{ XFS_MOUNT_NOUUID,		",nouuid" },
461 		{ XFS_MOUNT_NORECOVERY,		",norecovery" },
462 		{ XFS_MOUNT_ATTR2,		",attr2" },
463 		{ XFS_MOUNT_FILESTREAMS,	",filestreams" },
464 		{ XFS_MOUNT_GRPID,		",grpid" },
465 		{ XFS_MOUNT_DISCARD,		",discard" },
466 		{ XFS_MOUNT_SMALL_INUMS,	",inode32" },
467 		{ XFS_MOUNT_DAX,		",dax" },
468 		{ 0, NULL }
469 	};
470 	static struct proc_xfs_info xfs_info_unset[] = {
471 		/* the few simple ones we can get from the mount struct */
472 		{ XFS_MOUNT_COMPAT_IOSIZE,	",largeio" },
473 		{ XFS_MOUNT_SMALL_INUMS,	",inode64" },
474 		{ 0, NULL }
475 	};
476 	struct proc_xfs_info	*xfs_infop;
477 
478 	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
479 		if (mp->m_flags & xfs_infop->flag)
480 			seq_puts(m, xfs_infop->str);
481 	}
482 	for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
483 		if (!(mp->m_flags & xfs_infop->flag))
484 			seq_puts(m, xfs_infop->str);
485 	}
486 
487 	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
488 		seq_printf(m, ",allocsize=%dk",
489 				(int)(1 << mp->m_writeio_log) >> 10);
490 
491 	if (mp->m_logbufs > 0)
492 		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
493 	if (mp->m_logbsize > 0)
494 		seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
495 
496 	if (mp->m_logname)
497 		seq_show_option(m, "logdev", mp->m_logname);
498 	if (mp->m_rtname)
499 		seq_show_option(m, "rtdev", mp->m_rtname);
500 
501 	if (mp->m_dalign > 0)
502 		seq_printf(m, ",sunit=%d",
503 				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
504 	if (mp->m_swidth > 0)
505 		seq_printf(m, ",swidth=%d",
506 				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
507 
508 	if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
509 		seq_puts(m, ",usrquota");
510 	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
511 		seq_puts(m, ",uqnoenforce");
512 
513 	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
514 		if (mp->m_qflags & XFS_PQUOTA_ENFD)
515 			seq_puts(m, ",prjquota");
516 		else
517 			seq_puts(m, ",pqnoenforce");
518 	}
519 	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
520 		if (mp->m_qflags & XFS_GQUOTA_ENFD)
521 			seq_puts(m, ",grpquota");
522 		else
523 			seq_puts(m, ",gqnoenforce");
524 	}
525 
526 	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
527 		seq_puts(m, ",noquota");
528 
529 	return 0;
530 }
531 static uint64_t
xfs_max_file_offset(unsigned int blockshift)532 xfs_max_file_offset(
533 	unsigned int		blockshift)
534 {
535 	unsigned int		pagefactor = 1;
536 	unsigned int		bitshift = BITS_PER_LONG - 1;
537 
538 	/* Figure out maximum filesize, on Linux this can depend on
539 	 * the filesystem blocksize (on 32 bit platforms).
540 	 * __block_write_begin does this in an [unsigned] long...
541 	 *      page->index << (PAGE_SHIFT - bbits)
542 	 * So, for page sized blocks (4K on 32 bit platforms),
543 	 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
544 	 *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
545 	 * but for smaller blocksizes it is less (bbits = log2 bsize).
546 	 * Note1: get_block_t takes a long (implicit cast from above)
547 	 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
548 	 * can optionally convert the [unsigned] long from above into
549 	 * an [unsigned] long long.
550 	 */
551 
552 #if BITS_PER_LONG == 32
553 # if defined(CONFIG_LBDAF)
554 	ASSERT(sizeof(sector_t) == 8);
555 	pagefactor = PAGE_SIZE;
556 	bitshift = BITS_PER_LONG;
557 # else
558 	pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift);
559 # endif
560 #endif
561 
562 	return (((uint64_t)pagefactor) << bitshift) - 1;
563 }
564 
565 /*
566  * Set parameters for inode allocation heuristics, taking into account
567  * filesystem size and inode32/inode64 mount options; i.e. specifically
568  * whether or not XFS_MOUNT_SMALL_INUMS is set.
569  *
570  * Inode allocation patterns are altered only if inode32 is requested
571  * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
572  * If altered, XFS_MOUNT_32BITINODES is set as well.
573  *
574  * An agcount independent of that in the mount structure is provided
575  * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
576  * to the potentially higher ag count.
577  *
578  * Returns the maximum AG index which may contain inodes.
579  */
580 xfs_agnumber_t
xfs_set_inode_alloc(struct xfs_mount * mp,xfs_agnumber_t agcount)581 xfs_set_inode_alloc(
582 	struct xfs_mount *mp,
583 	xfs_agnumber_t	agcount)
584 {
585 	xfs_agnumber_t	index;
586 	xfs_agnumber_t	maxagi = 0;
587 	xfs_sb_t	*sbp = &mp->m_sb;
588 	xfs_agnumber_t	max_metadata;
589 	xfs_agino_t	agino;
590 	xfs_ino_t	ino;
591 
592 	/*
593 	 * Calculate how much should be reserved for inodes to meet
594 	 * the max inode percentage.  Used only for inode32.
595 	 */
596 	if (mp->m_maxicount) {
597 		uint64_t	icount;
598 
599 		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
600 		do_div(icount, 100);
601 		icount += sbp->sb_agblocks - 1;
602 		do_div(icount, sbp->sb_agblocks);
603 		max_metadata = icount;
604 	} else {
605 		max_metadata = agcount;
606 	}
607 
608 	/* Get the last possible inode in the filesystem */
609 	agino =	XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
610 	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
611 
612 	/*
613 	 * If user asked for no more than 32-bit inodes, and the fs is
614 	 * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
615 	 * the allocator to accommodate the request.
616 	 */
617 	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
618 		mp->m_flags |= XFS_MOUNT_32BITINODES;
619 	else
620 		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
621 
622 	for (index = 0; index < agcount; index++) {
623 		struct xfs_perag	*pag;
624 
625 		ino = XFS_AGINO_TO_INO(mp, index, agino);
626 
627 		pag = xfs_perag_get(mp, index);
628 
629 		if (mp->m_flags & XFS_MOUNT_32BITINODES) {
630 			if (ino > XFS_MAXINUMBER_32) {
631 				pag->pagi_inodeok = 0;
632 				pag->pagf_metadata = 0;
633 			} else {
634 				pag->pagi_inodeok = 1;
635 				maxagi++;
636 				if (index < max_metadata)
637 					pag->pagf_metadata = 1;
638 				else
639 					pag->pagf_metadata = 0;
640 			}
641 		} else {
642 			pag->pagi_inodeok = 1;
643 			pag->pagf_metadata = 0;
644 		}
645 
646 		xfs_perag_put(pag);
647 	}
648 
649 	return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
650 }
651 
652 STATIC int
xfs_blkdev_get(xfs_mount_t * mp,const char * name,struct block_device ** bdevp)653 xfs_blkdev_get(
654 	xfs_mount_t		*mp,
655 	const char		*name,
656 	struct block_device	**bdevp)
657 {
658 	int			error = 0;
659 
660 	*bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
661 				    mp);
662 	if (IS_ERR(*bdevp)) {
663 		error = PTR_ERR(*bdevp);
664 		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
665 	}
666 
667 	return error;
668 }
669 
670 STATIC void
xfs_blkdev_put(struct block_device * bdev)671 xfs_blkdev_put(
672 	struct block_device	*bdev)
673 {
674 	if (bdev)
675 		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
676 }
677 
678 void
xfs_blkdev_issue_flush(xfs_buftarg_t * buftarg)679 xfs_blkdev_issue_flush(
680 	xfs_buftarg_t		*buftarg)
681 {
682 	blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
683 }
684 
685 STATIC void
xfs_close_devices(struct xfs_mount * mp)686 xfs_close_devices(
687 	struct xfs_mount	*mp)
688 {
689 	struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;
690 
691 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
692 		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
693 		struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
694 
695 		xfs_free_buftarg(mp->m_logdev_targp);
696 		xfs_blkdev_put(logdev);
697 		fs_put_dax(dax_logdev);
698 	}
699 	if (mp->m_rtdev_targp) {
700 		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
701 		struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
702 
703 		xfs_free_buftarg(mp->m_rtdev_targp);
704 		xfs_blkdev_put(rtdev);
705 		fs_put_dax(dax_rtdev);
706 	}
707 	xfs_free_buftarg(mp->m_ddev_targp);
708 	fs_put_dax(dax_ddev);
709 }
710 
711 /*
712  * The file system configurations are:
713  *	(1) device (partition) with data and internal log
714  *	(2) logical volume with data and log subvolumes.
715  *	(3) logical volume with data, log, and realtime subvolumes.
716  *
717  * We only have to handle opening the log and realtime volumes here if
718  * they are present.  The data subvolume has already been opened by
719  * get_sb_bdev() and is stored in sb->s_bdev.
720  */
721 STATIC int
xfs_open_devices(struct xfs_mount * mp)722 xfs_open_devices(
723 	struct xfs_mount	*mp)
724 {
725 	struct block_device	*ddev = mp->m_super->s_bdev;
726 	struct dax_device	*dax_ddev = fs_dax_get_by_bdev(ddev);
727 	struct dax_device	*dax_logdev = NULL, *dax_rtdev = NULL;
728 	struct block_device	*logdev = NULL, *rtdev = NULL;
729 	int			error;
730 
731 	/*
732 	 * Open real time and log devices - order is important.
733 	 */
734 	if (mp->m_logname) {
735 		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
736 		if (error)
737 			goto out;
738 		dax_logdev = fs_dax_get_by_bdev(logdev);
739 	}
740 
741 	if (mp->m_rtname) {
742 		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
743 		if (error)
744 			goto out_close_logdev;
745 
746 		if (rtdev == ddev || rtdev == logdev) {
747 			xfs_warn(mp,
748 	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
749 			error = -EINVAL;
750 			goto out_close_rtdev;
751 		}
752 		dax_rtdev = fs_dax_get_by_bdev(rtdev);
753 	}
754 
755 	/*
756 	 * Setup xfs_mount buffer target pointers
757 	 */
758 	error = -ENOMEM;
759 	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
760 	if (!mp->m_ddev_targp)
761 		goto out_close_rtdev;
762 
763 	if (rtdev) {
764 		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
765 		if (!mp->m_rtdev_targp)
766 			goto out_free_ddev_targ;
767 	}
768 
769 	if (logdev && logdev != ddev) {
770 		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
771 		if (!mp->m_logdev_targp)
772 			goto out_free_rtdev_targ;
773 	} else {
774 		mp->m_logdev_targp = mp->m_ddev_targp;
775 	}
776 
777 	return 0;
778 
779  out_free_rtdev_targ:
780 	if (mp->m_rtdev_targp)
781 		xfs_free_buftarg(mp->m_rtdev_targp);
782  out_free_ddev_targ:
783 	xfs_free_buftarg(mp->m_ddev_targp);
784  out_close_rtdev:
785 	xfs_blkdev_put(rtdev);
786 	fs_put_dax(dax_rtdev);
787  out_close_logdev:
788 	if (logdev && logdev != ddev) {
789 		xfs_blkdev_put(logdev);
790 		fs_put_dax(dax_logdev);
791 	}
792  out:
793 	fs_put_dax(dax_ddev);
794 	return error;
795 }
796 
797 /*
798  * Setup xfs_mount buffer target pointers based on superblock
799  */
800 STATIC int
xfs_setup_devices(struct xfs_mount * mp)801 xfs_setup_devices(
802 	struct xfs_mount	*mp)
803 {
804 	int			error;
805 
806 	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
807 	if (error)
808 		return error;
809 
810 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
811 		unsigned int	log_sector_size = BBSIZE;
812 
813 		if (xfs_sb_version_hassector(&mp->m_sb))
814 			log_sector_size = mp->m_sb.sb_logsectsize;
815 		error = xfs_setsize_buftarg(mp->m_logdev_targp,
816 					    log_sector_size);
817 		if (error)
818 			return error;
819 	}
820 	if (mp->m_rtdev_targp) {
821 		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
822 					    mp->m_sb.sb_sectsize);
823 		if (error)
824 			return error;
825 	}
826 
827 	return 0;
828 }
829 
830 STATIC int
xfs_init_mount_workqueues(struct xfs_mount * mp)831 xfs_init_mount_workqueues(
832 	struct xfs_mount	*mp)
833 {
834 	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
835 			WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname);
836 	if (!mp->m_buf_workqueue)
837 		goto out;
838 
839 	mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
840 			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
841 	if (!mp->m_data_workqueue)
842 		goto out_destroy_buf;
843 
844 	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
845 			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
846 	if (!mp->m_unwritten_workqueue)
847 		goto out_destroy_data_iodone_queue;
848 
849 	mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
850 			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
851 	if (!mp->m_cil_workqueue)
852 		goto out_destroy_unwritten;
853 
854 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
855 			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
856 	if (!mp->m_reclaim_workqueue)
857 		goto out_destroy_cil;
858 
859 	mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
860 			WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
861 			mp->m_fsname);
862 	if (!mp->m_log_workqueue)
863 		goto out_destroy_reclaim;
864 
865 	mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
866 			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
867 	if (!mp->m_eofblocks_workqueue)
868 		goto out_destroy_log;
869 
870 	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
871 					       mp->m_fsname);
872 	if (!mp->m_sync_workqueue)
873 		goto out_destroy_eofb;
874 
875 	return 0;
876 
877 out_destroy_eofb:
878 	destroy_workqueue(mp->m_eofblocks_workqueue);
879 out_destroy_log:
880 	destroy_workqueue(mp->m_log_workqueue);
881 out_destroy_reclaim:
882 	destroy_workqueue(mp->m_reclaim_workqueue);
883 out_destroy_cil:
884 	destroy_workqueue(mp->m_cil_workqueue);
885 out_destroy_unwritten:
886 	destroy_workqueue(mp->m_unwritten_workqueue);
887 out_destroy_data_iodone_queue:
888 	destroy_workqueue(mp->m_data_workqueue);
889 out_destroy_buf:
890 	destroy_workqueue(mp->m_buf_workqueue);
891 out:
892 	return -ENOMEM;
893 }
894 
895 STATIC void
xfs_destroy_mount_workqueues(struct xfs_mount * mp)896 xfs_destroy_mount_workqueues(
897 	struct xfs_mount	*mp)
898 {
899 	destroy_workqueue(mp->m_sync_workqueue);
900 	destroy_workqueue(mp->m_eofblocks_workqueue);
901 	destroy_workqueue(mp->m_log_workqueue);
902 	destroy_workqueue(mp->m_reclaim_workqueue);
903 	destroy_workqueue(mp->m_cil_workqueue);
904 	destroy_workqueue(mp->m_data_workqueue);
905 	destroy_workqueue(mp->m_unwritten_workqueue);
906 	destroy_workqueue(mp->m_buf_workqueue);
907 }
908 
909 /*
910  * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
911  * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
912  * for IO to complete so that we effectively throttle multiple callers to the
913  * rate at which IO is completing.
914  */
915 void
xfs_flush_inodes(struct xfs_mount * mp)916 xfs_flush_inodes(
917 	struct xfs_mount	*mp)
918 {
919 	struct super_block	*sb = mp->m_super;
920 
921 	if (down_read_trylock(&sb->s_umount)) {
922 		sync_inodes_sb(sb);
923 		up_read(&sb->s_umount);
924 	}
925 }
926 
927 /* Catch misguided souls that try to use this interface on XFS */
928 STATIC struct inode *
xfs_fs_alloc_inode(struct super_block * sb)929 xfs_fs_alloc_inode(
930 	struct super_block	*sb)
931 {
932 	BUG();
933 	return NULL;
934 }
935 
936 /*
937  * Now that the generic code is guaranteed not to be accessing
938  * the linux inode, we can inactivate and reclaim the inode.
939  */
940 STATIC void
xfs_fs_destroy_inode(struct inode * inode)941 xfs_fs_destroy_inode(
942 	struct inode		*inode)
943 {
944 	struct xfs_inode	*ip = XFS_I(inode);
945 
946 	trace_xfs_destroy_inode(ip);
947 
948 	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
949 	XFS_STATS_INC(ip->i_mount, vn_rele);
950 	XFS_STATS_INC(ip->i_mount, vn_remove);
951 
952 	xfs_inactive(ip);
953 
954 	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
955 	XFS_STATS_INC(ip->i_mount, vn_reclaim);
956 
957 	/*
958 	 * We should never get here with one of the reclaim flags already set.
959 	 */
960 	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
961 	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
962 
963 	/*
964 	 * We always use background reclaim here because even if the
965 	 * inode is clean, it still may be under IO and hence we have
966 	 * to take the flush lock. The background reclaim path handles
967 	 * this more efficiently than we can here, so simply let background
968 	 * reclaim tear down all inodes.
969 	 */
970 	xfs_inode_set_reclaim_tag(ip);
971 }
972 
973 static void
xfs_fs_dirty_inode(struct inode * inode,int flag)974 xfs_fs_dirty_inode(
975 	struct inode			*inode,
976 	int				flag)
977 {
978 	struct xfs_inode		*ip = XFS_I(inode);
979 	struct xfs_mount		*mp = ip->i_mount;
980 	struct xfs_trans		*tp;
981 
982 	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
983 		return;
984 	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
985 		return;
986 
987 	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
988 		return;
989 	xfs_ilock(ip, XFS_ILOCK_EXCL);
990 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
991 	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
992 	xfs_trans_commit(tp);
993 }
994 
995 /*
996  * Slab object creation initialisation for the XFS inode.
997  * This covers only the idempotent fields in the XFS inode;
998  * all other fields need to be initialised on allocation
999  * from the slab. This avoids the need to repeatedly initialise
1000  * fields in the xfs inode that left in the initialise state
1001  * when freeing the inode.
1002  */
1003 STATIC void
xfs_fs_inode_init_once(void * inode)1004 xfs_fs_inode_init_once(
1005 	void			*inode)
1006 {
1007 	struct xfs_inode	*ip = inode;
1008 
1009 	memset(ip, 0, sizeof(struct xfs_inode));
1010 
1011 	/* vfs inode */
1012 	inode_init_once(VFS_I(ip));
1013 
1014 	/* xfs inode */
1015 	atomic_set(&ip->i_pincount, 0);
1016 	spin_lock_init(&ip->i_flags_lock);
1017 
1018 	mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
1019 		     "xfsino", ip->i_ino);
1020 	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
1021 		     "xfsino", ip->i_ino);
1022 }
1023 
1024 /*
1025  * We do an unlocked check for XFS_IDONTCACHE here because we are already
1026  * serialised against cache hits here via the inode->i_lock and igrab() in
1027  * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
1028  * racing with us, and it avoids needing to grab a spinlock here for every inode
1029  * we drop the final reference on.
1030  */
1031 STATIC int
xfs_fs_drop_inode(struct inode * inode)1032 xfs_fs_drop_inode(
1033 	struct inode		*inode)
1034 {
1035 	struct xfs_inode	*ip = XFS_I(inode);
1036 
1037 	/*
1038 	 * If this unlinked inode is in the middle of recovery, don't
1039 	 * drop the inode just yet; log recovery will take care of
1040 	 * that.  See the comment for this inode flag.
1041 	 */
1042 	if (ip->i_flags & XFS_IRECOVERY) {
1043 		ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
1044 		return 0;
1045 	}
1046 
1047 	return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
1048 }
1049 
1050 STATIC void
xfs_free_fsname(struct xfs_mount * mp)1051 xfs_free_fsname(
1052 	struct xfs_mount	*mp)
1053 {
1054 	kfree(mp->m_fsname);
1055 	kfree(mp->m_rtname);
1056 	kfree(mp->m_logname);
1057 }
1058 
1059 STATIC int
xfs_fs_sync_fs(struct super_block * sb,int wait)1060 xfs_fs_sync_fs(
1061 	struct super_block	*sb,
1062 	int			wait)
1063 {
1064 	struct xfs_mount	*mp = XFS_M(sb);
1065 
1066 	/*
1067 	 * Doing anything during the async pass would be counterproductive.
1068 	 */
1069 	if (!wait)
1070 		return 0;
1071 
1072 	xfs_log_force(mp, XFS_LOG_SYNC);
1073 	if (laptop_mode) {
1074 		/*
1075 		 * The disk must be active because we're syncing.
1076 		 * We schedule log work now (now that the disk is
1077 		 * active) instead of later (when it might not be).
1078 		 */
1079 		flush_delayed_work(&mp->m_log->l_work);
1080 	}
1081 
1082 	return 0;
1083 }
1084 
1085 STATIC int
xfs_fs_statfs(struct dentry * dentry,struct kstatfs * statp)1086 xfs_fs_statfs(
1087 	struct dentry		*dentry,
1088 	struct kstatfs		*statp)
1089 {
1090 	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
1091 	xfs_sb_t		*sbp = &mp->m_sb;
1092 	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
1093 	uint64_t		fakeinos, id;
1094 	uint64_t		icount;
1095 	uint64_t		ifree;
1096 	uint64_t		fdblocks;
1097 	xfs_extlen_t		lsize;
1098 	int64_t			ffree;
1099 
1100 	statp->f_type = XFS_SB_MAGIC;
1101 	statp->f_namelen = MAXNAMELEN - 1;
1102 
1103 	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
1104 	statp->f_fsid.val[0] = (u32)id;
1105 	statp->f_fsid.val[1] = (u32)(id >> 32);
1106 
1107 	icount = percpu_counter_sum(&mp->m_icount);
1108 	ifree = percpu_counter_sum(&mp->m_ifree);
1109 	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
1110 
1111 	spin_lock(&mp->m_sb_lock);
1112 	statp->f_bsize = sbp->sb_blocksize;
1113 	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
1114 	statp->f_blocks = sbp->sb_dblocks - lsize;
1115 	spin_unlock(&mp->m_sb_lock);
1116 
1117 	statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
1118 	statp->f_bavail = statp->f_bfree;
1119 
1120 	fakeinos = statp->f_bfree << sbp->sb_inopblog;
1121 	statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
1122 	if (mp->m_maxicount)
1123 		statp->f_files = min_t(typeof(statp->f_files),
1124 					statp->f_files,
1125 					mp->m_maxicount);
1126 
1127 	/* If sb_icount overshot maxicount, report actual allocation */
1128 	statp->f_files = max_t(typeof(statp->f_files),
1129 					statp->f_files,
1130 					sbp->sb_icount);
1131 
1132 	/* make sure statp->f_ffree does not underflow */
1133 	ffree = statp->f_files - (icount - ifree);
1134 	statp->f_ffree = max_t(int64_t, ffree, 0);
1135 
1136 
1137 	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1138 	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
1139 			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
1140 		xfs_qm_statvfs(ip, statp);
1141 
1142 	if (XFS_IS_REALTIME_MOUNT(mp) &&
1143 	    (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
1144 		statp->f_blocks = sbp->sb_rblocks;
1145 		statp->f_bavail = statp->f_bfree =
1146 			sbp->sb_frextents * sbp->sb_rextsize;
1147 	}
1148 
1149 	return 0;
1150 }
1151 
1152 STATIC void
xfs_save_resvblks(struct xfs_mount * mp)1153 xfs_save_resvblks(struct xfs_mount *mp)
1154 {
1155 	uint64_t resblks = 0;
1156 
1157 	mp->m_resblks_save = mp->m_resblks;
1158 	xfs_reserve_blocks(mp, &resblks, NULL);
1159 }
1160 
1161 STATIC void
xfs_restore_resvblks(struct xfs_mount * mp)1162 xfs_restore_resvblks(struct xfs_mount *mp)
1163 {
1164 	uint64_t resblks;
1165 
1166 	if (mp->m_resblks_save) {
1167 		resblks = mp->m_resblks_save;
1168 		mp->m_resblks_save = 0;
1169 	} else
1170 		resblks = xfs_default_resblks(mp);
1171 
1172 	xfs_reserve_blocks(mp, &resblks, NULL);
1173 }
1174 
1175 /*
1176  * Trigger writeback of all the dirty metadata in the file system.
1177  *
1178  * This ensures that the metadata is written to their location on disk rather
1179  * than just existing in transactions in the log. This means after a quiesce
1180  * there is no log replay required to write the inodes to disk - this is the
1181  * primary difference between a sync and a quiesce.
1182  *
1183  * Note: xfs_log_quiesce() stops background log work - the callers must ensure
1184  * it is started again when appropriate.
1185  */
1186 void
xfs_quiesce_attr(struct xfs_mount * mp)1187 xfs_quiesce_attr(
1188 	struct xfs_mount	*mp)
1189 {
1190 	int	error = 0;
1191 
1192 	/* wait for all modifications to complete */
1193 	while (atomic_read(&mp->m_active_trans) > 0)
1194 		delay(100);
1195 
1196 	/* force the log to unpin objects from the now complete transactions */
1197 	xfs_log_force(mp, XFS_LOG_SYNC);
1198 
1199 	/* reclaim inodes to do any IO before the freeze completes */
1200 	xfs_reclaim_inodes(mp, 0);
1201 	xfs_reclaim_inodes(mp, SYNC_WAIT);
1202 
1203 	/* Push the superblock and write an unmount record */
1204 	error = xfs_log_sbcount(mp);
1205 	if (error)
1206 		xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
1207 				"Frozen image may not be consistent.");
1208 	/*
1209 	 * Just warn here till VFS can correctly support
1210 	 * read-only remount without racing.
1211 	 */
1212 	WARN_ON(atomic_read(&mp->m_active_trans) != 0);
1213 
1214 	xfs_log_quiesce(mp);
1215 }
1216 
1217 STATIC int
xfs_test_remount_options(struct super_block * sb,char * options)1218 xfs_test_remount_options(
1219 	struct super_block	*sb,
1220 	char			*options)
1221 {
1222 	int			error = 0;
1223 	struct xfs_mount	*tmp_mp;
1224 
1225 	tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
1226 	if (!tmp_mp)
1227 		return -ENOMEM;
1228 
1229 	tmp_mp->m_super = sb;
1230 	error = xfs_parseargs(tmp_mp, options);
1231 	xfs_free_fsname(tmp_mp);
1232 	kmem_free(tmp_mp);
1233 
1234 	return error;
1235 }
1236 
/*
 * ->remount_fs hook.
 *
 * The option string is first validated as a whole with
 * xfs_test_remount_options(); any error is returned before anything is
 * changed.  After syncing the filesystem, the options are walked one by one:
 * only inode64 and inode32 take effect (they flip XFS_MOUNT_SMALL_INUMS and
 * recompute m_maxagi), every other token is silently ignored.
 *
 * The read-only state is then reconciled with *flags:
 *  - ro -> rw is refused with -EINVAL on a norecovery mount or on a V5
 *    filesystem with unknown ro-compat feature bits; otherwise the mount is
 *    made writable and the superblock, reserve pool, log work, CoW leftovers,
 *    background reclaim and per-AG reservations are brought back up.
 *  - rw -> ro tears those down in the opposite direction, quiesces the
 *    filesystem and sets XFS_MOUNT_RDONLY.
 *
 * Returns 0 on success or a negative errno.
 */
STATIC int
xfs_fs_remount(
	struct super_block	*sb,
	int			*flags,
	char			*options)
{
	struct xfs_mount	*mp = XFS_M(sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	substring_t		args[MAX_OPT_ARGS];
	char			*p;
	int			error;

	/* First, check for complete junk; i.e. invalid options */
	error = xfs_test_remount_options(sb, options);
	if (error)
		return error;

	sync_filesystem(sb);
	while ((p = strsep(&options, ",")) != NULL) {
		int token;

		/* skip empty fields, e.g. from consecutive commas */
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_inode64:
			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
			break;
		case Opt_inode32:
			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
			break;
		default:
			/*
			 * Logically we would return an error here to prevent
			 * users from believing they might have changed
			 * mount options using remount which can't be changed.
			 *
			 * But unfortunately mount(8) adds all options from
			 * mtab and fstab to the mount arguments in some cases
			 * so we can't blindly reject options, but have to
			 * check for each specified option if it actually
			 * differs from the currently set option and only
			 * reject it if that's the case.
			 *
			 * Until that is implemented we return success for
			 * every remount request, and silently ignore all
			 * options that we can't actually change.
			 */
#if 0
			xfs_info(mp,
		"mount option \"%s\" not supported for remount", p);
			return -EINVAL;
#else
			break;
#endif
		}
	}

	/* ro -> rw */
	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
		if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
			xfs_warn(mp,
		"ro->rw transition prohibited on norecovery mount");
			return -EINVAL;
		}

		if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
		    xfs_sb_has_ro_compat_feature(sbp,
					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
			xfs_warn(mp,
"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
				(sbp->sb_features_ro_compat &
					XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
			return -EINVAL;
		}

		mp->m_flags &= ~XFS_MOUNT_RDONLY;

		/*
		 * If this is the first remount to writeable state we
		 * might have some superblock changes to update.
		 */
		if (mp->m_update_sb) {
			error = xfs_sync_sb(mp, false);
			if (error) {
				xfs_warn(mp, "failed to write sb changes");
				return error;
			}
			mp->m_update_sb = false;
		}

		/*
		 * Fill out the reserve pool if it is empty. Use the stashed
		 * value if it is non-zero, otherwise go with the default.
		 */
		xfs_restore_resvblks(mp);
		xfs_log_work_queue(mp);

		/* Recover any CoW blocks that never got remapped. */
		error = xfs_reflink_recover_cow(mp);
		if (error) {
			xfs_err(mp,
	"Error %d recovering leftover CoW allocations.", error);
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}
		xfs_icache_enable_reclaim(mp);

		/* Create the per-AG metadata reservation pool. */
		error = xfs_fs_reserve_ag_blocks(mp);
		/* -ENOSPC from the reservation is tolerated here */
		if (error && error != -ENOSPC)
			return error;
	}

	/* rw -> ro */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
		/*
		 * Cancel background eofb scanning so it cannot race with the
		 * final log force+buftarg wait and deadlock the remount.
		 */
		xfs_icache_disable_reclaim(mp);

		/* Get rid of any leftover CoW reservations... */
		error = xfs_icache_free_cowblocks(mp, NULL);
		if (error) {
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}

		/* Free the per-AG metadata reservation pool. */
		error = xfs_fs_unreserve_ag_blocks(mp);
		if (error) {
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}

		/*
		 * Before we sync the metadata, we need to free up the reserve
		 * block pool so that the used block count in the superblock on
		 * disk is correct at the end of the remount. Stash the current
		 * reserve pool size so that if we get remounted rw, we can
		 * return it to the same size.
		 */
		xfs_save_resvblks(mp);

		xfs_quiesce_attr(mp);
		mp->m_flags |= XFS_MOUNT_RDONLY;
	}

	return 0;
}
1391 
1392 /*
1393  * Second stage of a freeze. The data is already frozen so we only
1394  * need to take care of the metadata. Once that's done sync the superblock
1395  * to the log to dirty it in case of a crash while frozen. This ensures that we
1396  * will recover the unlinked inode lists on the next mount.
1397  */
1398 STATIC int
xfs_fs_freeze(struct super_block * sb)1399 xfs_fs_freeze(
1400 	struct super_block	*sb)
1401 {
1402 	struct xfs_mount	*mp = XFS_M(sb);
1403 
1404 	xfs_icache_disable_reclaim(mp);
1405 	xfs_save_resvblks(mp);
1406 	xfs_quiesce_attr(mp);
1407 	return xfs_sync_sb(mp, true);
1408 }
1409 
1410 STATIC int
xfs_fs_unfreeze(struct super_block * sb)1411 xfs_fs_unfreeze(
1412 	struct super_block	*sb)
1413 {
1414 	struct xfs_mount	*mp = XFS_M(sb);
1415 
1416 	xfs_restore_resvblks(mp);
1417 	xfs_log_work_queue(mp);
1418 	xfs_icache_enable_reclaim(mp);
1419 	return 0;
1420 }
1421 
1422 STATIC int
xfs_fs_show_options(struct seq_file * m,struct dentry * root)1423 xfs_fs_show_options(
1424 	struct seq_file		*m,
1425 	struct dentry		*root)
1426 {
1427 	return xfs_showargs(XFS_M(root->d_sb), m);
1428 }
1429 
1430 /*
1431  * This function fills in xfs_mount_t fields based on mount args.
1432  * Note: the superblock _has_ now been read in.
1433  */
1434 STATIC int
xfs_finish_flags(struct xfs_mount * mp)1435 xfs_finish_flags(
1436 	struct xfs_mount	*mp)
1437 {
1438 	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1439 
1440 	/* Fail a mount where the logbuf is smaller than the log stripe */
1441 	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1442 		if (mp->m_logbsize <= 0 &&
1443 		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1444 			mp->m_logbsize = mp->m_sb.sb_logsunit;
1445 		} else if (mp->m_logbsize > 0 &&
1446 			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
1447 			xfs_warn(mp,
1448 		"logbuf size must be greater than or equal to log stripe size");
1449 			return -EINVAL;
1450 		}
1451 	} else {
1452 		/* Fail a mount if the logbuf is larger than 32K */
1453 		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1454 			xfs_warn(mp,
1455 		"logbuf size for version 1 logs must be 16K or 32K");
1456 			return -EINVAL;
1457 		}
1458 	}
1459 
1460 	/*
1461 	 * V5 filesystems always use attr2 format for attributes.
1462 	 */
1463 	if (xfs_sb_version_hascrc(&mp->m_sb) &&
1464 	    (mp->m_flags & XFS_MOUNT_NOATTR2)) {
1465 		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
1466 			     "attr2 is always enabled for V5 filesystems.");
1467 		return -EINVAL;
1468 	}
1469 
1470 	/*
1471 	 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
1472 	 * told by noattr2 to turn it off
1473 	 */
1474 	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1475 	    !(mp->m_flags & XFS_MOUNT_NOATTR2))
1476 		mp->m_flags |= XFS_MOUNT_ATTR2;
1477 
1478 	/*
1479 	 * prohibit r/w mounts of read-only filesystems
1480 	 */
1481 	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1482 		xfs_warn(mp,
1483 			"cannot mount a read-only filesystem as read-write");
1484 		return -EROFS;
1485 	}
1486 
1487 	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
1488 	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
1489 	    !xfs_sb_version_has_pquotino(&mp->m_sb)) {
1490 		xfs_warn(mp,
1491 		  "Super block does not support project and group quota together");
1492 		return -EINVAL;
1493 	}
1494 
1495 	return 0;
1496 }
1497 
1498 static int
xfs_init_percpu_counters(struct xfs_mount * mp)1499 xfs_init_percpu_counters(
1500 	struct xfs_mount	*mp)
1501 {
1502 	int		error;
1503 
1504 	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
1505 	if (error)
1506 		return -ENOMEM;
1507 
1508 	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
1509 	if (error)
1510 		goto free_icount;
1511 
1512 	error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1513 	if (error)
1514 		goto free_ifree;
1515 
1516 	return 0;
1517 
1518 free_ifree:
1519 	percpu_counter_destroy(&mp->m_ifree);
1520 free_icount:
1521 	percpu_counter_destroy(&mp->m_icount);
1522 	return -ENOMEM;
1523 }
1524 
1525 void
xfs_reinit_percpu_counters(struct xfs_mount * mp)1526 xfs_reinit_percpu_counters(
1527 	struct xfs_mount	*mp)
1528 {
1529 	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1530 	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1531 	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1532 }
1533 
1534 static void
xfs_destroy_percpu_counters(struct xfs_mount * mp)1535 xfs_destroy_percpu_counters(
1536 	struct xfs_mount	*mp)
1537 {
1538 	percpu_counter_destroy(&mp->m_icount);
1539 	percpu_counter_destroy(&mp->m_ifree);
1540 	percpu_counter_destroy(&mp->m_fdblocks);
1541 }
1542 
1543 static struct xfs_mount *
xfs_mount_alloc(struct super_block * sb)1544 xfs_mount_alloc(
1545 	struct super_block	*sb)
1546 {
1547 	struct xfs_mount	*mp;
1548 
1549 	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1550 	if (!mp)
1551 		return NULL;
1552 
1553 	mp->m_super = sb;
1554 	spin_lock_init(&mp->m_sb_lock);
1555 	spin_lock_init(&mp->m_agirotor_lock);
1556 	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1557 	spin_lock_init(&mp->m_perag_lock);
1558 	mutex_init(&mp->m_growlock);
1559 	atomic_set(&mp->m_active_trans, 0);
1560 	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
1561 	INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
1562 	INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
1563 	mp->m_kobj.kobject.kset = xfs_kset;
1564 	/*
1565 	 * We don't create the finobt per-ag space reservation until after log
1566 	 * recovery, so we must set this to true so that an ifree transaction
1567 	 * started during log recovery will not depend on space reservations
1568 	 * for finobt expansion.
1569 	 */
1570 	mp->m_finobt_nores = true;
1571 	return mp;
1572 }
1573 
1574 
/*
 * Fill in the VFS superblock for a new XFS mount; passed to mount_bdev() by
 * xfs_fs_mount().
 *
 * Sequence: allocate the xfs_mount and attach it to @sb, parse the option
 * string in @data, open the devices, create the per-mount workqueues, per-cpu
 * counters and stats memory, read the on-disk superblock, validate the flags,
 * set up the devices and filestreams, fill in the VFS superblock fields,
 * check feature combinations (DAX, discard, reflink/rmapbt vs. realtime), run
 * xfs_mountfs() and finally install the root dentry.
 *
 * Returns 0 on success.  On failure the labels at the bottom unwind whatever
 * had been set up, in reverse order, sb->s_fs_info is cleared, the mount is
 * freed and a negative errno is returned.
 */
STATIC int
xfs_fs_fill_super(
	struct super_block	*sb,
	void			*data,
	int			silent)
{
	struct inode		*root;
	struct xfs_mount	*mp = NULL;
	int			flags = 0, error = -ENOMEM;

	/*
	 * allocate mp and do all low-level struct initializations before we
	 * attach it to the super
	 */
	mp = xfs_mount_alloc(sb);
	if (!mp)
		goto out;
	sb->s_fs_info = mp;

	error = xfs_parseargs(mp, (char *)data);
	if (error)
		goto out_free_fsname;

	sb_min_blocksize(sb, BBSIZE);
	sb->s_xattr = xfs_xattr_handlers;
	sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
	sb->s_qcop = &xfs_quotactl_operations;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
	sb->s_op = &xfs_super_operations;

	/*
	 * Delay mount work if the debug hook is set. This is debug
	 * instrumentation to coordinate simulation of xfs mount failures with
	 * VFS superblock operations
	 */
	if (xfs_globals.mount_delay) {
		xfs_notice(mp, "Delaying mount for %d seconds.",
			xfs_globals.mount_delay);
		msleep(xfs_globals.mount_delay * 1000);
	}

	if (silent)
		flags |= XFS_MFSI_QUIET;

	error = xfs_open_devices(mp);
	if (error)
		goto out_free_fsname;

	error = xfs_init_mount_workqueues(mp);
	if (error)
		goto out_close_devices;

	error = xfs_init_percpu_counters(mp);
	if (error)
		goto out_destroy_workqueues;

	/* Allocate stats memory before we do operations that might use it */
	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
	if (!mp->m_stats.xs_stats) {
		error = -ENOMEM;
		goto out_destroy_counters;
	}

	error = xfs_readsb(mp, flags);
	if (error)
		goto out_free_stats;

	error = xfs_finish_flags(mp);
	if (error)
		goto out_free_sb;

	error = xfs_setup_devices(mp);
	if (error)
		goto out_free_sb;

	error = xfs_filestream_mount(mp);
	if (error)
		goto out_free_sb;

	/*
	 * we must configure the block size in the superblock before we run the
	 * full mount process as the mount process can lookup and cache inodes.
	 */
	sb->s_magic = XFS_SB_MAGIC;
	sb->s_blocksize = mp->m_sb.sb_blocksize;
	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
	sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
	sb->s_max_links = XFS_MAXLINK;
	sb->s_time_gran = 1;
	set_posix_acl_flag(sb);

	/* version 5 superblocks support inode version counters. */
	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
		sb->s_flags |= SB_I_VERSION;

	if (mp->m_flags & XFS_MOUNT_DAX) {
		bool rtdev_is_dax = false, datadev_is_dax;

		xfs_warn(mp,
		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");

		datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
			sb->s_blocksize);
		if (mp->m_rtdev_targp)
			rtdev_is_dax = bdev_dax_supported(
				mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
		/* DAX stays on if either the data or the realtime device supports it */
		if (!rtdev_is_dax && !datadev_is_dax) {
			xfs_alert(mp,
			"DAX unsupported by block device. Turning off DAX.");
			mp->m_flags &= ~XFS_MOUNT_DAX;
		}
		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
			xfs_alert(mp,
		"DAX and reflink cannot be used together!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}
	}

	/* The discard option is dropped with a warning, not treated as an error. */
	if (mp->m_flags & XFS_MOUNT_DISCARD) {
		struct request_queue *q = bdev_get_queue(sb->s_bdev);

		if (!blk_queue_discard(q)) {
			xfs_warn(mp, "mounting with \"discard\" option, but "
					"the device does not support discard");
			mp->m_flags &= ~XFS_MOUNT_DISCARD;
		}
	}

	if (xfs_sb_version_hasreflink(&mp->m_sb) && mp->m_sb.sb_rblocks) {
		xfs_alert(mp,
	"reflink not compatible with realtime device!");
		error = -EINVAL;
		goto out_filestream_unmount;
	}

	if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
		xfs_alert(mp,
	"reverse mapping btree not compatible with realtime device!");
		error = -EINVAL;
		goto out_filestream_unmount;
	}

	error = xfs_mountfs(mp);
	if (error)
		goto out_filestream_unmount;

	root = igrab(VFS_I(mp->m_rootip));
	if (!root) {
		error = -ENOENT;
		goto out_unmount;
	}
	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
		error = -ENOMEM;
		goto out_unmount;
	}

	return 0;

	/* Error unwind: each label undoes one setup step, newest first. */
 out_filestream_unmount:
	xfs_filestream_unmount(mp);
 out_free_sb:
	xfs_freesb(mp);
 out_free_stats:
	free_percpu(mp->m_stats.xs_stats);
 out_destroy_counters:
	xfs_destroy_percpu_counters(mp);
 out_destroy_workqueues:
	xfs_destroy_mount_workqueues(mp);
 out_close_devices:
	xfs_close_devices(mp);
 out_free_fsname:
	sb->s_fs_info = NULL;
	xfs_free_fsname(mp);
	kfree(mp);
 out:
	return error;

	/* Failure after xfs_mountfs() succeeded: unmount, then rejoin the ladder. */
 out_unmount:
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);
	goto out_free_sb;
}
1761 
1762 STATIC void
xfs_fs_put_super(struct super_block * sb)1763 xfs_fs_put_super(
1764 	struct super_block	*sb)
1765 {
1766 	struct xfs_mount	*mp = XFS_M(sb);
1767 
1768 	/* if ->fill_super failed, we have no mount to tear down */
1769 	if (!sb->s_fs_info)
1770 		return;
1771 
1772 	xfs_notice(mp, "Unmounting Filesystem");
1773 	xfs_filestream_unmount(mp);
1774 	xfs_unmountfs(mp);
1775 
1776 	xfs_freesb(mp);
1777 	free_percpu(mp->m_stats.xs_stats);
1778 	xfs_destroy_percpu_counters(mp);
1779 	xfs_destroy_mount_workqueues(mp);
1780 	xfs_close_devices(mp);
1781 
1782 	sb->s_fs_info = NULL;
1783 	xfs_free_fsname(mp);
1784 	kfree(mp);
1785 }
1786 
1787 STATIC struct dentry *
xfs_fs_mount(struct file_system_type * fs_type,int flags,const char * dev_name,void * data)1788 xfs_fs_mount(
1789 	struct file_system_type	*fs_type,
1790 	int			flags,
1791 	const char		*dev_name,
1792 	void			*data)
1793 {
1794 	return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1795 }
1796 
1797 static long
xfs_fs_nr_cached_objects(struct super_block * sb,struct shrink_control * sc)1798 xfs_fs_nr_cached_objects(
1799 	struct super_block	*sb,
1800 	struct shrink_control	*sc)
1801 {
1802 	/* Paranoia: catch incorrect calls during mount setup or teardown */
1803 	if (WARN_ON_ONCE(!sb->s_fs_info))
1804 		return 0;
1805 	return xfs_reclaim_inodes_count(XFS_M(sb));
1806 }
1807 
1808 static long
xfs_fs_free_cached_objects(struct super_block * sb,struct shrink_control * sc)1809 xfs_fs_free_cached_objects(
1810 	struct super_block	*sb,
1811 	struct shrink_control	*sc)
1812 {
1813 	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
1814 }
1815 
1816 static const struct super_operations xfs_super_operations = {
1817 	.alloc_inode		= xfs_fs_alloc_inode,
1818 	.destroy_inode		= xfs_fs_destroy_inode,
1819 	.dirty_inode		= xfs_fs_dirty_inode,
1820 	.drop_inode		= xfs_fs_drop_inode,
1821 	.put_super		= xfs_fs_put_super,
1822 	.sync_fs		= xfs_fs_sync_fs,
1823 	.freeze_fs		= xfs_fs_freeze,
1824 	.unfreeze_fs		= xfs_fs_unfreeze,
1825 	.statfs			= xfs_fs_statfs,
1826 	.remount_fs		= xfs_fs_remount,
1827 	.show_options		= xfs_fs_show_options,
1828 	.nr_cached_objects	= xfs_fs_nr_cached_objects,
1829 	.free_cached_objects	= xfs_fs_free_cached_objects,
1830 };
1831 
1832 static struct file_system_type xfs_fs_type = {
1833 	.owner			= THIS_MODULE,
1834 	.name			= "xfs",
1835 	.mount			= xfs_fs_mount,
1836 	.kill_sb		= kill_block_super,
1837 	.fs_flags		= FS_REQUIRES_DEV,
1838 };
1839 MODULE_ALIAS_FS("xfs");
1840 
/*
 * Create the ioend bioset and all the slab zones set up in this function.
 *
 * Returns 0 when everything was created.  If any step fails, the labels at
 * the bottom destroy what had already been created, in reverse order of
 * creation, and -ENOMEM is returned.
 */
STATIC int __init
xfs_init_zones(void)
{
	if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
			offsetof(struct xfs_ioend, io_inline_bio),
			BIOSET_NEED_BVECS))
		goto out;

	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
						"xfs_log_ticket");
	if (!xfs_log_ticket_zone)
		goto out_free_ioend_bioset;

	xfs_bmap_free_item_zone = kmem_zone_init(
			sizeof(struct xfs_extent_free_item),
			"xfs_bmap_free_item");
	if (!xfs_bmap_free_item_zone)
		goto out_destroy_log_ticket_zone;

	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
						"xfs_btree_cur");
	if (!xfs_btree_cur_zone)
		goto out_destroy_bmap_free_item_zone;

	xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
						"xfs_da_state");
	if (!xfs_da_state_zone)
		goto out_destroy_btree_cur_zone;

	xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork");
	if (!xfs_ifork_zone)
		goto out_destroy_da_state_zone;

	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
	if (!xfs_trans_zone)
		goto out_destroy_ifork_zone;


	/*
	 * The size of the zone allocated buf log item is the maximum
	 * size possible under XFS.  This wastes a little bit of memory,
	 * but it is much faster.
	 */
	xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
					   "xfs_buf_item");
	if (!xfs_buf_item_zone)
		goto out_destroy_trans_zone;

	/* EFD/EFI items are sized with room for the "fast" number of extents. */
	xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
			((XFS_EFD_MAX_FAST_EXTENTS - 1) *
				 sizeof(xfs_extent_t))), "xfs_efd_item");
	if (!xfs_efd_zone)
		goto out_destroy_buf_item_zone;

	xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
			((XFS_EFI_MAX_FAST_EXTENTS - 1) *
				sizeof(xfs_extent_t))), "xfs_efi_item");
	if (!xfs_efi_zone)
		goto out_destroy_efd_zone;

	/* The inode zone is the only one created with a constructor. */
	xfs_inode_zone =
		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
			KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD |
			KM_ZONE_ACCOUNT, xfs_fs_inode_init_once);
	if (!xfs_inode_zone)
		goto out_destroy_efi_zone;

	xfs_ili_zone =
		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
					KM_ZONE_SPREAD, NULL);
	if (!xfs_ili_zone)
		goto out_destroy_inode_zone;
	xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item),
					"xfs_icr");
	if (!xfs_icreate_zone)
		goto out_destroy_ili_zone;

	xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
			"xfs_rud_item");
	if (!xfs_rud_zone)
		goto out_destroy_icreate_zone;

	xfs_rui_zone = kmem_zone_init(
			xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
			"xfs_rui_item");
	if (!xfs_rui_zone)
		goto out_destroy_rud_zone;

	xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item),
			"xfs_cud_item");
	if (!xfs_cud_zone)
		goto out_destroy_rui_zone;

	xfs_cui_zone = kmem_zone_init(
			xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
			"xfs_cui_item");
	if (!xfs_cui_zone)
		goto out_destroy_cud_zone;

	xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item),
			"xfs_bud_item");
	if (!xfs_bud_zone)
		goto out_destroy_cui_zone;

	xfs_bui_zone = kmem_zone_init(
			xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
			"xfs_bui_item");
	if (!xfs_bui_zone)
		goto out_destroy_bud_zone;

	return 0;

	/* Error unwind: destroy in reverse order of creation. */
 out_destroy_bud_zone:
	kmem_zone_destroy(xfs_bud_zone);
 out_destroy_cui_zone:
	kmem_zone_destroy(xfs_cui_zone);
 out_destroy_cud_zone:
	kmem_zone_destroy(xfs_cud_zone);
 out_destroy_rui_zone:
	kmem_zone_destroy(xfs_rui_zone);
 out_destroy_rud_zone:
	kmem_zone_destroy(xfs_rud_zone);
 out_destroy_icreate_zone:
	kmem_zone_destroy(xfs_icreate_zone);
 out_destroy_ili_zone:
	kmem_zone_destroy(xfs_ili_zone);
 out_destroy_inode_zone:
	kmem_zone_destroy(xfs_inode_zone);
 out_destroy_efi_zone:
	kmem_zone_destroy(xfs_efi_zone);
 out_destroy_efd_zone:
	kmem_zone_destroy(xfs_efd_zone);
 out_destroy_buf_item_zone:
	kmem_zone_destroy(xfs_buf_item_zone);
 out_destroy_trans_zone:
	kmem_zone_destroy(xfs_trans_zone);
 out_destroy_ifork_zone:
	kmem_zone_destroy(xfs_ifork_zone);
 out_destroy_da_state_zone:
	kmem_zone_destroy(xfs_da_state_zone);
 out_destroy_btree_cur_zone:
	kmem_zone_destroy(xfs_btree_cur_zone);
 out_destroy_bmap_free_item_zone:
	kmem_zone_destroy(xfs_bmap_free_item_zone);
 out_destroy_log_ticket_zone:
	kmem_zone_destroy(xfs_log_ticket_zone);
 out_free_ioend_bioset:
	bioset_exit(&xfs_ioend_bioset);
 out:
	return -ENOMEM;
}
1992 
/*
 * Destroy all of the global XFS kmem zones and then the ioend bioset.
 *
 * Called from exit_xfs_fs() and from the error unwind in init_xfs_fs().
 * The bui/bud/cui/cud/rui zones go first; from xfs_rud_zone onwards the
 * order is the same as the out_destroy_* unwind ladder directly above this
 * function, ending with xfs_log_ticket_zone and xfs_ioend_bioset.
 */
STATIC void
xfs_destroy_zones(void)
{
	/*
	 * Make sure all delayed RCU frees are flushed before we
	 * destroy the caches.
	 */
	rcu_barrier();
	kmem_zone_destroy(xfs_bui_zone);
	kmem_zone_destroy(xfs_bud_zone);
	kmem_zone_destroy(xfs_cui_zone);
	kmem_zone_destroy(xfs_cud_zone);
	kmem_zone_destroy(xfs_rui_zone);
	kmem_zone_destroy(xfs_rud_zone);
	kmem_zone_destroy(xfs_icreate_zone);
	kmem_zone_destroy(xfs_ili_zone);
	kmem_zone_destroy(xfs_inode_zone);
	kmem_zone_destroy(xfs_efi_zone);
	kmem_zone_destroy(xfs_efd_zone);
	kmem_zone_destroy(xfs_buf_item_zone);
	kmem_zone_destroy(xfs_trans_zone);
	kmem_zone_destroy(xfs_ifork_zone);
	kmem_zone_destroy(xfs_da_state_zone);
	kmem_zone_destroy(xfs_btree_cur_zone);
	kmem_zone_destroy(xfs_bmap_free_item_zone);
	kmem_zone_destroy(xfs_log_ticket_zone);
	/* The bioset is not a kmem zone, so it has its own teardown call. */
	bioset_exit(&xfs_ioend_bioset);
}
2021 
2022 STATIC int __init
xfs_init_workqueues(void)2023 xfs_init_workqueues(void)
2024 {
2025 	/*
2026 	 * The allocation workqueue can be used in memory reclaim situations
2027 	 * (writepage path), and parallelism is only limited by the number of
2028 	 * AGs in all the filesystems mounted. Hence use the default large
2029 	 * max_active value for this workqueue.
2030 	 */
2031 	xfs_alloc_wq = alloc_workqueue("xfsalloc",
2032 			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
2033 	if (!xfs_alloc_wq)
2034 		return -ENOMEM;
2035 
2036 	xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
2037 	if (!xfs_discard_wq)
2038 		goto out_free_alloc_wq;
2039 
2040 	return 0;
2041 out_free_alloc_wq:
2042 	destroy_workqueue(xfs_alloc_wq);
2043 	return -ENOMEM;
2044 }
2045 
/*
 * Destroy the module-wide workqueues.
 *
 * This is the reverse of the order in which xfs_init_workqueues() allocates
 * them: the discard workqueue goes first, then the allocation workqueue.
 */
STATIC void
xfs_destroy_workqueues(void)
{
	destroy_workqueue(xfs_discard_wq);
	destroy_workqueue(xfs_alloc_wq);
}
2052 
2053 STATIC int __init
init_xfs_fs(void)2054 init_xfs_fs(void)
2055 {
2056 	int			error;
2057 
2058 	xfs_check_ondisk_structs();
2059 
2060 	printk(KERN_INFO XFS_VERSION_STRING " with "
2061 			 XFS_BUILD_OPTIONS " enabled\n");
2062 
2063 	xfs_extent_free_init_defer_op();
2064 	xfs_rmap_update_init_defer_op();
2065 	xfs_refcount_update_init_defer_op();
2066 	xfs_bmap_update_init_defer_op();
2067 
2068 	xfs_dir_startup();
2069 
2070 	error = xfs_init_zones();
2071 	if (error)
2072 		goto out;
2073 
2074 	error = xfs_init_workqueues();
2075 	if (error)
2076 		goto out_destroy_zones;
2077 
2078 	error = xfs_mru_cache_init();
2079 	if (error)
2080 		goto out_destroy_wq;
2081 
2082 	error = xfs_buf_init();
2083 	if (error)
2084 		goto out_mru_cache_uninit;
2085 
2086 	error = xfs_init_procfs();
2087 	if (error)
2088 		goto out_buf_terminate;
2089 
2090 	error = xfs_sysctl_register();
2091 	if (error)
2092 		goto out_cleanup_procfs;
2093 
2094 	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
2095 	if (!xfs_kset) {
2096 		error = -ENOMEM;
2097 		goto out_sysctl_unregister;
2098 	}
2099 
2100 	xfsstats.xs_kobj.kobject.kset = xfs_kset;
2101 
2102 	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2103 	if (!xfsstats.xs_stats) {
2104 		error = -ENOMEM;
2105 		goto out_kset_unregister;
2106 	}
2107 
2108 	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
2109 			       "stats");
2110 	if (error)
2111 		goto out_free_stats;
2112 
2113 #ifdef DEBUG
2114 	xfs_dbg_kobj.kobject.kset = xfs_kset;
2115 	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
2116 	if (error)
2117 		goto out_remove_stats_kobj;
2118 #endif
2119 
2120 	error = xfs_qm_init();
2121 	if (error)
2122 		goto out_remove_dbg_kobj;
2123 
2124 	error = register_filesystem(&xfs_fs_type);
2125 	if (error)
2126 		goto out_qm_exit;
2127 	return 0;
2128 
2129  out_qm_exit:
2130 	xfs_qm_exit();
2131  out_remove_dbg_kobj:
2132 #ifdef DEBUG
2133 	xfs_sysfs_del(&xfs_dbg_kobj);
2134  out_remove_stats_kobj:
2135 #endif
2136 	xfs_sysfs_del(&xfsstats.xs_kobj);
2137  out_free_stats:
2138 	free_percpu(xfsstats.xs_stats);
2139  out_kset_unregister:
2140 	kset_unregister(xfs_kset);
2141  out_sysctl_unregister:
2142 	xfs_sysctl_unregister();
2143  out_cleanup_procfs:
2144 	xfs_cleanup_procfs();
2145  out_buf_terminate:
2146 	xfs_buf_terminate();
2147  out_mru_cache_uninit:
2148 	xfs_mru_cache_uninit();
2149  out_destroy_wq:
2150 	xfs_destroy_workqueues();
2151  out_destroy_zones:
2152 	xfs_destroy_zones();
2153  out:
2154 	return error;
2155 }
2156 
/*
 * Module unload entry point: tear down everything init_xfs_fs() set up.
 *
 * After xfs_qm_exit() and unregister_filesystem(), the remaining calls are
 * the same ones, in the same order, as the error-unwind ladder at the end of
 * init_xfs_fs().  xfs_uuid_table_free() has no counterpart in init_xfs_fs()
 * and is run last.
 */
STATIC void __exit
exit_xfs_fs(void)
{
	xfs_qm_exit();
	unregister_filesystem(&xfs_fs_type);
#ifdef DEBUG
	/* The debug kobject only exists in DEBUG builds. */
	xfs_sysfs_del(&xfs_dbg_kobj);
#endif
	xfs_sysfs_del(&xfsstats.xs_kobj);
	free_percpu(xfsstats.xs_stats);
	kset_unregister(xfs_kset);
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
	xfs_buf_terminate();
	xfs_mru_cache_uninit();
	xfs_destroy_workqueues();
	xfs_destroy_zones();
	xfs_uuid_table_free();
}
2176 
/* Register the module's load and unload entry points. */
module_init(init_xfs_fs);
module_exit(exit_xfs_fs);

/*
 * Module metadata.  The description is built from the same version and
 * build-option strings that init_xfs_fs() prints at load time.
 */
MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
MODULE_LICENSE("GPL");
2183