1 /* du -- summarize device usage
2 Copyright (C) 1988-2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Differences from the Unix du:
18 * Doesn't simply ignore the names of regular files given as arguments
19 when -a is given.
20
21 By tege@sics.se, Torbjörn Granlund,
22 and djm@ai.mit.edu, David MacKenzie.
23 Variable blocks added by lm@sgi.com and eggert@twinsun.com.
24 Rewritten to use nftw, then to use fts by Jim Meyering. */
25
26 #include <config.h>
27 #include <getopt.h>
28 #include <sys/types.h>
29 #include "system.h"
30 #include "argmatch.h"
31 #include "argv-iter.h"
32 #include "assure.h"
33 #include "di-set.h"
34 #include "exclude.h"
35 #include "fprintftime.h"
36 #include "human.h"
37 #include "mountlist.h"
38 #include "quote.h"
39 #include "stat-size.h"
40 #include "stat-time.h"
41 #include "stdio--.h"
42 #include "xfts.h"
43 #include "xstrtol.h"
44 #include "xstrtol-error.h"
45
/* Debug flag defined outside this file; main sets it only in builds
   compiled with DU_DEBUG.  */
extern bool fts_debug;

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "du"

/* Author list handed to the --version handling in main.  */
#define AUTHORS \
  proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Paul Eggert"), \
  proper_name ("Jim Meyering")

/* In DU_DEBUG builds, FTS_CROSS_CHECK calls fts_cross_check on the
   traversal handle; otherwise it expands to nothing.  */
#if DU_DEBUG
# define FTS_CROSS_CHECK(Fts) fts_cross_check (Fts)
#else
# define FTS_CROSS_CHECK(Fts)
#endif
62
/* A set of dev/ino pairs to help identify files and directories
   whose sizes have already been counted.  Allocated in main and
   filled by process_file via hash_ins.  */
static struct di_set *di_files;

/* A set containing a dev/ino pair for each local mount point directory.
   Allocated lazily by mount_point_in_fts_cycle; null until then.  */
static struct di_set *di_mnt;

/* Keep track of the preceding "level" (depth in hierarchy)
   from one call of process_file to the next.  du_files resets it
   to 0 when a traversal ends.  */
static size_t prev_level;
73
/* Accumulated usage information for a file or a directory:
   a size, an inode count and the latest timestamp seen.  */
struct duinfo
{
  /* Size of files in directory.  duinfo_add saturates this at
     UINTMAX_MAX, which print_only_size reports as "Infinity".  */
  uintmax_t size;

  /* Number of inodes in directory.  */
  uintmax_t inodes;

  /* Latest timestamp found.  If tmax.tv_sec == TYPE_MINIMUM (time_t)
     && tmax.tv_nsec < 0, no timestamp has been found.  */
  struct timespec tmax;
};
87
88 /* Initialize directory data. */
89 static inline void
duinfo_init(struct duinfo * a)90 duinfo_init (struct duinfo *a)
91 {
92 a->size = 0;
93 a->inodes = 0;
94 a->tmax.tv_sec = TYPE_MINIMUM (time_t);
95 a->tmax.tv_nsec = -1;
96 }
97
98 /* Set directory data. */
99 static inline void
duinfo_set(struct duinfo * a,uintmax_t size,struct timespec tmax)100 duinfo_set (struct duinfo *a, uintmax_t size, struct timespec tmax)
101 {
102 a->size = size;
103 a->inodes = 1;
104 a->tmax = tmax;
105 }
106
107 /* Accumulate directory data. */
108 static inline void
duinfo_add(struct duinfo * a,struct duinfo const * b)109 duinfo_add (struct duinfo *a, struct duinfo const *b)
110 {
111 uintmax_t sum = a->size + b->size;
112 a->size = a->size <= sum ? sum : UINTMAX_MAX;
113 a->inodes = a->inodes + b->inodes;
114 if (timespec_cmp (a->tmax, b->tmax) < 0)
115 a->tmax = b->tmax;
116 }
117
/* A structure for per-directory level information.  process_file
   keeps one of these per depth level of the traversal.  */
struct dulevel
{
  /* Entries in this directory.  */
  struct duinfo ent;

  /* Total for subdirectories.  */
  struct duinfo subdir;
};
127
/* If true, display counts for all files, not just directories.  */
static bool opt_all = false;

/* If true, rather than using the device usage of each file,
   use the apparent size (stat.st_size if usable, 0 otherwise).  */
static bool apparent_size = false;

/* If true, count each hard link of files with multiple links.  */
static bool opt_count_all = false;

/* If true, hash all files to look for hard links.  */
static bool hash_all;

/* If true, output the NUL byte instead of a newline at the end of each line. */
static bool opt_nul_terminate_output = false;

/* If true, print a grand total at the end.  */
static bool print_grand_total = false;

/* If true, do not add sizes of subdirectories.  */
static bool opt_separate_dirs = false;

/* Show the total for each directory (and file if --all) that is at
   most MAX_DEPTH levels down from the root of the hierarchy.  The root
   is at level 0, so 'du --max-depth=0' is equivalent to 'du -s'.  */
static idx_t max_depth = IDX_MAX;

/* Only output entries with at least this SIZE if positive,
   or at most if negative.  See --threshold option.  */
static intmax_t opt_threshold = 0;

/* Human-readable options for output.  */
static int human_output_opts;

/* Output inodes count instead of blocks used.  */
static bool opt_inodes = false;

/* If true, print most recently modified date, using the specified format.  */
static bool opt_time = false;
167
/* Type of time to display.  Controlled by --time.  */

enum time_type
  {
    time_mtime,			/* default */
    time_ctime,
    time_atime
  };

/* Which timestamp process_file records for each file.  */
static enum time_type time_type = time_mtime;
178
/* User specified date / time style.  */
static char const *time_style = nullptr;

/* Format used to display date / time.  Controlled by --time-style.  */
static char const *time_format = nullptr;

/* The local time zone rules, as per the TZ environment variable.  */
static timezone_t localtz;

/* The units to use when printing sizes.  */
static uintmax_t output_block_size;

/* File name patterns to exclude.  */
static struct exclude *exclude;

/* Grand total size of all args, in bytes.  Also latest modified date.  */
static struct duinfo tot_dui;

/* True if the fts entry type TYPE is one of the two directory visits
   that process_file accounts for: FTS_DP or FTS_DNR.  */
#define IS_DIR_TYPE(Type)	\
  ((Type) == FTS_DP		\
   || (Type) == FTS_DNR)
200
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   These are the values getopt_long returns to the switch in main.  */
enum
{
  APPARENT_SIZE_OPTION = CHAR_MAX + 1,
  EXCLUDE_OPTION,
  FILES0_FROM_OPTION,
  HUMAN_SI_OPTION,
  FTS_DEBUG,
  TIME_OPTION,
  TIME_STYLE_OPTION,
  INODES_OPTION
};
214
/* Long options passed to getopt_long in main.  The last field of each
   entry is the value returned for that option: either the equivalent
   short option character or one of the pseudo short options.  */
static struct option const long_options[] =
{
  {"all", no_argument, nullptr, 'a'},
  {"apparent-size", no_argument, nullptr, APPARENT_SIZE_OPTION},
  {"block-size", required_argument, nullptr, 'B'},
  {"bytes", no_argument, nullptr, 'b'},
  {"count-links", no_argument, nullptr, 'l'},
  /* {"-debug", no_argument, nullptr, FTS_DEBUG}, */
  {"dereference", no_argument, nullptr, 'L'},
  {"dereference-args", no_argument, nullptr, 'D'},
  {"exclude", required_argument, nullptr, EXCLUDE_OPTION},
  {"exclude-from", required_argument, nullptr, 'X'},
  {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
  {"human-readable", no_argument, nullptr, 'h'},
  {"inodes", no_argument, nullptr, INODES_OPTION},
  {"si", no_argument, nullptr, HUMAN_SI_OPTION},
  {"max-depth", required_argument, nullptr, 'd'},
  {"null", no_argument, nullptr, '0'},
  {"no-dereference", no_argument, nullptr, 'P'},
  {"one-file-system", no_argument, nullptr, 'x'},
  {"separate-dirs", no_argument, nullptr, 'S'},
  {"summarize", no_argument, nullptr, 's'},
  {"total", no_argument, nullptr, 'c'},
  {"threshold", required_argument, nullptr, 't'},
  {"time", optional_argument, nullptr, TIME_OPTION},
  {"time-style", required_argument, nullptr, TIME_STYLE_OPTION},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {nullptr, 0, nullptr, 0}
};
245
/* Words accepted as the argument of --time, and, at the same index in
   time_types, the timestamp each word selects.  main matches the
   argument against these tables with XARGMATCH.  */
static char const *const time_args[] =
{
  "atime", "access", "use", "ctime", "status", nullptr
};
static enum time_type const time_types[] =
{
  time_atime, time_atime, time_atime, time_ctime, time_ctime
};
ARGMATCH_VERIFY (time_args, time_types);
255
/* 'full-iso' uses full ISO-style dates and times.  'long-iso' uses longer
   ISO-style timestamps, though shorter than 'full-iso'.  'iso' uses shorter
   ISO-style timestamps.  */
enum time_style
  {
    full_iso_time_style,       /* --time-style=full-iso */
    long_iso_time_style,       /* --time-style=long-iso */
    iso_time_style	       /* --time-style=iso */
  };

/* Named styles accepted by --time-style, and, at the same index in
   time_style_types, the enum value each name selects.  */
static char const *const time_style_args[] =
{
  "full-iso", "long-iso", "iso", nullptr
};
static enum time_style const time_style_types[] =
{
  full_iso_time_style, long_iso_time_style, iso_time_style
};
ARGMATCH_VERIFY (time_style_args, time_style_types);
275
276 void
usage(int status)277 usage (int status)
278 {
279 if (status != EXIT_SUCCESS)
280 emit_try_help ();
281 else
282 {
283 printf (_("\
284 Usage: %s [OPTION]... [FILE]...\n\
285 or: %s [OPTION]... --files0-from=F\n\
286 "), program_name, program_name);
287 fputs (_("\
288 Summarize device usage of the set of FILEs, recursively for directories.\n\
289 "), stdout);
290
291 emit_mandatory_arg_note ();
292
293 fputs (_("\
294 -0, --null end each output line with NUL, not newline\n\
295 -a, --all write counts for all files, not just directories\n\
296 --apparent-size print apparent sizes rather than device usage; although\
297 \n\
298 the apparent size is usually smaller, it may be\n\
299 larger due to holes in ('sparse') files, internal\n\
300 fragmentation, indirect blocks, and the like\n\
301 "), stdout);
302 fputs (_("\
303 -B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\
304 '-BM' prints sizes in units of 1,048,576 bytes;\n\
305 see SIZE format below\n\
306 -b, --bytes equivalent to '--apparent-size --block-size=1'\n\
307 -c, --total produce a grand total\n\
308 -D, --dereference-args dereference only symlinks that are listed on the\n\
309 command line\n\
310 -d, --max-depth=N print the total for a directory (or file, with --all)\n\
311 only if it is N or fewer levels below the command\n\
312 line argument; --max-depth=0 is the same as\n\
313 --summarize\n\
314 "), stdout);
315 fputs (_("\
316 --files0-from=F summarize device usage of the\n\
317 NUL-terminated file names specified in file F;\n\
318 if F is -, then read names from standard input\n\
319 -H equivalent to --dereference-args (-D)\n\
320 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\
321 \n\
322 --inodes list inode usage information instead of block usage\n\
323 "), stdout);
324 fputs (_("\
325 -k like --block-size=1K\n\
326 -L, --dereference dereference all symbolic links\n\
327 -l, --count-links count sizes many times if hard linked\n\
328 -m like --block-size=1M\n\
329 "), stdout);
330 fputs (_("\
331 -P, --no-dereference don't follow any symbolic links (this is the default)\n\
332 -S, --separate-dirs for directories do not include size of subdirectories\n\
333 --si like -h, but use powers of 1000 not 1024\n\
334 -s, --summarize display only a total for each argument\n\
335 "), stdout);
336 fputs (_("\
337 -t, --threshold=SIZE exclude entries smaller than SIZE if positive,\n\
338 or entries greater than SIZE if negative\n\
339 --time show time of the last modification of any file in the\n\
340 directory, or any of its subdirectories\n\
341 --time=WORD show time as WORD instead of modification time:\n\
342 atime, access, use, ctime or status\n\
343 --time-style=STYLE show times using STYLE, which can be:\n\
344 full-iso, long-iso, iso, or +FORMAT;\n\
345 FORMAT is interpreted like in 'date'\n\
346 "), stdout);
347 fputs (_("\
348 -X, --exclude-from=FILE exclude files that match any pattern in FILE\n\
349 --exclude=PATTERN exclude files that match PATTERN\n\
350 -x, --one-file-system skip directories on different file systems\n\
351 "), stdout);
352 fputs (HELP_OPTION_DESCRIPTION, stdout);
353 fputs (VERSION_OPTION_DESCRIPTION, stdout);
354 emit_blocksize_note ("DU");
355 emit_size_note ();
356 emit_ancillary_info (PROGRAM_NAME);
357 }
358 exit (status);
359 }
360
/* Record the INO/DEV pair in DI_SET.
   Return true if the pair was newly inserted, false if it was
   already present.  Die via xalloc_die if di_set_insert reports
   failure (a negative result).  */
static bool
hash_ins (struct di_set *di_set, ino_t ino, dev_t dev)
{
  int rc = di_set_insert (di_set, dev, ino);
  if (rc < 0)
    xalloc_die ();
  return rc != 0;
}
372
373 /* FIXME: this code is nearly identical to code in date.c */
374 /* Display the date and time in WHEN according to the format specified
375 in FORMAT. */
376
377 static void
show_date(char const * format,struct timespec when,timezone_t tz)378 show_date (char const *format, struct timespec when, timezone_t tz)
379 {
380 struct tm tm;
381 if (localtime_rz (tz, &when.tv_sec, &tm))
382 fprintftime (stdout, format, &tm, tz, when.tv_nsec);
383 else
384 {
385 char buf[INT_BUFSIZE_BOUND (intmax_t)];
386 char *when_str = timetostr (when.tv_sec, buf);
387 error (0, 0, _("time %s is out of range"), quote (when_str));
388 fputs (when_str, stdout);
389 }
390 }
391
392 /* Print N_BYTES. Convert it to a readable value before printing. */
393
394 static void
print_only_size(uintmax_t n_bytes)395 print_only_size (uintmax_t n_bytes)
396 {
397 char buf[LONGEST_HUMAN_READABLE + 1];
398 fputs ((n_bytes == UINTMAX_MAX
399 ? _("Infinity")
400 : human_readable (n_bytes, buf, human_output_opts,
401 1, output_block_size)),
402 stdout);
403 }
404
405 /* Print size (and optionally time) indicated by *PDUI, followed by STRING. */
406
407 static void
print_size(const struct duinfo * pdui,char const * string)408 print_size (const struct duinfo *pdui, char const *string)
409 {
410 print_only_size (opt_inodes
411 ? pdui->inodes
412 : pdui->size);
413
414 if (opt_time)
415 {
416 putchar ('\t');
417 show_date (time_format, pdui->tmax, localtz);
418 }
419 printf ("\t%s%c", string, opt_nul_terminate_output ? '\0' : '\n');
420 fflush (stdout);
421 }
422
423 /* Fill the di_mnt set with local mount point dev/ino pairs. */
424
425 static void
fill_mount_table(void)426 fill_mount_table (void)
427 {
428 struct mount_entry *mnt_ent = read_file_system_list (false);
429 while (mnt_ent)
430 {
431 struct mount_entry *mnt_free;
432 if (!mnt_ent->me_remote && !mnt_ent->me_dummy)
433 {
434 struct stat buf;
435 if (!stat (mnt_ent->me_mountdir, &buf))
436 hash_ins (di_mnt, buf.st_ino, buf.st_dev);
437 else
438 {
439 /* Ignore stat failure. False positives are too common.
440 E.g., "Permission denied" on /run/user/<name>/gvfs. */
441 }
442 }
443
444 mnt_free = mnt_ent;
445 mnt_ent = mnt_ent->me_next;
446 free_mount_entry (mnt_free);
447 }
448 }
449
450 /* This function checks whether any of the directories in the cycle that
451 fts detected is a mount point. */
452
453 static bool
mount_point_in_fts_cycle(FTSENT const * ent)454 mount_point_in_fts_cycle (FTSENT const *ent)
455 {
456 FTSENT const *cycle_ent = ent->fts_cycle;
457
458 if (!di_mnt)
459 {
460 /* Initialize the set of dev,inode pairs. */
461 di_mnt = di_set_alloc ();
462 if (!di_mnt)
463 xalloc_die ();
464
465 fill_mount_table ();
466 }
467
468 while (ent && ent != cycle_ent)
469 {
470 if (di_set_lookup (di_mnt, ent->fts_statp->st_dev,
471 ent->fts_statp->st_ino) > 0)
472 {
473 return true;
474 }
475 ent = ent->fts_parent;
476 }
477
478 return false;
479 }
480
481 /* This function is called once for every file system object that fts
482 encounters. fts does a depth-first traversal. This function knows
483 that and accumulates per-directory totals based on changes in
484 the depth of the current entry. It returns true on success. */
485
486 static bool
process_file(FTS * fts,FTSENT * ent)487 process_file (FTS *fts, FTSENT *ent)
488 {
489 bool ok = true;
490 struct duinfo dui;
491 struct duinfo dui_to_print;
492 size_t level;
493 static size_t n_alloc;
494 /* First element of the structure contains:
495 The sum of the sizes of all entries in the single directory
496 at the corresponding level. Although this does include the sizes
497 corresponding to each subdirectory, it does not include the size of
498 any file in a subdirectory. Also corresponding last modified date.
499 Second element of the structure contains:
500 The sum of the sizes of all entries in the hierarchy at or below the
501 directory at the specified level. */
502 static struct dulevel *dulvl;
503
504 char const *file = ent->fts_path;
505 const struct stat *sb = ent->fts_statp;
506 int info = ent->fts_info;
507
508 if (info == FTS_DNR)
509 {
510 /* An error occurred, but the size is known, so count it. */
511 error (0, ent->fts_errno, _("cannot read directory %s"), quoteaf (file));
512 ok = false;
513 }
514 else if (info != FTS_DP)
515 {
516 bool excluded = excluded_file_name (exclude, file);
517 if (! excluded)
518 {
519 /* Make the stat buffer *SB valid, or fail noisily. */
520
521 if (info == FTS_NSOK)
522 {
523 fts_set (fts, ent, FTS_AGAIN);
524 MAYBE_UNUSED FTSENT const *e = fts_read (fts);
525 affirm (e == ent);
526 info = ent->fts_info;
527 }
528
529 if (info == FTS_NS || info == FTS_SLNONE)
530 {
531 error (0, ent->fts_errno, _("cannot access %s"), quoteaf (file));
532 return false;
533 }
534
535 /* The --one-file-system (-x) option cannot exclude anything
536 specified on the command-line. By definition, it can exclude
537 a file or directory only when its device number is different
538 from that of its just-processed parent directory, and du does
539 not process the parent of a command-line argument. */
540 if (fts->fts_options & FTS_XDEV
541 && FTS_ROOTLEVEL < ent->fts_level
542 && fts->fts_dev != sb->st_dev)
543 excluded = true;
544 }
545
546 if (excluded
547 || (! opt_count_all
548 && (hash_all || (! S_ISDIR (sb->st_mode) && 1 < sb->st_nlink))
549 && ! hash_ins (di_files, sb->st_ino, sb->st_dev)))
550 {
551 /* If ignoring a directory in preorder, skip its children.
552 Ignore the next fts_read output too, as it's a postorder
553 visit to the same directory. */
554 if (info == FTS_D)
555 {
556 fts_set (fts, ent, FTS_SKIP);
557 MAYBE_UNUSED FTSENT const *e = fts_read (fts);
558 affirm (e == ent);
559 }
560
561 return true;
562 }
563
564 switch (info)
565 {
566 case FTS_D:
567 return true;
568
569 case FTS_ERR:
570 /* An error occurred, but the size is known, so count it. */
571 error (0, ent->fts_errno, "%s", quotef (file));
572 ok = false;
573 break;
574
575 case FTS_DC:
576 /* If not following symlinks and not a (bind) mount point. */
577 if (cycle_warning_required (fts, ent)
578 && ! mount_point_in_fts_cycle (ent))
579 {
580 emit_cycle_warning (file);
581 return false;
582 }
583 return true;
584 }
585 }
586
587 duinfo_set (&dui,
588 (apparent_size
589 ? (usable_st_size (sb) ? MAX (0, sb->st_size) : 0)
590 : (uintmax_t) STP_NBLOCKS (sb) * ST_NBLOCKSIZE),
591 (time_type == time_mtime ? get_stat_mtime (sb)
592 : time_type == time_atime ? get_stat_atime (sb)
593 : get_stat_ctime (sb)));
594
595 level = ent->fts_level;
596 dui_to_print = dui;
597
598 if (n_alloc == 0)
599 {
600 n_alloc = level + 10;
601 dulvl = xcalloc (n_alloc, sizeof *dulvl);
602 }
603 else
604 {
605 if (level == prev_level)
606 {
607 /* This is usually the most common case. Do nothing. */
608 }
609 else if (level > prev_level)
610 {
611 /* Descending the hierarchy.
612 Clear the accumulators for *all* levels between prev_level
613 and the current one. The depth may change dramatically,
614 e.g., from 1 to 10. */
615
616 if (n_alloc <= level)
617 {
618 dulvl = xnrealloc (dulvl, level, 2 * sizeof *dulvl);
619 n_alloc = level * 2;
620 }
621
622 for (size_t i = prev_level + 1; i <= level; i++)
623 {
624 duinfo_init (&dulvl[i].ent);
625 duinfo_init (&dulvl[i].subdir);
626 }
627 }
628 else /* level < prev_level */
629 {
630 /* Ascending the hierarchy.
631 Process a directory only after all entries in that
632 directory have been processed. When the depth decreases,
633 propagate sums from the children (prev_level) to the parent.
634 Here, the current level is always one smaller than the
635 previous one. */
636 affirm (level == prev_level - 1);
637 duinfo_add (&dui_to_print, &dulvl[prev_level].ent);
638 if (!opt_separate_dirs)
639 duinfo_add (&dui_to_print, &dulvl[prev_level].subdir);
640 duinfo_add (&dulvl[level].subdir, &dulvl[prev_level].ent);
641 duinfo_add (&dulvl[level].subdir, &dulvl[prev_level].subdir);
642 }
643 }
644
645 prev_level = level;
646
647 /* Let the size of a directory entry contribute to the total for the
648 containing directory, unless --separate-dirs (-S) is specified. */
649 if (! (opt_separate_dirs && IS_DIR_TYPE (info)))
650 duinfo_add (&dulvl[level].ent, &dui);
651
652 /* Even if this directory is unreadable or we can't chdir into it,
653 do let its size contribute to the total. */
654 duinfo_add (&tot_dui, &dui);
655
656 if ((IS_DIR_TYPE (info) && level <= max_depth)
657 || (opt_all && level <= max_depth)
658 || level == 0)
659 {
660 /* Print or elide this entry according to the --threshold option. */
661 uintmax_t v = opt_inodes ? dui_to_print.inodes : dui_to_print.size;
662 if (opt_threshold < 0
663 ? v <= -opt_threshold
664 : v >= opt_threshold)
665 print_size (&dui_to_print, file);
666 }
667
668 return ok;
669 }
670
671 /* Recursively print the sizes of the directories (and, if selected, files)
672 named in FILES, the last entry of which is null.
673 BIT_FLAGS controls how fts works.
674 Return true if successful. */
675
676 static bool
du_files(char ** files,int bit_flags)677 du_files (char **files, int bit_flags)
678 {
679 bool ok = true;
680
681 if (*files)
682 {
683 FTS *fts = xfts_open (files, bit_flags, nullptr);
684
685 while (true)
686 {
687 FTSENT *ent;
688
689 ent = fts_read (fts);
690 if (ent == nullptr)
691 {
692 if (errno != 0)
693 {
694 error (0, errno, _("fts_read failed: %s"),
695 quotef (fts->fts_path));
696 ok = false;
697 }
698
699 /* When exiting this loop early, be careful to reset the
700 global, prev_level, used in process_file. Otherwise, its
701 (level == prev_level - 1) assertion could fail. */
702 prev_level = 0;
703 break;
704 }
705 FTS_CROSS_CHECK (fts);
706
707 ok &= process_file (fts, ent);
708 }
709
710 if (fts_close (fts) != 0)
711 {
712 error (0, errno, _("fts_close failed"));
713 ok = false;
714 }
715 }
716
717 return ok;
718 }
719
720 int
main(int argc,char ** argv)721 main (int argc, char **argv)
722 {
723 char *cwd_only[2];
724 bool max_depth_specified = false;
725 bool ok = true;
726 char *files_from = nullptr;
727
728 /* Bit flags that control how fts works. */
729 int bit_flags = FTS_NOSTAT;
730
731 /* Select one of the three FTS_ options that control if/when
732 to follow a symlink. */
733 int symlink_deref_bits = FTS_PHYSICAL;
734
735 /* If true, display only a total for each argument. */
736 bool opt_summarize_only = false;
737
738 cwd_only[0] = bad_cast (".");
739 cwd_only[1] = nullptr;
740
741 initialize_main (&argc, &argv);
742 set_program_name (argv[0]);
743 setlocale (LC_ALL, "");
744 bindtextdomain (PACKAGE, LOCALEDIR);
745 textdomain (PACKAGE);
746
747 atexit (close_stdout);
748
749 exclude = new_exclude ();
750
751 human_options (getenv ("DU_BLOCK_SIZE"),
752 &human_output_opts, &output_block_size);
753
754 while (true)
755 {
756 int oi = -1;
757 int c = getopt_long (argc, argv, "0abd:chHklmst:xB:DLPSX:",
758 long_options, &oi);
759 if (c == -1)
760 break;
761
762 switch (c)
763 {
764 #if DU_DEBUG
765 case FTS_DEBUG:
766 fts_debug = true;
767 break;
768 #endif
769
770 case '0':
771 opt_nul_terminate_output = true;
772 break;
773
774 case 'a':
775 opt_all = true;
776 break;
777
778 case APPARENT_SIZE_OPTION:
779 apparent_size = true;
780 break;
781
782 case 'b':
783 apparent_size = true;
784 human_output_opts = 0;
785 output_block_size = 1;
786 break;
787
788 case 'c':
789 print_grand_total = true;
790 break;
791
792 case 'h':
793 human_output_opts = human_autoscale | human_SI | human_base_1024;
794 output_block_size = 1;
795 break;
796
797 case HUMAN_SI_OPTION:
798 human_output_opts = human_autoscale | human_SI;
799 output_block_size = 1;
800 break;
801
802 case 'k':
803 human_output_opts = 0;
804 output_block_size = 1024;
805 break;
806
807 case 'd': /* --max-depth=N */
808 {
809 intmax_t tmp;
810 if (xstrtoimax (optarg, nullptr, 0, &tmp, "") == LONGINT_OK
811 && tmp <= IDX_MAX)
812 {
813 max_depth_specified = true;
814 max_depth = tmp;
815 }
816 else
817 {
818 error (0, 0, _("invalid maximum depth %s"),
819 quote (optarg));
820 ok = false;
821 }
822 }
823 break;
824
825 case 'm':
826 human_output_opts = 0;
827 output_block_size = 1024 * 1024;
828 break;
829
830 case 'l':
831 opt_count_all = true;
832 break;
833
834 case 's':
835 opt_summarize_only = true;
836 break;
837
838 case 't':
839 {
840 enum strtol_error e;
841 e = xstrtoimax (optarg, nullptr, 0, &opt_threshold,
842 "kKmMGTPEZYRQ0");
843 if (e != LONGINT_OK)
844 xstrtol_fatal (e, oi, c, long_options, optarg);
845 if (opt_threshold == 0 && *optarg == '-')
846 {
847 /* Do not allow -0, as this wouldn't make sense anyway. */
848 error (EXIT_FAILURE, 0, _("invalid --threshold argument '-0'"));
849 }
850 }
851 break;
852
853 case 'x':
854 bit_flags |= FTS_XDEV;
855 break;
856
857 case 'B':
858 {
859 enum strtol_error e = human_options (optarg, &human_output_opts,
860 &output_block_size);
861 if (e != LONGINT_OK)
862 xstrtol_fatal (e, oi, c, long_options, optarg);
863 }
864 break;
865
866 case 'H': /* NOTE: before 2008-12, -H was equivalent to --si. */
867 case 'D':
868 symlink_deref_bits = FTS_COMFOLLOW | FTS_PHYSICAL;
869 break;
870
871 case 'L': /* --dereference */
872 symlink_deref_bits = FTS_LOGICAL;
873 break;
874
875 case 'P': /* --no-dereference */
876 symlink_deref_bits = FTS_PHYSICAL;
877 break;
878
879 case 'S':
880 opt_separate_dirs = true;
881 break;
882
883 case 'X':
884 if (add_exclude_file (add_exclude, exclude, optarg,
885 EXCLUDE_WILDCARDS, '\n'))
886 {
887 error (0, errno, "%s", quotef (optarg));
888 ok = false;
889 }
890 break;
891
892 case FILES0_FROM_OPTION:
893 files_from = optarg;
894 break;
895
896 case EXCLUDE_OPTION:
897 add_exclude (exclude, optarg, EXCLUDE_WILDCARDS);
898 break;
899
900 case INODES_OPTION:
901 opt_inodes = true;
902 break;
903
904 case TIME_OPTION:
905 opt_time = true;
906 time_type =
907 (optarg
908 ? XARGMATCH ("--time", optarg, time_args, time_types)
909 : time_mtime);
910 localtz = tzalloc (getenv ("TZ"));
911 break;
912
913 case TIME_STYLE_OPTION:
914 time_style = optarg;
915 break;
916
917 case_GETOPT_HELP_CHAR;
918
919 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
920
921 default:
922 ok = false;
923 }
924 }
925
926 if (!ok)
927 usage (EXIT_FAILURE);
928
929 if (opt_all && opt_summarize_only)
930 {
931 error (0, 0, _("cannot both summarize and show all entries"));
932 usage (EXIT_FAILURE);
933 }
934
935 if (opt_summarize_only && max_depth_specified && max_depth == 0)
936 {
937 error (0, 0,
938 _("warning: summarizing is the same as using --max-depth=0"));
939 }
940
941 if (opt_summarize_only && max_depth_specified && max_depth != 0)
942 {
943 error (0, 0, _("warning: summarizing conflicts with --max-depth=%td"),
944 max_depth);
945 usage (EXIT_FAILURE);
946 }
947
948 if (opt_summarize_only)
949 max_depth = 0;
950
951 if (opt_inodes)
952 {
953 if (apparent_size)
954 {
955 error (0, 0, _("warning: options --apparent-size and -b are "
956 "ineffective with --inodes"));
957 }
958 output_block_size = 1;
959 }
960
961 /* Process time style if printing last times. */
962 if (opt_time)
963 {
964 if (! time_style)
965 {
966 time_style = getenv ("TIME_STYLE");
967
968 /* Ignore TIMESTYLE="locale", for compatibility with ls. */
969 if (! time_style || STREQ (time_style, "locale"))
970 time_style = "long-iso";
971 else if (*time_style == '+')
972 {
973 /* Ignore anything after a newline, for compatibility
974 with ls. */
975 char *p = strchr (time_style, '\n');
976 if (p)
977 *p = '\0';
978 }
979 else
980 {
981 /* Ignore "posix-" prefix, for compatibility with ls. */
982 static char const posix_prefix[] = "posix-";
983 static const size_t prefix_len = sizeof posix_prefix - 1;
984 while (STREQ_LEN (time_style, posix_prefix, prefix_len))
985 time_style += prefix_len;
986 }
987 }
988
989 if (*time_style == '+')
990 time_format = time_style + 1;
991 else
992 {
993 switch (XARGMATCH ("time style", time_style,
994 time_style_args, time_style_types))
995 {
996 case full_iso_time_style:
997 time_format = "%Y-%m-%d %H:%M:%S.%N %z";
998 break;
999
1000 case long_iso_time_style:
1001 time_format = "%Y-%m-%d %H:%M";
1002 break;
1003
1004 case iso_time_style:
1005 time_format = "%Y-%m-%d";
1006 break;
1007 }
1008 }
1009 }
1010
1011 struct argv_iterator *ai;
1012 if (files_from)
1013 {
1014 /* When using --files0-from=F, you may not specify any files
1015 on the command-line. */
1016 if (optind < argc)
1017 {
1018 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1019 fprintf (stderr, "%s\n",
1020 _("file operands cannot be combined with --files0-from"));
1021 usage (EXIT_FAILURE);
1022 }
1023
1024 if (! (STREQ (files_from, "-") || freopen (files_from, "r", stdin)))
1025 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1026 quoteaf (files_from));
1027
1028 ai = argv_iter_init_stream (stdin);
1029
1030 /* It's not easy here to count the arguments, so assume the
1031 worst. */
1032 hash_all = true;
1033 }
1034 else
1035 {
1036 char **files = (optind < argc ? argv + optind : cwd_only);
1037 ai = argv_iter_init_argv (files);
1038
1039 /* Hash all dev,ino pairs if there are multiple arguments, or if
1040 following non-command-line symlinks, because in either case a
1041 file with just one hard link might be seen more than once. */
1042 hash_all = (optind + 1 < argc || symlink_deref_bits == FTS_LOGICAL);
1043 }
1044
1045 if (!ai)
1046 xalloc_die ();
1047
1048 /* Initialize the set of dev,inode pairs. */
1049 di_files = di_set_alloc ();
1050 if (!di_files)
1051 xalloc_die ();
1052
1053 /* If not hashing everything, process_file won't find cycles on its
1054 own, so ask fts_read to check for them accurately. */
1055 if (opt_count_all || ! hash_all)
1056 bit_flags |= FTS_TIGHT_CYCLE_CHECK;
1057
1058 bit_flags |= symlink_deref_bits;
1059 static char *temp_argv[] = { nullptr, nullptr };
1060
1061 while (true)
1062 {
1063 bool skip_file = false;
1064 enum argv_iter_err ai_err;
1065 char *file_name = argv_iter (ai, &ai_err);
1066 if (!file_name)
1067 {
1068 switch (ai_err)
1069 {
1070 case AI_ERR_EOF:
1071 goto argv_iter_done;
1072 case AI_ERR_READ:
1073 error (0, errno, _("%s: read error"),
1074 quotef (files_from));
1075 ok = false;
1076 goto argv_iter_done;
1077 case AI_ERR_MEM:
1078 xalloc_die ();
1079 default:
1080 affirm (!"unexpected error code from argv_iter");
1081 }
1082 }
1083 if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
1084 {
1085 /* Give a better diagnostic in an unusual case:
1086 printf - | du --files0-from=- */
1087 error (0, 0, _("when reading file names from stdin, "
1088 "no file name of %s allowed"),
1089 quoteaf (file_name));
1090 skip_file = true;
1091 }
1092
1093 /* Report and skip any empty file names before invoking fts.
1094 This works around a glitch in fts, which fails immediately
1095 (without looking at the other file names) when given an empty
1096 file name. */
1097 if (!file_name[0])
1098 {
1099 /* Diagnose a zero-length file name. When it's one
1100 among many, knowing the record number may help.
1101 FIXME: currently print the record number only with
1102 --files0-from=FILE. Maybe do it for argv, too? */
1103 if (files_from == nullptr)
1104 error (0, 0, "%s", _("invalid zero-length file name"));
1105 else
1106 {
1107 /* Using the standard 'filename:line-number:' prefix here is
1108 not totally appropriate, since NUL is the separator, not NL,
1109 but it might be better than nothing. */
1110 idx_t file_number = argv_iter_n_args (ai);
1111 error (0, 0, "%s:%td: %s", quotef (files_from),
1112 file_number, _("invalid zero-length file name"));
1113 }
1114 skip_file = true;
1115 }
1116
1117 if (skip_file)
1118 ok = false;
1119 else
1120 {
1121 temp_argv[0] = file_name;
1122 ok &= du_files (temp_argv, bit_flags);
1123 }
1124 }
1125 argv_iter_done:
1126
1127 argv_iter_free (ai);
1128 di_set_free (di_files);
1129 if (di_mnt)
1130 di_set_free (di_mnt);
1131
1132 if (files_from && (ferror (stdin) || fclose (stdin) != 0) && ok)
1133 error (EXIT_FAILURE, 0, _("error reading %s"), quoteaf (files_from));
1134
1135 if (print_grand_total)
1136 print_size (&tot_dui, _("total"));
1137
1138 return ok ? EXIT_SUCCESS : EXIT_FAILURE;
1139 }
1140