1 /* wc - print the number of lines, words, and bytes in files
2    Copyright (C) 1985-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
18    and David MacKenzie, djm@gnu.ai.mit.edu. */
19 
20 #include <config.h>
21 
22 #include <stdckdint.h>
23 #include <stdio.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <uchar.h>
27 
28 #include <argmatch.h>
29 #include <argv-iter.h>
30 #include <fadvise.h>
31 #include <physmem.h>
32 #include <readtokens0.h>
33 #include <stat-size.h>
34 #include <xbinary-io.h>
35 
36 #include "system.h"
37 #include "wc.h"
38 
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "wc"

/* Author list handed to the version-option handler in main.  */
#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie")

/* Number of bytes requested by each read call.  The read buffers in
   this file are declared with one extra byte beyond this size.  */
#define BUFFER_SIZE (16 * 1024)

/* Per-byte lookup tables filled in by main: wc_isprint only when the
   maximum line length is requested, wc_isspace only when words are
   counted.  Otherwise they stay all-false.  */
static bool wc_isprint[UCHAR_MAX + 1];
static bool wc_isspace[UCHAR_MAX + 1];

/* True if --debug was given; enables the diagnostic in avx2_supported.  */
static bool debug;

/* Cumulative number of lines, words, chars and bytes in all files so far.
   Each *_overflow flag records whether adding to the corresponding total
   has ever overflowed.
   max_line_length is the maximum over all files processed so far.  */
static uintmax_t total_lines;
static uintmax_t total_words;
static uintmax_t total_chars;
static uintmax_t total_bytes;
static bool total_lines_overflow;
static bool total_words_overflow;
static bool total_chars_overflow;
static bool total_bytes_overflow;
static intmax_t max_line_length;

/* Which counts to print.  If no selection option is given, main enables
   lines, words and bytes.  */
static bool print_lines, print_words, print_chars, print_bytes;
static bool print_linelength;

/* The print width of each count.  */
static int number_width;

/* True if we have ever read the standard input.  When set, main closes
   standard input before exiting and diagnoses a failure to do so.  */
static bool have_read_stdin;

/* Used to determine if file size can be determined without reading.
   Set from getpagesize in main.  */
static idx_t page_size;

/* Enable to _not_ treat non breaking space as a word separator.
   True when the POSIXLY_CORRECT environment variable is set.  */
static bool posixly_correct;
81 
/* The result of calling fstat or stat on a file descriptor or file.  */
struct fstatus
{
  /* If positive, fstat or stat has not been called yet.  Otherwise,
     this is the value returned from fstat or stat.  Code in this file
     stores 1 here to mean "not called yet".  */
  int failed;

  /* If FAILED is zero, this is the file's status.  */
  struct stat st;
};
92 
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
  FILES0_FROM_OPTION,
  TOTAL_OPTION,
};

/* Long option table passed to getopt_long in main.  The last field of
   each entry is the value getopt_long returns for that option: the
   matching short option letter where one exists, otherwise one of the
   pseudo short options above.  */
static struct option const longopts[] =
{
  {"bytes", no_argument, nullptr, 'c'},
  {"chars", no_argument, nullptr, 'm'},
  {"lines", no_argument, nullptr, 'l'},
  {"words", no_argument, nullptr, 'w'},
  {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
  {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
  {"max-line-length", no_argument, nullptr, 'L'},
  {"total", required_argument, nullptr, TOTAL_OPTION},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {nullptr, 0, nullptr, 0}
};
116 
/* When to print the line of total counts, as chosen with --total=WHEN.  */
enum total_type
  {
    total_auto,         /* 0: default or --total=auto */
    total_always,       /* 1: --total=always */
    total_only,         /* 2: --total=only */
    total_never         /* 3: --total=never */
  };

/* Accepted WHEN keywords, terminated by a null pointer.  */
static char const *const total_args[] =
{
  "auto", "always", "only", "never", nullptr
};

/* total_types[i] is the mode selected by the keyword total_args[i].  */
static enum total_type const total_types[] =
{
  total_auto, total_always, total_only, total_never
};
ARGMATCH_VERIFY (total_args, total_types);

/* The mode in effect; main overwrites it when --total is given.  */
static enum total_type total_mode = total_auto;
134 
135 #ifdef USE_AVX2_WC_LINECOUNT
136 static bool
avx2_supported(void)137 avx2_supported (void)
138 {
139   bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");
140 
141   if (debug)
142     error (0, 0, (avx_enabled
143                   ? _("using avx2 hardware support")
144                   : _("avx2 support not detected")));
145 
146   return avx_enabled;
147 }
148 #endif
149 
150 void
usage(int status)151 usage (int status)
152 {
153   if (status != EXIT_SUCCESS)
154     emit_try_help ();
155   else
156     {
157       printf (_("\
158 Usage: %s [OPTION]... [FILE]...\n\
159   or:  %s [OPTION]... --files0-from=F\n\
160 "),
161               program_name, program_name);
162       fputs (_("\
163 Print newline, word, and byte counts for each FILE, and a total line if\n\
164 more than one FILE is specified.  A word is a nonempty sequence of non white\n\
165 space delimited by white space characters or by start or end of input.\n\
166 "), stdout);
167 
168       emit_stdin_note ();
169 
170       fputs (_("\
171 \n\
172 The options below may be used to select which counts are printed, always in\n\
173 the following order: newline, word, character, byte, maximum line length.\n\
174   -c, --bytes            print the byte counts\n\
175   -m, --chars            print the character counts\n\
176   -l, --lines            print the newline counts\n\
177 "), stdout);
178       fputs (_("\
179       --files0-from=F    read input from the files specified by\n\
180                            NUL-terminated names in file F;\n\
181                            If F is - then read names from standard input\n\
182   -L, --max-line-length  print the maximum display width\n\
183   -w, --words            print the word counts\n\
184 "), stdout);
185       fputs (_("\
186       --total=WHEN       when to print a line with total counts;\n\
187                            WHEN can be: auto, always, only, never\n\
188 "), stdout);
189       fputs (HELP_OPTION_DESCRIPTION, stdout);
190       fputs (VERSION_OPTION_DESCRIPTION, stdout);
191       emit_ancillary_info (PROGRAM_NAME);
192     }
193   exit (status);
194 }
195 
196 /* Return non zero if a non breaking space.  */
197 ATTRIBUTE_PURE
198 static int
iswnbspace(wint_t wc)199 iswnbspace (wint_t wc)
200 {
201   return ! posixly_correct
202          && (wc == 0x00A0 || wc == 0x2007
203              || wc == 0x202F || wc == 0x2060);
204 }
205 
206 /* FILE is the name of the file (or null for standard input)
207    associated with the specified counters.  */
208 static void
write_counts(uintmax_t lines,uintmax_t words,uintmax_t chars,uintmax_t bytes,intmax_t linelength,char const * file)209 write_counts (uintmax_t lines,
210               uintmax_t words,
211               uintmax_t chars,
212               uintmax_t bytes,
213               intmax_t linelength,
214               char const *file)
215 {
216   static char const format_sp_int[] = " %*s";
217   char const *format_int = format_sp_int + 1;
218   char buf[MAX (INT_BUFSIZE_BOUND (intmax_t),
219                 INT_BUFSIZE_BOUND (uintmax_t))];
220 
221   if (print_lines)
222     {
223       printf (format_int, number_width, umaxtostr (lines, buf));
224       format_int = format_sp_int;
225     }
226   if (print_words)
227     {
228       printf (format_int, number_width, umaxtostr (words, buf));
229       format_int = format_sp_int;
230     }
231   if (print_chars)
232     {
233       printf (format_int, number_width, umaxtostr (chars, buf));
234       format_int = format_sp_int;
235     }
236   if (print_bytes)
237     {
238       printf (format_int, number_width, umaxtostr (bytes, buf));
239       format_int = format_sp_int;
240     }
241   if (print_linelength)
242     printf (format_int, number_width, imaxtostr (linelength, buf));
243   if (file)
244     printf (" %s", strchr (file, '\n') ? quotef (file) : file);
245   putchar ('\n');
246 }
247 
248 /* Read FD and return a summary.  */
249 static struct wc_lines
wc_lines(int fd)250 wc_lines (int fd)
251 {
252 #ifdef USE_AVX2_WC_LINECOUNT
253   static signed char use_avx2;
254   if (!use_avx2)
255     use_avx2 = avx2_supported () ? 1 : -1;
256   if (0 < use_avx2)
257     return wc_lines_avx2 (fd);
258 #endif
259 
260   intmax_t lines = 0, bytes = 0;
261   bool long_lines = false;
262 
263   while (true)
264     {
265       char buf[BUFFER_SIZE + 1];
266       ssize_t bytes_read = read (fd, buf, BUFFER_SIZE);
267       if (bytes_read <= 0)
268         return (struct wc_lines) { bytes_read == 0 ? 0 : errno, lines, bytes };
269 
270       bytes += bytes_read;
271       char *end = buf + bytes_read;
272       idx_t buflines = 0;
273 
274       if (! long_lines)
275         {
276           /* Avoid function call overhead for shorter lines.  */
277           for (char *p = buf; p < end; p++)
278             buflines += *p == '\n';
279         }
280       else
281         {
282           /* rawmemchr is more efficient with longer lines.  */
283           *end = '\n';
284           for (char *p = buf; (p = rawmemchr (p, '\n')) < end; p++)
285             buflines++;
286         }
287 
288       /* If the average line length in the block is >= 15, then use
289           memchr for the next block, where system specific optimizations
290           may outweigh function call overhead.
291           FIXME: This line length was determined in 2015, on both
292           x86_64 and ppc64, but it's worth re-evaluating in future with
293           newer compilers, CPUs, or memchr() implementations etc.  */
294       long_lines = 15 * buflines <= bytes_read;
295       lines += buflines;
296     }
297 }
298 
299 /* Count words.  FILE_X is the name of the file (or null for standard
300    input) that is open on descriptor FD.  *FSTATUS is its status.
301    CURRENT_POS is the current file offset if known, negative if unknown.
302    Return true if successful.  */
303 static bool
wc(int fd,char const * file_x,struct fstatus * fstatus,off_t current_pos)304 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
305 {
306   int err = 0;
307   char buf[BUFFER_SIZE + 1];
308   intmax_t lines, words, chars, bytes, linelength;
309   bool count_bytes, count_chars, count_complicated;
310   char const *file = file_x ? file_x : _("standard input");
311 
312   lines = words = chars = bytes = linelength = 0;
313 
314   /* If in the current locale, chars are equivalent to bytes, we prefer
315      counting bytes, because that's easier.  */
316   if (MB_CUR_MAX > 1)
317     {
318       count_bytes = print_bytes;
319       count_chars = print_chars;
320     }
321   else
322     {
323       count_bytes = print_bytes || print_chars;
324       count_chars = false;
325     }
326   count_complicated = print_words || print_linelength;
327 
328   /* Advise the kernel of our access pattern only if we will read().  */
329   if (!count_bytes || count_chars || print_lines || count_complicated)
330     fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
331 
332   /* When counting only bytes, save some line- and word-counting
333      overhead.  If FD is a 'regular' Unix file, using lseek is enough
334      to get its 'size' in bytes.  Otherwise, read blocks of BUFFER_SIZE
335      bytes at a time until EOF.  Note that the 'size' (number of bytes)
336      that wc reports is smaller than stats.st_size when the file is not
337      positioned at its beginning.  That's why the lseek calls below are
338      necessary.  For example the command
339      '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
340      should make wc report '0' bytes.  */
341 
342   if (count_bytes && !count_chars && !print_lines && !count_complicated)
343     {
344       bool skip_read = false;
345 
346       if (0 < fstatus->failed)
347         fstatus->failed = fstat (fd, &fstatus->st);
348 
349       /* For sized files, seek to one st_blksize before EOF rather than to EOF.
350          This works better for files in proc-like file systems where
351          the size is only approximate.  */
352       if (! fstatus->failed && usable_st_size (&fstatus->st)
353           && 0 <= fstatus->st.st_size)
354         {
355           off_t end_pos = fstatus->st.st_size;
356           if (current_pos < 0)
357             current_pos = lseek (fd, 0, SEEK_CUR);
358 
359           if (end_pos % page_size)
360             {
361               /* We only need special handling of /proc and /sys files etc.
362                  when they're a multiple of PAGE_SIZE.  In the common case
363                  for files with st_size not a multiple of PAGE_SIZE,
364                  it's more efficient and accurate to use st_size.
365 
366                  Be careful here.  The current position may actually be
367                  beyond the end of the file.  As in the example above.  */
368 
369               bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
370               if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
371                 skip_read = true;
372               else
373                 bytes = 0;
374             }
375           else
376             {
377               off_t hi_pos = (end_pos
378                               - end_pos % (STP_BLKSIZE (&fstatus->st) + 1));
379               if (0 <= current_pos && current_pos < hi_pos
380                   && 0 <= lseek (fd, hi_pos, SEEK_CUR))
381                 bytes = hi_pos - current_pos;
382             }
383         }
384 
385       if (! skip_read)
386         {
387           fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
388           for (ssize_t bytes_read;
389                (bytes_read = read (fd, buf, BUFFER_SIZE));
390                bytes += bytes_read)
391             if (bytes_read < 0)
392               {
393                 err = errno;
394                 break;
395               }
396         }
397     }
398   else if (!count_chars && !count_complicated)
399     {
400       /* Use a separate loop when counting only lines or lines and bytes --
401          but not chars or words.  */
402       struct wc_lines w = wc_lines (fd);
403       err = w.err;
404       lines = w.lines;
405       bytes = w.bytes;
406     }
407   else if (MB_CUR_MAX > 1)
408     {
409       bool in_word = false;
410       intmax_t linepos = 0;
411       mbstate_t state; mbszero (&state);
412       bool in_shift = false;
413       idx_t prev = 0; /* Number of bytes carried over from previous round.  */
414 
415       for (ssize_t bytes_read;
416            ((bytes_read = read (fd, buf + prev, BUFFER_SIZE - prev))
417             || prev);
418            )
419         {
420           if (bytes_read < 0)
421             {
422               err = errno;
423               break;
424             }
425 
426           bytes += bytes_read;
427           char const *p = buf;
428           char const *plim = p + prev + bytes_read;
429           do
430             {
431               char32_t wide_char;
432               idx_t charbytes;
433               bool single_byte;
434 
435               if (!in_shift && 0 <= *p && *p < 0x80)
436                 {
437                   /* Handle most ASCII characters quickly, without calling
438                      mbrtoc32.  */
439                   charbytes = 1;
440                   wide_char = *p;
441                   single_byte = true;
442                 }
443               else
444                 {
445                   idx_t scanbytes = plim - (p + prev);
446                   size_t n = mbrtoc32 (&wide_char, p + prev, scanbytes, &state);
447                   prev = 0;
448 
449                   if (scanbytes < n)
450                     {
451                       if (n == (size_t) -2 && plim - p < BUFFER_SIZE
452                           && bytes_read)
453                         {
454                           /* An incomplete character that is not ridiculously
455                              long and there may be more input.  Move the bytes
456                              to buffer start and prepare to read more data.  */
457                           prev = plim - p;
458                           memmove (buf, p, prev);
459                           in_shift = true;
460                           break;
461                         }
462 
463                       /* Remember that we read a byte, but don't complain
464                          about the error.  Because of the decoding error,
465                          this is a considered to be byte but not a
466                          character (that is, chars is not incremented).  */
467                       p++;
468                       mbszero (&state);
469                       in_shift = false;
470 
471                       /* Treat encoding errors as non white space.
472                          POSIX says a word is "a non-zero-length string of
473                          characters delimited by white space".  This is
474                          wrong in some sense, as the string can be delimited
475                          by start or end of input, and it is unclear what it
476                          means when the input contains encoding errors.
477                          Since encoding errors are not white space,
478                          treat them that way here.  */
479                       words += !in_word;
480                       in_word = true;
481                       continue;
482                     }
483 
484                   charbytes = n + !n;
485                   single_byte = charbytes == !in_shift;
486                   in_shift = !mbsinit (&state);
487                 }
488 
489               switch (wide_char)
490                 {
491                 case '\n':
492                   lines++;
493                   FALLTHROUGH;
494                 case '\r':
495                 case '\f':
496                   if (linepos > linelength)
497                     linelength = linepos;
498                   linepos = 0;
499                   in_word = false;
500                   break;
501 
502                 case '\t':
503                   linepos += 8 - (linepos % 8);
504                   in_word = false;
505                   break;
506 
507                 case ' ':
508                   linepos++;
509                   FALLTHROUGH;
510                 case '\v':
511                   in_word = false;
512                   break;
513 
514                 default:;
515                   bool in_word2;
516                   if (single_byte)
517                     {
518                       linepos += wc_isprint[wide_char];
519                       in_word2 = !wc_isspace[wide_char];
520                     }
521                   else
522                     {
523                       /* c32width can be expensive on macOS for example,
524                          so avoid if not needed.  */
525                       if (print_linelength)
526                         {
527                           int width = c32width (wide_char);
528                           if (width > 0)
529                             linepos += width;
530                         }
531                       in_word2 = !iswnbspace (wide_char);
532                     }
533 
534                   /* Count words by counting word starts, i.e., each
535                      white space character (or the start of input)
536                      followed by non white space.  */
537                   words += !in_word & in_word2;
538                   in_word = in_word2;
539                   break;
540                 }
541 
542               p += charbytes;
543               chars++;
544             }
545           while (p < plim);
546         }
547       if (linepos > linelength)
548         linelength = linepos;
549     }
550   else
551     {
552       bool in_word = false;
553       intmax_t linepos = 0;
554 
555       for (ssize_t bytes_read; (bytes_read = read (fd, buf, BUFFER_SIZE)); )
556         {
557           if (bytes_read < 0)
558             {
559               err = errno;
560               break;
561             }
562 
563           bytes += bytes_read;
564           char const *p = buf;
565           do
566             {
567               unsigned char c = *p++;
568               switch (c)
569                 {
570                 case '\n':
571                   lines++;
572                   FALLTHROUGH;
573                 case '\r':
574                 case '\f':
575                   if (linepos > linelength)
576                     linelength = linepos;
577                   linepos = 0;
578                   in_word = false;
579                   break;
580 
581                 case '\t':
582                   linepos += 8 - (linepos % 8);
583                   in_word = false;
584                   break;
585 
586                 case ' ':
587                   linepos++;
588                   FALLTHROUGH;
589                 case '\v':
590                   in_word = false;
591                   break;
592 
593                 default:
594                   linepos += wc_isprint[c];
595                   bool in_word2 = !wc_isspace[c];
596                   words += !in_word & in_word2;
597                   in_word = in_word2;
598                   break;
599                 }
600             }
601           while (--bytes_read);
602         }
603       if (linepos > linelength)
604         linelength = linepos;
605     }
606 
607   if (count_chars < print_chars)
608     chars = bytes;
609 
610   if (total_mode != total_only)
611     write_counts (lines, words, chars, bytes, linelength, file_x);
612 
613   total_lines_overflow |= ckd_add (&total_lines, total_lines, lines);
614   total_words_overflow |= ckd_add (&total_words, total_words, words);
615   total_chars_overflow |= ckd_add (&total_chars, total_chars, chars);
616   total_bytes_overflow |= ckd_add (&total_bytes, total_bytes, bytes);
617 
618   if (linelength > max_line_length)
619     max_line_length = linelength;
620 
621   if (err)
622     error (0, err, "%s", quotef (file));
623   return !err;
624 }
625 
626 static bool
wc_file(char const * file,struct fstatus * fstatus)627 wc_file (char const *file, struct fstatus *fstatus)
628 {
629   if (! file || STREQ (file, "-"))
630     {
631       have_read_stdin = true;
632       xset_binary_mode (STDIN_FILENO, O_BINARY);
633       return wc (STDIN_FILENO, file, fstatus, -1);
634     }
635   else
636     {
637       int fd = open (file, O_RDONLY | O_BINARY);
638       if (fd == -1)
639         {
640           error (0, errno, "%s", quotef (file));
641           return false;
642         }
643       else
644         {
645           bool ok = wc (fd, file, fstatus, 0);
646           if (close (fd) != 0)
647             {
648               error (0, errno, "%s", quotef (file));
649               return false;
650             }
651           return ok;
652         }
653     }
654 }
655 
656 /* Return the file status for the NFILES files addressed by FILE.
657    Optimize the case where only one number is printed, for just one
658    file; in that case we can use a print width of 1, so we don't need
659    to stat the file.  Handle the case of (nfiles == 0) in the same way;
660    that happens when we don't know how long the list of file names will be.  */
661 
662 static struct fstatus *
get_input_fstatus(idx_t nfiles,char * const * file)663 get_input_fstatus (idx_t nfiles, char *const *file)
664 {
665   struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
666 
667   if (nfiles == 0
668       || (nfiles == 1
669           && ((print_lines + print_words + print_chars
670                + print_bytes + print_linelength)
671               == 1)))
672     fstatus[0].failed = 1;
673   else
674     {
675       for (idx_t i = 0; i < nfiles; i++)
676         fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
677                              ? fstat (STDIN_FILENO, &fstatus[i].st)
678                              : stat (file[i], &fstatus[i].st));
679     }
680 
681   return fstatus;
682 }
683 
684 /* Return a print width suitable for the NFILES files whose status is
685    recorded in FSTATUS.  Optimize the same special case that
686    get_input_fstatus optimizes.  */
687 
688 ATTRIBUTE_PURE
689 static int
compute_number_width(idx_t nfiles,struct fstatus const * fstatus)690 compute_number_width (idx_t nfiles, struct fstatus const *fstatus)
691 {
692   int width = 1;
693 
694   if (0 < nfiles && fstatus[0].failed <= 0)
695     {
696       int minimum_width = 1;
697       uintmax_t regular_total = 0;
698 
699       for (idx_t i = 0; i < nfiles; i++)
700         if (! fstatus[i].failed)
701           {
702             if (!S_ISREG (fstatus[i].st.st_mode))
703               minimum_width = 7;
704             else if (ckd_add (&regular_total, regular_total,
705                               fstatus[i].st.st_size))
706               {
707                 regular_total = UINTMAX_MAX;
708                 break;
709               }
710           }
711 
712       for (; 10 <= regular_total; regular_total /= 10)
713         width++;
714       if (width < minimum_width)
715         width = minimum_width;
716     }
717 
718   return width;
719 }
720 
721 
/* Entry point.  Parse the options, determine the list of input files
   (command-line operands, names read via --files0-from, or standard
   input when neither is given), count each file, and print the total
   line when the --total mode calls for it.  Return EXIT_SUCCESS if
   everything succeeded, EXIT_FAILURE otherwise.  */
int
main (int argc, char **argv)
{
  int optc;
  idx_t nfiles;
  char **files;
  char *files_from = nullptr;
  struct fstatus *fstatus;
  struct Tokens tok;

  initialize_main (&argc, &argv);
  set_program_name (argv[0]);
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  page_size = getpagesize ();
  /* Line buffer stdout to ensure lines are written atomically and immediately
     so that processes running in parallel do not intersperse their output.  */
  setvbuf (stdout, nullptr, _IOLBF, 0);

  posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);

  print_lines = print_words = print_chars = print_bytes = false;
  print_linelength = false;
  total_lines = total_words = total_chars = total_bytes = max_line_length = 0;

  while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
    switch (optc)
      {
      case 'c':
        print_bytes = true;
        break;

      case 'm':
        print_chars = true;
        break;

      case 'l':
        print_lines = true;
        break;

      case 'w':
        print_words = true;
        break;

      case 'L':
        print_linelength = true;
        break;

      case DEBUG_PROGRAM_OPTION:
        debug = true;
        break;

      case FILES0_FROM_OPTION:
        files_from = optarg;
        break;

      case TOTAL_OPTION:
        total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
        break;

      case_GETOPT_HELP_CHAR;

      case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

      default:
        usage (EXIT_FAILURE);
      }

  /* With no selection option, print lines, words and bytes.  */
  if (! (print_lines || print_words || print_chars || print_bytes
         || print_linelength))
    print_lines = print_words = print_bytes = true;

  /* Build the per-byte classification tables, but only those that the
     requested counts actually consult.  */
  if (print_linelength)
    for (int i = 0; i <= UCHAR_MAX; i++)
      wc_isprint[i] = !!isprint (i);
  if (print_words)
    for (int i = 0; i <= UCHAR_MAX; i++)
      wc_isspace[i] = isspace (i) || iswnbspace (btoc32 (i));

  /* Set up AI to iterate over the input file names, and FILES/NFILES to
     describe them when the whole list is available up front.  */
  bool read_tokens = false;
  struct argv_iterator *ai;
  if (files_from)
    {
      FILE *stream;

      /* When using --files0-from=F, you may not specify any files
         on the command-line.  */
      if (optind < argc)
        {
          error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
          fprintf (stderr, "%s\n",
                   _("file operands cannot be combined with --files0-from"));
          usage (EXIT_FAILURE);
        }

      if (STREQ (files_from, "-"))
        stream = stdin;
      else
        {
          stream = fopen (files_from, "r");
          if (stream == nullptr)
            error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
                   quoteaf (files_from));
        }

      /* Read the file list into RAM if we can detect its size and that
         size is reasonable.  Otherwise, we'll read a name at a time.  */
      struct stat st;
      if (fstat (fileno (stream), &st) == 0
          && S_ISREG (st.st_mode)
          && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
        {
          read_tokens = true;
          readtokens0_init (&tok);
          if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
            error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
                   quoteaf (files_from));
          files = tok.tok;
          nfiles = tok.n_tok;
          ai = argv_iter_init_argv (files);
        }
      else
        {
          /* The number of names is unknown; NFILES == 0 tells the code
             below to reuse the single status slot for every file.  */
          files = nullptr;
          nfiles = 0;
          ai = argv_iter_init_stream (stream);
        }
    }
  else
    {
      /* With no operands, use a one-element list holding a null name,
         which stands for standard input.  */
      static char *stdin_only[] = { nullptr };
      files = (optind < argc ? argv + optind : stdin_only);
      nfiles = (optind < argc ? argc - optind : 1);
      ai = argv_iter_init_argv (files);
    }

  if (!ai)
    xalloc_die ();

  fstatus = get_input_fstatus (nfiles, files);
  if (total_mode == total_only)
    number_width = 1;  /* No extra padding, since no alignment requirement.  */
  else
    number_width = compute_number_width (nfiles, fstatus);

  bool ok = true;
  enum argv_iter_err ai_err;
  char *file_name;
  for (int i = 0; (file_name = argv_iter (ai, &ai_err)); i++)
    {
      bool skip_file = false;
      if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
        {
          /* Give a better diagnostic in an unusual case:
             printf - | wc --files0-from=- */
          error (0, 0, _("when reading file names from stdin, "
                         "no file name of %s allowed"),
                 quoteaf (file_name));
          skip_file = true;
        }

      if (!file_name[0])
        {
          /* Diagnose a zero-length file name.  When it's one
             among many, knowing the record number may help.
             FIXME: currently print the record number only with
             --files0-from=FILE.  Maybe do it for argv, too?  */
          if (files_from == nullptr)
            error (0, 0, "%s", _("invalid zero-length file name"));
          else
            {
              /* Using the standard 'filename:line-number:' prefix here is
                 not totally appropriate, since NUL is the separator, not NL,
                 but it might be better than nothing.  */
              error (0, 0, "%s:%zu: %s", quotef (files_from),
                     argv_iter_n_args (ai), _("invalid zero-length file name"));
            }
          skip_file = true;
        }

      /* A skipped name counts as a failure but does not stop the run.  */
      if (skip_file)
        ok = false;
      else
        ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);

      /* When the single status slot is shared, mark it as "not yet
         called" again so it is refreshed for the next file.  */
      if (! nfiles)
        fstatus[0].failed = 1;
    }
  /* Find out why the iteration over file names stopped.  */
  switch (ai_err)
    {
    case AI_ERR_EOF:
      break;

    case AI_ERR_READ:
      error (0, errno, _("%s: read error"), quotef (files_from));
      ok = false;
      break;

    case AI_ERR_MEM:
      xalloc_die ();

    default:
      unreachable ();
    }

  /* No arguments on the command line is fine.  That means read from stdin.
     However, no names on the --files0-from input stream means
     don't read anything.  */
  if (ok && !files_from && argv_iter_n_args (ai) == 0)
    ok &= wc_file (nullptr, &fstatus[0]);

  if (read_tokens)
    readtokens0_free (&tok);

  /* Print the total line unless --total=never; in auto mode, only when
     more than one file name was processed.  */
  if (total_mode != total_never
      && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
    {
      /* A total that overflowed is reported as UINTMAX_MAX, with a
         diagnostic and a failing exit status.  */
      if (total_lines_overflow)
        {
          total_lines = UINTMAX_MAX;
          error (0, EOVERFLOW, _("total lines"));
          ok = false;
        }
      if (total_words_overflow)
        {
          total_words = UINTMAX_MAX;
          error (0, EOVERFLOW, _("total words"));
          ok = false;
        }
      if (total_chars_overflow)
        {
          total_chars = UINTMAX_MAX;
          error (0, EOVERFLOW, _("total characters"));
          ok = false;
        }
      if (total_bytes_overflow)
        {
          total_bytes = UINTMAX_MAX;
          error (0, EOVERFLOW, _("total bytes"));
          ok = false;
        }

      /* With --total=only the line carries no "total" label.  */
      write_counts (total_lines, total_words, total_chars, total_bytes,
                    max_line_length,
                    total_mode != total_only ? _("total") : nullptr);
    }

  argv_iter_free (ai);

  free (fstatus);

  if (have_read_stdin && close (STDIN_FILENO) != 0)
    error (EXIT_FAILURE, errno, "-");

  return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
982