1 /* wc - print the number of lines, words, and bytes in files
2 Copyright (C) 1985-2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Written by Paul Rubin, phr@ocf.berkeley.edu
18 and David MacKenzie, djm@gnu.ai.mit.edu. */
19
20 #include <config.h>
21
22 #include <stdckdint.h>
23 #include <stdio.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <uchar.h>
27
28 #include <argmatch.h>
29 #include <argv-iter.h>
30 #include <fadvise.h>
31 #include <physmem.h>
32 #include <readtokens0.h>
33 #include <stat-size.h>
34 #include <xbinary-io.h>
35
36 #include "system.h"
37 #include "wc.h"
38
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "wc"

/* The authors credited by the --version output.  */
#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie")

/* Size of atomic reads.  */
#define BUFFER_SIZE (16 * 1024)

/* Per-byte classification tables, indexed by unsigned char value.
   main fills wc_isprint only when -L is in effect and wc_isspace only
   when -w is in effect; otherwise they stay all-false.  */
static bool wc_isprint[UCHAR_MAX + 1];
static bool wc_isspace[UCHAR_MAX + 1];

/* True if --debug was given.  */
static bool debug;

/* Cumulative number of lines, words, chars and bytes in all files so far.
   max_line_length is the maximum over all files processed so far.
   Each *_overflow flag records that the matching total has overflowed
   at some point while being accumulated.  */
static uintmax_t total_lines;
static uintmax_t total_words;
static uintmax_t total_chars;
static uintmax_t total_bytes;
static bool total_lines_overflow;
static bool total_words_overflow;
static bool total_chars_overflow;
static bool total_bytes_overflow;
static intmax_t max_line_length;

/* Which counts to print.  */
static bool print_lines, print_words, print_chars, print_bytes;
static bool print_linelength;

/* The print width of each count.  */
static int number_width;

/* True if we have ever read the standard input.  */
static bool have_read_stdin;

/* Used to determine if file size can be determined without reading.
   Set from getpagesize () in main.  */
static idx_t page_size;

/* Enable to _not_ treat non breaking space as a word separator.
   Set in main when POSIXLY_CORRECT is present in the environment.  */
static bool posixly_correct;
81
/* The result of calling fstat or stat on a file descriptor or file.  */
struct fstatus
{
  /* If positive, fstat or stat has not been called yet.  Otherwise,
     this is the value returned from fstat or stat (zero on success,
     negative on failure).  */
  int failed;

  /* If FAILED is zero, this is the file's status.  */
  struct stat st;
};
92
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
  FILES0_FROM_OPTION,
  TOTAL_OPTION,
};

/* Long options passed to getopt_long in main.  Each entry maps to the
   short option character or to one of the pseudo options above.  */
static struct option const longopts[] =
{
  {"bytes", no_argument, nullptr, 'c'},
  {"chars", no_argument, nullptr, 'm'},
  {"lines", no_argument, nullptr, 'l'},
  {"words", no_argument, nullptr, 'w'},
  {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
  {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
  {"max-line-length", no_argument, nullptr, 'L'},
  {"total", required_argument, nullptr, TOTAL_OPTION},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {nullptr, 0, nullptr, 0}
};
116
/* When to print the line of total counts (the --total=WHEN option).  */
enum total_type
  {
    total_auto,         /* 0: default or --total=auto */
    total_always,       /* 1: --total=always */
    total_only,         /* 2: --total=only */
    total_never         /* 3: --total=never */
  };

/* The WHEN keywords, in the same order as the entries of total_types.  */
static char const *const total_args[] =
{
  "auto", "always", "only", "never", nullptr
};

/* The value selected by the keyword at the same index of total_args.  */
static enum total_type const total_types[] =
{
  total_auto, total_always, total_only, total_never
};
/* NOTE(review): presumably a compile-time check that the two tables
   above have matching lengths -- confirm against argmatch.h.  */
ARGMATCH_VERIFY (total_args, total_types);

/* The mode in effect; main overrides it when --total is given.  */
static enum total_type total_mode = total_auto;
134
135 #ifdef USE_AVX2_WC_LINECOUNT
136 static bool
avx2_supported(void)137 avx2_supported (void)
138 {
139 bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");
140
141 if (debug)
142 error (0, 0, (avx_enabled
143 ? _("using avx2 hardware support")
144 : _("avx2 support not detected")));
145
146 return avx_enabled;
147 }
148 #endif
149
150 void
usage(int status)151 usage (int status)
152 {
153 if (status != EXIT_SUCCESS)
154 emit_try_help ();
155 else
156 {
157 printf (_("\
158 Usage: %s [OPTION]... [FILE]...\n\
159 or: %s [OPTION]... --files0-from=F\n\
160 "),
161 program_name, program_name);
162 fputs (_("\
163 Print newline, word, and byte counts for each FILE, and a total line if\n\
164 more than one FILE is specified. A word is a nonempty sequence of non white\n\
165 space delimited by white space characters or by start or end of input.\n\
166 "), stdout);
167
168 emit_stdin_note ();
169
170 fputs (_("\
171 \n\
172 The options below may be used to select which counts are printed, always in\n\
173 the following order: newline, word, character, byte, maximum line length.\n\
174 -c, --bytes print the byte counts\n\
175 -m, --chars print the character counts\n\
176 -l, --lines print the newline counts\n\
177 "), stdout);
178 fputs (_("\
179 --files0-from=F read input from the files specified by\n\
180 NUL-terminated names in file F;\n\
181 If F is - then read names from standard input\n\
182 -L, --max-line-length print the maximum display width\n\
183 -w, --words print the word counts\n\
184 "), stdout);
185 fputs (_("\
186 --total=WHEN when to print a line with total counts;\n\
187 WHEN can be: auto, always, only, never\n\
188 "), stdout);
189 fputs (HELP_OPTION_DESCRIPTION, stdout);
190 fputs (VERSION_OPTION_DESCRIPTION, stdout);
191 emit_ancillary_info (PROGRAM_NAME);
192 }
193 exit (status);
194 }
195
196 /* Return non zero if a non breaking space. */
197 ATTRIBUTE_PURE
198 static int
iswnbspace(wint_t wc)199 iswnbspace (wint_t wc)
200 {
201 return ! posixly_correct
202 && (wc == 0x00A0 || wc == 0x2007
203 || wc == 0x202F || wc == 0x2060);
204 }
205
206 /* FILE is the name of the file (or null for standard input)
207 associated with the specified counters. */
208 static void
write_counts(uintmax_t lines,uintmax_t words,uintmax_t chars,uintmax_t bytes,intmax_t linelength,char const * file)209 write_counts (uintmax_t lines,
210 uintmax_t words,
211 uintmax_t chars,
212 uintmax_t bytes,
213 intmax_t linelength,
214 char const *file)
215 {
216 static char const format_sp_int[] = " %*s";
217 char const *format_int = format_sp_int + 1;
218 char buf[MAX (INT_BUFSIZE_BOUND (intmax_t),
219 INT_BUFSIZE_BOUND (uintmax_t))];
220
221 if (print_lines)
222 {
223 printf (format_int, number_width, umaxtostr (lines, buf));
224 format_int = format_sp_int;
225 }
226 if (print_words)
227 {
228 printf (format_int, number_width, umaxtostr (words, buf));
229 format_int = format_sp_int;
230 }
231 if (print_chars)
232 {
233 printf (format_int, number_width, umaxtostr (chars, buf));
234 format_int = format_sp_int;
235 }
236 if (print_bytes)
237 {
238 printf (format_int, number_width, umaxtostr (bytes, buf));
239 format_int = format_sp_int;
240 }
241 if (print_linelength)
242 printf (format_int, number_width, imaxtostr (linelength, buf));
243 if (file)
244 printf (" %s", strchr (file, '\n') ? quotef (file) : file);
245 putchar ('\n');
246 }
247
248 /* Read FD and return a summary. */
249 static struct wc_lines
wc_lines(int fd)250 wc_lines (int fd)
251 {
252 #ifdef USE_AVX2_WC_LINECOUNT
253 static signed char use_avx2;
254 if (!use_avx2)
255 use_avx2 = avx2_supported () ? 1 : -1;
256 if (0 < use_avx2)
257 return wc_lines_avx2 (fd);
258 #endif
259
260 intmax_t lines = 0, bytes = 0;
261 bool long_lines = false;
262
263 while (true)
264 {
265 char buf[BUFFER_SIZE + 1];
266 ssize_t bytes_read = read (fd, buf, BUFFER_SIZE);
267 if (bytes_read <= 0)
268 return (struct wc_lines) { bytes_read == 0 ? 0 : errno, lines, bytes };
269
270 bytes += bytes_read;
271 char *end = buf + bytes_read;
272 idx_t buflines = 0;
273
274 if (! long_lines)
275 {
276 /* Avoid function call overhead for shorter lines. */
277 for (char *p = buf; p < end; p++)
278 buflines += *p == '\n';
279 }
280 else
281 {
282 /* rawmemchr is more efficient with longer lines. */
283 *end = '\n';
284 for (char *p = buf; (p = rawmemchr (p, '\n')) < end; p++)
285 buflines++;
286 }
287
288 /* If the average line length in the block is >= 15, then use
289 memchr for the next block, where system specific optimizations
290 may outweigh function call overhead.
291 FIXME: This line length was determined in 2015, on both
292 x86_64 and ppc64, but it's worth re-evaluating in future with
293 newer compilers, CPUs, or memchr() implementations etc. */
294 long_lines = 15 * buflines <= bytes_read;
295 lines += buflines;
296 }
297 }
298
299 /* Count words. FILE_X is the name of the file (or null for standard
300 input) that is open on descriptor FD. *FSTATUS is its status.
301 CURRENT_POS is the current file offset if known, negative if unknown.
302 Return true if successful. */
303 static bool
wc(int fd,char const * file_x,struct fstatus * fstatus,off_t current_pos)304 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
305 {
306 int err = 0;
307 char buf[BUFFER_SIZE + 1];
308 intmax_t lines, words, chars, bytes, linelength;
309 bool count_bytes, count_chars, count_complicated;
310 char const *file = file_x ? file_x : _("standard input");
311
312 lines = words = chars = bytes = linelength = 0;
313
314 /* If in the current locale, chars are equivalent to bytes, we prefer
315 counting bytes, because that's easier. */
316 if (MB_CUR_MAX > 1)
317 {
318 count_bytes = print_bytes;
319 count_chars = print_chars;
320 }
321 else
322 {
323 count_bytes = print_bytes || print_chars;
324 count_chars = false;
325 }
326 count_complicated = print_words || print_linelength;
327
328 /* Advise the kernel of our access pattern only if we will read(). */
329 if (!count_bytes || count_chars || print_lines || count_complicated)
330 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
331
332 /* When counting only bytes, save some line- and word-counting
333 overhead. If FD is a 'regular' Unix file, using lseek is enough
334 to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE
335 bytes at a time until EOF. Note that the 'size' (number of bytes)
336 that wc reports is smaller than stats.st_size when the file is not
337 positioned at its beginning. That's why the lseek calls below are
338 necessary. For example the command
339 '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
340 should make wc report '0' bytes. */
341
342 if (count_bytes && !count_chars && !print_lines && !count_complicated)
343 {
344 bool skip_read = false;
345
346 if (0 < fstatus->failed)
347 fstatus->failed = fstat (fd, &fstatus->st);
348
349 /* For sized files, seek to one st_blksize before EOF rather than to EOF.
350 This works better for files in proc-like file systems where
351 the size is only approximate. */
352 if (! fstatus->failed && usable_st_size (&fstatus->st)
353 && 0 <= fstatus->st.st_size)
354 {
355 off_t end_pos = fstatus->st.st_size;
356 if (current_pos < 0)
357 current_pos = lseek (fd, 0, SEEK_CUR);
358
359 if (end_pos % page_size)
360 {
361 /* We only need special handling of /proc and /sys files etc.
362 when they're a multiple of PAGE_SIZE. In the common case
363 for files with st_size not a multiple of PAGE_SIZE,
364 it's more efficient and accurate to use st_size.
365
366 Be careful here. The current position may actually be
367 beyond the end of the file. As in the example above. */
368
369 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
370 if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
371 skip_read = true;
372 else
373 bytes = 0;
374 }
375 else
376 {
377 off_t hi_pos = (end_pos
378 - end_pos % (STP_BLKSIZE (&fstatus->st) + 1));
379 if (0 <= current_pos && current_pos < hi_pos
380 && 0 <= lseek (fd, hi_pos, SEEK_CUR))
381 bytes = hi_pos - current_pos;
382 }
383 }
384
385 if (! skip_read)
386 {
387 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
388 for (ssize_t bytes_read;
389 (bytes_read = read (fd, buf, BUFFER_SIZE));
390 bytes += bytes_read)
391 if (bytes_read < 0)
392 {
393 err = errno;
394 break;
395 }
396 }
397 }
398 else if (!count_chars && !count_complicated)
399 {
400 /* Use a separate loop when counting only lines or lines and bytes --
401 but not chars or words. */
402 struct wc_lines w = wc_lines (fd);
403 err = w.err;
404 lines = w.lines;
405 bytes = w.bytes;
406 }
407 else if (MB_CUR_MAX > 1)
408 {
409 bool in_word = false;
410 intmax_t linepos = 0;
411 mbstate_t state; mbszero (&state);
412 bool in_shift = false;
413 idx_t prev = 0; /* Number of bytes carried over from previous round. */
414
415 for (ssize_t bytes_read;
416 ((bytes_read = read (fd, buf + prev, BUFFER_SIZE - prev))
417 || prev);
418 )
419 {
420 if (bytes_read < 0)
421 {
422 err = errno;
423 break;
424 }
425
426 bytes += bytes_read;
427 char const *p = buf;
428 char const *plim = p + prev + bytes_read;
429 do
430 {
431 char32_t wide_char;
432 idx_t charbytes;
433 bool single_byte;
434
435 if (!in_shift && 0 <= *p && *p < 0x80)
436 {
437 /* Handle most ASCII characters quickly, without calling
438 mbrtoc32. */
439 charbytes = 1;
440 wide_char = *p;
441 single_byte = true;
442 }
443 else
444 {
445 idx_t scanbytes = plim - (p + prev);
446 size_t n = mbrtoc32 (&wide_char, p + prev, scanbytes, &state);
447 prev = 0;
448
449 if (scanbytes < n)
450 {
451 if (n == (size_t) -2 && plim - p < BUFFER_SIZE
452 && bytes_read)
453 {
454 /* An incomplete character that is not ridiculously
455 long and there may be more input. Move the bytes
456 to buffer start and prepare to read more data. */
457 prev = plim - p;
458 memmove (buf, p, prev);
459 in_shift = true;
460 break;
461 }
462
463 /* Remember that we read a byte, but don't complain
464 about the error. Because of the decoding error,
465 this is a considered to be byte but not a
466 character (that is, chars is not incremented). */
467 p++;
468 mbszero (&state);
469 in_shift = false;
470
471 /* Treat encoding errors as non white space.
472 POSIX says a word is "a non-zero-length string of
473 characters delimited by white space". This is
474 wrong in some sense, as the string can be delimited
475 by start or end of input, and it is unclear what it
476 means when the input contains encoding errors.
477 Since encoding errors are not white space,
478 treat them that way here. */
479 words += !in_word;
480 in_word = true;
481 continue;
482 }
483
484 charbytes = n + !n;
485 single_byte = charbytes == !in_shift;
486 in_shift = !mbsinit (&state);
487 }
488
489 switch (wide_char)
490 {
491 case '\n':
492 lines++;
493 FALLTHROUGH;
494 case '\r':
495 case '\f':
496 if (linepos > linelength)
497 linelength = linepos;
498 linepos = 0;
499 in_word = false;
500 break;
501
502 case '\t':
503 linepos += 8 - (linepos % 8);
504 in_word = false;
505 break;
506
507 case ' ':
508 linepos++;
509 FALLTHROUGH;
510 case '\v':
511 in_word = false;
512 break;
513
514 default:;
515 bool in_word2;
516 if (single_byte)
517 {
518 linepos += wc_isprint[wide_char];
519 in_word2 = !wc_isspace[wide_char];
520 }
521 else
522 {
523 /* c32width can be expensive on macOS for example,
524 so avoid if not needed. */
525 if (print_linelength)
526 {
527 int width = c32width (wide_char);
528 if (width > 0)
529 linepos += width;
530 }
531 in_word2 = !iswnbspace (wide_char);
532 }
533
534 /* Count words by counting word starts, i.e., each
535 white space character (or the start of input)
536 followed by non white space. */
537 words += !in_word & in_word2;
538 in_word = in_word2;
539 break;
540 }
541
542 p += charbytes;
543 chars++;
544 }
545 while (p < plim);
546 }
547 if (linepos > linelength)
548 linelength = linepos;
549 }
550 else
551 {
552 bool in_word = false;
553 intmax_t linepos = 0;
554
555 for (ssize_t bytes_read; (bytes_read = read (fd, buf, BUFFER_SIZE)); )
556 {
557 if (bytes_read < 0)
558 {
559 err = errno;
560 break;
561 }
562
563 bytes += bytes_read;
564 char const *p = buf;
565 do
566 {
567 unsigned char c = *p++;
568 switch (c)
569 {
570 case '\n':
571 lines++;
572 FALLTHROUGH;
573 case '\r':
574 case '\f':
575 if (linepos > linelength)
576 linelength = linepos;
577 linepos = 0;
578 in_word = false;
579 break;
580
581 case '\t':
582 linepos += 8 - (linepos % 8);
583 in_word = false;
584 break;
585
586 case ' ':
587 linepos++;
588 FALLTHROUGH;
589 case '\v':
590 in_word = false;
591 break;
592
593 default:
594 linepos += wc_isprint[c];
595 bool in_word2 = !wc_isspace[c];
596 words += !in_word & in_word2;
597 in_word = in_word2;
598 break;
599 }
600 }
601 while (--bytes_read);
602 }
603 if (linepos > linelength)
604 linelength = linepos;
605 }
606
607 if (count_chars < print_chars)
608 chars = bytes;
609
610 if (total_mode != total_only)
611 write_counts (lines, words, chars, bytes, linelength, file_x);
612
613 total_lines_overflow |= ckd_add (&total_lines, total_lines, lines);
614 total_words_overflow |= ckd_add (&total_words, total_words, words);
615 total_chars_overflow |= ckd_add (&total_chars, total_chars, chars);
616 total_bytes_overflow |= ckd_add (&total_bytes, total_bytes, bytes);
617
618 if (linelength > max_line_length)
619 max_line_length = linelength;
620
621 if (err)
622 error (0, err, "%s", quotef (file));
623 return !err;
624 }
625
626 static bool
wc_file(char const * file,struct fstatus * fstatus)627 wc_file (char const *file, struct fstatus *fstatus)
628 {
629 if (! file || STREQ (file, "-"))
630 {
631 have_read_stdin = true;
632 xset_binary_mode (STDIN_FILENO, O_BINARY);
633 return wc (STDIN_FILENO, file, fstatus, -1);
634 }
635 else
636 {
637 int fd = open (file, O_RDONLY | O_BINARY);
638 if (fd == -1)
639 {
640 error (0, errno, "%s", quotef (file));
641 return false;
642 }
643 else
644 {
645 bool ok = wc (fd, file, fstatus, 0);
646 if (close (fd) != 0)
647 {
648 error (0, errno, "%s", quotef (file));
649 return false;
650 }
651 return ok;
652 }
653 }
654 }
655
656 /* Return the file status for the NFILES files addressed by FILE.
657 Optimize the case where only one number is printed, for just one
658 file; in that case we can use a print width of 1, so we don't need
659 to stat the file. Handle the case of (nfiles == 0) in the same way;
660 that happens when we don't know how long the list of file names will be. */
661
662 static struct fstatus *
get_input_fstatus(idx_t nfiles,char * const * file)663 get_input_fstatus (idx_t nfiles, char *const *file)
664 {
665 struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
666
667 if (nfiles == 0
668 || (nfiles == 1
669 && ((print_lines + print_words + print_chars
670 + print_bytes + print_linelength)
671 == 1)))
672 fstatus[0].failed = 1;
673 else
674 {
675 for (idx_t i = 0; i < nfiles; i++)
676 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
677 ? fstat (STDIN_FILENO, &fstatus[i].st)
678 : stat (file[i], &fstatus[i].st));
679 }
680
681 return fstatus;
682 }
683
684 /* Return a print width suitable for the NFILES files whose status is
685 recorded in FSTATUS. Optimize the same special case that
686 get_input_fstatus optimizes. */
687
688 ATTRIBUTE_PURE
689 static int
compute_number_width(idx_t nfiles,struct fstatus const * fstatus)690 compute_number_width (idx_t nfiles, struct fstatus const *fstatus)
691 {
692 int width = 1;
693
694 if (0 < nfiles && fstatus[0].failed <= 0)
695 {
696 int minimum_width = 1;
697 uintmax_t regular_total = 0;
698
699 for (idx_t i = 0; i < nfiles; i++)
700 if (! fstatus[i].failed)
701 {
702 if (!S_ISREG (fstatus[i].st.st_mode))
703 minimum_width = 7;
704 else if (ckd_add (®ular_total, regular_total,
705 fstatus[i].st.st_size))
706 {
707 regular_total = UINTMAX_MAX;
708 break;
709 }
710 }
711
712 for (; 10 <= regular_total; regular_total /= 10)
713 width++;
714 if (width < minimum_width)
715 width = minimum_width;
716 }
717
718 return width;
719 }
720
721
/* Parse the options in ARGV, count every input named on the command
   line or listed via --files0-from (standard input if none is named on
   the command line), print the per-file counts and, depending on
   --total, a final total line.  Return EXIT_SUCCESS if every input was
   processed without error and no total overflowed, EXIT_FAILURE
   otherwise.  */
int
main (int argc, char **argv)
{
  int optc;
  idx_t nfiles;
  char **files;
  char *files_from = nullptr;
  struct fstatus *fstatus;
  struct Tokens tok;

  initialize_main (&argc, &argv);
  set_program_name (argv[0]);
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  page_size = getpagesize ();
  /* Line buffer stdout to ensure lines are written atomically and immediately
     so that processes running in parallel do not intersperse their output.  */
  setvbuf (stdout, nullptr, _IOLBF, 0);

  posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);

  print_lines = print_words = print_chars = print_bytes = false;
  print_linelength = false;
  total_lines = total_words = total_chars = total_bytes = max_line_length = 0;

  while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
    switch (optc)
      {
      case 'c':
        print_bytes = true;
        break;

      case 'm':
        print_chars = true;
        break;

      case 'l':
        print_lines = true;
        break;

      case 'w':
        print_words = true;
        break;

      case 'L':
        print_linelength = true;
        break;

      case DEBUG_PROGRAM_OPTION:
        debug = true;
        break;

      case FILES0_FROM_OPTION:
        files_from = optarg;
        break;

      case TOTAL_OPTION:
        total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
        break;

      case_GETOPT_HELP_CHAR;

      case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

      default:
        usage (EXIT_FAILURE);
      }

  /* With no count selected, print lines, words and bytes.  */
  if (! (print_lines || print_words || print_chars || print_bytes
         || print_linelength))
    print_lines = print_words = print_bytes = true;

  /* Build the per-byte lookup tables only for the counts that use them.  */
  if (print_linelength)
    for (int i = 0; i <= UCHAR_MAX; i++)
      wc_isprint[i] = !!isprint (i);
  if (print_words)
    for (int i = 0; i <= UCHAR_MAX; i++)
      wc_isspace[i] = isspace (i) || iswnbspace (btoc32 (i));

  /* True if the file names were read into TOK and must be freed later.  */
  bool read_tokens = false;
  struct argv_iterator *ai;
  if (files_from)
    {
      FILE *stream;

      /* When using --files0-from=F, you may not specify any files
         on the command-line.  */
      if (optind < argc)
        {
          error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
          fprintf (stderr, "%s\n",
                   _("file operands cannot be combined with --files0-from"));
          usage (EXIT_FAILURE);
        }

      if (STREQ (files_from, "-"))
        stream = stdin;
      else
        {
          stream = fopen (files_from, "r");
          if (stream == nullptr)
            error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
                   quoteaf (files_from));
        }

      /* Read the file list into RAM if we can detect its size and that
         size is reasonable.  Otherwise, we'll read a name at a time.  */
      struct stat st;
      if (fstat (fileno (stream), &st) == 0
          && S_ISREG (st.st_mode)
          && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
        {
          read_tokens = true;
          readtokens0_init (&tok);
          if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
            error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
                   quoteaf (files_from));
          files = tok.tok;
          nfiles = tok.n_tok;
          ai = argv_iter_init_argv (files);
        }
      else
        {
          /* The number of names is unknown; NFILES == 0 signals that.  */
          files = nullptr;
          nfiles = 0;
          ai = argv_iter_init_stream (stream);
        }
    }
  else
    {
      /* With no file operand, use a single null name, which the status
         and counting code treat as standard input.  */
      static char *stdin_only[] = { nullptr };
      files = (optind < argc ? argv + optind : stdin_only);
      nfiles = (optind < argc ? argc - optind : 1);
      ai = argv_iter_init_argv (files);
    }

  if (!ai)
    xalloc_die ();

  fstatus = get_input_fstatus (nfiles, files);
  if (total_mode == total_only)
    number_width = 1;  /* No extra padding, since no alignment requirement.  */
  else
    number_width = compute_number_width (nfiles, fstatus);

  bool ok = true;
  enum argv_iter_err ai_err;
  char *file_name;
  for (int i = 0; (file_name = argv_iter (ai, &ai_err)); i++)
    {
      bool skip_file = false;
      if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
        {
          /* Give a better diagnostic in an unusual case:
             printf - | wc --files0-from=- */
          error (0, 0, _("when reading file names from stdin, "
                         "no file name of %s allowed"),
                 quoteaf (file_name));
          skip_file = true;
        }

      if (!file_name[0])
        {
          /* Diagnose a zero-length file name.  When it's one
             among many, knowing the record number may help.
             FIXME: currently print the record number only with
             --files0-from=FILE.  Maybe do it for argv, too?  */
          if (files_from == nullptr)
            error (0, 0, "%s", _("invalid zero-length file name"));
          else
            {
              /* Using the standard 'filename:line-number:' prefix here is
                 not totally appropriate, since NUL is the separator, not NL,
                 but it might be better than nothing.  */
              error (0, 0, "%s:%zu: %s", quotef (files_from),
                     argv_iter_n_args (ai), _("invalid zero-length file name"));
            }
          skip_file = true;
        }

      /* When the number of names is unknown (NFILES == 0), the single
         status slot is reused for every file.  */
      if (skip_file)
        ok = false;
      else
        ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);

      /* Mark the shared slot as not yet examined for the next file.  */
      if (! nfiles)
        fstatus[0].failed = 1;
    }
  switch (ai_err)
    {
    case AI_ERR_EOF:
      break;

    case AI_ERR_READ:
      error (0, errno, _("%s: read error"), quotef (files_from));
      ok = false;
      break;

    case AI_ERR_MEM:
      xalloc_die ();

    default:
      unreachable ();
    }

  /* No arguments on the command line is fine.  That means read from stdin.
     However, no arguments on the --files0-from input stream means
     don't read anything.  */
  if (ok && !files_from && argv_iter_n_args (ai) == 0)
    ok &= wc_file (nullptr, &fstatus[0]);

  if (read_tokens)
    readtokens0_free (&tok);

  /* Print the total line always/only when requested, or in auto mode
     when more than one name was processed.  */
  if (total_mode != total_never
      && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
    {
      /* A total that overflowed is reported as the maximum value, with
         a diagnostic and a failure exit status.  */
      if (total_lines_overflow)
        {
          total_lines = UINTMAX_MAX;
          error (0, EOVERFLOW, _("total lines"));
          ok = false;
        }
      if (total_words_overflow)
        {
          total_words = UINTMAX_MAX;
          error (0, EOVERFLOW, _("total words"));
          ok = false;
        }
      if (total_chars_overflow)
        {
          total_chars = UINTMAX_MAX;
          error (0, EOVERFLOW, _("total characters"));
          ok = false;
        }
      if (total_bytes_overflow)
        {
          total_bytes = UINTMAX_MAX;
          error (0, EOVERFLOW, _("total bytes"));
          ok = false;
        }

      /* With --total=only the line carries no "total" label.  */
      write_counts (total_lines, total_words, total_chars, total_bytes,
                    max_line_length,
                    total_mode != total_only ? _("total") : nullptr);
    }

  argv_iter_free (ai);

  free (fstatus);

  if (have_read_stdin && close (STDIN_FILENO) != 0)
    error (EXIT_FAILURE, errno, "-");

  return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
982