1 /* pr -- convert text files for printing.
2    Copyright (C) 1988-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /*  By Pete TerMaat, with considerable refinement by Roland Huebner.  */
18 
19 /* Things to watch: Sys V screws up on ...
20    pr -n -3 -s: /usr/dict/words
21    pr -m -o10 -n /usr/dict/words{,,,}
22    pr -6 -a -n -o5 /usr/dict/words
23 
24    Ideas:
25 
26    Keep a things_to_do list of functions to call when we know we have
27    something to print.  Cleaner than current series of checks.
28 
29    Improve the printing of control prefixes.
30 
31    Expand the file name in the centered header line to a full file name.
32 
33 
34    Concept:
35 
36    If the input_tab_char differs from the default value TAB
37    ('-e[CHAR[...]]' is used), any input text tab is expanded to the
38    default width of 8 spaces (compare char_to_clump). - Same as SunOS
39    does.
40 
   The treatment of the number_separator (compare add_line_number):
   The default value TAB of the number_separator ('-n[SEP[...]]') is not
   treated as an input character. An optional '-e' setting has no effect
   on it.
45    -  With single column output
46       only one POSIX requirement has to be met:
47    The default n-separator should be a TAB. The consequence is a
48    different width between the number and the text if the output position
49    of the separator changes, i.e., it depends upon the left margin used.
50    That's not nice but easy-to-use together with the defaults of other
51    utilities, e.g. sort or cut. - Same as SunOS does.
52    -  With multicolumn output
53       two conflicting POSIX requirements exist:
54    First "default n-separator is TAB", second "output text columns shall
55    be of equal width". Moreover POSIX specifies the number+separator a
56    part of the column, together with '-COLUMN' and '-a -COLUMN'.
57    (With -m output the number shall occupy each line only once. Exactly
58    the same situation as single column output exists.)
59       GNU pr gives priority to the 2nd requirement and observes POSIX
60    column definition. The n-separator TAB is expanded to the same number
61    of spaces in each column using the default value 8. Tabification is
62    only performed if it is compatible with the output position.
63    Consequence: The output text columns are of equal width. The layout
64    of a page does not change if the left margin varies. - Looks better
65    than the SunOS approach.
66       SunOS pr gives priority to the 1st requirement. n-separator TAB
67    width varies with each column. Only the width of text part of the
68    column is fixed.
69    Consequence: The output text columns don't have equal width. The
70    widths and the layout of the whole page varies with the left margin.
71    An overflow of the line length (without margin) over the input value
72    PAGE_WIDTH may occur.
73 
74    The interference of the POSIX-compliant small letter options -w and -s:
75    ("interference" means "setting a _separator_ with -s switches off the
76    column structure and the default - not generally - page_width,
77    acts on -w option")
78        options:       text form  / separator:     equivalent new options:
79        -w l   -s[x]
80     --------------------------------------------------------------------
81     1.  --     --     columns    / space          --
82                       trunc. to page_width = 72
83     2.  --    -s[:]   full lines / TAB[:]         -J  --sep-string[="<TAB>"|:]
84                       no truncation
85     3.  -w l   --     columns    / space          -W l
86                       trunc. to page_width = l
87     4.  -w l  -s[:]   columns    / no sep.[:]     -W l  --sep-string[=:]
88                       trunc. to page_width = l
89     --------------------------------------------------------------------
90 
91 
92    Options:
93 
94    Including version 1.22i:
95    Some SMALL LETTER options have been redefined with the object of a
96    better POSIX compliance. The output of some further cases has been
97    adapted to other UNIXes. A violation of downward compatibility has to
98    be accepted.
   Some NEW CAPITAL LETTER options (-J, -S, -W) have been introduced to
   turn off unexpected interference of small letter options (-s and -w
   together with the three column options).
102    -N option and the second argument LAST_PAGE of +FIRST_PAGE offer more
103    flexibility; The detailed handling of form feeds set in the input
104    files requires -T option.
105 
106    Capital letter options dominate small letter ones.
107 
108    Some of the option-arguments cannot be specified as separate arguments
109    from the preceding option letter (already stated in POSIX specification).
110 
111    Form feeds in the input cause page breaks in the output. Multiple
112    form feeds produce empty pages.
113 
114    +FIRST_PAGE[:LAST_PAGE], --pages=FIRST_PAGE[:LAST_PAGE]
115                 begin [stop] printing with page FIRST_[LAST_]PAGE
116 
117    -COLUMN, --columns=COLUMN
118                 Produce output that is COLUMN columns wide and
119                 print columns down, unless -a is used. Balance number of
120                 lines in the columns on each page.
121 
122    -a, --across		Print columns across rather than down, used
123                 together with -COLUMN. The input
124                 one
125                 two
126                 three
127                 four
128                 will be printed with '-a -3' as
129                 one	two	three
130                 four
131 
132    -b		Balance columns on the last page.
133                 -b is no longer an independent option. It's always used
134                 together with -COLUMN (unless -a is used) to get a
135                 consistent formulation with "FF set by hand" in input
136                 files. Each formfeed found terminates the number of lines
137                 to be read with the actual page. The situation for
138                 printing columns down is equivalent to that on the last
139                 page. So we need a balancing.
140 
141                 Keeping -b as an underground option guarantees some
142                 downward compatibility. Utilities using pr with -b
143                 (a most frequently used form) still work as usual.
144 
   -c, --show-control-chars
                Print nonprintable characters as control prefixes:
                use hat notation (Control-G is printed as ^G) and
                octal backslash notation.
149 
150    -d, --double-space	Double space the output.
151 
152    -D FORMAT, --date-format=FORMAT  Use FORMAT for the header date.
153 
154    -e[CHAR[WIDTH]], --expand-tabs[=CHAR[WIDTH]]
155                 Expand tabs to spaces on input.  Optional argument CHAR
156                 is the input TAB character. (Default is TAB).  Optional
157                 argument WIDTH is the input TAB character's width.
158                 (Default is 8.)
159 
160    -F, -f, --form-feed	Use formfeeds instead of newlines to separate
161                 pages. A three line HEADER is used, no TRAILER with -F,
162                 without -F both HEADER and TRAILER are made of five lines.
163 
164    -h HEADER, --header=HEADER
165                 Replace the filename in the header with the string HEADER.
166                 A centered header is used.
167 
168    -i[CHAR[WIDTH]], --output-tabs[=CHAR[WIDTH]]
169                 Replace spaces with tabs on output.  Optional argument
170                 CHAR is the output TAB character. (Default is TAB).
171                 Optional argument WIDTH is the output TAB character's
172                 width. (Default is 8)
173 
174    -J, --join-lines	Merge lines of full length, turns off -W/-w
175                 line truncation, no column alignment, --sep-string[=STRING]
176                 sets separators, works with all column options
177                 (-COLUMN | -a -COLUMN | -m).
178                 -J has been introduced (together with -W and --sep-string) to
179                 disentangle the old (POSIX compliant) options -w, -s
180                 along with the 3 column options.
181 
182    -l PAGE_LENGTH, --length=PAGE_LENGTH
183                 Set the page length to PAGE_LENGTH lines. Default is 66,
184                 including 5 lines of HEADER and 5 lines of TRAILER
185                 without -F, but only 3 lines of HEADER and no TRAILER
186                 with -F (i.e the number of text lines defaults to 56 or
187                 63 respectively).
188 
   -m, --merge		Print files in parallel; pad across to align
                columns; truncate lines and print separator strings;
191                 Do it also with empty columns to get a continuous line
192                 numbering and column marking by separators throughout
193                 the whole merged file.
194 
195                 Empty pages in some input files produce empty columns
196                 [marked by separators] in the merged pages. Completely
197                 empty merged pages show no column separators at all.
198 
199                 The layout of a merged page is ruled by the largest form
200                 feed distance of the single pages at that page. Shorter
201                 columns will be filled up with empty lines.
202 
203                 Together with -J option join lines of full length and
204                 set separators when -S option is used.
205 
206    -n[SEP[DIGITS]], --number-lines[=SEP[DIGITS]]
207                 Provide DIGITS digit line numbering (default for DIGITS
208                 is 5). With multicolumn output the number occupies the
209                 first DIGITS column positions of each text column or only
210                 each line of -m output.
211                 With single column output the number precedes each line
212                 just as -m output.
213                 Optional argument SEP is the character appended to the
214                 line number to separate it from the text followed.
215                 The default separator is a TAB. In a strict sense a TAB
216                 is always printed with single column output only. The
217                 TAB-width varies with the TAB-position, e.g. with the
218                 left margin specified by -o option.
219                 With multicolumn output priority is given to "equal width
220                 of output columns" (a POSIX specification). The TAB-width
221                 is fixed to the value of the 1st column and does not
222                 change with different values of left margin. That means a
223                 fixed number of spaces is always printed in the place of
224                 a TAB. The tabification depends upon the output
225                 position.
226 
227                 Default counting of the line numbers starts with 1st
228                 line of the input file (not the 1st line printed,
229                 compare the --page option and -N option).
230 
231    -N NUMBER, --first-line-number=NUMBER
232                 Start line counting with the number NUMBER at the 1st
233                 line of first page printed (mostly not the 1st line of
234                 the input file).
235 
236    -o MARGIN, --indent=MARGIN
237                 Offset each line with a margin MARGIN spaces wide.
238                 Total page width is the size of the margin plus the
239                 PAGE_WIDTH set with -W/-w option.
240 
241    -r, --no-file-warnings
242                 Omit warning when a file cannot be opened.
243 
244    -s[CHAR], --separator[=CHAR]
245                 Separate columns by a single character CHAR, default for
246                 CHAR is the TAB character without -w and 'no char' with -w.
247                 Without '-s' default separator 'space' is set.
248                 -s[CHAR] turns off line truncation of all 3 column options
249                 (-COLUMN|-a -COLUMN|-m) except -w is set. That is a POSIX
250                 compliant formulation. The source code translates -s into
251                 the new options -S and -J, also -W if required.
252 
253    -S[STRING], --sep-string[=STRING]
                Separate columns by any string STRING. The -S option
                does not react to the -W/-w option (unlike the -s
                option). It defines a separator, nothing else.
257                 Without -S: Default separator TAB is used with -J and
258                 'space' otherwise (same as -S" ").
259                 With -S "": No separator is used.
260                 Quotes should be used with blanks and some shell active
261                 characters.
262                 -S is problematic because in its obsolete form you
263                 cannot use -S "STRING", but in its standard form you
264                 must use -S "STRING" if STRING is empty.  Use
265                 --sep-string to avoid the ambiguity.
266 
267    -t, --omit-header	Do not print headers or footers but retain form
268                 feeds set in the input files.
269 
270    -T, --omit-pagination
271                 Do not print headers or footers, eliminate any pagination
272                 by form feeds set in the input files.
273 
274    -v, --show-nonprinting
275                 Print nonprintable characters as escape sequences. Use
276                 octal backslash notation. Control-G becomes \007.
277 
278    -w PAGE_WIDTH, --width=PAGE_WIDTH
279                 Set page width to PAGE_WIDTH characters for multiple
280                 text-column output only (default for PAGE_WIDTH is 72).
281                 -s[CHAR] turns off the default page width and any line
282                 truncation. Lines of full length will be merged,
283                 regardless of the column options set. A POSIX compliant
284                 formulation.
285 
286    -W PAGE_WIDTH, --page-width=PAGE_WIDTH
287                 Set the page width to PAGE_WIDTH characters. That's valid
288                 with and without a column option. Text lines will be
289                 truncated, unless -J is used. Together with one of the
290                 column options (-COLUMN| -a -COLUMN| -m) column alignment
291                 is always used.
292                 Default is 72 characters.
293                 Without -W PAGE_WIDTH
294                 - but with one of the column options default truncation of
295                   72 characters is used (to keep downward compatibility
296                   and to simplify most frequently met column tasks).
297                   Column alignment and column separators are used.
298                 - and without any of the column options NO line truncation
299                   is used (to keep downward compatibility and to meet most
300                   frequent tasks). That's equivalent to  -W 72 -J .
301 
302                 With/without  -W PAGE_WIDTH  the header line is always
303                 truncated to avoid line overflow.
304 
                (In pr versions newer than 1.14 the -S option no longer
                affects the -W option.)
307 
308 */
309 
310 #include <config.h>
311 
312 #include <getopt.h>
313 #include <stdckdint.h>
314 #include <sys/types.h>
315 #include "system.h"
316 #include "fadvise.h"
317 #include "hard-locale.h"
318 #include "mbswidth.h"
319 #include "quote.h"
320 #include "stat-time.h"
321 #include "stdio--.h"
322 #include "strftime.h"
323 #include "xstrtol.h"
324 #include "xstrtol-error.h"
325 #include "xdectoint.h"
326 
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "pr"

/* The program's authors.  Expands to a comma-separated list of
   proper_name (...) calls, one per author.  */
#define AUTHORS \
  proper_name ("Pete TerMaat"), \
  proper_name ("Roland Huebner")
333 
/* Used with start_position in the struct COLUMN described below.
   If start_position == ANYWHERE, we aren't truncating columns and
   can begin printing a column anywhere.  Otherwise we must pad to
   the horizontal position start_position.  */
#define ANYWHERE	0
339 
/* Each column has one of these structures allocated for it.
   If we're only dealing with one file, fp is the same for all
   columns.

   The general strategy is to spend time setting up these column
   structures (storing columns if necessary), after which printing
   is a matter of flitting from column to column and calling
   print_func.

   Parallel files, single files printing across in multiple
   columns, and single files printing down in multiple columns all
   fit the same printing loop.

   print_func		Function used to print lines in this column.
                        If we're storing this column it will be
                        print_stored(), otherwise it will be read_line().

   char_func		Function used to process characters in this column.
                        If we're storing this column it will be store_char(),
                        otherwise it will be print_char().

   current_line		Index of the current entry in line_vector, which
                        contains the index of the first character of the
                        current line in buff[].

   lines_stored		Number of lines in this column which are stored in
                        buff.

   lines_to_print	If we're storing this column, lines_to_print is
                        the number of stored lines which remain to be
                        printed.  Otherwise it is the number of lines
                        we can print without exceeding lines_per_body.

   start_position	The horizontal position we want to be in before we
                        print the first character in this column
                        (or ANYWHERE, see above).

   numbered		True means precede this column with a line number.  */

/* FIXME: There are many unchecked integer overflows in this file,
   that will cause this command to misbehave given large inputs or
   options.  Many of the "int" values below should be "size_t" or
   something else like that.  */

struct COLUMN;
struct COLUMN
  {
    FILE *fp;			/* Input stream for this column.  */
    char const *name;		/* File name.  */

    /* Status of the file pointer.  */
    enum
      {
        OPEN,
        FF_FOUND,		/* Used with -b option, set with \f, changed
                                   to ON_HOLD after print_header.  */
        ON_HOLD,		/* Hit a form feed.  */
        CLOSED
      }
    status;

    /* Function used to print lines in this column.  */
    bool (*print_func) (struct COLUMN *);

    /* Function used to print or store characters in this column.  */
    void (*char_func) (char);

    int current_line;		/* Index of current place in line_vector.  */
    int lines_stored;		/* Number of lines stored in buff.  */
    int lines_to_print;		/* No. lines stored or space left on page.  */
    int start_position;		/* Horizontal position of first char.  */
    bool numbered;		/* Precede this column with a line number.  */

    /* full_page_printed controls a special case of "FF set by hand":
       true means a full page has been printed without a FF found.  To
       avoid an additional empty page we have to ignore a FF immediately
       following in the next line.  */
    bool full_page_printed;
  };

typedef struct COLUMN COLUMN;
418 
419 static int char_to_clump (char c);
420 static bool read_line (COLUMN *p);
421 static bool print_page (void);
422 static bool print_stored (COLUMN *p);
423 static bool open_file (char *name, COLUMN *p);
424 static bool skip_to_page (uintmax_t page);
425 static void print_header (void);
426 static void pad_across_to (int position);
427 static void add_line_number (COLUMN *p);
428 static void getoptnum (char const *n_str, int min, int *num,
429                        char const *errfmt);
430 static void getoptarg (char *arg, char switch_char, char *character,
431                        int *number);
432 static void print_files (int number_of_files, char **av);
433 static void init_parameters (int number_of_files);
434 static void init_header (char const *filename, int desc);
435 static bool init_fps (int number_of_files, char **av);
436 static void init_funcs (void);
437 static void init_store_cols (void);
438 static void store_columns (void);
439 static void balance (int total_stored);
440 static void store_char (char c);
441 static void pad_down (unsigned int lines);
442 static void read_rest_of_line (COLUMN *p);
443 static void skip_read (COLUMN *p, int column_number);
444 static void print_char (char c);
445 static void cleanup (void);
446 static void print_sep_string (void);
447 static void separator_string (char const *optarg_S);
448 
449 /* All of the columns to print.  */
450 static COLUMN *column_vector;
451 
/* When printing a single file in multiple downward columns,
   we store the leftmost columns contiguously in buff.
   To print a line from buff, get the index of the first character
   from line_vector[i], and print up to line_vector[i + 1].  */
static char *buff;

/* Index of the position in buff where the next character
   will be stored.  */
static unsigned int buff_current;

/* The number of characters in buff.
   Used for allocation of buff and to detect overflow of buff.  */
static size_t buff_allocated;

/* Array of indices into buff.
   Each entry is an index of the first character of a line.
   This is used when storing lines to facilitate shuffling when
   we do column balancing on the last page.  */
static int *line_vector;

/* Array of horizontal positions.
   For each line in line_vector, end_vector[line] is the horizontal
   position we are in after printing that line.  We keep track of this
   so that we know how much we need to pad to prepare for the next
   column.  */
static int *end_vector;
478 
/* (-m) True means we're printing multiple files in parallel.  */
static bool parallel_files = false;

/* (-m) True means a line starts with some empty columns (some files
   already CLOSED or ON_HOLD) which we have to align.  */
static bool align_empty_cols;

/* (-m) True means we have not yet found any printable column in a line.
   align_empty_cols = true  has to be maintained.  */
static bool empty_line;

/* (-m) False means printable column output precedes a form feed found.
   Column alignment is done only once.  No additional action with that
   form feed.
   True means we found only a form feed in a column.  Maybe we have to do
   some column alignment with that form feed.  */
static bool FF_only;

/* (-[0-9]+) True means we're given an option explicitly specifying
   number of columns.  Used to detect when this option is used with -m
   and when translating old options to new/long options.  */
static bool explicit_columns = false;

/* (-t|-T) False means we aren't printing headers and footers.  */
static bool extremities = true;

/* (-t) True means we retain all FF set by hand in input files.
   False is set with -T option.  */
static bool keep_FF = false;

/* NOTE(review): print_a_FF has no comment of its own in the original;
   the name suggests a form feed that is pending output -- confirm
   against the code that sets and tests it.  */
static bool print_a_FF = false;

/* True means we need to print a header as soon as we know we've got input
   to print after it.  */
static bool print_a_header;

/* (-f) True means use formfeeds instead of newlines to separate pages.  */
static bool use_form_feed = false;

/* True means we have read the standard input.  */
static bool have_read_stdin = false;

/* True means the -a flag has been given.  */
static bool print_across_flag = false;

/* True means we're printing one file in multiple (>1) downward columns.  */
static bool storing_columns = true;

/* (-b) True means balance columns on the last page as Sys V does.
   That's no longer an independent option.  With storing_columns = true
   balance_columns = true is used too (s. function init_parameters).
   We get a consistent formulation with "FF set by hand" in input files.  */
static bool balance_columns = false;
531 
/* (-l) Number of lines on a page, including header and footer lines.  */
static int lines_per_page = 66;

/* Number of lines in the header and in the footer.  Both are
   compile-time constants.
   NOTE(review): the historical comment here said they "can be reset to
   0 using the -t flag"; as enum constants they cannot change, so -t is
   presumably honored elsewhere (see `extremities`) -- confirm.  */
enum { lines_per_header = 5 };

/* NOTE(review): undocumented in the original; presumably the number of
   text lines on a page, i.e. lines_per_page less header and footer
   lines.  It is computed outside this part of the file -- confirm.  */
static int lines_per_body;
enum { lines_per_footer = 5 };

/* (-w|-W) Width in characters of the page.  Does not include the width of
   the margin.  */
static int chars_per_line = 72;

/* (-w|-W) True means we truncate lines longer than chars_per_column.  */
static bool truncate_lines = false;

/* (-J) True means we join lines without any line truncation.  -J
   dominates -w option.  */
static bool join_lines = false;

/* Number of characters in a column.  Based on col_sep_length and
   page width.  */
static int chars_per_column;
555 
/* (-e) True means convert tabs to spaces on input.  */
static bool untabify_input = false;

/* (-e) The input tab character.  */
static char input_tab_char = '\t';

/* (-e) Tabstops are at chars_per_input_tab, 2*chars_per_input_tab,
   3*chars_per_input_tab, ... where the leftmost column is 1.  */
static int chars_per_input_tab = 8;

/* (-i) True means convert spaces to tabs on output.  */
static bool tabify_output = false;

/* (-i) The output tab character.  */
static char output_tab_char = '\t';

/* (-i) The width of the output tab.  */
static int chars_per_output_tab = 8;

/* Keeps track of pending white space.  When we hit a nonspace
   character after some whitespace, we print whitespace, tabbing
   if necessary to get to output_position + spaces_not_printed.  */
static int spaces_not_printed;

/* (-o) Number of spaces in the left margin (tabs used when possible).  */
static int chars_per_margin = 0;

/* Position where the next character will fall.
   Leftmost position is 0 + chars_per_margin.
   Rightmost position is chars_per_margin + chars_per_line - 1.
   This is important for converting spaces to tabs on output.  */
static int output_position;

/* Horizontal position relative to the current file.
   (output_position depends on where we are on the page;
   input_position depends on where we are in the file.)
   Important for converting tabs to spaces on input.  */
static int input_position;

/* True if there were any failed opens so we can exit with nonzero
   status.  */
static bool failed_opens = false;

/* The number of spaces taken up if we print a tab character with width
   c_ from position h_.  The result is in the range 1..c_ for
   non-negative h_.  c_ must be nonzero (it is used as a divisor) and is
   evaluated twice, so it must have no side effects.  */
#define TAB_WIDTH(c_, h_) ((c_) - ((h_) % (c_)))

/* The horizontal position we'll be at after printing a tab character
   of width c_ from the position h_.  Both arguments are evaluated more
   than once.  */
#define POS_AFTER_TAB(c_, h_) ((h_) + TAB_WIDTH (c_, h_))
606 
/* (-NNN) Number of columns of text to print.  */
static int columns = 1;

/* (+NNN:MMM) Page numbers on which to begin and stop printing.
   first_page_number = 0  will be used to check input only.  */
static uintmax_t first_page_number = 0;
static uintmax_t last_page_number = UINTMAX_MAX;

/* Number of files open (not closed, not on hold).  */
static int files_ready_to_read = 0;

/* Current page number.  Displayed in header.  */
static uintmax_t page_number;

/* Current line number.  Displayed when -n flag is specified.

   When printing files in parallel (-m flag), line numbering is as follows:
   1    foo     goo     moo
   2    hoo     too     zoo

   When printing files across (-a flag), ...
   1    foo     2       moo     3       goo
   4    hoo     5       too     6       zoo

   Otherwise, line numbering is as follows:
   1    foo     3       goo     5       too
   2    moo     4       hoo     6       zoo  */
static int line_number;

/* (-n) True means lines should be preceded by numbers.  */
static bool numbered_lines = false;

/* (-n) Character which follows each line number.  */
static char number_separator = '\t';

/* (-n) Line counting starts with 1st line of input file (not with 1st
   line of 1st page printed).  */
static int line_count = 1;

/* (-n) True means counting of skipped lines starts with 1st line of
   input file.  False means -N option is used in addition, counting of
   skipped lines not required.  */
static bool skip_count = true;

/* (-N) Counting starts with start_line_num = NUMBER at 1st line of
   first page printed, usually not 1st page of input file.  */
static int start_line_num = 1;

/* (-n) Width in characters of a line number.  */
static int chars_per_number = 5;

/* Used when widening the first column to accommodate numbers -- only
   needed when printing files in parallel.  Includes width of both the
   number and the number_separator.  */
static int number_width;

/* Buffer sprintf uses to format a line number.  */
static char *number_buff;
665 
/* (-v) True means nonprintable characters are printed as escape sequences.
   control-g becomes \007.  */
static bool use_esc_sequence = false;

/* (-c) True means nonprintable characters are printed as control prefixes.
   control-g becomes ^G.  */
static bool use_cntrl_prefix = false;

/* (-d) True means output is double spaced.  */
static bool double_space = false;

/* Number of files opened initially.  Should be 1 unless we're printing
   multiple files in parallel.
   NOTE(review): the original comment names "init_files" as the opener,
   but no such function is declared here; presumably init_fps is
   meant -- confirm.  */
static int total_files = 0;

/* (-r) True means don't complain if we can't open a file.  */
static bool ignore_failed_opens = false;

/* (-S) True means we separate columns with a specified string.
   -S option does not affect line truncation nor column alignment.  */
static bool use_col_separator = false;

/* String used to separate columns if the -S option has been specified.
   Default without -S but together with one of the column options
   -a|COLUMN|-m is a 'space' and with the -J option a 'tab'.
   column_separator and line_separator hold those two defaults; they
   point at string literals and therefore must not be written through.  */
static char const *col_sep_string = "";
static int col_sep_length = 0;
static char *column_separator = (char *) " ";
static char *line_separator = (char *) "\t";

/* Number of separator characters waiting to be printed as soon as we
   know that we have any input remaining to be printed.  */
static int separators_not_printed;

/* Position we need to pad to, as soon as we know that we have input
   remaining to be printed.  */
static int padding_not_printed;

/* True means we should pad the end of the page.  Remains false until we
   know we have a page to print.  */
static bool pad_vertically;

/* (-h) String of characters used in place of the filename in the header.  */
static char *custom_header;

/* (-D) Date format for the header.  */
static char const *date_format;
713 
714 /* The local time zone rules, as per the TZ environment variable.  */
715 static timezone_t localtz;
716 
717 /* Date and file name for the header.  */
718 static char *date_text;
719 static char const *file_text;
720 
721 /* Output columns available, not counting the date and file name.  */
722 static int header_width_available;
723 
724 static char *clump_buff;
725 
726 /* True means we read the line no. lines_per_body in skip_read
727    called by skip_to_page. That variable controls the coincidence of a
728    "FF set by hand" and "full_page_printed", see above the definition of
729    structure COLUMN. */
730 static bool last_line = false;
731 
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   These are the values getopt_long returns for the corresponding
   entries of long_options.  */
enum
{
  COLUMNS_OPTION = CHAR_MAX + 1,	/* --columns=COLUMN */
  PAGES_OPTION				/* --pages=FIRST_PAGE[:LAST_PAGE] */
};
739 
/* Short option string passed to getopt_long.
   - The leading '-' makes getopt_long hand back each non-option
     argument as if it were the argument of an option with code 1
     (see 'case 1' in main).
   - The digits 0-9 are accepted as options so that an old-style
     column count such as -3 can be accumulated digit by digit.
   - 'X:' means option X requires an argument; 'X::' means the
     argument is optional.  */
static char const short_options[] =
  "-0123456789D:FJN:S::TW:abcde::fh:i::l:mn::o:rs::tvw:";
742 
/* Long options recognized by getopt_long.  Each entry maps a long name
   to the short option character handled in main's switch, except for
   --pages and --columns, which use the pseudo short options
   PAGES_OPTION and COLUMNS_OPTION.  The table is terminated by an
   all-null entry.  */
static struct option const long_options[] =
{
  {"pages", required_argument, nullptr, PAGES_OPTION},
  {"columns", required_argument, nullptr, COLUMNS_OPTION},
  {"across", no_argument, nullptr, 'a'},
  {"show-control-chars", no_argument, nullptr, 'c'},
  {"double-space", no_argument, nullptr, 'd'},
  {"date-format", required_argument, nullptr, 'D'},
  {"expand-tabs", optional_argument, nullptr, 'e'},
  {"form-feed", no_argument, nullptr, 'f'},
  {"header", required_argument, nullptr, 'h'},
  {"output-tabs", optional_argument, nullptr, 'i'},
  {"join-lines", no_argument, nullptr, 'J'},
  {"length", required_argument, nullptr, 'l'},
  {"merge", no_argument, nullptr, 'm'},
  {"number-lines", optional_argument, nullptr, 'n'},
  {"first-line-number", required_argument, nullptr, 'N'},
  {"indent", required_argument, nullptr, 'o'},
  {"no-file-warnings", no_argument, nullptr, 'r'},
  {"separator", optional_argument, nullptr, 's'},
  {"sep-string", optional_argument, nullptr, 'S'},
  {"omit-header", no_argument, nullptr, 't'},
  {"omit-pagination", no_argument, nullptr, 'T'},
  {"show-nonprinting", no_argument, nullptr, 'v'},
  {"width", required_argument, nullptr, 'w'},
  {"page-width", required_argument, nullptr, 'W'},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {nullptr, 0, nullptr, 0}
};
773 
/* Report an "integer overflow" diagnostic and exit with status
   EXIT_FAILURE.  Does not return.  */
static _Noreturn void
integer_overflow (void)
{
  error (EXIT_FAILURE, 0, _("integer overflow"));
}
779 
780 /* Return the number of columns that have either an open file or
781    stored lines. */
782 
783 ATTRIBUTE_PURE
784 static unsigned int
cols_ready_to_print(void)785 cols_ready_to_print (void)
786 {
787   COLUMN *q;
788   unsigned int i;
789   unsigned int n;
790 
791   n = 0;
792   for (q = column_vector, i = 0; i < columns; ++q, ++i)
793     if (q->status == OPEN
794         || q->status == FF_FOUND	/* With -b: To print a header only */
795         || (storing_columns && q->lines_stored > 0 && q->lines_to_print > 0))
796       ++n;
797   return n;
798 }
799 
800 /* Estimate first_ / last_page_number
801    using option +FIRST_PAGE:LAST_PAGE */
802 
803 static bool
first_last_page(int oi,char c,char const * pages)804 first_last_page (int oi, char c, char const *pages)
805 {
806   char *p;
807   uintmax_t first;
808   uintmax_t last = UINTMAX_MAX;
809   strtol_error err = xstrtoumax (pages, &p, 10, &first, "");
810   if (err != LONGINT_OK && err != LONGINT_INVALID_SUFFIX_CHAR)
811     xstrtol_fatal (err, oi, c, long_options, pages);
812 
813   if (p == pages || !first)
814     return false;
815 
816   if (*p == ':')
817     {
818       char const *p1 = p + 1;
819       err = xstrtoumax (p1, &p, 10, &last, "");
820       if (err != LONGINT_OK)
821         xstrtol_fatal (err, oi, c, long_options, pages);
822       if (p1 == p || last < first)
823         return false;
824     }
825 
826   if (*p)
827     return false;
828 
829   first_page_number = first;
830   last_page_number = last;
831   return true;
832 }
833 
834 /* Parse column count string S, and if it's valid (1 or larger and
835    within range of the type of 'columns') set the global variables
836    columns and explicit_columns.  Otherwise, exit with a diagnostic.  */
837 
838 static void
parse_column_count(char const * s)839 parse_column_count (char const *s)
840 {
841   getoptnum (s, 1, &columns, _("invalid number of columns"));
842   explicit_columns = true;
843 }
844 
845 /* Estimate length of col_sep_string with option -S.  */
846 
847 static void
separator_string(char const * optarg_S)848 separator_string (char const *optarg_S)
849 {
850   size_t len = strlen (optarg_S);
851   if (INT_MAX < len)
852     integer_overflow ();
853   col_sep_length = len;
854   col_sep_string = optarg_S;
855 }
856 
857 int
main(int argc,char ** argv)858 main (int argc, char **argv)
859 {
860   unsigned int n_files;
861   bool old_options = false;
862   bool old_w = false;
863   bool old_s = false;
864   char **file_names;
865 
866   /* Accumulate the digits of old-style options like -99.  */
867   char *column_count_string = nullptr;
868   size_t n_digits = 0;
869   size_t n_alloc = 0;
870 
871   initialize_main (&argc, &argv);
872   set_program_name (argv[0]);
873   setlocale (LC_ALL, "");
874   bindtextdomain (PACKAGE, LOCALEDIR);
875   textdomain (PACKAGE);
876 
877   atexit (close_stdout);
878 
879   n_files = 0;
880   file_names = (argc > 1
881                 ? xnmalloc (argc - 1, sizeof (char *))
882                 : nullptr);
883 
884   while (true)
885     {
886       int oi = -1;
887       int c = getopt_long (argc, argv, short_options, long_options, &oi);
888       if (c == -1)
889         break;
890 
891       if (ISDIGIT (c))
892         {
893           /* Accumulate column-count digits specified via old-style options. */
894           if (n_digits + 1 >= n_alloc)
895             column_count_string
896               = X2REALLOC (column_count_string, &n_alloc);
897           column_count_string[n_digits++] = c;
898           column_count_string[n_digits] = '\0';
899           continue;
900         }
901 
902       n_digits = 0;
903 
904       switch (c)
905         {
906         case 1:			/* Non-option argument. */
907           /* long option --page dominates old '+FIRST_PAGE ...'.  */
908           if (! (first_page_number == 0
909                  && *optarg == '+' && first_last_page (-2, '+', optarg + 1)))
910             file_names[n_files++] = optarg;
911           break;
912 
913         case PAGES_OPTION:	/* --pages=FIRST_PAGE[:LAST_PAGE] */
914           {			/* dominates old opt +... */
915             if (! optarg)
916               error (EXIT_FAILURE, 0,
917                      _("'--pages=FIRST_PAGE[:LAST_PAGE]' missing argument"));
918             else if (! first_last_page (oi, 0, optarg))
919               error (EXIT_FAILURE, 0, _("invalid page range %s"),
920                      quote (optarg));
921             break;
922           }
923 
924         case COLUMNS_OPTION:	/* --columns=COLUMN */
925           {
926             parse_column_count (optarg);
927 
928             /* If there was a prior column count specified via the
929                short-named option syntax, e.g., -9, ensure that this
930                long-name-specified value overrides it.  */
931             free (column_count_string);
932             column_count_string = nullptr;
933             n_alloc = 0;
934             break;
935           }
936 
937         case 'a':
938           print_across_flag = true;
939           storing_columns = false;
940           break;
941         case 'b':
942           balance_columns = true;
943           break;
944         case 'c':
945           use_cntrl_prefix = true;
946           break;
947         case 'd':
948           double_space = true;
949           break;
950         case 'D':
951           date_format = optarg;
952           break;
953         case 'e':
954           if (optarg)
955             getoptarg (optarg, 'e', &input_tab_char,
956                        &chars_per_input_tab);
957           /* Could check tab width > 0. */
958           untabify_input = true;
959           break;
960         case 'f':
961         case 'F':
962           use_form_feed = true;
963           break;
964         case 'h':
965           custom_header = optarg;
966           break;
967         case 'i':
968           if (optarg)
969             getoptarg (optarg, 'i', &output_tab_char,
970                        &chars_per_output_tab);
971           /* Could check tab width > 0. */
972           tabify_output = true;
973           break;
974         case 'J':
975           join_lines = true;
976           break;
977         case 'l':
978           getoptnum (optarg, 1, &lines_per_page,
979                      _("'-l PAGE_LENGTH' invalid number of lines"));
980           break;
981         case 'm':
982           parallel_files = true;
983           storing_columns = false;
984           break;
985         case 'n':
986           numbered_lines = true;
987           if (optarg)
988             getoptarg (optarg, 'n', &number_separator,
989                        &chars_per_number);
990           break;
991         case 'N':
992           skip_count = false;
993           getoptnum (optarg, INT_MIN, &start_line_num,
994                      _("'-N NUMBER' invalid starting line number"));
995           break;
996         case 'o':
997           getoptnum (optarg, 0, &chars_per_margin,
998                      _("'-o MARGIN' invalid line offset"));
999           break;
1000         case 'r':
1001           ignore_failed_opens = true;
1002           break;
1003         case 's':
1004           old_options = true;
1005           old_s = true;
1006           if (!use_col_separator && optarg)
1007             separator_string (optarg);
1008           break;
1009         case 'S':
1010           old_s = false;
1011           /* Reset an additional input of -s, -S dominates -s */
1012           col_sep_string = "";
1013           col_sep_length = 0;
1014           use_col_separator = true;
1015           if (optarg)
1016             separator_string (optarg);
1017           break;
1018         case 't':
1019           extremities = false;
1020           keep_FF = true;
1021           break;
1022         case 'T':
1023           extremities = false;
1024           keep_FF = false;
1025           break;
1026         case 'v':
1027           use_esc_sequence = true;
1028           break;
1029         case 'w':
1030           old_options = true;
1031           old_w = true;
1032           {
1033             int tmp_cpl;
1034             getoptnum (optarg, 1, &tmp_cpl,
1035                        _("'-w PAGE_WIDTH' invalid number of characters"));
1036             if (! truncate_lines)
1037               chars_per_line = tmp_cpl;
1038           }
1039           break;
1040         case 'W':
1041           old_w = false;			/* dominates -w */
1042           truncate_lines = true;
1043           getoptnum (optarg, 1, &chars_per_line,
1044                      _("'-W PAGE_WIDTH' invalid number of characters"));
1045           break;
1046         case_GETOPT_HELP_CHAR;
1047         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1048         default:
1049           usage (EXIT_FAILURE);
1050           break;
1051         }
1052     }
1053 
1054   if (column_count_string)
1055     {
1056       parse_column_count (column_count_string);
1057       free (column_count_string);
1058     }
1059 
1060   if (! date_format)
1061     date_format = (getenv ("POSIXLY_CORRECT") && !hard_locale (LC_TIME)
1062                    ? "%b %e %H:%M %Y"
1063                    : "%Y-%m-%d %H:%M");
1064 
1065   localtz = tzalloc (getenv ("TZ"));
1066 
1067   /* Now we can set a reasonable initial value: */
1068   if (first_page_number == 0)
1069     first_page_number = 1;
1070 
1071   if (parallel_files && explicit_columns)
1072     error (EXIT_FAILURE, 0,
1073            _("cannot specify number of columns when printing in parallel"));
1074 
1075   if (parallel_files && print_across_flag)
1076     error (EXIT_FAILURE, 0,
1077            _("cannot specify both printing across and printing in parallel"));
1078 
1079 /* Translate some old short options to new/long options.
1080    To meet downward compatibility with other UNIX pr utilities
1081    and some POSIX specifications. */
1082 
1083   if (old_options)
1084     {
1085       if (old_w)
1086         {
1087           if (parallel_files || explicit_columns)
1088             {
1089               /* activate -W */
1090               truncate_lines = true;
1091               if (old_s)
1092                 /* adapt HP-UX and SunOS: -s = no separator;
1093                    activate -S */
1094                 use_col_separator = true;
1095             }
1096           else
1097             /* old -w sets width with columns only
1098                activate -J */
1099             join_lines = true;
1100         }
1101       else if (!use_col_separator)
1102         {
1103           /* No -S option read */
1104           if (old_s && (parallel_files || explicit_columns))
1105             {
1106               if (!truncate_lines)
1107                 {
1108                   /* old -s (without -w and -W) annuls column alignment,
1109                   uses fields, activate -J */
1110                   join_lines = true;
1111                   if (col_sep_length > 0)
1112                     /* activate -S */
1113                     use_col_separator = true;
1114                 }
1115               else
1116                 /* with -W */
1117                 /* adapt HP-UX and SunOS: -s = no separator;
1118                    activate -S */
1119                 use_col_separator = true;
1120             }
1121         }
1122     }
1123 
1124   for (; optind < argc; optind++)
1125     {
1126       file_names[n_files++] = argv[optind];
1127     }
1128 
1129   if (n_files == 0)
1130     {
1131       /* No file arguments specified;  read from standard input.  */
1132       print_files (0, nullptr);
1133     }
1134   else
1135     {
1136       if (parallel_files)
1137         print_files (n_files, file_names);
1138       else
1139         {
1140           for (unsigned int i = 0; i < n_files; i++)
1141             print_files (1, &file_names[i]);
1142         }
1143     }
1144 
1145   cleanup ();
1146 
1147   if (have_read_stdin && fclose (stdin) == EOF)
1148     error (EXIT_FAILURE, errno, _("standard input"));
1149   main_exit (failed_opens ? EXIT_FAILURE : EXIT_SUCCESS);
1150 }
1151 
/* Convert the decimal string N_STR and store the result in *NUM.
   The value must satisfy MIN <= value <= INT_MAX; ERR is the
   diagnostic text handed to xdectoimax for values that do not.  */

static void
getoptnum (char const *n_str, int min, int *num, char const *err)
{
  *num = xdectoimax (n_str, min, INT_MAX, "", err, 0);
}
1160 
1161 /* Parse options of the form -scNNN.
1162 
1163    Example: -nck, where 'n' is the option, c is the optional number
1164    separator, and k is the optional width of the field used when printing
1165    a number. */
1166 
1167 static void
getoptarg(char * arg,char switch_char,char * character,int * number)1168 getoptarg (char *arg, char switch_char, char *character, int *number)
1169 {
1170   if (!*arg)
1171     {
1172       error (0, 0, _("'-%c': Invalid argument: %s"), switch_char, quote (arg));
1173       usage (EXIT_FAILURE);
1174     }
1175 
1176   if (!ISDIGIT (*arg))
1177     *character = *arg++;
1178   if (*arg)
1179     {
1180       long int tmp_long;
1181       strtol_error e = xstrtol (arg, nullptr, 10, &tmp_long, "");
1182       if (e == LONGINT_OK)
1183         {
1184           if (tmp_long <= 0)
1185             e = LONGINT_INVALID;
1186           else if (INT_MAX < tmp_long)
1187             e = LONGINT_OVERFLOW;
1188         }
1189       if (e != LONGINT_OK)
1190         {
1191           error (0, e & LONGINT_OVERFLOW ? EOVERFLOW : 0,
1192              _("'-%c' extra characters or invalid number in the argument: %s"),
1193                  switch_char, quote (arg));
1194           usage (EXIT_FAILURE);
1195         }
1196       *number = tmp_long;
1197     }
1198 }
1199 
1200 /* Set parameters related to formatting. */
1201 
1202 static void
init_parameters(int number_of_files)1203 init_parameters (int number_of_files)
1204 {
1205   int chars_used_by_number = 0;
1206 
1207   lines_per_body = lines_per_page - lines_per_header - lines_per_footer;
1208   if (lines_per_body <= 0)
1209     {
1210       extremities = false;
1211       keep_FF = true;
1212     }
1213   if (extremities == false)
1214     lines_per_body = lines_per_page;
1215 
1216   if (double_space)
1217     lines_per_body = MAX (1, lines_per_body / 2);
1218 
1219   /* If input is stdin, cannot print parallel files.  BSD dumps core
1220      on this. */
1221   if (number_of_files == 0)
1222     parallel_files = false;
1223 
1224   if (parallel_files)
1225     columns = number_of_files;
1226 
1227   /* One file, multi columns down: -b option is set to get a consistent
1228      formulation with "FF set by hand" in input files. */
1229   if (storing_columns)
1230     balance_columns = true;
1231 
1232   /* Tabification is assumed for multiple columns. */
1233   if (columns > 1)
1234     {
1235       if (!use_col_separator)
1236         {
1237           /* Use default separator */
1238           if (join_lines)
1239             col_sep_string = line_separator;
1240           else
1241             col_sep_string = column_separator;
1242 
1243           col_sep_length = 1;
1244           use_col_separator = true;
1245         }
1246       /* It's rather pointless to define a TAB separator with column
1247          alignment */
1248       else if (!join_lines && col_sep_length == 1 && *col_sep_string == '\t')
1249         col_sep_string = column_separator;
1250 
1251       truncate_lines = true;
1252       if (! (col_sep_length == 1 && *col_sep_string == '\t'))
1253         untabify_input = true;
1254       tabify_output = true;
1255     }
1256   else
1257     storing_columns = false;
1258 
1259   /* -J dominates -w in any case */
1260   if (join_lines)
1261     truncate_lines = false;
1262 
1263   if (numbered_lines)
1264     {
1265       int chars_per_default_tab = 8;
1266 
1267       line_count = start_line_num;
1268 
1269       /* To allow input tab-expansion (-e sensitive) use:
1270          if (number_separator == input_tab_char)
1271            number_width = chars_per_number
1272              + TAB_WIDTH (chars_per_input_tab, chars_per_number);   */
1273 
1274       /* Estimate chars_per_text without any margin and keep it constant. */
1275       if (number_separator == '\t')
1276         number_width = (chars_per_number
1277                         + TAB_WIDTH (chars_per_default_tab, chars_per_number));
1278       else
1279         number_width = chars_per_number + 1;
1280 
1281       /* The number is part of the column width unless we are
1282          printing files in parallel. */
1283       if (parallel_files)
1284         chars_used_by_number = number_width;
1285     }
1286 
1287   int sep_chars, useful_chars;
1288   if (ckd_mul (&sep_chars, columns - 1, col_sep_length))
1289     sep_chars = INT_MAX;
1290   if (ckd_sub (&useful_chars, chars_per_line - chars_used_by_number,
1291                sep_chars))
1292     useful_chars = 0;
1293   chars_per_column = useful_chars / columns;
1294 
1295   if (chars_per_column < 1)
1296     error (EXIT_FAILURE, 0, _("page width too narrow"));
1297 
1298   if (numbered_lines)
1299     {
1300       free (number_buff);
1301       number_buff = xmalloc (MAX (chars_per_number,
1302                                   INT_STRLEN_BOUND (line_number)) + 1);
1303     }
1304 
1305   /* Pick the maximum between the tab width and the width of an
1306      escape sequence.
1307      The width of an escape sequence (4) isn't the lower limit any longer.
1308      We've to use 8 as the lower limit, if we use chars_per_default_tab = 8
1309      to expand a tab which is not an input_tab-char. */
1310   free (clump_buff);
1311   clump_buff = xmalloc (MAX (8, chars_per_input_tab));
1312 }
1313 
1314 /* Open the necessary files,
1315    maintaining a COLUMN structure for each column.
1316 
1317    With multiple files, each column p has a different p->fp.
1318    With single files, each column p has the same p->fp.
1319    Return false if (number_of_files > 0) and no files can be opened,
1320    true otherwise.
1321 
1322    With each column/file p, p->full_page_printed is initialized,
1323    see also open_file.  */
1324 
1325 static bool
init_fps(int number_of_files,char ** av)1326 init_fps (int number_of_files, char **av)
1327 {
1328   COLUMN *p;
1329 
1330   total_files = 0;
1331 
1332   free (column_vector);
1333   column_vector = xnmalloc (columns, sizeof (COLUMN));
1334 
1335   if (parallel_files)
1336     {
1337       int files_left = number_of_files;
1338       for (p = column_vector; files_left--; ++p, ++av)
1339         {
1340           if (! open_file (*av, p))
1341             {
1342               --p;
1343               --columns;
1344             }
1345         }
1346       if (columns == 0)
1347         return false;
1348       init_header ("", -1);
1349     }
1350   else
1351     {
1352       p = column_vector;
1353       if (number_of_files > 0)
1354         {
1355           if (! open_file (*av, p))
1356             return false;
1357           init_header (*av, fileno (p->fp));
1358           p->lines_stored = 0;
1359         }
1360       else
1361         {
1362           p->name = _("standard input");
1363           p->fp = stdin;
1364           have_read_stdin = true;
1365           p->status = OPEN;
1366           p->full_page_printed = false;
1367           ++total_files;
1368           init_header ("", -1);
1369           p->lines_stored = 0;
1370         }
1371 
1372       char const *firstname = p->name;
1373       FILE *firstfp = p->fp;
1374       int i;
1375       for (i = columns - 1, ++p; i; --i, ++p)
1376         {
1377           p->name = firstname;
1378           p->fp = firstfp;
1379           p->status = OPEN;
1380           p->full_page_printed = false;
1381           p->lines_stored = 0;
1382         }
1383     }
1384   files_ready_to_read = total_files;
1385   return true;
1386 }
1387 
1388 /* Determine print_func and char_func, the functions
1389    used by each column for printing and/or storing.
1390 
1391    Determine the horizontal position desired when we begin
1392    printing a column (p->start_position). */
1393 
1394 static void
init_funcs(void)1395 init_funcs (void)
1396 {
1397   int i, h, h_next;
1398   COLUMN *p;
1399 
1400   h = chars_per_margin;
1401 
1402   if (!truncate_lines)
1403     h_next = ANYWHERE;
1404   else
1405     {
1406       /* When numbering lines of parallel files, we enlarge the
1407          first column to accommodate the number.  Looks better than
1408          the Sys V approach. */
1409       if (parallel_files && numbered_lines)
1410         h_next = h + chars_per_column + number_width;
1411       else
1412         h_next = h + chars_per_column;
1413     }
1414 
1415   /* Enlarge p->start_position of first column to use the same form of
1416      padding_not_printed with all columns. */
1417   h = h + col_sep_length;
1418 
1419   /* This loop takes care of all but the rightmost column. */
1420 
1421   for (p = column_vector, i = 1; i < columns; ++p, ++i)
1422     {
1423       if (storing_columns)	/* One file, multi columns down. */
1424         {
1425           p->char_func = store_char;
1426           p->print_func = print_stored;
1427         }
1428       else
1429         /* One file, multi columns across; or parallel files.  */
1430         {
1431           p->char_func = print_char;
1432           p->print_func = read_line;
1433         }
1434 
1435       /* Number only the first column when printing files in
1436          parallel. */
1437       p->numbered = numbered_lines && (!parallel_files || i == 1);
1438       p->start_position = h;
1439 
1440       /* If we don't truncate lines, all start_positions are
1441          ANYWHERE, except the first column's start_position when
1442          using a margin. */
1443 
1444       if (!truncate_lines)
1445         {
1446           h = ANYWHERE;
1447           h_next = ANYWHERE;
1448         }
1449       else
1450         {
1451           h = h_next + col_sep_length;
1452           h_next = h + chars_per_column;
1453         }
1454     }
1455 
1456   /* The rightmost column.
1457 
1458      Doesn't need to be stored unless we intend to balance
1459      columns on the last page. */
1460   if (storing_columns && balance_columns)
1461     {
1462       p->char_func = store_char;
1463       p->print_func = print_stored;
1464     }
1465   else
1466     {
1467       p->char_func = print_char;
1468       p->print_func = read_line;
1469     }
1470 
1471   p->numbered = numbered_lines && (!parallel_files || i == 1);
1472   p->start_position = h;
1473 }
1474 
1475 /* Open a file.  Return true if successful.
1476 
1477    With each file p, p->full_page_printed is initialized,
1478    see also init_fps. */
1479 
1480 static bool
open_file(char * name,COLUMN * p)1481 open_file (char *name, COLUMN *p)
1482 {
1483   if (STREQ (name, "-"))
1484     {
1485       p->name = _("standard input");
1486       p->fp = stdin;
1487       have_read_stdin = true;
1488     }
1489   else
1490     {
1491       p->name = name;
1492       p->fp = fopen (name, "r");
1493     }
1494   if (p->fp == nullptr)
1495     {
1496       failed_opens = true;
1497       if (!ignore_failed_opens)
1498         error (0, errno, "%s", quotef (name));
1499       return false;
1500     }
1501   fadvise (p->fp, FADVISE_SEQUENTIAL);
1502   p->status = OPEN;
1503   p->full_page_printed = false;
1504   ++total_files;
1505   return true;
1506 }
1507 
1508 /* Close the file in P.
1509 
1510    If we aren't dealing with multiple files in parallel, we change
1511    the status of all columns in the column list to reflect the close. */
1512 
1513 static void
close_file(COLUMN * p)1514 close_file (COLUMN *p)
1515 {
1516   COLUMN *q;
1517   int i;
1518 
1519   if (p->status == CLOSED)
1520     return;
1521 
1522   int err = errno;
1523   if (!ferror (p->fp))
1524     err = 0;
1525   if (fileno (p->fp) == STDIN_FILENO)
1526     clearerr (p->fp);
1527   else if (fclose (p->fp) != 0 && !err)
1528     err = errno;
1529   if (err)
1530     error (EXIT_FAILURE, err, "%s", quotef (p->name));
1531 
1532   if (!parallel_files)
1533     {
1534       for (q = column_vector, i = columns; i; ++q, --i)
1535         {
1536           q->status = CLOSED;
1537           if (q->lines_stored == 0)
1538             {
1539               q->lines_to_print = 0;
1540             }
1541         }
1542     }
1543   else
1544     {
1545       p->status = CLOSED;
1546       p->lines_to_print = 0;
1547     }
1548 
1549   --files_ready_to_read;
1550 }
1551 
1552 /* Put a file on hold until we start a new page,
1553    since we've hit a form feed.
1554 
1555    If we aren't dealing with parallel files, we must change the
1556    status of all columns in the column list. */
1557 
1558 static void
hold_file(COLUMN * p)1559 hold_file (COLUMN *p)
1560 {
1561   COLUMN *q;
1562   int i;
1563 
1564   if (!parallel_files)
1565     for (q = column_vector, i = columns; i; ++q, --i)
1566       {
1567         if (storing_columns)
1568           q->status = FF_FOUND;
1569         else
1570           q->status = ON_HOLD;
1571       }
1572   else
1573     p->status = ON_HOLD;
1574 
1575   p->lines_to_print = 0;
1576   --files_ready_to_read;
1577 }
1578 
1579 /* Undo hold_file -- go through the column list and change any
1580    ON_HOLD columns to OPEN.  Used at the end of each page. */
1581 
1582 static void
reset_status(void)1583 reset_status (void)
1584 {
1585   int i = columns;
1586   COLUMN *p;
1587 
1588   for (p = column_vector; i; --i, ++p)
1589     if (p->status == ON_HOLD)
1590       {
1591         p->status = OPEN;
1592         files_ready_to_read++;
1593       }
1594 
1595   if (storing_columns)
1596     {
1597       if (column_vector->status == CLOSED)
1598         /* We use the info to output an error message in  skip_to_page. */
1599         files_ready_to_read = 0;
1600       else
1601         files_ready_to_read = 1;
1602     }
1603 }
1604 
1605 /* Print a single file, or multiple files in parallel.
1606 
1607    Set up the list of columns, opening the necessary files.
1608    Allocate space for storing columns, if necessary.
1609    Skip to first_page_number, if user has asked to skip leading pages.
1610    Determine which functions are appropriate to store/print lines
1611    in each column.
1612    Print the file(s). */
1613 
1614 static void
print_files(int number_of_files,char ** av)1615 print_files (int number_of_files, char **av)
1616 {
1617   init_parameters (number_of_files);
1618   if (! init_fps (number_of_files, av))
1619     return;
1620   if (storing_columns)
1621     init_store_cols ();
1622 
1623   if (first_page_number > 1)
1624     {
1625       if (!skip_to_page (first_page_number))
1626         return;
1627       else
1628         page_number = first_page_number;
1629     }
1630   else
1631     page_number = 1;
1632 
1633   init_funcs ();
1634 
1635   line_number = line_count;
1636   while (print_page ())
1637     ;
1638 }
1639 
1640 /* Initialize header information.
1641    If DESC is non-negative, it is a file descriptor open to
1642    FILENAME for reading.  */
1643 
1644 static void
init_header(char const * filename,int desc)1645 init_header (char const *filename, int desc)
1646 {
1647   char *buf = nullptr;
1648   struct stat st;
1649   struct timespec t;
1650   int ns;
1651   struct tm tm;
1652 
1653   /* If parallel files or standard input, use current date. */
1654   if (STREQ (filename, "-"))
1655     desc = -1;
1656   if (0 <= desc && fstat (desc, &st) == 0)
1657     t = get_stat_mtime (&st);
1658   else
1659     {
1660       static struct timespec timespec;
1661       if (! timespec.tv_sec)
1662         gettime (&timespec);
1663       t = timespec;
1664     }
1665 
1666   ns = t.tv_nsec;
1667   if (localtime_rz (localtz, &t.tv_sec, &tm))
1668     {
1669       size_t bufsize
1670         = nstrftime (nullptr, SIZE_MAX, date_format, &tm, localtz, ns) + 1;
1671       buf = xmalloc (bufsize);
1672       nstrftime (buf, bufsize, date_format, &tm, localtz, ns);
1673     }
1674   else
1675     {
1676       char secbuf[INT_BUFSIZE_BOUND (intmax_t)];
1677       buf = xmalloc (sizeof secbuf + MAX (10, INT_BUFSIZE_BOUND (int)));
1678       sprintf (buf, "%s.%09d", timetostr (t.tv_sec, secbuf), ns);
1679     }
1680 
1681   free (date_text);
1682   date_text = buf;
1683   file_text = custom_header ? custom_header : desc < 0 ? "" : filename;
1684   header_width_available = (chars_per_line
1685                             - mbswidth (date_text, 0)
1686                             - mbswidth (file_text, 0));
1687 }
1688 
1689 /* Set things up for printing a page
1690 
1691    Scan through the columns ...
1692    Determine which are ready to print
1693    (i.e., which have lines stored or open files)
1694    Set p->lines_to_print appropriately
1695    (to p->lines_stored if we're storing, or lines_per_body
1696    if we're reading straight from the file)
1697    Keep track of this total so we know when to stop printing */
1698 
1699 static void
init_page(void)1700 init_page (void)
1701 {
1702   int j;
1703   COLUMN *p;
1704 
1705   if (storing_columns)
1706     {
1707       store_columns ();
1708       for (j = columns - 1, p = column_vector; j; --j, ++p)
1709         {
1710           p->lines_to_print = p->lines_stored;
1711         }
1712 
1713       /* Last column. */
1714       if (balance_columns)
1715         {
1716           p->lines_to_print = p->lines_stored;
1717         }
1718       /* Since we're not balancing columns, we don't need to store
1719          the rightmost column.   Read it straight from the file. */
1720       else
1721         {
1722           if (p->status == OPEN)
1723             {
1724               p->lines_to_print = lines_per_body;
1725             }
1726           else
1727             p->lines_to_print = 0;
1728         }
1729     }
1730   else
1731     for (j = columns, p = column_vector; j; --j, ++p)
1732       if (p->status == OPEN)
1733         {
1734           p->lines_to_print = lines_per_body;
1735         }
1736       else
1737         p->lines_to_print = 0;
1738 }
1739 
1740 /* Align empty columns and print separators.
1741    Empty columns will be formed by files with status ON_HOLD or CLOSED
1742    when printing multiple files in parallel. */
1743 
1744 static void
align_column(COLUMN * p)1745 align_column (COLUMN *p)
1746 {
1747   padding_not_printed = p->start_position;
1748   if (col_sep_length < padding_not_printed)
1749     {
1750       pad_across_to (padding_not_printed - col_sep_length);
1751       padding_not_printed = ANYWHERE;
1752     }
1753 
1754   if (use_col_separator)
1755     print_sep_string ();
1756 
1757   if (p->numbered)
1758     add_line_number (p);
1759 }
1760 
1761 /* Print one page.
1762 
1763    As long as there are lines left on the page and columns ready to print,
1764    Scan across the column list
1765    if the column has stored lines or the file is open
1766    pad to the appropriate spot
1767    print the column
1768    pad the remainder of the page with \n or \f as requested
1769    reset the status of all files -- any files which where on hold because
1770    of formfeeds are now put back into the lineup. */
1771 
1772 static bool
print_page(void)1773 print_page (void)
1774 {
1775   int j;
1776   int lines_left_on_page;
1777   COLUMN *p;
1778 
1779   /* Used as an accumulator (with | operator) of successive values of
1780      pad_vertically.  The trick is to set pad_vertically
1781      to false before each run through the inner loop, then after that
1782      loop, it tells us whether a line was actually printed (whether a
1783      newline needs to be output -- or two for double spacing).  But those
1784      values have to be accumulated (in pv) so we can invoke pad_down
1785      properly after the outer loop completes. */
1786   bool pv;
1787 
1788   init_page ();
1789 
1790   if (cols_ready_to_print () == 0)
1791     return false;
1792 
1793   if (extremities)
1794     print_a_header = true;
1795 
1796   /* Don't pad unless we know a page was printed. */
1797   pad_vertically = false;
1798   pv = false;
1799 
1800   lines_left_on_page = lines_per_body;
1801   if (double_space)
1802     lines_left_on_page *= 2;
1803 
1804   while (lines_left_on_page > 0 && cols_ready_to_print () > 0)
1805     {
1806       output_position = 0;
1807       spaces_not_printed = 0;
1808       separators_not_printed = 0;
1809       pad_vertically = false;
1810       align_empty_cols = false;
1811       empty_line = true;
1812 
1813       for (j = 1, p = column_vector; j <= columns; ++j, ++p)
1814         {
1815           input_position = 0;
1816           if (p->lines_to_print > 0 || p->status == FF_FOUND)
1817             {
1818               FF_only = false;
1819               padding_not_printed = p->start_position;
1820               if (!(p->print_func) (p))
1821                 read_rest_of_line (p);
1822               pv |= pad_vertically;
1823 
1824               --p->lines_to_print;
1825               if (p->lines_to_print <= 0)
1826                 {
1827                   if (cols_ready_to_print () == 0)
1828                     break;
1829                 }
1830 
1831               /* File p changed its status to ON_HOLD or CLOSED */
1832               if (parallel_files && p->status != OPEN)
1833                 {
1834                   if (empty_line)
1835                     align_empty_cols = true;
1836                   else if (p->status == CLOSED
1837                            || (p->status == ON_HOLD && FF_only))
1838                     align_column (p);
1839                 }
1840             }
1841           else if (parallel_files)
1842             {
1843               /* File status ON_HOLD or CLOSED */
1844               if (empty_line)
1845                 align_empty_cols = true;
1846               else
1847                 align_column (p);
1848             }
1849 
1850           /* We need it also with an empty column */
1851           if (use_col_separator)
1852             ++separators_not_printed;
1853         }
1854 
1855       if (pad_vertically)
1856         {
1857           putchar ('\n');
1858           --lines_left_on_page;
1859         }
1860 
1861       if (cols_ready_to_print () == 0 && !extremities)
1862         break;
1863 
1864       if (double_space && pv)
1865         {
1866           putchar ('\n');
1867           --lines_left_on_page;
1868         }
1869     }
1870 
1871   if (lines_left_on_page == 0)
1872     for (j = 1, p = column_vector; j <= columns; ++j, ++p)
1873       if (p->status == OPEN)
1874         p->full_page_printed = true;
1875 
1876   pad_vertically = pv;
1877 
1878   if (pad_vertically && extremities)
1879     pad_down (lines_left_on_page + lines_per_footer);
1880   else if (keep_FF && print_a_FF)
1881     {
1882       putchar ('\f');
1883       print_a_FF = false;
1884     }
1885 
1886   if (last_page_number < ++page_number)
1887     return false;		/* Stop printing with LAST_PAGE */
1888 
1889   reset_status ();		/* Change ON_HOLD to OPEN. */
1890 
1891   return true;			/* More pages to go. */
1892 }
1893 
1894 /* Allocate space for storing columns.
1895 
1896    This is necessary when printing multiple columns from a single file.
1897    Lines are stored consecutively in buff, separated by '\0'.
1898 
1899    The following doesn't apply any longer - any tuning possible?
1900    (We can't use a fixed offset since with the '-s' flag lines aren't
1901    truncated.)
1902 
1903    We maintain a list (line_vector) of pointers to the beginnings
1904    of lines in buff.  We allocate one more than the number of lines
1905    because the last entry tells us the index of the last character,
1906    which we need to know in order to print the last line in buff. */
1907 
1908 static void
init_store_cols(void)1909 init_store_cols (void)
1910 {
1911   int total_lines, total_lines_1, chars_per_column_1, chars_if_truncate;
1912   if (ckd_mul (&total_lines, lines_per_body, columns)
1913       || ckd_add (&total_lines_1, total_lines, 1)
1914       || ckd_add (&chars_per_column_1, chars_per_column, 1)
1915       || ckd_mul (&chars_if_truncate, total_lines, chars_per_column_1))
1916     integer_overflow ();
1917 
1918   free (line_vector);
1919   /* FIXME: here's where it was allocated.  */
1920   line_vector = xnmalloc (total_lines_1, sizeof *line_vector);
1921 
1922   free (end_vector);
1923   end_vector = xnmalloc (total_lines, sizeof *end_vector);
1924 
1925   free (buff);
1926   buff = xnmalloc (chars_if_truncate, use_col_separator + 1);
1927   buff_allocated = chars_if_truncate;  /* Tune this. */
1928   buff_allocated *= use_col_separator + 1;
1929 }
1930 
1931 /* Store all but the rightmost column.
1932    (Used when printing a single file in multiple downward columns)
1933 
1934    For each column
1935    set p->current_line to be the index in line_vector of the
1936    first line in the column
1937    For each line in the column
1938    store the line in buff
1939    add to line_vector the index of the line's first char
1940    buff_start is the index in buff of the first character in the
1941    current line. */
1942 
1943 static void
store_columns(void)1944 store_columns (void)
1945 {
1946   int i, j;
1947   unsigned int line = 0;
1948   unsigned int buff_start;
1949   int last_col;		/* The rightmost column which will be saved in buff */
1950   COLUMN *p;
1951 
1952   buff_current = 0;
1953   buff_start = 0;
1954 
1955   if (balance_columns)
1956     last_col = columns;
1957   else
1958     last_col = columns - 1;
1959 
1960   for (i = 1, p = column_vector; i <= last_col; ++i, ++p)
1961     p->lines_stored = 0;
1962 
1963   for (i = 1, p = column_vector; i <= last_col && files_ready_to_read;
1964        ++i, ++p)
1965     {
1966       p->current_line = line;
1967       for (j = lines_per_body; j && files_ready_to_read; --j)
1968 
1969         if (p->status == OPEN)	/* Redundant.  Clean up. */
1970           {
1971             input_position = 0;
1972 
1973             if (!read_line (p))
1974               read_rest_of_line (p);
1975 
1976             if (p->status == OPEN
1977                 || buff_start != buff_current)
1978               {
1979                 ++p->lines_stored;
1980                 line_vector[line] = buff_start;
1981                 end_vector[line++] = input_position;
1982                 buff_start = buff_current;
1983               }
1984           }
1985     }
1986 
1987   /* Keep track of the location of the last char in buff. */
1988   line_vector[line] = buff_start;
1989 
1990   if (balance_columns)
1991     balance (line);
1992 }
1993 
1994 static void
balance(int total_stored)1995 balance (int total_stored)
1996 {
1997   COLUMN *p;
1998   int i, lines;
1999   int first_line = 0;
2000 
2001   for (i = 1, p = column_vector; i <= columns; ++i, ++p)
2002     {
2003       lines = total_stored / columns;
2004       if (i <= total_stored % columns)
2005         ++lines;
2006 
2007       p->lines_stored = lines;
2008       p->current_line = first_line;
2009 
2010       first_line += lines;
2011     }
2012 }
2013 
2014 /* Store a character in the buffer. */
2015 
2016 static void
store_char(char c)2017 store_char (char c)
2018 {
2019   if (buff_current >= buff_allocated)
2020     {
2021       /* May be too generous. */
2022       buff = X2REALLOC (buff, &buff_allocated);
2023     }
2024   buff[buff_current++] = c;
2025 }
2026 
2027 static void
add_line_number(COLUMN * p)2028 add_line_number (COLUMN *p)
2029 {
2030   int i;
2031   char *s;
2032   int num_width;
2033 
2034   /* Cutting off the higher-order digits is more informative than
2035      lower-order cut off. */
2036   num_width = sprintf (number_buff, "%*d", chars_per_number, line_number);
2037   line_number++;
2038   s = number_buff + (num_width - chars_per_number);
2039   for (i = chars_per_number; i > 0; i--)
2040     (p->char_func) (*s++);
2041 
2042   if (columns > 1)
2043     {
2044       /* Tabification is assumed for multiple columns, also for n-separators,
2045          but 'default n-separator = TAB' hasn't been given priority over
2046          equal column_width also specified by POSIX. */
2047       if (number_separator == '\t')
2048         {
2049           i = number_width - chars_per_number;
2050           while (i-- > 0)
2051             (p->char_func) (' ');
2052         }
2053       else
2054         (p->char_func) (number_separator);
2055     }
2056   else
2057     /* To comply with POSIX, we avoid any expansion of default TAB
2058        separator with a single column output. No column_width requirement
2059        has to be considered. */
2060     {
2061       (p->char_func) (number_separator);
2062       if (number_separator == '\t')
2063         output_position = POS_AFTER_TAB (chars_per_output_tab,
2064                           output_position);
2065     }
2066 
2067   if (truncate_lines && !parallel_files)
2068     input_position += number_width;
2069 }
2070 
2071 /* Print (or store) padding until the current horizontal position
2072    is position. */
2073 
2074 static void
pad_across_to(int position)2075 pad_across_to (int position)
2076 {
2077   int h = output_position;
2078 
2079   if (tabify_output)
2080     spaces_not_printed = position - output_position;
2081   else
2082     {
2083       while (++h <= position)
2084         putchar (' ');
2085       output_position = position;
2086     }
2087 }
2088 
2089 /* Pad to the bottom of the page.
2090 
2091    If the user has requested a formfeed, use one.
2092    Otherwise, use newlines. */
2093 
2094 static void
pad_down(unsigned int lines)2095 pad_down (unsigned int lines)
2096 {
2097   if (use_form_feed)
2098     putchar ('\f');
2099   else
2100     for (unsigned int i = lines; i; --i)
2101       putchar ('\n');
2102 }
2103 
2104 /* Read the rest of the line.
2105 
2106    Read from the current column's file until an end of line is
2107    hit.  Used when we've truncated a line and we no longer need
2108    to print or store its characters. */
2109 
2110 static void
read_rest_of_line(COLUMN * p)2111 read_rest_of_line (COLUMN *p)
2112 {
2113   int c;
2114   FILE *f = p->fp;
2115 
2116   while ((c = getc (f)) != '\n')
2117     {
2118       if (c == '\f')
2119         {
2120           if ((c = getc (f)) != '\n')
2121             ungetc (c, f);
2122           if (keep_FF)
2123             print_a_FF = true;
2124           hold_file (p);
2125           break;
2126         }
2127       else if (c == EOF)
2128         {
2129           close_file (p);
2130           break;
2131         }
2132     }
2133 }
2134 
2135 /* Read a line with skip_to_page.
2136 
2137    Read from the current column's file until an end of line is
2138    hit.  Used when we read full lines to skip pages.
2139    With skip_to_page we have to check for FF-coincidence which is done
2140    in function read_line otherwise.
2141    Count lines of skipped pages to find the line number of 1st page
2142    printed relative to 1st line of input file (start_line_num). */
2143 
2144 static void
skip_read(COLUMN * p,int column_number)2145 skip_read (COLUMN *p, int column_number)
2146 {
2147   int c;
2148   FILE *f = p->fp;
2149   int i;
2150   bool single_ff = false;
2151   COLUMN *q;
2152 
2153   /* Read 1st character in a line or any character succeeding a FF */
2154   if ((c = getc (f)) == '\f' && p->full_page_printed)
2155     /* A FF-coincidence with a previous full_page_printed.
2156        To avoid an additional empty page, eliminate the FF */
2157     if ((c = getc (f)) == '\n')
2158       c = getc (f);
2159 
2160   p->full_page_printed = false;
2161 
2162   /* 1st character a FF means a single FF without any printable
2163      characters. Don't count it as a line with -n option. */
2164   if (c == '\f')
2165     single_ff = true;
2166 
2167   /* Preparing for a FF-coincidence: Maybe we finish that page
2168      without a FF found */
2169   if (last_line)
2170     p->full_page_printed = true;
2171 
2172   while (c != '\n')
2173     {
2174       if (c == '\f')
2175         {
2176           /* No FF-coincidence possible,
2177              no catching up of a FF-coincidence with next page */
2178           if (last_line)
2179             {
2180               if (!parallel_files)
2181                 for (q = column_vector, i = columns; i; ++q, --i)
2182                   q->full_page_printed = false;
2183               else
2184                 p->full_page_printed = false;
2185             }
2186 
2187           if ((c = getc (f)) != '\n')
2188             ungetc (c, f);
2189           hold_file (p);
2190           break;
2191         }
2192       else if (c == EOF)
2193         {
2194           close_file (p);
2195           break;
2196         }
2197       c = getc (f);
2198     }
2199 
2200   if (skip_count)
2201     if ((!parallel_files || column_number == 1) && !single_ff)
2202       ++line_count;
2203 }
2204 
2205 /* If we're tabifying output,
2206 
2207    When print_char encounters white space it keeps track
2208    of our desired horizontal position and delays printing
2209    until this function is called. */
2210 
2211 static void
print_white_space(void)2212 print_white_space (void)
2213 {
2214   int h_new;
2215   int h_old = output_position;
2216   int goal = h_old + spaces_not_printed;
2217 
2218   while (goal - h_old > 1
2219          && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
2220     {
2221       putchar (output_tab_char);
2222       h_old = h_new;
2223     }
2224   while (++h_old <= goal)
2225     putchar (' ');
2226 
2227   output_position = goal;
2228   spaces_not_printed = 0;
2229 }
2230 
2231 /* Print column separators.
2232 
2233    We keep a count until we know that we'll be printing a line,
2234    then print_sep_string() is called. */
2235 
2236 static void
print_sep_string(void)2237 print_sep_string (void)
2238 {
2239   char const *s = col_sep_string;
2240   int l = col_sep_length;
2241 
2242   if (separators_not_printed <= 0)
2243     {
2244       /* We'll be starting a line with chars_per_margin, anything else? */
2245       if (spaces_not_printed > 0)
2246         print_white_space ();
2247     }
2248   else
2249     {
2250       for (; separators_not_printed > 0; --separators_not_printed)
2251         {
2252           while (l-- > 0)
2253             {
2254               /* 3 types of sep_strings: spaces only, spaces and chars,
2255               chars only */
2256               if (*s == ' ')
2257                 {
2258                   /* We're tabifying output; consecutive spaces in
2259                   sep_string may have to be converted to tabs */
2260                   s++;
2261                   ++spaces_not_printed;
2262                 }
2263               else
2264                 {
2265                   if (spaces_not_printed > 0)
2266                     print_white_space ();
2267                   putchar (*s++);
2268                   ++output_position;
2269                 }
2270             }
2271           /* sep_string ends with some spaces */
2272           if (spaces_not_printed > 0)
2273             print_white_space ();
2274         }
2275     }
2276 }
2277 
2278 /* Print (or store, depending on p->char_func) a clump of N
2279    characters. */
2280 
2281 static void
print_clump(COLUMN * p,int n,char * clump)2282 print_clump (COLUMN *p, int n, char *clump)
2283 {
2284   while (n--)
2285     (p->char_func) (*clump++);
2286 }
2287 
2288 /* Print a character.
2289 
2290    Update the following comment: process-char hasn't been used any
2291    longer.
2292    If we're tabifying, all tabs have been converted to spaces by
2293    process_char().  Keep a count of consecutive spaces, and when
2294    a nonspace is encountered, call print_white_space() to print the
2295    required number of tabs and spaces. */
2296 
2297 static void
print_char(char c)2298 print_char (char c)
2299 {
2300   if (tabify_output)
2301     {
2302       if (c == ' ')
2303         {
2304           ++spaces_not_printed;
2305           return;
2306         }
2307       else if (spaces_not_printed > 0)
2308         print_white_space ();
2309 
2310       /* Nonprintables are assumed to have width 0, except '\b'. */
2311       if (! isprint (to_uchar (c)))
2312         {
2313           if (c == '\b')
2314             --output_position;
2315         }
2316       else
2317         ++output_position;
2318     }
2319   putchar (c);
2320 }
2321 
2322 /* Skip to page PAGE before printing.
2323    PAGE may be larger than total number of pages. */
2324 
2325 static bool
skip_to_page(uintmax_t page)2326 skip_to_page (uintmax_t page)
2327 {
2328   for (uintmax_t n = 1; n < page; ++n)
2329     {
2330       COLUMN *p;
2331       int j;
2332 
2333       for (int i = 1; i < lines_per_body; ++i)
2334         {
2335           for (j = 1, p = column_vector; j <= columns; ++j, ++p)
2336             if (p->status == OPEN)
2337               skip_read (p, j);
2338         }
2339       last_line = true;
2340       for (j = 1, p = column_vector; j <= columns; ++j, ++p)
2341         if (p->status == OPEN)
2342           skip_read (p, j);
2343 
2344       if (storing_columns)	/* change FF_FOUND to ON_HOLD */
2345         for (j = 1, p = column_vector; j <= columns; ++j, ++p)
2346           if (p->status != CLOSED)
2347             p->status = ON_HOLD;
2348 
2349       reset_status ();
2350       last_line = false;
2351 
2352       if (files_ready_to_read < 1)
2353         {
2354           /* It's very helpful, normally the total number of pages is
2355              not known in advance.  */
2356           error (0, 0,
2357                  _("starting page number %ju exceeds page count %ju"),
2358                  page, n);
2359           break;
2360         }
2361     }
2362   return files_ready_to_read > 0;
2363 }
2364 
2365 /* Print a header.
2366 
2367    Formfeeds are assumed to use up two lines at the beginning of
2368    the page. */
2369 
2370 static void
print_header(void)2371 print_header (void)
2372 {
2373   char page_text[256 + INT_STRLEN_BOUND (page_number)];
2374   int available_width;
2375   int lhs_spaces;
2376   int rhs_spaces;
2377 
2378   output_position = 0;
2379   pad_across_to (chars_per_margin);
2380   print_white_space ();
2381 
2382   if (page_number == 0)
2383     error (EXIT_FAILURE, 0, _("page number overflow"));
2384 
2385   /* The translator must ensure that formatting the translation of
2386      "Page %ju" does not generate more than (sizeof page_text - 1)
2387      bytes.  */
2388   sprintf (page_text, _("Page %ju"), page_number);
2389   available_width = header_width_available - mbswidth (page_text, 0);
2390   available_width = MAX (0, available_width);
2391   lhs_spaces = available_width >> 1;
2392   rhs_spaces = available_width - lhs_spaces;
2393 
2394   printf ("\n\n%*s%s%*s%s%*s%s\n\n\n",
2395           chars_per_margin, "",
2396           date_text, lhs_spaces, " ",
2397           file_text, rhs_spaces, " ", page_text);
2398 
2399   print_a_header = false;
2400   output_position = 0;
2401 }
2402 
2403 /* Print (or store, if p->char_func is store_char()) a line.
2404 
2405    Read a character to determine whether we have a line or not.
2406    (We may hit EOF, \n, or \f)
2407 
2408    Once we know we have a line,
2409    set pad_vertically = true, meaning it's safe
2410    to pad down at the end of the page, since we do have a page.
2411    print a header if needed.
2412    pad across to padding_not_printed if needed.
2413    print any separators which need to be printed.
2414    print a line number if it needs to be printed.
2415 
2416    Print the clump which corresponds to the first character.
2417 
2418    Enter a loop and keep printing until an end of line condition
2419    exists, or until we exceed chars_per_column.
2420 
2421    Return false if we exceed chars_per_column before reading
2422    an end of line character, true otherwise. */
2423 
2424 static bool
read_line(COLUMN * p)2425 read_line (COLUMN *p)
2426 {
2427   int c;
2428   int chars;
2429   int last_input_position;
2430   int j, k;
2431   COLUMN *q;
2432 
2433   /* read 1st character in each line or any character succeeding a FF: */
2434   c = getc (p->fp);
2435 
2436   last_input_position = input_position;
2437 
2438   if (c == '\f' && p->full_page_printed)
2439     if ((c = getc (p->fp)) == '\n')
2440       c = getc (p->fp);
2441   p->full_page_printed = false;
2442 
2443   switch (c)
2444     {
2445     case '\f':
2446       if ((c = getc (p->fp)) != '\n')
2447         ungetc (c, p->fp);
2448       FF_only = true;
2449       if (print_a_header && !storing_columns)
2450         {
2451           pad_vertically = true;
2452           print_header ();
2453         }
2454       else if (keep_FF)
2455         print_a_FF = true;
2456       hold_file (p);
2457       return true;
2458     case EOF:
2459       close_file (p);
2460       return true;
2461     case '\n':
2462       break;
2463     default:
2464       chars = char_to_clump (c);
2465     }
2466 
2467   if (truncate_lines && input_position > chars_per_column)
2468     {
2469       input_position = last_input_position;
2470       return false;
2471     }
2472 
2473   if (p->char_func != store_char)
2474     {
2475       pad_vertically = true;
2476 
2477       if (print_a_header && !storing_columns)
2478         print_header ();
2479 
2480       if (parallel_files && align_empty_cols)
2481         {
2482           /* We have to align empty columns at the beginning of a line. */
2483           k = separators_not_printed;
2484           separators_not_printed = 0;
2485           for (j = 1, q = column_vector; j <= k; ++j, ++q)
2486             {
2487               align_column (q);
2488               separators_not_printed += 1;
2489             }
2490           padding_not_printed = p->start_position;
2491           if (truncate_lines)
2492             spaces_not_printed = chars_per_column;
2493           else
2494             spaces_not_printed = 0;
2495           align_empty_cols = false;
2496         }
2497 
2498       if (col_sep_length < padding_not_printed)
2499         {
2500           pad_across_to (padding_not_printed - col_sep_length);
2501           padding_not_printed = ANYWHERE;
2502         }
2503 
2504       if (use_col_separator)
2505         print_sep_string ();
2506     }
2507 
2508   if (p->numbered)
2509     add_line_number (p);
2510 
2511   empty_line = false;
2512   if (c == '\n')
2513     return true;
2514 
2515   print_clump (p, chars, clump_buff);
2516 
2517   while (true)
2518     {
2519       c = getc (p->fp);
2520 
2521       switch (c)
2522         {
2523         case '\n':
2524           return true;
2525         case '\f':
2526           if ((c = getc (p->fp)) != '\n')
2527             ungetc (c, p->fp);
2528           if (keep_FF)
2529             print_a_FF = true;
2530           hold_file (p);
2531           return true;
2532         case EOF:
2533           close_file (p);
2534           return true;
2535         }
2536 
2537       last_input_position = input_position;
2538       chars = char_to_clump (c);
2539       if (truncate_lines && input_position > chars_per_column)
2540         {
2541           input_position = last_input_position;
2542           return false;
2543         }
2544 
2545       print_clump (p, chars, clump_buff);
2546     }
2547 }
2548 
2549 /* Print a line from buff.
2550 
2551    If this function has been called, we know we have "something to
2552    print". But it remains to be seen whether we have a real text page
2553    or an empty page (a single form feed) with/without a header only.
2554    Therefore first we set pad_vertically to true and print a header
2555    if necessary.
2556    If FF_FOUND and we are using -t|-T option we omit any newline by
2557    setting pad_vertically to false (see print_page).
2558    Otherwise we pad across if necessary, print separators if necessary
2559    and text of COLUMN *p.
2560 
2561    Return true, meaning there is no need to call read_rest_of_line. */
2562 
2563 static bool
print_stored(COLUMN * p)2564 print_stored (COLUMN *p)
2565 {
2566   COLUMN *q;
2567 
2568   int line = p->current_line++;
2569   char *first = &buff[line_vector[line]];
2570   /* FIXME
2571      UMR: Uninitialized memory read:
2572      * This is occurring while in:
2573      print_stored   [pr.c:2239]
2574      * Reading 4 bytes from 0x5148c in the heap.
2575      * Address 0x5148c is 4 bytes into a malloc'd block at 0x51488 of 676 bytes
2576      * This block was allocated from:
2577      malloc         [rtlib.o]
2578      xmalloc        [xmalloc.c:94]
2579      init_store_cols [pr.c:1648]
2580      */
2581   char *last = &buff[line_vector[line + 1]];
2582 
2583   pad_vertically = true;
2584 
2585   if (print_a_header)
2586     print_header ();
2587 
2588   if (p->status == FF_FOUND)
2589     {
2590       int i;
2591       for (i = 1, q = column_vector; i <= columns; ++i, ++q)
2592         q->status = ON_HOLD;
2593       if (column_vector->lines_to_print <= 0)
2594         {
2595           if (!extremities)
2596             pad_vertically = false;
2597           return true;		/* print a header only */
2598         }
2599     }
2600 
2601   if (col_sep_length < padding_not_printed)
2602     {
2603       pad_across_to (padding_not_printed - col_sep_length);
2604       padding_not_printed = ANYWHERE;
2605     }
2606 
2607   if (use_col_separator)
2608     print_sep_string ();
2609 
2610   while (first != last)
2611     print_char (*first++);
2612 
2613   if (spaces_not_printed == 0)
2614     {
2615       output_position = p->start_position + end_vector[line];
2616       if (p->start_position - col_sep_length == chars_per_margin)
2617         output_position -= col_sep_length;
2618     }
2619 
2620   return true;
2621 }
2622 
2623 /* Convert a character to the proper format and return the number of
2624    characters in the resulting clump.  Increment input_position by
2625    the width of the clump.
2626 
2627    Tabs are converted to clumps of spaces.
2628    Nonprintable characters may be converted to clumps of escape
2629    sequences or control prefixes.
2630 
2631    Note: the width of a clump is not necessarily equal to the number of
2632    characters in clump_buff.  (e.g., the width of '\b' is -1, while the
2633    number of characters is 1.) */
2634 
2635 static int
char_to_clump(char c)2636 char_to_clump (char c)
2637 {
2638   unsigned char uc = c;
2639   char *s = clump_buff;
2640   int i;
2641   char esc_buff[4];
2642   int width;
2643   int chars;
2644   int chars_per_c = 8;
2645 
2646   if (c == input_tab_char)
2647     chars_per_c = chars_per_input_tab;
2648 
2649   if (c == input_tab_char || c == '\t')
2650     {
2651       width = TAB_WIDTH (chars_per_c, input_position);
2652 
2653       if (untabify_input)
2654         {
2655           for (i = width; i; --i)
2656             *s++ = ' ';
2657           chars = width;
2658         }
2659       else
2660         {
2661           *s = c;
2662           chars = 1;
2663         }
2664 
2665     }
2666   else if (! isprint (uc))
2667     {
2668       if (use_esc_sequence)
2669         {
2670           width = 4;
2671           chars = 4;
2672           *s++ = '\\';
2673           sprintf (esc_buff, "%03o", uc);
2674           for (i = 0; i <= 2; ++i)
2675             *s++ = esc_buff[i];
2676         }
2677       else if (use_cntrl_prefix)
2678         {
2679           if (uc < 0200)
2680             {
2681               width = 2;
2682               chars = 2;
2683               *s++ = '^';
2684               *s = c ^ 0100;
2685             }
2686           else
2687             {
2688               width = 4;
2689               chars = 4;
2690               *s++ = '\\';
2691               sprintf (esc_buff, "%03o", uc);
2692               for (i = 0; i <= 2; ++i)
2693                 *s++ = esc_buff[i];
2694             }
2695         }
2696       else if (c == '\b')
2697         {
2698           width = -1;
2699           chars = 1;
2700           *s = c;
2701         }
2702       else
2703         {
2704           width = 0;
2705           chars = 1;
2706           *s = c;
2707         }
2708     }
2709   else
2710     {
2711       width = 1;
2712       chars = 1;
2713       *s = c;
2714     }
2715 
2716   /* Too many backspaces must put us in position 0 -- never negative.  */
2717   if (width < 0 && input_position == 0)
2718     {
2719       chars = 0;
2720       input_position = 0;
2721     }
2722   else if (width < 0 && input_position <= -width)
2723     input_position = 0;
2724   else
2725     input_position += width;
2726 
2727   return chars;
2728 }
2729 
2730 /* We've just printed some files and need to clean up things before
2731    looking for more options and printing the next batch of files.
2732 
2733    Free everything we've xmalloc'ed, except 'header'. */
2734 
2735 static void
cleanup(void)2736 cleanup (void)
2737 {
2738   free (number_buff);
2739   free (clump_buff);
2740   free (column_vector);
2741   free (line_vector);
2742   free (end_vector);
2743   free (buff);
2744 }
2745 
2746 /* Complain, print a usage message, and die. */
2747 
2748 void
usage(int status)2749 usage (int status)
2750 {
2751   if (status != EXIT_SUCCESS)
2752     emit_try_help ();
2753   else
2754     {
2755       printf (_("\
2756 Usage: %s [OPTION]... [FILE]...\n\
2757 "),
2758               program_name);
2759 
2760       fputs (_("\
2761 Paginate or columnate FILE(s) for printing.\n\
2762 "), stdout);
2763 
2764       emit_stdin_note ();
2765       emit_mandatory_arg_note ();
2766 
2767       fputs (_("\
2768   +FIRST_PAGE[:LAST_PAGE], --pages=FIRST_PAGE[:LAST_PAGE]\n\
2769                     begin [stop] printing with page FIRST_[LAST_]PAGE\n\
2770   -COLUMN, --columns=COLUMN\n\
2771                     output COLUMN columns and print columns down,\n\
2772                     unless -a is used. Balance number of lines in the\n\
2773                     columns on each page\n\
2774 "), stdout);
2775       fputs (_("\
2776   -a, --across      print columns across rather than down, used together\n\
2777                     with -COLUMN\n\
2778   -c, --show-control-chars\n\
2779                     use hat notation (^G) and octal backslash notation\n\
2780   -d, --double-space\n\
2781                     double space the output\n\
2782 "), stdout);
2783       fputs (_("\
2784   -D, --date-format=FORMAT\n\
2785                     use FORMAT for the header date\n\
2786   -e[CHAR[WIDTH]], --expand-tabs[=CHAR[WIDTH]]\n\
2787                     expand input CHARs (TABs) to tab WIDTH (8)\n\
2788   -F, -f, --form-feed\n\
2789                     use form feeds instead of newlines to separate pages\n\
2790                     (by a 3-line page header with -F or a 5-line header\n\
2791                     and trailer without -F)\n\
2792 "), stdout);
2793       fputs (_("\
2794   -h, --header=HEADER\n\
2795                     use a centered HEADER instead of filename in page header,\n\
2796                     -h \"\" prints a blank line, don't use -h\"\"\n\
2797   -i[CHAR[WIDTH]], --output-tabs[=CHAR[WIDTH]]\n\
2798                     replace spaces with CHARs (TABs) to tab WIDTH (8)\n\
2799   -J, --join-lines  merge full lines, turns off -W line truncation, no column\n\
2800                     alignment, --sep-string[=STRING] sets separators\n\
2801 "), stdout);
2802       fputs (_("\
2803   -l, --length=PAGE_LENGTH\n\
2804                     set the page length to PAGE_LENGTH (66) lines\n\
2805                     (default number of lines of text 56, and with -F 63).\n\
2806                     implies -t if PAGE_LENGTH <= 10\n\
2807 "), stdout);
2808       fputs (_("\
2809   -m, --merge       print all files in parallel, one in each column,\n\
2810                     truncate lines, but join lines of full length with -J\n\
2811 "), stdout);
2812       fputs (_("\
2813   -n[SEP[DIGITS]], --number-lines[=SEP[DIGITS]]\n\
2814                     number lines, use DIGITS (5) digits, then SEP (TAB),\n\
2815                     default counting starts with 1st line of input file\n\
2816   -N, --first-line-number=NUMBER\n\
2817                     start counting with NUMBER at 1st line of first\n\
2818                     page printed (see +FIRST_PAGE)\n\
2819 "), stdout);
2820       fputs (_("\
2821   -o, --indent=MARGIN\n\
2822                     offset each line with MARGIN (zero) spaces, do not\n\
2823                     affect -w or -W, MARGIN will be added to PAGE_WIDTH\n\
2824   -r, --no-file-warnings\n\
2825                     omit warning when a file cannot be opened\n\
2826 "), stdout);
2827       fputs (_("\
2828   -s[CHAR], --separator[=CHAR]\n\
2829                     separate columns by a single character, default for CHAR\n\
2830                     is the <TAB> character without -w and \'no char\' with -w.\
2831 \n\
2832                     -s[CHAR] turns off line truncation of all 3 column\n\
2833                     options (-COLUMN|-a -COLUMN|-m) except -w is set\n\
2834 "), stdout);
2835       fputs (_("\
2836   -S[STRING], --sep-string[=STRING]\n\
2837                     separate columns by STRING,\n\
2838                     without -S: Default separator <TAB> with -J and <space>\n\
2839                     otherwise (same as -S\" \"), no effect on column options\n\
2840 "), stdout);
2841       fputs (_("\
2842   -t, --omit-header  omit page headers and trailers;\n\
2843                      implied if PAGE_LENGTH <= 10\n\
2844 "), stdout);
2845       fputs (_("\
2846   -T, --omit-pagination\n\
2847                     omit page headers and trailers, eliminate any pagination\n\
2848                     by form feeds set in input files\n\
2849   -v, --show-nonprinting\n\
2850                     use octal backslash notation\n\
2851   -w, --width=PAGE_WIDTH\n\
2852                     set page width to PAGE_WIDTH (72) characters for\n\
2853                     multiple text-column output only, -s[char] turns off (72)\n\
2854 "), stdout);
2855       fputs (_("\
2856   -W, --page-width=PAGE_WIDTH\n\
2857                     set page width to PAGE_WIDTH (72) characters always,\n\
2858                     truncate lines, except -J option is set, no interference\n\
2859                     with -S or -s\n\
2860 "), stdout);
2861       fputs (HELP_OPTION_DESCRIPTION, stdout);
2862       fputs (VERSION_OPTION_DESCRIPTION, stdout);
2863       emit_ancillary_info (PROGRAM_NAME);
2864     }
2865   exit (status);
2866 }
2867