1 /* tac - concatenate and print files in reverse
2    Copyright (C) 1988-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18    GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
19 
20 /* Copy each FILE, or the standard input if none are given or when a
21    FILE name of "-" is encountered, to the standard output with the
22    order of the records reversed.  The records are separated by
23    instances of a string, or a newline if none is given.  By default, the
24    separator string is attached to the end of the record that it
25    follows in the file.
26 
27    Options:
28    -b, --before			The separator is attached to the beginning
29                                 of the record that it precedes in the file.
30    -r, --regex			The separator is a regular expression.
31    -s, --separator=separator	Use SEPARATOR as the record separator.
32 
33    To reverse a file byte by byte, use (in bash, ksh, or sh):
34 tac -r -s '.\|
35 ' file */
36 
37 #include <config.h>
38 
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
43 
44 #include <regex.h>
45 
46 #include "filenamecat.h"
47 #include "full-read.h"
48 #include "safe-read.h"
49 #include "temp-stream.h"
50 #include "xbinary-io.h"
51 
52 /* The official name of this program (e.g., no 'g' prefix).  */
53 #define PROGRAM_NAME "tac"
54 
55 #define AUTHORS \
56   proper_name ("Jay Lepreau"), \
57   proper_name ("David MacKenzie")
58 
59 
60 /* The number of bytes per atomic read. */
61 #define INITIAL_READSIZE 8192
62 
63 /* The number of bytes per atomic write. */
64 #define WRITESIZE 8192
65 
66 /* The string that separates the records of the file. */
67 static char const *separator;
68 
69 /* True if we have ever read standard input.  */
70 static bool have_read_stdin = false;
71 
72 /* If true, print 'separator' along with the record preceding it
73    in the file; otherwise with the record following it. */
74 static bool separator_ends_record;
75 
76 /* 0 if 'separator' is to be matched as a regular expression;
77    otherwise, the length of 'separator', used as a sentinel to
78    stop the search. */
79 static size_t sentinel_length;
80 
81 /* The length of a match with 'separator'.  If 'sentinel_length' is 0,
82    'match_length' is computed every time a match succeeds;
83    otherwise, it is simply the length of 'separator'. */
84 static size_t match_length;
85 
86 /* The input buffer. */
87 static char *G_buffer;
88 
89 /* The number of bytes to read at once into 'buffer'. */
90 static size_t read_size;
91 
92 /* The size of 'buffer'.  This is read_size * 2 + sentinel_length + 2.
93    The extra 2 bytes allow 'past_end' to have a value beyond the
94    end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */
95 static size_t G_buffer_size;
96 
97 /* The compiled regular expression representing 'separator'. */
98 static struct re_pattern_buffer compiled_separator;
99 static char compiled_separator_fastmap[UCHAR_MAX + 1];
100 static struct re_registers regs;
101 
102 static struct option const longopts[] =
103 {
104   {"before", no_argument, nullptr, 'b'},
105   {"regex", no_argument, nullptr, 'r'},
106   {"separator", required_argument, nullptr, 's'},
107   {GETOPT_HELP_OPTION_DECL},
108   {GETOPT_VERSION_OPTION_DECL},
109   {nullptr, 0, nullptr, 0}
110 };
111 
112 void
usage(int status)113 usage (int status)
114 {
115   if (status != EXIT_SUCCESS)
116     emit_try_help ();
117   else
118     {
119       printf (_("\
120 Usage: %s [OPTION]... [FILE]...\n\
121 "),
122               program_name);
123       fputs (_("\
124 Write each FILE to standard output, last line first.\n\
125 "), stdout);
126 
127       emit_stdin_note ();
128       emit_mandatory_arg_note ();
129 
130       fputs (_("\
131   -b, --before             attach the separator before instead of after\n\
132   -r, --regex              interpret the separator as a regular expression\n\
133   -s, --separator=STRING   use STRING as the separator instead of newline\n\
134 "), stdout);
135       fputs (HELP_OPTION_DESCRIPTION, stdout);
136       fputs (VERSION_OPTION_DESCRIPTION, stdout);
137       emit_ancillary_info (PROGRAM_NAME);
138     }
139   exit (status);
140 }
141 
142 /* Print the characters from START to PAST_END - 1.
143    If START is null, just flush the buffer. */
144 
145 static void
output(char const * start,char const * past_end)146 output (char const *start, char const *past_end)
147 {
148   static char buffer[WRITESIZE];
149   static size_t bytes_in_buffer = 0;
150   size_t bytes_to_add = past_end - start;
151   size_t bytes_available = WRITESIZE - bytes_in_buffer;
152 
153   if (start == 0)
154     {
155       fwrite (buffer, 1, bytes_in_buffer, stdout);
156       bytes_in_buffer = 0;
157       return;
158     }
159 
160   /* Write out as many full buffers as possible. */
161   while (bytes_to_add >= bytes_available)
162     {
163       memcpy (buffer + bytes_in_buffer, start, bytes_available);
164       bytes_to_add -= bytes_available;
165       start += bytes_available;
166       fwrite (buffer, 1, WRITESIZE, stdout);
167       bytes_in_buffer = 0;
168       bytes_available = WRITESIZE;
169     }
170 
171   memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
172   bytes_in_buffer += bytes_to_add;
173 }
174 
175 /* Print in reverse the file open on descriptor FD for reading FILE.
176    The file is already positioned at FILE_POS, which should be near its end.
177    Return true if successful.  */
178 
179 static bool
tac_seekable(int input_fd,char const * file,off_t file_pos)180 tac_seekable (int input_fd, char const *file, off_t file_pos)
181 {
182   /* Pointer to the location in 'G_buffer' where the search for
183      the next separator will begin. */
184   char *match_start;
185 
186   /* Pointer to one past the rightmost character in 'G_buffer' that
187      has not been printed yet. */
188   char *past_end;
189 
190   /* Length of the record growing in 'G_buffer'. */
191   size_t saved_record_size;
192 
193   /* True if 'output' has not been called yet for any file.
194      Only used when the separator is attached to the preceding record. */
195   bool first_time = true;
196   char first_char = *separator;	/* Speed optimization, non-regexp. */
197   char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
198   size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
199 
200   /* Arrange for the first read to lop off enough to leave the rest of the
201      file a multiple of 'read_size'.  Since 'read_size' can change, this may
202      not always hold during the program run, but since it usually will, leave
203      it here for i/o efficiency (page/sector boundaries and all that).
204      Note: the efficiency gain has not been verified. */
205   size_t remainder = file_pos % read_size;
206   if (remainder != 0)
207     {
208       file_pos -= remainder;
209       if (lseek (input_fd, file_pos, SEEK_SET) < 0)
210         error (0, errno, _("%s: seek failed"), quotef (file));
211     }
212 
213   /* Scan backward, looking for end of file.  This caters to proc-like
214      file systems where the file size is just an estimate.  */
215   while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0
216          && file_pos != 0)
217     {
218       off_t rsize = read_size;
219       if (lseek (input_fd, -rsize, SEEK_CUR) < 0)
220         error (0, errno, _("%s: seek failed"), quotef (file));
221       file_pos -= read_size;
222     }
223 
224   /* Now scan forward, looking for end of file.  */
225   while (saved_record_size == read_size)
226     {
227       size_t nread = safe_read (input_fd, G_buffer, read_size);
228       if (nread == 0)
229         break;
230       saved_record_size = nread;
231       if (saved_record_size == SAFE_READ_ERROR)
232         break;
233       file_pos += nread;
234     }
235 
236   if (saved_record_size == SAFE_READ_ERROR)
237     {
238       error (0, errno, _("%s: read error"), quotef (file));
239       return false;
240     }
241 
242   match_start = past_end = G_buffer + saved_record_size;
243   /* For non-regexp search, move past impossible positions for a match. */
244   if (sentinel_length)
245     match_start -= match_length1;
246 
247   while (true)
248     {
249       /* Search backward from 'match_start' - 1 to 'G_buffer' for a match
250          with 'separator'; for speed, use strncmp if 'separator' contains no
251          metacharacters.
252          If the match succeeds, set 'match_start' to point to the start of
253          the match and 'match_length' to the length of the match.
254          Otherwise, make 'match_start' < 'G_buffer'. */
255       if (sentinel_length == 0)
256         {
257           size_t i = match_start - G_buffer;
258           regoff_t ri = i;
259           regoff_t range = 1 - ri;
260           regoff_t ret;
261 
262           if (1 < range)
263             error (EXIT_FAILURE, 0, _("record too large"));
264 
265           if (range == 1
266               || ((ret = re_search (&compiled_separator, G_buffer,
267                                     i, i - 1, range, &regs))
268                   == -1))
269             match_start = G_buffer - 1;
270           else if (ret == -2)
271             error (EXIT_FAILURE, 0,
272                    _("error in regular expression search"));
273           else
274             {
275               match_start = G_buffer + regs.start[0];
276               match_length = regs.end[0] - regs.start[0];
277             }
278         }
279       else
280         {
281           /* 'match_length' is constant for non-regexp boundaries. */
282           while (*--match_start != first_char
283                  || (match_length1 && !STREQ_LEN (match_start + 1, separator1,
284                                                   match_length1)))
285             /* Do nothing. */ ;
286         }
287 
288       /* Check whether we backed off the front of 'G_buffer' without finding
289          a match for 'separator'. */
290       if (match_start < G_buffer)
291         {
292           if (file_pos == 0)
293             {
294               /* Hit the beginning of the file; print the remaining record. */
295               output (G_buffer, past_end);
296               return true;
297             }
298 
299           saved_record_size = past_end - G_buffer;
300           if (saved_record_size > read_size)
301             {
302               /* 'G_buffer_size' is about twice 'read_size', so since
303                  we want to read in another 'read_size' bytes before
304                  the data already in 'G_buffer', we need to increase
305                  'G_buffer_size'. */
306               char *newbuffer;
307               size_t offset = sentinel_length ? sentinel_length : 1;
308               size_t old_G_buffer_size = G_buffer_size;
309 
310               read_size *= 2;
311               G_buffer_size = read_size * 2 + sentinel_length + 2;
312               if (G_buffer_size < old_G_buffer_size)
313                 xalloc_die ();
314               newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
315               newbuffer += offset;
316               G_buffer = newbuffer;
317             }
318 
319           /* Back up to the start of the next bufferfull of the file.  */
320           if (file_pos >= read_size)
321             file_pos -= read_size;
322           else
323             {
324               read_size = file_pos;
325               file_pos = 0;
326             }
327           if (lseek (input_fd, file_pos, SEEK_SET) < 0)
328             error (0, errno, _("%s: seek failed"), quotef (file));
329 
330           /* Shift the pending record data right to make room for the new.
331              The source and destination regions probably overlap.  */
332           memmove (G_buffer + read_size, G_buffer, saved_record_size);
333           past_end = G_buffer + read_size + saved_record_size;
334           /* For non-regexp searches, avoid unnecessary scanning. */
335           if (sentinel_length)
336             match_start = G_buffer + read_size;
337           else
338             match_start = past_end;
339 
340           if (full_read (input_fd, G_buffer, read_size) != read_size)
341             {
342               error (0, errno, _("%s: read error"), quotef (file));
343               return false;
344             }
345         }
346       else
347         {
348           /* Found a match of 'separator'. */
349           if (separator_ends_record)
350             {
351               char *match_end = match_start + match_length;
352 
353               /* If this match of 'separator' isn't at the end of the
354                  file, print the record. */
355               if (!first_time || match_end != past_end)
356                 output (match_end, past_end);
357               past_end = match_end;
358               first_time = false;
359             }
360           else
361             {
362               output (match_start, past_end);
363               past_end = match_start;
364             }
365 
366           /* For non-regex matching, we can back up.  */
367           if (sentinel_length > 0)
368             match_start -= match_length - 1;
369         }
370     }
371 }
372 
373 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
374    a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
375    and file name.  Return the number of bytes copied, or -1 on error.  */
376 
377 static off_t
copy_to_temp(FILE ** g_tmp,char ** g_tempfile,int input_fd,char const * file)378 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
379 {
380   FILE *fp;
381   char *file_name;
382   uintmax_t bytes_copied = 0;
383   if (!temp_stream (&fp, &file_name))
384     return -1;
385 
386   while (true)
387     {
388       size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
389       if (bytes_read == 0)
390         break;
391       if (bytes_read == SAFE_READ_ERROR)
392         {
393           error (0, errno, _("%s: read error"), quotef (file));
394           return -1;
395         }
396 
397       if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read)
398         {
399           error (0, errno, _("%s: write error"), quotef (file_name));
400           return -1;
401         }
402 
403       /* Implicitly <= OFF_T_MAX due to preceding fwrite(),
404          but unsigned type used to avoid compiler warnings
405          not aware of this fact.  */
406       bytes_copied += bytes_read;
407     }
408 
409   if (fflush (fp) != 0)
410     {
411       error (0, errno, _("%s: write error"), quotef (file_name));
412       return -1;
413     }
414 
415   *g_tmp = fp;
416   *g_tempfile = file_name;
417   return bytes_copied;
418 }
419 
420 /* Copy INPUT_FD to a temporary, then tac that file.
421    Return true if successful.  */
422 
423 static bool
tac_nonseekable(int input_fd,char const * file)424 tac_nonseekable (int input_fd, char const *file)
425 {
426   FILE *tmp_stream;
427   char *tmp_file;
428   off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file);
429   if (bytes_copied < 0)
430     return false;
431 
432   bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied);
433   return ok;
434 }
435 
436 /* Print FILE in reverse, copying it to a temporary
437    file first if it is not seekable.
438    Return true if successful.  */
439 
440 static bool
tac_file(char const * filename)441 tac_file (char const *filename)
442 {
443   bool ok;
444   off_t file_size;
445   int fd;
446   bool is_stdin = STREQ (filename, "-");
447 
448   if (is_stdin)
449     {
450       have_read_stdin = true;
451       fd = STDIN_FILENO;
452       filename = _("standard input");
453       xset_binary_mode (STDIN_FILENO, O_BINARY);
454     }
455   else
456     {
457       fd = open (filename, O_RDONLY | O_BINARY);
458       if (fd < 0)
459         {
460           error (0, errno, _("failed to open %s for reading"),
461                  quoteaf (filename));
462           return false;
463         }
464     }
465 
466   file_size = lseek (fd, 0, SEEK_END);
467 
468   ok = (file_size < 0 || isatty (fd)
469         ? tac_nonseekable (fd, filename)
470         : tac_seekable (fd, filename, file_size));
471 
472   if (!is_stdin && close (fd) != 0)
473     {
474       error (0, errno, _("%s: read error"), quotef (filename));
475       ok = false;
476     }
477   return ok;
478 }
479 
480 int
main(int argc,char ** argv)481 main (int argc, char **argv)
482 {
483   char const *error_message;	/* Return value from re_compile_pattern. */
484   int optc;
485   bool ok;
486   size_t half_buffer_size;
487 
488   /* Initializer for file_list if no file-arguments
489      were specified on the command line.  */
490   static char const *const default_file_list[] = {"-", nullptr};
491   char const *const *file;
492 
493   initialize_main (&argc, &argv);
494   set_program_name (argv[0]);
495   setlocale (LC_ALL, "");
496   bindtextdomain (PACKAGE, LOCALEDIR);
497   textdomain (PACKAGE);
498 
499   atexit (close_stdout);
500 
501   separator = "\n";
502   sentinel_length = 1;
503   separator_ends_record = true;
504 
505   while ((optc = getopt_long (argc, argv, "brs:", longopts, nullptr)) != -1)
506     {
507       switch (optc)
508         {
509         case 'b':
510           separator_ends_record = false;
511           break;
512         case 'r':
513           sentinel_length = 0;
514           break;
515         case 's':
516           separator = optarg;
517           break;
518         case_GETOPT_HELP_CHAR;
519         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
520         default:
521           usage (EXIT_FAILURE);
522         }
523     }
524 
525   if (sentinel_length == 0)
526     {
527       if (*separator == 0)
528         error (EXIT_FAILURE, 0, _("separator cannot be empty"));
529 
530       compiled_separator.buffer = nullptr;
531       compiled_separator.allocated = 0;
532       compiled_separator.fastmap = compiled_separator_fastmap;
533       compiled_separator.translate = nullptr;
534       error_message = re_compile_pattern (separator, strlen (separator),
535                                           &compiled_separator);
536       if (error_message)
537         error (EXIT_FAILURE, 0, "%s", (error_message));
538     }
539   else
540     match_length = sentinel_length = *separator ? strlen (separator) : 1;
541 
542   read_size = INITIAL_READSIZE;
543   while (sentinel_length >= read_size / 2)
544     {
545       if (SIZE_MAX / 2 < read_size)
546         xalloc_die ();
547       read_size *= 2;
548     }
549   half_buffer_size = read_size + sentinel_length + 1;
550   G_buffer_size = 2 * half_buffer_size;
551   if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
552     xalloc_die ();
553   G_buffer = xmalloc (G_buffer_size);
554   if (sentinel_length)
555     {
556       memcpy (G_buffer, separator, sentinel_length + 1);
557       G_buffer += sentinel_length;
558     }
559   else
560     {
561       ++G_buffer;
562     }
563 
564   file = (optind < argc
565           ? (char const *const *) &argv[optind]
566           : default_file_list);
567 
568   xset_binary_mode (STDOUT_FILENO, O_BINARY);
569 
570   {
571     ok = true;
572     for (size_t i = 0; file[i]; ++i)
573       ok &= tac_file (file[i]);
574   }
575 
576   /* Flush the output buffer. */
577   output ((char *) nullptr, (char *) nullptr);
578 
579   if (have_read_stdin && close (STDIN_FILENO) < 0)
580     {
581       error (0, errno, "-");
582       ok = false;
583     }
584 
585   main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
586 }
587