1 /* paste - merge lines of files
2    Copyright (C) 1997-2023 Free Software Foundation, Inc.
3    Copyright (C) 1984 David M. Ihnat
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 /* Written by David Ihnat.  */
19 
20 /* The list of valid escape sequences has been expanded over the Unix
21    version, to include \b, \f, \r, and \v.
22 
23    POSIX changes, bug fixes, long-named options, and cleanup
24    by David MacKenzie <djm@gnu.ai.mit.edu>.
25 
26    Options:
27    --serial
28    -s				Paste one file at a time rather than
29                                 one line from each file.
30    --delimiters=delim-list
31    -d delim-list		Consecutively use the characters in
32                                 DELIM-LIST instead of tab to separate
33                                 merged lines.  When DELIM-LIST is exhausted,
34                                 start again at its beginning.
35    A FILE of '-' means standard input.
36    If no FILEs are given, standard input is used. */
37 
38 #include <config.h>
39 
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include "system.h"
44 #include "fadvise.h"
45 
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "paste"

/* Author names handed to the version-option handling in main.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie")

/* Indicates that no delimiter should be added in the current position. */
#define EMPTY_DELIM '\0'

/* True if we have read standard input at some point. */
static bool have_read_stdin;

/* If true, merge subsequent lines of each file rather than
   corresponding lines from each file in parallel. */
static bool serial_merge;

/* The delimiters between lines of input files (used cyclically).
   Set by collapse_escapes and freed at the end of main.  */
static char *delims;

/* A pointer to the character after the end of 'delims'. */
static char const *delim_end;

/* The byte that ends each input and output line: newline by default,
   NUL once the -z (--zero-terminated) option has been seen.  */
static unsigned char line_delim = '\n';

/* Long option table for getopt_long.  The serial, delimiters and
   zero-terminated entries map to the short options 's', 'd' and 'z'
   that main dispatches on.  */
static struct option const longopts[] =
{
  {"serial", no_argument, nullptr, 's'},
  {"delimiters", required_argument, nullptr, 'd'},
  {"zero-terminated", no_argument, nullptr, 'z'},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {nullptr, 0, nullptr, 0}
};
80 
81 /* Set globals delims and delim_end.  Copy STRPTR to DELIMS, converting
82    backslash representations of special characters in STRPTR to their actual
83    values. The set of possible backslash characters has been expanded beyond
84    that recognized by the Unix version.
85    Return 0 upon success.
86    If the string ends in an odd number of backslashes, ignore the
87    final backslash and return nonzero.  */
88 
89 static int
collapse_escapes(char const * strptr)90 collapse_escapes (char const *strptr)
91 {
92   char *strout = xstrdup (strptr);
93   bool backslash_at_end = false;
94 
95   delims = strout;
96 
97   while (*strptr)
98     {
99       if (*strptr != '\\')	/* Is it an escape character? */
100         *strout++ = *strptr++;	/* No, just transfer it. */
101       else
102         {
103           switch (*++strptr)
104             {
105             case '0':
106               *strout++ = EMPTY_DELIM;
107               break;
108 
109             case 'b':
110               *strout++ = '\b';
111               break;
112 
113             case 'f':
114               *strout++ = '\f';
115               break;
116 
117             case 'n':
118               *strout++ = '\n';
119               break;
120 
121             case 'r':
122               *strout++ = '\r';
123               break;
124 
125             case 't':
126               *strout++ = '\t';
127               break;
128 
129             case 'v':
130               *strout++ = '\v';
131               break;
132 
133             case '\\':
134               *strout++ = '\\';
135               break;
136 
137             case '\0':
138               backslash_at_end = true;
139               goto done;
140 
141             default:
142               *strout++ = *strptr;
143               break;
144             }
145           strptr++;
146         }
147     }
148 
149  done:
150 
151   delim_end = strout;
152   return backslash_at_end ? 1 : 0;
153 }
154 
/* Write the byte C to standard output, calling write_error if
   putchar reports a failure.  */

static inline void
xputchar (char c)
{
  int rc = putchar (c);

  if (rc == EOF)
    write_error ();
}
163 
164 /* Perform column paste on the NFILES files named in FNAMPTR.
165    Return true if successful, false if one or more files could not be
166    opened or read. */
167 
168 static bool
paste_parallel(size_t nfiles,char ** fnamptr)169 paste_parallel (size_t nfiles, char **fnamptr)
170 {
171   bool ok = true;
172   /* If all files are just ready to be closed, or will be on this
173      round, the string of delimiters must be preserved.
174      delbuf[0] through delbuf[nfiles]
175      store the delimiters for closed files. */
176   char *delbuf = xmalloc (nfiles + 2);
177 
178   /* Streams open to the files to process; null if the corresponding
179      stream is closed.  */
180   FILE **fileptr = xnmalloc (nfiles + 1, sizeof *fileptr);
181 
182   /* Number of files still open to process.  */
183   size_t files_open;
184 
185   /* True if any fopen got fd == STDIN_FILENO.  */
186   bool opened_stdin = false;
187 
188   /* Attempt to open all files.  This could be expanded to an infinite
189      number of files, but at the (considerable) expense of remembering
190      each file and its current offset, then opening/reading/closing.  */
191 
192   for (files_open = 0; files_open < nfiles; ++files_open)
193     {
194       if (STREQ (fnamptr[files_open], "-"))
195         {
196           have_read_stdin = true;
197           fileptr[files_open] = stdin;
198         }
199       else
200         {
201           fileptr[files_open] = fopen (fnamptr[files_open], "r");
202           if (fileptr[files_open] == nullptr)
203             error (EXIT_FAILURE, errno, "%s", quotef (fnamptr[files_open]));
204           else if (fileno (fileptr[files_open]) == STDIN_FILENO)
205             opened_stdin = true;
206           fadvise (fileptr[files_open], FADVISE_SEQUENTIAL);
207         }
208     }
209 
210   if (opened_stdin && have_read_stdin)
211     error (EXIT_FAILURE, 0, _("standard input is closed"));
212 
213   /* Read a line from each file and output it to stdout separated by a
214      delimiter, until we go through the loop without successfully
215      reading from any of the files. */
216 
217   while (files_open)
218     {
219       /* Set up for the next line. */
220       bool somedone = false;
221       char const *delimptr = delims;
222       size_t delims_saved = 0;	/* Number of delims saved in 'delbuf'. */
223 
224       for (size_t i = 0; i < nfiles && files_open; i++)
225         {
226           int chr;			/* Input character. */
227           int err;			/* Input errno value.  */
228           bool sometodo = false;	/* Input chars to process.  */
229 
230           if (fileptr[i])
231             {
232               chr = getc (fileptr[i]);
233               err = errno;
234               if (chr != EOF && delims_saved)
235                 {
236                   if (fwrite (delbuf, 1, delims_saved, stdout) != delims_saved)
237                     write_error ();
238                   delims_saved = 0;
239                 }
240 
241               while (chr != EOF)
242                 {
243                   sometodo = true;
244                   if (chr == line_delim)
245                     break;
246                   xputchar (chr);
247                   chr = getc (fileptr[i]);
248                   err = errno;
249                 }
250             }
251 
252           if (! sometodo)
253             {
254               /* EOF, read error, or closed file.
255                  If an EOF or error, close the file.  */
256               if (fileptr[i])
257                 {
258                   if (!ferror (fileptr[i]))
259                     err = 0;
260                   if (fileptr[i] == stdin)
261                     clearerr (fileptr[i]); /* Also clear EOF. */
262                   else if (fclose (fileptr[i]) == EOF && !err)
263                     err = errno;
264                   if (err)
265                     {
266                       error (0, err, "%s", quotef (fnamptr[i]));
267                       ok = false;
268                     }
269 
270                   fileptr[i] = nullptr;
271                   files_open--;
272                 }
273 
274               if (i + 1 == nfiles)
275                 {
276                   /* End of this output line.
277                      Is this the end of the whole thing? */
278                   if (somedone)
279                     {
280                       /* No.  Some files were not closed for this line. */
281                       if (delims_saved)
282                         {
283                           if (fwrite (delbuf, 1, delims_saved, stdout)
284                               != delims_saved)
285                             write_error ();
286                           delims_saved = 0;
287                         }
288                       xputchar (line_delim);
289                     }
290                   continue;	/* Next read of files, or exit. */
291                 }
292               else
293                 {
294                   /* Closed file; add delimiter to 'delbuf'. */
295                   if (*delimptr != EMPTY_DELIM)
296                     delbuf[delims_saved++] = *delimptr;
297                   if (++delimptr == delim_end)
298                     delimptr = delims;
299                 }
300             }
301           else
302             {
303               /* Some data read. */
304               somedone = true;
305 
306               /* Except for last file, replace last newline with delim. */
307               if (i + 1 != nfiles)
308                 {
309                   if (chr != line_delim && chr != EOF)
310                     xputchar (chr);
311                   if (*delimptr != EMPTY_DELIM)
312                     xputchar (*delimptr);
313                   if (++delimptr == delim_end)
314                     delimptr = delims;
315                 }
316               else
317                 {
318                   /* If the last line of the last file lacks a newline,
319                      print one anyhow.  POSIX requires this.  */
320                   char c = (chr == EOF ? line_delim : chr);
321                   xputchar (c);
322                 }
323             }
324         }
325     }
326   free (fileptr);
327   free (delbuf);
328   return ok;
329 }
330 
331 /* Perform serial paste on the NFILES files named in FNAMPTR.
332    Return true if no errors, false if one or more files could not be
333    opened or read. */
334 
335 static bool
paste_serial(size_t nfiles,char ** fnamptr)336 paste_serial (size_t nfiles, char **fnamptr)
337 {
338   bool ok = true;	/* false if open or read errors occur. */
339   int charnew, charold; /* Current and previous char read. */
340   char const *delimptr;	/* Current delimiter char. */
341   FILE *fileptr;	/* Open for reading current file. */
342 
343   for (; nfiles; nfiles--, fnamptr++)
344     {
345       int saved_errno;
346       bool is_stdin = STREQ (*fnamptr, "-");
347       if (is_stdin)
348         {
349           have_read_stdin = true;
350           fileptr = stdin;
351         }
352       else
353         {
354           fileptr = fopen (*fnamptr, "r");
355           if (fileptr == nullptr)
356             {
357               error (0, errno, "%s", quotef (*fnamptr));
358               ok = false;
359               continue;
360             }
361           fadvise (fileptr, FADVISE_SEQUENTIAL);
362         }
363 
364       delimptr = delims;	/* Set up for delimiter string. */
365 
366       charold = getc (fileptr);
367       saved_errno = errno;
368       if (charold != EOF)
369         {
370           /* 'charold' is set up.  Hit it!
371              Keep reading characters, stashing them in 'charnew';
372              output 'charold', converting to the appropriate delimiter
373              character if needed.  After the EOF, output 'charold'
374              if it's a newline; otherwise, output it and then a newline. */
375 
376           while ((charnew = getc (fileptr)) != EOF)
377             {
378               /* Process the old character. */
379               if (charold == line_delim)
380                 {
381                   if (*delimptr != EMPTY_DELIM)
382                     xputchar (*delimptr);
383 
384                   if (++delimptr == delim_end)
385                     delimptr = delims;
386                 }
387               else
388                 xputchar (charold);
389 
390               charold = charnew;
391             }
392           saved_errno = errno;
393 
394           /* Hit EOF.  Process that last character. */
395           xputchar (charold);
396         }
397 
398       if (charold != line_delim)
399         xputchar (line_delim);
400 
401       if (!ferror (fileptr))
402         saved_errno = 0;
403       if (is_stdin)
404         clearerr (fileptr);	/* Also clear EOF. */
405       else if (fclose (fileptr) != 0 && !saved_errno)
406         saved_errno = errno;
407       if (saved_errno)
408         {
409           error (0, saved_errno, "%s", quotef (*fnamptr));
410           ok = false;
411         }
412     }
413   return ok;
414 }
415 
416 void
usage(int status)417 usage (int status)
418 {
419   if (status != EXIT_SUCCESS)
420     emit_try_help ();
421   else
422     {
423       printf (_("\
424 Usage: %s [OPTION]... [FILE]...\n\
425 "),
426               program_name);
427       fputs (_("\
428 Write lines consisting of the sequentially corresponding lines from\n\
429 each FILE, separated by TABs, to standard output.\n\
430 "), stdout);
431 
432       emit_stdin_note ();
433       emit_mandatory_arg_note ();
434 
435       fputs (_("\
436   -d, --delimiters=LIST   reuse characters from LIST instead of TABs\n\
437   -s, --serial            paste one file at a time instead of in parallel\n\
438 "), stdout);
439       fputs (_("\
440   -z, --zero-terminated    line delimiter is NUL, not newline\n\
441 "), stdout);
442       fputs (HELP_OPTION_DESCRIPTION, stdout);
443       fputs (VERSION_OPTION_DESCRIPTION, stdout);
444       /* FIXME: add a couple of examples.  */
445       emit_ancillary_info (PROGRAM_NAME);
446     }
447   exit (status);
448 }
449 
450 int
main(int argc,char ** argv)451 main (int argc, char **argv)
452 {
453   int optc;
454   char const *delim_arg = "\t";
455 
456   initialize_main (&argc, &argv);
457   set_program_name (argv[0]);
458   setlocale (LC_ALL, "");
459   bindtextdomain (PACKAGE, LOCALEDIR);
460   textdomain (PACKAGE);
461 
462   atexit (close_stdout);
463 
464   have_read_stdin = false;
465   serial_merge = false;
466 
467   while ((optc = getopt_long (argc, argv, "d:sz", longopts, nullptr)) != -1)
468     {
469       switch (optc)
470         {
471         case 'd':
472           /* Delimiter character(s). */
473           delim_arg = (optarg[0] == '\0' ? "\\0" : optarg);
474           break;
475 
476         case 's':
477           serial_merge = true;
478           break;
479 
480         case 'z':
481           line_delim = '\0';
482           break;
483 
484         case_GETOPT_HELP_CHAR;
485 
486         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
487 
488         default:
489           usage (EXIT_FAILURE);
490         }
491     }
492 
493   int nfiles = argc - optind;
494   if (nfiles == 0)
495     {
496       argv[optind] = bad_cast ("-");
497       nfiles++;
498     }
499 
500   if (collapse_escapes (delim_arg))
501     {
502       /* Don't use the quote() quoting style, because that would double the
503          number of displayed backslashes, making the diagnostic look bogus.  */
504       error (EXIT_FAILURE, 0,
505              _("delimiter list ends with an unescaped backslash: %s"),
506              quotearg_n_style_colon (0, c_maybe_quoting_style, delim_arg));
507     }
508 
509   bool ok = ((serial_merge ? paste_serial : paste_parallel)
510              (nfiles, &argv[optind]));
511 
512   free (delims);
513 
514   if (have_read_stdin && fclose (stdin) == EOF)
515     error (EXIT_FAILURE, errno, "-");
516   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
517 }
518