1 /* paste - merge lines of files
2 Copyright (C) 1997-2023 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 /* Written by David Ihnat. */
19
20 /* The list of valid escape sequences has been expanded over the Unix
21 version, to include \b, \f, \r, and \v.
22
23 POSIX changes, bug fixes, long-named options, and cleanup
24 by David MacKenzie <djm@gnu.ai.mit.edu>.
25
26 Options:
27 --serial
28 -s Paste one file at a time rather than
29 one line from each file.
30 --delimiters=delim-list
31 -d delim-list Consecutively use the characters in
32 DELIM-LIST instead of tab to separate
33 merged lines. When DELIM-LIST is exhausted,
34 start again at its beginning.
35 A FILE of '-' means standard input.
36 If no FILEs are given, standard input is used. */
37
38 #include <config.h>
39
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include "system.h"
44 #include "fadvise.h"
45
46 /* The official name of this program (e.g., no 'g' prefix). */
47 #define PROGRAM_NAME "paste"
48
49 #define AUTHORS \
50 proper_name ("David M. Ihnat"), \
51 proper_name ("David MacKenzie")
52
53 /* Indicates that no delimiter should be added in the current position. */
54 #define EMPTY_DELIM '\0'
55
56 /* If nonzero, we have read standard input at some point. */
57 static bool have_read_stdin;
58
59 /* If nonzero, merge subsequent lines of each file rather than
60 corresponding lines from each file in parallel. */
61 static bool serial_merge;
62
63 /* The delimiters between lines of input files (used cyclically). */
64 static char *delims;
65
66 /* A pointer to the character after the end of 'delims'. */
67 static char const *delim_end;
68
69 static unsigned char line_delim = '\n';
70
71 static struct option const longopts[] =
72 {
73 {"serial", no_argument, nullptr, 's'},
74 {"delimiters", required_argument, nullptr, 'd'},
75 {"zero-terminated", no_argument, nullptr, 'z'},
76 {GETOPT_HELP_OPTION_DECL},
77 {GETOPT_VERSION_OPTION_DECL},
78 {nullptr, 0, nullptr, 0}
79 };
80
81 /* Set globals delims and delim_end. Copy STRPTR to DELIMS, converting
82 backslash representations of special characters in STRPTR to their actual
83 values. The set of possible backslash characters has been expanded beyond
84 that recognized by the Unix version.
85 Return 0 upon success.
86 If the string ends in an odd number of backslashes, ignore the
87 final backslash and return nonzero. */
88
89 static int
collapse_escapes(char const * strptr)90 collapse_escapes (char const *strptr)
91 {
92 char *strout = xstrdup (strptr);
93 bool backslash_at_end = false;
94
95 delims = strout;
96
97 while (*strptr)
98 {
99 if (*strptr != '\\') /* Is it an escape character? */
100 *strout++ = *strptr++; /* No, just transfer it. */
101 else
102 {
103 switch (*++strptr)
104 {
105 case '0':
106 *strout++ = EMPTY_DELIM;
107 break;
108
109 case 'b':
110 *strout++ = '\b';
111 break;
112
113 case 'f':
114 *strout++ = '\f';
115 break;
116
117 case 'n':
118 *strout++ = '\n';
119 break;
120
121 case 'r':
122 *strout++ = '\r';
123 break;
124
125 case 't':
126 *strout++ = '\t';
127 break;
128
129 case 'v':
130 *strout++ = '\v';
131 break;
132
133 case '\\':
134 *strout++ = '\\';
135 break;
136
137 case '\0':
138 backslash_at_end = true;
139 goto done;
140
141 default:
142 *strout++ = *strptr;
143 break;
144 }
145 strptr++;
146 }
147 }
148
149 done:
150
151 delim_end = strout;
152 return backslash_at_end ? 1 : 0;
153 }
154
/* Write the byte C to standard output.  If putchar reports a failure,
   hand control to write_error.  */

static inline void
xputchar (char c)
{
  if (0 <= putchar (c))
    return;

  write_error ();
}
163
164 /* Perform column paste on the NFILES files named in FNAMPTR.
165 Return true if successful, false if one or more files could not be
166 opened or read. */
167
168 static bool
paste_parallel(size_t nfiles,char ** fnamptr)169 paste_parallel (size_t nfiles, char **fnamptr)
170 {
171 bool ok = true;
172 /* If all files are just ready to be closed, or will be on this
173 round, the string of delimiters must be preserved.
174 delbuf[0] through delbuf[nfiles]
175 store the delimiters for closed files. */
176 char *delbuf = xmalloc (nfiles + 2);
177
178 /* Streams open to the files to process; null if the corresponding
179 stream is closed. */
180 FILE **fileptr = xnmalloc (nfiles + 1, sizeof *fileptr);
181
182 /* Number of files still open to process. */
183 size_t files_open;
184
185 /* True if any fopen got fd == STDIN_FILENO. */
186 bool opened_stdin = false;
187
188 /* Attempt to open all files. This could be expanded to an infinite
189 number of files, but at the (considerable) expense of remembering
190 each file and its current offset, then opening/reading/closing. */
191
192 for (files_open = 0; files_open < nfiles; ++files_open)
193 {
194 if (STREQ (fnamptr[files_open], "-"))
195 {
196 have_read_stdin = true;
197 fileptr[files_open] = stdin;
198 }
199 else
200 {
201 fileptr[files_open] = fopen (fnamptr[files_open], "r");
202 if (fileptr[files_open] == nullptr)
203 error (EXIT_FAILURE, errno, "%s", quotef (fnamptr[files_open]));
204 else if (fileno (fileptr[files_open]) == STDIN_FILENO)
205 opened_stdin = true;
206 fadvise (fileptr[files_open], FADVISE_SEQUENTIAL);
207 }
208 }
209
210 if (opened_stdin && have_read_stdin)
211 error (EXIT_FAILURE, 0, _("standard input is closed"));
212
213 /* Read a line from each file and output it to stdout separated by a
214 delimiter, until we go through the loop without successfully
215 reading from any of the files. */
216
217 while (files_open)
218 {
219 /* Set up for the next line. */
220 bool somedone = false;
221 char const *delimptr = delims;
222 size_t delims_saved = 0; /* Number of delims saved in 'delbuf'. */
223
224 for (size_t i = 0; i < nfiles && files_open; i++)
225 {
226 int chr; /* Input character. */
227 int err; /* Input errno value. */
228 bool sometodo = false; /* Input chars to process. */
229
230 if (fileptr[i])
231 {
232 chr = getc (fileptr[i]);
233 err = errno;
234 if (chr != EOF && delims_saved)
235 {
236 if (fwrite (delbuf, 1, delims_saved, stdout) != delims_saved)
237 write_error ();
238 delims_saved = 0;
239 }
240
241 while (chr != EOF)
242 {
243 sometodo = true;
244 if (chr == line_delim)
245 break;
246 xputchar (chr);
247 chr = getc (fileptr[i]);
248 err = errno;
249 }
250 }
251
252 if (! sometodo)
253 {
254 /* EOF, read error, or closed file.
255 If an EOF or error, close the file. */
256 if (fileptr[i])
257 {
258 if (!ferror (fileptr[i]))
259 err = 0;
260 if (fileptr[i] == stdin)
261 clearerr (fileptr[i]); /* Also clear EOF. */
262 else if (fclose (fileptr[i]) == EOF && !err)
263 err = errno;
264 if (err)
265 {
266 error (0, err, "%s", quotef (fnamptr[i]));
267 ok = false;
268 }
269
270 fileptr[i] = nullptr;
271 files_open--;
272 }
273
274 if (i + 1 == nfiles)
275 {
276 /* End of this output line.
277 Is this the end of the whole thing? */
278 if (somedone)
279 {
280 /* No. Some files were not closed for this line. */
281 if (delims_saved)
282 {
283 if (fwrite (delbuf, 1, delims_saved, stdout)
284 != delims_saved)
285 write_error ();
286 delims_saved = 0;
287 }
288 xputchar (line_delim);
289 }
290 continue; /* Next read of files, or exit. */
291 }
292 else
293 {
294 /* Closed file; add delimiter to 'delbuf'. */
295 if (*delimptr != EMPTY_DELIM)
296 delbuf[delims_saved++] = *delimptr;
297 if (++delimptr == delim_end)
298 delimptr = delims;
299 }
300 }
301 else
302 {
303 /* Some data read. */
304 somedone = true;
305
306 /* Except for last file, replace last newline with delim. */
307 if (i + 1 != nfiles)
308 {
309 if (chr != line_delim && chr != EOF)
310 xputchar (chr);
311 if (*delimptr != EMPTY_DELIM)
312 xputchar (*delimptr);
313 if (++delimptr == delim_end)
314 delimptr = delims;
315 }
316 else
317 {
318 /* If the last line of the last file lacks a newline,
319 print one anyhow. POSIX requires this. */
320 char c = (chr == EOF ? line_delim : chr);
321 xputchar (c);
322 }
323 }
324 }
325 }
326 free (fileptr);
327 free (delbuf);
328 return ok;
329 }
330
331 /* Perform serial paste on the NFILES files named in FNAMPTR.
332 Return true if no errors, false if one or more files could not be
333 opened or read. */
334
335 static bool
paste_serial(size_t nfiles,char ** fnamptr)336 paste_serial (size_t nfiles, char **fnamptr)
337 {
338 bool ok = true; /* false if open or read errors occur. */
339 int charnew, charold; /* Current and previous char read. */
340 char const *delimptr; /* Current delimiter char. */
341 FILE *fileptr; /* Open for reading current file. */
342
343 for (; nfiles; nfiles--, fnamptr++)
344 {
345 int saved_errno;
346 bool is_stdin = STREQ (*fnamptr, "-");
347 if (is_stdin)
348 {
349 have_read_stdin = true;
350 fileptr = stdin;
351 }
352 else
353 {
354 fileptr = fopen (*fnamptr, "r");
355 if (fileptr == nullptr)
356 {
357 error (0, errno, "%s", quotef (*fnamptr));
358 ok = false;
359 continue;
360 }
361 fadvise (fileptr, FADVISE_SEQUENTIAL);
362 }
363
364 delimptr = delims; /* Set up for delimiter string. */
365
366 charold = getc (fileptr);
367 saved_errno = errno;
368 if (charold != EOF)
369 {
370 /* 'charold' is set up. Hit it!
371 Keep reading characters, stashing them in 'charnew';
372 output 'charold', converting to the appropriate delimiter
373 character if needed. After the EOF, output 'charold'
374 if it's a newline; otherwise, output it and then a newline. */
375
376 while ((charnew = getc (fileptr)) != EOF)
377 {
378 /* Process the old character. */
379 if (charold == line_delim)
380 {
381 if (*delimptr != EMPTY_DELIM)
382 xputchar (*delimptr);
383
384 if (++delimptr == delim_end)
385 delimptr = delims;
386 }
387 else
388 xputchar (charold);
389
390 charold = charnew;
391 }
392 saved_errno = errno;
393
394 /* Hit EOF. Process that last character. */
395 xputchar (charold);
396 }
397
398 if (charold != line_delim)
399 xputchar (line_delim);
400
401 if (!ferror (fileptr))
402 saved_errno = 0;
403 if (is_stdin)
404 clearerr (fileptr); /* Also clear EOF. */
405 else if (fclose (fileptr) != 0 && !saved_errno)
406 saved_errno = errno;
407 if (saved_errno)
408 {
409 error (0, saved_errno, "%s", quotef (*fnamptr));
410 ok = false;
411 }
412 }
413 return ok;
414 }
415
416 void
usage(int status)417 usage (int status)
418 {
419 if (status != EXIT_SUCCESS)
420 emit_try_help ();
421 else
422 {
423 printf (_("\
424 Usage: %s [OPTION]... [FILE]...\n\
425 "),
426 program_name);
427 fputs (_("\
428 Write lines consisting of the sequentially corresponding lines from\n\
429 each FILE, separated by TABs, to standard output.\n\
430 "), stdout);
431
432 emit_stdin_note ();
433 emit_mandatory_arg_note ();
434
435 fputs (_("\
436 -d, --delimiters=LIST reuse characters from LIST instead of TABs\n\
437 -s, --serial paste one file at a time instead of in parallel\n\
438 "), stdout);
439 fputs (_("\
440 -z, --zero-terminated line delimiter is NUL, not newline\n\
441 "), stdout);
442 fputs (HELP_OPTION_DESCRIPTION, stdout);
443 fputs (VERSION_OPTION_DESCRIPTION, stdout);
444 /* FIXME: add a couple of examples. */
445 emit_ancillary_info (PROGRAM_NAME);
446 }
447 exit (status);
448 }
449
450 int
main(int argc,char ** argv)451 main (int argc, char **argv)
452 {
453 int optc;
454 char const *delim_arg = "\t";
455
456 initialize_main (&argc, &argv);
457 set_program_name (argv[0]);
458 setlocale (LC_ALL, "");
459 bindtextdomain (PACKAGE, LOCALEDIR);
460 textdomain (PACKAGE);
461
462 atexit (close_stdout);
463
464 have_read_stdin = false;
465 serial_merge = false;
466
467 while ((optc = getopt_long (argc, argv, "d:sz", longopts, nullptr)) != -1)
468 {
469 switch (optc)
470 {
471 case 'd':
472 /* Delimiter character(s). */
473 delim_arg = (optarg[0] == '\0' ? "\\0" : optarg);
474 break;
475
476 case 's':
477 serial_merge = true;
478 break;
479
480 case 'z':
481 line_delim = '\0';
482 break;
483
484 case_GETOPT_HELP_CHAR;
485
486 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
487
488 default:
489 usage (EXIT_FAILURE);
490 }
491 }
492
493 int nfiles = argc - optind;
494 if (nfiles == 0)
495 {
496 argv[optind] = bad_cast ("-");
497 nfiles++;
498 }
499
500 if (collapse_escapes (delim_arg))
501 {
502 /* Don't use the quote() quoting style, because that would double the
503 number of displayed backslashes, making the diagnostic look bogus. */
504 error (EXIT_FAILURE, 0,
505 _("delimiter list ends with an unescaped backslash: %s"),
506 quotearg_n_style_colon (0, c_maybe_quoting_style, delim_arg));
507 }
508
509 bool ok = ((serial_merge ? paste_serial : paste_parallel)
510 (nfiles, &argv[optind]));
511
512 free (delims);
513
514 if (have_read_stdin && fclose (stdin) == EOF)
515 error (EXIT_FAILURE, errno, "-");
516 return ok ? EXIT_SUCCESS : EXIT_FAILURE;
517 }
518