1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988-2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* By tege@sics.se, with rms.
18
19 TODO:
20 * support -p REGEX as in BSD's split.
21 * support --suppress-matched as in csplit. */
22 #include <config.h>
23
24 #include <stdckdint.h>
25 #include <stdio.h>
26 #include <getopt.h>
27 #include <signal.h>
28 #include <sys/types.h>
29 #include <sys/wait.h>
30
31 #include "system.h"
32 #include "alignalloc.h"
33 #include "assure.h"
34 #include "fadvise.h"
35 #include "fd-reopen.h"
36 #include "fcntl--.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
39 #include "quote.h"
40 #include "sig2str.h"
41 #include "sys-limits.h"
42 #include "temp-stream.h"
43 #include "xbinary-io.h"
44 #include "xdectoint.h"
45 #include "xstrtol.h"
46
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "split"

/* Author list for this program.  The first entry supplies both an ASCII
   spelling and a UTF-8 encoded spelling of the same name.  */
#define AUTHORS \
  proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
  proper_name ("Richard M. Stallman")

/* Shell command to filter through, instead of creating files.
   Null when output goes to ordinary files.  */
static char const *filter_command;

/* Process ID of the filter most recently started by create ().  */
static pid_t filter_pid;

/* Array of open pipes: the write ends of the pipes feeding filter
   children.  OPEN_PIPES_ALLOC is the number of slots allocated and
   N_OPEN_PIPES the number currently in use.  */
static int *open_pipes;
static idx_t open_pipes_alloc;
static int n_open_pipes;

/* Whether SIGPIPE has the default action, when --filter is used.
   If so, create () restores SIG_DFL in the child before exec.  */
static bool default_SIGPIPE;

/* Base name of output files.  */
static char const *outbase;

/* Name of output files.  Allocated and updated by next_file_name ().  */
static char *outfile;

/* Pointer to the end of the prefix in OUTFILE.
   Suffixes are inserted here.  */
static char *outfile_mid;

/* Generate new suffix when suffixes are exhausted.  */
static bool suffix_auto = true;

/* Length of OUTFILE's suffix.  Zero until set by the user or by
   set_suffix_length ().  */
static idx_t suffix_length;

/* Alphabet of characters to use in suffix.  */
static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";

/* Numerical suffix start value, as a digit string; null if none given.  */
static char const *numeric_suffix_start;

/* Additional suffix to append to output file names; null if none.  */
static char const *additional_suffix;

/* Name of input file.  May be "-".  */
static char *infile;

/* stat buf for input file.  create () compares it against an existing
   output file to avoid overwriting the input.  */
static struct stat in_stat_buf;

/* Descriptor on which output file is open; -1 when none is open.  */
static int output_desc = -1;

/* If true, print a diagnostic on standard error just before each
   output file is opened.
   NOTE(review): create () actually prints the diagnostic to stdout;
   confirm which stream is intended.  */
static bool verbose;

/* If true, don't generate zero length output files.  */
static bool elide_empty_files;

/* If true, in round robin mode, immediately copy
   input to output, which is much slower, so disabled by default.  */
static bool unbuffered;

/* The character marking end of line.  Defaults to \n below.
   -1 means "not yet chosen".  */
static int eolchar = -1;
/* The split mode to use.
   type_chunk_bytes, type_chunk_lines and type_rr are the modes in which
   the number of output files is known in advance (see
   set_suffix_length).  The remaining names presumably correspond to the
   -b, -C, -l and numeric line-count options -- TODO confirm against the
   option parser, which is not in this part of the file.  */
enum Split_type
{
  type_undef, type_bytes, type_byteslines, type_lines, type_digits,
  type_chunk_bytes, type_chunk_lines, type_rr
};
122
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   These values appear in the last field of the LONGOPTS entries.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  FILTER_OPTION,
  IO_BLKSIZE_OPTION,
  ADDITIONAL_SUFFIX_OPTION
};
132
/* Table of long options.  Each entry maps a long option name to the
   short option character (or pseudo short option from the enum above)
   it is reported as.  The "-io-blksize" entry has a leading '-' in its
   name and is deliberately undocumented.  */
static struct option const longopts[] =
{
  {"bytes", required_argument, nullptr, 'b'},
  {"lines", required_argument, nullptr, 'l'},
  {"line-bytes", required_argument, nullptr, 'C'},
  {"number", required_argument, nullptr, 'n'},
  {"elide-empty-files", no_argument, nullptr, 'e'},
  {"unbuffered", no_argument, nullptr, 'u'},
  {"suffix-length", required_argument, nullptr, 'a'},
  {"additional-suffix", required_argument, nullptr,
   ADDITIONAL_SUFFIX_OPTION},
  {"numeric-suffixes", optional_argument, nullptr, 'd'},
  {"hex-suffixes", optional_argument, nullptr, 'x'},
  {"filter", required_argument, nullptr, FILTER_OPTION},
  {"verbose", no_argument, nullptr, VERBOSE_OPTION},
  {"separator", required_argument, nullptr, 't'},
  {"-io-blksize", required_argument, nullptr,
   IO_BLKSIZE_OPTION}, /* do not document */
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {nullptr, 0, nullptr, 0}
};
155
156 /* Return true if the errno value, ERR, is ignorable. */
157 static inline bool
ignorable(int err)158 ignorable (int err)
159 {
160 return filter_command && err == EPIPE;
161 }
162
163 static void
set_suffix_length(intmax_t n_units,enum Split_type split_type)164 set_suffix_length (intmax_t n_units, enum Split_type split_type)
165 {
166 #define DEFAULT_SUFFIX_LENGTH 2
167
168 int suffix_length_needed = 0;
169
170 /* The suffix auto length feature is incompatible with
171 a user specified start value as the generated suffixes
172 are not all consecutive. */
173 if (numeric_suffix_start)
174 suffix_auto = false;
175
176 /* Auto-calculate the suffix length if the number of files is given. */
177 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
178 || split_type == type_rr)
179 {
180 intmax_t n_units_end = n_units - 1;
181 if (numeric_suffix_start)
182 {
183 intmax_t n_start;
184 strtol_error e = xstrtoimax (numeric_suffix_start, nullptr, 10,
185 &n_start, "");
186 if (e == LONGINT_OK && n_start < n_units)
187 {
188 /* Restrict auto adjustment so we don't keep
189 incrementing a suffix size arbitrarily,
190 as that would break sort order for files
191 generated from multiple split runs. */
192 if (ckd_add (&n_units_end, n_units_end, n_start))
193 n_units_end = INTMAX_MAX;
194 }
195
196 }
197 idx_t alphabet_len = strlen (suffix_alphabet);
198 do
199 suffix_length_needed++;
200 while (n_units_end /= alphabet_len);
201
202 suffix_auto = false;
203 }
204
205 if (suffix_length) /* set by user */
206 {
207 if (suffix_length < suffix_length_needed)
208 error (EXIT_FAILURE, 0,
209 _("the suffix length needs to be at least %d"),
210 suffix_length_needed);
211 suffix_auto = false;
212 return;
213 }
214 else
215 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_length_needed);
216 }
217
/* Print usage information and exit with status STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the full help text is written to stdout.
   This function does not return.  */
void
usage (int status)
{
  if (status != EXIT_SUCCESS)
    emit_try_help ();
  else
    {
      printf (_("\
Usage: %s [OPTION]... [FILE [PREFIX]]\n\
"),
              program_name);
      fputs (_("\
Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\
default size is 1000 lines, and default PREFIX is 'x'.\n\
"), stdout);

      emit_stdin_note ();
      emit_mandatory_arg_note ();

      /* The only format argument is the default suffix length.  */
      fprintf (stdout, _("\
-a, --suffix-length=N generate suffixes of length N (default %d)\n\
--additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
-b, --bytes=SIZE put SIZE bytes per output file\n\
-C, --line-bytes=SIZE put at most SIZE bytes of records per output file\n\
-d use numeric suffixes starting at 0, not alphabetic\n\
--numeric-suffixes[=FROM] same as -d, but allow setting the start value\
\n\
-x use hex suffixes starting at 0, not alphabetic\n\
--hex-suffixes[=FROM] same as -x, but allow setting the start value\n\
-e, --elide-empty-files do not generate empty output files with '-n'\n\
--filter=COMMAND write to shell COMMAND; file name is $FILE\n\
-l, --lines=NUMBER put NUMBER lines/records per output file\n\
-n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
-t, --separator=SEP use SEP instead of newline as the record separator;\n\
'\\0' (zero) specifies the NUL character\n\
-u, --unbuffered immediately copy input to output with '-n r/...'\n\
"), DEFAULT_SUFFIX_LENGTH);
      fputs (_("\
--verbose print a diagnostic just before each\n\
output file is opened\n\
"), stdout);
      fputs (HELP_OPTION_DESCRIPTION, stdout);
      fputs (VERSION_OPTION_DESCRIPTION, stdout);
      emit_size_note ();
      fputs (_("\n\
CHUNKS may be:\n\
N split into N files based on size of input\n\
K/N output Kth of N to stdout\n\
l/N split into N files without splitting lines/records\n\
l/K/N output Kth of N to stdout without splitting lines/records\n\
r/N like 'l' but use round robin distribution\n\
r/K/N likewise but only output Kth of N to stdout\n\
"), stdout);
      emit_ancillary_info (PROGRAM_NAME);
    }
  exit (status);
}
275
276 /* Copy the data in FD to a temporary file, then make that file FD.
277 Use BUF, of size BUFSIZE, to copy. Return the number of
278 bytes copied, or -1 (setting errno) on error. */
279 static off_t
copy_to_tmpfile(int fd,char * buf,idx_t bufsize)280 copy_to_tmpfile (int fd, char *buf, idx_t bufsize)
281 {
282 FILE *tmp;
283 if (!temp_stream (&tmp, nullptr))
284 return -1;
285 off_t copied = 0;
286 off_t r;
287
288 while (0 < (r = read (fd, buf, bufsize)))
289 {
290 if (fwrite (buf, 1, r, tmp) != r)
291 return -1;
292 if (ckd_add (&copied, copied, r))
293 {
294 errno = EOVERFLOW;
295 return -1;
296 }
297 }
298
299 if (r < 0)
300 return r;
301 r = dup2 (fileno (tmp), fd);
302 if (r < 0)
303 return r;
304 if (fclose (tmp) < 0)
305 return -1;
306 return copied;
307 }
308
309 /* Return the number of bytes that can be read from FD with status ST.
310 Store up to the first BUFSIZE bytes of the file's data into BUF,
311 and advance the file position by the number of bytes read. On
312 input error, set errno and return -1. */
313
314 static off_t
input_file_size(int fd,struct stat const * st,char * buf,idx_t bufsize)315 input_file_size (int fd, struct stat const *st, char *buf, idx_t bufsize)
316 {
317 off_t size = 0;
318 do
319 {
320 ssize_t n_read = read (fd, buf + size, bufsize - size);
321 if (n_read <= 0)
322 return n_read < 0 ? n_read : size;
323 size += n_read;
324 }
325 while (size < bufsize);
326
327 off_t cur, end;
328 if ((usable_st_size (st) && st->st_size < size)
329 || (cur = lseek (fd, 0, SEEK_CUR)) < 0
330 || cur < size /* E.g., /dev/zero on GNU/Linux. */
331 || (end = lseek (fd, 0, SEEK_END)) < 0)
332 {
333 char *tmpbuf = xmalloc (bufsize);
334 end = copy_to_tmpfile (fd, tmpbuf, bufsize);
335 free (tmpbuf);
336 if (end < 0)
337 return end;
338 cur = 0;
339 }
340
341 if (end == OFF_T_MAX /* E.g., /dev/zero on GNU/Hurd. */
342 || (cur < end && ckd_add (&size, size, end - cur)))
343 {
344 errno = EOVERFLOW;
345 return -1;
346 }
347
348 if (cur < end)
349 {
350 off_t r = lseek (fd, cur, SEEK_SET);
351 if (r < 0)
352 return r;
353 }
354
355 return size;
356 }
357
358 /* Compute the next sequential output file name and store it into the
359 string 'outfile'. */
360
361 static void
next_file_name(void)362 next_file_name (void)
363 {
364 /* Index in suffix_alphabet of each character in the suffix. */
365 static idx_t *sufindex;
366 static idx_t outbase_length;
367 static idx_t outfile_length;
368 static idx_t addsuf_length;
369
370 if (! outfile)
371 {
372 bool overflow, widen;
373
374 new_name:
375 widen = !! outfile_length;
376
377 if (! widen)
378 {
379 /* Allocate and initialize the first file name. */
380
381 outbase_length = strlen (outbase);
382 addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
383 overflow = ckd_add (&outfile_length, outbase_length + addsuf_length,
384 suffix_length);
385 }
386 else
387 {
388 /* Reallocate and initialize a new wider file name.
389 We do this by subsuming the unchanging part of
390 the generated suffix into the prefix (base), and
391 reinitializing the now one longer suffix. */
392
393 overflow = ckd_add (&outfile_length, outfile_length, 2);
394 suffix_length++;
395 }
396
397 idx_t outfile_size;
398 overflow |= ckd_add (&outfile_size, outfile_length, 1);
399 if (overflow)
400 xalloc_die ();
401 outfile = xirealloc (outfile, outfile_size);
402
403 if (! widen)
404 memcpy (outfile, outbase, outbase_length);
405 else
406 {
407 /* Append the last alphabet character to the file name prefix. */
408 outfile[outbase_length] = suffix_alphabet[sufindex[0]];
409 outbase_length++;
410 }
411
412 outfile_mid = outfile + outbase_length;
413 memset (outfile_mid, suffix_alphabet[0], suffix_length);
414 if (additional_suffix)
415 memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
416 outfile[outfile_length] = 0;
417
418 free (sufindex);
419 sufindex = xicalloc (suffix_length, sizeof *sufindex);
420
421 if (numeric_suffix_start)
422 {
423 affirm (! widen);
424
425 /* Update the output file name. */
426 idx_t i = strlen (numeric_suffix_start);
427 memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
428
429 /* Update the suffix index. */
430 idx_t *sufindex_end = sufindex + suffix_length;
431 while (i-- != 0)
432 *--sufindex_end = numeric_suffix_start[i] - '0';
433 }
434
435 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
436 /* POSIX requires that if the output file name is too long for
437 its directory, 'split' must fail without creating any files.
438 This must be checked for explicitly on operating systems that
439 silently truncate file names. */
440 {
441 char *dir = dir_name (outfile);
442 long name_max = pathconf (dir, _PC_NAME_MAX);
443 if (0 <= name_max && name_max < base_len (last_component (outfile)))
444 error (EXIT_FAILURE, ENAMETOOLONG, "%s", quotef (outfile));
445 free (dir);
446 }
447 #endif
448 }
449 else
450 {
451 /* Increment the suffix in place, if possible. */
452
453 idx_t i = suffix_length;
454 while (i-- != 0)
455 {
456 sufindex[i]++;
457 if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1])
458 goto new_name;
459 outfile_mid[i] = suffix_alphabet[sufindex[i]];
460 if (outfile_mid[i])
461 return;
462 sufindex[i] = 0;
463 outfile_mid[i] = suffix_alphabet[sufindex[i]];
464 }
465 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
466 }
467 }
468
469 /* Create or truncate a file. */
470
471 static int
create(char const * name)472 create (char const *name)
473 {
474 if (!filter_command)
475 {
476 if (verbose)
477 fprintf (stdout, _("creating file %s\n"), quoteaf (name));
478
479 int oflags = O_WRONLY | O_CREAT | O_BINARY;
480 int fd = open (name, oflags | O_EXCL, MODE_RW_UGO);
481 if (0 <= fd || errno != EEXIST)
482 return fd;
483 fd = open (name, oflags, MODE_RW_UGO);
484 if (fd < 0)
485 return fd;
486 struct stat out_stat_buf;
487 if (fstat (fd, &out_stat_buf) != 0)
488 error (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf (name));
489 if (psame_inode (&in_stat_buf, &out_stat_buf))
490 error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
491 quoteaf (name));
492 bool regularish
493 = S_ISREG (out_stat_buf.st_mode) || S_TYPEISSHM (&out_stat_buf);
494 if (! (regularish && out_stat_buf.st_size == 0)
495 && ftruncate (fd, 0) < 0 && regularish)
496 error (EXIT_FAILURE, errno, _("%s: error truncating"), quotef (name));
497
498 return fd;
499 }
500 else
501 {
502 int fd_pair[2];
503 pid_t child_pid;
504 char const *shell_prog = getenv ("SHELL");
505 if (shell_prog == nullptr)
506 shell_prog = "/bin/sh";
507 if (setenv ("FILE", name, 1) != 0)
508 error (EXIT_FAILURE, errno,
509 _("failed to set FILE environment variable"));
510 if (verbose)
511 fprintf (stdout, _("executing with FILE=%s\n"), quotef (name));
512 if (pipe (fd_pair) != 0)
513 error (EXIT_FAILURE, errno, _("failed to create pipe"));
514 child_pid = fork ();
515 if (child_pid == 0)
516 {
517 /* This is the child process. If an error occurs here, the
518 parent will eventually learn about it after doing a wait,
519 at which time it will emit its own error message. */
520 int j;
521 /* We have to close any pipes that were opened during an
522 earlier call, otherwise this process will be holding a
523 write-pipe that will prevent the earlier process from
524 reading an EOF on the corresponding read-pipe. */
525 for (j = 0; j < n_open_pipes; ++j)
526 if (close (open_pipes[j]) != 0)
527 error (EXIT_FAILURE, errno, _("closing prior pipe"));
528 if (close (fd_pair[1]))
529 error (EXIT_FAILURE, errno, _("closing output pipe"));
530 if (fd_pair[0] != STDIN_FILENO)
531 {
532 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
533 error (EXIT_FAILURE, errno, _("moving input pipe"));
534 if (close (fd_pair[0]) != 0)
535 error (EXIT_FAILURE, errno, _("closing input pipe"));
536 }
537 if (default_SIGPIPE)
538 signal (SIGPIPE, SIG_DFL);
539 execl (shell_prog, last_component (shell_prog), "-c",
540 filter_command, (char *) nullptr);
541 error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
542 shell_prog, filter_command);
543 }
544 if (child_pid < 0)
545 error (EXIT_FAILURE, errno, _("fork system call failed"));
546 if (close (fd_pair[0]) != 0)
547 error (EXIT_FAILURE, errno, _("failed to close input pipe"));
548 filter_pid = child_pid;
549 if (n_open_pipes == open_pipes_alloc)
550 open_pipes = xpalloc (open_pipes, &open_pipes_alloc, 1,
551 MIN (INT_MAX, IDX_MAX), sizeof *open_pipes);
552 open_pipes[n_open_pipes++] = fd_pair[1];
553 return fd_pair[1];
554 }
555 }
556
557 /* Close the output file, and do any associated cleanup.
558 If FP and FD are both specified, they refer to the same open file;
559 in this case FP is closed, but FD is still used in cleanup. */
560 static void
closeout(FILE * fp,int fd,pid_t pid,char const * name)561 closeout (FILE *fp, int fd, pid_t pid, char const *name)
562 {
563 if (fp != nullptr && fclose (fp) != 0 && ! ignorable (errno))
564 error (EXIT_FAILURE, errno, "%s", quotef (name));
565 if (fd >= 0)
566 {
567 if (fp == nullptr && close (fd) < 0)
568 error (EXIT_FAILURE, errno, "%s", quotef (name));
569 int j;
570 for (j = 0; j < n_open_pipes; ++j)
571 {
572 if (open_pipes[j] == fd)
573 {
574 open_pipes[j] = open_pipes[--n_open_pipes];
575 break;
576 }
577 }
578 }
579 if (pid > 0)
580 {
581 int wstatus;
582 if (waitpid (pid, &wstatus, 0) < 0)
583 error (EXIT_FAILURE, errno, _("waiting for child process"));
584 else if (WIFSIGNALED (wstatus))
585 {
586 int sig = WTERMSIG (wstatus);
587 if (sig != SIGPIPE)
588 {
589 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
590 if (sig2str (sig, signame) != 0)
591 sprintf (signame, "%d", sig);
592 error (sig + 128, 0,
593 _("with FILE=%s, signal %s from command: %s"),
594 quotef (name), signame, filter_command);
595 }
596 }
597 else if (WIFEXITED (wstatus))
598 {
599 int ex = WEXITSTATUS (wstatus);
600 if (ex != 0)
601 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
602 quotef (name), ex, filter_command);
603 }
604 else
605 {
606 /* shouldn't happen. */
607 error (EXIT_FAILURE, 0,
608 _("unknown status from command (0x%X)"), wstatus + 0u);
609 }
610 }
611 }
612
613 /* Write BYTES bytes at BP to an output file.
614 If NEW_FILE_FLAG is true, open the next output file.
615 Otherwise add to the same output file already in use.
616 Return true if successful. */
617
618 static bool
cwrite(bool new_file_flag,char const * bp,idx_t bytes)619 cwrite (bool new_file_flag, char const *bp, idx_t bytes)
620 {
621 if (new_file_flag)
622 {
623 if (!bp && bytes == 0 && elide_empty_files)
624 return true;
625 closeout (nullptr, output_desc, filter_pid, outfile);
626 next_file_name ();
627 output_desc = create (outfile);
628 if (output_desc < 0)
629 error (EXIT_FAILURE, errno, "%s", quotef (outfile));
630 }
631
632 if (full_write (output_desc, bp, bytes) == bytes)
633 return true;
634 else
635 {
636 if (! ignorable (errno))
637 error (EXIT_FAILURE, errno, "%s", quotef (outfile));
638 return false;
639 }
640 }
641
/* Split into pieces of exactly N_BYTES bytes.
   However, the first REM_BYTES pieces should be 1 byte longer.
   Use buffer BUF, whose size is BUFSIZE.
   If INITIAL_READ is nonnegative,
   BUF contains the first INITIAL_READ input bytes.
   If MAX_FILES is nonzero, at most MAX_FILES outputs are opened (the
   last one receives all remaining input), and exactly MAX_FILES are
   created even when the input runs out early.  */

static void
bytes_split (intmax_t n_bytes, intmax_t rem_bytes,
             char *buf, idx_t bufsize, ssize_t initial_read,
             intmax_t max_files)
{
  bool new_file_flag = true;
  /* False once a write to the current output failed with an ignorable
     error, i.e. cwrite returned false.  */
  bool filter_ok = true;
  /* Number of outputs started so far.  */
  intmax_t opened = 0;
  /* Bytes still to be written to the current piece.  */
  intmax_t to_write = n_bytes + (0 < rem_bytes);
  /* A zero piece size means there is nothing to do.  */
  bool eof = ! to_write;

  while (! eof)
    {
      ssize_t n_read;
      if (0 <= initial_read)
        {
          /* Consume the data the caller already placed in BUF.  A
             short initial buffer means the input is exhausted.  */
          n_read = initial_read;
          initial_read = -1;
          eof = n_read < bufsize;
        }
      else
        {
          /* If the current filter stopped accepting input, try to skip
             the rest of its piece by seeking rather than reading it,
             and start the next piece.  */
          if (! filter_ok
              && 0 <= lseek (STDIN_FILENO, to_write, SEEK_CUR))
            {
              to_write = n_bytes + (opened + 1 < rem_bytes);
              new_file_flag = true;
            }

          n_read = read (STDIN_FILENO, buf, bufsize);
          if (n_read < 0)
            error (EXIT_FAILURE, errno, "%s", quotef (infile));
          eof = n_read == 0;
        }
      char *bp_out = buf;
      /* Emit every piece that is completed by the data in BUF.  */
      while (0 < to_write && to_write <= n_read)
        {
          if (filter_ok || new_file_flag)
            filter_ok = cwrite (new_file_flag, bp_out, to_write);
          opened += new_file_flag;
          /* Keep opening new files until MAX_FILES (if any) is hit.  */
          new_file_flag = !max_files || (opened < max_files);
          if (! filter_ok && ! new_file_flag)
            {
              /* If filters no longer accepting input, stop reading.  */
              n_read = 0;
              eof = true;
              break;
            }
          bp_out += to_write;
          n_read -= to_write;
          to_write = n_bytes + (opened < rem_bytes);
        }
      /* Whatever is left in BUF is the start of an incomplete piece.  */
      if (0 < n_read)
        {
          if (filter_ok || new_file_flag)
            filter_ok = cwrite (new_file_flag, bp_out, n_read);
          opened += new_file_flag;
          new_file_flag = false;
          if (! filter_ok && opened == max_files)
            {
              /* If filters no longer accepting input, stop reading.  */
              break;
            }
          to_write -= n_read;
        }
    }

  /* Ensure NUMBER files are created, which truncates
     any existing files or notifies any consumers on fifos.
     FIXME: Should we do this before EXIT_FAILURE?  */
  while (opened++ < max_files)
    cwrite (true, nullptr, 0);
}
721
722 /* Split into pieces of exactly N_LINES lines.
723 Use buffer BUF, whose size is BUFSIZE. */
724
725 static void
lines_split(intmax_t n_lines,char * buf,idx_t bufsize)726 lines_split (intmax_t n_lines, char *buf, idx_t bufsize)
727 {
728 ssize_t n_read;
729 char *bp, *bp_out, *eob;
730 bool new_file_flag = true;
731 intmax_t n = 0;
732
733 do
734 {
735 n_read = read (STDIN_FILENO, buf, bufsize);
736 if (n_read < 0)
737 error (EXIT_FAILURE, errno, "%s", quotef (infile));
738 bp = bp_out = buf;
739 eob = bp + n_read;
740 *eob = eolchar;
741 while (true)
742 {
743 bp = rawmemchr (bp, eolchar);
744 if (bp == eob)
745 {
746 if (eob != bp_out) /* do not write 0 bytes! */
747 {
748 idx_t len = eob - bp_out;
749 cwrite (new_file_flag, bp_out, len);
750 new_file_flag = false;
751 }
752 break;
753 }
754
755 ++bp;
756 if (++n >= n_lines)
757 {
758 cwrite (new_file_flag, bp_out, bp - bp_out);
759 bp_out = bp;
760 new_file_flag = true;
761 n = 0;
762 }
763 }
764 }
765 while (n_read);
766 }
767
768 /* Split into pieces that are as large as possible while still not more
769 than N_BYTES bytes, and are split on line boundaries except
770 where lines longer than N_BYTES bytes occur. */
771
772 static void
line_bytes_split(intmax_t n_bytes,char * buf,idx_t bufsize)773 line_bytes_split (intmax_t n_bytes, char *buf, idx_t bufsize)
774 {
775 ssize_t n_read;
776 intmax_t n_out = 0; /* for each split. */
777 idx_t n_hold = 0;
778 char *hold = nullptr; /* for lines > bufsize. */
779 idx_t hold_size = 0;
780 bool split_line = false; /* Whether a \n was output in a split. */
781
782 do
783 {
784 n_read = read (STDIN_FILENO, buf, bufsize);
785 if (n_read < 0)
786 error (EXIT_FAILURE, errno, "%s", quotef (infile));
787 idx_t n_left = n_read;
788 char *sob = buf;
789 while (n_left)
790 {
791 idx_t split_rest = 0;
792 char *eoc = nullptr;
793 char *eol;
794
795 /* Determine End Of Chunk and/or End of Line,
796 which are used below to select what to write or buffer. */
797 if (n_bytes - n_out - n_hold <= n_left)
798 {
799 /* Have enough for split. */
800 split_rest = n_bytes - n_out - n_hold;
801 eoc = sob + split_rest - 1;
802 eol = memrchr (sob, eolchar, split_rest);
803 }
804 else
805 eol = memrchr (sob, eolchar, n_left);
806
807 /* Output hold space if possible. */
808 if (n_hold && !(!eol && n_out))
809 {
810 cwrite (n_out == 0, hold, n_hold);
811 n_out += n_hold;
812 if (n_hold > bufsize)
813 hold = xirealloc (hold, bufsize);
814 n_hold = 0;
815 hold_size = bufsize;
816 }
817
818 /* Output to eol if present. */
819 if (eol)
820 {
821 split_line = true;
822 idx_t n_write = eol - sob + 1;
823 cwrite (n_out == 0, sob, n_write);
824 n_out += n_write;
825 n_left -= n_write;
826 sob += n_write;
827 if (eoc)
828 split_rest -= n_write;
829 }
830
831 /* Output to eoc or eob if possible. */
832 if (n_left && !split_line)
833 {
834 idx_t n_write = eoc ? split_rest : n_left;
835 cwrite (n_out == 0, sob, n_write);
836 n_out += n_write;
837 n_left -= n_write;
838 sob += n_write;
839 if (eoc)
840 split_rest -= n_write;
841 }
842
843 /* Update hold if needed. */
844 if ((eoc && split_rest) || (!eoc && n_left))
845 {
846 idx_t n_buf = eoc ? split_rest : n_left;
847 if (hold_size - n_hold < n_buf)
848 hold = xpalloc (hold, &hold_size, n_buf - (hold_size - n_hold),
849 -1, sizeof *hold);
850 memcpy (hold + n_hold, sob, n_buf);
851 n_hold += n_buf;
852 n_left -= n_buf;
853 sob += n_buf;
854 }
855
856 /* Reset for new split. */
857 if (eoc)
858 {
859 n_out = 0;
860 split_line = false;
861 }
862 }
863 }
864 while (n_read);
865
866 /* Handle no eol at end of file. */
867 if (n_hold)
868 cwrite (n_out == 0, hold, n_hold);
869
870 free (hold);
871 }
872
/* -n l/[K/]N: Write lines to files of approximately file size / N.
   The file is partitioned into file size / N sized portions, with the
   last assigned any excess.  If a line _starts_ within a partition
   it is written completely to the corresponding file.  Since lines
   are not split even if they overlap a partition, the files written
   can be larger or smaller than the partition size, and even empty
   if a line is so long as to completely overlap the partition.

   If K is zero, all N chunks are written to successive output files;
   otherwise only chunk K is written, to standard output.
   BUF, of size BUFSIZE, is the read buffer; if INITIAL_READ is
   nonnegative BUF already holds the first INITIAL_READ input bytes.
   FILE_SIZE is the total input size.  */

static void
lines_chunk_split (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
                   ssize_t initial_read, off_t file_size)
{
  affirm (n && k <= n);

  /* The first REM_BYTES chunks are one byte longer than CHUNK_SIZE.  */
  intmax_t rem_bytes = file_size % n;
  off_t chunk_size = file_size / n;
  intmax_t chunk_no = 1;
  /* Offset just past the end of the current chunk.  */
  off_t chunk_end = chunk_size + (0 < rem_bytes);
  /* Input offset consumed so far (written or skipped).  */
  off_t n_written = 0;
  bool new_file_flag = true;
  /* Set when BUF ran out exactly while still owing a chunk advance.  */
  bool chunk_truncated = false;

  if (k > 1 && 0 < file_size)
    {
      /* Start reading 1 byte before kth chunk of file.  */
      off_t start = (k - 1) * chunk_size + MIN (k - 1, rem_bytes) - 1;
      if (start < initial_read)
        {
          /* The start is already in BUF: slide it to the front.  */
          memmove (buf, buf + start, initial_read - start);
          initial_read -= start;
        }
      else
        {
          /* Skip forward in the input; BUF's contents are unused.  */
          if (initial_read < start
              && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
            error (EXIT_FAILURE, errno, "%s", quotef (infile));
          initial_read = -1;
        }
      /* Pretend chunk K - 1 consists of just the one byte before
         chunk K, so the line scan below starts there.  */
      n_written = start;
      chunk_no = k - 1;
      chunk_end = start + 1;
    }

  while (n_written < file_size)
    {
      char *bp = buf, *eob;
      ssize_t n_read;
      if (0 <= initial_read)
        {
          n_read = initial_read;
          initial_read = -1;
        }
      else
        {
          /* Never read beyond FILE_SIZE.  */
          n_read = read (STDIN_FILENO, buf,
                         MIN (bufsize, file_size - n_written));
          if (n_read < 0)
            error (EXIT_FAILURE, errno, "%s", quotef (infile));
        }
      if (n_read == 0)
        break; /* eof.  */
      chunk_truncated = false;
      eob = buf + n_read;

      while (bp != eob)
        {
          idx_t to_write;
          /* Whether the data about to be written ends a line at or
             after the last byte of the chunk.  */
          bool next = false;

          /* Begin looking for '\n' at last byte of chunk.  */
          off_t skip = MIN (n_read, MAX (0, chunk_end - 1 - n_written));
          char *bp_out = memchr (bp + skip, eolchar, n_read - skip);
          if (bp_out)
            {
              bp_out++;
              next = true;
            }
          else
            bp_out = eob;
          to_write = bp_out - bp;

          if (k == chunk_no)
            {
              /* We don't use the stdout buffer here since we're writing
                 large chunks from an existing file, so it's more efficient
                 to write out directly.  */
              if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
                write_error ();
            }
          else if (! k)
            cwrite (new_file_flag, bp, to_write);
          n_written += to_write;
          bp += to_write;
          n_read -= to_write;
          new_file_flag = next;

          /* A line could have been so long that it skipped
             entire chunks.  So create empty files in that case.  */
          while (next || chunk_end <= n_written)
            {
              if (!next && bp == eob)
                {
                  /* replenish buf, before going to next chunk.  */
                  chunk_truncated = true;
                  break;
                }
              /* The requested chunk has been fully output.  */
              if (k == chunk_no)
                return;
              chunk_end += chunk_size + (chunk_no < rem_bytes);
              chunk_no++;
              if (chunk_end <= n_written)
                {
                  if (! k)
                    cwrite (true, nullptr, 0);
                }
              else
                next = false;
            }
        }
    }

  /* Account for the chunk advance that was deferred above.  */
  if (chunk_truncated)
    chunk_no++;

  /* Ensure NUMBER files are created, which truncates
     any existing files or notifies any consumers on fifos.
     FIXME: Should we do this before EXIT_FAILURE?  */
  if (!k)
    while (chunk_no++ <= n)
      cwrite (true, nullptr, 0);
}
1004
1005 /* -n K/N: Extract Kth of N chunks. */
1006
1007 static void
bytes_chunk_extract(intmax_t k,intmax_t n,char * buf,idx_t bufsize,ssize_t initial_read,off_t file_size)1008 bytes_chunk_extract (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
1009 ssize_t initial_read, off_t file_size)
1010 {
1011 off_t start;
1012 off_t end;
1013
1014 assert (0 < k && k <= n);
1015
1016 start = (k - 1) * (file_size / n) + MIN (k - 1, file_size % n);
1017 end = k == n ? file_size : k * (file_size / n) + MIN (k, file_size % n);
1018
1019 if (start < initial_read)
1020 {
1021 memmove (buf, buf + start, initial_read - start);
1022 initial_read -= start;
1023 }
1024 else
1025 {
1026 if (initial_read < start
1027 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
1028 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1029 initial_read = -1;
1030 }
1031
1032 while (start < end)
1033 {
1034 ssize_t n_read;
1035 if (0 <= initial_read)
1036 {
1037 n_read = initial_read;
1038 initial_read = -1;
1039 }
1040 else
1041 {
1042 n_read = read (STDIN_FILENO, buf, bufsize);
1043 if (n_read < 0)
1044 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1045 }
1046 if (n_read == 0)
1047 break; /* eof. */
1048 n_read = MIN (n_read, end - start);
1049 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
1050 && ! ignorable (errno))
1051 error (EXIT_FAILURE, errno, "%s", quotef ("-"));
1052 start += n_read;
1053 }
1054 }
1055
/* Bookkeeping for one output in a set of outputs that are written to
   in turn (see ofile_open).  */
typedef struct of_info
{
  char *of_name;  /* Name passed to create () or open ().  */
  int ofd;        /* Open descriptor, or OFD_NEW / OFD_APPEND if closed.  */
  FILE *ofile;    /* Stream on OFD; null while the output is closed.  */
  pid_t opid;     /* Value of filter_pid recorded when it was opened.  */
} of_t;
1063
/* Negative values of of_t.ofd, describing an output that is not
   currently open.  */
enum
{
  OFD_NEW = -1,     /* Not opened yet: ofile_open calls create ().  */
  OFD_APPEND = -2   /* Closed to free a descriptor: reopened for append.  */
};
1069
1070 /* Rotate file descriptors when we're writing to more output files than we
1071 have available file descriptors.
1072 Return whether we came under file resource pressure.
1073 If so, it's probably best to close each file when finished with it. */
1074
1075 static bool
ofile_open(of_t * files,idx_t i_check,idx_t nfiles)1076 ofile_open (of_t *files, idx_t i_check, idx_t nfiles)
1077 {
1078 bool file_limit = false;
1079
1080 if (files[i_check].ofd <= OFD_NEW)
1081 {
1082 int fd;
1083 idx_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
1084
1085 /* Another process could have opened a file in between the calls to
1086 close and open, so we should keep trying until open succeeds or
1087 we've closed all of our files. */
1088 while (true)
1089 {
1090 if (files[i_check].ofd == OFD_NEW)
1091 fd = create (files[i_check].of_name);
1092 else /* OFD_APPEND */
1093 {
1094 /* Attempt to append to previously opened file.
1095 We use O_NONBLOCK to support writing to fifos,
1096 where the other end has closed because of our
1097 previous close. In that case we'll immediately
1098 get an error, rather than waiting indefinitely.
1099 In specialized cases the consumer can keep reading
1100 from the fifo, terminating on conditions in the data
1101 itself, or perhaps never in the case of 'tail -f'.
1102 I.e., for fifos it is valid to attempt this reopen.
1103
1104 We don't handle the filter_command case here, as create()
1105 will exit if there are not enough files in that case.
1106 I.e., we don't support restarting filters, as that would
1107 put too much burden on users specifying --filter commands. */
1108 fd = open (files[i_check].of_name,
1109 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
1110 }
1111
1112 if (0 <= fd)
1113 break;
1114
1115 if (!(errno == EMFILE || errno == ENFILE))
1116 error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1117
1118 file_limit = true;
1119
1120 /* Search backwards for an open file to close. */
1121 while (files[i_reopen].ofd < 0)
1122 {
1123 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
1124 /* No more open files to close, exit with E[NM]FILE. */
1125 if (i_reopen == i_check)
1126 error (EXIT_FAILURE, errno, "%s",
1127 quotef (files[i_check].of_name));
1128 }
1129
1130 if (fclose (files[i_reopen].ofile) != 0)
1131 error (EXIT_FAILURE, errno, "%s", quotef (files[i_reopen].of_name));
1132 files[i_reopen].ofile = nullptr;
1133 files[i_reopen].ofd = OFD_APPEND;
1134 }
1135
1136 files[i_check].ofd = fd;
1137 FILE *ofile = fdopen (fd, "a");
1138 if (!ofile)
1139 error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1140 files[i_check].ofile = ofile;
1141 files[i_check].opid = filter_pid;
1142 filter_pid = 0;
1143 }
1144
1145 return file_limit;
1146 }
1147
1148 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
1149 Use BUF of size BUFSIZE for the buffer, and if allocating storage
1150 put its address into *FILESP to pacify -fsanitize=leak.
1151 When K == 0, we try to keep the files open in parallel.
1152 If we run out of file resources, then we revert
1153 to opening and closing each file for each line. */
1154
1155 static void
lines_rr(intmax_t k,intmax_t n,char * buf,idx_t bufsize,of_t ** filesp)1156 lines_rr (intmax_t k, intmax_t n, char *buf, idx_t bufsize, of_t **filesp)
1157 {
1158 bool wrapped = false;
1159 bool wrote = false;
1160 bool file_limit;
1161 idx_t i_file;
1162 of_t *files IF_LINT (= nullptr);
1163 intmax_t line_no;
1164
1165 if (k)
1166 line_no = 1;
1167 else
1168 {
1169 if (IDX_MAX < n)
1170 xalloc_die ();
1171 files = *filesp = xinmalloc (n, sizeof *files);
1172
1173 /* Generate output file names. */
1174 for (i_file = 0; i_file < n; i_file++)
1175 {
1176 next_file_name ();
1177 files[i_file].of_name = xstrdup (outfile);
1178 files[i_file].ofd = OFD_NEW;
1179 files[i_file].ofile = nullptr;
1180 files[i_file].opid = 0;
1181 }
1182 i_file = 0;
1183 file_limit = false;
1184 }
1185
1186 while (true)
1187 {
1188 char *bp = buf, *eob;
1189 ssize_t n_read = read (STDIN_FILENO, buf, bufsize);
1190 if (n_read < 0)
1191 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1192 else if (n_read == 0)
1193 break; /* eof. */
1194 eob = buf + n_read;
1195
1196 while (bp != eob)
1197 {
1198 idx_t to_write;
1199 bool next = false;
1200
1201 /* Find end of line. */
1202 char *bp_out = memchr (bp, eolchar, eob - bp);
1203 if (bp_out)
1204 {
1205 bp_out++;
1206 next = true;
1207 }
1208 else
1209 bp_out = eob;
1210 to_write = bp_out - bp;
1211
1212 if (k)
1213 {
1214 if (line_no == k && unbuffered)
1215 {
1216 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
1217 write_error ();
1218 }
1219 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
1220 {
1221 write_error ();
1222 }
1223 if (next)
1224 line_no = (line_no == n) ? 1 : line_no + 1;
1225 }
1226 else
1227 {
1228 /* Secure file descriptor. */
1229 file_limit |= ofile_open (files, i_file, n);
1230 if (unbuffered)
1231 {
1232 /* Note writing to fd, rather than flushing the FILE gives
1233 an 8% performance benefit, due to reduced data copying. */
1234 if (full_write (files[i_file].ofd, bp, to_write) != to_write
1235 && ! ignorable (errno))
1236 error (EXIT_FAILURE, errno, "%s",
1237 quotef (files[i_file].of_name));
1238 }
1239 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
1240 && ! ignorable (errno))
1241 error (EXIT_FAILURE, errno, "%s",
1242 quotef (files[i_file].of_name));
1243
1244 if (! ignorable (errno))
1245 wrote = true;
1246
1247 if (file_limit)
1248 {
1249 if (fclose (files[i_file].ofile) != 0)
1250 error (EXIT_FAILURE, errno, "%s",
1251 quotef (files[i_file].of_name));
1252 files[i_file].ofile = nullptr;
1253 files[i_file].ofd = OFD_APPEND;
1254 }
1255 if (next && ++i_file == n)
1256 {
1257 wrapped = true;
1258 /* If no filters are accepting input, stop reading. */
1259 if (! wrote)
1260 goto no_filters;
1261 wrote = false;
1262 i_file = 0;
1263 }
1264 }
1265
1266 bp = bp_out;
1267 }
1268 }
1269
1270 no_filters:
1271 /* Ensure all files created, so that any existing files are truncated,
1272 and to signal any waiting fifo consumers.
1273 Also, close any open file descriptors.
1274 FIXME: Should we do this before EXIT_FAILURE? */
1275 if (!k)
1276 {
1277 idx_t ceiling = wrapped ? n : i_file;
1278 for (i_file = 0; i_file < n; i_file++)
1279 {
1280 if (i_file >= ceiling && !elide_empty_files)
1281 file_limit |= ofile_open (files, i_file, n);
1282 if (files[i_file].ofd >= 0)
1283 closeout (files[i_file].ofile, files[i_file].ofd,
1284 files[i_file].opid, files[i_file].of_name);
1285 files[i_file].ofd = OFD_APPEND;
1286 }
1287 }
1288 }
1289
/* Diagnose a second, conflicting split-method option, then call
   usage (EXIT_FAILURE).  Wrapped in do/while (0) so that it acts as a
   single statement.  */
#define FAIL_ONLY_ONE_WAY()                                     \
  do                                                            \
    {                                                           \
      error (0, 0, _("cannot split in more than one way"));     \
      usage (EXIT_FAILURE);                                     \
    }                                                           \
  while (0)
1297
/* Report a string-to-integer conversion failure MSGID with ARG.
   MSGID is an untranslated message id; it is passed through gettext
   here.  ARG is the offending argument and is shown quoted.  The errno
   text is appended unless errno is EINVAL.  Does not return.  */

static _Noreturn void
strtoint_die (char const *msgid, char const *arg)
{
  error (EXIT_FAILURE, errno == EINVAL ? 0 : errno, "%s: %s",
         gettext (msgid), quote (arg));
}
1306
/* Use OVERFLOW_OK when it is OK to ignore LONGINT_OVERFLOW errors, since the
   extreme value will do the right thing anyway on any practical platform.
   Callers compare a strtol_error against it: values no greater than
   OVERFLOW_OK are accepted.  */
#define OVERFLOW_OK LONGINT_OVERFLOW
1310
1311 /* Parse ARG for number of bytes or lines. The number can be followed
1312 by MULTIPLIERS, and the resulting value must be positive.
1313 If the number cannot be parsed, diagnose with MSG.
1314 Return the number parsed, or an INTMAX_MAX on overflow. */
1315
1316 static intmax_t
parse_n_units(char const * arg,char const * multipliers,char const * msgid)1317 parse_n_units (char const *arg, char const *multipliers, char const *msgid)
1318 {
1319 intmax_t n;
1320 if (OVERFLOW_OK < xstrtoimax (arg, nullptr, 10, &n, multipliers) || n < 1)
1321 strtoint_die (msgid, arg);
1322 return n;
1323 }
1324
1325 /* Parse K/N syntax of chunk options. */
1326
1327 static void
parse_chunk(intmax_t * k_units,intmax_t * n_units,char const * arg)1328 parse_chunk (intmax_t *k_units, intmax_t *n_units, char const *arg)
1329 {
1330 char *argend;
1331 strtol_error e = xstrtoimax (arg, &argend, 10, n_units, "");
1332 if (e == LONGINT_INVALID_SUFFIX_CHAR && *argend == '/')
1333 {
1334 *k_units = *n_units;
1335 *n_units = parse_n_units (argend + 1, "",
1336 N_("invalid number of chunks"));
1337 if (! (0 < *k_units && *k_units <= *n_units))
1338 error (EXIT_FAILURE, 0, "%s: %s", _("invalid chunk number"),
1339 quote_mem (arg, argend - arg));
1340 }
1341 else if (! (e <= OVERFLOW_OK && 0 < *n_units))
1342 strtoint_die (N_("invalid number of chunks"), arg);
1343 }
1344
1345
1346 int
main(int argc,char ** argv)1347 main (int argc, char **argv)
1348 {
1349 enum Split_type split_type = type_undef;
1350 idx_t in_blk_size = 0; /* optimal block size of input file device */
1351 idx_t page_size = getpagesize ();
1352 intmax_t k_units = 0;
1353 intmax_t n_units = 0;
1354
1355 static char const multipliers[] = "bEGKkMmPQRTYZ0";
1356 int c;
1357 int digits_optind = 0;
1358 off_t file_size = OFF_T_MAX;
1359
1360 initialize_main (&argc, &argv);
1361 set_program_name (argv[0]);
1362 setlocale (LC_ALL, "");
1363 bindtextdomain (PACKAGE, LOCALEDIR);
1364 textdomain (PACKAGE);
1365
1366 atexit (close_stdout);
1367
1368 /* Parse command line options. */
1369
1370 infile = bad_cast ("-");
1371 outbase = bad_cast ("x");
1372
1373 while (true)
1374 {
1375 /* This is the argv-index of the option we will read next. */
1376 int this_optind = optind ? optind : 1;
1377
1378 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:t:ux",
1379 longopts, nullptr);
1380 if (c == -1)
1381 break;
1382
1383 switch (c)
1384 {
1385 case 'a':
1386 suffix_length = xdectoimax (optarg, 0, IDX_MAX,
1387 "", _("invalid suffix length"), 0);
1388 break;
1389
1390 case ADDITIONAL_SUFFIX_OPTION:
1391 {
1392 int suffix_len = strlen (optarg);
1393 if (last_component (optarg) != optarg
1394 || (suffix_len && ISSLASH (optarg[suffix_len - 1])))
1395 {
1396 error (0, 0,
1397 _("invalid suffix %s, contains directory separator"),
1398 quote (optarg));
1399 usage (EXIT_FAILURE);
1400 }
1401 }
1402 additional_suffix = optarg;
1403 break;
1404
1405 case 'b':
1406 if (split_type != type_undef)
1407 FAIL_ONLY_ONE_WAY ();
1408 split_type = type_bytes;
1409 n_units = parse_n_units (optarg, multipliers,
1410 N_("invalid number of bytes"));
1411 break;
1412
1413 case 'l':
1414 if (split_type != type_undef)
1415 FAIL_ONLY_ONE_WAY ();
1416 split_type = type_lines;
1417 n_units = parse_n_units (optarg, "", N_("invalid number of lines"));
1418 break;
1419
1420 case 'C':
1421 if (split_type != type_undef)
1422 FAIL_ONLY_ONE_WAY ();
1423 split_type = type_byteslines;
1424 n_units = parse_n_units (optarg, multipliers,
1425 N_("invalid number of lines"));
1426 break;
1427
1428 case 'n':
1429 if (split_type != type_undef)
1430 FAIL_ONLY_ONE_WAY ();
1431 /* skip any whitespace */
1432 while (isspace (to_uchar (*optarg)))
1433 optarg++;
1434 if (STRNCMP_LIT (optarg, "r/") == 0)
1435 {
1436 split_type = type_rr;
1437 optarg += 2;
1438 }
1439 else if (STRNCMP_LIT (optarg, "l/") == 0)
1440 {
1441 split_type = type_chunk_lines;
1442 optarg += 2;
1443 }
1444 else
1445 split_type = type_chunk_bytes;
1446 parse_chunk (&k_units, &n_units, optarg);
1447 break;
1448
1449 case 'u':
1450 unbuffered = true;
1451 break;
1452
1453 case 't':
1454 {
1455 char neweol = optarg[0];
1456 if (! neweol)
1457 error (EXIT_FAILURE, 0, _("empty record separator"));
1458 if (optarg[1])
1459 {
1460 if (STREQ (optarg, "\\0"))
1461 neweol = '\0';
1462 else
1463 {
1464 /* Provoke with 'split -txx'. Complain about
1465 "multi-character tab" instead of "multibyte tab", so
1466 that the diagnostic's wording does not need to be
1467 changed once multibyte characters are supported. */
1468 error (EXIT_FAILURE, 0, _("multi-character separator %s"),
1469 quote (optarg));
1470 }
1471 }
1472 /* Make it explicit we don't support multiple separators. */
1473 if (0 <= eolchar && neweol != eolchar)
1474 {
1475 error (EXIT_FAILURE, 0,
1476 _("multiple separator characters specified"));
1477 }
1478
1479 eolchar = neweol;
1480 }
1481 break;
1482
1483 case '0':
1484 case '1':
1485 case '2':
1486 case '3':
1487 case '4':
1488 case '5':
1489 case '6':
1490 case '7':
1491 case '8':
1492 case '9':
1493 if (split_type == type_undef)
1494 {
1495 split_type = type_digits;
1496 n_units = 0;
1497 }
1498 if (split_type != type_undef && split_type != type_digits)
1499 FAIL_ONLY_ONE_WAY ();
1500 if (digits_optind != 0 && digits_optind != this_optind)
1501 n_units = 0; /* More than one number given; ignore other. */
1502 digits_optind = this_optind;
1503 if (ckd_mul (&n_units, n_units, 10)
1504 || ckd_add (&n_units, n_units, c - '0'))
1505 n_units = INTMAX_MAX;
1506 break;
1507
1508 case 'd':
1509 case 'x':
1510 if (c == 'd')
1511 suffix_alphabet = "0123456789";
1512 else
1513 suffix_alphabet = "0123456789abcdef";
1514 if (optarg)
1515 {
1516 if (strlen (optarg) != strspn (optarg, suffix_alphabet))
1517 {
1518 error (0, 0,
1519 (c == 'd') ?
1520 _("%s: invalid start value for numerical suffix") :
1521 _("%s: invalid start value for hexadecimal suffix"),
1522 quote (optarg));
1523 usage (EXIT_FAILURE);
1524 }
1525 else
1526 {
1527 /* Skip any leading zero. */
1528 while (*optarg == '0' && *(optarg + 1) != '\0')
1529 optarg++;
1530 numeric_suffix_start = optarg;
1531 }
1532 }
1533 break;
1534
1535 case 'e':
1536 elide_empty_files = true;
1537 break;
1538
1539 case FILTER_OPTION:
1540 filter_command = optarg;
1541 break;
1542
1543 case IO_BLKSIZE_OPTION:
1544 in_blk_size = xdectoumax (optarg, 1,
1545 MIN (SYS_BUFSIZE_MAX,
1546 MIN (IDX_MAX, SIZE_MAX) - 1),
1547 multipliers, _("invalid IO block size"), 0);
1548 break;
1549
1550 case VERBOSE_OPTION:
1551 verbose = true;
1552 break;
1553
1554 case_GETOPT_HELP_CHAR;
1555
1556 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1557
1558 default:
1559 usage (EXIT_FAILURE);
1560 }
1561 }
1562
1563 if (k_units != 0 && filter_command)
1564 {
1565 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1566 usage (EXIT_FAILURE);
1567 }
1568
1569 /* Handle default case. */
1570 if (split_type == type_undef)
1571 {
1572 split_type = type_lines;
1573 n_units = 1000;
1574 }
1575
1576 if (n_units == 0)
1577 {
1578 error (0, 0, _("invalid number of lines: %s"), quote ("0"));
1579 usage (EXIT_FAILURE);
1580 }
1581
1582 if (eolchar < 0)
1583 eolchar = '\n';
1584
1585 set_suffix_length (n_units, split_type);
1586
1587 /* Get out the filename arguments. */
1588
1589 if (optind < argc)
1590 infile = argv[optind++];
1591
1592 if (optind < argc)
1593 outbase = argv[optind++];
1594
1595 if (optind < argc)
1596 {
1597 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1598 usage (EXIT_FAILURE);
1599 }
1600
1601 /* Check that the suffix length is large enough for the numerical
1602 suffix start value. */
1603 if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
1604 {
1605 error (0, 0, _("numerical suffix start value is too large "
1606 "for the suffix length"));
1607 usage (EXIT_FAILURE);
1608 }
1609
1610 /* Open the input file. */
1611 if (! STREQ (infile, "-")
1612 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1613 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1614 quoteaf (infile));
1615
1616 /* Binary I/O is safer when byte counts are used. */
1617 xset_binary_mode (STDIN_FILENO, O_BINARY);
1618
1619 /* Advise the kernel of our access pattern. */
1620 fdadvise (STDIN_FILENO, 0, 0, FADVISE_SEQUENTIAL);
1621
1622 /* Get the optimal block size of input device and make a buffer. */
1623
1624 if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
1625 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1626
1627 if (in_blk_size == 0)
1628 {
1629 in_blk_size = io_blksize (&in_stat_buf);
1630 if (SYS_BUFSIZE_MAX < in_blk_size)
1631 in_blk_size = SYS_BUFSIZE_MAX;
1632 }
1633
1634 char *buf = xalignalloc (page_size, in_blk_size + 1);
1635 ssize_t initial_read = -1;
1636
1637 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1638 {
1639 file_size = input_file_size (STDIN_FILENO, &in_stat_buf,
1640 buf, in_blk_size);
1641 if (file_size < 0)
1642 error (EXIT_FAILURE, errno, _("%s: cannot determine file size"),
1643 quotef (infile));
1644 initial_read = MIN (file_size, in_blk_size);
1645 }
1646
1647 /* When filtering, closure of one pipe must not terminate the process,
1648 as there may still be other streams expecting input from us. */
1649 if (filter_command)
1650 default_SIGPIPE = signal (SIGPIPE, SIG_IGN) == SIG_DFL;
1651
1652 switch (split_type)
1653 {
1654 case type_digits:
1655 case type_lines:
1656 lines_split (n_units, buf, in_blk_size);
1657 break;
1658
1659 case type_bytes:
1660 bytes_split (n_units, 0, buf, in_blk_size, -1, 0);
1661 break;
1662
1663 case type_byteslines:
1664 line_bytes_split (n_units, buf, in_blk_size);
1665 break;
1666
1667 case type_chunk_bytes:
1668 if (k_units == 0)
1669 bytes_split (file_size / n_units, file_size % n_units,
1670 buf, in_blk_size, initial_read, n_units);
1671 else
1672 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
1673 file_size);
1674 break;
1675
1676 case type_chunk_lines:
1677 lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
1678 file_size);
1679 break;
1680
1681 case type_rr:
1682 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1683 but the functionality is provided for symmetry. */
1684 {
1685 of_t *files;
1686 lines_rr (k_units, n_units, buf, in_blk_size, &files);
1687 }
1688 break;
1689
1690 default:
1691 affirm (false);
1692 }
1693
1694 if (close (STDIN_FILENO) != 0)
1695 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1696 closeout (nullptr, output_desc, filter_pid, outfile);
1697
1698 main_exit (EXIT_SUCCESS);
1699 }
1700