1 /* split.c -- split a file into pieces.
2    Copyright (C) 1988-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* By tege@sics.se, with rms.
18 
19    TODO:
20    * support -p REGEX as in BSD's split.
21    * support --suppress-matched as in csplit.  */
22 #include <config.h>
23 
24 #include <stdckdint.h>
25 #include <stdio.h>
26 #include <getopt.h>
27 #include <signal.h>
28 #include <sys/types.h>
29 #include <sys/wait.h>
30 
31 #include "system.h"
32 #include "alignalloc.h"
33 #include "assure.h"
34 #include "fadvise.h"
35 #include "fd-reopen.h"
36 #include "fcntl--.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
39 #include "quote.h"
40 #include "sig2str.h"
41 #include "sys-limits.h"
42 #include "temp-stream.h"
43 #include "xbinary-io.h"
44 #include "xdectoint.h"
45 #include "xstrtol.h"
46 
47 /* The official name of this program (e.g., no 'g' prefix).  */
48 #define PROGRAM_NAME "split"
49 
50 #define AUTHORS \
51   proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
52   proper_name ("Richard M. Stallman")
53 
54 /* Shell command to filter through, instead of creating files.  */
55 static char const *filter_command;
56 
57 /* Process ID of the filter.  */
58 static pid_t filter_pid;
59 
60 /* Array of open pipes.  */
61 static int *open_pipes;
62 static idx_t open_pipes_alloc;
63 static int n_open_pipes;
64 
65 /* Whether SIGPIPE has the default action, when --filter is used.  */
66 static bool default_SIGPIPE;
67 
68 /* Base name of output files.  */
69 static char const *outbase;
70 
71 /* Name of output files.  */
72 static char *outfile;
73 
74 /* Pointer to the end of the prefix in OUTFILE.
75    Suffixes are inserted here.  */
76 static char *outfile_mid;
77 
78 /* Generate new suffix when suffixes are exhausted.  */
79 static bool suffix_auto = true;
80 
81 /* Length of OUTFILE's suffix.  */
82 static idx_t suffix_length;
83 
84 /* Alphabet of characters to use in suffix.  */
85 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
86 
87 /* Numerical suffix start value.  */
88 static char const *numeric_suffix_start;
89 
90 /* Additional suffix to append to output file names.  */
91 static char const *additional_suffix;
92 
93 /* Name of input file.  May be "-".  */
94 static char *infile;
95 
96 /* stat buf for input file.  */
97 static struct stat in_stat_buf;
98 
99 /* Descriptor on which output file is open.  */
100 static int output_desc = -1;
101 
102 /* If true, print a diagnostic on standard error just before each
103    output file is opened. */
104 static bool verbose;
105 
106 /* If true, don't generate zero length output files. */
107 static bool elide_empty_files;
108 
109 /* If true, in round robin mode, immediately copy
110    input to output, which is much slower, so disabled by default.  */
111 static bool unbuffered;
112 
113 /* The character marking end of line.  Defaults to \n below.  */
114 static int eolchar = -1;
115 
/* The split mode to use.  The enumerator order is significant only in
   that type_undef comes first and therefore has the value 0.  */
enum Split_type
{
  type_undef,
  type_bytes,
  type_byteslines,
  type_lines,
  type_digits,
  /* The next three are the modes for which set_suffix_length knows
     the number of output files in advance.  */
  type_chunk_bytes,
  type_chunk_lines,
  type_rr
};
122 
/* Pseudo short-option codes for long options that have no single
   character equivalent.  Numbering starts just above CHAR_MAX so that
   these codes cannot collide with a real option character.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,   /* --verbose */
  FILTER_OPTION,                   /* --filter */
  IO_BLKSIZE_OPTION,               /* ---io-blksize */
  ADDITIONAL_SUFFIX_OPTION         /* --additional-suffix */
};
132 
133 static struct option const longopts[] =
134 {
135   {"bytes", required_argument, nullptr, 'b'},
136   {"lines", required_argument, nullptr, 'l'},
137   {"line-bytes", required_argument, nullptr, 'C'},
138   {"number", required_argument, nullptr, 'n'},
139   {"elide-empty-files", no_argument, nullptr, 'e'},
140   {"unbuffered", no_argument, nullptr, 'u'},
141   {"suffix-length", required_argument, nullptr, 'a'},
142   {"additional-suffix", required_argument, nullptr,
143    ADDITIONAL_SUFFIX_OPTION},
144   {"numeric-suffixes", optional_argument, nullptr, 'd'},
145   {"hex-suffixes", optional_argument, nullptr, 'x'},
146   {"filter", required_argument, nullptr, FILTER_OPTION},
147   {"verbose", no_argument, nullptr, VERBOSE_OPTION},
148   {"separator", required_argument, nullptr, 't'},
149   {"-io-blksize", required_argument, nullptr,
150    IO_BLKSIZE_OPTION}, /* do not document */
151   {GETOPT_HELP_OPTION_DECL},
152   {GETOPT_VERSION_OPTION_DECL},
153   {nullptr, 0, nullptr, 0}
154 };
155 
156 /* Return true if the errno value, ERR, is ignorable.  */
157 static inline bool
ignorable(int err)158 ignorable (int err)
159 {
160   return filter_command && err == EPIPE;
161 }
162 
163 static void
set_suffix_length(intmax_t n_units,enum Split_type split_type)164 set_suffix_length (intmax_t n_units, enum Split_type split_type)
165 {
166 #define DEFAULT_SUFFIX_LENGTH 2
167 
168   int suffix_length_needed = 0;
169 
170   /* The suffix auto length feature is incompatible with
171      a user specified start value as the generated suffixes
172      are not all consecutive.  */
173   if (numeric_suffix_start)
174     suffix_auto = false;
175 
176   /* Auto-calculate the suffix length if the number of files is given.  */
177   if (split_type == type_chunk_bytes || split_type == type_chunk_lines
178       || split_type == type_rr)
179     {
180       intmax_t n_units_end = n_units - 1;
181       if (numeric_suffix_start)
182         {
183           intmax_t n_start;
184           strtol_error e = xstrtoimax (numeric_suffix_start, nullptr, 10,
185                                        &n_start, "");
186           if (e == LONGINT_OK && n_start < n_units)
187             {
188               /* Restrict auto adjustment so we don't keep
189                  incrementing a suffix size arbitrarily,
190                  as that would break sort order for files
191                  generated from multiple split runs.  */
192               if (ckd_add (&n_units_end, n_units_end, n_start))
193                 n_units_end = INTMAX_MAX;
194             }
195 
196         }
197       idx_t alphabet_len = strlen (suffix_alphabet);
198       do
199         suffix_length_needed++;
200       while (n_units_end /= alphabet_len);
201 
202       suffix_auto = false;
203     }
204 
205   if (suffix_length)            /* set by user */
206     {
207       if (suffix_length < suffix_length_needed)
208         error (EXIT_FAILURE, 0,
209                _("the suffix length needs to be at least %d"),
210                suffix_length_needed);
211       suffix_auto = false;
212       return;
213     }
214   else
215     suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_length_needed);
216 }
217 
218 void
usage(int status)219 usage (int status)
220 {
221   if (status != EXIT_SUCCESS)
222     emit_try_help ();
223   else
224     {
225       printf (_("\
226 Usage: %s [OPTION]... [FILE [PREFIX]]\n\
227 "),
228               program_name);
229       fputs (_("\
230 Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\
231 default size is 1000 lines, and default PREFIX is 'x'.\n\
232 "), stdout);
233 
234       emit_stdin_note ();
235       emit_mandatory_arg_note ();
236 
237       fprintf (stdout, _("\
238   -a, --suffix-length=N   generate suffixes of length N (default %d)\n\
239       --additional-suffix=SUFFIX  append an additional SUFFIX to file names\n\
240   -b, --bytes=SIZE        put SIZE bytes per output file\n\
241   -C, --line-bytes=SIZE   put at most SIZE bytes of records per output file\n\
242   -d                      use numeric suffixes starting at 0, not alphabetic\n\
243       --numeric-suffixes[=FROM]  same as -d, but allow setting the start value\
244 \n\
245   -x                      use hex suffixes starting at 0, not alphabetic\n\
246       --hex-suffixes[=FROM]  same as -x, but allow setting the start value\n\
247   -e, --elide-empty-files  do not generate empty output files with '-n'\n\
248       --filter=COMMAND    write to shell COMMAND; file name is $FILE\n\
249   -l, --lines=NUMBER      put NUMBER lines/records per output file\n\
250   -n, --number=CHUNKS     generate CHUNKS output files; see explanation below\n\
251   -t, --separator=SEP     use SEP instead of newline as the record separator;\n\
252                             '\\0' (zero) specifies the NUL character\n\
253   -u, --unbuffered        immediately copy input to output with '-n r/...'\n\
254 "), DEFAULT_SUFFIX_LENGTH);
255       fputs (_("\
256       --verbose           print a diagnostic just before each\n\
257                             output file is opened\n\
258 "), stdout);
259       fputs (HELP_OPTION_DESCRIPTION, stdout);
260       fputs (VERSION_OPTION_DESCRIPTION, stdout);
261       emit_size_note ();
262       fputs (_("\n\
263 CHUNKS may be:\n\
264   N       split into N files based on size of input\n\
265   K/N     output Kth of N to stdout\n\
266   l/N     split into N files without splitting lines/records\n\
267   l/K/N   output Kth of N to stdout without splitting lines/records\n\
268   r/N     like 'l' but use round robin distribution\n\
269   r/K/N   likewise but only output Kth of N to stdout\n\
270 "), stdout);
271       emit_ancillary_info (PROGRAM_NAME);
272     }
273   exit (status);
274 }
275 
276 /* Copy the data in FD to a temporary file, then make that file FD.
277    Use BUF, of size BUFSIZE, to copy.  Return the number of
278    bytes copied, or -1 (setting errno) on error.  */
279 static off_t
copy_to_tmpfile(int fd,char * buf,idx_t bufsize)280 copy_to_tmpfile (int fd, char *buf, idx_t bufsize)
281 {
282   FILE *tmp;
283   if (!temp_stream (&tmp, nullptr))
284     return -1;
285   off_t copied = 0;
286   off_t r;
287 
288   while (0 < (r = read (fd, buf, bufsize)))
289     {
290       if (fwrite (buf, 1, r, tmp) != r)
291         return -1;
292       if (ckd_add (&copied, copied, r))
293         {
294           errno = EOVERFLOW;
295           return -1;
296         }
297     }
298 
299   if (r < 0)
300     return r;
301   r = dup2 (fileno (tmp), fd);
302   if (r < 0)
303     return r;
304   if (fclose (tmp) < 0)
305     return -1;
306   return copied;
307 }
308 
309 /* Return the number of bytes that can be read from FD with status ST.
310    Store up to the first BUFSIZE bytes of the file's data into BUF,
311    and advance the file position by the number of bytes read.  On
312    input error, set errno and return -1.  */
313 
314 static off_t
input_file_size(int fd,struct stat const * st,char * buf,idx_t bufsize)315 input_file_size (int fd, struct stat const *st, char *buf, idx_t bufsize)
316 {
317   off_t size = 0;
318   do
319     {
320       ssize_t n_read = read (fd, buf + size, bufsize - size);
321       if (n_read <= 0)
322         return n_read < 0 ? n_read : size;
323       size += n_read;
324     }
325   while (size < bufsize);
326 
327   off_t cur, end;
328   if ((usable_st_size (st) && st->st_size < size)
329       || (cur = lseek (fd, 0, SEEK_CUR)) < 0
330       || cur < size /* E.g., /dev/zero on GNU/Linux.  */
331       || (end = lseek (fd, 0, SEEK_END)) < 0)
332     {
333       char *tmpbuf = xmalloc (bufsize);
334       end = copy_to_tmpfile (fd, tmpbuf, bufsize);
335       free (tmpbuf);
336       if (end < 0)
337         return end;
338       cur = 0;
339     }
340 
341   if (end == OFF_T_MAX /* E.g., /dev/zero on GNU/Hurd.  */
342       || (cur < end && ckd_add (&size, size, end - cur)))
343     {
344       errno = EOVERFLOW;
345       return -1;
346     }
347 
348   if (cur < end)
349     {
350       off_t r = lseek (fd, cur, SEEK_SET);
351       if (r < 0)
352         return r;
353     }
354 
355   return size;
356 }
357 
358 /* Compute the next sequential output file name and store it into the
359    string 'outfile'.  */
360 
361 static void
next_file_name(void)362 next_file_name (void)
363 {
364   /* Index in suffix_alphabet of each character in the suffix.  */
365   static idx_t *sufindex;
366   static idx_t outbase_length;
367   static idx_t outfile_length;
368   static idx_t addsuf_length;
369 
370   if (! outfile)
371     {
372       bool overflow, widen;
373 
374 new_name:
375       widen = !! outfile_length;
376 
377       if (! widen)
378         {
379           /* Allocate and initialize the first file name.  */
380 
381           outbase_length = strlen (outbase);
382           addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
383           overflow = ckd_add (&outfile_length, outbase_length + addsuf_length,
384                               suffix_length);
385         }
386       else
387         {
388           /* Reallocate and initialize a new wider file name.
389              We do this by subsuming the unchanging part of
390              the generated suffix into the prefix (base), and
391              reinitializing the now one longer suffix.  */
392 
393           overflow = ckd_add (&outfile_length, outfile_length, 2);
394           suffix_length++;
395         }
396 
397       idx_t outfile_size;
398       overflow |= ckd_add (&outfile_size, outfile_length, 1);
399       if (overflow)
400         xalloc_die ();
401       outfile = xirealloc (outfile, outfile_size);
402 
403       if (! widen)
404         memcpy (outfile, outbase, outbase_length);
405       else
406         {
407           /* Append the last alphabet character to the file name prefix.  */
408           outfile[outbase_length] = suffix_alphabet[sufindex[0]];
409           outbase_length++;
410         }
411 
412       outfile_mid = outfile + outbase_length;
413       memset (outfile_mid, suffix_alphabet[0], suffix_length);
414       if (additional_suffix)
415         memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
416       outfile[outfile_length] = 0;
417 
418       free (sufindex);
419       sufindex = xicalloc (suffix_length, sizeof *sufindex);
420 
421       if (numeric_suffix_start)
422         {
423           affirm (! widen);
424 
425           /* Update the output file name.  */
426           idx_t i = strlen (numeric_suffix_start);
427           memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
428 
429           /* Update the suffix index.  */
430           idx_t *sufindex_end = sufindex + suffix_length;
431           while (i-- != 0)
432             *--sufindex_end = numeric_suffix_start[i] - '0';
433         }
434 
435 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
436       /* POSIX requires that if the output file name is too long for
437          its directory, 'split' must fail without creating any files.
438          This must be checked for explicitly on operating systems that
439          silently truncate file names.  */
440       {
441         char *dir = dir_name (outfile);
442         long name_max = pathconf (dir, _PC_NAME_MAX);
443         if (0 <= name_max && name_max < base_len (last_component (outfile)))
444           error (EXIT_FAILURE, ENAMETOOLONG, "%s", quotef (outfile));
445         free (dir);
446       }
447 #endif
448     }
449   else
450     {
451       /* Increment the suffix in place, if possible.  */
452 
453       idx_t i = suffix_length;
454       while (i-- != 0)
455         {
456           sufindex[i]++;
457           if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1])
458             goto new_name;
459           outfile_mid[i] = suffix_alphabet[sufindex[i]];
460           if (outfile_mid[i])
461             return;
462           sufindex[i] = 0;
463           outfile_mid[i] = suffix_alphabet[sufindex[i]];
464         }
465       error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
466     }
467 }
468 
469 /* Create or truncate a file.  */
470 
471 static int
create(char const * name)472 create (char const *name)
473 {
474   if (!filter_command)
475     {
476       if (verbose)
477         fprintf (stdout, _("creating file %s\n"), quoteaf (name));
478 
479       int oflags = O_WRONLY | O_CREAT | O_BINARY;
480       int fd = open (name, oflags | O_EXCL, MODE_RW_UGO);
481       if (0 <= fd || errno != EEXIST)
482         return fd;
483       fd = open (name, oflags, MODE_RW_UGO);
484       if (fd < 0)
485         return fd;
486       struct stat out_stat_buf;
487       if (fstat (fd, &out_stat_buf) != 0)
488         error (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf (name));
489       if (psame_inode (&in_stat_buf, &out_stat_buf))
490         error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
491                quoteaf (name));
492       bool regularish
493         = S_ISREG (out_stat_buf.st_mode) || S_TYPEISSHM (&out_stat_buf);
494       if (! (regularish && out_stat_buf.st_size == 0)
495           && ftruncate (fd, 0) < 0 && regularish)
496         error (EXIT_FAILURE, errno, _("%s: error truncating"), quotef (name));
497 
498       return fd;
499     }
500   else
501     {
502       int fd_pair[2];
503       pid_t child_pid;
504       char const *shell_prog = getenv ("SHELL");
505       if (shell_prog == nullptr)
506         shell_prog = "/bin/sh";
507       if (setenv ("FILE", name, 1) != 0)
508         error (EXIT_FAILURE, errno,
509                _("failed to set FILE environment variable"));
510       if (verbose)
511         fprintf (stdout, _("executing with FILE=%s\n"), quotef (name));
512       if (pipe (fd_pair) != 0)
513         error (EXIT_FAILURE, errno, _("failed to create pipe"));
514       child_pid = fork ();
515       if (child_pid == 0)
516         {
517           /* This is the child process.  If an error occurs here, the
518              parent will eventually learn about it after doing a wait,
519              at which time it will emit its own error message.  */
520           int j;
521           /* We have to close any pipes that were opened during an
522              earlier call, otherwise this process will be holding a
523              write-pipe that will prevent the earlier process from
524              reading an EOF on the corresponding read-pipe.  */
525           for (j = 0; j < n_open_pipes; ++j)
526             if (close (open_pipes[j]) != 0)
527               error (EXIT_FAILURE, errno, _("closing prior pipe"));
528           if (close (fd_pair[1]))
529             error (EXIT_FAILURE, errno, _("closing output pipe"));
530           if (fd_pair[0] != STDIN_FILENO)
531             {
532               if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
533                 error (EXIT_FAILURE, errno, _("moving input pipe"));
534               if (close (fd_pair[0]) != 0)
535                 error (EXIT_FAILURE, errno, _("closing input pipe"));
536             }
537           if (default_SIGPIPE)
538             signal (SIGPIPE, SIG_DFL);
539           execl (shell_prog, last_component (shell_prog), "-c",
540                  filter_command, (char *) nullptr);
541           error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
542                  shell_prog, filter_command);
543         }
544       if (child_pid < 0)
545         error (EXIT_FAILURE, errno, _("fork system call failed"));
546       if (close (fd_pair[0]) != 0)
547         error (EXIT_FAILURE, errno, _("failed to close input pipe"));
548       filter_pid = child_pid;
549       if (n_open_pipes == open_pipes_alloc)
550         open_pipes = xpalloc (open_pipes, &open_pipes_alloc, 1,
551                               MIN (INT_MAX, IDX_MAX), sizeof *open_pipes);
552       open_pipes[n_open_pipes++] = fd_pair[1];
553       return fd_pair[1];
554     }
555 }
556 
557 /* Close the output file, and do any associated cleanup.
558    If FP and FD are both specified, they refer to the same open file;
559    in this case FP is closed, but FD is still used in cleanup.  */
560 static void
closeout(FILE * fp,int fd,pid_t pid,char const * name)561 closeout (FILE *fp, int fd, pid_t pid, char const *name)
562 {
563   if (fp != nullptr && fclose (fp) != 0 && ! ignorable (errno))
564     error (EXIT_FAILURE, errno, "%s", quotef (name));
565   if (fd >= 0)
566     {
567       if (fp == nullptr && close (fd) < 0)
568         error (EXIT_FAILURE, errno, "%s", quotef (name));
569       int j;
570       for (j = 0; j < n_open_pipes; ++j)
571         {
572           if (open_pipes[j] == fd)
573             {
574               open_pipes[j] = open_pipes[--n_open_pipes];
575               break;
576             }
577         }
578     }
579   if (pid > 0)
580     {
581       int wstatus;
582       if (waitpid (pid, &wstatus, 0) < 0)
583         error (EXIT_FAILURE, errno, _("waiting for child process"));
584       else if (WIFSIGNALED (wstatus))
585         {
586           int sig = WTERMSIG (wstatus);
587           if (sig != SIGPIPE)
588             {
589               char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
590               if (sig2str (sig, signame) != 0)
591                 sprintf (signame, "%d", sig);
592               error (sig + 128, 0,
593                      _("with FILE=%s, signal %s from command: %s"),
594                      quotef (name), signame, filter_command);
595             }
596         }
597       else if (WIFEXITED (wstatus))
598         {
599           int ex = WEXITSTATUS (wstatus);
600           if (ex != 0)
601             error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
602                    quotef (name), ex, filter_command);
603         }
604       else
605         {
606           /* shouldn't happen.  */
607           error (EXIT_FAILURE, 0,
608                  _("unknown status from command (0x%X)"), wstatus + 0u);
609         }
610     }
611 }
612 
613 /* Write BYTES bytes at BP to an output file.
614    If NEW_FILE_FLAG is true, open the next output file.
615    Otherwise add to the same output file already in use.
616    Return true if successful.  */
617 
618 static bool
cwrite(bool new_file_flag,char const * bp,idx_t bytes)619 cwrite (bool new_file_flag, char const *bp, idx_t bytes)
620 {
621   if (new_file_flag)
622     {
623       if (!bp && bytes == 0 && elide_empty_files)
624         return true;
625       closeout (nullptr, output_desc, filter_pid, outfile);
626       next_file_name ();
627       output_desc = create (outfile);
628       if (output_desc < 0)
629         error (EXIT_FAILURE, errno, "%s", quotef (outfile));
630     }
631 
632   if (full_write (output_desc, bp, bytes) == bytes)
633     return true;
634   else
635     {
636       if (! ignorable (errno))
637         error (EXIT_FAILURE, errno, "%s", quotef (outfile));
638       return false;
639     }
640 }
641 
642 /* Split into pieces of exactly N_BYTES bytes.
643    However, the first REM_BYTES pieces should be 1 byte longer.
644    Use buffer BUF, whose size is BUFSIZE.
645    If INITIAL_READ is nonnegative,
646    BUF contains the first INITIAL_READ input bytes.  */
647 
648 static void
bytes_split(intmax_t n_bytes,intmax_t rem_bytes,char * buf,idx_t bufsize,ssize_t initial_read,intmax_t max_files)649 bytes_split (intmax_t n_bytes, intmax_t rem_bytes,
650              char *buf, idx_t bufsize, ssize_t initial_read,
651              intmax_t max_files)
652 {
653   bool new_file_flag = true;
654   bool filter_ok = true;
655   intmax_t opened = 0;
656   intmax_t to_write = n_bytes + (0 < rem_bytes);
657   bool eof = ! to_write;
658 
659   while (! eof)
660     {
661       ssize_t n_read;
662       if (0 <= initial_read)
663         {
664           n_read = initial_read;
665           initial_read = -1;
666           eof = n_read < bufsize;
667         }
668       else
669         {
670           if (! filter_ok
671               && 0 <= lseek (STDIN_FILENO, to_write, SEEK_CUR))
672             {
673               to_write = n_bytes + (opened + 1 < rem_bytes);
674               new_file_flag = true;
675             }
676 
677           n_read = read (STDIN_FILENO, buf, bufsize);
678           if (n_read < 0)
679             error (EXIT_FAILURE, errno, "%s", quotef (infile));
680           eof = n_read == 0;
681         }
682       char *bp_out = buf;
683       while (0 < to_write && to_write <= n_read)
684         {
685           if (filter_ok || new_file_flag)
686             filter_ok = cwrite (new_file_flag, bp_out, to_write);
687           opened += new_file_flag;
688           new_file_flag = !max_files || (opened < max_files);
689           if (! filter_ok && ! new_file_flag)
690             {
691               /* If filters no longer accepting input, stop reading.  */
692               n_read = 0;
693               eof = true;
694               break;
695             }
696           bp_out += to_write;
697           n_read -= to_write;
698           to_write = n_bytes + (opened < rem_bytes);
699         }
700       if (0 < n_read)
701         {
702           if (filter_ok || new_file_flag)
703             filter_ok = cwrite (new_file_flag, bp_out, n_read);
704           opened += new_file_flag;
705           new_file_flag = false;
706           if (! filter_ok && opened == max_files)
707             {
708               /* If filters no longer accepting input, stop reading.  */
709               break;
710             }
711           to_write -= n_read;
712         }
713     }
714 
715   /* Ensure NUMBER files are created, which truncates
716      any existing files or notifies any consumers on fifos.
717      FIXME: Should we do this before EXIT_FAILURE?  */
718   while (opened++ < max_files)
719     cwrite (true, nullptr, 0);
720 }
721 
722 /* Split into pieces of exactly N_LINES lines.
723    Use buffer BUF, whose size is BUFSIZE.  */
724 
725 static void
lines_split(intmax_t n_lines,char * buf,idx_t bufsize)726 lines_split (intmax_t n_lines, char *buf, idx_t bufsize)
727 {
728   ssize_t n_read;
729   char *bp, *bp_out, *eob;
730   bool new_file_flag = true;
731   intmax_t n = 0;
732 
733   do
734     {
735       n_read = read (STDIN_FILENO, buf, bufsize);
736       if (n_read < 0)
737         error (EXIT_FAILURE, errno, "%s", quotef (infile));
738       bp = bp_out = buf;
739       eob = bp + n_read;
740       *eob = eolchar;
741       while (true)
742         {
743           bp = rawmemchr (bp, eolchar);
744           if (bp == eob)
745             {
746               if (eob != bp_out) /* do not write 0 bytes! */
747                 {
748                   idx_t len = eob - bp_out;
749                   cwrite (new_file_flag, bp_out, len);
750                   new_file_flag = false;
751                 }
752               break;
753             }
754 
755           ++bp;
756           if (++n >= n_lines)
757             {
758               cwrite (new_file_flag, bp_out, bp - bp_out);
759               bp_out = bp;
760               new_file_flag = true;
761               n = 0;
762             }
763         }
764     }
765   while (n_read);
766 }
767 
768 /* Split into pieces that are as large as possible while still not more
769    than N_BYTES bytes, and are split on line boundaries except
770    where lines longer than N_BYTES bytes occur. */
771 
772 static void
line_bytes_split(intmax_t n_bytes,char * buf,idx_t bufsize)773 line_bytes_split (intmax_t n_bytes, char *buf, idx_t bufsize)
774 {
775   ssize_t n_read;
776   intmax_t n_out = 0;      /* for each split.  */
777   idx_t n_hold = 0;
778   char *hold = nullptr;        /* for lines > bufsize.  */
779   idx_t hold_size = 0;
780   bool split_line = false;  /* Whether a \n was output in a split.  */
781 
782   do
783     {
784       n_read = read (STDIN_FILENO, buf, bufsize);
785       if (n_read < 0)
786         error (EXIT_FAILURE, errno, "%s", quotef (infile));
787       idx_t n_left = n_read;
788       char *sob = buf;
789       while (n_left)
790         {
791           idx_t split_rest = 0;
792           char *eoc = nullptr;
793           char *eol;
794 
795           /* Determine End Of Chunk and/or End of Line,
796              which are used below to select what to write or buffer.  */
797           if (n_bytes - n_out - n_hold <= n_left)
798             {
799               /* Have enough for split.  */
800               split_rest = n_bytes - n_out - n_hold;
801               eoc = sob + split_rest - 1;
802               eol = memrchr (sob, eolchar, split_rest);
803             }
804           else
805             eol = memrchr (sob, eolchar, n_left);
806 
807           /* Output hold space if possible.  */
808           if (n_hold && !(!eol && n_out))
809             {
810               cwrite (n_out == 0, hold, n_hold);
811               n_out += n_hold;
812               if (n_hold > bufsize)
813                 hold = xirealloc (hold, bufsize);
814               n_hold = 0;
815               hold_size = bufsize;
816             }
817 
818           /* Output to eol if present.  */
819           if (eol)
820             {
821               split_line = true;
822               idx_t n_write = eol - sob + 1;
823               cwrite (n_out == 0, sob, n_write);
824               n_out += n_write;
825               n_left -= n_write;
826               sob += n_write;
827               if (eoc)
828                 split_rest -= n_write;
829             }
830 
831           /* Output to eoc or eob if possible.  */
832           if (n_left && !split_line)
833             {
834               idx_t n_write = eoc ? split_rest : n_left;
835               cwrite (n_out == 0, sob, n_write);
836               n_out += n_write;
837               n_left -= n_write;
838               sob += n_write;
839               if (eoc)
840                 split_rest -= n_write;
841             }
842 
843           /* Update hold if needed.  */
844           if ((eoc && split_rest) || (!eoc && n_left))
845             {
846               idx_t n_buf = eoc ? split_rest : n_left;
847               if (hold_size - n_hold < n_buf)
848                 hold = xpalloc (hold, &hold_size, n_buf - (hold_size - n_hold),
849                                 -1, sizeof *hold);
850               memcpy (hold + n_hold, sob, n_buf);
851               n_hold += n_buf;
852               n_left -= n_buf;
853               sob += n_buf;
854             }
855 
856           /* Reset for new split.  */
857           if (eoc)
858             {
859               n_out = 0;
860               split_line = false;
861             }
862         }
863     }
864   while (n_read);
865 
866   /* Handle no eol at end of file.  */
867   if (n_hold)
868     cwrite (n_out == 0, hold, n_hold);
869 
870   free (hold);
871 }
872 
873 /* -n l/[K/]N: Write lines to files of approximately file size / N.
874    The file is partitioned into file size / N sized portions, with the
875    last assigned any excess.  If a line _starts_ within a partition
876    it is written completely to the corresponding file.  Since lines
877    are not split even if they overlap a partition, the files written
878    can be larger or smaller than the partition size, and even empty
879    if a line is so long as to completely overlap the partition.  */
880 
881 static void
lines_chunk_split(intmax_t k,intmax_t n,char * buf,idx_t bufsize,ssize_t initial_read,off_t file_size)882 lines_chunk_split (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
883                    ssize_t initial_read, off_t file_size)
884 {
885   affirm (n && k <= n);
886 
887   intmax_t rem_bytes = file_size % n;
888   off_t chunk_size = file_size / n;
889   intmax_t chunk_no = 1;
890   off_t chunk_end = chunk_size + (0 < rem_bytes);
891   off_t n_written = 0;
892   bool new_file_flag = true;
893   bool chunk_truncated = false;
894 
895   if (k > 1 && 0 < file_size)
896     {
897       /* Start reading 1 byte before kth chunk of file.  */
898       off_t start = (k - 1) * chunk_size + MIN (k - 1, rem_bytes) - 1;
899       if (start < initial_read)
900         {
901           memmove (buf, buf + start, initial_read - start);
902           initial_read -= start;
903         }
904       else
905         {
906           if (initial_read < start
907               && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
908             error (EXIT_FAILURE, errno, "%s", quotef (infile));
909           initial_read = -1;
910         }
911       n_written = start;
912       chunk_no = k - 1;
913       chunk_end = start + 1;
914     }
915 
916   while (n_written < file_size)
917     {
918       char *bp = buf, *eob;
919       ssize_t n_read;
920       if (0 <= initial_read)
921         {
922           n_read = initial_read;
923           initial_read = -1;
924         }
925       else
926         {
927           n_read = read (STDIN_FILENO, buf,
928                          MIN (bufsize, file_size - n_written));
929           if (n_read < 0)
930             error (EXIT_FAILURE, errno, "%s", quotef (infile));
931         }
932       if (n_read == 0)
933         break; /* eof.  */
934       chunk_truncated = false;
935       eob = buf + n_read;
936 
937       while (bp != eob)
938         {
939           idx_t to_write;
940           bool next = false;
941 
942           /* Begin looking for '\n' at last byte of chunk.  */
943           off_t skip = MIN (n_read, MAX (0, chunk_end - 1 - n_written));
944           char *bp_out = memchr (bp + skip, eolchar, n_read - skip);
945           if (bp_out)
946             {
947               bp_out++;
948               next = true;
949             }
950           else
951             bp_out = eob;
952           to_write = bp_out - bp;
953 
954           if (k == chunk_no)
955             {
956               /* We don't use the stdout buffer here since we're writing
957                  large chunks from an existing file, so it's more efficient
958                  to write out directly.  */
959               if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
960                 write_error ();
961             }
962           else if (! k)
963             cwrite (new_file_flag, bp, to_write);
964           n_written += to_write;
965           bp += to_write;
966           n_read -= to_write;
967           new_file_flag = next;
968 
969           /* A line could have been so long that it skipped
970              entire chunks. So create empty files in that case.  */
971           while (next || chunk_end <= n_written)
972             {
973               if (!next && bp == eob)
974                 {
975                   /* replenish buf, before going to next chunk.  */
976                   chunk_truncated = true;
977                   break;
978                 }
979               if (k == chunk_no)
980                 return;
981               chunk_end += chunk_size + (chunk_no < rem_bytes);
982               chunk_no++;
983               if (chunk_end <= n_written)
984                 {
985                   if (! k)
986                     cwrite (true, nullptr, 0);
987                 }
988               else
989                 next = false;
990             }
991         }
992     }
993 
994   if (chunk_truncated)
995     chunk_no++;
996 
997   /* Ensure NUMBER files are created, which truncates
998      any existing files or notifies any consumers on fifos.
999      FIXME: Should we do this before EXIT_FAILURE?  */
1000   if (!k)
1001     while (chunk_no++ <= n)
1002       cwrite (true, nullptr, 0);
1003 }
1004 
1005 /* -n K/N: Extract Kth of N chunks.  */
1006 
1007 static void
bytes_chunk_extract(intmax_t k,intmax_t n,char * buf,idx_t bufsize,ssize_t initial_read,off_t file_size)1008 bytes_chunk_extract (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
1009                      ssize_t initial_read, off_t file_size)
1010 {
1011   off_t start;
1012   off_t end;
1013 
1014   assert (0 < k && k <= n);
1015 
1016   start = (k - 1) * (file_size / n) + MIN (k - 1, file_size % n);
1017   end = k == n ? file_size : k * (file_size / n) + MIN (k, file_size % n);
1018 
1019   if (start < initial_read)
1020     {
1021       memmove (buf, buf + start, initial_read - start);
1022       initial_read -= start;
1023     }
1024   else
1025     {
1026       if (initial_read < start
1027           && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
1028         error (EXIT_FAILURE, errno, "%s", quotef (infile));
1029       initial_read = -1;
1030     }
1031 
1032   while (start < end)
1033     {
1034       ssize_t n_read;
1035       if (0 <= initial_read)
1036         {
1037           n_read = initial_read;
1038           initial_read = -1;
1039         }
1040       else
1041         {
1042           n_read = read (STDIN_FILENO, buf, bufsize);
1043           if (n_read < 0)
1044             error (EXIT_FAILURE, errno, "%s", quotef (infile));
1045         }
1046       if (n_read == 0)
1047         break; /* eof.  */
1048       n_read = MIN (n_read, end - start);
1049       if (full_write (STDOUT_FILENO, buf, n_read) != n_read
1050           && ! ignorable (errno))
1051         error (EXIT_FAILURE, errno, "%s", quotef ("-"));
1052       start += n_read;
1053     }
1054 }
1055 
/* Per-output-file state used by the round robin splitter.  */
struct of_info
{
  /* Name of the output file, as passed to create or open.  */
  char *of_name;

  /* File descriptor of the output, or one of the negative OFD_*
     values while the file is not open.  */
  int ofd;

  /* Stdio stream associated with OFD, or a null pointer when closed.  */
  FILE *ofile;

  /* Value of filter_pid captured when the file was opened.  */
  pid_t opid;
};

typedef struct of_info of_t;
1063 
/* Negative placeholder values for of_t.ofd while a file is not open.
   Both compare less than or equal to OFD_NEW, which ofile_open uses
   to detect "not currently open".  */
enum
{
  /* The file was opened before and then closed; reopen it for append.  */
  OFD_APPEND = -2,

  /* The file has not been created yet.  */
  OFD_NEW = -1
};
1069 
1070 /* Rotate file descriptors when we're writing to more output files than we
1071    have available file descriptors.
1072    Return whether we came under file resource pressure.
1073    If so, it's probably best to close each file when finished with it.  */
1074 
1075 static bool
ofile_open(of_t * files,idx_t i_check,idx_t nfiles)1076 ofile_open (of_t *files, idx_t i_check, idx_t nfiles)
1077 {
1078   bool file_limit = false;
1079 
1080   if (files[i_check].ofd <= OFD_NEW)
1081     {
1082       int fd;
1083       idx_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
1084 
1085       /* Another process could have opened a file in between the calls to
1086          close and open, so we should keep trying until open succeeds or
1087          we've closed all of our files.  */
1088       while (true)
1089         {
1090           if (files[i_check].ofd == OFD_NEW)
1091             fd = create (files[i_check].of_name);
1092           else /* OFD_APPEND  */
1093             {
1094               /* Attempt to append to previously opened file.
1095                  We use O_NONBLOCK to support writing to fifos,
1096                  where the other end has closed because of our
1097                  previous close.  In that case we'll immediately
1098                  get an error, rather than waiting indefinitely.
1099                  In specialized cases the consumer can keep reading
1100                  from the fifo, terminating on conditions in the data
1101                  itself, or perhaps never in the case of 'tail -f'.
1102                  I.e., for fifos it is valid to attempt this reopen.
1103 
1104                  We don't handle the filter_command case here, as create()
1105                  will exit if there are not enough files in that case.
1106                  I.e., we don't support restarting filters, as that would
1107                  put too much burden on users specifying --filter commands.  */
1108               fd = open (files[i_check].of_name,
1109                          O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
1110             }
1111 
1112           if (0 <= fd)
1113             break;
1114 
1115           if (!(errno == EMFILE || errno == ENFILE))
1116             error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1117 
1118           file_limit = true;
1119 
1120           /* Search backwards for an open file to close.  */
1121           while (files[i_reopen].ofd < 0)
1122             {
1123               i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
1124               /* No more open files to close, exit with E[NM]FILE.  */
1125               if (i_reopen == i_check)
1126                 error (EXIT_FAILURE, errno, "%s",
1127                        quotef (files[i_check].of_name));
1128             }
1129 
1130           if (fclose (files[i_reopen].ofile) != 0)
1131             error (EXIT_FAILURE, errno, "%s", quotef (files[i_reopen].of_name));
1132           files[i_reopen].ofile = nullptr;
1133           files[i_reopen].ofd = OFD_APPEND;
1134         }
1135 
1136       files[i_check].ofd = fd;
1137       FILE *ofile = fdopen (fd, "a");
1138       if (!ofile)
1139         error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1140       files[i_check].ofile = ofile;
1141       files[i_check].opid = filter_pid;
1142       filter_pid = 0;
1143     }
1144 
1145   return file_limit;
1146 }
1147 
1148 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
1149    Use BUF of size BUFSIZE for the buffer, and if allocating storage
1150    put its address into *FILESP to pacify -fsanitize=leak.
1151    When K == 0, we try to keep the files open in parallel.
1152    If we run out of file resources, then we revert
1153    to opening and closing each file for each line.  */
1154 
1155 static void
lines_rr(intmax_t k,intmax_t n,char * buf,idx_t bufsize,of_t ** filesp)1156 lines_rr (intmax_t k, intmax_t n, char *buf, idx_t bufsize, of_t **filesp)
1157 {
1158   bool wrapped = false;
1159   bool wrote = false;
1160   bool file_limit;
1161   idx_t i_file;
1162   of_t *files IF_LINT (= nullptr);
1163   intmax_t line_no;
1164 
1165   if (k)
1166     line_no = 1;
1167   else
1168     {
1169       if (IDX_MAX < n)
1170         xalloc_die ();
1171       files = *filesp = xinmalloc (n, sizeof *files);
1172 
1173       /* Generate output file names. */
1174       for (i_file = 0; i_file < n; i_file++)
1175         {
1176           next_file_name ();
1177           files[i_file].of_name = xstrdup (outfile);
1178           files[i_file].ofd = OFD_NEW;
1179           files[i_file].ofile = nullptr;
1180           files[i_file].opid = 0;
1181         }
1182       i_file = 0;
1183       file_limit = false;
1184     }
1185 
1186   while (true)
1187     {
1188       char *bp = buf, *eob;
1189       ssize_t n_read = read (STDIN_FILENO, buf, bufsize);
1190       if (n_read < 0)
1191         error (EXIT_FAILURE, errno, "%s", quotef (infile));
1192       else if (n_read == 0)
1193         break; /* eof.  */
1194       eob = buf + n_read;
1195 
1196       while (bp != eob)
1197         {
1198           idx_t to_write;
1199           bool next = false;
1200 
1201           /* Find end of line. */
1202           char *bp_out = memchr (bp, eolchar, eob - bp);
1203           if (bp_out)
1204             {
1205               bp_out++;
1206               next = true;
1207             }
1208           else
1209             bp_out = eob;
1210           to_write = bp_out - bp;
1211 
1212           if (k)
1213             {
1214               if (line_no == k && unbuffered)
1215                 {
1216                   if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
1217                     write_error ();
1218                 }
1219               else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
1220                 {
1221                   write_error ();
1222                 }
1223               if (next)
1224                 line_no = (line_no == n) ? 1 : line_no + 1;
1225             }
1226           else
1227             {
1228               /* Secure file descriptor. */
1229               file_limit |= ofile_open (files, i_file, n);
1230               if (unbuffered)
1231                 {
1232                   /* Note writing to fd, rather than flushing the FILE gives
1233                      an 8% performance benefit, due to reduced data copying.  */
1234                   if (full_write (files[i_file].ofd, bp, to_write) != to_write
1235                       && ! ignorable (errno))
1236                     error (EXIT_FAILURE, errno, "%s",
1237                            quotef (files[i_file].of_name));
1238                 }
1239               else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
1240                        && ! ignorable (errno))
1241                 error (EXIT_FAILURE, errno, "%s",
1242                        quotef (files[i_file].of_name));
1243 
1244               if (! ignorable (errno))
1245                 wrote = true;
1246 
1247               if (file_limit)
1248                 {
1249                   if (fclose (files[i_file].ofile) != 0)
1250                     error (EXIT_FAILURE, errno, "%s",
1251                            quotef (files[i_file].of_name));
1252                   files[i_file].ofile = nullptr;
1253                   files[i_file].ofd = OFD_APPEND;
1254                 }
1255               if (next && ++i_file == n)
1256                 {
1257                   wrapped = true;
1258                   /* If no filters are accepting input, stop reading.  */
1259                   if (! wrote)
1260                     goto no_filters;
1261                   wrote = false;
1262                   i_file = 0;
1263                 }
1264             }
1265 
1266           bp = bp_out;
1267         }
1268     }
1269 
1270 no_filters:
1271   /* Ensure all files created, so that any existing files are truncated,
1272      and to signal any waiting fifo consumers.
1273      Also, close any open file descriptors.
1274      FIXME: Should we do this before EXIT_FAILURE?  */
1275   if (!k)
1276     {
1277       idx_t ceiling = wrapped ? n : i_file;
1278       for (i_file = 0; i_file < n; i_file++)
1279         {
1280           if (i_file >= ceiling && !elide_empty_files)
1281             file_limit |= ofile_open (files, i_file, n);
1282           if (files[i_file].ofd >= 0)
1283             closeout (files[i_file].ofile, files[i_file].ofd,
1284                       files[i_file].opid, files[i_file].of_name);
1285           files[i_file].ofd = OFD_APPEND;
1286         }
1287     }
1288 }
1289 
/* Diagnose a second attempt to select a splitting method, then call
   usage (EXIT_FAILURE).  Wrapped in do/while (0) so that it can be
   used as a single statement followed by a semicolon.  */
#define FAIL_ONLY_ONE_WAY() \
  do \
    { \
      error (0, 0, _("cannot split in more than one way")); \
      usage (EXIT_FAILURE); \
    } \
  while (0)
1297 
/* Report a string-to-integer conversion failure and exit.
   MSGID is the untranslated diagnostic and ARG the offending text;
   the message printed is the translation of MSGID followed by the
   quoted ARG.  The current errno is reported too, unless it is
   EINVAL, in which case no error string is appended.  */

static _Noreturn void
strtoint_die (char const *msgid, char const *arg)
{
  int errnum = errno == EINVAL ? 0 : errno;
  error (EXIT_FAILURE, errnum, "%s: %s", gettext (msgid), quote (arg));
}
1306 
1307 /* Use OVERFLOW_OK when it is OK to ignore LONGINT_OVERFLOW errors, since the
1308    extreme value will do the right thing anyway on any practical platform.  */
1309 #define OVERFLOW_OK LONGINT_OVERFLOW
1310 
1311 /* Parse ARG for number of bytes or lines.  The number can be followed
1312    by MULTIPLIERS, and the resulting value must be positive.
1313    If the number cannot be parsed, diagnose with MSG.
1314    Return the number parsed, or an INTMAX_MAX on overflow.  */
1315 
1316 static intmax_t
parse_n_units(char const * arg,char const * multipliers,char const * msgid)1317 parse_n_units (char const *arg, char const *multipliers, char const *msgid)
1318 {
1319   intmax_t n;
1320   if (OVERFLOW_OK < xstrtoimax (arg, nullptr, 10, &n, multipliers) || n < 1)
1321     strtoint_die (msgid, arg);
1322   return n;
1323 }
1324 
1325 /* Parse K/N syntax of chunk options.  */
1326 
1327 static void
parse_chunk(intmax_t * k_units,intmax_t * n_units,char const * arg)1328 parse_chunk (intmax_t *k_units, intmax_t *n_units, char const *arg)
1329 {
1330   char *argend;
1331   strtol_error e = xstrtoimax (arg, &argend, 10, n_units, "");
1332   if (e == LONGINT_INVALID_SUFFIX_CHAR && *argend == '/')
1333     {
1334       *k_units = *n_units;
1335       *n_units = parse_n_units (argend + 1, "",
1336                                 N_("invalid number of chunks"));
1337       if (! (0 < *k_units && *k_units <= *n_units))
1338         error (EXIT_FAILURE, 0, "%s: %s", _("invalid chunk number"),
1339                quote_mem (arg, argend - arg));
1340     }
1341   else if (! (e <= OVERFLOW_OK && 0 < *n_units))
1342     strtoint_die (N_("invalid number of chunks"), arg);
1343 }
1344 
1345 
1346 int
main(int argc,char ** argv)1347 main (int argc, char **argv)
1348 {
1349   enum Split_type split_type = type_undef;
1350   idx_t in_blk_size = 0;	/* optimal block size of input file device */
1351   idx_t page_size = getpagesize ();
1352   intmax_t k_units = 0;
1353   intmax_t n_units = 0;
1354 
1355   static char const multipliers[] = "bEGKkMmPQRTYZ0";
1356   int c;
1357   int digits_optind = 0;
1358   off_t file_size = OFF_T_MAX;
1359 
1360   initialize_main (&argc, &argv);
1361   set_program_name (argv[0]);
1362   setlocale (LC_ALL, "");
1363   bindtextdomain (PACKAGE, LOCALEDIR);
1364   textdomain (PACKAGE);
1365 
1366   atexit (close_stdout);
1367 
1368   /* Parse command line options.  */
1369 
1370   infile = bad_cast ("-");
1371   outbase = bad_cast ("x");
1372 
1373   while (true)
1374     {
1375       /* This is the argv-index of the option we will read next.  */
1376       int this_optind = optind ? optind : 1;
1377 
1378       c = getopt_long (argc, argv, "0123456789C:a:b:del:n:t:ux",
1379                        longopts, nullptr);
1380       if (c == -1)
1381         break;
1382 
1383       switch (c)
1384         {
1385         case 'a':
1386           suffix_length = xdectoimax (optarg, 0, IDX_MAX,
1387                                       "", _("invalid suffix length"), 0);
1388           break;
1389 
1390         case ADDITIONAL_SUFFIX_OPTION:
1391           {
1392             int suffix_len = strlen (optarg);
1393             if (last_component (optarg) != optarg
1394                 || (suffix_len && ISSLASH (optarg[suffix_len - 1])))
1395               {
1396                 error (0, 0,
1397                        _("invalid suffix %s, contains directory separator"),
1398                        quote (optarg));
1399                 usage (EXIT_FAILURE);
1400               }
1401           }
1402           additional_suffix = optarg;
1403           break;
1404 
1405         case 'b':
1406           if (split_type != type_undef)
1407             FAIL_ONLY_ONE_WAY ();
1408           split_type = type_bytes;
1409           n_units = parse_n_units (optarg, multipliers,
1410                                    N_("invalid number of bytes"));
1411           break;
1412 
1413         case 'l':
1414           if (split_type != type_undef)
1415             FAIL_ONLY_ONE_WAY ();
1416           split_type = type_lines;
1417           n_units = parse_n_units (optarg, "", N_("invalid number of lines"));
1418           break;
1419 
1420         case 'C':
1421           if (split_type != type_undef)
1422             FAIL_ONLY_ONE_WAY ();
1423           split_type = type_byteslines;
1424           n_units = parse_n_units (optarg, multipliers,
1425                                    N_("invalid number of lines"));
1426           break;
1427 
1428         case 'n':
1429           if (split_type != type_undef)
1430             FAIL_ONLY_ONE_WAY ();
1431           /* skip any whitespace */
1432           while (isspace (to_uchar (*optarg)))
1433             optarg++;
1434           if (STRNCMP_LIT (optarg, "r/") == 0)
1435             {
1436               split_type = type_rr;
1437               optarg += 2;
1438             }
1439           else if (STRNCMP_LIT (optarg, "l/") == 0)
1440             {
1441               split_type = type_chunk_lines;
1442               optarg += 2;
1443             }
1444           else
1445             split_type = type_chunk_bytes;
1446           parse_chunk (&k_units, &n_units, optarg);
1447           break;
1448 
1449         case 'u':
1450           unbuffered = true;
1451           break;
1452 
1453         case 't':
1454           {
1455             char neweol = optarg[0];
1456             if (! neweol)
1457               error (EXIT_FAILURE, 0, _("empty record separator"));
1458             if (optarg[1])
1459               {
1460                 if (STREQ (optarg, "\\0"))
1461                   neweol = '\0';
1462                 else
1463                   {
1464                     /* Provoke with 'split -txx'.  Complain about
1465                        "multi-character tab" instead of "multibyte tab", so
1466                        that the diagnostic's wording does not need to be
1467                        changed once multibyte characters are supported.  */
1468                     error (EXIT_FAILURE, 0, _("multi-character separator %s"),
1469                            quote (optarg));
1470                   }
1471               }
1472             /* Make it explicit we don't support multiple separators.  */
1473             if (0 <= eolchar && neweol != eolchar)
1474               {
1475                 error (EXIT_FAILURE, 0,
1476                        _("multiple separator characters specified"));
1477               }
1478 
1479             eolchar = neweol;
1480           }
1481           break;
1482 
1483         case '0':
1484         case '1':
1485         case '2':
1486         case '3':
1487         case '4':
1488         case '5':
1489         case '6':
1490         case '7':
1491         case '8':
1492         case '9':
1493           if (split_type == type_undef)
1494             {
1495               split_type = type_digits;
1496               n_units = 0;
1497             }
1498           if (split_type != type_undef && split_type != type_digits)
1499             FAIL_ONLY_ONE_WAY ();
1500           if (digits_optind != 0 && digits_optind != this_optind)
1501             n_units = 0;	/* More than one number given; ignore other. */
1502           digits_optind = this_optind;
1503           if (ckd_mul (&n_units, n_units, 10)
1504               || ckd_add (&n_units, n_units, c - '0'))
1505             n_units = INTMAX_MAX;
1506           break;
1507 
1508         case 'd':
1509         case 'x':
1510           if (c == 'd')
1511             suffix_alphabet = "0123456789";
1512           else
1513             suffix_alphabet = "0123456789abcdef";
1514           if (optarg)
1515             {
1516               if (strlen (optarg) != strspn (optarg, suffix_alphabet))
1517                 {
1518                   error (0, 0,
1519                          (c == 'd') ?
1520                            _("%s: invalid start value for numerical suffix") :
1521                            _("%s: invalid start value for hexadecimal suffix"),
1522                          quote (optarg));
1523                   usage (EXIT_FAILURE);
1524                 }
1525               else
1526                 {
1527                   /* Skip any leading zero.  */
1528                   while (*optarg == '0' && *(optarg + 1) != '\0')
1529                     optarg++;
1530                   numeric_suffix_start = optarg;
1531                 }
1532             }
1533           break;
1534 
1535         case 'e':
1536           elide_empty_files = true;
1537           break;
1538 
1539         case FILTER_OPTION:
1540           filter_command = optarg;
1541           break;
1542 
1543         case IO_BLKSIZE_OPTION:
1544           in_blk_size = xdectoumax (optarg, 1,
1545                                     MIN (SYS_BUFSIZE_MAX,
1546                                          MIN (IDX_MAX, SIZE_MAX) - 1),
1547                                     multipliers, _("invalid IO block size"), 0);
1548           break;
1549 
1550         case VERBOSE_OPTION:
1551           verbose = true;
1552           break;
1553 
1554         case_GETOPT_HELP_CHAR;
1555 
1556         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1557 
1558         default:
1559           usage (EXIT_FAILURE);
1560         }
1561     }
1562 
1563   if (k_units != 0 && filter_command)
1564     {
1565       error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1566       usage (EXIT_FAILURE);
1567     }
1568 
1569   /* Handle default case.  */
1570   if (split_type == type_undef)
1571     {
1572       split_type = type_lines;
1573       n_units = 1000;
1574     }
1575 
1576   if (n_units == 0)
1577     {
1578       error (0, 0, _("invalid number of lines: %s"), quote ("0"));
1579       usage (EXIT_FAILURE);
1580     }
1581 
1582   if (eolchar < 0)
1583     eolchar = '\n';
1584 
1585   set_suffix_length (n_units, split_type);
1586 
1587   /* Get out the filename arguments.  */
1588 
1589   if (optind < argc)
1590     infile = argv[optind++];
1591 
1592   if (optind < argc)
1593     outbase = argv[optind++];
1594 
1595   if (optind < argc)
1596     {
1597       error (0, 0, _("extra operand %s"), quote (argv[optind]));
1598       usage (EXIT_FAILURE);
1599     }
1600 
1601   /* Check that the suffix length is large enough for the numerical
1602      suffix start value.  */
1603   if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
1604     {
1605       error (0, 0, _("numerical suffix start value is too large "
1606                      "for the suffix length"));
1607       usage (EXIT_FAILURE);
1608     }
1609 
1610   /* Open the input file.  */
1611   if (! STREQ (infile, "-")
1612       && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1613     error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1614            quoteaf (infile));
1615 
1616   /* Binary I/O is safer when byte counts are used.  */
1617   xset_binary_mode (STDIN_FILENO, O_BINARY);
1618 
1619   /* Advise the kernel of our access pattern.  */
1620   fdadvise (STDIN_FILENO, 0, 0, FADVISE_SEQUENTIAL);
1621 
1622   /* Get the optimal block size of input device and make a buffer.  */
1623 
1624   if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
1625     error (EXIT_FAILURE, errno, "%s", quotef (infile));
1626 
1627   if (in_blk_size == 0)
1628     {
1629       in_blk_size = io_blksize (&in_stat_buf);
1630       if (SYS_BUFSIZE_MAX < in_blk_size)
1631         in_blk_size = SYS_BUFSIZE_MAX;
1632     }
1633 
1634   char *buf = xalignalloc (page_size, in_blk_size + 1);
1635   ssize_t initial_read = -1;
1636 
1637   if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1638     {
1639       file_size = input_file_size (STDIN_FILENO, &in_stat_buf,
1640                                    buf, in_blk_size);
1641       if (file_size < 0)
1642         error (EXIT_FAILURE, errno, _("%s: cannot determine file size"),
1643                quotef (infile));
1644       initial_read = MIN (file_size, in_blk_size);
1645     }
1646 
1647   /* When filtering, closure of one pipe must not terminate the process,
1648      as there may still be other streams expecting input from us.  */
1649   if (filter_command)
1650     default_SIGPIPE = signal (SIGPIPE, SIG_IGN) == SIG_DFL;
1651 
1652   switch (split_type)
1653     {
1654     case type_digits:
1655     case type_lines:
1656       lines_split (n_units, buf, in_blk_size);
1657       break;
1658 
1659     case type_bytes:
1660       bytes_split (n_units, 0, buf, in_blk_size, -1, 0);
1661       break;
1662 
1663     case type_byteslines:
1664       line_bytes_split (n_units, buf, in_blk_size);
1665       break;
1666 
1667     case type_chunk_bytes:
1668       if (k_units == 0)
1669         bytes_split (file_size / n_units, file_size % n_units,
1670                      buf, in_blk_size, initial_read, n_units);
1671       else
1672         bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
1673                              file_size);
1674       break;
1675 
1676     case type_chunk_lines:
1677       lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
1678                          file_size);
1679       break;
1680 
1681     case type_rr:
1682       /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1683          but the functionality is provided for symmetry.  */
1684       {
1685         of_t *files;
1686         lines_rr (k_units, n_units, buf, in_blk_size, &files);
1687       }
1688       break;
1689 
1690     default:
1691       affirm (false);
1692     }
1693 
1694   if (close (STDIN_FILENO) != 0)
1695     error (EXIT_FAILURE, errno, "%s", quotef (infile));
1696   closeout (nullptr, output_desc, filter_pid, outfile);
1697 
1698   main_exit (EXIT_SUCCESS);
1699 }
1700