1 /* dd -- convert a file while copying it.
2    Copyright (C) 1985-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Paul Rubin, David MacKenzie, and Stuart Kemp. */
18 
19 #include <config.h>
20 
21 #include <sys/types.h>
22 #include <signal.h>
23 #include <stdckdint.h>
24 
25 #include "system.h"
26 #include "alignalloc.h"
27 #include "close-stream.h"
28 #include "fd-reopen.h"
29 #include "gethrxtime.h"
30 #include "human.h"
31 #include "ioblksize.h"
32 #include "long-options.h"
33 #include "quote.h"
34 #include "verror.h"
35 #include "xstrtol.h"
36 #include "xtime.h"
37 
/* The official name of this program (e.g., no 'g' prefix).
   usage () passes it to emit_ancillary_info.  */
#define PROGRAM_NAME "dd"

/* The program's authors, as a comma-separated list of proper_name ()
   calls, in the same order as the "Written by" comment at the top of
   this file.  */
#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Stuart Kemp")
45 
/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
   present.  When it is absent, define SA_NOCLDSTOP to 0 (so that it can
   be tested in both '#if' and ordinary 'if' conditions), and stub out
   sigprocmask, sigset_t and (unless the host provides it) siginterrupt
   so that code mentioning them still compiles.  */
#ifndef SA_NOCLDSTOP
# define SA_NOCLDSTOP 0
# define sigprocmask(How, Set, Oset) /* empty */
# define sigset_t int
# if ! HAVE_SIGINTERRUPT
#  define siginterrupt(sig, flag) /* empty */
# endif
#endif

/* NonStop circa 2011 lacks SA_RESETHAND; see Bug#9076.
   With the value 0, interrupt_handler resets the disposition itself.  */
#ifndef SA_RESETHAND
# define SA_RESETHAND 0
#endif

/* The signal that asks dd to print I/O statistics.  Hosts without
   SIGINFO fall back to SIGUSR1; usage () names whichever one is used.  */
#ifndef SIGINFO
# define SIGINFO SIGUSR1
#endif

/* This may belong in GNULIB's fcntl module instead.
   Define O_CIO to 0 if it is not supported by this OS. */
#ifndef O_CIO
# define O_CIO 0
#endif

/* On AIX 5.1 and AIX 5.2, O_NOCACHE is defined via <fcntl.h>
   and would interfere with our use of that name, below.  */
#undef O_NOCACHE
75 
/* Store the byte C at index OC of the output buffer OBUF and advance
   OC.  Once OC reaches OUTPUT_BLOCKSIZE, call write_output ().
   The do ... while (0) wrapper lets the macro be used as a single
   statement.  */
#define output_char(c)				\
  do						\
    {						\
      obuf[oc++] = (c);				\
      if (oc >= output_blocksize)		\
        write_output ();			\
    }						\
  while (0)

/* Default input and output blocksize.  This is the "(default: 512)"
   that usage () documents for bs, ibs and obs.  */
#define DEFAULT_BLOCKSIZE 512
87 
/* Conversion bit masks.  Every constant is a distinct single bit, so
   any combination of them fits in the 'conversions_mask' bit vector.  */
enum
  {
    C_ASCII     = 1 << 0,	/* EBCDIC to ASCII.  */
    C_EBCDIC    = 1 << 1,	/* ASCII to EBCDIC.  */
    C_IBM       = 1 << 2,	/* Different ASCII to EBCDIC.  */
    C_BLOCK     = 1 << 3,	/* Variable to fixed length records.  */
    C_UNBLOCK   = 1 << 4,	/* Fixed to variable length records.  */
    C_LCASE     = 1 << 5,	/* Translate upper to lower case.  */
    C_UCASE     = 1 << 6,	/* Translate lower to upper case.  */
    C_SWAB      = 1 << 7,	/* Swap bytes of input.  */
    C_NOERROR   = 1 << 8,	/* Ignore i/o errors.  */
    C_NOTRUNC   = 1 << 9,	/* Do not truncate output file.  */
    C_SYNC      = 1 << 10,	/* Pad input records to ibs.  */

    /* Use separate input and output buffers, and combine partial
       input blocks.  */
    C_TWOBUFS   = 1 << 11,

    C_NOCREAT   = 1 << 12,	/* Do not create output file.  */
    C_EXCL      = 1 << 13,	/* Fail if the output file exists.  */
    C_FDATASYNC = 1 << 14,	/* Synchronize output data at the end.  */
    C_FSYNC     = 1 << 15,	/* Also synchronize output metadata.  */

    C_SPARSE    = 1 << 16	/* Try to sparsely write output.  */
  };
115 
/* Status levels, stored in 'status_level'.  The numbering starts at 1
   and increases by one per enumerator.  */
enum
  {
    /* Print no statistics at all (print_stats returns at once).  */
    STATUS_NONE = 1,

    /* Print the record counts but not the transfer statistics.  */
    STATUS_NOXFER,

    /* The initial value of 'status_level'.  */
    STATUS_DEFAULT,

    /* Selected by the "progress" keyword of status=.  */
    STATUS_PROGRESS
  };
124 
/* The name of the input file, or nullptr for the standard input. */
static char const *input_file = nullptr;

/* The name of the output file, or nullptr for the standard output. */
static char const *output_file = nullptr;

/* The page size on this host.  It is the alignment requested for the
   I/O buffers in alloc_ibuf and alloc_obuf.  */
static idx_t page_size;

/* The number of bytes in which atomic reads are done. */
static idx_t input_blocksize = 0;

/* The number of bytes in which atomic writes are done. */
static idx_t output_blocksize = 0;

/* Conversion buffer size, in bytes.  0 prevents conversions. */
static idx_t conversion_blocksize = 0;

/* Skip this many records of 'input_blocksize' bytes before input. */
static intmax_t skip_records = 0;

/* Skip this many bytes before input in addition to 'skip_records'
   records.  */
static idx_t skip_bytes = 0;

/* Skip this many records of 'output_blocksize' bytes before output. */
static intmax_t seek_records = 0;

/* Skip this many bytes in addition to 'seek_records' records before
   output.  */
static intmax_t seek_bytes = 0;

/* Whether the final output was done with a seek (rather than a write).  */
static bool final_op_was_seek;

/* Copy only this many records.  The default is effectively infinity.  */
static intmax_t max_records = INTMAX_MAX;

/* Copy this many bytes in addition to 'max_records' records.  */
static idx_t max_bytes = 0;

/* Bit vector of conversions to apply (a combination of C_* masks). */
static int conversions_mask = 0;

/* Open flags for the input and output files.  */
static int input_flags = 0;
static int output_flags = 0;

/* Status level (one of the STATUS_* values) selecting what is printed
   to stderr.  */
static int status_level = STATUS_DEFAULT;

/* If true, filter characters through the translation table.
   Set by translate_charset.  */
static bool translation_needed = false;

/* Number of partial blocks written. */
static intmax_t w_partial = 0;

/* Number of full blocks written. */
static intmax_t w_full = 0;

/* Number of partial blocks read. */
static intmax_t r_partial = 0;

/* Number of full blocks read. */
static intmax_t r_full = 0;

/* Number of bytes written.  */
static intmax_t w_bytes = 0;

/* Last-reported number of bytes written, or negative if never reported.
   Updated at the end of print_xfer_stats.  */
static intmax_t reported_w_bytes = -1;

/* Time that dd started.  */
static xtime_t start_time;

/* Next time to report periodic progress.  */
static xtime_t next_time;

/* If positive, the number of bytes output in the current progress line.
   diagnose and print_stats emit a newline and reset it to 0 before
   printing anything else to stderr.  */
static int progress_len;

/* True if input is seekable.  */
static bool input_seekable;

/* Error number corresponding to initial attempt to lseek input.
   If ESPIPE, do not issue any more diagnostics about it.  */
static int input_seek_errno;

/* File offset of the input, in bytes, or -1 if it overflowed.  */
static off_t input_offset;

/* True if a partial read should be diagnosed.  */
static bool warn_partial_read;

/* Records truncated by conv=block. */
static intmax_t r_truncate = 0;

/* Output representation of newline and space characters.
   They change if we're converting to EBCDIC.  */
static char newline_character = '\n';
static char space_character = ' ';

/* I/O buffers.  alloc_obuf makes OBUF an alias of IBUF unless
   C_TWOBUFS is set in 'conversions_mask'.  */
static char *ibuf;
static char *obuf;

/* Current index into 'obuf'. */
static idx_t oc = 0;

/* Index into current line, for 'conv=block' and 'conv=unblock'.  */
static idx_t col = 0;

/* The set of signals that are caught.  */
static sigset_t caught_signals;

/* If nonzero, the value of the pending fatal signal.
   Written by interrupt_handler.  */
static sig_atomic_t volatile interrupt_signal;

/* A count of the number of pending info signals that have been received.
   Incremented by siginfo_handler.  */
static sig_atomic_t volatile info_signal_count;

/* Whether to discard cache for input or output.  */
static bool i_nocache, o_nocache;

/* Whether to instruct the kernel to discard the complete file.  */
static bool i_nocache_eof, o_nocache_eof;

/* Function used for read (to handle iflag=fullblock parameter).
   It takes a file descriptor, a buffer and the buffer's size.  */
static ssize_t (*iread_fnc) (int fd, char *buf, idx_t size);
254 
/* The longest symbol in the struct symbol_value tables below.
   It determines the size of the 'symbol' member, so it must be
   updated if a longer keyword is ever added to one of the tables.  */
#define LONGEST_SYMBOL "count_bytes"

/* A symbol and the corresponding integer value.  */
struct symbol_value
{
  /* The keyword, NUL-terminated.  The array is exactly large enough
     for LONGEST_SYMBOL and its terminating NUL.  */
  char symbol[sizeof LONGEST_SYMBOL];

  /* The value the keyword stands for: a C_* mask, an O_* flag or a
     STATUS_* level, depending on the table.  */
  int value;
};
264 
/* Conversion symbols, for conv="...".
   Each entry maps a keyword to the C_* bits it turns on; several
   keywords also imply C_TWOBUFS, and the character-set keywords imply
   C_BLOCK or C_UNBLOCK.  The table ends with an entry whose symbol is
   the empty string.  */
static struct symbol_value const conversions[] =
{
  {"ascii", C_ASCII | C_UNBLOCK | C_TWOBUFS},	/* EBCDIC to ASCII. */
  {"ebcdic", C_EBCDIC | C_BLOCK | C_TWOBUFS},	/* ASCII to EBCDIC. */
  {"ibm", C_IBM | C_BLOCK | C_TWOBUFS},	/* Different ASCII to EBCDIC. */
  {"block", C_BLOCK | C_TWOBUFS},	/* Variable to fixed length records. */
  {"unblock", C_UNBLOCK | C_TWOBUFS},	/* Fixed to variable length records. */
  {"lcase", C_LCASE | C_TWOBUFS},	/* Translate upper to lower case. */
  {"ucase", C_UCASE | C_TWOBUFS},	/* Translate lower to upper case. */
  {"sparse", C_SPARSE},		/* Try to sparsely write output. */
  {"swab", C_SWAB | C_TWOBUFS},	/* Swap bytes of input. */
  {"noerror", C_NOERROR},	/* Ignore i/o errors. */
  {"nocreat", C_NOCREAT},	/* Do not create output file.  */
  {"excl", C_EXCL},		/* Fail if the output file already exists.  */
  {"notrunc", C_NOTRUNC},	/* Do not truncate output file. */
  {"sync", C_SYNC},		/* Pad input records to ibs with NULs. */
  {"fdatasync", C_FDATASYNC},	/* Synchronize output data before finishing.  */
  {"fsync", C_FSYNC},		/* Also synchronize output metadata.  */
  {"", 0}			/* End-of-table marker.  */
};
286 
/* Yield the lowest set bit of X, or 0 if X is 0: (x & (x - 1)) is X
   with its lowest set bit cleared, so XORing that with X leaves only
   the cleared bit.  */
#define FFS_MASK(x) ((x) ^ ((x) & ((x) - 1)))

/* dd's private flag bits.  They share the iflag=/oflag= flag words
   with the system's O_* open flags, so each is carved out of the bits
   that no listed O_* flag uses.  */
enum
  {
    /* Compute a value that's bitwise disjoint from the union
       of all O_ values.  */
    v = ~(0
          | O_APPEND
          | O_BINARY
          | O_CIO
          | O_DIRECT
          | O_DIRECTORY
          | O_DSYNC
          | O_NOATIME
          | O_NOCTTY
          | O_NOFOLLOW
          | O_NOLINKS
          | O_NONBLOCK
          | O_SYNC
          | O_TEXT
          ),

    /* Use its lowest bits for private flags.  Each step takes the
       lowest free bit and then removes it from the free set.  */
    O_FULLBLOCK = FFS_MASK (v),
    v2 = v ^ O_FULLBLOCK,

    O_NOCACHE = FFS_MASK (v2),
    v3 = v2 ^ O_NOCACHE,

    O_COUNT_BYTES = FFS_MASK (v3),
    v4 = v3 ^ O_COUNT_BYTES,

    O_SKIP_BYTES = FFS_MASK (v4),
    v5 = v4 ^ O_SKIP_BYTES,

    O_SEEK_BYTES = FFS_MASK (v5)
  };

/* Ensure that we got something.  FFS_MASK yields 0 once the free bits
   are used up, so a zero here means there was no bit left for the flag.  */
static_assert (O_FULLBLOCK != 0);
static_assert (O_NOCACHE != 0);
static_assert (O_COUNT_BYTES != 0);
static_assert (O_SKIP_BYTES != 0);
static_assert (O_SEEK_BYTES != 0);

/* True if I has more than one bit set: clearing the lowest set bit of
   I still leaves a nonzero value.  */
#define MULTIPLE_BITS_SET(i) (((i) & ((i) - 1)) != 0)

/* Ensure that this is a single-bit value.  */
static_assert ( ! MULTIPLE_BITS_SET (O_FULLBLOCK));
static_assert ( ! MULTIPLE_BITS_SET (O_NOCACHE));
static_assert ( ! MULTIPLE_BITS_SET (O_COUNT_BYTES));
static_assert ( ! MULTIPLE_BITS_SET (O_SKIP_BYTES));
static_assert ( ! MULTIPLE_BITS_SET (O_SEEK_BYTES));
339 
/* Flags, for iflag="..." and oflag="...".
   Each entry maps a keyword to an O_* open flag or to one of dd's
   private flag bits.  A keyword's value is 0 when its O_* constant is
   0, as for O_CIO when this file defines it to 0.  The table ends with
   an entry whose symbol is the empty string.  */
static struct symbol_value const flags[] =
{
  {"append",	  O_APPEND},
  {"binary",	  O_BINARY},
  {"cio",	  O_CIO},
  {"direct",	  O_DIRECT},
  {"directory",   O_DIRECTORY},
  {"dsync",	  O_DSYNC},
  {"noatime",	  O_NOATIME},
  {"nocache",	  O_NOCACHE},   /* Discard cache.  */
  {"noctty",	  O_NOCTTY},
  /* Offered only where O_NOFOLLOW is known to work; otherwise 0.  */
  {"nofollow",	  HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0},
  {"nolinks",	  O_NOLINKS},
  {"nonblock",	  O_NONBLOCK},
  {"sync",	  O_SYNC},
  {"text",	  O_TEXT},
  {"fullblock",   O_FULLBLOCK}, /* Accumulate full blocks from input.  */
  {"count_bytes", O_COUNT_BYTES},
  {"skip_bytes",  O_SKIP_BYTES},
  {"seek_bytes",  O_SEEK_BYTES},
  {"",		0}		/* End-of-table marker.  */
};
363 
/* Status, for status="...".
   Maps each keyword to a STATUS_* level.  STATUS_DEFAULT has no
   keyword here; it is only the initial value of 'status_level'.
   The table ends with an entry whose symbol is the empty string.  */
static struct symbol_value const statuses[] =
{
  {"none",	STATUS_NONE},
  {"noxfer",	STATUS_NOXFER},
  {"progress",	STATUS_PROGRESS},
  {"",		0}		/* End-of-table marker.  */
};
372 
/* Translation table formed by applying successive transformations.
   translate_charset composes a further table onto it, one entry per
   byte value.  */
static unsigned char trans_table[256];

/* Standard translation tables, taken from POSIX 1003.1-2013.
   Beware of imitations; there are lots of ASCII<->EBCDIC tables
   floating around the net, perhaps valid for some applications but
   not correct here.

   Each table has 256 entries, written in octal, eight per row.  The
   index is the input byte and the entry is the translated byte.  */

/* ASCII to EBCDIC.  For example, entry 040 (ASCII space) is '\100'.  */
static char const ascii_to_ebcdic[] =
{
  '\000', '\001', '\002', '\003', '\067', '\055', '\056', '\057',
  '\026', '\005', '\045', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\074', '\075', '\062', '\046',
  '\030', '\031', '\077', '\047', '\034', '\035', '\036', '\037',
  '\100', '\132', '\177', '\173', '\133', '\154', '\120', '\175',
  '\115', '\135', '\134', '\116', '\153', '\140', '\113', '\141',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\172', '\136', '\114', '\176', '\156', '\157',
  '\174', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\321', '\322', '\323', '\324', '\325', '\326',
  '\327', '\330', '\331', '\342', '\343', '\344', '\345', '\346',
  '\347', '\350', '\351', '\255', '\340', '\275', '\232', '\155',
  '\171', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\221', '\222', '\223', '\224', '\225', '\226',
  '\227', '\230', '\231', '\242', '\243', '\244', '\245', '\246',
  '\247', '\250', '\251', '\300', '\117', '\320', '\137', '\007',
  '\040', '\041', '\042', '\043', '\044', '\025', '\006', '\027',
  '\050', '\051', '\052', '\053', '\054', '\011', '\012', '\033',
  '\060', '\061', '\032', '\063', '\064', '\065', '\066', '\010',
  '\070', '\071', '\072', '\073', '\004', '\024', '\076', '\341',
  '\101', '\102', '\103', '\104', '\105', '\106', '\107', '\110',
  '\111', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\160', '\161', '\162', '\163', '\164', '\165',
  '\166', '\167', '\170', '\200', '\212', '\213', '\214', '\215',
  '\216', '\217', '\220', '\152', '\233', '\234', '\235', '\236',
  '\237', '\240', '\252', '\253', '\254', '\112', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\241', '\276', '\277',
  '\312', '\313', '\314', '\315', '\316', '\317', '\332', '\333',
  '\334', '\335', '\336', '\337', '\352', '\353', '\354', '\355',
  '\356', '\357', '\372', '\373', '\374', '\375', '\376', '\377'
};
416 
/* ASCII to the alternate ("ibm") EBCDIC, indexed by the ASCII byte.
   It agrees with ascii_to_ebcdic for most bytes but differs in some
   entries, for example those for '^' (0136) and '~' (0176).  */
static char const ascii_to_ibm[] =
{
  '\000', '\001', '\002', '\003', '\067', '\055', '\056', '\057',
  '\026', '\005', '\045', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\074', '\075', '\062', '\046',
  '\030', '\031', '\077', '\047', '\034', '\035', '\036', '\037',
  '\100', '\132', '\177', '\173', '\133', '\154', '\120', '\175',
  '\115', '\135', '\134', '\116', '\153', '\140', '\113', '\141',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\172', '\136', '\114', '\176', '\156', '\157',
  '\174', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\321', '\322', '\323', '\324', '\325', '\326',
  '\327', '\330', '\331', '\342', '\343', '\344', '\345', '\346',
  '\347', '\350', '\351', '\255', '\340', '\275', '\137', '\155',
  '\171', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\221', '\222', '\223', '\224', '\225', '\226',
  '\227', '\230', '\231', '\242', '\243', '\244', '\245', '\246',
  '\247', '\250', '\251', '\300', '\117', '\320', '\241', '\007',
  '\040', '\041', '\042', '\043', '\044', '\025', '\006', '\027',
  '\050', '\051', '\052', '\053', '\054', '\011', '\012', '\033',
  '\060', '\061', '\032', '\063', '\064', '\065', '\066', '\010',
  '\070', '\071', '\072', '\073', '\004', '\024', '\076', '\341',
  '\101', '\102', '\103', '\104', '\105', '\106', '\107', '\110',
  '\111', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\160', '\161', '\162', '\163', '\164', '\165',
  '\166', '\167', '\170', '\200', '\212', '\213', '\214', '\215',
  '\216', '\217', '\220', '\232', '\233', '\234', '\235', '\236',
  '\237', '\240', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\312', '\313', '\314', '\315', '\316', '\317', '\332', '\333',
  '\334', '\335', '\336', '\337', '\352', '\353', '\354', '\355',
  '\356', '\357', '\372', '\373', '\374', '\375', '\376', '\377'
};
452 
/* EBCDIC to ASCII, indexed by the EBCDIC byte.  For example,
   entry 0100 (EBCDIC space) is '\040', the ASCII space.  */
static char const ebcdic_to_ascii[] =
{
  '\000', '\001', '\002', '\003', '\234', '\011', '\206', '\177',
  '\227', '\215', '\216', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\235', '\205', '\010', '\207',
  '\030', '\031', '\222', '\217', '\034', '\035', '\036', '\037',
  '\200', '\201', '\202', '\203', '\204', '\012', '\027', '\033',
  '\210', '\211', '\212', '\213', '\214', '\005', '\006', '\007',
  '\220', '\221', '\026', '\223', '\224', '\225', '\226', '\004',
  '\230', '\231', '\232', '\233', '\024', '\025', '\236', '\032',
  '\040', '\240', '\241', '\242', '\243', '\244', '\245', '\246',
  '\247', '\250', '\325', '\056', '\074', '\050', '\053', '\174',
  '\046', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\041', '\044', '\052', '\051', '\073', '\176',
  '\055', '\057', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\313', '\054', '\045', '\137', '\076', '\077',
  '\272', '\273', '\274', '\275', '\276', '\277', '\300', '\301',
  '\302', '\140', '\072', '\043', '\100', '\047', '\075', '\042',
  '\303', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  '\150', '\151', '\304', '\305', '\306', '\307', '\310', '\311',
  '\312', '\152', '\153', '\154', '\155', '\156', '\157', '\160',
  '\161', '\162', '\136', '\314', '\315', '\316', '\317', '\320',
  '\321', '\345', '\163', '\164', '\165', '\166', '\167', '\170',
  '\171', '\172', '\322', '\323', '\324', '\133', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\340', '\341', '\342', '\343', '\344', '\135', '\346', '\347',
  '\173', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\350', '\351', '\352', '\353', '\354', '\355',
  '\175', '\112', '\113', '\114', '\115', '\116', '\117', '\120',
  '\121', '\122', '\356', '\357', '\360', '\361', '\362', '\363',
  '\134', '\237', '\123', '\124', '\125', '\126', '\127', '\130',
  '\131', '\132', '\364', '\365', '\366', '\367', '\370', '\371',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\372', '\373', '\374', '\375', '\376', '\377'
};
488 
/* True if we need to close the standard output *stream*.  */
static bool close_stdout_required = true;

/* Exit-time cleanup for the standard streams.

   The standard output *stream* only needs closing when
   parse_long_options fails (as it does for --help or --version).
   Otherwise dd works solely with the STDOUT_FILENO file descriptor,
   which the "cleanup" function closes; letting the usual atexit-run
   close_stdout then call "fclose (stdout)" would merely produce a
   harmless failure of the close syscall (errno EBADF).  So, once
   parse_long_options has succeeded, skip close_stdout and perform
   only its last half: flush and close the standard error stream,
   exiting with failure status if that does not work.  */
static void
maybe_close_stdout (void)
{
  if (! close_stdout_required)
    {
      /* Only stderr is left to take care of.  */
      if (close_stream (stderr) != 0)
        _exit (EXIT_FAILURE);
      return;
    }

  close_stdout ();
}
511 
512 /* Like the 'error' function but handle any pending newline,
513    and do not exit.  */
514 
515 ATTRIBUTE_FORMAT ((__printf__, 2, 3))
516 static void
diagnose(int errnum,char const * fmt,...)517 diagnose (int errnum, char const *fmt, ...)
518 {
519   if (0 < progress_len)
520     {
521       fputc ('\n', stderr);
522       progress_len = 0;
523     }
524 
525   va_list ap;
526   va_start (ap, fmt);
527   verror (0, errnum, fmt, ap);
528   va_end (ap);
529 }
530 
531 void
usage(int status)532 usage (int status)
533 {
534   if (status != EXIT_SUCCESS)
535     emit_try_help ();
536   else
537     {
538       printf (_("\
539 Usage: %s [OPERAND]...\n\
540   or:  %s OPTION\n\
541 "),
542               program_name, program_name);
543       fputs (_("\
544 Copy a file, converting and formatting according to the operands.\n\
545 \n\
546   bs=BYTES        read and write up to BYTES bytes at a time (default: 512);\n\
547                   overrides ibs and obs\n\
548   cbs=BYTES       convert BYTES bytes at a time\n\
549   conv=CONVS      convert the file as per the comma separated symbol list\n\
550   count=N         copy only N input blocks\n\
551   ibs=BYTES       read up to BYTES bytes at a time (default: 512)\n\
552 "), stdout);
553       fputs (_("\
554   if=FILE         read from FILE instead of stdin\n\
555   iflag=FLAGS     read as per the comma separated symbol list\n\
556   obs=BYTES       write BYTES bytes at a time (default: 512)\n\
557   of=FILE         write to FILE instead of stdout\n\
558   oflag=FLAGS     write as per the comma separated symbol list\n\
559   seek=N          (or oseek=N) skip N obs-sized output blocks\n\
560   skip=N          (or iseek=N) skip N ibs-sized input blocks\n\
561   status=LEVEL    The LEVEL of information to print to stderr;\n\
562                   'none' suppresses everything but error messages,\n\
563                   'noxfer' suppresses the final transfer statistics,\n\
564                   'progress' shows periodic transfer statistics\n\
565 "), stdout);
566       fputs (_("\
567 \n\
568 N and BYTES may be followed by the following multiplicative suffixes:\n\
569 c=1, w=2, b=512, kB=1000, K=1024, MB=1000*1000, M=1024*1024, xM=M,\n\
570 GB=1000*1000*1000, G=1024*1024*1024, and so on for T, P, E, Z, Y, R, Q.\n\
571 Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\
572 If N ends in 'B', it counts bytes not blocks.\n\
573 \n\
574 Each CONV symbol may be:\n\
575 \n\
576 "), stdout);
577       fputs (_("\
578   ascii     from EBCDIC to ASCII\n\
579   ebcdic    from ASCII to EBCDIC\n\
580   ibm       from ASCII to alternate EBCDIC\n\
581   block     pad newline-terminated records with spaces to cbs-size\n\
582   unblock   replace trailing spaces in cbs-size records with newline\n\
583   lcase     change upper case to lower case\n\
584   ucase     change lower case to upper case\n\
585   sparse    try to seek rather than write all-NUL output blocks\n\
586   swab      swap every pair of input bytes\n\
587   sync      pad every input block with NULs to ibs-size; when used\n\
588             with block or unblock, pad with spaces rather than NULs\n\
589 "), stdout);
590       fputs (_("\
591   excl      fail if the output file already exists\n\
592   nocreat   do not create the output file\n\
593   notrunc   do not truncate the output file\n\
594   noerror   continue after read errors\n\
595   fdatasync  physically write output file data before finishing\n\
596   fsync     likewise, but also write metadata\n\
597 "), stdout);
598       fputs (_("\
599 \n\
600 Each FLAG symbol may be:\n\
601 \n\
602   append    append mode (makes sense only for output; conv=notrunc suggested)\n\
603 "), stdout);
604       if (O_CIO)
605         fputs (_("  cio       use concurrent I/O for data\n"), stdout);
606       if (O_DIRECT)
607         fputs (_("  direct    use direct I/O for data\n"), stdout);
608       if (O_DIRECTORY)
609         fputs (_("  directory  fail unless a directory\n"), stdout);
610       if (O_DSYNC)
611         fputs (_("  dsync     use synchronized I/O for data\n"), stdout);
612       if (O_SYNC)
613         fputs (_("  sync      likewise, but also for metadata\n"), stdout);
614       fputs (_("  fullblock  accumulate full blocks of input (iflag only)\n"),
615              stdout);
616       if (O_NONBLOCK)
617         fputs (_("  nonblock  use non-blocking I/O\n"), stdout);
618       if (O_NOATIME)
619         fputs (_("  noatime   do not update access time\n"), stdout);
620 #if HAVE_POSIX_FADVISE
621       if (O_NOCACHE)
622         fputs (_("  nocache   Request to drop cache.  See also oflag=sync\n"),
623                stdout);
624 #endif
625       if (O_NOCTTY)
626         fputs (_("  noctty    do not assign controlling terminal from file\n"),
627                stdout);
628       if (HAVE_WORKING_O_NOFOLLOW)
629         fputs (_("  nofollow  do not follow symlinks\n"), stdout);
630       if (O_NOLINKS)
631         fputs (_("  nolinks   fail if multiply-linked\n"), stdout);
632       if (O_BINARY)
633         fputs (_("  binary    use binary I/O for data\n"), stdout);
634       if (O_TEXT)
635         fputs (_("  text      use text I/O for data\n"), stdout);
636 
637       {
638         printf (_("\
639 \n\
640 Sending a %s signal to a running 'dd' process makes it\n\
641 print I/O statistics to standard error and then resume copying.\n\
642 \n\
643 Options are:\n\
644 \n\
645 "), SIGINFO == SIGUSR1 ? "USR1" : "INFO");
646       }
647 
648       fputs (HELP_OPTION_DESCRIPTION, stdout);
649       fputs (VERSION_OPTION_DESCRIPTION, stdout);
650       emit_ancillary_info (PROGRAM_NAME);
651     }
652   exit (status);
653 }
654 
/* Common options to use when displaying sizes and rates.
   This is the option set passed to human_readable by alloc_ibuf,
   alloc_obuf and print_xfer_stats; callers OR in human_base_1024 when
   they want the 1024-based form.  It is an enum constant rather than
   a macro.  */

enum { human_opts = (human_autoscale | human_round_to_nearest
                     | human_space_before_unit | human_SI | human_B) };
659 
660 /* Ensure input buffer IBUF is allocated.  */
661 
662 static void
alloc_ibuf(void)663 alloc_ibuf (void)
664 {
665   if (ibuf)
666     return;
667 
668   bool extra_byte_for_swab = !!(conversions_mask & C_SWAB);
669   ibuf = alignalloc (page_size, input_blocksize + extra_byte_for_swab);
670   if (!ibuf)
671     {
672       char hbuf[LONGEST_HUMAN_READABLE + 1];
673       error (EXIT_FAILURE, 0,
674              _("memory exhausted by input buffer of size %td bytes (%s)"),
675              input_blocksize,
676              human_readable (input_blocksize, hbuf,
677                              human_opts | human_base_1024, 1, 1));
678     }
679 }
680 
681 /* Ensure output buffer OBUF is allocated/initialized.  */
682 
683 static void
alloc_obuf(void)684 alloc_obuf (void)
685 {
686   if (obuf)
687     return;
688 
689   if (conversions_mask & C_TWOBUFS)
690     {
691       obuf = alignalloc (page_size, output_blocksize);
692       if (!obuf)
693         {
694           char hbuf[LONGEST_HUMAN_READABLE + 1];
695           error (EXIT_FAILURE, 0,
696                  _("memory exhausted by output buffer of size %td"
697                    " bytes (%s)"),
698                  output_blocksize,
699                  human_readable (output_blocksize, hbuf,
700                                  human_opts | human_base_1024, 1, 1));
701         }
702     }
703   else
704     {
705       alloc_ibuf ();
706       obuf = ibuf;
707     }
708 }
709 
710 static void
translate_charset(char const * new_trans)711 translate_charset (char const *new_trans)
712 {
713   for (int i = 0; i < 256; i++)
714     trans_table[i] = new_trans[trans_table[i]];
715   translation_needed = true;
716 }
717 
/* Function form of MULTIPLE_BITS_SET: report whether more than one
   bit of I is set.  I must be nonnegative.  */

static inline bool
multiple_bits_set (int i)
{
  bool const several = MULTIPLE_BITS_SET (i);
  return several;
}
725 
/* Return true if the next-to-last character of MESSAGE is a space,
   that is, if MESSAGE ends in a space followed by a single character.
   print_xfer_stats applies this to sizes formatted by human_readable
   to tell a bare unit, as in "512 B", from a unit that carries a
   multiplier prefix, as in "1.0 kB".

   A MESSAGE shorter than two characters has no next-to-last
   character; return false for it rather than reading before the
   start of the string.  */

static bool
abbreviation_lacks_prefix (char const *message)
{
  size_t len = strlen (message);
  return 2 <= len && message[len - 2] == ' ';
}
731 
732 /* Print transfer statistics.  */
733 
734 static void
print_xfer_stats(xtime_t progress_time)735 print_xfer_stats (xtime_t progress_time)
736 {
737   xtime_t now = progress_time ? progress_time : gethrxtime ();
738   static char const slash_s[] = "/s";
739   char hbuf[3][LONGEST_HUMAN_READABLE + sizeof slash_s];
740   double delta_s;
741   char const *bytes_per_second;
742   char const *si = human_readable (w_bytes, hbuf[0], human_opts, 1, 1);
743   char const *iec = human_readable (w_bytes, hbuf[1],
744                                     human_opts | human_base_1024, 1, 1);
745 
746   /* Use integer arithmetic to compute the transfer rate,
747      since that makes it easy to use SI abbreviations.  */
748   char *bpsbuf = hbuf[2];
749   int bpsbufsize = sizeof hbuf[2];
750   if (start_time < now)
751     {
752       double XTIME_PRECISIONe0 = XTIME_PRECISION;
753       xtime_t delta_xtime = now - start_time;
754       delta_s = delta_xtime / XTIME_PRECISIONe0;
755       bytes_per_second = human_readable (w_bytes, bpsbuf, human_opts,
756                                          XTIME_PRECISION, delta_xtime);
757       strcat (bytes_per_second - bpsbuf + bpsbuf, slash_s);
758     }
759   else
760     {
761       delta_s = 0;
762       snprintf (bpsbuf, bpsbufsize, "%s B/s", _("Infinity"));
763       bytes_per_second = bpsbuf;
764     }
765 
766   if (progress_time)
767     fputc ('\r', stderr);
768 
769   /* Use full seconds when printing progress, since the progress
770      report is output once per second and there is little point
771      displaying any subsecond jitter.  Use default precision with %g
772      otherwise, as this provides more-useful output then.  With long
773      transfers %g can generate a number with an exponent; that is OK.  */
774   char delta_s_buf[24];
775   snprintf (delta_s_buf, sizeof delta_s_buf,
776             progress_time ? "%.0f s" : "%g s", delta_s);
777 
778   int stats_len
779     = (abbreviation_lacks_prefix (si)
780        ? fprintf (stderr,
781                   ngettext ("%jd byte copied, %s, %s",
782                             "%jd bytes copied, %s, %s",
783                             select_plural (w_bytes)),
784                   w_bytes, delta_s_buf, bytes_per_second)
785        : abbreviation_lacks_prefix (iec)
786        ? fprintf (stderr,
787                   _("%jd bytes (%s) copied, %s, %s"),
788                   w_bytes, si, delta_s_buf, bytes_per_second)
789        : fprintf (stderr,
790                   _("%jd bytes (%s, %s) copied, %s, %s"),
791                   w_bytes, si, iec, delta_s_buf, bytes_per_second));
792 
793   if (progress_time)
794     {
795       /* Erase any trailing junk on the output line by outputting
796          spaces.  In theory this could glitch the display because the
797          formatted translation of a line describing a larger file
798          could consume fewer screen columns than the strlen difference
799          from the previously formatted translation.  In practice this
800          does not seem to be a problem.  */
801       if (0 <= stats_len && stats_len < progress_len)
802         fprintf (stderr, "%*s", progress_len - stats_len, "");
803       progress_len = stats_len;
804     }
805   else
806     fputc ('\n', stderr);
807 
808   reported_w_bytes = w_bytes;
809 }
810 
811 static void
print_stats(void)812 print_stats (void)
813 {
814   if (status_level == STATUS_NONE)
815     return;
816 
817   if (0 < progress_len)
818     {
819       fputc ('\n', stderr);
820       progress_len = 0;
821     }
822 
823   fprintf (stderr,
824            _("%jd+%jd records in\n"
825              "%jd+%jd records out\n"),
826            r_full, r_partial, w_full, w_partial);
827 
828   if (r_truncate != 0)
829     fprintf (stderr,
830              ngettext ("%jd truncated record\n",
831                        "%jd truncated records\n",
832                        select_plural (r_truncate)),
833              r_truncate);
834 
835   if (status_level == STATUS_NOXFER)
836     return;
837 
838   print_xfer_stats (0);
839 }
840 
841 /* An ordinary signal was received; arrange for the program to exit.  */
842 
843 static void
interrupt_handler(int sig)844 interrupt_handler (int sig)
845 {
846   if (! SA_RESETHAND)
847     signal (sig, SIG_DFL);
848   interrupt_signal = sig;
849 }
850 
851 /* An info signal was received; arrange for the program to print status.  */
852 
853 static void
siginfo_handler(int sig)854 siginfo_handler (int sig)
855 {
856   if (! SA_NOCLDSTOP)
857     signal (sig, siginfo_handler);
858   info_signal_count++;
859 }
860 
861 /* Install the signal handlers.  */
862 
863 static void
install_signal_handlers(void)864 install_signal_handlers (void)
865 {
866   bool catch_siginfo = ! (SIGINFO == SIGUSR1 && getenv ("POSIXLY_CORRECT"));
867 
868 #if SA_NOCLDSTOP
869 
870   struct sigaction act;
871   sigemptyset (&caught_signals);
872   if (catch_siginfo)
873     sigaddset (&caught_signals, SIGINFO);
874   sigaction (SIGINT, nullptr, &act);
875   if (act.sa_handler != SIG_IGN)
876     sigaddset (&caught_signals, SIGINT);
877   act.sa_mask = caught_signals;
878 
879   if (sigismember (&caught_signals, SIGINFO))
880     {
881       act.sa_handler = siginfo_handler;
882       /* Note we don't use SA_RESTART here and instead
883          handle EINTR explicitly in iftruncate etc.
884          to avoid blocking on uncommitted read/write calls.  */
885       act.sa_flags = 0;
886       sigaction (SIGINFO, &act, nullptr);
887     }
888 
889   if (sigismember (&caught_signals, SIGINT))
890     {
891       act.sa_handler = interrupt_handler;
892       act.sa_flags = SA_NODEFER | SA_RESETHAND;
893       sigaction (SIGINT, &act, nullptr);
894     }
895 
896 #else
897 
898   if (catch_siginfo)
899     {
900       signal (SIGINFO, siginfo_handler);
901       siginterrupt (SIGINFO, 1);
902     }
903   if (signal (SIGINT, SIG_IGN) != SIG_IGN)
904     {
905       signal (SIGINT, interrupt_handler);
906       siginterrupt (SIGINT, 1);
907     }
908 #endif
909 }
910 
/* Close FD.  Return 0 on success, -1 (with errno set) on failure.
   POSIX leaves the descriptor in an unspecified state when close
   fails with EINTR, so in that case keep calling close until it
   either succeeds or reports EBADF, and treat both as success.
   Signals are not processed here, which differs somewhat from
   functions like ifdatasync and ifsync.  */
static int
iclose (int fd)
{
  if (close (fd) == 0)
    return 0;

  for (;;)
    {
      /* Any failure other than an interruption is a real error.  */
      if (errno != EINTR)
        return -1;

      if (close (fd) == 0 || errno == EBADF)
        return 0;
    }
}
927 
928 static int synchronize_output (void);
929 
930 static void
cleanup(void)931 cleanup (void)
932 {
933   if (!interrupt_signal)
934     {
935       int sync_status = synchronize_output ();
936       if (sync_status)
937         exit (sync_status);
938     }
939 
940   if (iclose (STDIN_FILENO) != 0)
941     error (EXIT_FAILURE, errno, _("closing input file %s"),
942            quoteaf (input_file));
943 
944   /* Don't remove this call to close, even though close_stdout
945      closes standard output.  This close is necessary when cleanup
946      is called as a consequence of signal handling.  */
947   if (iclose (STDOUT_FILENO) != 0)
948     error (EXIT_FAILURE, errno,
949            _("closing output file %s"), quoteaf (output_file));
950 }
951 
952 /* Process any pending signals.  If signals are caught, this function
953    should be called periodically.  Ideally there should never be an
954    unbounded amount of time when signals are not being processed.  */
955 
956 static void
process_signals(void)957 process_signals (void)
958 {
959   while (interrupt_signal || info_signal_count)
960     {
961       int interrupt;
962       int infos;
963       sigset_t oldset;
964 
965       sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
966 
967       /* Reload interrupt_signal and info_signal_count, in case a new
968          signal was handled before sigprocmask took effect.  */
969       interrupt = interrupt_signal;
970       infos = info_signal_count;
971 
972       if (infos)
973         info_signal_count = infos - 1;
974 
975       sigprocmask (SIG_SETMASK, &oldset, nullptr);
976 
977       if (interrupt)
978         cleanup ();
979       print_stats ();
980       if (interrupt)
981         raise (interrupt);
982     }
983 }
984 
/* Wind down: handle recorded signals, clean up, and print the
   statistics.  */

static void
finish_up (void)
{
  /* Signals are handled first, so that cleanup is called at most
     once.  */
  process_signals ();

  cleanup ();
  print_stats ();
}
993 
/* Finish up, then exit with status CODE.  */

static void
quit (int code)
{
  finish_up ();
  exit (code);
}
1000 
1001 /* Return LEN rounded down to a multiple of IO_BUFSIZE
1002    (to minimize calls to the expensive posix_fadvise (,POSIX_FADV_DONTNEED),
1003    while storing the remainder internally per FD.
1004    Pass LEN == 0 to get the current remainder.  */
1005 
1006 static off_t
cache_round(int fd,off_t len)1007 cache_round (int fd, off_t len)
1008 {
1009   static off_t i_pending, o_pending;
1010   off_t *pending = (fd == STDIN_FILENO ? &i_pending : &o_pending);
1011 
1012   if (len)
1013     {
1014       intmax_t c_pending;
1015       if (ckd_add (&c_pending, *pending, len))
1016         c_pending = INTMAX_MAX;
1017       *pending = c_pending % IO_BUFSIZE;
1018       if (c_pending > *pending)
1019         len = c_pending - *pending;
1020       else
1021         len = 0;
1022     }
1023   else
1024     len = *pending;
1025 
1026   return len;
1027 }
1028 
1029 /* Discard the cache from the current offset of either
1030    STDIN_FILENO or STDOUT_FILENO.
1031    Return true on success.  */
1032 
1033 static bool
invalidate_cache(int fd,off_t len)1034 invalidate_cache (int fd, off_t len)
1035 {
1036   int adv_ret = -1;
1037   off_t offset;
1038   bool nocache_eof = (fd == STDIN_FILENO ? i_nocache_eof : o_nocache_eof);
1039 
1040   /* Minimize syscalls.  */
1041   off_t clen = cache_round (fd, len);
1042   if (len && !clen)
1043     return true; /* Don't advise this time.  */
1044   else if (! len && ! clen && ! nocache_eof)
1045     return true;
1046   off_t pending = len ? cache_round (fd, 0) : 0;
1047 
1048   if (fd == STDIN_FILENO)
1049     {
1050       if (input_seekable)
1051         offset = input_offset;
1052       else
1053         {
1054           offset = -1;
1055           errno = ESPIPE;
1056         }
1057     }
1058   else
1059     {
1060       static off_t output_offset = -2;
1061 
1062       if (output_offset != -1)
1063         {
1064           if (output_offset < 0)
1065             output_offset = lseek (fd, 0, SEEK_CUR);
1066           else if (len)
1067             output_offset += clen + pending;
1068         }
1069 
1070       offset = output_offset;
1071     }
1072 
1073   if (0 <= offset)
1074    {
1075      if (! len && clen && nocache_eof)
1076        {
1077          pending = clen;
1078          clen = 0;
1079        }
1080 
1081      /* Note we're being careful here to only invalidate what
1082         we've read, so as not to dump any read ahead cache.
1083         Note also the kernel is conservative and only invalidates
1084         full pages in the specified range.  */
1085 #if HAVE_POSIX_FADVISE
1086      offset = offset - clen - pending;
1087      /* ensure full page specified when invalidating to eof.  */
1088      if (clen == 0)
1089        offset -= offset % page_size;
1090      adv_ret = posix_fadvise (fd, offset, clen, POSIX_FADV_DONTNEED);
1091 #else
1092      errno = ENOTSUP;
1093 #endif
1094    }
1095 
1096   return adv_ret != -1 ? true : false;
1097 }
1098 
1099 /* Read from FD into the buffer BUF of size SIZE, processing any
1100    signals that arrive before bytes are read.  Return the number of
1101    bytes read if successful, -1 (setting errno) on failure.  */
1102 
1103 static ssize_t
iread(int fd,char * buf,idx_t size)1104 iread (int fd, char *buf, idx_t size)
1105 {
1106   ssize_t nread;
1107   static ssize_t prev_nread;
1108 
1109   do
1110     {
1111       process_signals ();
1112       nread = read (fd, buf, size);
1113       /* Ignore final read error with iflag=direct as that
1114          returns EINVAL due to the non aligned file offset.  */
1115       if (nread == -1 && errno == EINVAL
1116           && 0 < prev_nread && prev_nread < size
1117           && (input_flags & O_DIRECT))
1118         {
1119           errno = 0;
1120           nread = 0;
1121         }
1122     }
1123   while (nread < 0 && errno == EINTR);
1124 
1125   /* Short read may be due to received signal.  */
1126   if (0 < nread && nread < size)
1127     process_signals ();
1128 
1129   if (0 < nread && warn_partial_read)
1130     {
1131       if (0 < prev_nread && prev_nread < size)
1132         {
1133           idx_t prev = prev_nread;
1134           if (status_level != STATUS_NONE)
1135             diagnose (0, ngettext (("warning: partial read (%td byte); "
1136                                     "suggest iflag=fullblock"),
1137                                    ("warning: partial read (%td bytes); "
1138                                     "suggest iflag=fullblock"),
1139                                    select_plural (prev)),
1140                       prev);
1141           warn_partial_read = false;
1142         }
1143     }
1144 
1145   prev_nread = nread;
1146   return nread;
1147 }
1148 
1149 /* Wrapper around iread function to accumulate full blocks.  */
1150 static ssize_t
iread_fullblock(int fd,char * buf,idx_t size)1151 iread_fullblock (int fd, char *buf, idx_t size)
1152 {
1153   ssize_t nread = 0;
1154 
1155   while (0 < size)
1156     {
1157       ssize_t ncurr = iread (fd, buf, size);
1158       if (ncurr < 0)
1159         return ncurr;
1160       if (ncurr == 0)
1161         break;
1162       nread += ncurr;
1163       buf   += ncurr;
1164       size  -= ncurr;
1165     }
1166 
1167   return nread;
1168 }
1169 
1170 /* Write to FD the buffer BUF of size SIZE, processing any signals
1171    that arrive.  Return the number of bytes written, setting errno if
1172    this is less than SIZE.  Keep trying if there are partial
1173    writes.  */
1174 
1175 static idx_t
iwrite(int fd,char const * buf,idx_t size)1176 iwrite (int fd, char const *buf, idx_t size)
1177 {
1178   idx_t total_written = 0;
1179 
1180   if ((output_flags & O_DIRECT) && size < output_blocksize)
1181     {
1182       int old_flags = fcntl (STDOUT_FILENO, F_GETFL);
1183       if (fcntl (STDOUT_FILENO, F_SETFL, old_flags & ~O_DIRECT) != 0
1184           && status_level != STATUS_NONE)
1185         diagnose (errno, _("failed to turn off O_DIRECT: %s"),
1186                   quotef (output_file));
1187 
1188       /* Since we have just turned off O_DIRECT for the final write,
1189          we try to preserve some of its semantics.  */
1190 
1191       /* Call invalidate_cache to setup the appropriate offsets
1192          for subsequent calls.  */
1193       o_nocache_eof = true;
1194       invalidate_cache (STDOUT_FILENO, 0);
1195 
1196       /* Attempt to ensure that that final block is committed
1197          to stable storage as quickly as possible.  */
1198       conversions_mask |= C_FSYNC;
1199 
1200       /* After the subsequent fsync we'll call invalidate_cache
1201          to attempt to clear all data from the page cache.  */
1202     }
1203 
1204   while (total_written < size)
1205     {
1206       ssize_t nwritten = 0;
1207       process_signals ();
1208 
1209       /* Perform a seek for a NUL block if sparse output is enabled.  */
1210       final_op_was_seek = false;
1211       if ((conversions_mask & C_SPARSE) && is_nul (buf, size))
1212         {
1213           if (lseek (fd, size, SEEK_CUR) < 0)
1214             {
1215               conversions_mask &= ~C_SPARSE;
1216               /* Don't warn about the advisory sparse request.  */
1217             }
1218           else
1219             {
1220               final_op_was_seek = true;
1221               nwritten = size;
1222             }
1223         }
1224 
1225       if (!nwritten)
1226         nwritten = write (fd, buf + total_written, size - total_written);
1227 
1228       if (nwritten < 0)
1229         {
1230           if (errno != EINTR)
1231             break;
1232         }
1233       else if (nwritten == 0)
1234         {
1235           /* Some buggy drivers return 0 when one tries to write beyond
1236              a device's end.  (Example: Linux kernel 1.2.13 on /dev/fd0.)
1237              Set errno to ENOSPC so they get a sensible diagnostic.  */
1238           errno = ENOSPC;
1239           break;
1240         }
1241       else
1242         total_written += nwritten;
1243     }
1244 
1245   if (o_nocache && total_written)
1246     invalidate_cache (fd, total_written);
1247 
1248   return total_written;
1249 }
1250 
1251 /* Write, then empty, the output buffer 'obuf'. */
1252 
1253 static void
write_output(void)1254 write_output (void)
1255 {
1256   idx_t nwritten = iwrite (STDOUT_FILENO, obuf, output_blocksize);
1257   w_bytes += nwritten;
1258   if (nwritten != output_blocksize)
1259     {
1260       diagnose (errno, _("writing to %s"), quoteaf (output_file));
1261       if (nwritten != 0)
1262         w_partial++;
1263       quit (EXIT_FAILURE);
1264     }
1265   else
1266     w_full++;
1267   oc = 0;
1268 }
1269 
/* Call fdatasync on FD, processing signals before each attempt and
   retrying while it fails with EINTR.  Return its final result.  */

static int
ifdatasync (int fd)
{
  for (;;)
    {
      process_signals ();
      int ret = fdatasync (fd);
      if (0 <= ret || errno != EINTR)
        return ret;
    }
}
1286 
/* Call fd_reopen with the given arguments, processing signals
   before each attempt and retrying while it fails with EINTR.
   Return its final result.  */

static int
ifd_reopen (int desired_fd, char const *file, int flag, mode_t mode)
{
  for (;;)
    {
      process_signals ();
      int ret = fd_reopen (desired_fd, file, flag, mode);
      if (0 <= ret || errno != EINTR)
        return ret;
    }
}
1303 
/* Call fstat on FD and ST, processing signals before each attempt
   and retrying while it fails with EINTR.  Return its final
   result.  */

static int
ifstat (int fd, struct stat *st)
{
  for (;;)
    {
      process_signals ();
      int ret = fstat (fd, st);
      if (0 <= ret || errno != EINTR)
        return ret;
    }
}
1320 
/* Call fsync on FD, processing signals before each attempt and
   retrying while it fails with EINTR.  Return its final result.  */

static int
ifsync (int fd)
{
  for (;;)
    {
      process_signals ();
      int ret = fsync (fd);
      if (0 <= ret || errno != EINTR)
        return ret;
    }
}
1337 
/* Call ftruncate on FD with LENGTH, processing signals before each
   attempt and retrying while it fails with EINTR.  Return its final
   result.  */

static int
iftruncate (int fd, off_t length)
{
  for (;;)
    {
      process_signals ();
      int ret = ftruncate (fd, length);
      if (0 <= ret || errno != EINTR)
        return ret;
    }
}
1354 
/* Return true if STR is exactly PATTERN, or if STR starts with
   PATTERN immediately followed by the character DELIM.  */

ATTRIBUTE_PURE
static bool
operand_matches (char const *str, char const *pattern, char delim)
{
  for (; *pattern; str++, pattern++)
    if (*str != *pattern)
      return false;

  /* PATTERN is used up; STR must end here or continue with DELIM.  */
  return *str == '\0' || *str == delim;
}
1366 
1367 /* Interpret one "conv=..." or similar operand STR according to the
1368    symbols in TABLE, returning the flags specified.  If the operand
1369    cannot be parsed, use ERROR_MSGID to generate a diagnostic.  */
1370 
1371 static int
parse_symbols(char const * str,struct symbol_value const * table,bool exclusive,char const * error_msgid)1372 parse_symbols (char const *str, struct symbol_value const *table,
1373                bool exclusive, char const *error_msgid)
1374 {
1375   int value = 0;
1376 
1377   while (true)
1378     {
1379       char const *strcomma = strchr (str, ',');
1380       struct symbol_value const *entry;
1381 
1382       for (entry = table;
1383            ! (operand_matches (str, entry->symbol, ',') && entry->value);
1384            entry++)
1385         {
1386           if (! entry->symbol[0])
1387             {
1388               idx_t slen = strcomma ? strcomma - str : strlen (str);
1389               diagnose (0, "%s: %s", _(error_msgid),
1390                         quotearg_n_style_mem (0, locale_quoting_style,
1391                                               str, slen));
1392               usage (EXIT_FAILURE);
1393             }
1394         }
1395 
1396       if (exclusive)
1397         value = entry->value;
1398       else
1399         value |= entry->value;
1400       if (!strcomma)
1401         break;
1402       str = strcomma + 1;
1403     }
1404 
1405   return value;
1406 }
1407 
1408 /* Return the value of STR, interpreted as a non-negative decimal integer,
1409    optionally multiplied by various values.
1410    Set *INVALID to an appropriate error value and return INTMAX_MAX if
1411    it is an overflow, an indeterminate value if some other error occurred.  */
1412 
1413 static intmax_t
parse_integer(char const * str,strtol_error * invalid)1414 parse_integer (char const *str, strtol_error *invalid)
1415 {
1416   /* Call xstrtoumax, not xstrtoimax, since we don't want to
1417      allow strings like " -0".  Initialize N to an indeterminate value;
1418      calling code should not rely on this function returning 0
1419      when *INVALID represents a non-overflow error.  */
1420   int indeterminate = 0;
1421   uintmax_t n = indeterminate;
1422   char *suffix;
1423   static char const suffixes[] = "bcEGkKMPQRTwYZ0";
1424   strtol_error e = xstrtoumax (str, &suffix, 10, &n, suffixes);
1425   intmax_t result;
1426 
1427   if ((e & ~LONGINT_OVERFLOW) == LONGINT_INVALID_SUFFIX_CHAR
1428       && *suffix == 'B' && str < suffix && suffix[-1] != 'B')
1429     {
1430       suffix++;
1431       if (!*suffix)
1432         e &= ~LONGINT_INVALID_SUFFIX_CHAR;
1433     }
1434 
1435   if ((e & ~LONGINT_OVERFLOW) == LONGINT_INVALID_SUFFIX_CHAR
1436       && *suffix == 'x')
1437     {
1438       strtol_error f = LONGINT_OK;
1439       intmax_t o = parse_integer (suffix + 1, &f);
1440       if ((f & ~LONGINT_OVERFLOW) != LONGINT_OK)
1441         {
1442           e = f;
1443           result = indeterminate;
1444         }
1445       else if (ckd_mul (&result, n, o)
1446                || (result != 0 && ((e | f) & LONGINT_OVERFLOW)))
1447         {
1448           e = LONGINT_OVERFLOW;
1449           result = INTMAX_MAX;
1450         }
1451       else
1452         {
1453           if (result == 0 && STRPREFIX (str, "0x"))
1454             diagnose (0, _("warning: %s is a zero multiplier; "
1455                            "use %s if that is intended"),
1456                       quote_n (0, "0x"), quote_n (1, "00x"));
1457           e = LONGINT_OK;
1458         }
1459     }
1460   else if (n <= INTMAX_MAX)
1461     result = n;
1462   else
1463     {
1464       e = LONGINT_OVERFLOW;
1465       result = INTMAX_MAX;
1466     }
1467 
1468   *invalid = e;
1469   return result;
1470 }
1471 
1472 /* OPERAND is of the form "X=...".  Return true if X is NAME.  */
1473 
1474 ATTRIBUTE_PURE
1475 static bool
operand_is(char const * operand,char const * name)1476 operand_is (char const *operand, char const *name)
1477 {
1478   return operand_matches (operand, name, '=');
1479 }
1480 
1481 static void
scanargs(int argc,char * const * argv)1482 scanargs (int argc, char *const *argv)
1483 {
1484   idx_t blocksize = 0;
1485   intmax_t count = INTMAX_MAX;
1486   intmax_t skip = 0;
1487   intmax_t seek = 0;
1488   bool count_B = false, skip_B = false, seek_B = false;
1489 
1490   for (int i = optind; i < argc; i++)
1491     {
1492       char const *name = argv[i];
1493       char const *val = strchr (name, '=');
1494 
1495       if (val == nullptr)
1496         {
1497           diagnose (0, _("unrecognized operand %s"), quoteaf (name));
1498           usage (EXIT_FAILURE);
1499         }
1500       val++;
1501 
1502       if (operand_is (name, "if"))
1503         input_file = val;
1504       else if (operand_is (name, "of"))
1505         output_file = val;
1506       else if (operand_is (name, "conv"))
1507         conversions_mask |= parse_symbols (val, conversions, false,
1508                                            N_("invalid conversion"));
1509       else if (operand_is (name, "iflag"))
1510         input_flags |= parse_symbols (val, flags, false,
1511                                       N_("invalid input flag"));
1512       else if (operand_is (name, "oflag"))
1513         output_flags |= parse_symbols (val, flags, false,
1514                                        N_("invalid output flag"));
1515       else if (operand_is (name, "status"))
1516         status_level = parse_symbols (val, statuses, true,
1517                                       N_("invalid status level"));
1518       else
1519         {
1520           strtol_error invalid = LONGINT_OK;
1521           intmax_t n = parse_integer (val, &invalid);
1522           bool has_B = !!strchr (val, 'B');
1523           intmax_t n_min = 0;
1524           intmax_t n_max = INTMAX_MAX;
1525           idx_t *converted_idx = nullptr;
1526 
1527           /* Maximum blocksize.  Keep it smaller than IDX_MAX, so that
1528              it fits into blocksize vars even if 1 is added for conv=swab.
1529              Do not exceed SSIZE_MAX, for the benefit of system calls
1530              like "read".  And do not exceed OFF_T_MAX, for the
1531              benefit of the large-offset seek code.  */
1532           idx_t max_blocksize = MIN (IDX_MAX - 1, MIN (SSIZE_MAX, OFF_T_MAX));
1533 
1534           if (operand_is (name, "ibs"))
1535             {
1536               n_min = 1;
1537               n_max = max_blocksize;
1538               converted_idx = &input_blocksize;
1539             }
1540           else if (operand_is (name, "obs"))
1541             {
1542               n_min = 1;
1543               n_max = max_blocksize;
1544               converted_idx = &output_blocksize;
1545             }
1546           else if (operand_is (name, "bs"))
1547             {
1548               n_min = 1;
1549               n_max = max_blocksize;
1550               converted_idx = &blocksize;
1551             }
1552           else if (operand_is (name, "cbs"))
1553             {
1554               n_min = 1;
1555               n_max = MIN (SIZE_MAX, IDX_MAX);
1556               converted_idx = &conversion_blocksize;
1557             }
1558           else if (operand_is (name, "skip") || operand_is (name, "iseek"))
1559             {
1560               skip = n;
1561               skip_B = has_B;
1562             }
1563           else if (operand_is (name + (*name == 'o'), "seek"))
1564             {
1565               seek = n;
1566               seek_B = has_B;
1567             }
1568           else if (operand_is (name, "count"))
1569             {
1570               count = n;
1571               count_B = has_B;
1572             }
1573           else
1574             {
1575               diagnose (0, _("unrecognized operand %s"), quoteaf (name));
1576               usage (EXIT_FAILURE);
1577             }
1578 
1579           if (n < n_min)
1580             invalid = LONGINT_INVALID;
1581           else if (n_max < n)
1582             invalid = LONGINT_OVERFLOW;
1583 
1584           if (invalid != LONGINT_OK)
1585             error (EXIT_FAILURE, invalid == LONGINT_OVERFLOW ? EOVERFLOW : 0,
1586                    "%s: %s", _("invalid number"), quoteaf (val));
1587           else if (converted_idx)
1588             *converted_idx = n;
1589         }
1590     }
1591 
1592   if (blocksize)
1593     input_blocksize = output_blocksize = blocksize;
1594   else
1595     {
1596       /* POSIX says dd aggregates partial reads into
1597          output_blocksize if bs= is not specified.  */
1598       conversions_mask |= C_TWOBUFS;
1599     }
1600 
1601   if (input_blocksize == 0)
1602     input_blocksize = DEFAULT_BLOCKSIZE;
1603   if (output_blocksize == 0)
1604     output_blocksize = DEFAULT_BLOCKSIZE;
1605   if (conversion_blocksize == 0)
1606     conversions_mask &= ~(C_BLOCK | C_UNBLOCK);
1607 
1608   if (input_flags & (O_DSYNC | O_SYNC))
1609     input_flags |= O_RSYNC;
1610 
1611   if (output_flags & O_FULLBLOCK)
1612     {
1613       diagnose (0, "%s: %s", _("invalid output flag"), quote ("fullblock"));
1614       usage (EXIT_FAILURE);
1615     }
1616 
1617   if (skip_B)
1618     input_flags |= O_SKIP_BYTES;
1619   if (input_flags & O_SKIP_BYTES && skip != 0)
1620     {
1621       skip_records = skip / input_blocksize;
1622       skip_bytes = skip % input_blocksize;
1623     }
1624   else if (skip != 0)
1625     skip_records = skip;
1626 
1627   if (count_B)
1628     input_flags |= O_COUNT_BYTES;
1629   if (input_flags & O_COUNT_BYTES && count != INTMAX_MAX)
1630     {
1631       max_records = count / input_blocksize;
1632       max_bytes = count % input_blocksize;
1633     }
1634   else if (count != INTMAX_MAX)
1635     max_records = count;
1636 
1637   if (seek_B)
1638     output_flags |= O_SEEK_BYTES;
1639   if (output_flags & O_SEEK_BYTES && seek != 0)
1640     {
1641       seek_records = seek / output_blocksize;
1642       seek_bytes = seek % output_blocksize;
1643     }
1644   else if (seek != 0)
1645     seek_records = seek;
1646 
1647   /* Warn about partial reads if bs=SIZE is given and iflag=fullblock
1648      is not, and if counting or skipping bytes or using direct I/O.
1649      This helps to avoid confusion with miscounts, and to avoid issues
1650      with direct I/O on GNU/Linux.  */
1651   warn_partial_read =
1652     (! (conversions_mask & C_TWOBUFS) && ! (input_flags & O_FULLBLOCK)
1653      && (skip_records
1654          || (0 < max_records && max_records < INTMAX_MAX)
1655          || (input_flags | output_flags) & O_DIRECT));
1656 
1657   iread_fnc = ((input_flags & O_FULLBLOCK)
1658                ? iread_fullblock
1659                : iread);
1660   input_flags &= ~O_FULLBLOCK;
1661 
1662   if (multiple_bits_set (conversions_mask & (C_ASCII | C_EBCDIC | C_IBM)))
1663     error (EXIT_FAILURE, 0, _("cannot combine any two of {ascii,ebcdic,ibm}"));
1664   if (multiple_bits_set (conversions_mask & (C_BLOCK | C_UNBLOCK)))
1665     error (EXIT_FAILURE, 0, _("cannot combine block and unblock"));
1666   if (multiple_bits_set (conversions_mask & (C_LCASE | C_UCASE)))
1667     error (EXIT_FAILURE, 0, _("cannot combine lcase and ucase"));
1668   if (multiple_bits_set (conversions_mask & (C_EXCL | C_NOCREAT)))
1669     error (EXIT_FAILURE, 0, _("cannot combine excl and nocreat"));
1670   if (multiple_bits_set (input_flags & (O_DIRECT | O_NOCACHE))
1671       || multiple_bits_set (output_flags & (O_DIRECT | O_NOCACHE)))
1672     error (EXIT_FAILURE, 0, _("cannot combine direct and nocache"));
1673 
1674   if (input_flags & O_NOCACHE)
1675     {
1676       i_nocache = true;
1677       i_nocache_eof = (max_records == 0 && max_bytes == 0);
1678       input_flags &= ~O_NOCACHE;
1679     }
1680   if (output_flags & O_NOCACHE)
1681     {
1682       o_nocache = true;
1683       o_nocache_eof = (max_records == 0 && max_bytes == 0);
1684       output_flags &= ~O_NOCACHE;
1685     }
1686 }
1687 
1688 /* Fix up translation table. */
1689 
1690 static void
apply_translations(void)1691 apply_translations (void)
1692 {
1693   int i;
1694 
1695   if (conversions_mask & C_ASCII)
1696     translate_charset (ebcdic_to_ascii);
1697 
1698   if (conversions_mask & C_UCASE)
1699     {
1700       for (i = 0; i < 256; i++)
1701         trans_table[i] = toupper (trans_table[i]);
1702       translation_needed = true;
1703     }
1704   else if (conversions_mask & C_LCASE)
1705     {
1706       for (i = 0; i < 256; i++)
1707         trans_table[i] = tolower (trans_table[i]);
1708       translation_needed = true;
1709     }
1710 
1711   if (conversions_mask & C_EBCDIC)
1712     {
1713       translate_charset (ascii_to_ebcdic);
1714       newline_character = ascii_to_ebcdic['\n'];
1715       space_character = ascii_to_ebcdic[' '];
1716     }
1717   else if (conversions_mask & C_IBM)
1718     {
1719       translate_charset (ascii_to_ibm);
1720       newline_character = ascii_to_ibm['\n'];
1721       space_character = ascii_to_ibm[' '];
1722     }
1723 }
1724 
1725 /* Apply the character-set translations specified by the user
1726    to the NREAD bytes in BUF.  */
1727 
1728 static void
translate_buffer(char * buf,idx_t nread)1729 translate_buffer (char *buf, idx_t nread)
1730 {
1731   idx_t i;
1732   char *cp;
1733   for (i = nread, cp = buf; i; i--, cp++)
1734     *cp = trans_table[to_uchar (*cp)];
1735 }
1736 
/* Swap each pair of adjacent bytes among the *NREAD bytes in BUF.
   BUF must have room for one extra byte after the end, because the
   swap is not done in place.  If *SAVED_BYTE is nonnegative, it is a
   byte left over from the previous call and is paired with BUF's
   first byte.  If the data then has an odd number of bytes, remove
   the last byte and save it into *SAVED_BYTE; otherwise set
   *SAVED_BYTE to -1.  Update *NREAD to the number of swapped bytes
   and return their start, either BUF or BUF + 1.  If *NREAD is 0,
   return BUF and change nothing.  */

static char *
swab_buffer (char *buf, idx_t *nread, int *saved_byte)
{
  idx_t len = *nread;
  if (len == 0)
    return buf;

  int prev_saved = *saved_byte;
  bool have_prev = 0 <= prev_saved;

  /* The carried-over byte counts toward the total, so a byte is left
     over exactly when LEN's parity differs from HAVE_PREV.  */
  bool odd_total = ((len & 1) != 0) != have_prev;
  if (odd_total)
    {
      len--;
      *saved_byte = (unsigned char) buf[len];
    }
  else
    *saved_byte = -1;

  /* Do the byte-swapping by moving every other byte two positions
     toward the end, working from the end of the buffer toward the
     beginning.  This way only half the data is moved.  */
  for (idx_t i = len; 1 < i; i -= 2)
    buf[i] = buf[i - 2];

  if (have_prev)
    {
      /* The carried-over byte goes into the slot left open.  */
      buf[1] = prev_saved;
      *nread = len + 1;
      return buf;
    }

  *nread = len;
  return buf + 1;
}
1773 
1774 /* Add OFFSET to the input offset, setting the overflow flag if
1775    necessary.  */
1776 
1777 static void
advance_input_offset(intmax_t offset)1778 advance_input_offset (intmax_t offset)
1779 {
1780   if (0 <= input_offset && ckd_add (&input_offset, input_offset, offset))
1781     input_offset = -1;
1782 }
1783 
1784 /* Throw away RECORDS blocks of BLOCKSIZE bytes plus BYTES bytes on
1785    file descriptor FDESC, which is open with read permission for FILE.
1786    Store up to BLOCKSIZE bytes of the data at a time in IBUF or OBUF, if
1787    necessary. RECORDS or BYTES must be nonzero. If FDESC is
1788    STDIN_FILENO, advance the input offset. Return the number of
1789    records remaining, i.e., that were not skipped because EOF was
1790    reached.  If FDESC is STDOUT_FILENO, on return, BYTES is the
1791    remaining bytes in addition to the remaining records.  */
1792 
1793 static intmax_t
skip(int fdesc,char const * file,intmax_t records,idx_t blocksize,idx_t * bytes)1794 skip (int fdesc, char const *file, intmax_t records, idx_t blocksize,
1795       idx_t *bytes)
1796 {
1797   /* Try lseek and if an error indicates it was an inappropriate operation --
1798      or if the file offset is not representable as an off_t --
1799      fall back on using read.  */
1800 
1801   errno = 0;
1802   off_t offset;
1803   if (! ckd_mul (&offset, records, blocksize)
1804       && ! ckd_add (&offset, offset, *bytes)
1805       && 0 <= lseek (fdesc, offset, SEEK_CUR))
1806     {
1807       if (fdesc == STDIN_FILENO)
1808         {
1809            struct stat st;
1810            if (ifstat (STDIN_FILENO, &st) != 0)
1811              error (EXIT_FAILURE, errno, _("cannot fstat %s"), quoteaf (file));
1812            if (usable_st_size (&st) && 0 <= input_offset
1813                && st.st_size - input_offset < offset)
1814              {
1815                /* When skipping past EOF, return the number of _full_ blocks
1816                 * that are not skipped, and set offset to EOF, so the caller
1817                 * can determine the requested skip was not satisfied.  */
1818                records = ( offset - st.st_size ) / blocksize;
1819                offset = st.st_size - input_offset;
1820              }
1821            else
1822              records = 0;
1823            advance_input_offset (offset);
1824         }
1825       else
1826         {
1827           records = 0;
1828           *bytes = 0;
1829         }
1830       return records;
1831     }
1832   else
1833     {
1834       int lseek_errno = errno;
1835 
1836       /* The seek request may have failed above if it was too big
1837          (> device size, > max file size, etc.)
1838          Or it may not have been done at all (> OFF_T_MAX).
1839          Therefore try to seek to the end of the file,
1840          to avoid redundant reading.  */
1841       if (lseek (fdesc, 0, SEEK_END) >= 0)
1842         {
1843           /* File is seekable, and we're at the end of it, and
1844              size <= OFF_T_MAX. So there's no point using read to advance.  */
1845 
1846           if (!lseek_errno)
1847             {
1848               /* The original seek was not attempted as offset > OFF_T_MAX.
1849                  We should error for write as can't get to the desired
1850                  location, even if OFF_T_MAX < max file size.
1851                  For read we're not going to read any data anyway,
1852                  so we should error for consistency.
1853                  It would be nice to not error for /dev/{zero,null}
1854                  for any offset, but that's not a significant issue.  */
1855               lseek_errno = EOVERFLOW;
1856             }
1857 
1858           diagnose (lseek_errno,
1859                     gettext (fdesc == STDIN_FILENO
1860                              ? N_("%s: cannot skip")
1861                              : N_("%s: cannot seek")),
1862                     quotef (file));
1863           /* If the file has a specific size and we've asked
1864              to skip/seek beyond the max allowable, then quit.  */
1865           quit (EXIT_FAILURE);
1866         }
1867       /* else file_size && offset > OFF_T_MAX or file ! seekable */
1868 
1869       char *buf;
1870       if (fdesc == STDIN_FILENO)
1871         {
1872           alloc_ibuf ();
1873           buf = ibuf;
1874         }
1875       else
1876         {
1877           alloc_obuf ();
1878           buf = obuf;
1879         }
1880 
1881       do
1882         {
1883           ssize_t nread = iread_fnc (fdesc, buf, records ? blocksize : *bytes);
1884           if (nread < 0)
1885             {
1886               if (fdesc == STDIN_FILENO)
1887                 {
1888                   diagnose (errno, _("error reading %s"), quoteaf (file));
1889                   if (conversions_mask & C_NOERROR)
1890                     print_stats ();
1891                 }
1892               else
1893                 diagnose (lseek_errno, _("%s: cannot seek"), quotef (file));
1894               quit (EXIT_FAILURE);
1895             }
1896           else if (nread == 0)
1897             break;
1898           else if (fdesc == STDIN_FILENO)
1899             advance_input_offset (nread);
1900 
1901           if (records != 0)
1902             records--;
1903           else
1904             *bytes = 0;
1905         }
1906       while (records || *bytes);
1907 
1908       return records;
1909     }
1910 }
1911 
1912 /* Advance the input by NBYTES if possible, after a read error.
1913    The input file offset may or may not have advanced after the failed
1914    read; adjust it to point just after the bad record regardless.
1915    Return true if successful, or if the input is already known to not
1916    be seekable.  */
1917 
1918 static bool
advance_input_after_read_error(idx_t nbytes)1919 advance_input_after_read_error (idx_t nbytes)
1920 {
1921   if (! input_seekable)
1922     {
1923       if (input_seek_errno == ESPIPE)
1924         return true;
1925       errno = input_seek_errno;
1926     }
1927   else
1928     {
1929       off_t offset;
1930       advance_input_offset (nbytes);
1931       if (input_offset < 0)
1932         {
1933           diagnose (0, _("offset overflow while reading file %s"),
1934                     quoteaf (input_file));
1935           return false;
1936         }
1937       offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
1938       if (0 <= offset)
1939         {
1940           off_t diff;
1941           if (offset == input_offset)
1942             return true;
1943           diff = input_offset - offset;
1944           if (! (0 <= diff && diff <= nbytes) && status_level != STATUS_NONE)
1945             diagnose (0, _("warning: invalid file offset after failed read"));
1946           if (0 <= lseek (STDIN_FILENO, diff, SEEK_CUR))
1947             return true;
1948           if (errno == 0)
1949             diagnose (0, _("cannot work around kernel bug after all"));
1950         }
1951     }
1952 
1953   diagnose (errno, _("%s: cannot seek"), quotef (input_file));
1954   return false;
1955 }
1956 
1957 /* Copy NREAD bytes of BUF, with no conversions.  */
1958 
1959 static void
copy_simple(char const * buf,idx_t nread)1960 copy_simple (char const *buf, idx_t nread)
1961 {
1962   char const *start = buf;	/* First uncopied char in BUF.  */
1963 
1964   do
1965     {
1966       idx_t nfree = MIN (nread, output_blocksize - oc);
1967 
1968       memcpy (obuf + oc, start, nfree);
1969 
1970       nread -= nfree;		/* Update the number of bytes left to copy. */
1971       start += nfree;
1972       oc += nfree;
1973       if (oc >= output_blocksize)
1974         write_output ();
1975     }
1976   while (nread != 0);
1977 }
1978 
1979 /* Copy NREAD bytes of BUF, doing conv=block
1980    (pad newline-terminated records to 'conversion_blocksize',
1981    replacing the newline with trailing spaces).  */
1982 
1983 static void
copy_with_block(char const * buf,idx_t nread)1984 copy_with_block (char const *buf, idx_t nread)
1985 {
1986   for (idx_t i = nread; i; i--, buf++)
1987     {
1988       if (*buf == newline_character)
1989         {
1990           if (col < conversion_blocksize)
1991             {
1992               idx_t j;
1993               for (j = col; j < conversion_blocksize; j++)
1994                 output_char (space_character);
1995             }
1996           col = 0;
1997         }
1998       else
1999         {
2000           if (col == conversion_blocksize)
2001             r_truncate++;
2002           else if (col < conversion_blocksize)
2003             output_char (*buf);
2004           col++;
2005         }
2006     }
2007 }
2008 
2009 /* Copy NREAD bytes of BUF, doing conv=unblock
2010    (replace trailing spaces in 'conversion_blocksize'-sized records
2011    with a newline).  */
2012 
2013 static void
copy_with_unblock(char const * buf,idx_t nread)2014 copy_with_unblock (char const *buf, idx_t nread)
2015 {
2016   static idx_t pending_spaces = 0;
2017 
2018   for (idx_t i = 0; i < nread; i++)
2019     {
2020       char c = buf[i];
2021 
2022       if (col++ >= conversion_blocksize)
2023         {
2024           col = pending_spaces = 0; /* Wipe out any pending spaces.  */
2025           i--;			/* Push the char back; get it later. */
2026           output_char (newline_character);
2027         }
2028       else if (c == space_character)
2029         pending_spaces++;
2030       else
2031         {
2032           /* 'c' is the character after a run of spaces that were not
2033              at the end of the conversion buffer.  Output them.  */
2034           while (pending_spaces)
2035             {
2036               output_char (space_character);
2037               --pending_spaces;
2038             }
2039           output_char (c);
2040         }
2041     }
2042 }
2043 
2044 /* Set the file descriptor flags for FD that correspond to the nonzero bits
2045    in ADD_FLAGS.  The file's name is NAME.  */
2046 
2047 static void
set_fd_flags(int fd,int add_flags,char const * name)2048 set_fd_flags (int fd, int add_flags, char const *name)
2049 {
2050   /* Ignore file creation flags that are no-ops on file descriptors.  */
2051   add_flags &= ~ (O_NOCTTY | O_NOFOLLOW);
2052 
2053   if (add_flags)
2054     {
2055       int old_flags = fcntl (fd, F_GETFL);
2056       int new_flags = old_flags | add_flags;
2057       bool ok = true;
2058       if (old_flags < 0)
2059         ok = false;
2060       else if (old_flags != new_flags)
2061         {
2062           if (new_flags & (O_DIRECTORY | O_NOLINKS))
2063             {
2064               /* NEW_FLAGS contains at least one file creation flag that
2065                  requires some checking of the open file descriptor.  */
2066               struct stat st;
2067               if (ifstat (fd, &st) != 0)
2068                 ok = false;
2069               else if ((new_flags & O_DIRECTORY) && ! S_ISDIR (st.st_mode))
2070                 {
2071                   errno = ENOTDIR;
2072                   ok = false;
2073                 }
2074               else if ((new_flags & O_NOLINKS) && 1 < st.st_nlink)
2075                 {
2076                   errno = EMLINK;
2077                   ok = false;
2078                 }
2079               new_flags &= ~ (O_DIRECTORY | O_NOLINKS);
2080             }
2081 
2082           if (ok && old_flags != new_flags
2083               && fcntl (fd, F_SETFL, new_flags) == -1)
2084             ok = false;
2085         }
2086 
2087       if (!ok)
2088         error (EXIT_FAILURE, errno, _("setting flags for %s"), quoteaf (name));
2089     }
2090 }
2091 
/* The main loop.  Perform any requested input skip and output seek,
   then repeatedly read a block from standard input, apply the
   conversions selected in conversions_mask, and write the result to
   standard output, until EOF, the record/byte limits, or an
   unrecoverable error ends the copy.  Afterwards flush any buffered
   output and, if the final write was turned into a seek, extend the
   output file to match.
   Return EXIT_SUCCESS, or EXIT_FAILURE after an unrecoverable read
   error, a failed attempt to seek past a bad input block, or a write,
   fstat or truncate failure.  */

static int
dd_copy (void)
{
  char *bufstart;		/* Input buffer. */
  ssize_t nread;		/* Bytes read in the current block.  */

  /* If nonzero, then the previously read block was partial and
     PARTREAD was its size.  */
  idx_t partread = 0;

  int exit_status = EXIT_SUCCESS;
  idx_t n_bytes_read;

  /* Discard the requested amount of leading input.  */
  if (skip_records != 0 || skip_bytes != 0)
    {
      /* US_BYTES is the total number of bytes that should be skipped;
         it is compared below with how far the input offset moved.  */
      intmax_t us_bytes;
      bool us_bytes_overflow =
        (ckd_mul (&us_bytes, skip_records, input_blocksize)
         || ckd_add (&us_bytes, skip_bytes, us_bytes));
      off_t input_offset0 = input_offset;
      intmax_t us_blocks = skip (STDIN_FILENO, input_file,
                                 skip_records, input_blocksize, &skip_bytes);

      /* POSIX doesn't say what to do when dd detects it has been
         asked to skip past EOF, so I assume it's non-fatal.
         There are 3 reasons why there might be unskipped blocks/bytes:
             1. file is too small
             2. pipe has not enough data
             3. partial reads  */
      if ((us_blocks
           || (0 <= input_offset
               && (us_bytes_overflow
                   || us_bytes != input_offset - input_offset0)))
          && status_level != STATUS_NONE)
        {
          diagnose (0, _("%s: cannot skip to specified offset"),
                    quotef (input_file));
        }
    }

  /* Position the output.  If skip could not cover the whole distance,
     write zeros for the records and bytes that remain.  */
  if (seek_records != 0 || seek_bytes != 0)
    {
      idx_t bytes = seek_bytes;
      intmax_t write_records = skip (STDOUT_FILENO, output_file,
                                      seek_records, output_blocksize, &bytes);

      if (write_records != 0 || bytes != 0)
        {
          memset (obuf, 0, write_records ? output_blocksize : bytes);

          do
            {
              /* Whole records first, then the trailing BYTES.  */
              idx_t size = write_records ? output_blocksize : bytes;
              if (iwrite (STDOUT_FILENO, obuf, size) != size)
                {
                  diagnose (errno, _("writing to %s"), quoteaf (output_file));
                  quit (EXIT_FAILURE);
                }

              if (write_records != 0)
                write_records--;
              else
                bytes = 0;
            }
          while (write_records || bytes);
        }
    }

  /* Nothing is to be copied.  */
  if (max_records == 0 && max_bytes == 0)
    return exit_status;

  alloc_ibuf ();
  alloc_obuf ();
  /* Byte held over between blocks by swab_buffer, or -1 if none.  */
  int saved_byte = -1;

  while (true)
    {
      /* Emit a progress report once NEXT_TIME has passed, then push
         NEXT_TIME forward by XTIME_PRECISION.  */
      if (status_level == STATUS_PROGRESS)
        {
          xtime_t progress_time = gethrxtime ();
          if (next_time <= progress_time)
            {
              print_xfer_stats (progress_time);
              next_time += XTIME_PRECISION;
            }
        }

      /* Stop after MAX_RECORDS reads, plus one more read if MAX_BYTES
         is nonzero.  */
      if (r_partial + r_full >= max_records + !!max_bytes)
        break;

      /* Zero the buffer before reading, so that if we get a read error,
         whatever data we are able to read is followed by zeros.
         This minimizes data loss. */
      if ((conversions_mask & C_SYNC) && (conversions_mask & C_NOERROR))
        memset (ibuf,
                (conversions_mask & (C_BLOCK | C_UNBLOCK)) ? ' ' : '\0',
                input_blocksize);

      /* Once MAX_RECORDS reads have been done, the one remaining read
         is limited to MAX_BYTES.  */
      if (r_partial + r_full >= max_records)
        nread = iread_fnc (STDIN_FILENO, ibuf, max_bytes);
      else
        nread = iread_fnc (STDIN_FILENO, ibuf, input_blocksize);

      if (nread > 0)
        {
          advance_input_offset (nread);
          if (i_nocache)
            invalidate_cache (STDIN_FILENO, nread);
        }
      else if (nread == 0)
        {
          i_nocache_eof |= i_nocache;
          o_nocache_eof |= o_nocache && ! (conversions_mask & C_NOTRUNC);
          break;			/* EOF.  */
        }
      else
        {
          /* Read error.  Stay silent only if the error is to be
             ignored and the status level is STATUS_NONE.  */
          if (!(conversions_mask & C_NOERROR) || status_level != STATUS_NONE)
            diagnose (errno, _("error reading %s"), quoteaf (input_file));

          if (conversions_mask & C_NOERROR)
            {
              print_stats ();
              /* The part of the input block not covered by a preceding
                 partial read.  */
              idx_t bad_portion = input_blocksize - partread;

              /* We already know this data is not cached,
                 but call this so that correct offsets are maintained.  */
              invalidate_cache (STDIN_FILENO, bad_portion);

              /* Seek past the bad block if possible. */
              if (!advance_input_after_read_error (bad_portion))
                {
                  exit_status = EXIT_FAILURE;

                  /* Suppress duplicate diagnostics.  */
                  input_seekable = false;
                  input_seek_errno = ESPIPE;
                }
              if ((conversions_mask & C_SYNC) && !partread)
                /* Replace the missing input with null bytes and
                   proceed normally.  */
                nread = 0;
              else
                continue;
            }
          else
            {
              /* Write any partial block. */
              exit_status = EXIT_FAILURE;
              break;
            }
        }

      n_bytes_read = nread;

      /* Count the block as partial or full; with C_SYNC a partial
         block is padded out to a full input block.  */
      if (n_bytes_read < input_blocksize)
        {
          r_partial++;
          partread = n_bytes_read;
          if (conversions_mask & C_SYNC)
            {
              if (!(conversions_mask & C_NOERROR))
                /* If C_NOERROR, we zeroed the block before reading. */
                memset (ibuf + n_bytes_read,
                        (conversions_mask & (C_BLOCK | C_UNBLOCK)) ? ' ' : '\0',
                        input_blocksize - n_bytes_read);
              n_bytes_read = input_blocksize;
            }
        }
      else
        {
          r_full++;
          partread = 0;
        }

      /* With a single shared buffer, write the block out directly and
         go on to the next read.  */
      if (ibuf == obuf)		/* If not C_TWOBUFS. */
        {
          idx_t nwritten = iwrite (STDOUT_FILENO, obuf, n_bytes_read);
          w_bytes += nwritten;
          if (nwritten != n_bytes_read)
            {
              diagnose (errno, _("error writing %s"), quoteaf (output_file));
              return EXIT_FAILURE;
            }
          else if (n_bytes_read == input_blocksize)
            w_full++;
          else
            w_partial++;
          continue;
        }

      /* Do any translations on the whole buffer at once.  */

      if (translation_needed)
        translate_buffer (ibuf, n_bytes_read);

      /* swab_buffer may return a different start address and may
         change N_BYTES_READ.  */
      if (conversions_mask & C_SWAB)
        bufstart = swab_buffer (ibuf, &n_bytes_read, &saved_byte);
      else
        bufstart = ibuf;

      if (conversions_mask & C_BLOCK)
        copy_with_block (bufstart, n_bytes_read);
      else if (conversions_mask & C_UNBLOCK)
        copy_with_unblock (bufstart, n_bytes_read);
      else
        copy_simple (bufstart, n_bytes_read);
    }

  /* If we have a char left as a result of conv=swab, output it.  */
  if (0 <= saved_byte)
    {
      char saved_char = saved_byte;
      if (conversions_mask & C_BLOCK)
        copy_with_block (&saved_char, 1);
      else if (conversions_mask & C_UNBLOCK)
        copy_with_unblock (&saved_char, 1);
      else
        output_char (saved_char);
    }

  if ((conversions_mask & C_BLOCK) && col > 0)
    {
      /* If the final input line didn't end with a '\n', pad
         the output block to 'conversion_blocksize' chars.  */
      for (idx_t i = col; i < conversion_blocksize; i++)
        output_char (space_character);
    }

  if (col && (conversions_mask & C_UNBLOCK))
    {
      /* If there was any output, add a final '\n'.  */
      output_char (newline_character);
    }

  /* Write out the last block. */
  if (oc != 0)
    {
      idx_t nwritten = iwrite (STDOUT_FILENO, obuf, oc);
      w_bytes += nwritten;
      /* Count it as a partial write if any of it was written.  */
      if (nwritten != 0)
        w_partial++;
      if (nwritten != oc)
        {
          diagnose (errno, _("error writing %s"), quoteaf (output_file));
          return EXIT_FAILURE;
        }
    }

  /* If the last write was converted to a seek, then for a regular file
     or shared memory object, ftruncate to extend the size.  */
  if (final_op_was_seek)
    {
      struct stat stdout_stat;
      if (ifstat (STDOUT_FILENO, &stdout_stat) != 0)
        {
          diagnose (errno, _("cannot fstat %s"), quoteaf (output_file));
          return EXIT_FAILURE;
        }
      if (S_ISREG (stdout_stat.st_mode) || S_TYPEISSHM (&stdout_stat))
        {
          /* Extend only when the current offset lies beyond the
             file's present size.  */
          off_t output_offset = lseek (STDOUT_FILENO, 0, SEEK_CUR);
          if (0 <= output_offset && stdout_stat.st_size < output_offset)
            {
              if (iftruncate (STDOUT_FILENO, output_offset) != 0)
                {
                  diagnose (errno, _("failed to truncate to %jd bytes"
                                     " in output file %s"),
                            (intmax_t) output_offset, quoteaf (output_file));
                  return EXIT_FAILURE;
                }
            }
        }
    }

  /* fdatasync/fsync can take a long time, so issue a final progress
     indication now if progress has been made since the previous indication.  */
  if (conversions_mask & (C_FDATASYNC | C_FSYNC)
      && status_level == STATUS_PROGRESS
      && 0 <= reported_w_bytes && reported_w_bytes < w_bytes)
    print_xfer_stats (0);

  return exit_status;
}
2378 
2379 /* Synchronize output according to conversions_mask.
2380    Do this even if w_bytes is zero, as fsync and fdatasync
2381    flush out write requests from other processes too.
2382    Clear bits in conversions_mask so that synchronization is done only once.
2383    Return zero if successful, an exit status otherwise.  */
2384 
2385 static int
synchronize_output(void)2386 synchronize_output (void)
2387 {
2388   int exit_status = 0;
2389   int mask = conversions_mask;
2390   conversions_mask &= ~ (C_FDATASYNC | C_FSYNC);
2391 
2392   if ((mask & C_FDATASYNC) && ifdatasync (STDOUT_FILENO) != 0)
2393     {
2394       if (errno != ENOSYS && errno != EINVAL)
2395         {
2396           diagnose (errno, _("fdatasync failed for %s"), quoteaf (output_file));
2397           exit_status = EXIT_FAILURE;
2398         }
2399       mask |= C_FSYNC;
2400     }
2401 
2402   if ((mask & C_FSYNC) && ifsync (STDOUT_FILENO) != 0)
2403     {
2404       diagnose (errno, _("fsync failed for %s"), quoteaf (output_file));
2405       return EXIT_FAILURE;
2406     }
2407 
2408   return exit_status;
2409 }
2410 
2411 int
main(int argc,char ** argv)2412 main (int argc, char **argv)
2413 {
2414   int i;
2415   int exit_status;
2416   off_t offset;
2417 
2418   install_signal_handlers ();
2419 
2420   initialize_main (&argc, &argv);
2421   set_program_name (argv[0]);
2422   setlocale (LC_ALL, "");
2423   bindtextdomain (PACKAGE, LOCALEDIR);
2424   textdomain (PACKAGE);
2425 
2426   /* Arrange to close stdout if parse_long_options exits.  */
2427   atexit (maybe_close_stdout);
2428 
2429   page_size = getpagesize ();
2430 
2431   parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE, Version,
2432                                    true, usage, AUTHORS,
2433                                    (char const *) nullptr);
2434   close_stdout_required = false;
2435 
2436   /* Initialize translation table to identity translation. */
2437   for (i = 0; i < 256; i++)
2438     trans_table[i] = i;
2439 
2440   /* Decode arguments. */
2441   scanargs (argc, argv);
2442 
2443   apply_translations ();
2444 
2445   if (input_file == nullptr)
2446     {
2447       input_file = _("standard input");
2448       set_fd_flags (STDIN_FILENO, input_flags, input_file);
2449     }
2450   else
2451     {
2452       if (ifd_reopen (STDIN_FILENO, input_file, O_RDONLY | input_flags, 0) < 0)
2453         error (EXIT_FAILURE, errno, _("failed to open %s"),
2454                quoteaf (input_file));
2455     }
2456 
2457   offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
2458   input_seekable = (0 <= offset);
2459   input_offset = MAX (0, offset);
2460   input_seek_errno = errno;
2461 
2462   if (output_file == nullptr)
2463     {
2464       output_file = _("standard output");
2465       set_fd_flags (STDOUT_FILENO, output_flags, output_file);
2466     }
2467   else
2468     {
2469       mode_t perms = MODE_RW_UGO;
2470       int opts
2471         = (output_flags
2472            | (conversions_mask & C_NOCREAT ? 0 : O_CREAT)
2473            | (conversions_mask & C_EXCL ? O_EXCL : 0)
2474            | (seek_records || (conversions_mask & C_NOTRUNC) ? 0 : O_TRUNC));
2475 
2476       off_t size;
2477       if ((ckd_mul (&size, seek_records, output_blocksize)
2478            || ckd_add (&size, seek_bytes, size))
2479           && !(conversions_mask & C_NOTRUNC))
2480         error (EXIT_FAILURE, 0,
2481                _("offset too large: "
2482                  "cannot truncate to a length of seek=%jd"
2483                  " (%td-byte) blocks"),
2484                seek_records, output_blocksize);
2485 
2486       /* Open the output file with *read* access only if we might
2487          need to read to satisfy a 'seek=' request.  If we can't read
2488          the file, go ahead with write-only access; it might work.  */
2489       if ((! seek_records
2490            || ifd_reopen (STDOUT_FILENO, output_file, O_RDWR | opts, perms) < 0)
2491           && (ifd_reopen (STDOUT_FILENO, output_file, O_WRONLY | opts, perms)
2492               < 0))
2493         error (EXIT_FAILURE, errno, _("failed to open %s"),
2494                quoteaf (output_file));
2495 
2496       if (seek_records != 0 && !(conversions_mask & C_NOTRUNC))
2497         {
2498           if (iftruncate (STDOUT_FILENO, size) != 0)
2499             {
2500               /* Complain only when ftruncate fails on a regular file, a
2501                  directory, or a shared memory object, as POSIX 1003.1-2004
2502                  specifies ftruncate's behavior only for these file types.
2503                  For example, do not complain when Linux kernel 2.4 ftruncate
2504                  fails on /dev/fd0.  */
2505               int ftruncate_errno = errno;
2506               struct stat stdout_stat;
2507               if (ifstat (STDOUT_FILENO, &stdout_stat) != 0)
2508                 {
2509                   diagnose (errno, _("cannot fstat %s"), quoteaf (output_file));
2510                   exit_status = EXIT_FAILURE;
2511                 }
2512               else if (S_ISREG (stdout_stat.st_mode)
2513                        || S_ISDIR (stdout_stat.st_mode)
2514                        || S_TYPEISSHM (&stdout_stat))
2515                 {
2516                   intmax_t isize = size;
2517                   diagnose (ftruncate_errno,
2518                             _("failed to truncate to %jd bytes"
2519                               " in output file %s"),
2520                             isize, quoteaf (output_file));
2521                   exit_status = EXIT_FAILURE;
2522                 }
2523             }
2524         }
2525     }
2526 
2527   start_time = gethrxtime ();
2528   next_time = start_time + XTIME_PRECISION;
2529 
2530   exit_status = dd_copy ();
2531 
2532   int sync_status = synchronize_output ();
2533   if (sync_status)
2534     exit_status = sync_status;
2535 
2536   if (max_records == 0 && max_bytes == 0)
2537     {
2538       /* Special case to invalidate cache to end of file.  */
2539       if (i_nocache && !invalidate_cache (STDIN_FILENO, 0))
2540         {
2541           diagnose (errno, _("failed to discard cache for: %s"),
2542                     quotef (input_file));
2543           exit_status = EXIT_FAILURE;
2544         }
2545       if (o_nocache && !invalidate_cache (STDOUT_FILENO, 0))
2546         {
2547           diagnose (errno, _("failed to discard cache for: %s"),
2548                     quotef (output_file));
2549           exit_status = EXIT_FAILURE;
2550         }
2551     }
2552   else
2553     {
2554       /* Invalidate any pending region or to EOF if appropriate.  */
2555       if (i_nocache || i_nocache_eof)
2556         invalidate_cache (STDIN_FILENO, 0);
2557       if (o_nocache || o_nocache_eof)
2558         invalidate_cache (STDOUT_FILENO, 0);
2559     }
2560 
2561   finish_up ();
2562   main_exit (exit_status);
2563 }
2564