1 /* shred.c - overwrite files and devices to make it harder to recover data
2 
3    Copyright (C) 1999-2023 Free Software Foundation, Inc.
4    Copyright (C) 1997, 1998, 1999 Colin Plumb.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation, either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 
19    Written by Colin Plumb.  */
20 
21 /*
22  * Do a more secure overwrite of given files or devices, to make it harder
23  * for even very expensive hardware probing to recover the data.
24  *
25  * Although this process is also known as "wiping", I prefer the longer
26  * name both because I think it is more evocative of what is happening and
27  * because a longer name conveys a more appropriate sense of deliberateness.
28  *
29  * For the theory behind this, see "Secure Deletion of Data from Magnetic
30  * and Solid-State Memory", on line at
31  * https://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html
32  *
33  * Just for the record, reversing one or two passes of disk overwrite
34  * is not terribly difficult with hardware help.  Hook up a good-quality
35  * digitizing oscilloscope to the output of the head preamplifier and copy
36  * the high-res digitized data to a computer for some off-line analysis.
37  * Read the "current" data and average all the pulses together to get an
38  * "average" pulse on the disk.  Subtract this average pulse from all of
39  * the actual pulses and you can clearly see the "echo" of the previous
40  * data on the disk.
41  *
42  * Real hard drives have to balance the cost of the media, the head,
43  * and the read circuitry.  They use better-quality media than absolutely
44  * necessary to limit the cost of the read circuitry.  By throwing that
45  * assumption out, and the assumption that you want the data processed
46  * as fast as the hard drive can spin, you can do better.
47  *
48  * If asked to wipe a file, this also unlinks it, renaming it in a
49  * clever way to try to leave no trace of the original filename.
50  *
51  * This was inspired by a desire to improve on some code titled:
52  * Wipe V1.0-- Overwrite and delete files.  S. 2/3/96
53  * but I've rewritten everything here so completely that no trace of
54  * the original remains.
55  *
56  * Thanks to:
57  * Bob Jenkins, for his good RNG work and patience with the FSF copyright
58  * paperwork.
59  * Jim Meyering, for his work merging this into the GNU fileutils while
60  * still letting me feel a sense of ownership and pride.  Getting me to
61  * tolerate the GNU brace style was quite a feat of diplomacy.
62  * Paul Eggert, for lots of useful discussion and code.  I disagree with
63  * an awful lot of his suggestions, but they're disagreements worth having.
64  *
65  * Things to think about:
66  * - Security: Is there any risk to the race
67  *   between overwriting and unlinking a file?  Will it do anything
68  *   drastically bad if told to attack a named pipe or socket?
69  */
70 
71 /* The official name of this program (e.g., no 'g' prefix).  */
72 #define PROGRAM_NAME "shred"
73 
74 #define AUTHORS proper_name ("Colin Plumb")
75 
76 #include <config.h>
77 
78 #include <getopt.h>
79 #include <stdio.h>
80 #include <setjmp.h>
81 #include <sys/types.h>
82 #if defined __linux__ && HAVE_SYS_MTIO_H
83 # include <sys/mtio.h>
84 #endif
85 
86 #include "system.h"
87 #include "alignalloc.h"
88 #include "argmatch.h"
89 #include "assure.h"
90 #include "xdectoint.h"
91 #include "fcntl--.h"
92 #include "human.h"
93 #include "randint.h"
94 #include "randread.h"
95 #include "renameatu.h"
96 #include "stat-size.h"
97 
98 /* Default number of times to overwrite.  */
99 enum { DEFAULT_PASSES = 3 };
100 
101 /* How many seconds to wait before checking whether to output another
102    verbose output line.  */
103 enum { VERBOSE_UPDATE = 5 };
104 
105 /* Sector size and corresponding mask, for recovering after write failures.
106    The size must be a power of 2.  */
107 enum { SECTOR_SIZE = 512 };
108 enum { SECTOR_MASK = SECTOR_SIZE - 1 };
109 static_assert (0 < SECTOR_SIZE && (SECTOR_SIZE & SECTOR_MASK) == 0);
110 
111 enum remove_method
112 {
113   remove_none = 0,      /* the default: only wipe data.  */
114   remove_unlink,        /* don't obfuscate name, just unlink.  */
115   remove_wipe,          /* obfuscate name before unlink.  */
116   remove_wipesync       /* obfuscate name, syncing each byte, before unlink.  */
117 };
118 
119 static char const *const remove_args[] =
120 {
121   "unlink", "wipe", "wipesync", nullptr
122 };
123 
124 static enum remove_method const remove_methods[] =
125 {
126   remove_unlink, remove_wipe, remove_wipesync
127 };
128 
129 struct Options
130 {
131   bool force;		/* -f flag: chmod files if necessary */
132   size_t n_iterations;	/* -n flag: Number of iterations */
133   off_t size;		/* -s flag: size of file */
134   enum remove_method remove_file; /* -u flag: remove file after shredding */
135   bool verbose;		/* -v flag: Print progress */
136   bool exact;		/* -x flag: Do not round up file size */
137   bool zero_fill;	/* -z flag: Add a final zero pass */
138 };
139 
140 /* For long options that have no equivalent short option, use a
141    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
142 enum
143 {
144   RANDOM_SOURCE_OPTION = CHAR_MAX + 1
145 };
146 
147 static struct option const long_opts[] =
148 {
149   {"exact", no_argument, nullptr, 'x'},
150   {"force", no_argument, nullptr, 'f'},
151   {"iterations", required_argument, nullptr, 'n'},
152   {"size", required_argument, nullptr, 's'},
153   {"random-source", required_argument, nullptr, RANDOM_SOURCE_OPTION},
154   {"remove", optional_argument, nullptr, 'u'},
155   {"verbose", no_argument, nullptr, 'v'},
156   {"zero", no_argument, nullptr, 'z'},
157   {GETOPT_HELP_OPTION_DECL},
158   {GETOPT_VERSION_OPTION_DECL},
159   {nullptr, 0, nullptr, 0}
160 };
161 
162 void
usage(int status)163 usage (int status)
164 {
165   if (status != EXIT_SUCCESS)
166     emit_try_help ();
167   else
168     {
169       printf (_("Usage: %s [OPTION]... FILE...\n"), program_name);
170       fputs (_("\
171 Overwrite the specified FILE(s) repeatedly, in order to make it harder\n\
172 for even very expensive hardware probing to recover the data.\n\
173 "), stdout);
174       fputs (_("\
175 \n\
176 If FILE is -, shred standard output.\n\
177 "), stdout);
178 
179       emit_mandatory_arg_note ();
180 
181       printf (_("\
182   -f, --force    change permissions to allow writing if necessary\n\
183   -n, --iterations=N  overwrite N times instead of the default (%d)\n\
184       --random-source=FILE  get random bytes from FILE\n\
185   -s, --size=N   shred this many bytes (suffixes like K, M, G accepted)\n\
186 "), DEFAULT_PASSES);
187       fputs (_("\
188   -u             deallocate and remove file after overwriting\n\
189       --remove[=HOW]  like -u but give control on HOW to delete;  See below\n\
190   -v, --verbose  show progress\n\
191   -x, --exact    do not round file sizes up to the next full block;\n\
192                    this is the default for non-regular files\n\
193   -z, --zero     add a final overwrite with zeros to hide shredding\n\
194 "), stdout);
195       fputs (HELP_OPTION_DESCRIPTION, stdout);
196       fputs (VERSION_OPTION_DESCRIPTION, stdout);
197       fputs (_("\
198 \n\
199 Delete FILE(s) if --remove (-u) is specified.  The default is not to remove\n\
200 the files because it is common to operate on device files like /dev/hda,\n\
201 and those files usually should not be removed.\n\
202 The optional HOW parameter indicates how to remove a directory entry:\n\
203 'unlink' => use a standard unlink call.\n\
204 'wipe' => also first obfuscate bytes in the name.\n\
205 'wipesync' => also sync each obfuscated byte to the device.\n\
206 The default mode is 'wipesync', but note it can be expensive.\n\
207 \n\
208 "), stdout);
209       fputs (_("\
210 CAUTION: shred assumes the file system and hardware overwrite data in place.\n\
211 Although this is common, many platforms operate otherwise.  Also, backups\n\
212 and mirrors may contain unremovable copies that will let a shredded file\n\
213 be recovered later.  See the GNU coreutils manual for details.\n\
214 "), stdout);
215       emit_ancillary_info (PROGRAM_NAME);
216     }
217   exit (status);
218 }
219 
220 /*
221  * Determine if pattern type is periodic or not.
222  */
223 static bool
periodic_pattern(int type)224 periodic_pattern (int type)
225 {
226   if (type <= 0)
227     return false;
228 
229   unsigned char r[3];
230   unsigned int bits = type & 0xfff;
231 
232   bits |= bits << 12;
233   r[0] = (bits >> 4) & 255;
234   r[1] = (bits >> 8) & 255;
235   r[2] = bits & 255;
236 
237   return (r[0] != r[1]) || (r[0] != r[2]);
238 }
239 
240 /*
241  * Fill a buffer with a fixed pattern.
242  *
243  * The buffer must be at least 3 bytes long, even if
244  * size is less.  Larger sizes are filled exactly.
245  */
246 static void
fillpattern(int type,unsigned char * r,size_t size)247 fillpattern (int type, unsigned char *r, size_t size)
248 {
249   size_t i;
250   unsigned int bits = type & 0xfff;
251 
252   bits |= bits << 12;
253   r[0] = (bits >> 4) & 255;
254   r[1] = (bits >> 8) & 255;
255   r[2] = bits & 255;
256   for (i = 3; i <= size / 2; i *= 2)
257     memcpy (r + i, r, i);
258   if (i < size)
259     memcpy (r + i, r, size - i);
260 
261   /* Invert the first bit of every sector. */
262   if (type & 0x1000)
263     for (i = 0; i < size; i += SECTOR_SIZE)
264       r[i] ^= 0x80;
265 }
266 
267 /*
268  * Generate a 6-character (+ nul) pass name string
269  * FIXME: allow translation of "random".
270  */
271 #define PASS_NAME_SIZE 7
272 static void
passname(unsigned char const * data,char name[PASS_NAME_SIZE])273 passname (unsigned char const *data, char name[PASS_NAME_SIZE])
274 {
275   if (data)
276     sprintf (name, "%02x%02x%02x", data[0], data[1], data[2]);
277   else
278     memcpy (name, "random", PASS_NAME_SIZE);
279 }
280 
281 /* Return true when it's ok to ignore an fsync or fdatasync
282    failure that set errno to ERRNO_VAL.  */
283 static bool
ignorable_sync_errno(int errno_val)284 ignorable_sync_errno (int errno_val)
285 {
286   return (errno_val == EINVAL
287           || errno_val == EBADF
288           /* HP-UX does this */
289           || errno_val == EISDIR);
290 }
291 
292 /* Request that all data for FD be transferred to the corresponding
293    storage device.  QNAME is the file name (quoted for colons).
294    Report any errors found.  Return 0 on success, -1
295    (setting errno) on failure.  It is not an error if fdatasync and/or
296    fsync is not supported for this file, or if the file is not a
297    writable file descriptor.  */
298 static int
dosync(int fd,char const * qname)299 dosync (int fd, char const *qname)
300 {
301   int err;
302 
303   if (fdatasync (fd) == 0)
304     return 0;
305   err = errno;
306   if ( ! ignorable_sync_errno (err))
307     {
308       error (0, err, _("%s: fdatasync failed"), qname);
309       errno = err;
310       return -1;
311     }
312 
313   if (fsync (fd) == 0)
314     return 0;
315   err = errno;
316   if ( ! ignorable_sync_errno (err))
317     {
318       error (0, err, _("%s: fsync failed"), qname);
319       errno = err;
320       return -1;
321     }
322 
323   sync ();
324   return 0;
325 }
326 
327 /* Turn on or off direct I/O mode for file descriptor FD, if possible.
328    Try to turn it on if ENABLE is true.  Otherwise, try to turn it off.  */
329 static void
direct_mode(int fd,bool enable)330 direct_mode (int fd, bool enable)
331 {
332   if (O_DIRECT)
333     {
334       int fd_flags = fcntl (fd, F_GETFL);
335       if (0 < fd_flags)
336         {
337           int new_flags = (enable
338                            ? (fd_flags | O_DIRECT)
339                            : (fd_flags & ~O_DIRECT));
340           if (new_flags != fd_flags)
341             fcntl (fd, F_SETFL, new_flags);
342         }
343     }
344 
345 #if HAVE_DIRECTIO && defined DIRECTIO_ON && defined DIRECTIO_OFF
346   /* This is Solaris-specific.  */
347   directio (fd, enable ? DIRECTIO_ON : DIRECTIO_OFF);
348 #endif
349 }
350 
351 /* Rewind FD; its status is ST.  */
352 static bool
dorewind(int fd,struct stat const * st)353 dorewind (int fd, struct stat const *st)
354 {
355   if (S_ISCHR (st->st_mode))
356     {
357 #if defined __linux__ && HAVE_SYS_MTIO_H
358       /* In the Linux kernel, lseek does not work on tape devices; it
359          returns a randomish value instead.  Try the low-level tape
360          rewind operation first.  */
361       struct mtop op;
362       op.mt_op = MTREW;
363       op.mt_count = 1;
364       if (ioctl (fd, MTIOCTOP, &op) == 0)
365         return true;
366 #endif
367     }
368   off_t offset = lseek (fd, 0, SEEK_SET);
369   if (0 < offset)
370     errno = EINVAL;
371   return offset == 0;
372 }
373 
374 /* By convention, negative sizes represent unknown values.  */
375 
376 static bool
known(off_t size)377 known (off_t size)
378 {
379   return 0 <= size;
380 }
381 
382 /*
383  * Do pass number K of N, writing *SIZEP bytes of the given pattern TYPE
384  * to the file descriptor FD.  K and N are passed in only for verbose
385  * progress message purposes.  If N == 0, no progress messages are printed.
386  *
387  * If *SIZEP == -1, the size is unknown, and it will be filled in as soon
388  * as writing fails with ENOSPC.
389  *
390  * Return 1 on write error, -1 on other error, 0 on success.
391  */
392 static int
dopass(int fd,struct stat const * st,char const * qname,off_t * sizep,int type,struct randread_source * s,unsigned long int k,unsigned long int n)393 dopass (int fd, struct stat const *st, char const *qname, off_t *sizep,
394         int type, struct randread_source *s,
395         unsigned long int k, unsigned long int n)
396 {
397   off_t size = *sizep;
398   off_t offset;			/* Current file position */
399   time_t thresh IF_LINT ( = 0);	/* Time to maybe print next status update */
400   time_t now = 0;		/* Current time */
401   size_t lim;			/* Amount of data to try writing */
402   size_t soff;			/* Offset into buffer for next write */
403   ssize_t ssize;		/* Return value from write */
404 
405   /* Fill pattern buffer.  Aligning it to a page so we can do direct I/O.  */
406   size_t page_size = getpagesize ();
407 #define PERIODIC_OUTPUT_SIZE (60 * 1024)
408 #define NONPERIODIC_OUTPUT_SIZE (64 * 1024)
409   static_assert (PERIODIC_OUTPUT_SIZE % 3 == 0);
410   size_t output_size = periodic_pattern (type)
411                        ? PERIODIC_OUTPUT_SIZE : NONPERIODIC_OUTPUT_SIZE;
412 #define FILLPATTERN_SIZE (((output_size + 2) / 3) * 3) /* Multiple of 3 */
413   unsigned char *pbuf = xalignalloc (page_size, FILLPATTERN_SIZE);
414 
415   char pass_string[PASS_NAME_SIZE];	/* Name of current pass */
416   bool write_error = false;
417   bool other_error = false;
418 
419   /* Printable previous offset into the file */
420   char previous_offset_buf[LONGEST_HUMAN_READABLE + 1];
421   char const *previous_human_offset;
422 
423   /* As a performance tweak, avoid direct I/O for small sizes,
424      as it's just a performance rather then security consideration,
425      and direct I/O can often be unsupported for small non aligned sizes.  */
426   bool try_without_directio = 0 < size && size < output_size;
427   if (! try_without_directio)
428     direct_mode (fd, true);
429 
430   if (! dorewind (fd, st))
431     {
432       error (0, errno, _("%s: cannot rewind"), qname);
433       other_error = true;
434       goto free_pattern_mem;
435     }
436 
437   /* Constant fill patterns need only be set up once. */
438   if (type >= 0)
439     {
440       lim = known (size) && size < FILLPATTERN_SIZE ? size : FILLPATTERN_SIZE;
441       fillpattern (type, pbuf, lim);
442       passname (pbuf, pass_string);
443     }
444   else
445     {
446       passname (0, pass_string);
447     }
448 
449   /* Set position if first status update */
450   if (n)
451     {
452       error (0, 0, _("%s: pass %lu/%lu (%s)..."), qname, k, n, pass_string);
453       thresh = time (nullptr) + VERBOSE_UPDATE;
454       previous_human_offset = "";
455     }
456 
457   offset = 0;
458   while (true)
459     {
460       /* How much to write this time? */
461       lim = output_size;
462       if (known (size) && size - offset < output_size)
463         {
464           if (size < offset)
465             break;
466           lim = size - offset;
467           if (!lim)
468             break;
469         }
470       if (type < 0)
471         randread (s, pbuf, lim);
472       /* Loop to retry partial writes. */
473       for (soff = 0; soff < lim; soff += ssize)
474         {
475           ssize = write (fd, pbuf + soff, lim - soff);
476           if (ssize <= 0)
477             {
478               if (! known (size) && (ssize == 0 || errno == ENOSPC))
479                 {
480                   /* We have found the end of the file.  */
481                   if (soff <= OFF_T_MAX - offset)
482                     *sizep = size = offset + soff;
483                   break;
484                 }
485               else
486                 {
487                   int errnum = errno;
488                   char buf[INT_BUFSIZE_BOUND (uintmax_t)];
489 
490                   /* Retry without direct I/O since this may not be supported
491                      at all on some (file) systems, or with the current size.
492                      I.e., a specified --size that is not aligned, or when
493                      dealing with slop at the end of a file with --exact.  */
494                   if (! try_without_directio && errno == EINVAL)
495                     {
496                       direct_mode (fd, false);
497                       ssize = 0;
498                       try_without_directio = true;
499                       continue;
500                     }
501                   error (0, errnum, _("%s: error writing at offset %s"),
502                          qname, umaxtostr (offset + soff, buf));
503 
504                   /* 'shred' is often used on bad media, before throwing it
505                      out.  Thus, it shouldn't give up on bad blocks.  This
506                      code works because lim is always a multiple of
507                      SECTOR_SIZE, except at the end.  This size constraint
508                      also enables direct I/O on some (file) systems.  */
509                   static_assert (PERIODIC_OUTPUT_SIZE % SECTOR_SIZE == 0);
510                   static_assert (NONPERIODIC_OUTPUT_SIZE % SECTOR_SIZE == 0);
511                   if (errnum == EIO && known (size)
512                       && (soff | SECTOR_MASK) < lim)
513                     {
514                       size_t soff1 = (soff | SECTOR_MASK) + 1;
515                       if (lseek (fd, offset + soff1, SEEK_SET) != -1)
516                         {
517                           /* Arrange to skip this block. */
518                           ssize = soff1 - soff;
519                           write_error = true;
520                           continue;
521                         }
522                       error (0, errno, _("%s: lseek failed"), qname);
523                     }
524                   other_error = true;
525                   goto free_pattern_mem;
526                 }
527             }
528         }
529 
530       /* Okay, we have written "soff" bytes. */
531 
532       if (OFF_T_MAX - offset < soff)
533         {
534           error (0, 0, _("%s: file too large"), qname);
535           other_error = true;
536           goto free_pattern_mem;
537         }
538 
539       offset += soff;
540 
541       bool done = offset == size;
542 
543       /* Time to print progress? */
544       if (n && ((done && *previous_human_offset)
545                 || thresh <= (now = time (nullptr))))
546         {
547           char offset_buf[LONGEST_HUMAN_READABLE + 1];
548           char size_buf[LONGEST_HUMAN_READABLE + 1];
549           int human_progress_opts = (human_autoscale | human_SI
550                                      | human_base_1024 | human_B);
551           char const *human_offset
552             = human_readable (offset, offset_buf,
553                               human_floor | human_progress_opts, 1, 1);
554 
555           if (done || !STREQ (previous_human_offset, human_offset))
556             {
557               if (! known (size))
558                 error (0, 0, _("%s: pass %lu/%lu (%s)...%s"),
559                        qname, k, n, pass_string, human_offset);
560               else
561                 {
562                   uintmax_t off = offset;
563                   int percent = (size == 0
564                                  ? 100
565                                  : (off <= TYPE_MAXIMUM (uintmax_t) / 100
566                                     ? off * 100 / size
567                                     : off / (size / 100)));
568                   char const *human_size
569                     = human_readable (size, size_buf,
570                                       human_ceiling | human_progress_opts,
571                                       1, 1);
572                   if (done)
573                     human_offset = human_size;
574                   error (0, 0, _("%s: pass %lu/%lu (%s)...%s/%s %d%%"),
575                          qname, k, n, pass_string, human_offset, human_size,
576                          percent);
577                 }
578 
579               strcpy (previous_offset_buf, human_offset);
580               previous_human_offset = previous_offset_buf;
581               thresh = now + VERBOSE_UPDATE;
582 
583               /*
584                * Force periodic syncs to keep displayed progress accurate
585                * FIXME: Should these be present even if -v is not enabled,
586                * to keep the buffer cache from filling with dirty pages?
587                * It's a common problem with programs that do lots of writes,
588                * like mkfs.
589                */
590               if (dosync (fd, qname) != 0)
591                 {
592                   if (errno != EIO)
593                     {
594                       other_error = true;
595                       goto free_pattern_mem;
596                     }
597                   write_error = true;
598                 }
599             }
600         }
601     }
602 
603   /* Force what we just wrote to hit the media. */
604   if (dosync (fd, qname) != 0)
605     {
606       if (errno != EIO)
607         {
608           other_error = true;
609           goto free_pattern_mem;
610         }
611       write_error = true;
612     }
613 
614 free_pattern_mem:
615   alignfree (pbuf);
616 
617   return other_error ? -1 : write_error;
618 }
619 
620 /*
621  * The passes start and end with a random pass, and the passes in between
622  * are done in random order.  The idea is to deprive someone trying to
623  * reverse the process of knowledge of the overwrite patterns, so they
624  * have the additional step of figuring out what was done to the device
625  * before they can try to reverse or cancel it.
626  *
627  * First, all possible 1-bit patterns.  There are two of them.
628  * Then, all possible 2-bit patterns.  There are four, but the two
629  * which are also 1-bit patterns can be omitted.
630  * Then, all possible 3-bit patterns.  Likewise, 8-2 = 6.
631  * Then, all possible 4-bit patterns.  16-4 = 12.
632  *
633  * The basic passes are:
634  * 1-bit: 0x000, 0xFFF
635  * 2-bit: 0x555, 0xAAA
636  * 3-bit: 0x249, 0x492, 0x924, 0x6DB, 0xB6D, 0xDB6 (+ 1-bit)
637  *        100100100100         110110110110
638  *           9   2   4            D   B   6
639  * 4-bit: 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
640  *        0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE (+ 1-bit, 2-bit)
641  * Adding three random passes at the beginning, middle and end
642  * produces the default 25-pass structure.
643  *
644  * The next extension would be to 5-bit and 6-bit patterns.
645  * There are 30 uncovered 5-bit patterns and 64-8-2 = 46 uncovered
646  * 6-bit patterns, so they would increase the time required
647  * significantly.  4-bit patterns are enough for most purposes.
648  *
649  * The main gotcha is that this would require a trickier encoding,
650  * since lcm(2,3,4) = 12 bits is easy to fit into an int, but
651  * lcm(2,3,4,5) = 60 bits is not.
652  *
653  * One extension that is included is to complement the first bit in each
654  * 512-byte block, to alter the phase of the encoded data in the more
655  * complex encodings.  This doesn't apply to MFM, so the 1-bit patterns
656  * are considered part of the 3-bit ones and the 2-bit patterns are
657  * considered part of the 4-bit patterns.
658  *
659  *
660  * How does the generalization to variable numbers of passes work?
661  *
662  * Here's how...
663  * Have an ordered list of groups of passes.  Each group is a set.
664  * Take as many groups as will fit, plus a random subset of the
665  * last partial group, and place them into the passes list.
666  * Then shuffle the passes list into random order and use that.
667  *
668  * One extra detail: if we can't include a large enough fraction of the
669  * last group to be interesting, then just substitute random passes.
670  *
671  * If you want more passes than the entire list of groups can
672  * provide, just start repeating from the beginning of the list.
673  */
674 static int const
675   patterns[] =
676 {
677   -2,				/* 2 random passes */
678   2, 0x000, 0xFFF,		/* 1-bit */
679   2, 0x555, 0xAAA,		/* 2-bit */
680   -1,				/* 1 random pass */
681   6, 0x249, 0x492, 0x6DB, 0x924, 0xB6D, 0xDB6,	/* 3-bit */
682   12, 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
683   0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE,	/* 4-bit */
684   -1,				/* 1 random pass */
685         /* The following patterns have the first bit per block flipped */
686   8, 0x1000, 0x1249, 0x1492, 0x16DB, 0x1924, 0x1B6D, 0x1DB6, 0x1FFF,
687   14, 0x1111, 0x1222, 0x1333, 0x1444, 0x1555, 0x1666, 0x1777,
688   0x1888, 0x1999, 0x1AAA, 0x1BBB, 0x1CCC, 0x1DDD, 0x1EEE,
689   -1,				/* 1 random pass */
690   0				/* End */
691 };
692 
693 /*
694  * Generate a random wiping pass pattern with num passes.
695  * This is a two-stage process.  First, the passes to include
696  * are chosen, and then they are shuffled into the desired
697  * order.
698  */
699 static void
genpattern(int * dest,size_t num,struct randint_source * s)700 genpattern (int *dest, size_t num, struct randint_source *s)
701 {
702   size_t randpasses;
703   int const *p;
704   int *d;
705   size_t n;
706   size_t accum, top, swap;
707   int k;
708 
709   if (!num)
710     return;
711 
712   /* Stage 1: choose the passes to use */
713   p = patterns;
714   randpasses = 0;
715   d = dest;			/* Destination for generated pass list */
716   n = num;			/* Passes remaining to fill */
717 
718   while (true)
719     {
720       k = *p++;			/* Block descriptor word */
721       if (!k)
722         {			/* Loop back to the beginning */
723           p = patterns;
724         }
725       else if (k < 0)
726         {			/* -k random passes */
727           k = -k;
728           if ((size_t) k >= n)
729             {
730               randpasses += n;
731               break;
732             }
733           randpasses += k;
734           n -= k;
735         }
736       else if ((size_t) k <= n)
737         {			/* Full block of patterns */
738           memcpy (d, p, k * sizeof (int));
739           p += k;
740           d += k;
741           n -= k;
742         }
743       else if (n < 2 || 3 * n < (size_t) k)
744         {			/* Finish with random */
745           randpasses += n;
746           break;
747         }
748       else
749         {			/* Pad out with n of the k available */
750           do
751             {
752               if (n == (size_t) k || randint_choose (s, k) < n)
753                 {
754                   *d++ = *p;
755                   n--;
756                 }
757               p++;
758               k--;
759             }
760           while (n);
761           break;
762         }
763     }
764   top = num - randpasses;	/* Top of initialized data */
765   /* affirm (d == dest + top); */
766 
767   /*
768    * We now have fixed patterns in the dest buffer up to
769    * "top", and we need to scramble them, with "randpasses"
770    * random passes evenly spaced among them.
771    *
772    * We want one at the beginning, one at the end, and
773    * evenly spaced in between.  To do this, we basically
774    * use Bresenham's line draw (a.k.a DDA) algorithm
775    * to draw a line with slope (randpasses-1)/(num-1).
776    * (We use a positive accumulator and count down to
777    * do this.)
778    *
779    * So for each desired output value, we do the following:
780    * - If it should be a random pass, copy the pass type
781    *   to top++, out of the way of the other passes, and
782    *   set the current pass to -1 (random).
783    * - If it should be a normal pattern pass, choose an
784    *   entry at random between here and top-1 (inclusive)
785    *   and swap the current entry with that one.
786    */
787   randpasses--;			/* To speed up later math */
788   accum = randpasses;		/* Bresenham DDA accumulator */
789   for (n = 0; n < num; n++)
790     {
791       if (accum <= randpasses)
792         {
793           accum += num - 1;
794           dest[top++] = dest[n];
795           dest[n] = -1;
796         }
797       else
798         {
799           swap = n + randint_choose (s, top - n);
800           k = dest[n];
801           dest[n] = dest[swap];
802           dest[swap] = k;
803         }
804       accum -= randpasses;
805     }
806   /* affirm (top == num); */
807 }
808 
809 /*
810  * The core routine to actually do the work.  This overwrites the first
811  * size bytes of the given fd.  Return true if successful.
812  */
813 static bool
do_wipefd(int fd,char const * qname,struct randint_source * s,struct Options const * flags)814 do_wipefd (int fd, char const *qname, struct randint_source *s,
815            struct Options const *flags)
816 {
817   size_t i;
818   struct stat st;
819   off_t size;		/* Size to write, size to read */
820   off_t i_size = 0;	/* For small files, initial size to overwrite inode */
821   unsigned long int n;	/* Number of passes for printing purposes */
822   int *passarray;
823   bool ok = true;
824   struct randread_source *rs;
825 
826   n = 0;		/* dopass takes n == 0 to mean "don't print progress" */
827   if (flags->verbose)
828     n = flags->n_iterations + flags->zero_fill;
829 
830   if (fstat (fd, &st))
831     {
832       error (0, errno, _("%s: fstat failed"), qname);
833       return false;
834     }
835 
836   /* If we know that we can't possibly shred the file, give up now.
837      Otherwise, we may go into an infinite loop writing data before we
838      find that we can't rewind the device.  */
839   if ((S_ISCHR (st.st_mode) && isatty (fd))
840       || S_ISFIFO (st.st_mode)
841       || S_ISSOCK (st.st_mode))
842     {
843       error (0, 0, _("%s: invalid file type"), qname);
844       return false;
845     }
846   else if (S_ISREG (st.st_mode) && st.st_size < 0)
847     {
848       error (0, 0, _("%s: file has negative size"), qname);
849       return false;
850     }
851 
852   /* Allocate pass array */
853   passarray = xnmalloc (flags->n_iterations, sizeof *passarray);
854 
855   size = flags->size;
856   if (size == -1)
857     {
858       if (S_ISREG (st.st_mode))
859         {
860           size = st.st_size;
861 
862           if (! flags->exact)
863             {
864               /* Round up to the nearest block size to clear slack space.  */
865               off_t remainder = size % STP_BLKSIZE (&st);
866               if (size && size < STP_BLKSIZE (&st))
867                 i_size = size;
868               if (remainder != 0)
869                 {
870                   off_t size_incr = STP_BLKSIZE (&st) - remainder;
871                   size += MIN (size_incr, OFF_T_MAX - size);
872                 }
873             }
874         }
875       else
876         {
877           /* The behavior of lseek is unspecified, but in practice if
878              it returns a positive number that's the size of this
879              device.  */
880           size = lseek (fd, 0, SEEK_END);
881           if (size <= 0)
882             {
883               /* We are unable to determine the length, up front.
884                  Let dopass do that as part of its first iteration.  */
885               size = -1;
886             }
887         }
888     }
889   else if (S_ISREG (st.st_mode)
890            && st.st_size < MIN (STP_BLKSIZE (&st), size))
891     i_size = st.st_size;
892 
893   /* Schedule the passes in random order. */
894   genpattern (passarray, flags->n_iterations, s);
895 
896   rs = randint_get_source (s);
897 
898   while (true)
899     {
900       off_t pass_size;
901       unsigned long int pn = n;
902 
903       if (i_size)
904         {
905           pass_size = i_size;
906           i_size = 0;
907           pn = 0;
908         }
909       else if (size)
910         {
911           pass_size = size;
912           size = 0;
913         }
914       /* TODO: consider handling tail packing by
915          writing the tail padding as a separate pass,
916          (that would not rewind).  */
917       else
918         break;
919 
920       for (i = 0; i < flags->n_iterations + flags->zero_fill; i++)
921         {
922           int err = 0;
923           int type = i < flags->n_iterations ? passarray[i] : 0;
924 
925           err = dopass (fd, &st, qname, &pass_size, type, rs, i + 1, pn);
926 
927           if (err)
928             {
929               ok = false;
930               if (err < 0)
931                 goto wipefd_out;
932             }
933         }
934     }
935 
936   /* Now deallocate the data.  The effect of ftruncate is specified
937      on regular files and shared memory objects (also directories, but
938      they are not possible here); don't worry about errors reported
939      for other file types.  */
940 
941   if (flags->remove_file && ftruncate (fd, 0) != 0
942       && (S_ISREG (st.st_mode) || S_TYPEISSHM (&st)))
943     {
944       error (0, errno, _("%s: error truncating"), qname);
945       ok = false;
946       goto wipefd_out;
947     }
948 
949 wipefd_out:
950   free (passarray);
951   return ok;
952 }
953 
954 /* A wrapper with a little more checking for fds on the command line */
955 static bool
wipefd(int fd,char const * qname,struct randint_source * s,struct Options const * flags)956 wipefd (int fd, char const *qname, struct randint_source *s,
957         struct Options const *flags)
958 {
959   int fd_flags = fcntl (fd, F_GETFL);
960 
961   if (fd_flags < 0)
962     {
963       error (0, errno, _("%s: fcntl failed"), qname);
964       return false;
965     }
966   if (fd_flags & O_APPEND)
967     {
968       error (0, 0, _("%s: cannot shred append-only file descriptor"), qname);
969       return false;
970     }
971   return do_wipefd (fd, qname, s, flags);
972 }
973 
974 /* --- Name-wiping code --- */
975 
976 /* Characters allowed in a file name - a safe universal set.  */
977 static char const nameset[] =
978 "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_.";
979 
980 /* Increment NAME (with LEN bytes).  NAME must be a big-endian base N
981    number with the digits taken from nameset.  Return true if successful.
982    Otherwise, (because NAME already has the greatest possible value)
983    return false.  */
984 
985 static bool
incname(char * name,size_t len)986 incname (char *name, size_t len)
987 {
988   while (len--)
989     {
990       char const *p = strchr (nameset, name[len]);
991 
992       /* Given that NAME is composed of bytes from NAMESET,
993          P will never be null here.  */
994 
995       /* If this character has a successor, use it.  */
996       if (p[1])
997         {
998           name[len] = p[1];
999           return true;
1000         }
1001 
1002       /* Otherwise, set this digit to 0 and increment the prefix.  */
1003       name[len] = nameset[0];
1004     }
1005 
1006   return false;
1007 }
1008 
1009 /*
1010  * Repeatedly rename a file with shorter and shorter names,
1011  * to obliterate all traces of the file name (and length) on any system
1012  * that adds a trailing delimiter to on-device file names and reuses
1013  * the same directory slot.  Finally, unlink it.
1014  * The passed-in filename is modified in place to the new filename.
1015  * (Which is unlinked if this function succeeds, but is still present if
1016  * it fails for some reason.)
1017  *
1018  * The main loop is written carefully to not get stuck if all possible
1019  * names of a given length are occupied.  It counts down the length from
1020  * the original to 0.  While the length is non-zero, it tries to find an
1021  * unused file name of the given length.  It continues until either the
1022  * name is available and the rename succeeds, or it runs out of names
1023  * to try (incname wraps and returns 1).  Finally, it unlinks the file.
1024  *
1025  * The unlink is Unix-specific, as ANSI-standard remove has more
1026  * portability problems with C libraries making it "safe".  rename
1027  * is ANSI-standard.
1028  *
1029  * To force the directory data out, we try to open the directory and
1030  * invoke fdatasync and/or fsync on it.  This is non-standard, so don't
1031  * insist that it works: just fall back to a global sync in that case.
1032  * This is fairly significantly Unix-specific.  Of course, on any
1033  * file system with synchronous metadata updates, this is unnecessary.
1034  */
1035 static bool
wipename(char * oldname,char const * qoldname,struct Options const * flags)1036 wipename (char *oldname, char const *qoldname, struct Options const *flags)
1037 {
1038   char *newname = xstrdup (oldname);
1039   char *base = last_component (newname);
1040   char *dir = dir_name (newname);
1041   char *qdir = xstrdup (quotef (dir));
1042   bool first = true;
1043   bool ok = true;
1044   int dir_fd = -1;
1045 
1046   if (flags->remove_file == remove_wipesync)
1047     dir_fd = open (dir, O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK);
1048 
1049   if (flags->verbose)
1050     error (0, 0, _("%s: removing"), qoldname);
1051 
1052   if (flags->remove_file != remove_unlink)
1053     for (size_t len = base_len (base); len != 0; len--)
1054       {
1055         memset (base, nameset[0], len);
1056         base[len] = 0;
1057         bool rename_ok;
1058         while (! (rename_ok = (renameatu (AT_FDCWD, oldname, AT_FDCWD, newname,
1059                                           RENAME_NOREPLACE)
1060                                == 0))
1061                && errno == EEXIST && incname (base, len))
1062           continue;
1063         if (rename_ok)
1064           {
1065             if (0 <= dir_fd && dosync (dir_fd, qdir) != 0)
1066               ok = false;
1067             if (flags->verbose)
1068               {
1069                 /* People seem to understand this better than talking
1070                    about renaming OLDNAME.  NEWNAME doesn't need
1071                    quoting because we picked it.  OLDNAME needs to be
1072                    quoted only the first time.  */
1073                 char const *old = first ? qoldname : oldname;
1074                 error (0, 0,
1075                        _("%s: renamed to %s"), old, newname);
1076                 first = false;
1077               }
1078             memcpy (oldname + (base - newname), base, len + 1);
1079           }
1080       }
1081 
1082   if (unlink (oldname) != 0)
1083     {
1084       error (0, errno, _("%s: failed to remove"), qoldname);
1085       ok = false;
1086     }
1087   else if (flags->verbose)
1088     error (0, 0, _("%s: removed"), qoldname);
1089   if (0 <= dir_fd)
1090     {
1091       if (dosync (dir_fd, qdir) != 0)
1092         ok = false;
1093       if (close (dir_fd) != 0)
1094         {
1095           error (0, errno, _("%s: failed to close"), qdir);
1096           ok = false;
1097         }
1098     }
1099   free (newname);
1100   free (dir);
1101   free (qdir);
1102   return ok;
1103 }
1104 
1105 /*
1106  * Finally, the function that actually takes a filename and grinds
1107  * it into hamburger.
1108  *
1109  * FIXME
1110  * Detail to note: since we do not restore errno to EACCES after
1111  * a failed chmod, we end up printing the error code from the chmod.
1112  * This is actually the error that stopped us from proceeding, so
1113  * it's arguably the right one, and in practice it'll be either EACCES
1114  * again or EPERM, which both give similar error messages.
1115  * Does anyone disagree?
1116  */
1117 static bool
wipefile(char * name,char const * qname,struct randint_source * s,struct Options const * flags)1118 wipefile (char *name, char const *qname,
1119           struct randint_source *s, struct Options const *flags)
1120 {
1121   bool ok;
1122   int fd;
1123 
1124   fd = open (name, O_WRONLY | O_NOCTTY | O_BINARY);
1125   if (fd < 0
1126       && (errno == EACCES && flags->force)
1127       && chmod (name, S_IWUSR) == 0)
1128     fd = open (name, O_WRONLY | O_NOCTTY | O_BINARY);
1129   if (fd < 0)
1130     {
1131       error (0, errno, _("%s: failed to open for writing"), qname);
1132       return false;
1133     }
1134 
1135   ok = do_wipefd (fd, qname, s, flags);
1136   if (close (fd) != 0)
1137     {
1138       error (0, errno, _("%s: failed to close"), qname);
1139       ok = false;
1140     }
1141   if (ok && flags->remove_file)
1142     ok = wipename (name, qname, flags);
1143   return ok;
1144 }
1145 
1146 
1147 /* Buffers for random data.  */
1148 static struct randint_source *randint_source;
1149 
1150 /* Just on general principles, wipe buffers containing information
1151    that may be related to the possibly-pseudorandom values used during
1152    shredding.  */
1153 static void
clear_random_data(void)1154 clear_random_data (void)
1155 {
1156   randint_all_free (randint_source);
1157 }
1158 
1159 
1160 int
main(int argc,char ** argv)1161 main (int argc, char **argv)
1162 {
1163   bool ok = true;
1164   struct Options flags = {0};
1165   char **file;
1166   int n_files;
1167   int c;
1168   int i;
1169   char const *random_source = nullptr;
1170 
1171   initialize_main (&argc, &argv);
1172   set_program_name (argv[0]);
1173   setlocale (LC_ALL, "");
1174   bindtextdomain (PACKAGE, LOCALEDIR);
1175   textdomain (PACKAGE);
1176 
1177   atexit (close_stdout);
1178 
1179   flags.n_iterations = DEFAULT_PASSES;
1180   flags.size = -1;
1181 
1182   while ((c = getopt_long (argc, argv, "fn:s:uvxz", long_opts, nullptr)) != -1)
1183     {
1184       switch (c)
1185         {
1186         case 'f':
1187           flags.force = true;
1188           break;
1189 
1190         case 'n':
1191           flags.n_iterations = xdectoumax (optarg, 0,
1192                                            MIN (ULONG_MAX,
1193                                                 SIZE_MAX / sizeof (int)), "",
1194                                            _("invalid number of passes"), 0);
1195           break;
1196 
1197         case RANDOM_SOURCE_OPTION:
1198           if (random_source && !STREQ (random_source, optarg))
1199             error (EXIT_FAILURE, 0, _("multiple random sources specified"));
1200           random_source = optarg;
1201           break;
1202 
1203         case 'u':
1204           if (optarg == nullptr)
1205             flags.remove_file = remove_wipesync;
1206           else
1207             flags.remove_file = XARGMATCH ("--remove", optarg,
1208                                            remove_args, remove_methods);
1209           break;
1210 
1211         case 's':
1212           flags.size = xnumtoumax (optarg, 0, 0, OFF_T_MAX, "cbBkKMGTPEZYRQ0",
1213                                    _("invalid file size"), 0);
1214           break;
1215 
1216         case 'v':
1217           flags.verbose = true;
1218           break;
1219 
1220         case 'x':
1221           flags.exact = true;
1222           break;
1223 
1224         case 'z':
1225           flags.zero_fill = true;
1226           break;
1227 
1228         case_GETOPT_HELP_CHAR;
1229 
1230         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1231 
1232         default:
1233           usage (EXIT_FAILURE);
1234         }
1235     }
1236 
1237   file = argv + optind;
1238   n_files = argc - optind;
1239 
1240   if (n_files == 0)
1241     {
1242       error (0, 0, _("missing file operand"));
1243       usage (EXIT_FAILURE);
1244     }
1245 
1246   randint_source = randint_all_new (random_source, SIZE_MAX);
1247   if (! randint_source)
1248     error (EXIT_FAILURE, errno, "%s",
1249            quotef (random_source ? random_source : "getrandom"));
1250   atexit (clear_random_data);
1251 
1252   for (i = 0; i < n_files; i++)
1253     {
1254       char *qname = xstrdup (quotef (file[i]));
1255       if (STREQ (file[i], "-"))
1256         {
1257           ok &= wipefd (STDOUT_FILENO, qname, randint_source, &flags);
1258         }
1259       else
1260         {
1261           /* Plain filename - Note that this overwrites *argv! */
1262           ok &= wipefile (file[i], qname, randint_source, &flags);
1263         }
1264       free (qname);
1265     }
1266 
1267   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
1268 }
1269 /*
1270  * vim:sw=2:sts=2:
1271  */
1272