1 /* unexpand - convert blanks to tabs
2 Copyright (C) 1989-2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
/* By default, convert only maximal strings of initial blanks and tabs
   into tabs.
   Preserves backspace characters in the output; they decrement the
   column count for tab calculations.
   The default action is equivalent to -8.

   Options:
   --tabs=tab1[,tab2[,...]]
   -t tab1[,tab2[,...]]
   -tab1[,tab2[,...]]   If only one tab stop is given, set the tabs tab1
                        columns apart instead of the default 8.  Otherwise,
                        set the tabs at columns tab1, tab2, etc. (numbered from
                        0); preserve any blanks beyond the tab stops given.
   --all
   -a                   Use tabs wherever they would replace 2 or more blanks,
                        not just at the beginnings of lines.

   David MacKenzie <djm@gnu.ai.mit.edu> */
35
36 #include <config.h>
37
38 #include <stdio.h>
39 #include <getopt.h>
40 #include <sys/types.h>
41 #include "system.h"
42 #include "expand-common.h"
43
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "unexpand"

/* Author credit; main passes it, together with PROGRAM_NAME, to the
   --version case macro.  */
#define AUTHORS proper_name ("David MacKenzie")
48
49
50
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   Such a value cannot collide with any option character that
   getopt_long returns for the short options.  */
enum
{
  CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
};
57
58 static struct option const longopts[] =
59 {
60 {"tabs", required_argument, nullptr, 't'},
61 {"all", no_argument, nullptr, 'a'},
62 {"first-only", no_argument, nullptr, CONVERT_FIRST_ONLY_OPTION},
63 {GETOPT_HELP_OPTION_DECL},
64 {GETOPT_VERSION_OPTION_DECL},
65 {nullptr, 0, nullptr, 0}
66 };
67
68 void
usage(int status)69 usage (int status)
70 {
71 if (status != EXIT_SUCCESS)
72 emit_try_help ();
73 else
74 {
75 printf (_("\
76 Usage: %s [OPTION]... [FILE]...\n\
77 "),
78 program_name);
79 fputs (_("\
80 Convert blanks in each FILE to tabs, writing to standard output.\n\
81 "), stdout);
82
83 emit_stdin_note ();
84 emit_mandatory_arg_note ();
85
86 fputs (_("\
87 -a, --all convert all blanks, instead of just initial blanks\n\
88 --first-only convert only leading sequences of blanks (overrides -a)\n\
89 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
90 "), stdout);
91 emit_tab_list_info ();
92 fputs (HELP_OPTION_DESCRIPTION, stdout);
93 fputs (VERSION_OPTION_DESCRIPTION, stdout);
94 emit_ancillary_info (PROGRAM_NAME);
95 }
96 exit (status);
97 }
98
99 /* Change blanks to tabs, writing to stdout.
100 Read each file in 'file_list', in order. */
101
102 static void
unexpand(void)103 unexpand (void)
104 {
105 /* Input stream. */
106 FILE *fp = next_file (nullptr);
107
108 /* The array of pending blanks. In non-POSIX locales, blanks can
109 include characters other than spaces, so the blanks must be
110 stored, not merely counted. */
111 char *pending_blank;
112
113 if (!fp)
114 return;
115
116 /* The worst case is a non-blank character, then one blank, then a
117 tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
118 allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
119 pending_blank = xmalloc (max_column_width);
120
121 while (true)
122 {
123 /* Input character, or EOF. */
124 int c;
125
126 /* If true, perform translations. */
127 bool convert = true;
128
129
130 /* The following variables have valid values only when CONVERT
131 is true: */
132
133 /* Column of next input character. */
134 uintmax_t column = 0;
135
136 /* Column the next input tab stop is on. */
137 uintmax_t next_tab_column = 0;
138
139 /* Index in TAB_LIST of next tab stop to examine. */
140 size_t tab_index = 0;
141
142 /* If true, the first pending blank came just before a tab stop. */
143 bool one_blank_before_tab_stop = false;
144
145 /* If true, the previous input character was a blank. This is
146 initially true, since initial strings of blanks are treated
147 as if the line was preceded by a blank. */
148 bool prev_blank = true;
149
150 /* Number of pending columns of blanks. */
151 size_t pending = 0;
152
153
154 /* Convert a line of text. */
155
156 do
157 {
158 while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
159 continue;
160
161 if (convert)
162 {
163 bool blank = !! isblank (c);
164
165 if (blank)
166 {
167 bool last_tab;
168
169 next_tab_column = get_next_tab_column (column, &tab_index,
170 &last_tab);
171
172 if (last_tab)
173 convert = false;
174
175 if (convert)
176 {
177 if (next_tab_column < column)
178 error (EXIT_FAILURE, 0, _("input line is too long"));
179
180 if (c == '\t')
181 {
182 column = next_tab_column;
183
184 if (pending)
185 pending_blank[0] = '\t';
186 }
187 else
188 {
189 column++;
190
191 if (! (prev_blank && column == next_tab_column))
192 {
193 /* It is not yet known whether the pending blanks
194 will be replaced by tabs. */
195 if (column == next_tab_column)
196 one_blank_before_tab_stop = true;
197 pending_blank[pending++] = c;
198 prev_blank = true;
199 continue;
200 }
201
202 /* Replace the pending blanks by a tab or two. */
203 pending_blank[0] = c = '\t';
204 }
205
206 /* Discard pending blanks, unless it was a single
207 blank just before the previous tab stop. */
208 pending = one_blank_before_tab_stop;
209 }
210 }
211 else if (c == '\b')
212 {
213 /* Go back one column, and force recalculation of the
214 next tab stop. */
215 column -= !!column;
216 next_tab_column = column;
217 tab_index -= !!tab_index;
218 }
219 else
220 {
221 column++;
222 if (!column)
223 error (EXIT_FAILURE, 0, _("input line is too long"));
224 }
225
226 if (pending)
227 {
228 if (pending > 1 && one_blank_before_tab_stop)
229 pending_blank[0] = '\t';
230 if (fwrite (pending_blank, 1, pending, stdout) != pending)
231 write_error ();
232 pending = 0;
233 one_blank_before_tab_stop = false;
234 }
235
236 prev_blank = blank;
237 convert &= convert_entire_line || blank;
238 }
239
240 if (c < 0)
241 {
242 free (pending_blank);
243 return;
244 }
245
246 if (putchar (c) < 0)
247 write_error ();
248 }
249 while (c != '\n');
250 }
251 }
252
253 int
main(int argc,char ** argv)254 main (int argc, char **argv)
255 {
256 bool have_tabval = false;
257 uintmax_t tabval IF_LINT ( = 0);
258 int c;
259
260 /* If true, cancel the effect of any -a (explicit or implicit in -t),
261 so that only leading blanks will be considered. */
262 bool convert_first_only = false;
263
264 initialize_main (&argc, &argv);
265 set_program_name (argv[0]);
266 setlocale (LC_ALL, "");
267 bindtextdomain (PACKAGE, LOCALEDIR);
268 textdomain (PACKAGE);
269
270 atexit (close_stdout);
271
272 while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, nullptr))
273 != -1)
274 {
275 switch (c)
276 {
277 case '?':
278 usage (EXIT_FAILURE);
279 case 'a':
280 convert_entire_line = true;
281 break;
282 case 't':
283 convert_entire_line = true;
284 parse_tab_stops (optarg);
285 break;
286 case CONVERT_FIRST_ONLY_OPTION:
287 convert_first_only = true;
288 break;
289 case ',':
290 if (have_tabval)
291 add_tab_stop (tabval);
292 have_tabval = false;
293 break;
294 case_GETOPT_HELP_CHAR;
295 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
296 default:
297 if (!have_tabval)
298 {
299 tabval = 0;
300 have_tabval = true;
301 }
302 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t))
303 error (EXIT_FAILURE, 0, _("tab stop value is too large"));
304 break;
305 }
306 }
307
308 if (convert_first_only)
309 convert_entire_line = false;
310
311 if (have_tabval)
312 add_tab_stop (tabval);
313
314 finalize_tab_stops ();
315
316 set_file_list ((optind < argc) ? &argv[optind] : nullptr);
317
318 unexpand ();
319
320 cleanup_file_list_stdin ();
321
322 return exit_status;
323 }
324