Line data Source code
1 : /* unexpand - convert blanks to tabs
2 : Copyright (C) 89, 91, 1995-2006 Free Software Foundation, Inc.
3 :
4 : This program is free software: you can redistribute it and/or modify
5 : it under the terms of the GNU General Public License as published by
6 : the Free Software Foundation, either version 3 of the License, or
7 : (at your option) any later version.
8 :
9 : This program is distributed in the hope that it will be useful,
10 : but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : GNU General Public License for more details.
13 :
14 : You should have received a copy of the GNU General Public License
15 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
16 :
17 : /* By default, convert only maximal strings of initial blanks and tabs
18 : into tabs.
19 : Preserves backspace characters in the output; they decrement the
20 : column count for tab calculations.
21 : The default action is equivalent to -8.
22 :
23 : Options:
24 : --tabs=tab1[,tab2[,...]]
25 : -t tab1[,tab2[,...]]
26 : -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
27 : columns apart instead of the default 8. Otherwise,
28 : set the tabs at columns tab1, tab2, etc. (numbered from
29 : 0); preserve any blanks beyond the tab stops given.
30 : --all
31 : -a Use tabs wherever they would replace 2 or more blanks,
32 : not just at the beginnings of lines.
33 :
34 : David MacKenzie <djm@gnu.ai.mit.edu> */
35 :
36 : #include <config.h>
37 :
38 : #include <stdio.h>
39 : #include <getopt.h>
40 : #include <sys/types.h>
41 : #include "system.h"
42 : #include "error.h"
43 : #include "quote.h"
44 : #include "xstrndup.h"
45 :
/* Official program name, without any installation prefix such as `g'.  */
#define PROGRAM_NAME "unexpand"

#define AUTHORS "David MacKenzie"

/* Number of bytes by which the memory allocated for the output line
   is enlarged each time it has to grow.  */
#define OUTPUT_BLOCK 256

/* Name under which this program was invoked.  */
char *program_name;

/* When true, blanks are converted even after nonblank characters have
   been seen on the line.  */
static bool convert_entire_line;

/* Uniform size of every tab stop when nonzero; when zero, the explicit
   stops stored in `tab_list' are used instead.  */
static size_t tab_size;

/* Largest distance between two consecutive tab stops.  */
static size_t max_column_width;

/* Explicit tab stop columns, the first column being column 0.  Once
   `tab_list' is exhausted, the rest of a line is printed unchanged.  */
static uintmax_t *tab_list;

/* Number of entries allocated for `tab_list'.  */
static size_t n_tabs_allocated;

/* Index of the first unused element of `tab_list'; the next tab stop
   is stored there.  */
static size_t first_free_tab;

/* NULL-terminated array of input file names.  */
static char **file_list;

/* Value used for `file_list' when the command line names no files.  */
static char *stdin_argv[] = { "-", NULL };

/* True once standard input has been read.  */
static bool have_read_stdin;

/* Exit status the program will finish with.  */
static int exit_status;
94 :
95 : /* For long options that have no equivalent short option, use a
96 : non-character as a pseudo short option, starting with CHAR_MAX + 1. */
97 : enum
98 : {
99 : CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
100 : };
101 :
102 : static struct option const longopts[] =
103 : {
104 : {"tabs", required_argument, NULL, 't'},
105 : {"all", no_argument, NULL, 'a'},
106 : {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
107 : {GETOPT_HELP_OPTION_DECL},
108 : {GETOPT_VERSION_OPTION_DECL},
109 : {NULL, 0, NULL, 0}
110 : };
111 :
/* Print usage information and exit with status STATUS.
   When STATUS is not EXIT_SUCCESS, only a one-line hint pointing at
   `--help' is written to stderr.  Otherwise the full help text is
   written to stdout, followed by the bug reporting address.
   Either way the function ends by calling exit, so it never returns
   to its caller.  */
112 : void
113 8 : usage (int status)
114 : {
115 8 : if (status != EXIT_SUCCESS)
116 7 : fprintf (stderr, _("Try `%s --help' for more information.\n"),
117 : program_name);
118 : else
119 : {
120 1 : printf (_("\
121 : Usage: %s [OPTION]... [FILE]...\n\
122 : "),
123 : program_name);
124 1 : fputs (_("\
125 : Convert blanks in each FILE to tabs, writing to standard output.\n\
126 : With no FILE, or when FILE is -, read standard input.\n\
127 : \n\
128 : "), stdout);
129 1 : fputs (_("\
130 : Mandatory arguments to long options are mandatory for short options too.\n\
131 : "), stdout);
132 1 : fputs (_("\
133 : -a, --all convert all blanks, instead of just initial blanks\n\
134 : --first-only convert only leading sequences of blanks (overrides -a)\n\
135 : -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
136 : -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\
137 : "), stdout);
138 1 : fputs (HELP_OPTION_DESCRIPTION, stdout);
139 1 : fputs (VERSION_OPTION_DESCRIPTION, stdout);
140 1 : emit_bug_reporting_address ();
141 : }
142 8 : exit (status);
143 : }
144 :
145 : /* Add tab stop TABVAL to the end of `tab_list'. */
146 :
147 : static void
148 21 : add_tab_stop (uintmax_t tabval)
149 : {
150 21 : uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
151 21 : uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
152 :
153 21 : if (first_free_tab == n_tabs_allocated)
154 16 : tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
155 21 : tab_list[first_free_tab++] = tabval;
156 :
157 21 : if (max_column_width < column_width)
158 : {
159 : if (SIZE_MAX < column_width)
160 : error (EXIT_FAILURE, 0, _("tabs are too far apart"));
161 12 : max_column_width = column_width;
162 : }
163 21 : }
164 :
165 : /* Add the comma or blank separated list of tab stops STOPS
166 : to the list of tab stops. */
167 :
168 : static void
169 29 : parse_tab_stops (char const *stops)
170 : {
171 29 : bool have_tabval = false;
172 : uintmax_t tabval IF_LINT (= 0);
173 : char const *num_start IF_LINT (= NULL);
174 29 : bool ok = true;
175 :
176 44 : for (; *stops; stops++)
177 : {
178 30 : if (*stops == ',' || isblank (to_uchar (*stops)))
179 : {
180 9 : if (have_tabval)
181 1 : add_tab_stop (tabval);
182 9 : have_tabval = false;
183 : }
184 21 : else if (ISDIGIT (*stops))
185 : {
186 6 : if (!have_tabval)
187 : {
188 5 : tabval = 0;
189 5 : have_tabval = true;
190 5 : num_start = stops;
191 : }
192 :
193 : /* Detect overflow. */
194 6 : if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
195 : {
196 0 : size_t len = strspn (num_start, "0123456789");
197 0 : char *bad_num = xstrndup (num_start, len);
198 0 : error (0, 0, _("tab stop is too large %s"), quote (bad_num));
199 0 : free (bad_num);
200 0 : ok = false;
201 0 : stops = num_start + len - 1;
202 : }
203 : }
204 : else
205 : {
206 15 : error (0, 0, _("tab size contains invalid character(s): %s"),
207 : quote (stops));
208 15 : ok = false;
209 15 : break;
210 : }
211 : }
212 :
213 29 : if (!ok)
214 15 : exit (EXIT_FAILURE);
215 :
216 14 : if (have_tabval)
217 4 : add_tab_stop (tabval);
218 14 : }
219 :
/* Verify that the ENTRIES tab stops in TABS are all nonzero and
   strictly ascending; exit with a diagnostic at the first stop that
   violates either rule.  */

static void
validate_tab_stops (uintmax_t const *tabs, size_t entries)
{
  uintmax_t last = 0;
  size_t idx = 0;

  while (idx < entries)
    {
      uintmax_t stop = tabs[idx++];

      if (stop == 0)
        error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
      if (stop <= last)
        error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
      last = stop;
    }
}
238 :
239 : /* Close the old stream pointer FP if it is non-NULL,
240 : and return a new one opened to read the next input file.
241 : Open a filename of `-' as the standard input.
242 : Return NULL if there are no more input files. */
243 :
244 : static FILE *
245 118 : next_file (FILE *fp)
246 : {
247 : static char *prev_file;
248 : char *file;
249 :
250 118 : if (fp)
251 : {
252 65 : if (ferror (fp))
253 : {
254 5 : error (0, errno, "%s", prev_file);
255 5 : exit_status = EXIT_FAILURE;
256 : }
257 65 : if (STREQ (prev_file, "-"))
258 59 : clearerr (fp); /* Also clear EOF. */
259 6 : else if (fclose (fp) != 0)
260 : {
261 0 : error (0, errno, "%s", prev_file);
262 0 : exit_status = EXIT_FAILURE;
263 : }
264 : }
265 :
266 249 : while ((file = *file_list++) != NULL)
267 : {
268 78 : if (STREQ (file, "-"))
269 : {
270 59 : have_read_stdin = true;
271 59 : prev_file = file;
272 59 : return stdin;
273 : }
274 19 : fp = fopen (file, "r");
275 19 : if (fp)
276 : {
277 6 : prev_file = file;
278 6 : return fp;
279 : }
280 13 : error (0, errno, "%s", file);
281 13 : exit_status = EXIT_FAILURE;
282 : }
283 53 : return NULL;
284 : }
285 :
286 : /* Change blanks to tabs, writing to stdout.
287 : Read each file in `file_list', in order. */
288 :
289 : static void
290 53 : unexpand (void)
291 : {
292 : /* Input stream. */
293 53 : FILE *fp = next_file (NULL);
294 :
295 : /* The array of pending blanks. In non-POSIX locales, blanks can
296 : include characters other than spaces, so the blanks must be
297 : stored, not merely counted. */
298 : char *pending_blank;
299 :
300 53 : if (!fp)
301 3 : return;
302 :
303 : /* The worst case is a non-blank character, then one blank, then a
304 : tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
305 : allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
306 50 : pending_blank = xmalloc (max_column_width);
307 :
308 : for (;;)
309 131 : {
310 : /* Input character, or EOF. */
311 : int c;
312 :
313 : /* If true, perform translations. */
314 181 : bool convert = true;
315 :
316 :
317 : /* The following variables have valid values only when CONVERT
318 : is true: */
319 :
320 : /* Column of next input character. */
321 181 : uintmax_t column = 0;
322 :
323 : /* Column the next input tab stop is on. */
324 181 : uintmax_t next_tab_column = 0;
325 :
326 : /* Index in TAB_LIST of next tab stop to examine. */
327 181 : size_t tab_index = 0;
328 :
329 : /* If true, the first pending blank came just before a tab stop. */
330 181 : bool one_blank_before_tab_stop = false;
331 :
332 : /* If true, the previous input character was a blank. This is
333 : initially true, since initial strings of blanks are treated
334 : as if the line was preceded by a blank. */
335 181 : bool prev_blank = true;
336 :
337 : /* Number of pending columns of blanks. */
338 181 : size_t pending = 0;
339 :
340 :
341 : /* Convert a line of text. */
342 :
343 : do
344 : {
345 915 : while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
346 15 : continue;
347 :
348 450 : if (convert)
349 : {
350 316 : bool blank = !! isblank (c);
351 :
352 316 : if (blank)
353 : {
354 11 : if (next_tab_column <= column)
355 : {
356 10 : if (tab_size)
357 9 : next_tab_column =
358 9 : column + (tab_size - column % tab_size);
359 : else
360 : for (;;)
361 1 : if (tab_index == first_free_tab)
362 : {
363 0 : convert = false;
364 0 : break;
365 : }
366 : else
367 : {
368 1 : uintmax_t tab = tab_list[tab_index++];
369 1 : if (column < tab)
370 : {
371 1 : next_tab_column = tab;
372 1 : break;
373 : }
374 : }
375 : }
376 :
377 11 : if (convert)
378 : {
379 11 : if (next_tab_column < column)
380 0 : error (EXIT_FAILURE, 0, _("input line is too long"));
381 :
382 11 : if (c == '\t')
383 : {
384 6 : column = next_tab_column;
385 :
386 : /* Discard pending blanks, unless it was a single
387 : blank just before the previous tab stop. */
388 6 : if (! (pending == 1 && one_blank_before_tab_stop))
389 : {
390 5 : pending = 0;
391 5 : one_blank_before_tab_stop = false;
392 : }
393 : }
394 : else
395 : {
396 5 : column++;
397 :
398 5 : if (! (prev_blank && column == next_tab_column))
399 : {
400 : /* It is not yet known whether the pending blanks
401 : will be replaced by tabs. */
402 4 : if (column == next_tab_column)
403 1 : one_blank_before_tab_stop = true;
404 4 : pending_blank[pending++] = c;
405 4 : prev_blank = true;
406 4 : continue;
407 : }
408 :
409 : /* Replace the pending blanks by a tab or two. */
410 1 : pending_blank[0] = c = '\t';
411 1 : pending = one_blank_before_tab_stop;
412 : }
413 : }
414 : }
415 305 : else if (c == '\b')
416 : {
417 : /* Go back one column, and force recalculation of the
418 : next tab stop. */
419 250 : column -= !!column;
420 250 : next_tab_column = column;
421 250 : tab_index -= !!tab_index;
422 : }
423 : else
424 : {
425 55 : column++;
426 55 : if (!column)
427 0 : error (EXIT_FAILURE, 0, _("input line is too long"));
428 : }
429 :
430 312 : if (pending)
431 : {
432 3 : if (fwrite (pending_blank, 1, pending, stdout) != pending)
433 0 : error (EXIT_FAILURE, errno, _("write error"));
434 3 : pending = 0;
435 3 : one_blank_before_tab_stop = false;
436 : }
437 :
438 312 : prev_blank = blank;
439 312 : convert &= convert_entire_line | blank;
440 : }
441 :
442 446 : if (c < 0)
443 : {
444 50 : free (pending_blank);
445 50 : return;
446 : }
447 :
448 396 : if (putchar (c) < 0)
449 0 : error (EXIT_FAILURE, errno, _("write error"));
450 : }
451 400 : while (c != '\n');
452 : }
453 : }
454 :
455 : int
456 83 : main (int argc, char **argv)
457 : {
458 83 : bool have_tabval = false;
459 : uintmax_t tabval IF_LINT (= 0);
460 : int c;
461 :
462 : /* If true, cancel the effect of any -a (explicit or implicit in -t),
463 : so that only leading blanks will be considered. */
464 83 : bool convert_first_only = false;
465 :
466 : initialize_main (&argc, &argv);
467 83 : program_name = argv[0];
468 83 : setlocale (LC_ALL, "");
469 : bindtextdomain (PACKAGE, LOCALEDIR);
470 : textdomain (PACKAGE);
471 :
472 83 : atexit (close_stdout);
473 :
474 83 : have_read_stdin = false;
475 83 : exit_status = EXIT_SUCCESS;
476 83 : convert_entire_line = false;
477 83 : tab_list = NULL;
478 83 : first_free_tab = 0;
479 :
480 215 : while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
481 : != -1)
482 : {
483 73 : switch (c)
484 : {
485 7 : case '?':
486 7 : usage (EXIT_FAILURE);
487 2 : case 'a':
488 2 : convert_entire_line = true;
489 2 : break;
490 29 : case 't':
491 29 : convert_entire_line = true;
492 29 : parse_tab_stops (optarg);
493 14 : break;
494 1 : case CONVERT_FIRST_ONLY_OPTION:
495 1 : convert_first_only = true;
496 1 : break;
497 15 : case ',':
498 15 : if (have_tabval)
499 6 : add_tab_stop (tabval);
500 15 : have_tabval = false;
501 15 : break;
502 1 : case_GETOPT_HELP_CHAR;
503 1 : case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
504 17 : default:
505 17 : if (!have_tabval)
506 : {
507 16 : tabval = 0;
508 16 : have_tabval = true;
509 : }
510 17 : if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t))
511 0 : error (EXIT_FAILURE, 0, _("tab stop value is too large"));
512 17 : break;
513 : }
514 : }
515 :
516 59 : if (convert_first_only)
517 1 : convert_entire_line = false;
518 :
519 59 : if (have_tabval)
520 10 : add_tab_stop (tabval);
521 :
522 59 : validate_tab_stops (tab_list, first_free_tab);
523 :
524 53 : if (first_free_tab == 0)
525 43 : tab_size = max_column_width = 8;
526 10 : else if (first_free_tab == 1)
527 8 : tab_size = tab_list[0];
528 : else
529 2 : tab_size = 0;
530 :
531 53 : file_list = (optind < argc ? &argv[optind] : stdin_argv);
532 :
533 53 : unexpand ();
534 :
535 53 : if (have_read_stdin && fclose (stdin) != 0)
536 0 : error (EXIT_FAILURE, errno, "-");
537 :
538 53 : exit (exit_status);
539 : }
|