LCOV - code coverage report
Current view: top level - src - unexpand.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 183 199 92.0 %
Date: 2018-01-30 Functions: 7 7 100.0 %

          Line data    Source code
       1             : /* unexpand - convert blanks to tabs
       2             :    Copyright (C) 89, 91, 1995-2006 Free Software Foundation, Inc.
       3             : 
       4             :    This program is free software: you can redistribute it and/or modify
       5             :    it under the terms of the GNU General Public License as published by
       6             :    the Free Software Foundation, either version 3 of the License, or
       7             :    (at your option) any later version.
       8             : 
       9             :    This program is distributed in the hope that it will be useful,
      10             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :    GNU General Public License for more details.
      13             : 
      14             :    You should have received a copy of the GNU General Public License
      15             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      16             : 
      17             : /* By default, convert only maximal strings of initial blanks and tabs
      18             :    into tabs.
      19             :    Preserves backspace characters in the output; they decrement the
      20             :    column count for tab calculations.
      21             :    The default action is equivalent to -8.
      22             : 
      23             :    Options:
      24             :    --tabs=tab1[,tab2[,...]]
      25             :    -t tab1[,tab2[,...]]
      26             :    -tab1[,tab2[,...]]   If only one tab stop is given, set the tabs tab1
      27             :                         columns apart instead of the default 8.  Otherwise,
      28             :                         set the tabs at columns tab1, tab2, etc. (numbered from
      29             :                         0); preserve any blanks beyond the tab stops given.
      30             :    --all
      31             :    -a                   Use tabs wherever they would replace 2 or more blanks,
      32             :                         not just at the beginnings of lines.
      33             : 
      34             :    David MacKenzie <djm@gnu.ai.mit.edu> */
      35             : 
      36             : #include <config.h>
      37             : 
      38             : #include <stdio.h>
      39             : #include <getopt.h>
      40             : #include <sys/types.h>
      41             : #include "system.h"
      42             : #include "error.h"
      43             : #include "quote.h"
      44             : #include "xstrndup.h"
      45             : 
      46             : /* The official name of this program (e.g., no `g' prefix).  */
      47             : #define PROGRAM_NAME "unexpand"
      48             : 
      49             : #define AUTHORS "David MacKenzie"
      50             : 
      51             : /* The number of bytes added at a time to the amount of memory
      52             :    allocated for the output line.
                     :    NOTE(review): nothing in the code shown in this report refers to
                     :    OUTPUT_BLOCK; it looks like a leftover -- confirm before relying
                     :    on it or removing it.  */
      53             : #define OUTPUT_BLOCK 256
      54             : 
      55             : /* The name this program was run with.  */
      56             : char *program_name;
      57             : 
      58             : /* If true, convert blanks even after nonblank characters have been
      59             :    read on the line.  */
      60             : static bool convert_entire_line;
      61             : 
      62             : /* If nonzero, the size of all tab stops.  If zero, use `tab_list' instead.
                     :    Assigned in main after the options have been parsed.  */
      63             : static size_t tab_size;
      64             : 
      65             : /* The maximum distance between tab stops.  add_tab_stop raises it as
                     :    stops are added, main sets it to 8 when no stops were given, and
                     :    unexpand allocates this many bytes for its buffer of pending
                     :    blanks.  */
      66             : static size_t max_column_width;
      67             : 
      68             : /* Array of the explicit column numbers of the tab stops;
      69             :    after `tab_list' is exhausted, the rest of the line is printed
      70             :    unchanged.  The first column is column 0.  */
      71             : static uintmax_t *tab_list;
      72             : 
      73             : /* The number of allocated entries in `tab_list'.  */
      74             : static size_t n_tabs_allocated;
      75             : 
      76             : /* The index of the first invalid element of `tab_list',
      77             :    where the next element can be added.  */
      78             : static size_t first_free_tab;
      79             : 
      80             : /* Null-terminated array of input filenames.  next_file steps this
                     :    pointer forward as it takes names from the array.  */
      81             : static char **file_list;
      82             : 
      83             : /* Default for `file_list' if no files are given on the command line.  */
      84             : static char *stdin_argv[] =
      85             : {
      86             :   "-", NULL
      87             : };
      88             : 
      89             : /* True if we have ever read standard input.  */
      90             : static bool have_read_stdin;
      91             : 
      92             : /* The desired exit status.  Starts as EXIT_SUCCESS in main; next_file
                     :    changes it to EXIT_FAILURE when it reports a file error.  */
      93             : static int exit_status;
      94             : 
      95             : /* For long options that have no equivalent short option, use a
      96             :    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
      97             : enum
      98             : {
      99             :   CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
     100             : };
     101             : /* Long options accepted by the getopt_long call in main.  */
     102             : static struct option const longopts[] =
     103             : {
     104             :   {"tabs", required_argument, NULL, 't'},
     105             :   {"all", no_argument, NULL, 'a'},
     106             :   {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
     107             :   {GETOPT_HELP_OPTION_DECL},
     108             :   {GETOPT_VERSION_OPTION_DECL},
     109             :   {NULL, 0, NULL, 0}
     110             : };
     111             : /* Print usage information and terminate with exit status STATUS.
                     :    For a STATUS other than EXIT_SUCCESS, write only a one-line hint
                     :    to stderr that points at --help.  For EXIT_SUCCESS, write the
                     :    full help text to stdout.  This function does not return: its
                     :    last statement is exit (STATUS).  */
     112             : void
     113           8 : usage (int status)
     114             : {
     115           8 :   if (status != EXIT_SUCCESS)
     116           7 :     fprintf (stderr, _("Try `%s --help' for more information.\n"),
     117             :              program_name);
     118             :   else
     119             :     {
     120           1 :       printf (_("\
     121             : Usage: %s [OPTION]... [FILE]...\n\
     122             : "),
     123             :               program_name);
     124           1 :       fputs (_("\
     125             : Convert blanks in each FILE to tabs, writing to standard output.\n\
     126             : With no FILE, or when FILE is -, read standard input.\n\
     127             : \n\
     128             : "), stdout);
     129           1 :       fputs (_("\
     130             : Mandatory arguments to long options are mandatory for short options too.\n\
     131             : "), stdout);
     132           1 :       fputs (_("\
     133             :   -a, --all        convert all blanks, instead of just initial blanks\n\
     134             :       --first-only  convert only leading sequences of blanks (overrides -a)\n\
     135             :   -t, --tabs=N     have tabs N characters apart instead of 8 (enables -a)\n\
     136             :   -t, --tabs=LIST  use comma separated LIST of tab positions (enables -a)\n\
     137             : "), stdout);
     138           1 :       fputs (HELP_OPTION_DESCRIPTION, stdout);
     139           1 :       fputs (VERSION_OPTION_DESCRIPTION, stdout);
     140           1 :       emit_bug_reporting_address ();
     141             :     }
     142           8 :   exit (status);
     143             : }
     144             : 
     145             : /* Add tab stop TABVAL to the end of `tab_list', enlarging the array
                     :    with X2NREALLOC when every allocated entry is in use.
                     :    Also keep `max_column_width' at the widest gap seen so far
                     :    between a stop and its predecessor (column 0 for the first stop).
                     :    A TABVAL below the previous stop counts as a gap of 0 here;
                     :    validate_tab_stops is the function that rejects such lists.
                     :    Reports a fatal error if a new widest gap is larger than
                     :    SIZE_MAX.  */
     146             : 
     147             : static void
     148          21 : add_tab_stop (uintmax_t tabval)
     149             : {
     150          21 :   uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
     151          21 :   uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
     152             : 
     153          21 :   if (first_free_tab == n_tabs_allocated)
     154          16 :     tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
     155          21 :   tab_list[first_free_tab++] = tabval;
     156             : 
     157          21 :   if (max_column_width < column_width)
     158             :     {
     159             :       if (SIZE_MAX < column_width)
     160             :         error (EXIT_FAILURE, 0, _("tabs are too far apart"));
     161          12 :       max_column_width = column_width;
     162             :     }
     163          21 : }
     164             : 
     165             : /* Add the comma or blank separated list of tab stops STOPS
     166             :    to the list of tab stops.
                     :    Each run of digits is accumulated into one value, which is handed
                     :    to add_tab_stop when a separator or the end of STOPS is reached;
                     :    separators with no number before them add nothing.
                     :    When DECIMAL_DIGIT_ACCUMULATE reports failure, the number is
                     :    diagnosed as too large and scanning resumes after its last digit.
                     :    Any other character is diagnosed and ends the scan.
                     :    After either kind of diagnostic the program exits with
                     :    EXIT_FAILURE once the scan is over; stops that were completed
                     :    before that point have already been added by then.  */
     167             : 
     168             : static void
     169          29 : parse_tab_stops (char const *stops)
     170             : {
     171          29 :   bool have_tabval = false;
     172             :   uintmax_t tabval IF_LINT (= 0);
     173             :   char const *num_start IF_LINT (= NULL);
     174          29 :   bool ok = true;
     175             : 
     176          44 :   for (; *stops; stops++)
     177             :     {
     178          30 :       if (*stops == ',' || isblank (to_uchar (*stops)))
     179             :         {
     180           9 :           if (have_tabval)
     181           1 :             add_tab_stop (tabval);
     182           9 :           have_tabval = false;
     183             :         }
     184          21 :       else if (ISDIGIT (*stops))
     185             :         {
     186           6 :           if (!have_tabval)
     187             :             {
     188           5 :               tabval = 0;
     189           5 :               have_tabval = true;
     190           5 :               num_start = stops;
     191             :             }
     192             : 
     193             :           /* Detect overflow.  On failure, quote the whole digit run in
                     :              the diagnostic and leave STOPS on its last digit, so that
                     :              the increment in the `for' moves past the number.  */
     194           6 :           if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
     195             :             {
     196           0 :               size_t len = strspn (num_start, "0123456789");
     197           0 :               char *bad_num = xstrndup (num_start, len);
     198           0 :               error (0, 0, _("tab stop is too large %s"), quote (bad_num));
     199           0 :               free (bad_num);
     200           0 :               ok = false;
     201           0 :               stops = num_start + len - 1;
     202             :             }
     203             :         }
     204             :       else
     205             :         {
     206          15 :           error (0, 0, _("tab size contains invalid character(s): %s"),
     207             :                  quote (stops));
     208          15 :           ok = false;
     209          15 :           break;
     210             :         }
     211             :     }
     212             : 
     213          29 :   if (!ok)
     214          15 :     exit (EXIT_FAILURE);
     215             : 
     216          14 :   if (have_tabval)
     217           4 :     add_tab_stop (tabval);
     218          14 : }
     219             : 
     220             : /* Check that the list of tab stops TABS, with ENTRIES entries,
     221             :    contains only nonzero, ascending values.
                     :    The first offending entry is reported through error () with
                     :    EXIT_FAILURE.  Equal neighbours count as not ascending.  With
                     :    ENTRIES == 0 there is nothing to check and the function simply
                     :    returns.  */
     222             : 
     223             : static void
     224          59 : validate_tab_stops (uintmax_t const *tabs, size_t entries)
     225             : {
     226          59 :   uintmax_t prev_tab = 0;
     227             :   size_t i;
     228             : 
     229          73 :   for (i = 0; i < entries; i++)
     230             :     {
     231          20 :       if (tabs[i] == 0)
     232           5 :         error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
     233          15 :       if (tabs[i] <= prev_tab)
     234           1 :         error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
     235          14 :       prev_tab = tabs[i];
     236             :     }
     237          53 : }
     238             : 
     239             : /* Close the old stream pointer FP if it is non-NULL,
     240             :    and return a new one opened to read the next input file.
     241             :    Open a filename of `-' as the standard input.
     242             :    Return NULL if there are no more input files.
                     :    A read error flagged on FP, a failed fclose, and a name that
                     :    cannot be opened are each reported with error () and recorded by
                     :    setting `exit_status' to EXIT_FAILURE; names that cannot be
                     :    opened are then skipped.  Standard input is not closed here:
                     :    only its error and end-of-file indicators are cleared, and main
                     :    closes it at the end.  Each call advances the global `file_list'
                     :    pointer past the names it has looked at.
                     :    NOTE(review): the read-error report uses whatever errno holds
                     :    when this function is entered; confirm that it still describes
                     :    the failed read at that point.  */
     243             : 
     244             : static FILE *
     245         118 : next_file (FILE *fp)
     246             : {
     247             :   static char *prev_file;  /* Name that belongs to FP; kept across calls.  */
     248             :   char *file;
     249             : 
     250         118 :   if (fp)
     251             :     {
     252          65 :       if (ferror (fp))
     253             :         {
     254           5 :           error (0, errno, "%s", prev_file);
     255           5 :           exit_status = EXIT_FAILURE;
     256             :         }
     257          65 :       if (STREQ (prev_file, "-"))
     258          59 :         clearerr (fp);          /* Also clear EOF.  */
     259           6 :       else if (fclose (fp) != 0)
     260             :         {
     261           0 :           error (0, errno, "%s", prev_file);
     262           0 :           exit_status = EXIT_FAILURE;
     263             :         }
     264             :     }
     265             : 
     266         249 :   while ((file = *file_list++) != NULL)
     267             :     {
     268          78 :       if (STREQ (file, "-"))
     269             :         {
     270          59 :           have_read_stdin = true;
     271          59 :           prev_file = file;
     272          59 :           return stdin;
     273             :         }
     274          19 :       fp = fopen (file, "r");
     275          19 :       if (fp)
     276             :         {
     277           6 :           prev_file = file;
     278           6 :           return fp;
     279             :         }
     280          13 :       error (0, errno, "%s", file);
     281          13 :       exit_status = EXIT_FAILURE;
     282             :     }
     283          53 :   return NULL;
     284             : }
     285             : 
     286             : /* Change blanks to tabs, writing to stdout.
     287             :    Read each file in `file_list', in order.
                     :    Input is handled one line at a time: the per-line state (column,
                     :    next tab stop, pending blanks) is set up afresh after each
                     :    newline.  The files are read as one continuous stream, so a line
                     :    that lacks a final newline continues into the next file.
                     :    Blanks are held back in `pending_blank' until it is known whether
                     :    they are replaced by a tab, dropped because an input tab follows
                     :    them, or written out unchanged.  Unless `convert_entire_line' is
                     :    set, conversion stops after the first nonblank character of a
                     :    line.  Returns when next_file reports no more input; a write
                     :    failure or an overflowing column count is a fatal error.  */
     288             : 
     289             : static void
     290          53 : unexpand (void)
     291             : {
     292             :   /* Input stream.  */
     293          53 :   FILE *fp = next_file (NULL);
     294             : 
     295             :   /* The array of pending blanks.  In non-POSIX locales, blanks can
     296             :      include characters other than spaces, so the blanks must be
     297             :      stored, not merely counted.  */
     298             :   char *pending_blank;
     299             : 
     300          53 :   if (!fp)
     301           3 :     return;
     302             : 
     303             :   /* The worst case is a non-blank character, then one blank, then a
     304             :      tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
     305             :      allocate MAX_COLUMN_WIDTH bytes to store the blanks.  */
     306          50 :   pending_blank = xmalloc (max_column_width);
     307             : 
     308             :   for (;;)
     309         131 :     {
     310             :       /* Input character, or EOF.  */
     311             :       int c;
     312             : 
     313             :       /* If true, perform translations.  */
     314         181 :       bool convert = true;
     315             : 
     316             : 
     317             :       /* The following variables have valid values only when CONVERT
     318             :          is true:  */
     319             : 
     320             :       /* Column of next input character.  */
     321         181 :       uintmax_t column = 0;
     322             : 
     323             :       /* Column the next input tab stop is on.  */
     324         181 :       uintmax_t next_tab_column = 0;
     325             : 
     326             :       /* Index in TAB_LIST of next tab stop to examine.  */
     327         181 :       size_t tab_index = 0;
     328             : 
     329             :       /* If true, the first pending blank came just before a tab stop.  */
     330         181 :       bool one_blank_before_tab_stop = false;
     331             : 
     332             :       /* If true, the previous input character was a blank.  This is
     333             :          initially true, since initial strings of blanks are treated
     334             :          as if the line was preceded by a blank.  */
     335         181 :       bool prev_blank = true;
     336             : 
     337             :       /* Number of pending columns of blanks.  */
     338         181 :       size_t pending = 0;
     339             : 
     340             : 
     341             :       /* Convert a line of text.  The inner `while' switches to the next
                     :          input file whenever getc returns a negative value, and lets
                     :          a negative C through only once next_file has returned NULL.  */
     342             : 
     343             :       do
     344             :         {
     345         915 :           while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
     346          15 :             continue;
     347             : 
     348         450 :           if (convert)
     349             :             {
     350         316 :               bool blank = !! isblank (c);  /* C may be EOF here.  */
     351             : 
     352         316 :               if (blank)
     353             :                 {
     354          11 :                   if (next_tab_column <= column)
     355             :                     {
     356          10 :                       if (tab_size)
     357           9 :                         next_tab_column =
     358           9 :                           column + (tab_size - column % tab_size);
     359             :                       else
     360             :                         for (;;)
     361           1 :                           if (tab_index == first_free_tab)
     362             :                             {
     363           0 :                               convert = false;
     364           0 :                               break;
     365             :                             }
     366             :                           else
     367             :                             {
     368           1 :                               uintmax_t tab = tab_list[tab_index++];
     369           1 :                               if (column < tab)
     370             :                                 {
     371           1 :                                   next_tab_column = tab;
     372           1 :                                   break;
     373             :                                 }
     374             :                             }
     375             :                     }
     376             : 
     377          11 :                   if (convert)
     378             :                     {
     379          11 :                       if (next_tab_column < column)
     380           0 :                         error (EXIT_FAILURE, 0, _("input line is too long"));
     381             : 
     382          11 :                       if (c == '\t')
     383             :                         {
     384           6 :                           column = next_tab_column;
     385             : 
     386             :                           /* Discard pending blanks, unless it was a single
     387             :                              blank just before the previous tab stop.  */
     388           6 :                           if (! (pending == 1 && one_blank_before_tab_stop))
     389             :                             {
     390           5 :                               pending = 0;
     391           5 :                               one_blank_before_tab_stop = false;
     392             :                             }
     393             :                         }
     394             :                       else
     395             :                         {
     396           5 :                           column++;
     397             : 
     398           5 :                           if (! (prev_blank && column == next_tab_column))
     399             :                             {
     400             :                               /* It is not yet known whether the pending blanks
     401             :                                  will be replaced by tabs.  */
     402           4 :                               if (column == next_tab_column)
     403           1 :                                 one_blank_before_tab_stop = true;
     404           4 :                               pending_blank[pending++] = c;
     405           4 :                               prev_blank = true;
     406           4 :                               continue;
     407             :                             }
     408             : 
     409             :                           /* Replace the pending blanks by a tab or two.  */
     410           1 :                           pending_blank[0] = c = '\t';
     411           1 :                           pending = one_blank_before_tab_stop;
     412             :                         }
     413             :                     }
     414             :                 }
     415         305 :               else if (c == '\b')
     416             :                 {
     417             :                   /* Go back one column, and force recalculation of the
     418             :                      next tab stop.  */
     419         250 :                   column -= !!column;
     420         250 :                   next_tab_column = column;
     421         250 :                   tab_index -= !!tab_index;
     422             :                 }
     423             :               else
     424             :                 {
     425          55 :                   column++;
     426          55 :                   if (!column)
     427           0 :                     error (EXIT_FAILURE, 0, _("input line is too long"));
     428             :                 }
     429             : 
     430         312 :               if (pending)
     431             :                 {
     432           3 :                   if (fwrite (pending_blank, 1, pending, stdout) != pending)
     433           0 :                     error (EXIT_FAILURE, errno, _("write error"));
     434           3 :                   pending = 0;
     435           3 :                   one_blank_before_tab_stop = false;
     436             :                 }
     437             : 
     438         312 :               prev_blank = blank;
     439         312 :               convert &= convert_entire_line | blank;  /* Without
                     :                  convert_entire_line, a nonblank turns conversion off
                     :                  for the rest of the line.  */
     440             :             }
     441             :           /* C is negative only when every input file is used up.  If
                     :              conversion was still active, the code above has already
                     :              run for that value and written out any pending blanks.  */
     442         446 :           if (c < 0)
     443             :             {
     444          50 :               free (pending_blank);
     445          50 :               return;
     446             :             }
     447             : 
     448         396 :           if (putchar (c) < 0)
     449           0 :             error (EXIT_FAILURE, errno, _("write error"));
     450             :         }
     451         400 :       while (c != '\n');
     452             :     }
     453             : }
     454             : /* Entry point.  Parse the options, build and validate the list of
                     :    tab stops, select either a fixed tab size or the explicit list,
                     :    then convert the operands (standard input if there are none) and
                     :    exit with `exit_status'.
                     :    Digits and commas are themselves listed as option characters in
                     :    the getopt string, so an argument such as -4,12 arrives one
                     :    character at a time and is assembled into tab stops in the
                     :    `,' and default cases below.  Unlike -t, that form does not set
                     :    `convert_entire_line'.  */
     455             : int
     456          83 : main (int argc, char **argv)
     457             : {
     458          83 :   bool have_tabval = false;
     459             :   uintmax_t tabval IF_LINT (= 0);
     460             :   int c;
     461             : 
     462             :   /* If true, cancel the effect of any -a (explicit or implicit in -t),
     463             :      so that only leading blanks will be considered.  */
     464          83 :   bool convert_first_only = false;
     465             : 
     466             :   initialize_main (&argc, &argv);
     467          83 :   program_name = argv[0];
     468          83 :   setlocale (LC_ALL, "");
     469             :   bindtextdomain (PACKAGE, LOCALEDIR);
     470             :   textdomain (PACKAGE);
     471             : 
     472          83 :   atexit (close_stdout);
     473             : 
     474          83 :   have_read_stdin = false;
     475          83 :   exit_status = EXIT_SUCCESS;
     476          83 :   convert_entire_line = false;
     477          83 :   tab_list = NULL;
     478          83 :   first_free_tab = 0;
     479             : 
     480         215 :   while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
     481             :          != -1)
     482             :     {
     483          73 :       switch (c)
     484             :         {
     485           7 :         case '?':
     486           7 :           usage (EXIT_FAILURE);  /* usage ends in exit, so no fall through.  */
     487           2 :         case 'a':
     488           2 :           convert_entire_line = true;
     489           2 :           break;
     490          29 :         case 't':
     491          29 :           convert_entire_line = true;
     492          29 :           parse_tab_stops (optarg);
     493          14 :           break;
     494           1 :         case CONVERT_FIRST_ONLY_OPTION:
     495           1 :           convert_first_only = true;
     496           1 :           break;
     497          15 :         case ',':
     498          15 :           if (have_tabval)
     499           6 :             add_tab_stop (tabval);
     500          15 :           have_tabval = false;
     501          15 :           break;
     502           1 :         case_GETOPT_HELP_CHAR;
     503           1 :         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
     504          17 :         default:  /* One of the digit option characters.  */
     505          17 :           if (!have_tabval)
     506             :             {
     507          16 :               tabval = 0;
     508          16 :               have_tabval = true;
     509             :             }
     510          17 :           if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t))
     511           0 :             error (EXIT_FAILURE, 0, _("tab stop value is too large"));
     512          17 :           break;
     513             :         }
     514             :     }
     515             : 
     516          59 :   if (convert_first_only)
     517           1 :     convert_entire_line = false;
     518             : 
     519          59 :   if (have_tabval)
     520          10 :     add_tab_stop (tabval);
     521             : 
     522          59 :   validate_tab_stops (tab_list, first_free_tab);
     523             :   /* No stops given: tabs every 8 columns.  One stop: it is the
                     :      distance between tabs.  Several stops: a tab_size of 0 makes
                     :      unexpand take the stops from tab_list.  */
     524          53 :   if (first_free_tab == 0)
     525          43 :     tab_size = max_column_width = 8;
     526          10 :   else if (first_free_tab == 1)
     527           8 :     tab_size = tab_list[0];
     528             :   else
     529           2 :     tab_size = 0;
     530             : 
     531          53 :   file_list = (optind < argc ? &argv[optind] : stdin_argv);
     532             : 
     533          53 :   unexpand ();
     534             : 
     535          53 :   if (have_read_stdin && fclose (stdin) != 0)
     536           0 :     error (EXIT_FAILURE, errno, "-");
     537             : 
     538          53 :   exit (exit_status);
     539             : }

Generated by: LCOV version 1.10