LCOV - code coverage report
Current view: top level - src - cut.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 263 302 87.1 %
Date: 2018-01-30 Functions: 11 15 73.3 %

          Line data    Source code
       1             : /* cut - remove parts of lines of files
       2             :    Copyright (C) 1997-2007 Free Software Foundation, Inc.
       3             :    Copyright (C) 1984 David M. Ihnat
       4             : 
       5             :    This program is free software: you can redistribute it and/or modify
       6             :    it under the terms of the GNU General Public License as published by
       7             :    the Free Software Foundation, either version 3 of the License, or
       8             :    (at your option) any later version.
       9             : 
      10             :    This program is distributed in the hope that it will be useful,
      11             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :    GNU General Public License for more details.
      14             : 
      15             :    You should have received a copy of the GNU General Public License
      16             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      17             : 
      18             : /* Written by David Ihnat.  */
      19             : 
      20             : /* POSIX changes, bug fixes, long-named options, and cleanup
      21             :    by David MacKenzie <djm@gnu.ai.mit.edu>.
      22             : 
      23             :    Rewrite cut_fields and cut_bytes -- Jim Meyering.  */
      24             : 
      25             : #include <config.h>
      26             : 
      27             : #include <stdio.h>
      28             : #include <assert.h>
      29             : #include <getopt.h>
      30             : #include <sys/types.h>
      31             : #include "system.h"
      32             : 
      33             : #include "error.h"
      34             : #include "getndelim2.h"
      35             : #include "hash.h"
      36             : #include "quote.h"
      37             : #include "xstrndup.h"
      38             : 
      39             : /* The official name of this program (e.g., no `g' prefix).  */
      40             : #define PROGRAM_NAME "cut"
      41             : 
      42             : #define AUTHORS "David Ihnat", "David MacKenzie", "Jim Meyering"
      43             : /* Report MESSAGE via error, then exit through usage (EXIT_FAILURE).  */
      44             : #define FATAL_ERROR(Message)                                            \
      45             :   do                                                                    \
      46             :     {                                                                   \
      47             :       error (0, 0, (Message));                                          \
      48             :       usage (EXIT_FAILURE);                                             \
      49             :     }                                                                   \
      50             :   while (0)
      51             : 
      52             : /* Append LOW, HIGH to the list RP of range pairs, growing RP when full.
      53             :    Fails via FATAL_ERROR if either is 0.  Reads and updates the caller's
      54             :    N_RP and N_RP_ALLOCATED; macro arguments may be evaluated twice.  */
      55             : 
      56             : #define ADD_RANGE_PAIR(rp, low, high)                                   \
      57             :   do                                                                    \
      58             :     {                                                                   \
      59             :       if ((low) == 0 || (high) == 0)                                    \
      60             :         FATAL_ERROR (_("fields and positions are numbered from 1"));    \
      61             :       if (n_rp >= n_rp_allocated)                                       \
      62             :         {                                                               \
      63             :           (rp) = X2NREALLOC (rp, &n_rp_allocated);                      \
      64             :         }                                                               \
      65             :       (rp)[n_rp].lo = (low);                                            \
      66             :       (rp)[n_rp].hi = (high);                                           \
      67             :       ++n_rp;                                                           \
      68             :     }                                                                   \
      69             :   while (0)
      70             : /* One inclusive range LO..HI of 1-based field or byte indices.  */
      71             : struct range_pair
      72             :   {
      73             :     size_t lo;
      74             :     size_t hi;
      75             :   };
      76             : 
      77             : /* This buffer is used to support the semantics of the -s option
      78             :    (or lack of same) when the specified field list includes (does
      79             :    not include) the first field.  In both of those cases, the entire
      80             :    first field must be read into this buffer to determine whether it
      81             :    is followed by a delimiter or a newline before any of it may be
      82             :    output.  Otherwise, cut_fields can do the job without using this
      83             :    buffer.  */
      84             : static char *field_1_buffer;
      85             : 
      86             : /* The number of bytes allocated for FIELD_1_BUFFER.  */
      87             : static size_t field_1_bufsize;
      88             : 
      89             : /* The largest field or byte index used as an endpoint of a closed
      90             :    or degenerate range specification;  this doesn't include the starting
      91             :    index of right-open-ended ranges.  For example, with either range spec
      92             :    `2-5,9-', `2-3,5,9-' this variable would be set to 5.  */
      93             : static size_t max_range_endpoint;
      94             : 
      95             : /* If nonzero, this is the index of the first field in a range that goes
      96             :    to end of line. */
      97             : static size_t eol_range_start;
      98             : 
      99             : /* This is a bit vector.
     100             :    In byte mode, which bytes to output.
     101             :    In field mode, which DELIM-separated fields to output.
     102             :    Both bytes and fields are numbered starting with 1,
     103             :    so the zeroth bit of this array is unused.
     104             :    A field or byte K has been selected if
     105             :    (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
     106             :     || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START).  */
     107             : static unsigned char *printable_field;
     108             : 
     109             : enum operating_mode
     110             :   {
     111             :     undefined_mode,
     112             : 
     113             :     /* Output characters that are in the given bytes. */
     114             :     byte_mode,
     115             : 
     116             :     /* Output the given delimiter-separated fields. */
     117             :     field_mode
     118             :   };
     119             : 
     120             : /* The name this program was run with. */
     121             : char *program_name;
     122             : 
     123             : static enum operating_mode operating_mode;
     124             : 
     125             : /* If true, do not output lines containing no delimiter characters.
     126             :    Otherwise, all such lines are printed.  This option is valid only
     127             :    with field mode.  */
     128             : static bool suppress_non_delimited;
     129             : 
     130             : /* If true, print all bytes, characters, or fields _except_
     131             :    those that were specified.  */
     132             : static bool complement;
     133             : 
     134             : /* The delimiter character for field mode. */
     135             : static unsigned char delim;
     136             : 
     137             : /* True if the --output-delimiter=STRING option was specified.  */
     138             : static bool output_delimiter_specified;
     139             : 
     140             : /* The length of output_delimiter_string.  */
     141             : static size_t output_delimiter_length;
     142             : 
     143             : /* The output field separator string.  Defaults to the 1-character
     144             :    string consisting of the input delimiter.  */
     145             : static char *output_delimiter_string;
     146             : 
     147             : /* True if we have ever read standard input. */
     148             : static bool have_read_stdin;
     149             : 
     150             : #define HT_RANGE_START_INDEX_INITIAL_CAPACITY 31
     151             : 
     152             : /* The set of range-start indices.  For example, given a range-spec list like
     153             :    `-b1,3-5,4-9,15-', the following indices will be recorded here: 1, 3, 15.
     154             :    Note that although `4' looks like a range-start index, it is in the middle
     155             :    of the `3-5' range, so it doesn't count.
     156             :    This table is created/used IFF output_delimiter_specified is set.  */
     157             : static Hash_table *range_start_ht;
     158             : 
     159             : /* For long options that have no equivalent short option, use a
     160             :    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
     161             : enum
     162             : {
     163             :   OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
     164             :   COMPLEMENT_OPTION
     165             : };
     166             : 
     167             : static struct option const longopts[] =
     168             : {
     169             :   {"bytes", required_argument, NULL, 'b'},
     170             :   {"characters", required_argument, NULL, 'c'},
     171             :   {"fields", required_argument, NULL, 'f'},
     172             :   {"delimiter", required_argument, NULL, 'd'},
     173             :   {"only-delimited", no_argument, NULL, 's'},
     174             :   {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
     175             :   {"complement", no_argument, NULL, COMPLEMENT_OPTION},
     176             :   {GETOPT_HELP_OPTION_DECL},
     177             :   {GETOPT_VERSION_OPTION_DECL},
     178             :   {NULL, 0, NULL, 0}
     179             : };
     180             : /* Print help (or, on failure, a hint to stderr) and exit with STATUS.  */
     181             : void
     182          56 : usage (int status)
     183             : {
     184          56 :   if (status != EXIT_SUCCESS)
     185          55 :     fprintf (stderr, _("Try `%s --help' for more information.\n"),
     186             :              program_name);
     187             :   else
     188             :     {
     189           1 :       printf (_("\
     190             : Usage: %s OPTION... [FILE]...\n\
     191             : "),
     192             :               program_name);
     193           1 :       fputs (_("\
     194             : Print selected parts of lines from each FILE to standard output.\n\
     195             : \n\
     196             : "), stdout);
     197           1 :       fputs (_("\
     198             : Mandatory arguments to long options are mandatory for short options too.\n\
     199             : "), stdout);
     200           1 :       fputs (_("\
     201             :   -b, --bytes=LIST        select only these bytes\n\
     202             :   -c, --characters=LIST   select only these characters\n\
     203             :   -d, --delimiter=DELIM   use DELIM instead of TAB for field delimiter\n\
     204             : "), stdout);
     205           1 :       fputs (_("\
     206             :   -f, --fields=LIST       select only these fields;  also print any line\n\
     207             :                             that contains no delimiter character, unless\n\
     208             :                             the -s option is specified\n\
     209             :   -n                      (ignored)\n\
     210             : "), stdout);
     211           1 :       fputs (_("\
     212             :       --complement        complement the set of selected bytes, characters\n\
     213             :                             or fields.\n\
     214             : "), stdout);
     215           1 :       fputs (_("\
     216             :   -s, --only-delimited    do not print lines not containing delimiters\n\
     217             :       --output-delimiter=STRING  use STRING as the output delimiter\n\
     218             :                             the default is to use the input delimiter\n\
     219             : "), stdout);
     220           1 :       fputs (HELP_OPTION_DESCRIPTION, stdout);
     221           1 :       fputs (VERSION_OPTION_DESCRIPTION, stdout);
     222           1 :       fputs (_("\
     223             : \n\
     224             : Use one, and only one of -b, -c or -f.  Each LIST is made up of one\n\
     225             : range, or many ranges separated by commas.  Selected input is written\n\
     226             : in the same order that it is read, and is written exactly once.\n\
     227             : "), stdout);
     228           1 :       fputs (_("\
     229             : Each range is one of:\n\
     230             : \n\
     231             :   N     N'th byte, character or field, counted from 1\n\
     232             :   N-    from N'th byte, character or field, to end of line\n\
     233             :   N-M   from N'th to M'th (included) byte, character or field\n\
     234             :   -M    from first to M'th (included) byte, character or field\n\
     235             : \n\
     236             : With no FILE, or when FILE is -, read standard input.\n\
     237             : "), stdout);
     238           1 :       emit_bug_reporting_address ();
     239             :     }
     240          56 :   exit (status);
     241             : }
     242             : /* Add I to the set of range-start indices; die if memory runs out.  */
     243             : static inline void
     244           0 : mark_range_start (size_t i)
     245             : {
     246             :   /* Record the fact that `i' is a range-start index.  */
     247           0 :   void *ent_from_table = hash_insert (range_start_ht, (void*) i);
     248           0 :   if (ent_from_table == NULL)
     249             :     {
     250             :       /* Insertion failed due to lack of memory.  */
     251           0 :       xalloc_die ();
     252             :     }
     253           0 :   assert ((size_t) ent_from_table == i);
     254           0 : }
     255             : /* Set bit I of the printable_field bit vector, selecting index I.  */
     256             : static inline void
     257          59 : mark_printable_field (size_t i)
     258             : {
     259          59 :   size_t n = i / CHAR_BIT;
     260          59 :   printable_field[n] |= (1 << (i % CHAR_BIT));
     261          59 : }
     262             : /* Return true if bit I of printable_field is set.  The vector only
                     :    covers indices up to MAX_RANGE_ENDPOINT, so any larger index is
                     :    reported as unset instead of being read out of bounds.  */
     263             : static inline bool
     264          47 : is_printable_field (size_t i)
     265             : {
     266          47 :   return (i <= max_range_endpoint
     267          47 :           && ((printable_field[i / CHAR_BIT] >> (i % CHAR_BIT)) & 1));
     268             : }
     269             : /* Hash X by its pointer value (not what it points to), modulo TABLESIZE.  */
     270             : static size_t
     271           0 : hash_int (const void *x, size_t tablesize)
     272             : {
     273             : #ifdef UINTPTR_MAX
     274           0 :   uintptr_t y = (uintptr_t) x;
     275             : #else
     276             :   size_t y = (size_t) x;
     277             : #endif
     278           0 :   return y % tablesize;
     279             : }
     280             : /* Return true if X and Y are the same pointer value.  */
     281             : static bool
     282           0 : hash_compare_ints (void const *x, void const *y)
     283             : {
     284           0 :   return (x == y) ? true : false;
     285             : }
     286             : /* Return true if index I is recorded in range_start_ht.  */
     287             : static bool
     288           0 : is_range_start_index (size_t i)
     289             : {
     290           0 :   return hash_lookup (range_start_ht, (void *) i) ? true : false;
     291             : }
     292             : 
     293             : /* Return true if the K'th field or byte is to be printed, i.e. if K
     294             :    is selected by the range list, inverted when `complement' is set.
     295             :    When returning true, if RANGE_START is non-NULL, set *RANGE_START
     296             :    to true if K is a recorded range-start index, and to false otherwise.  */
     297             : 
     298             : static bool
     299         173 : print_kth (size_t k, bool *range_start)
     300             : {
     301         173 :   bool k_selected
     302         276 :     = ((0 < eol_range_start && eol_range_start <= k)
     303         253 :        || (k <= max_range_endpoint && is_printable_field (k)));
     304             : 
     305         173 :   bool is_selected = k_selected ^ complement;
     306         173 :   if (range_start && is_selected)
     307           0 :     *range_start = is_range_start_index (k);
     308             : 
     309         173 :   return is_selected;
     310             : }
     311             : 
     312             : /* Comparison function for qsort to order the list of struct range_pairs
     313             :    by ascending LO.  Compare as size_t: narrowing to int could misorder.  */
     314             : static int
     315           9 : compare_ranges (const void *a, const void *b)
     316             : {
     317           9 :   size_t a_start = ((const struct range_pair *) a)->lo;
     318           9 :   size_t b_start = ((const struct range_pair *) b)->lo;
     319           9 :   return a_start < b_start ? -1 : a_start > b_start;
     320             : }
     321             : 
     322             : /* Given the list of field or byte range specifications FIELDSTR, set
     323             :    MAX_RANGE_ENDPOINT and allocate and initialize the PRINTABLE_FIELD
     324             :    array.  If there is a right-open-ended range, set EOL_RANGE_START
     325             :    to its starting index.  FIELDSTR should be composed of one or more
     326             :    numbers or ranges of numbers, separated by blanks or commas.
     327             :    Incomplete ranges may be given: `-m' means `1-m'; `n-' means `n'
     328             :    through end of line.  Return true if FIELDSTR contains at least
     329             :    one field specification, false otherwise.  A malformed list or an
                     :    over-large number is diagnosed and the program exits.  */
     330             : 
     331             : /* FIXME-someday:  What if the user wants to cut out the 1,000,000-th
     332             :    field of some huge input file?  This function shouldn't have to
     333             :    allocate a table of a million bits just so we can test every
     334             :    field < 10^6 with an array dereference.  Instead, consider using
     335             :    an adaptive approach: if the range of selected fields is too large,
     336             :    but only a few fields/byte-offsets are actually selected, use a
     337             :    hash table.  If the range of selected fields is too large, and
     338             :    too many are selected, then resort to using the range-pairs (the
     339             :    `rp' array) directly.  */
     340             : 
     341             : static bool
     342          68 : set_fields (const char *fieldstr)
     343             : {
     344          68 :   size_t initial = 1;           /* Value of first number in a range.  */
     345          68 :   size_t value = 0;             /* If nonzero, a number being accumulated.  */
     346          68 :   bool lhs_specified = false;
     347          68 :   bool rhs_specified = false;
     348          68 :   bool dash_found = false;      /* True if a '-' is found in this field.  */
     349          68 :   bool field_found = false;     /* True if at least one field spec
     350             :                                    has been processed.  */
     351             : 
     352          68 :   struct range_pair *rp = NULL;
     353          68 :   size_t n_rp = 0;
     354          68 :   size_t n_rp_allocated = 0;
     355             :   size_t i;
     356          68 :   bool in_digits = false;
     357             : 
     358             :   /* Collect and store in RP the range end points.
     359             :      It also sets EOL_RANGE_START if appropriate.  */
     360             : 
     361             :   for (;;)
     362             :     {
     363         412 :       if (*fieldstr == '-')
     364             :         {
     365          85 :           in_digits = false;
     366             :           /* Starting a range. */
     367          85 :           if (dash_found)
     368          12 :             FATAL_ERROR (_("invalid byte or field list"));
     369          73 :           dash_found = true;
     370          73 :           fieldstr++;
     371             : 
     372          73 :           initial = (lhs_specified ? value : 1);
     373          73 :           value = 0;
     374             :         }
     375         155 :       else if (*fieldstr == ',' || isblank ((unsigned char) *fieldstr) || *fieldstr == '\0')
     376             :         {
     377          76 :           in_digits = false;
     378             :           /* Ending the string, or this field/byte sublist. */
     379          76 :           if (dash_found)
     380             :             {
     381          61 :               dash_found = false;
     382             : 
     383          61 :               if (!lhs_specified && !rhs_specified)
     384           2 :                 FATAL_ERROR (_("invalid range with no endpoint: -"));
     385             : 
     386             :               /* A range.  Possibilities: -n, m-n, n-.
     387             :                  In any case, `initial' contains the start of the range. */
     388          59 :               if (!rhs_specified)
     389             :                 {
     390             :                   /* `n-'.  From `initial' to end of line. */
     391          32 :                   eol_range_start = initial;
     392          32 :                   field_found = true;
     393             :                 }
     394             :               else
     395             :                 {
     396             :                   /* `m-n' or `-n' (1-n). */
     397          27 :                   if (value < initial)
     398           1 :                     FATAL_ERROR (_("invalid decreasing range"));
     399             : 
     400             :                   /* Is there already a range going to end of line? */
     401          26 :                   if (eol_range_start != 0)
     402             :                     {
     403             :                       /* Yes.  Is the new sequence already contained
     404             :                          in the old one?  If so, no processing is
     405             :                          necessary. */
     406           5 :                       if (initial < eol_range_start)
     407             :                         {
     408             :                           /* No, the new sequence starts before the
     409             :                              old.  Does the old range going to end of line
     410             :                              extend into the new range?  */
     411           4 :                           if (eol_range_start <= value)
     412             :                             {
     413             :                               /* Yes.  Simply move the end of line marker. */
     414           1 :                               eol_range_start = initial;
     415             :                             }
     416             :                           else
     417             :                             {
     418             :                               /* No.  A simple range, before and disjoint from
     419             :                                  the range going to end of line.  Fill it. */
     420           3 :                               ADD_RANGE_PAIR (rp, initial, value);
     421             :                             }
     422             : 
     423             :                           /* In any case, some fields were selected. */
     424           3 :                           field_found = true;
     425             :                         }
     426             :                     }
     427             :                   else
     428             :                     {
     429             :                       /* There is no range going to end of line. */
     430          21 :                       ADD_RANGE_PAIR (rp, initial, value);
     431          20 :                       field_found = true;
     432             :                     }
     433          24 :                   value = 0;
     434             :                 }
     435             :             }
     436             :           else
     437             :             {
     438             :               /* A simple field number, not a range. */
     439          15 :               ADD_RANGE_PAIR (rp, value, value);
     440          10 :               value = 0;
     441          10 :               field_found = true;
     442             :             }
     443             : 
     444          66 :           if (*fieldstr == '\0')
     445             :             {
     446          45 :               break;
     447             :             }
     448             : 
     449          21 :           fieldstr++;
     450          21 :           lhs_specified = false;
     451          21 :           rhs_specified = false;
     452             :         }
     453          79 :       else if (ISDIGIT (*fieldstr))
     454             :         {
     455             :           /* Record beginning of digit string, in case we have to
     456             :              complain about it.  */
     457             :           static char const *num_start;
     458          78 :           if (!in_digits || !num_start)
     459          74 :             num_start = fieldstr;
     460          78 :           in_digits = true;
     461             : 
     462          78 :           if (dash_found)
     463          28 :             rhs_specified = 1;
     464             :           else
     465          50 :             lhs_specified = 1;
     466             : 
     467             :           /* Detect overflow.  */
     468          78 :           if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t))
     469             :             {
     470             :               /* In case the user specified -c$(echo 2^64|bc),22,
     471             :                  complain only about the first number.  */
     472             :               /* Determine the length of the offending number.  */
     473           0 :               size_t len = strspn (num_start, "0123456789");
     474           0 :               char *bad_num = xstrndup (num_start, len);
     475           0 :               if (operating_mode == byte_mode)
     476           0 :                 error (0, 0,
     477             :                        _("byte offset %s is too large"), quote (bad_num));
     478             :               else
     479           0 :                 error (0, 0,
     480             :                        _("field number %s is too large"), quote (bad_num));
     481           0 :               free (bad_num);
     482           0 :               exit (EXIT_FAILURE);
     483             :             }
     484             : 
     485          78 :           fieldstr++;
     486             :         }
     487             :       else
     488           1 :         FATAL_ERROR (_("invalid byte or field list"));
     489             :     }
     490             : 
     491          45 :   max_range_endpoint = 0;
     492          73 :   for (i = 0; i < n_rp; i++)
     493             :     {
     494          28 :       if (rp[i].hi > max_range_endpoint)
     495          27 :         max_range_endpoint = rp[i].hi;
     496             :     }
     497             : 
     498             :   /* Allocate an array large enough so that it may be indexed by
     499             :      the field numbers corresponding to all finite ranges
     500             :      (i.e. `2-6' or `-4', but not `5-') in FIELDSTR.  */
     501             : 
     502          45 :   printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1);
     503             : 
     504          45 :   qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
     505             : 
     506             :   /* Set the array entries corresponding to integers in the ranges of RP.  */
     507          73 :   for (i = 0; i < n_rp; i++)
     508             :     {
     509             :       size_t j;
     510             :       size_t rsi_candidate;
     511             : 
     512             :       /* Record the range-start indices, i.e., record each start
     513             :          index that is not part of any other (lo..hi] range.  */
     514          28 :       rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo;
     515          28 :       if (output_delimiter_specified
     516           0 :           && !is_printable_field (rsi_candidate))
     517           0 :         mark_range_start (rsi_candidate);
     518             : 
     519          87 :       for (j = rp[i].lo; j <= rp[i].hi; j++)
     520          59 :         mark_printable_field (j);
     521             :     }
     522             : 
     523          45 :   if (output_delimiter_specified
     524           0 :       && !complement
     525           0 :       && eol_range_start && !is_printable_field (eol_range_start))
     526           0 :     mark_range_start (eol_range_start);
     527             : 
     528          45 :   free (rp);
     529             : 
     530          45 :   return field_found;
     531             : }
     532             : 
     533             : /* Read from stream STREAM, printing to standard output any selected bytes.
                     :    Each input newline is echoed and resets the byte index; a final
                     :    line lacking a newline gets one appended.  When an output
                     :    delimiter was specified, it is written before a selected byte
                     :    that starts a range, unless nothing was printed yet on the line.  */
     534             : 
     535             : static void
     536          26 : cut_bytes (FILE *stream)
     537             : {
     538             :   size_t byte_idx;      /* Number of bytes in the line so far. */
     539             :   /* Whether to begin printing delimiters between ranges for the current line.
     540             :      Set after we've begun printing data corresponding to the first range.  */
     541             :   bool print_delimiter;
     542             : 
     543          26 :   byte_idx = 0;
     544          26 :   print_delimiter = false;
     545             :   while (1)
     546         168 :     {
     547             :       int c;            /* Each character from the file. */
     548             : 
     549         194 :       c = getc (stream);
     550             : 
     551         194 :       if (c == '\n')
     552             :         {
     553         153 :           putchar ('\n');
     554         153 :           byte_idx = 0;
     555         153 :           print_delimiter = false;
     556             :         }
     557          41 :       else if (c == EOF)
     558             :         {
     559          26 :           if (byte_idx > 0)
     560           2 :             putchar ('\n');
     561          26 :           break;
     562             :         }
     563             :       else
     564             :         {
     565             :           bool range_start;
     566          15 :           bool *rs = output_delimiter_specified ? &range_start : NULL;
     567          15 :           if (print_kth (++byte_idx, rs))
     568             :             {
     569           5 :               if (rs && *rs && print_delimiter)
     570             :                 {
     571           0 :                   fwrite (output_delimiter_string, sizeof (char),
     572             :                           output_delimiter_length, stdout);
     573             :                 }
     574           5 :               print_delimiter = true;
     575           5 :               putchar (c);
     576             :             }
     577             :         }
     578             :     }
     579          26 : }
     580             : 
     581             : /* Read from stream STREAM, printing to standard output any selected fields.  */
     582             : 
     583             : static void
     584          18 : cut_fields (FILE *stream)
     585             : {
     586             :   int c;
     587          18 :   size_t field_idx = 1;
     588          18 :   bool found_any_selected_field = false;
     589             :   bool buffer_first_field;
     590             : 
     591          18 :   c = getc (stream);
     592          18 :   if (c == EOF)
     593           1 :     return;
     594             : 
     595          17 :   ungetc (c, stream);
     596             : 
     597             :   /* To support the semantics of the -s flag, we may have to buffer
     598             :      all of the first field to determine whether it is `delimited.'
     599             :      But that is unnecessary if all non-delimited lines must be printed
     600             :      and the first field has been selected, or if non-delimited lines
     601             :      must be suppressed and the first field has *not* been selected.
     602             :      That is because a non-delimited line has exactly one field.  */
     603          17 :   buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
     604             : 
     605             :   while (1)
     606             :     {
     607         263 :       if (field_idx == 1 && buffer_first_field)
     608             :         {
     609             :           ssize_t len;
     610             :           size_t n_bytes;
     611             : 
     612          15 :           len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
     613             :                             GETNLINE_NO_LIMIT, delim, '\n', stream);
     614          15 :           if (len < 0)
     615             :             {
     616           2 :               free (field_1_buffer);
     617           2 :               field_1_buffer = NULL;
     618           2 :               if (ferror (stream) || feof (stream))
     619             :                 break;
     620           0 :               xalloc_die ();
     621             :             }
     622             : 
     623          13 :           n_bytes = len;
     624          13 :           assert (n_bytes != 0);
     625             : 
     626             :           /* If the first field extends to the end of line (it is not
     627             :              delimited) and we are printing all non-delimited lines,
     628             :              print this one.  */
     629          13 :           if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
     630             :             {
     631           5 :               if (suppress_non_delimited)
     632             :                 {
     633             :                   /* Empty.  */
     634             :                 }
     635             :               else
     636             :                 {
     637           5 :                   fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
     638             :                   /* Make sure the output line is newline terminated.  */
     639           5 :                   if (field_1_buffer[n_bytes - 1] != '\n')
     640           1 :                     putchar ('\n');
     641             :                 }
     642           5 :               continue;
     643             :             }
     644           8 :           if (print_kth (1, NULL))
     645             :             {
     646             :               /* Print the field, but not the trailing delimiter.  */
     647           1 :               fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
     648           1 :               found_any_selected_field = true;
     649             :             }
     650           8 :           ++field_idx;
     651             :         }
     652             : 
     653         133 :       if (c != EOF)
     654             :         {
     655         133 :           if (print_kth (field_idx, NULL))
     656             :             {
     657         103 :               if (found_any_selected_field)
     658             :                 {
     659          84 :                   fwrite (output_delimiter_string, sizeof (char),
     660             :                           output_delimiter_length, stdout);
     661             :                 }
     662         103 :               found_any_selected_field = true;
     663             : 
     664         208 :               while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
     665             :                 {
     666           2 :                   putchar (c);
     667             :                 }
     668             :             }
     669             :           else
     670             :             {
     671          30 :               while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
     672             :                 {
     673             :                   /* Empty.  */
     674             :                 }
     675             :             }
     676             :         }
     677             : 
     678         133 :       if (c == '\n')
     679             :         {
     680           9 :           c = getc (stream);
     681           9 :           if (c != EOF)
     682             :             {
     683           8 :               ungetc (c, stream);
     684           8 :               c = '\n';
     685             :             }
     686             :         }
     687             : 
     688         133 :       if (c == delim)
     689         110 :         ++field_idx;
     690          23 :       else if (c == '\n' || c == EOF)
     691             :         {
     692          23 :           if (found_any_selected_field
     693           3 :               || !(suppress_non_delimited && field_idx == 1))
     694          22 :             putchar ('\n');
     695          23 :           if (c == EOF)
     696          15 :             break;
     697           8 :           field_idx = 1;
     698           8 :           found_any_selected_field = false;
     699             :         }
     700             :     }
     701             : }
     702             : 
     703             : static void
     704          44 : cut_stream (FILE *stream)
     705             : {
     706          44 :   if (operating_mode == byte_mode)
     707          26 :     cut_bytes (stream);
     708             :   else
     709          18 :     cut_fields (stream);
     710          44 : }
     711             : 
     712             : /* Process file FILE to standard output.
     713             :    Return true if successful.  */
     714             : 
     715             : static bool
     716          46 : cut_file (char const *file)
     717             : {
     718             :   FILE *stream;
     719             : 
     720          46 :   if (STREQ (file, "-"))
     721             :     {
     722          36 :       have_read_stdin = true;
     723          36 :       stream = stdin;
     724             :     }
     725             :   else
     726             :     {
     727          10 :       stream = fopen (file, "r");
     728          10 :       if (stream == NULL)
     729             :         {
     730           2 :           error (0, errno, "%s", file);
     731           2 :           return false;
     732             :         }
     733             :     }
     734             : 
     735          44 :   cut_stream (stream);
     736             : 
     737          44 :   if (ferror (stream))
     738             :     {
     739           7 :       error (0, errno, "%s", file);
     740           7 :       return false;
     741             :     }
     742          37 :   if (STREQ (file, "-"))
     743          36 :     clearerr (stream);          /* Also clear EOF. */
     744           1 :   else if (fclose (stream) == EOF)
     745             :     {
     746           0 :       error (0, errno, "%s", file);
     747           0 :       return false;
     748             :     }
     749          37 :   return true;
     750             : }
     751             : 
     752             : int
     753         102 : main (int argc, char **argv)
     754             : {
     755             :   int optc;
     756             :   bool ok;
     757         102 :   bool delim_specified = false;
     758             :   char *spec_list_string IF_LINT(= NULL);
     759             : 
     760             :   initialize_main (&argc, &argv);
     761         102 :   program_name = argv[0];
     762         102 :   setlocale (LC_ALL, "");
     763             :   bindtextdomain (PACKAGE, LOCALEDIR);
     764             :   textdomain (PACKAGE);
     765             : 
     766         102 :   atexit (close_stdout);
     767             : 
     768         102 :   operating_mode = undefined_mode;
     769             : 
     770             :   /* By default, all non-delimited lines are printed.  */
     771         102 :   suppress_non_delimited = false;
     772             : 
     773         102 :   delim = '\0';
     774         102 :   have_read_stdin = false;
     775             : 
     776         290 :   while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
     777             :     {
     778         102 :       switch (optc)
     779             :         {
     780          54 :         case 'b':
     781             :         case 'c':
     782             :           /* Build the byte list. */
     783          54 :           if (operating_mode != undefined_mode)
     784           1 :             FATAL_ERROR (_("only one type of list may be specified"));
     785          53 :           operating_mode = byte_mode;
     786          53 :           spec_list_string = optarg;
     787          53 :           break;
     788             : 
     789          20 :         case 'f':
     790             :           /* Build the field list. */
     791          20 :           if (operating_mode != undefined_mode)
     792           1 :             FATAL_ERROR (_("only one type of list may be specified"));
     793          19 :           operating_mode = field_mode;
     794          19 :           spec_list_string = optarg;
     795          19 :           break;
     796             : 
     797           5 :         case 'd':
     798             :           /* New delimiter. */
     799             :           /* Interpret -d '' to mean `use the NUL byte as the delimiter.'  */
     800           5 :           if (optarg[0] != '\0' && optarg[1] != '\0')
     801           1 :             FATAL_ERROR (_("the delimiter must be a single character"));
     802           4 :           delim = optarg[0];
     803           4 :           delim_specified = true;
     804           4 :           break;
     805             : 
     806           2 :         case OUTPUT_DELIMITER_OPTION:
     807           2 :           output_delimiter_specified = true;
     808             :           /* Interpret --output-delimiter='' to mean
     809             :              `use the NUL byte as the delimiter.'  */
     810           4 :           output_delimiter_length = (optarg[0] == '\0'
     811           2 :                                      ? 1 : strlen (optarg));
     812           2 :           output_delimiter_string = xstrdup (optarg);
     813           2 :           break;
     814             : 
     815           3 :         case 'n':
     816           3 :           break;
     817             : 
     818           4 :         case 's':
     819           4 :           suppress_non_delimited = true;
     820           4 :           break;
     821             : 
     822           1 :         case COMPLEMENT_OPTION:
     823           1 :           complement = true;
     824           1 :           break;
     825             : 
     826           1 :         case_GETOPT_HELP_CHAR;
     827             : 
     828           1 :         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
     829             : 
     830          11 :         default:
     831          11 :           usage (EXIT_FAILURE);
     832             :         }
     833             :     }
     834             : 
     835          86 :   if (operating_mode == undefined_mode)
     836          16 :     FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
     837             : 
     838          70 :   if (delim != '\0' && operating_mode != field_mode)
     839           1 :     FATAL_ERROR (_("an input delimiter may be specified only\
     840             :  when operating on fields"));
     841             : 
     842          69 :   if (suppress_non_delimited && operating_mode != field_mode)
     843           1 :     FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
     844             : \tonly when operating on fields"));
     845             : 
     846          68 :   if (output_delimiter_specified)
     847             :     {
     848           0 :       range_start_ht = hash_initialize (HT_RANGE_START_INDEX_INITIAL_CAPACITY,
     849             :                                         NULL, hash_int,
     850             :                                         hash_compare_ints, NULL);
     851           0 :       if (range_start_ht == NULL)
     852           0 :         xalloc_die ();
     853             : 
     854             :     }
     855             : 
     856          68 :   if (! set_fields (spec_list_string))
     857             :     {
     858           0 :       if (operating_mode == field_mode)
     859           0 :         FATAL_ERROR (_("missing list of fields"));
     860             :       else
     861           0 :         FATAL_ERROR (_("missing list of positions"));
     862             :     }
     863             : 
     864          45 :   if (!delim_specified)
     865          44 :     delim = '\t';
     866             : 
     867          45 :   if (output_delimiter_string == NULL)
     868             :     {
     869             :       static char dummy[2];
     870          45 :       dummy[0] = delim;
     871          45 :       dummy[1] = '\0';
     872          45 :       output_delimiter_string = dummy;
     873          45 :       output_delimiter_length = 1;
     874             :     }
     875             : 
     876          45 :   if (optind == argc)
     877          33 :     ok = cut_file ("-");
     878             :   else
     879          25 :     for (ok = true; optind < argc; optind++)
     880          13 :       ok &= cut_file (argv[optind]);
     881             : 
     882          45 :   if (range_start_ht)
     883           0 :     hash_free (range_start_ht);
     884             : 
     885          45 :   if (have_read_stdin && fclose (stdin) == EOF)
     886             :     {
     887           0 :       error (0, errno, "-");
     888           0 :       ok = false;
     889             :     }
     890             : 
     891          45 :   exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
     892             : }

Generated by: LCOV version 1.10