Line data Source code
1 : /* cut - remove parts of lines of files
2 : Copyright (C) 1997-2007 Free Software Foundation, Inc.
3 : Copyright (C) 1984 David M. Ihnat
4 :
5 : This program is free software: you can redistribute it and/or modify
6 : it under the terms of the GNU General Public License as published by
7 : the Free Software Foundation, either version 3 of the License, or
8 : (at your option) any later version.
9 :
10 : This program is distributed in the hope that it will be useful,
11 : but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : GNU General Public License for more details.
14 :
15 : You should have received a copy of the GNU General Public License
16 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 :
18 : /* Written by David Ihnat. */
19 :
20 : /* POSIX changes, bug fixes, long-named options, and cleanup
21 : by David MacKenzie <djm@gnu.ai.mit.edu>.
22 :
23 : Rewrite cut_fields and cut_bytes -- Jim Meyering. */
24 :
25 : #include <config.h>
26 :
27 : #include <stdio.h>
28 : #include <assert.h>
29 : #include <getopt.h>
30 : #include <sys/types.h>
31 : #include "system.h"
32 :
33 : #include "error.h"
34 : #include "getndelim2.h"
35 : #include "hash.h"
36 : #include "quote.h"
37 : #include "xstrndup.h"
38 :
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "cut"

#define AUTHORS "David Ihnat", "David MacKenzie", "Jim Meyering"

/* Issue MESSAGE as a diagnostic through error (), then call
   usage (EXIT_FAILURE), which prints the short usage hint and exits.
   MESSAGE is handed to error () as the argument of a "%s" format, never
   as the format itself, so a `%' appearing in a (possibly translated)
   message is printed literally instead of being parsed as a conversion.  */
#define FATAL_ERROR(Message)						\
  do									\
    {									\
      error (0, 0, "%s", (Message));					\
      usage (EXIT_FAILURE);						\
    }									\
  while (0)
51 :
/* A closed range of field or byte indices: LO through HI, inclusive.
   Indices are numbered from 1; ADD_RANGE_PAIR rejects a zero endpoint.  */
struct range_pair
  {
    size_t lo;
    size_t hi;
  };

/* Append the pair LOW, HIGH to the list RP of range pairs, growing RP
   with X2NREALLOC when it is full.  The variables N_RP (number of pairs
   in use) and N_RP_ALLOCATED (number of pairs allocated) must be in
   scope where the macro is used; both are updated as needed.

   A zero LOW or HIGH is diagnosed with FATAL_ERROR.  LOW and HIGH are
   each evaluated exactly once and may be arbitrary expressions; RP must
   be an lvalue.  */

#define ADD_RANGE_PAIR(rp, low, high)					\
  do									\
    {									\
      size_t arp_lo_ = (low);						\
      size_t arp_hi_ = (high);						\
      if (arp_lo_ == 0 || arp_hi_ == 0)					\
	FATAL_ERROR (_("fields and positions are numbered from 1"));	\
      if (n_rp >= n_rp_allocated)					\
	{								\
	  (rp) = X2NREALLOC ((rp), &n_rp_allocated);			\
	}								\
      (rp)[n_rp].lo = arp_lo_;						\
      (rp)[n_rp].hi = arp_hi_;						\
      ++n_rp;								\
    }									\
  while (0)
76 :
/* Scratch buffer holding the whole first field of a line.  It is needed
   to implement -s (or its absence) when the field list includes (does
   not include) the first field: in those cases the entire first field
   has to be read before any of it may be output, so that we can tell
   whether it is followed by a delimiter or by a newline.  In all other
   cases cut_fields works without this buffer.  */
static char *field_1_buffer;

/* The number of bytes allocated for FIELD_1_BUFFER.  */
static size_t field_1_bufsize;

/* The largest field or byte index that ends a closed or degenerate
   range specification.  The starting index of a right-open-ended range
   is not counted.  For example, with either range spec `2-5,9-' or
   `2-3,5,9-' this variable would be set to 5.  */
static size_t max_range_endpoint;

/* If nonzero, the index of the first field in a range that extends
   to end of line.  */
static size_t eol_range_start;

/* A bit vector.
   In byte mode it says which bytes to output.
   In field mode it says which DELIM-separated fields to output.
   Bytes and fields are both numbered starting with 1, so bit zero
   of this array is unused.
   Field or byte K has been selected if
     (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
      || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START).  */
static unsigned char *printable_field;
108 :
/* The kind of list given on the command line.  */
enum operating_mode
  {
    /* No list option has been processed yet.  */
    undefined_mode = 0,

    /* Output characters that are in the given bytes.  */
    byte_mode = 1,

    /* Output the given delimiter-separated fields.  */
    field_mode = 2
  };

/* The name this program was run with.  */
char *program_name;

/* The mode selected by the list option; undefined_mode until one
   has been seen.  */
static enum operating_mode operating_mode;
124 :
/* If true, do not output lines containing no delimiter characters.
   Otherwise, all such lines are printed.  This option is valid only
   with field mode.  */
static bool suppress_non_delimited;

/* If true, print all bytes, characters, or fields _except_
   those that were specified.  */
static bool complement;

/* The delimiter character for field mode.  */
static unsigned char delim;

/* True if the --output-delimiter=STRING option was specified.  */
static bool output_delimiter_specified;

/* The length of output_delimiter_string.  */
static size_t output_delimiter_length;

/* The output field separator string.  Defaults to the 1-character
   string consisting of the input delimiter.  */
static char *output_delimiter_string;

/* True if we have ever read standard input.  */
static bool have_read_stdin;
149 :
150 : #define HT_RANGE_START_INDEX_INITIAL_CAPACITY 31
151 :
152 : /* The set of range-start indices. For example, given a range-spec list like
153 : `-b1,3-5,4-9,15-', the following indices will be recorded here: 1, 3, 15.
154 : Note that although `4' looks like a range-start index, it is in the middle
155 : of the `3-5' range, so it doesn't count.
156 : This table is created/used IFF output_delimiter_specified is set. */
157 : static Hash_table *range_start_ht;
158 :
/* Long options without an equivalent short option are identified by
   a non-character pseudo short option; the values start just above
   CHAR_MAX so they cannot collide with a real option character.  */
enum
  {
    OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
    COMPLEMENT_OPTION = CHAR_MAX + 2
  };
166 :
167 : static struct option const longopts[] =
168 : {
169 : {"bytes", required_argument, NULL, 'b'},
170 : {"characters", required_argument, NULL, 'c'},
171 : {"fields", required_argument, NULL, 'f'},
172 : {"delimiter", required_argument, NULL, 'd'},
173 : {"only-delimited", no_argument, NULL, 's'},
174 : {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
175 : {"complement", no_argument, NULL, COMPLEMENT_OPTION},
176 : {GETOPT_HELP_OPTION_DECL},
177 : {GETOPT_VERSION_OPTION_DECL},
178 : {NULL, 0, NULL, 0}
179 : };
180 :
181 : void
182 56 : usage (int status)
183 : {
184 56 : if (status != EXIT_SUCCESS)
185 55 : fprintf (stderr, _("Try `%s --help' for more information.\n"),
186 : program_name);
187 : else
188 : {
189 1 : printf (_("\
190 : Usage: %s OPTION... [FILE]...\n\
191 : "),
192 : program_name);
193 1 : fputs (_("\
194 : Print selected parts of lines from each FILE to standard output.\n\
195 : \n\
196 : "), stdout);
197 1 : fputs (_("\
198 : Mandatory arguments to long options are mandatory for short options too.\n\
199 : "), stdout);
200 1 : fputs (_("\
201 : -b, --bytes=LIST select only these bytes\n\
202 : -c, --characters=LIST select only these characters\n\
203 : -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
204 : "), stdout);
205 1 : fputs (_("\
206 : -f, --fields=LIST select only these fields; also print any line\n\
207 : that contains no delimiter character, unless\n\
208 : the -s option is specified\n\
209 : -n (ignored)\n\
210 : "), stdout);
211 1 : fputs (_("\
212 : --complement complement the set of selected bytes, characters\n\
213 : or fields.\n\
214 : "), stdout);
215 1 : fputs (_("\
216 : -s, --only-delimited do not print lines not containing delimiters\n\
217 : --output-delimiter=STRING use STRING as the output delimiter\n\
218 : the default is to use the input delimiter\n\
219 : "), stdout);
220 1 : fputs (HELP_OPTION_DESCRIPTION, stdout);
221 1 : fputs (VERSION_OPTION_DESCRIPTION, stdout);
222 1 : fputs (_("\
223 : \n\
224 : Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
225 : range, or many ranges separated by commas. Selected input is written\n\
226 : in the same order that it is read, and is written exactly once.\n\
227 : "), stdout);
228 1 : fputs (_("\
229 : Each range is one of:\n\
230 : \n\
231 : N N'th byte, character or field, counted from 1\n\
232 : N- from N'th byte, character or field, to end of line\n\
233 : N-M from N'th to M'th (included) byte, character or field\n\
234 : -M from first to M'th (included) byte, character or field\n\
235 : \n\
236 : With no FILE, or when FILE is -, read standard input.\n\
237 : "), stdout);
238 1 : emit_bug_reporting_address ();
239 : }
240 56 : exit (status);
241 : }
242 :
243 : static inline void
244 0 : mark_range_start (size_t i)
245 : {
246 : /* Record the fact that `i' is a range-start index. */
247 0 : void *ent_from_table = hash_insert (range_start_ht, (void*) i);
248 0 : if (ent_from_table == NULL)
249 : {
250 : /* Insertion failed due to lack of memory. */
251 0 : xalloc_die ();
252 : }
253 0 : assert ((size_t) ent_from_table == i);
254 0 : }
255 :
256 : static inline void
257 59 : mark_printable_field (size_t i)
258 : {
259 59 : size_t n = i / CHAR_BIT;
260 59 : printable_field[n] |= (1 << (i % CHAR_BIT));
261 59 : }
262 :
263 : static inline bool
264 47 : is_printable_field (size_t i)
265 : {
266 47 : size_t n = i / CHAR_BIT;
267 47 : return (printable_field[n] >> (i % CHAR_BIT)) & 1;
268 : }
269 :
/* Hash function for RANGE_START_HT: X is an index stored directly in
   a pointer; return its integer value modulo TABLESIZE.  */
static size_t
hash_int (const void *x, size_t tablesize)
{
#ifdef UINTPTR_MAX
  uintptr_t key = (uintptr_t) x;
#else
  size_t key = (size_t) x;
#endif
  size_t bucket = key % tablesize;
  return bucket;
}
280 :
/* Equality function for RANGE_START_HT: the entries are indices stored
   directly in pointers, so two entries match exactly when the pointer
   values are equal.  */
static bool
hash_compare_ints (void const *x, void const *y)
{
  return x == y;
}
286 :
287 : static bool
288 0 : is_range_start_index (size_t i)
289 : {
290 0 : return hash_lookup (range_start_ht, (void *) i) ? true : false;
291 : }
292 :
293 : /* Return nonzero if the K'th field or byte is printable.
294 : When returning nonzero, if RANGE_START is non-NULL,
295 : set *RANGE_START to true if K is the beginning of a range, and to
296 : false otherwise. */
297 :
298 : static bool
299 173 : print_kth (size_t k, bool *range_start)
300 : {
301 173 : bool k_selected
302 276 : = ((0 < eol_range_start && eol_range_start <= k)
303 253 : || (k <= max_range_endpoint && is_printable_field (k)));
304 :
305 173 : bool is_selected = k_selected ^ complement;
306 173 : if (range_start && is_selected)
307 0 : *range_start = is_range_start_index (k);
308 :
309 173 : return is_selected;
310 : }
311 :
312 : /* Comparison function for qsort to order the list of
313 : struct range_pairs. */
314 : static int
315 9 : compare_ranges (const void *a, const void *b)
316 : {
317 9 : int a_start = ((const struct range_pair *) a)->lo;
318 9 : int b_start = ((const struct range_pair *) b)->lo;
319 9 : return a_start < b_start ? -1 : a_start > b_start;
320 : }
321 :
322 : /* Given the list of field or byte range specifications FIELDSTR, set
323 : MAX_RANGE_ENDPOINT and allocate and initialize the PRINTABLE_FIELD
324 : array. If there is a right-open-ended range, set EOL_RANGE_START
325 : to its starting index. FIELDSTR should be composed of one or more
326 : numbers or ranges of numbers, separated by blanks or commas.
327 : Incomplete ranges may be given: `-m' means `1-m'; `n-' means `n'
328 : through end of line. Return true if FIELDSTR contains at least
329 : one field specification, false otherwise. */
330 :
331 : /* FIXME-someday: What if the user wants to cut out the 1,000,000-th
332 : field of some huge input file? This function shouldn't have to
333 : allocate a table of a million bits just so we can test every
334 : field < 10^6 with an array dereference. Instead, consider using
335 : an adaptive approach: if the range of selected fields is too large,
336 : but only a few fields/byte-offsets are actually selected, use a
337 : hash table. If the range of selected fields is too large, and
338 : too many are selected, then resort to using the range-pairs (the
339 : `rp' array) directly. */
340 :
341 : static bool
342 68 : set_fields (const char *fieldstr)
343 : {
344 68 : size_t initial = 1; /* Value of first number in a range. */
345 68 : size_t value = 0; /* If nonzero, a number being accumulated. */
346 68 : bool lhs_specified = false;
347 68 : bool rhs_specified = false;
348 68 : bool dash_found = false; /* True if a '-' is found in this field. */
349 68 : bool field_found = false; /* True if at least one field spec
350 : has been processed. */
351 :
352 68 : struct range_pair *rp = NULL;
353 68 : size_t n_rp = 0;
354 68 : size_t n_rp_allocated = 0;
355 : size_t i;
356 68 : bool in_digits = false;
357 :
358 : /* Collect and store in RP the range end points.
359 : It also sets EOL_RANGE_START if appropriate. */
360 :
361 : for (;;)
362 : {
363 412 : if (*fieldstr == '-')
364 : {
365 85 : in_digits = false;
366 : /* Starting a range. */
367 85 : if (dash_found)
368 12 : FATAL_ERROR (_("invalid byte or field list"));
369 73 : dash_found = true;
370 73 : fieldstr++;
371 :
372 73 : initial = (lhs_specified ? value : 1);
373 73 : value = 0;
374 : }
375 155 : else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0')
376 : {
377 76 : in_digits = false;
378 : /* Ending the string, or this field/byte sublist. */
379 76 : if (dash_found)
380 : {
381 61 : dash_found = false;
382 :
383 61 : if (!lhs_specified && !rhs_specified)
384 2 : FATAL_ERROR (_("invalid range with no endpoint: -"));
385 :
386 : /* A range. Possibilities: -n, m-n, n-.
387 : In any case, `initial' contains the start of the range. */
388 59 : if (!rhs_specified)
389 : {
390 : /* `n-'. From `initial' to end of line. */
391 32 : eol_range_start = initial;
392 32 : field_found = true;
393 : }
394 : else
395 : {
396 : /* `m-n' or `-n' (1-n). */
397 27 : if (value < initial)
398 1 : FATAL_ERROR (_("invalid decreasing range"));
399 :
400 : /* Is there already a range going to end of line? */
401 26 : if (eol_range_start != 0)
402 : {
403 : /* Yes. Is the new sequence already contained
404 : in the old one? If so, no processing is
405 : necessary. */
406 5 : if (initial < eol_range_start)
407 : {
408 : /* No, the new sequence starts before the
409 : old. Does the old range going to end of line
410 : extend into the new range? */
411 4 : if (eol_range_start <= value)
412 : {
413 : /* Yes. Simply move the end of line marker. */
414 1 : eol_range_start = initial;
415 : }
416 : else
417 : {
418 : /* No. A simple range, before and disjoint from
419 : the range going to end of line. Fill it. */
420 3 : ADD_RANGE_PAIR (rp, initial, value);
421 : }
422 :
423 : /* In any case, some fields were selected. */
424 3 : field_found = true;
425 : }
426 : }
427 : else
428 : {
429 : /* There is no range going to end of line. */
430 21 : ADD_RANGE_PAIR (rp, initial, value);
431 20 : field_found = true;
432 : }
433 24 : value = 0;
434 : }
435 : }
436 : else
437 : {
438 : /* A simple field number, not a range. */
439 15 : ADD_RANGE_PAIR (rp, value, value);
440 10 : value = 0;
441 10 : field_found = true;
442 : }
443 :
444 66 : if (*fieldstr == '\0')
445 : {
446 45 : break;
447 : }
448 :
449 21 : fieldstr++;
450 21 : lhs_specified = false;
451 21 : rhs_specified = false;
452 : }
453 79 : else if (ISDIGIT (*fieldstr))
454 : {
455 : /* Record beginning of digit string, in case we have to
456 : complain about it. */
457 : static char const *num_start;
458 78 : if (!in_digits || !num_start)
459 74 : num_start = fieldstr;
460 78 : in_digits = true;
461 :
462 78 : if (dash_found)
463 28 : rhs_specified = 1;
464 : else
465 50 : lhs_specified = 1;
466 :
467 : /* Detect overflow. */
468 78 : if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t))
469 : {
470 : /* In case the user specified -c$(echo 2^64|bc),22,
471 : complain only about the first number. */
472 : /* Determine the length of the offending number. */
473 0 : size_t len = strspn (num_start, "0123456789");
474 0 : char *bad_num = xstrndup (num_start, len);
475 0 : if (operating_mode == byte_mode)
476 0 : error (0, 0,
477 : _("byte offset %s is too large"), quote (bad_num));
478 : else
479 0 : error (0, 0,
480 : _("field number %s is too large"), quote (bad_num));
481 0 : free (bad_num);
482 0 : exit (EXIT_FAILURE);
483 : }
484 :
485 78 : fieldstr++;
486 : }
487 : else
488 1 : FATAL_ERROR (_("invalid byte or field list"));
489 : }
490 :
491 45 : max_range_endpoint = 0;
492 73 : for (i = 0; i < n_rp; i++)
493 : {
494 28 : if (rp[i].hi > max_range_endpoint)
495 27 : max_range_endpoint = rp[i].hi;
496 : }
497 :
498 : /* Allocate an array large enough so that it may be indexed by
499 : the field numbers corresponding to all finite ranges
500 : (i.e. `2-6' or `-4', but not `5-') in FIELDSTR. */
501 :
502 45 : printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1);
503 :
504 45 : qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
505 :
506 : /* Set the array entries corresponding to integers in the ranges of RP. */
507 73 : for (i = 0; i < n_rp; i++)
508 : {
509 : size_t j;
510 : size_t rsi_candidate;
511 :
512 : /* Record the range-start indices, i.e., record each start
513 : index that is not part of any other (lo..hi] range. */
514 28 : rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo;
515 28 : if (output_delimiter_specified
516 0 : && !is_printable_field (rsi_candidate))
517 0 : mark_range_start (rsi_candidate);
518 :
519 87 : for (j = rp[i].lo; j <= rp[i].hi; j++)
520 59 : mark_printable_field (j);
521 : }
522 :
523 45 : if (output_delimiter_specified
524 0 : && !complement
525 0 : && eol_range_start && !is_printable_field (eol_range_start))
526 0 : mark_range_start (eol_range_start);
527 :
528 45 : free (rp);
529 :
530 45 : return field_found;
531 : }
532 :
533 : /* Read from stream STREAM, printing to standard output any selected bytes. */
534 :
535 : static void
536 26 : cut_bytes (FILE *stream)
537 : {
538 : size_t byte_idx; /* Number of bytes in the line so far. */
539 : /* Whether to begin printing delimiters between ranges for the current line.
540 : Set after we've begun printing data corresponding to the first range. */
541 : bool print_delimiter;
542 :
543 26 : byte_idx = 0;
544 26 : print_delimiter = false;
545 : while (1)
546 168 : {
547 : int c; /* Each character from the file. */
548 :
549 194 : c = getc (stream);
550 :
551 194 : if (c == '\n')
552 : {
553 153 : putchar ('\n');
554 153 : byte_idx = 0;
555 153 : print_delimiter = false;
556 : }
557 41 : else if (c == EOF)
558 : {
559 26 : if (byte_idx > 0)
560 2 : putchar ('\n');
561 26 : break;
562 : }
563 : else
564 : {
565 : bool range_start;
566 15 : bool *rs = output_delimiter_specified ? &range_start : NULL;
567 15 : if (print_kth (++byte_idx, rs))
568 : {
569 5 : if (rs && *rs && print_delimiter)
570 : {
571 0 : fwrite (output_delimiter_string, sizeof (char),
572 : output_delimiter_length, stdout);
573 : }
574 5 : print_delimiter = true;
575 5 : putchar (c);
576 : }
577 : }
578 : }
579 26 : }
580 :
581 : /* Read from stream STREAM, printing to standard output any selected fields. */
582 :
583 : static void
584 18 : cut_fields (FILE *stream)
585 : {
586 : int c;
587 18 : size_t field_idx = 1;
588 18 : bool found_any_selected_field = false;
589 : bool buffer_first_field;
590 :
591 18 : c = getc (stream);
592 18 : if (c == EOF)
593 1 : return;
594 :
595 17 : ungetc (c, stream);
596 :
597 : /* To support the semantics of the -s flag, we may have to buffer
598 : all of the first field to determine whether it is `delimited.'
599 : But that is unnecessary if all non-delimited lines must be printed
600 : and the first field has been selected, or if non-delimited lines
601 : must be suppressed and the first field has *not* been selected.
602 : That is because a non-delimited line has exactly one field. */
603 17 : buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
604 :
605 : while (1)
606 : {
607 263 : if (field_idx == 1 && buffer_first_field)
608 : {
609 : ssize_t len;
610 : size_t n_bytes;
611 :
612 15 : len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
613 : GETNLINE_NO_LIMIT, delim, '\n', stream);
614 15 : if (len < 0)
615 : {
616 2 : free (field_1_buffer);
617 2 : field_1_buffer = NULL;
618 2 : if (ferror (stream) || feof (stream))
619 : break;
620 0 : xalloc_die ();
621 : }
622 :
623 13 : n_bytes = len;
624 13 : assert (n_bytes != 0);
625 :
626 : /* If the first field extends to the end of line (it is not
627 : delimited) and we are printing all non-delimited lines,
628 : print this one. */
629 13 : if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
630 : {
631 5 : if (suppress_non_delimited)
632 : {
633 : /* Empty. */
634 : }
635 : else
636 : {
637 5 : fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
638 : /* Make sure the output line is newline terminated. */
639 5 : if (field_1_buffer[n_bytes - 1] != '\n')
640 1 : putchar ('\n');
641 : }
642 5 : continue;
643 : }
644 8 : if (print_kth (1, NULL))
645 : {
646 : /* Print the field, but not the trailing delimiter. */
647 1 : fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
648 1 : found_any_selected_field = true;
649 : }
650 8 : ++field_idx;
651 : }
652 :
653 133 : if (c != EOF)
654 : {
655 133 : if (print_kth (field_idx, NULL))
656 : {
657 103 : if (found_any_selected_field)
658 : {
659 84 : fwrite (output_delimiter_string, sizeof (char),
660 : output_delimiter_length, stdout);
661 : }
662 103 : found_any_selected_field = true;
663 :
664 208 : while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
665 : {
666 2 : putchar (c);
667 : }
668 : }
669 : else
670 : {
671 30 : while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
672 : {
673 : /* Empty. */
674 : }
675 : }
676 : }
677 :
678 133 : if (c == '\n')
679 : {
680 9 : c = getc (stream);
681 9 : if (c != EOF)
682 : {
683 8 : ungetc (c, stream);
684 8 : c = '\n';
685 : }
686 : }
687 :
688 133 : if (c == delim)
689 110 : ++field_idx;
690 23 : else if (c == '\n' || c == EOF)
691 : {
692 23 : if (found_any_selected_field
693 3 : || !(suppress_non_delimited && field_idx == 1))
694 22 : putchar ('\n');
695 23 : if (c == EOF)
696 15 : break;
697 8 : field_idx = 1;
698 8 : found_any_selected_field = false;
699 : }
700 : }
701 : }
702 :
703 : static void
704 44 : cut_stream (FILE *stream)
705 : {
706 44 : if (operating_mode == byte_mode)
707 26 : cut_bytes (stream);
708 : else
709 18 : cut_fields (stream);
710 44 : }
711 :
712 : /* Process file FILE to standard output.
713 : Return true if successful. */
714 :
715 : static bool
716 46 : cut_file (char const *file)
717 : {
718 : FILE *stream;
719 :
720 46 : if (STREQ (file, "-"))
721 : {
722 36 : have_read_stdin = true;
723 36 : stream = stdin;
724 : }
725 : else
726 : {
727 10 : stream = fopen (file, "r");
728 10 : if (stream == NULL)
729 : {
730 2 : error (0, errno, "%s", file);
731 2 : return false;
732 : }
733 : }
734 :
735 44 : cut_stream (stream);
736 :
737 44 : if (ferror (stream))
738 : {
739 7 : error (0, errno, "%s", file);
740 7 : return false;
741 : }
742 37 : if (STREQ (file, "-"))
743 36 : clearerr (stream); /* Also clear EOF. */
744 1 : else if (fclose (stream) == EOF)
745 : {
746 0 : error (0, errno, "%s", file);
747 0 : return false;
748 : }
749 37 : return true;
750 : }
751 :
752 : int
753 102 : main (int argc, char **argv)
754 : {
755 : int optc;
756 : bool ok;
757 102 : bool delim_specified = false;
758 : char *spec_list_string IF_LINT(= NULL);
759 :
760 : initialize_main (&argc, &argv);
761 102 : program_name = argv[0];
762 102 : setlocale (LC_ALL, "");
763 : bindtextdomain (PACKAGE, LOCALEDIR);
764 : textdomain (PACKAGE);
765 :
766 102 : atexit (close_stdout);
767 :
768 102 : operating_mode = undefined_mode;
769 :
770 : /* By default, all non-delimited lines are printed. */
771 102 : suppress_non_delimited = false;
772 :
773 102 : delim = '\0';
774 102 : have_read_stdin = false;
775 :
776 290 : while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
777 : {
778 102 : switch (optc)
779 : {
780 54 : case 'b':
781 : case 'c':
782 : /* Build the byte list. */
783 54 : if (operating_mode != undefined_mode)
784 1 : FATAL_ERROR (_("only one type of list may be specified"));
785 53 : operating_mode = byte_mode;
786 53 : spec_list_string = optarg;
787 53 : break;
788 :
789 20 : case 'f':
790 : /* Build the field list. */
791 20 : if (operating_mode != undefined_mode)
792 1 : FATAL_ERROR (_("only one type of list may be specified"));
793 19 : operating_mode = field_mode;
794 19 : spec_list_string = optarg;
795 19 : break;
796 :
797 5 : case 'd':
798 : /* New delimiter. */
799 : /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
800 5 : if (optarg[0] != '\0' && optarg[1] != '\0')
801 1 : FATAL_ERROR (_("the delimiter must be a single character"));
802 4 : delim = optarg[0];
803 4 : delim_specified = true;
804 4 : break;
805 :
806 2 : case OUTPUT_DELIMITER_OPTION:
807 2 : output_delimiter_specified = true;
808 : /* Interpret --output-delimiter='' to mean
809 : `use the NUL byte as the delimiter.' */
810 4 : output_delimiter_length = (optarg[0] == '\0'
811 2 : ? 1 : strlen (optarg));
812 2 : output_delimiter_string = xstrdup (optarg);
813 2 : break;
814 :
815 3 : case 'n':
816 3 : break;
817 :
818 4 : case 's':
819 4 : suppress_non_delimited = true;
820 4 : break;
821 :
822 1 : case COMPLEMENT_OPTION:
823 1 : complement = true;
824 1 : break;
825 :
826 1 : case_GETOPT_HELP_CHAR;
827 :
828 1 : case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
829 :
830 11 : default:
831 11 : usage (EXIT_FAILURE);
832 : }
833 : }
834 :
835 86 : if (operating_mode == undefined_mode)
836 16 : FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
837 :
838 70 : if (delim != '\0' && operating_mode != field_mode)
839 1 : FATAL_ERROR (_("an input delimiter may be specified only\
840 : when operating on fields"));
841 :
842 69 : if (suppress_non_delimited && operating_mode != field_mode)
843 1 : FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
844 : \tonly when operating on fields"));
845 :
846 68 : if (output_delimiter_specified)
847 : {
848 0 : range_start_ht = hash_initialize (HT_RANGE_START_INDEX_INITIAL_CAPACITY,
849 : NULL, hash_int,
850 : hash_compare_ints, NULL);
851 0 : if (range_start_ht == NULL)
852 0 : xalloc_die ();
853 :
854 : }
855 :
856 68 : if (! set_fields (spec_list_string))
857 : {
858 0 : if (operating_mode == field_mode)
859 0 : FATAL_ERROR (_("missing list of fields"));
860 : else
861 0 : FATAL_ERROR (_("missing list of positions"));
862 : }
863 :
864 45 : if (!delim_specified)
865 44 : delim = '\t';
866 :
867 45 : if (output_delimiter_string == NULL)
868 : {
869 : static char dummy[2];
870 45 : dummy[0] = delim;
871 45 : dummy[1] = '\0';
872 45 : output_delimiter_string = dummy;
873 45 : output_delimiter_length = 1;
874 : }
875 :
876 45 : if (optind == argc)
877 33 : ok = cut_file ("-");
878 : else
879 25 : for (ok = true; optind < argc; optind++)
880 13 : ok &= cut_file (argv[optind]);
881 :
882 45 : if (range_start_ht)
883 0 : hash_free (range_start_ht);
884 :
885 45 : if (have_read_stdin && fclose (stdin) == EOF)
886 : {
887 0 : error (0, errno, "-");
888 0 : ok = false;
889 : }
890 :
891 45 : exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
892 : }
|