Line data Source code
1 : /* paste - merge lines of files
2 : Copyright (C) 1997-2005, 2008 Free Software Foundation, Inc.
3 : Copyright (C) 1984 David M. Ihnat
4 :
5 : This program is free software: you can redistribute it and/or modify
6 : it under the terms of the GNU General Public License as published by
7 : the Free Software Foundation, either version 3 of the License, or
8 : (at your option) any later version.
9 :
10 : This program is distributed in the hope that it will be useful,
11 : but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : GNU General Public License for more details.
14 :
15 : You should have received a copy of the GNU General Public License
16 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 :
18 : /* Written by David Ihnat. */
19 :
20 : /* The list of valid escape sequences has been expanded over the Unix
21 : version, to include \b, \f, \r, and \v.
22 :
23 : POSIX changes, bug fixes, long-named options, and cleanup
24 : by David MacKenzie <djm@gnu.ai.mit.edu>.
25 :
26 : Options:
27 : --serial
28 : -s Paste one file at a time rather than
29 : one line from each file.
30 : --delimiters=delim-list
31 : -d delim-list Consecutively use the characters in
32 : DELIM-LIST instead of tab to separate
33 : merged lines. When DELIM-LIST is exhausted,
34 : start again at its beginning.
35 : A FILE of `-' means standard input.
36 : If no FILEs are given, standard input is used. */
37 :
38 : #include <config.h>
39 :
40 : #include <stdio.h>
41 : #include <getopt.h>
42 : #include <sys/types.h>
43 : #include "system.h"
44 : #include "error.h"
45 : #include "quotearg.h"
46 :
47 : /* The official name of this program (e.g., no `g' prefix). */
48 : #define PROGRAM_NAME "paste"
49 :
50 : #define AUTHORS "David M. Ihnat", "David MacKenzie"
51 :
52 : /* Indicates that no delimiter should be added in the current position. */
53 : #define EMPTY_DELIM '\0'
54 :
55 : /* Name this program was run with. */
56 : char *program_name;
57 :
58 : /* True if we have read standard input at some point. */
59 : static bool have_read_stdin;
60 :
61 : /* If true, merge subsequent lines of each file rather than
62 : corresponding lines from each file in parallel. */
63 : static bool serial_merge;
64 :
65 : /* The delimiters between lines of input files (used cyclically). */
66 : static char *delims;
67 :
68 : /* A pointer to the character after the end of `delims'. */
69 : static char const *delim_end;
70 :
/* Long-option table handed to getopt_long in main.  `--serial' and
   `--delimiters' are reported as their short-option characters `s'
   and `d'; the help and version entries come from shared macros.  */
static struct option const longopts[] =
{
  {"serial", no_argument, NULL, 's'},
  {"delimiters", required_argument, NULL, 'd'},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  /* All-zero entry marks the end of the table.  */
  {NULL, 0, NULL, 0}
};
79 :
80 : /* Set globals delims and delim_end. Copy STRPTR to DELIMS, converting
81 : backslash representations of special characters in STRPTR to their actual
82 : values. The set of possible backslash characters has been expanded beyond
83 : that recognized by the Unix version.
84 : Return 0 upon success.
85 : If the string ends in an odd number of backslashes, ignore the
86 : final backslash and return nonzero. */
87 :
88 : static int
89 68 : collapse_escapes (char const *strptr)
90 : {
91 68 : char *strout = xstrdup (strptr);
92 68 : bool backslash_at_end = false;
93 :
94 68 : delims = strout;
95 :
96 206 : while (*strptr)
97 : {
98 88 : if (*strptr != '\\') /* Is it an escape character? */
99 51 : *strout++ = *strptr++; /* No, just transfer it. */
100 : else
101 : {
102 37 : switch (*++strptr)
103 : {
104 6 : case '0':
105 6 : *strout++ = EMPTY_DELIM;
106 6 : break;
107 :
108 1 : case 'b':
109 1 : *strout++ = '\b';
110 1 : break;
111 :
112 1 : case 'f':
113 1 : *strout++ = '\f';
114 1 : break;
115 :
116 1 : case 'n':
117 1 : *strout++ = '\n';
118 1 : break;
119 :
120 3 : case 'r':
121 3 : *strout++ = '\r';
122 3 : break;
123 :
124 1 : case 't':
125 1 : *strout++ = '\t';
126 1 : break;
127 :
128 1 : case 'v':
129 1 : *strout++ = '\v';
130 1 : break;
131 :
132 1 : case '\\':
133 1 : *strout++ = '\\';
134 1 : break;
135 :
136 18 : case '\0':
137 18 : backslash_at_end = true;
138 18 : goto done;
139 :
140 4 : default:
141 4 : *strout++ = *strptr;
142 4 : break;
143 : }
144 19 : strptr++;
145 : }
146 : }
147 :
148 50 : done:;
149 :
150 68 : delim_end = strout;
151 68 : return backslash_at_end ? 1 : 0;
152 : }
153 :
154 : /* Report a write error and exit. */
155 :
156 : static void write_error (void) ATTRIBUTE_NORETURN;
157 : static void
158 0 : write_error (void)
159 : {
160 0 : error (EXIT_FAILURE, errno, _("write error"));
161 0 : abort ();
162 : }
163 :
/* Write the byte C to standard output.  If putchar reports failure
   (a negative result), hand over to write_error, which does not
   return.  */

static inline void
xputchar (char c)
{
  if (putchar (c) >= 0)
    return;

  write_error ();
}
172 :
173 : /* Perform column paste on the NFILES files named in FNAMPTR.
174 : Return true if successful, false if one or more files could not be
175 : opened or read. */
176 :
177 : static bool
178 39 : paste_parallel (size_t nfiles, char **fnamptr)
179 : {
180 39 : bool ok = true;
181 : /* If all files are just ready to be closed, or will be on this
182 : round, the string of delimiters must be preserved.
183 : delbuf[0] through delbuf[nfiles]
184 : store the delimiters for closed files. */
185 39 : char *delbuf = xmalloc (nfiles + 2);
186 :
187 : /* Streams open to the files to process; NULL if the corresponding
188 : stream is closed. */
189 39 : FILE **fileptr = xnmalloc (nfiles + 1, sizeof *fileptr);
190 :
191 : /* Number of files still open to process. */
192 : size_t files_open;
193 :
194 : /* True if any fopen got fd == STDIN_FILENO. */
195 39 : bool opened_stdin = false;
196 :
197 : /* Attempt to open all files. This could be expanded to an infinite
198 : number of files, but at the (considerable) expense of remembering
199 : each file and its current offset, then opening/reading/closing. */
200 :
201 95 : for (files_open = 0; files_open < nfiles; ++files_open)
202 : {
203 63 : if (STREQ (fnamptr[files_open], "-"))
204 : {
205 42 : have_read_stdin = true;
206 42 : fileptr[files_open] = stdin;
207 : }
208 : else
209 : {
210 21 : fileptr[files_open] = fopen (fnamptr[files_open], "r");
211 21 : if (fileptr[files_open] == NULL)
212 7 : error (EXIT_FAILURE, errno, "%s", fnamptr[files_open]);
213 14 : else if (fileno (fileptr[files_open]) == STDIN_FILENO)
214 0 : opened_stdin = true;
215 : }
216 : }
217 :
218 32 : if (opened_stdin && have_read_stdin)
219 0 : error (EXIT_FAILURE, 0, _("standard input is closed"));
220 :
221 : /* Read a line from each file and output it to stdout separated by a
222 : delimiter, until we go through the loop without successfully
223 : reading from any of the files. */
224 :
225 295 : while (files_open)
226 : {
227 : /* Set up for the next line. */
228 231 : bool somedone = false;
229 231 : char const *delimptr = delims;
230 231 : size_t delims_saved = 0; /* Number of delims saved in `delbuf'. */
231 : size_t i;
232 :
233 573 : for (i = 0; i < nfiles && files_open; i++)
234 : {
235 : int chr IF_LINT (= 0); /* Input character. */
236 : int err IF_LINT (= 0); /* Input errno value. */
237 342 : size_t line_length = 0; /* Number of chars in line. */
238 :
239 342 : if (fileptr[i])
240 : {
241 318 : chr = getc (fileptr[i]);
242 318 : err = errno;
243 318 : if (chr != EOF && delims_saved)
244 : {
245 17 : if (fwrite (delbuf, 1, delims_saved, stdout) != delims_saved)
246 0 : write_error ();
247 17 : delims_saved = 0;
248 : }
249 :
250 643 : while (chr != EOF)
251 : {
252 272 : line_length++;
253 272 : if (chr == '\n')
254 265 : break;
255 7 : xputchar (chr);
256 7 : chr = getc (fileptr[i]);
257 7 : err = errno;
258 : }
259 : }
260 :
261 342 : if (line_length == 0)
262 : {
263 : /* EOF, read error, or closed file.
264 : If an EOF or error, close the file. */
265 76 : if (fileptr[i])
266 : {
267 52 : if (ferror (fileptr[i]))
268 : {
269 10 : error (0, err, "%s", fnamptr[i]);
270 10 : ok = false;
271 : }
272 52 : if (fileptr[i] == stdin)
273 41 : clearerr (fileptr[i]); /* Also clear EOF. */
274 11 : else if (fclose (fileptr[i]) == EOF)
275 : {
276 0 : error (0, errno, "%s", fnamptr[i]);
277 0 : ok = false;
278 : }
279 :
280 52 : fileptr[i] = NULL;
281 52 : files_open--;
282 : }
283 :
284 76 : if (i + 1 == nfiles)
285 : {
286 : /* End of this output line.
287 : Is this the end of the whole thing? */
288 39 : if (somedone)
289 : {
290 : /* No. Some files were not closed for this line. */
291 12 : if (delims_saved)
292 : {
293 2 : if (fwrite (delbuf, 1, delims_saved, stdout)
294 : != delims_saved)
295 0 : write_error ();
296 2 : delims_saved = 0;
297 : }
298 12 : xputchar ('\n');
299 : }
300 39 : continue; /* Next read of files, or exit. */
301 : }
302 : else
303 : {
304 : /* Closed file; add delimiter to `delbuf'. */
305 37 : if (*delimptr != EMPTY_DELIM)
306 36 : delbuf[delims_saved++] = *delimptr;
307 37 : if (++delimptr == delim_end)
308 37 : delimptr = delims;
309 : }
310 : }
311 : else
312 : {
313 : /* Some data read. */
314 266 : somedone = true;
315 :
316 : /* Except for last file, replace last newline with delim. */
317 266 : if (i + 1 != nfiles)
318 : {
319 79 : if (chr != '\n' && chr != EOF)
320 0 : xputchar (chr);
321 79 : if (*delimptr != EMPTY_DELIM)
322 75 : xputchar (*delimptr);
323 79 : if (++delimptr == delim_end)
324 79 : delimptr = delims;
325 : }
326 : else
327 : {
328 : /* If the last line of the last file lacks a newline,
329 : print one anyhow. POSIX requires this. */
330 187 : char c = (chr == EOF ? '\n' : chr);
331 187 : xputchar (c);
332 : }
333 : }
334 : }
335 : }
336 32 : free (fileptr);
337 32 : free (delbuf);
338 32 : return ok;
339 : }
340 :
341 : /* Perform serial paste on the NFILES files named in FNAMPTR.
342 : Return true if no errors, false if one or more files could not be
343 : opened or read. */
344 :
345 : static bool
346 11 : paste_serial (size_t nfiles, char **fnamptr)
347 : {
348 11 : bool ok = true; /* false if open or read errors occur. */
349 : int charnew, charold; /* Current and previous char read. */
350 : char const *delimptr; /* Current delimiter char. */
351 : FILE *fileptr; /* Open for reading current file. */
352 :
353 23 : for (; nfiles; nfiles--, fnamptr++)
354 : {
355 : int saved_errno;
356 12 : bool is_stdin = STREQ (*fnamptr, "-");
357 12 : if (is_stdin)
358 : {
359 9 : have_read_stdin = true;
360 9 : fileptr = stdin;
361 : }
362 : else
363 : {
364 3 : fileptr = fopen (*fnamptr, "r");
365 3 : if (fileptr == NULL)
366 : {
367 1 : error (0, errno, "%s", *fnamptr);
368 1 : ok = false;
369 1 : continue;
370 : }
371 : }
372 :
373 11 : delimptr = delims; /* Set up for delimiter string. */
374 :
375 11 : charold = getc (fileptr);
376 11 : saved_errno = errno;
377 11 : if (charold != EOF)
378 : {
379 : /* `charold' is set up. Hit it!
380 : Keep reading characters, stashing them in `charnew';
381 : output `charold', converting to the appropriate delimiter
382 : character if needed. After the EOF, output `charold'
383 : if it's a newline; otherwise, output it and then a newline. */
384 :
385 81 : while ((charnew = getc (fileptr)) != EOF)
386 : {
387 : /* Process the old character. */
388 63 : if (charold == '\n')
389 : {
390 58 : if (*delimptr != EMPTY_DELIM)
391 51 : xputchar (*delimptr);
392 :
393 58 : if (++delimptr == delim_end)
394 58 : delimptr = delims;
395 : }
396 : else
397 5 : xputchar (charold);
398 :
399 63 : charold = charnew;
400 : }
401 9 : saved_errno = errno;
402 :
403 : /* Hit EOF. Process that last character. */
404 9 : xputchar (charold);
405 : }
406 :
407 11 : if (charold != '\n')
408 3 : xputchar ('\n');
409 :
410 11 : if (ferror (fileptr))
411 : {
412 2 : error (0, saved_errno, "%s", *fnamptr);
413 2 : ok = false;
414 : }
415 11 : if (is_stdin)
416 9 : clearerr (fileptr); /* Also clear EOF. */
417 2 : else if (fclose (fileptr) == EOF)
418 : {
419 0 : error (0, errno, "%s", *fnamptr);
420 0 : ok = false;
421 : }
422 : }
423 11 : return ok;
424 : }
425 :
426 : void
427 11 : usage (int status)
428 : {
429 11 : if (status != EXIT_SUCCESS)
430 10 : fprintf (stderr, _("Try `%s --help' for more information.\n"),
431 : program_name);
432 : else
433 : {
434 1 : printf (_("\
435 : Usage: %s [OPTION]... [FILE]...\n\
436 : "),
437 : program_name);
438 1 : fputs (_("\
439 : Write lines consisting of the sequentially corresponding lines from\n\
440 : each FILE, separated by TABs, to standard output.\n\
441 : With no FILE, or when FILE is -, read standard input.\n\
442 : \n\
443 : "), stdout);
444 1 : fputs (_("\
445 : Mandatory arguments to long options are mandatory for short options too.\n\
446 : "), stdout);
447 1 : fputs (_("\
448 : -d, --delimiters=LIST reuse characters from LIST instead of TABs\n\
449 : -s, --serial paste one file at a time instead of in parallel\n\
450 : "), stdout);
451 1 : fputs (HELP_OPTION_DESCRIPTION, stdout);
452 1 : fputs (VERSION_OPTION_DESCRIPTION, stdout);
453 : /* FIXME: add a couple of examples. */
454 1 : emit_bug_reporting_address ();
455 : }
456 11 : exit (status);
457 : }
458 :
459 : int
460 80 : main (int argc, char **argv)
461 : {
462 : int optc;
463 : bool ok;
464 80 : char const *delim_arg = "\t";
465 :
466 : initialize_main (&argc, &argv);
467 80 : program_name = argv[0];
468 80 : setlocale (LC_ALL, "");
469 : bindtextdomain (PACKAGE, LOCALEDIR);
470 : textdomain (PACKAGE);
471 :
472 80 : atexit (close_stdout);
473 :
474 80 : have_read_stdin = false;
475 80 : serial_merge = false;
476 :
477 202 : while ((optc = getopt_long (argc, argv, "d:s", longopts, NULL)) != -1)
478 : {
479 54 : switch (optc)
480 : {
481 30 : case 'd':
482 : /* Delimiter character(s). */
483 30 : delim_arg = (optarg[0] == '\0' ? "\\0" : optarg);
484 30 : break;
485 :
486 12 : case 's':
487 12 : serial_merge = true;
488 12 : break;
489 :
490 1 : case_GETOPT_HELP_CHAR;
491 :
492 1 : case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
493 :
494 10 : default:
495 10 : usage (EXIT_FAILURE);
496 : }
497 : }
498 :
499 68 : if (optind == argc)
500 34 : argv[argc++] = "-";
501 :
502 68 : if (collapse_escapes (delim_arg))
503 : {
504 : /* Don't use the default quoting style, because that would double the
505 : number of displayed backslashes, making the diagnostic look bogus. */
506 18 : set_quoting_style (NULL, escape_quoting_style);
507 18 : error (EXIT_FAILURE, 0,
508 : _("delimiter list ends with an unescaped backslash: %s"),
509 : quotearg_colon (delim_arg));
510 : }
511 :
512 50 : if (!serial_merge)
513 39 : ok = paste_parallel (argc - optind, &argv[optind]);
514 : else
515 11 : ok = paste_serial (argc - optind, &argv[optind]);
516 :
517 43 : free (delims);
518 :
519 43 : if (have_read_stdin && fclose (stdin) == EOF)
520 0 : error (EXIT_FAILURE, errno, "-");
521 43 : exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
522 : }
|