Line data Source code
1 : /* tac - concatenate and print files in reverse
2 : Copyright (C) 1988-1991, 1995-2006 Free Software Foundation, Inc.
3 :
4 : This program is free software: you can redistribute it and/or modify
5 : it under the terms of the GNU General Public License as published by
6 : the Free Software Foundation, either version 3 of the License, or
7 : (at your option) any later version.
8 :
9 : This program is distributed in the hope that it will be useful,
10 : but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : GNU General Public License for more details.
13 :
14 : You should have received a copy of the GNU General Public License
15 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
16 :
17 : /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18 : GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
19 :
20 : /* Copy each FILE, or the standard input if none are given or when a
21 : FILE name of "-" is encountered, to the standard output with the
22 : order of the records reversed. The records are separated by
23 : instances of a string, or a newline if none is given. By default, the
24 : separator string is attached to the end of the record that it
25 : follows in the file.
26 :
27 : Options:
28 : -b, --before The separator is attached to the beginning
29 : of the record that it precedes in the file.
30 : -r, --regex The separator is a regular expression.
31 : -s, --separator=separator Use SEPARATOR as the record separator.
32 :
33 : To reverse a file byte by byte, use (in bash, ksh, or sh):
34 : tac -r -s '.\|
35 : ' file */
36 :
37 : #include <config.h>
38 :
39 : #include <stdio.h>
40 : #include <getopt.h>
41 : #include <sys/types.h>
42 : #include "system.h"
43 :
44 : #include <regex.h>
45 :
46 : #include "error.h"
47 : #include "quote.h"
48 : #include "quotearg.h"
49 : #include "safe-read.h"
50 : #include "stdlib--.h"
51 :
52 : /* The official name of this program (e.g., no `g' prefix). */
53 : #define PROGRAM_NAME "tac"
54 :
55 : #define AUTHORS "Jay Lepreau", "David MacKenzie"
56 :
57 : #if defined __MSDOS__ || defined _WIN32
58 : /* Define this to non-zero on systems for which the regular mechanism
59 : (of unlinking an open file and expecting to be able to write, seek
60 : back to the beginning, then reread it) doesn't work. E.g., on Windows
61 : and DOS systems. */
62 : # define DONT_UNLINK_WHILE_OPEN 1
63 : #endif
64 :
65 :
66 : #ifndef DEFAULT_TMPDIR
67 : # define DEFAULT_TMPDIR "/tmp"
68 : #endif
69 :
70 : /* The number of bytes per atomic read. */
71 : #define INITIAL_READSIZE 8192
72 :
73 : /* The number of bytes per atomic write. */
74 : #define WRITESIZE 8192
75 :
76 : /* The name this program was run with. */
77 : char *program_name;
78 :
79 : /* The string that separates the records of the file. */
80 : static char const *separator;
81 :
82 : /* True if we have ever read standard input. */
83 : static bool have_read_stdin = false;
84 :
85 : /* If true, print `separator' along with the record preceding it
86 : in the file; otherwise with the record following it. */
87 : static bool separator_ends_record;
88 :
89 : /* 0 if `separator' is to be matched as a regular expression;
90 : otherwise, the length of `separator', used as a sentinel to
91 : stop the search. */
92 : static size_t sentinel_length;
93 :
94 : /* The length of a match with `separator'. If `sentinel_length' is 0,
95 : `match_length' is computed every time a match succeeds;
96 : otherwise, it is simply the length of `separator'. */
97 : static size_t match_length;
98 :
99 : /* The input buffer. */
100 : static char *G_buffer;
101 :
102 : /* The number of bytes to read at once into `buffer'. */
103 : static size_t read_size;
104 :
105 : /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
106 : The extra 2 bytes allow `past_end' to have a value beyond the
107 : end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
108 : static size_t G_buffer_size;
109 :
110 : /* The compiled regular expression representing `separator'. */
111 : static struct re_pattern_buffer compiled_separator;
112 : static char compiled_separator_fastmap[UCHAR_MAX + 1];
113 :
114 : static struct option const longopts[] =
115 : {
116 : {"before", no_argument, NULL, 'b'},
117 : {"regex", no_argument, NULL, 'r'},
118 : {"separator", required_argument, NULL, 's'},
119 : {GETOPT_HELP_OPTION_DECL},
120 : {GETOPT_VERSION_OPTION_DECL},
121 : {NULL, 0, NULL, 0}
122 : };
123 :
124 : void
125 9 : usage (int status)
126 : {
127 9 : if (status != EXIT_SUCCESS)
128 8 : fprintf (stderr, _("Try `%s --help' for more information.\n"),
129 : program_name);
130 : else
131 : {
132 1 : printf (_("\
133 : Usage: %s [OPTION]... [FILE]...\n\
134 : "),
135 : program_name);
136 1 : fputs (_("\
137 : Write each FILE to standard output, last line first.\n\
138 : With no FILE, or when FILE is -, read standard input.\n\
139 : \n\
140 : "), stdout);
141 1 : fputs (_("\
142 : Mandatory arguments to long options are mandatory for short options too.\n\
143 : "), stdout);
144 1 : fputs (_("\
145 : -b, --before attach the separator before instead of after\n\
146 : -r, --regex interpret the separator as a regular expression\n\
147 : -s, --separator=STRING use STRING as the separator instead of newline\n\
148 : "), stdout);
149 1 : fputs (HELP_OPTION_DESCRIPTION, stdout);
150 1 : fputs (VERSION_OPTION_DESCRIPTION, stdout);
151 1 : emit_bug_reporting_address ();
152 : }
153 9 : exit (status);
154 : }
155 :
156 : /* Print the characters from START to PAST_END - 1.
157 : If START is NULL, just flush the buffer. */
158 :
159 : static void
160 243 : output (const char *start, const char *past_end)
161 : {
162 : static char buffer[WRITESIZE];
163 : static size_t bytes_in_buffer = 0;
164 243 : size_t bytes_to_add = past_end - start;
165 243 : size_t bytes_available = WRITESIZE - bytes_in_buffer;
166 :
167 243 : if (start == 0)
168 : {
169 80 : fwrite (buffer, 1, bytes_in_buffer, stdout);
170 80 : bytes_in_buffer = 0;
171 80 : return;
172 : }
173 :
174 : /* Write out as many full buffers as possible. */
175 326 : while (bytes_to_add >= bytes_available)
176 : {
177 0 : memcpy (buffer + bytes_in_buffer, start, bytes_available);
178 0 : bytes_to_add -= bytes_available;
179 0 : start += bytes_available;
180 0 : fwrite (buffer, 1, WRITESIZE, stdout);
181 0 : bytes_in_buffer = 0;
182 0 : bytes_available = WRITESIZE;
183 : }
184 :
185 163 : memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
186 163 : bytes_in_buffer += bytes_to_add;
187 : }
188 :
189 : /* Print in reverse the file open on descriptor FD for reading FILE.
190 : Return true if successful. */
191 :
192 : static bool
193 30 : tac_seekable (int input_fd, const char *file)
194 : {
195 : /* Pointer to the location in `G_buffer' where the search for
196 : the next separator will begin. */
197 : char *match_start;
198 :
199 : /* Pointer to one past the rightmost character in `G_buffer' that
200 : has not been printed yet. */
201 : char *past_end;
202 :
203 : /* Length of the record growing in `G_buffer'. */
204 : size_t saved_record_size;
205 :
206 : /* Offset in the file of the next read. */
207 : off_t file_pos;
208 :
209 : /* True if `output' has not been called yet for any file.
210 : Only used when the separator is attached to the preceding record. */
211 30 : bool first_time = true;
212 30 : char first_char = *separator; /* Speed optimization, non-regexp. */
213 30 : char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
214 30 : size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
215 : struct re_registers regs;
216 :
217 : /* Find the size of the input file. */
218 30 : file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
219 30 : if (file_pos < 1)
220 0 : return true; /* It's an empty file. */
221 :
222 : /* Arrange for the first read to lop off enough to leave the rest of the
223 : file a multiple of `read_size'. Since `read_size' can change, this may
224 : not always hold during the program run, but since it usually will, leave
225 : it here for i/o efficiency (page/sector boundaries and all that).
226 : Note: the efficiency gain has not been verified. */
227 30 : saved_record_size = file_pos % read_size;
228 30 : if (saved_record_size == 0)
229 0 : saved_record_size = read_size;
230 30 : file_pos -= saved_record_size;
231 : /* `file_pos' now points to the start of the last (probably partial) block
232 : in the input file. */
233 :
234 30 : if (lseek (input_fd, file_pos, SEEK_SET) < 0)
235 0 : error (0, errno, _("%s: seek failed"), quotearg_colon (file));
236 :
237 30 : if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
238 : {
239 3 : error (0, errno, _("%s: read error"), quotearg_colon (file));
240 3 : return false;
241 : }
242 :
243 27 : match_start = past_end = G_buffer + saved_record_size;
244 : /* For non-regexp search, move past impossible positions for a match. */
245 27 : if (sentinel_length)
246 12 : match_start -= match_length1;
247 :
248 : for (;;)
249 : {
250 : /* Search backward from `match_start' - 1 to `G_buffer' for a match
251 : with `separator'; for speed, use strncmp if `separator' contains no
252 : metacharacters.
253 : If the match succeeds, set `match_start' to point to the start of
254 : the match and `match_length' to the length of the match.
255 : Otherwise, make `match_start' < `G_buffer'. */
256 319 : if (sentinel_length == 0)
257 : {
258 94 : size_t i = match_start - G_buffer;
259 94 : regoff_t ri = i;
260 94 : regoff_t range = 1 - ri;
261 : regoff_t ret;
262 :
263 94 : if (1 < range)
264 0 : error (EXIT_FAILURE, 0, _("record too large"));
265 :
266 94 : if (range == 1
267 84 : || ((ret = re_search (&compiled_separator, G_buffer,
268 : i, i - 1, range, ®s))
269 : == -1))
270 15 : match_start = G_buffer - 1;
271 79 : else if (ret == -2)
272 : {
273 0 : error (EXIT_FAILURE, 0,
274 : _("error in regular expression search"));
275 : }
276 : else
277 : {
278 79 : match_start = G_buffer + regs.start[0];
279 79 : match_length = regs.end[0] - regs.start[0];
280 : }
281 : }
282 : else
283 : {
284 : /* `match_length' is constant for non-regexp boundaries. */
285 79 : while (*--match_start != first_char
286 81 : || (match_length1 && strncmp (match_start + 1, separator1,
287 : match_length1)))
288 : /* Do nothing. */ ;
289 : }
290 :
291 : /* Check whether we backed off the front of `G_buffer' without finding
292 : a match for `separator'. */
293 173 : if (match_start < G_buffer)
294 : {
295 27 : if (file_pos == 0)
296 : {
297 : /* Hit the beginning of the file; print the remaining record. */
298 27 : output (G_buffer, past_end);
299 27 : return true;
300 : }
301 :
302 0 : saved_record_size = past_end - G_buffer;
303 0 : if (saved_record_size > read_size)
304 : {
305 : /* `G_buffer_size' is about twice `read_size', so since
306 : we want to read in another `read_size' bytes before
307 : the data already in `G_buffer', we need to increase
308 : `G_buffer_size'. */
309 : char *newbuffer;
310 0 : size_t offset = sentinel_length ? sentinel_length : 1;
311 0 : ptrdiff_t match_start_offset = match_start - G_buffer;
312 0 : ptrdiff_t past_end_offset = past_end - G_buffer;
313 0 : size_t old_G_buffer_size = G_buffer_size;
314 :
315 0 : read_size *= 2;
316 0 : G_buffer_size = read_size * 2 + sentinel_length + 2;
317 0 : if (G_buffer_size < old_G_buffer_size)
318 0 : xalloc_die ();
319 0 : newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
320 0 : newbuffer += offset;
321 : /* Adjust the pointers for the new buffer location. */
322 0 : match_start = newbuffer + match_start_offset;
323 0 : past_end = newbuffer + past_end_offset;
324 0 : G_buffer = newbuffer;
325 : }
326 :
327 : /* Back up to the start of the next bufferfull of the file. */
328 0 : if (file_pos >= read_size)
329 0 : file_pos -= read_size;
330 : else
331 : {
332 0 : read_size = file_pos;
333 0 : file_pos = 0;
334 : }
335 0 : if (lseek (input_fd, file_pos, SEEK_SET) < 0)
336 0 : error (0, errno, _("%s: seek failed"), quotearg_colon (file));
337 :
338 : /* Shift the pending record data right to make room for the new.
339 : The source and destination regions probably overlap. */
340 0 : memmove (G_buffer + read_size, G_buffer, saved_record_size);
341 0 : past_end = G_buffer + read_size + saved_record_size;
342 : /* For non-regexp searches, avoid unneccessary scanning. */
343 0 : if (sentinel_length)
344 0 : match_start = G_buffer + read_size;
345 : else
346 0 : match_start = past_end;
347 :
348 0 : if (safe_read (input_fd, G_buffer, read_size) != read_size)
349 : {
350 0 : error (0, errno, _("%s: read error"), quotearg_colon (file));
351 0 : return false;
352 : }
353 : }
354 : else
355 : {
356 : /* Found a match of `separator'. */
357 146 : if (separator_ends_record)
358 : {
359 138 : char *match_end = match_start + match_length;
360 :
361 : /* If this match of `separator' isn't at the end of the
362 : file, print the record. */
363 138 : if (!first_time || match_end != past_end)
364 128 : output (match_end, past_end);
365 138 : past_end = match_end;
366 138 : first_time = false;
367 : }
368 : else
369 : {
370 8 : output (match_start, past_end);
371 8 : past_end = match_start;
372 : }
373 :
374 : /* For non-regex matching, we can back up. */
375 146 : if (sentinel_length > 0)
376 67 : match_start -= match_length - 1;
377 : }
378 : }
379 : }
380 :
381 : #if DONT_UNLINK_WHILE_OPEN
382 :
383 : /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
384 : Using atexit like this is wrong, since it can fail
385 : when called e.g. 32 or more times.
386 : But this isn't a big deal, since the code is used only on WOE/DOS
387 : systems, and few people invoke tac on that many nonseekable files. */
388 :
389 : static const char *file_to_remove;
390 : static FILE *fp_to_close;
391 :
392 : static void
393 : unlink_tempfile (void)
394 : {
395 : fclose (fp_to_close);
396 : unlink (file_to_remove);
397 : }
398 :
399 : static void
400 : record_or_unlink_tempfile (char const *fn, FILE *fp)
401 : {
402 : if (!file_to_remove)
403 : {
404 : file_to_remove = fn;
405 : fp_to_close = fp;
406 : atexit (unlink_tempfile);
407 : }
408 : }
409 :
410 : #else
411 :
412 : static void
413 45 : record_or_unlink_tempfile (char const *fn, FILE *fp ATTRIBUTE_UNUSED)
414 : {
415 45 : unlink (fn);
416 45 : }
417 :
418 : #endif
419 :
420 : /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
421 : a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
422 : and file name. Return true if successful. */
423 :
424 : static bool
425 55 : copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
426 : {
427 : static char *template = NULL;
428 : static char const *tempdir;
429 : char *tempfile;
430 : FILE *tmp;
431 : int fd;
432 :
433 55 : if (template == NULL)
434 : {
435 45 : char const * const Template = "%s/tacXXXXXX";
436 45 : tempdir = getenv ("TMPDIR");
437 45 : if (tempdir == NULL)
438 45 : tempdir = DEFAULT_TMPDIR;
439 :
440 : /* Subtract 2 for `%s' and add 1 for the trailing NUL byte. */
441 45 : template = xmalloc (strlen (tempdir) + strlen (Template) - 2 + 1);
442 45 : sprintf (template, Template, tempdir);
443 : }
444 :
445 : /* FIXME: there's a small window between a successful mkstemp call
446 : and the unlink that's performed by record_or_unlink_tempfile.
447 : If we're interrupted in that interval, this code fails to remove
448 : the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
449 : the window is much larger -- it extends to the atexit-called
450 : unlink_tempfile.
451 : FIXME: clean up upon fatal signal. Don't block them, in case
452 : $TMPFILE is a remote file system. */
453 :
454 55 : tempfile = template;
455 55 : fd = mkstemp (template);
456 55 : if (fd < 0)
457 : {
458 10 : error (0, errno, _("cannot create temporary file %s"), quote (tempfile));
459 10 : return false;
460 : }
461 :
462 45 : tmp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
463 45 : if (! tmp)
464 : {
465 0 : error (0, errno, _("cannot open %s for writing"), quote (tempfile));
466 0 : close (fd);
467 0 : unlink (tempfile);
468 0 : return false;
469 : }
470 :
471 45 : record_or_unlink_tempfile (tempfile, tmp);
472 :
473 : while (1)
474 45 : {
475 90 : size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
476 90 : if (bytes_read == 0)
477 0 : break;
478 90 : if (bytes_read == SAFE_READ_ERROR)
479 : {
480 45 : error (0, errno, _("%s: read error"), quotearg_colon (file));
481 45 : goto Fail;
482 : }
483 :
484 45 : if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read)
485 : {
486 0 : error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
487 0 : goto Fail;
488 : }
489 : }
490 :
491 0 : if (fflush (tmp) != 0)
492 : {
493 0 : error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
494 0 : goto Fail;
495 : }
496 :
497 0 : *g_tmp = tmp;
498 0 : *g_tempfile = tempfile;
499 0 : return true;
500 :
501 45 : Fail:
502 45 : fclose (tmp);
503 45 : return false;
504 : }
505 :
506 : /* Copy INPUT_FD to a temporary, then tac that file.
507 : Return true if successful. */
508 :
509 : static bool
510 55 : tac_nonseekable (int input_fd, const char *file)
511 : {
512 : FILE *tmp_stream;
513 : char *tmp_file;
514 55 : return (copy_to_temp (&tmp_stream, &tmp_file, input_fd, file)
515 55 : && tac_seekable (fileno (tmp_stream), tmp_file));
516 : }
517 :
518 : /* Print FILE in reverse, copying it to a temporary
519 : file first if it is not seekable.
520 : Return true if successful. */
521 :
522 : static bool
523 100 : tac_file (const char *filename)
524 : {
525 : bool ok;
526 : off_t file_size;
527 : int fd;
528 100 : bool is_stdin = STREQ (filename, "-");
529 :
530 100 : if (is_stdin)
531 : {
532 81 : have_read_stdin = true;
533 81 : fd = STDIN_FILENO;
534 81 : filename = _("standard input");
535 : if (O_BINARY && ! isatty (STDIN_FILENO))
536 : freopen (NULL, "rb", stdin);
537 : }
538 : else
539 : {
540 19 : fd = open (filename, O_RDONLY | O_BINARY);
541 19 : if (fd < 0)
542 : {
543 15 : error (0, errno, _("cannot open %s for reading"), quote (filename));
544 15 : return false;
545 : }
546 : }
547 :
548 85 : file_size = lseek (fd, (off_t) 0, SEEK_END);
549 :
550 115 : ok = (file_size < 0 || isatty (fd)
551 55 : ? tac_nonseekable (fd, filename)
552 140 : : tac_seekable (fd, filename));
553 :
554 85 : if (!is_stdin && close (fd) != 0)
555 : {
556 0 : error (0, errno, _("%s: read error"), quotearg_colon (filename));
557 0 : ok = false;
558 : }
559 85 : return ok;
560 : }
561 :
562 : int
563 122 : main (int argc, char **argv)
564 : {
565 : const char *error_message; /* Return value from re_compile_pattern. */
566 : int optc;
567 : bool ok;
568 : size_t half_buffer_size;
569 :
570 : /* Initializer for file_list if no file-arguments
571 : were specified on the command line. */
572 : static char const *const default_file_list[] = {"-", NULL};
573 : char const *const *file;
574 :
575 : initialize_main (&argc, &argv);
576 122 : program_name = argv[0];
577 122 : setlocale (LC_ALL, "");
578 : bindtextdomain (PACKAGE, LOCALEDIR);
579 : textdomain (PACKAGE);
580 :
581 122 : atexit (close_stdout);
582 :
583 122 : separator = "\n";
584 122 : sentinel_length = 1;
585 122 : separator_ends_record = true;
586 :
587 461 : while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
588 : {
589 230 : switch (optc)
590 : {
591 63 : case 'b':
592 63 : separator_ends_record = false;
593 63 : break;
594 79 : case 'r':
595 79 : sentinel_length = 0;
596 79 : break;
597 78 : case 's':
598 78 : separator = optarg;
599 78 : if (*separator == 0)
600 3 : error (EXIT_FAILURE, 0, _("separator cannot be empty"));
601 75 : break;
602 1 : case_GETOPT_HELP_CHAR;
603 1 : case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
604 8 : default:
605 8 : usage (EXIT_FAILURE);
606 : }
607 : }
608 :
609 109 : if (sentinel_length == 0)
610 : {
611 79 : compiled_separator.buffer = NULL;
612 79 : compiled_separator.allocated = 0;
613 79 : compiled_separator.fastmap = compiled_separator_fastmap;
614 79 : compiled_separator.translate = NULL;
615 79 : error_message = re_compile_pattern (separator, strlen (separator),
616 : &compiled_separator);
617 79 : if (error_message)
618 29 : error (EXIT_FAILURE, 0, "%s", error_message);
619 : }
620 : else
621 30 : match_length = sentinel_length = strlen (separator);
622 :
623 80 : read_size = INITIAL_READSIZE;
624 160 : while (sentinel_length >= read_size / 2)
625 : {
626 0 : if (SIZE_MAX / 2 < read_size)
627 0 : xalloc_die ();
628 0 : read_size *= 2;
629 : }
630 80 : half_buffer_size = read_size + sentinel_length + 1;
631 80 : G_buffer_size = 2 * half_buffer_size;
632 80 : if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
633 0 : xalloc_die ();
634 80 : G_buffer = xmalloc (G_buffer_size);
635 80 : if (sentinel_length)
636 : {
637 30 : strcpy (G_buffer, separator);
638 30 : G_buffer += sentinel_length;
639 : }
640 : else
641 : {
642 50 : ++G_buffer;
643 : }
644 :
645 160 : file = (optind < argc
646 67 : ? (char const *const *) &argv[optind]
647 147 : : default_file_list);
648 :
649 : if (O_BINARY && ! isatty (STDOUT_FILENO))
650 : freopen (NULL, "wb", stdout);
651 :
652 : {
653 : size_t i;
654 80 : ok = true;
655 180 : for (i = 0; file[i]; ++i)
656 100 : ok &= tac_file (file[i]);
657 : }
658 :
659 : /* Flush the output buffer. */
660 80 : output ((char *) NULL, (char *) NULL);
661 :
662 80 : if (have_read_stdin && close (STDIN_FILENO) < 0)
663 0 : error (EXIT_FAILURE, errno, "-");
664 80 : exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
665 : }
|