LCOV - code coverage report
Current view: top level - src - split.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 180 227 79.3 %
Date: 2018-01-30 Functions: 6 7 85.7 %

          Line data    Source code
       1             : /* split.c -- split a file into pieces.
       2             :    Copyright (C) 1988, 1991, 1995-2008 Free Software Foundation, Inc.
       3             : 
       4             :    This program is free software: you can redistribute it and/or modify
       5             :    it under the terms of the GNU General Public License as published by
       6             :    the Free Software Foundation, either version 3 of the License, or
       7             :    (at your option) any later version.
       8             : 
       9             :    This program is distributed in the hope that it will be useful,
      10             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :    GNU General Public License for more details.
      13             : 
      14             :    You should have received a copy of the GNU General Public License
      15             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      16             : 
      17             : /* By tege@sics.se, with rms.
      18             : 
      19             :    To do:
      20             :    * Implement -t CHAR or -t REGEX to specify break characters other
      21             :      than newline. */
      22             : 
      23             : #include <config.h>
      24             : 
      25             : #include <stdio.h>
      26             : #include <getopt.h>
      27             : #include <sys/types.h>
      28             : 
      29             : #include "system.h"
      30             : #include "error.h"
      31             : #include "fd-reopen.h"
      32             : #include "fcntl--.h"
      33             : #include "full-read.h"
      34             : #include "full-write.h"
      35             : #include "inttostr.h"
      36             : #include "quote.h"
      37             : #include "safe-read.h"
      38             : #include "xstrtol.h"
      39             : 
      40             : /* The official name of this program (e.g., no `g' prefix).  */
      41             : #define PROGRAM_NAME "split"
      42             : 
      43             : #define AUTHORS "Torbjorn Granlund", "Richard M. Stallman"
      44             : 
      45             : #define DEFAULT_SUFFIX_LENGTH 2
      46             : 
      47             : /* The name this program was run with. */
      48             : char *program_name;
      49             : 
      50             : /* Base name of output files.  */
      51             : static char const *outbase;
      52             : 
      53             : /* Name of output files.  */
      54             : static char *outfile;
      55             : 
      56             : /* Pointer to the end of the prefix in OUTFILE.
      57             :    Suffixes are inserted here.  */
      58             : static char *outfile_mid;
      59             : 
      60             : /* Length of OUTFILE's suffix.  */
      61             : static size_t suffix_length = DEFAULT_SUFFIX_LENGTH;
      62             : 
      63             : /* Alphabet of characters to use in suffix.  */
      64             : static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
      65             : 
      66             : /* Name of input file.  May be "-".  */
      67             : static char *infile;
      68             : 
      69             : /* Descriptor on which output file is open.  */
      70             : static int output_desc;
      71             : 
      72             : /* If true, print a diagnostic on standard output just before each
      73             :    output file is opened. */
      74             : static bool verbose;
      75             : 
      76             : /* For long options that have no equivalent short option, use a
      77             :    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
      78             : enum
      79             : {
      80             :   VERBOSE_OPTION = CHAR_MAX + 1
      81             : };
      82             : 
      83             : static struct option const longopts[] =
      84             : {
      85             :   {"bytes", required_argument, NULL, 'b'},
      86             :   {"lines", required_argument, NULL, 'l'},
      87             :   {"line-bytes", required_argument, NULL, 'C'},
      88             :   {"suffix-length", required_argument, NULL, 'a'},
      89             :   {"numeric-suffixes", no_argument, NULL, 'd'},
      90             :   {"verbose", no_argument, NULL, VERBOSE_OPTION},
      91             :   {GETOPT_HELP_OPTION_DECL},
      92             :   {GETOPT_VERSION_OPTION_DECL},
      93             :   {NULL, 0, NULL, 0}
      94             : };
      95             : 
      96             : void
      97          47 : usage (int status)
      98             : {
      99          47 :   if (status != EXIT_SUCCESS)
     100          46 :     fprintf (stderr, _("Try `%s --help' for more information.\n"),
     101             :              program_name);
     102             :   else
     103             :     {
     104           1 :       printf (_("\
     105             : Usage: %s [OPTION] [INPUT [PREFIX]]\n\
     106             : "),
     107             :               program_name);
     108           1 :     fputs (_("\
     109             : Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
     110             : size is 1000 lines, and default PREFIX is `x'.  With no INPUT, or when INPUT\n\
     111             : is -, read standard input.\n\
     112             : \n\
     113             : "), stdout);
     114           1 :       fputs (_("\
     115             : Mandatory arguments to long options are mandatory for short options too.\n\
     116             : "), stdout);
     117           1 :       fprintf (stdout, _("\
     118             :   -a, --suffix-length=N   use suffixes of length N (default %d)\n\
     119             :   -b, --bytes=SIZE        put SIZE bytes per output file\n\
     120             :   -C, --line-bytes=SIZE   put at most SIZE bytes of lines per output file\n\
     121             :   -d, --numeric-suffixes  use numeric suffixes instead of alphabetic\n\
     122             :   -l, --lines=NUMBER      put NUMBER lines per output file\n\
     123             : "), DEFAULT_SUFFIX_LENGTH);
     124           1 :       fputs (_("\
     125             :       --verbose           print a diagnostic just before each\n\
     126             :                             output file is opened\n\
     127             : "), stdout);
     128           1 :       fputs (HELP_OPTION_DESCRIPTION, stdout);
     129           1 :       fputs (VERSION_OPTION_DESCRIPTION, stdout);
     130           1 :       fputs (_("\
     131             : \n\
     132             : SIZE may have a multiplier suffix:\n\
     133             : b 512, kB 1000, K 1024, MB 1000*1000, M 1024*1024,\n\
     134             : GB 1000*1000*1000, G 1024*1024*1024, and so on for T, P, E, Z, Y.\n\
     135             : "), stdout);
     136           1 :       emit_bug_reporting_address ();
     137             :     }
     138          47 :   exit (status);
     139             : }
     140             : 
     141             : /* Compute the next sequential output file name and store it into the
     142             :    string `outfile'.  */
     143             : 
     144             : static void
     145          41 : next_file_name (void)
     146             : {
     147             :   /* Index in suffix_alphabet of each character in the suffix.  */
     148             :   static size_t *sufindex;
     149             : 
     150          41 :   if (! outfile)
     151             :     {
     152             :       /* Allocate and initialize the first file name.  */
     153             : 
     154          26 :       size_t outbase_length = strlen (outbase);
     155          26 :       size_t outfile_length = outbase_length + suffix_length;
     156          26 :       if (outfile_length + 1 < outbase_length)
     157           0 :         xalloc_die ();
     158          26 :       outfile = xmalloc (outfile_length + 1);
     159          26 :       outfile_mid = outfile + outbase_length;
     160          26 :       memcpy (outfile, outbase, outbase_length);
     161          26 :       memset (outfile_mid, suffix_alphabet[0], suffix_length);
     162          26 :       outfile[outfile_length] = 0;
     163          26 :       sufindex = xcalloc (suffix_length, sizeof *sufindex);
     164             : 
     165             : #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
     166             :       /* POSIX requires that if the output file name is too long for
     167             :          its directory, `split' must fail without creating any files.
     168             :          This must be checked for explicitly on operating systems that
     169             :          silently truncate file names.  */
     170             :       {
     171             :         char *dir = dir_name (outfile);
     172             :         long name_max = pathconf (dir, _PC_NAME_MAX);
     173             :         if (0 <= name_max && name_max < base_len (last_component (outfile)))
     174             :           error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
     175             :         free (dir);
     176             :       }
     177             : #endif
     178             :     }
     179             :   else
     180             :     {
     181             :       /* Increment the suffix in place, if possible.  */
     182             : 
     183          15 :       size_t i = suffix_length;
     184          30 :       while (i-- != 0)
     185             :         {
     186          14 :           sufindex[i]++;
     187          14 :           outfile_mid[i] = suffix_alphabet[sufindex[i]];
     188          14 :           if (outfile_mid[i])
     189          14 :             return;
     190           0 :           sufindex[i] = 0;
     191           0 :           outfile_mid[i] = suffix_alphabet[sufindex[i]];
     192             :         }
     193           1 :       error (EXIT_FAILURE, 0, _("Output file suffixes exhausted"));
     194             :     }
     195             : }
     196             : 
     197             : /* Write BYTES bytes at BP to an output file.
     198             :    If NEW_FILE_FLAG is true, open the next output file.
     199             :    Otherwise add to the same output file already in use.  */
     200             : 
     201             : static void
     202          41 : cwrite (bool new_file_flag, const char *bp, size_t bytes)
     203             : {
     204          41 :   if (new_file_flag)
     205             :     {
     206          41 :       if (output_desc >= 0 && close (output_desc) < 0)
     207           0 :         error (EXIT_FAILURE, errno, "%s", outfile);
     208             : 
     209          41 :       next_file_name ();
     210          40 :       if (verbose)
     211           1 :         fprintf (stdout, _("creating file %s\n"), quote (outfile));
     212          40 :       output_desc = open (outfile,
     213             :                           O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
     214             :                           (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP
     215             :                            | S_IROTH | S_IWOTH));
     216          40 :       if (output_desc < 0)
     217           1 :         error (EXIT_FAILURE, errno, "%s", outfile);
     218             :     }
     219          39 :   if (full_write (output_desc, bp, bytes) != bytes)
     220           0 :     error (EXIT_FAILURE, errno, "%s", outfile);
     221          39 : }
     222             : 
     223             : /* Split into pieces of exactly N_BYTES bytes; the last may be shorter.
     224             :    Use buffer BUF, whose size is BUFSIZE.  */
     225             : 
     226             : static void
     227          10 : bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
     228             : {
     229             :   size_t n_read;
     230          10 :   bool new_file_flag = true;
     231             :   size_t to_read;
     232          10 :   uintmax_t to_write = n_bytes;
     233             :   char *bp_out;
     234             : 
     235             :   do
     236             :     {
     237          10 :       n_read = full_read (STDIN_FILENO, buf, bufsize);
     238          10 :       if (n_read == SAFE_READ_ERROR)
     239           0 :         error (EXIT_FAILURE, errno, "%s", infile);
     240          10 :       bp_out = buf;
     241          10 :       to_read = n_read;
     242             :       for (;;)
     243             :         {
     244          26 :           if (to_read < to_write)
     245             :             {
     246          10 :               if (to_read)      /* do not write 0 bytes! */
     247             :                 {
     248           9 :                   cwrite (new_file_flag, bp_out, to_read);
     249           9 :                   to_write -= to_read;
     250           9 :                   new_file_flag = false;
     251             :                 }
     252          10 :               break;
     253             :             }
     254             :           else
     255             :             {
     256           8 :               size_t w = to_write;
     257           8 :               cwrite (new_file_flag, bp_out, w);
     258           8 :               bp_out += w;
     259           8 :               to_read -= w;
     260           8 :               new_file_flag = true;
     261           8 :               to_write = n_bytes;
     262             :             }
     263             :         }
     264             :     }
     265          10 :   while (n_read == bufsize);
     266          10 : }
     267             : 
     268             : /* Split into pieces of exactly N_LINES lines; the last may have fewer.
     269             :    Use buffer BUF, whose size is BUFSIZE.  */
     270             : 
     271             : static void
     272          18 : lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
     273             : {
     274             :   size_t n_read;
     275             :   char *bp, *bp_out, *eob;
     276          18 :   bool new_file_flag = true;
     277          18 :   uintmax_t n = 0;
     278             : 
     279             :   do
     280             :     {
     281          18 :       n_read = full_read (STDIN_FILENO, buf, bufsize);
     282          18 :       if (n_read == SAFE_READ_ERROR)
     283           0 :         error (EXIT_FAILURE, errno, "%s", infile);
     284          18 :       bp = bp_out = buf;
     285          18 :       eob = bp + n_read;
     286          18 :       *eob = '\n';
     287             :       for (;;)
     288             :         {
     289         240 :           bp = memchr (bp, '\n', eob - bp + 1);
     290         129 :           if (bp == eob)
     291             :             {
     292          17 :               if (eob != bp_out) /* do not write 0 bytes! */
     293             :                 {
     294          14 :                   size_t len = eob - bp_out;
     295          14 :                   cwrite (new_file_flag, bp_out, len);
     296          13 :                   new_file_flag = false;
     297             :                 }
     298          16 :               break;
     299             :             }
     300             : 
     301         112 :           ++bp;
     302         112 :           if (++n >= n_lines)
     303             :             {
     304          10 :               cwrite (new_file_flag, bp_out, bp - bp_out);
     305           9 :               bp_out = bp;
     306           9 :               new_file_flag = true;
     307           9 :               n = 0;
     308             :             }
     309             :         }
     310             :     }
     311          16 :   while (n_read == bufsize);
     312          16 : }
     313             : 
     314             : /* Split into pieces that are as large as possible while still not more
     315             :    than N_BYTES bytes, and are split on line boundaries except
     316             :    where lines longer than N_BYTES bytes occur.
     317             :    FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
     318             :    buffer of size N_BYTES, in case N_BYTES is very large.  */
     319             : 
     320             : static void
     321           0 : line_bytes_split (size_t n_bytes)
     322             : {
     323             :   size_t n_read;
     324             :   char *bp;
     325           0 :   bool eof = false;
     326           0 :   size_t n_buffered = 0;
     327           0 :   char *buf = xmalloc (n_bytes);
     328             : 
     329             :   do
     330             :     {
     331             :       /* Fill up the full buffer size from the input file.  */
     332             : 
     333           0 :       n_read = full_read (STDIN_FILENO, buf + n_buffered, n_bytes - n_buffered);
     334           0 :       if (n_read == SAFE_READ_ERROR)
     335           0 :         error (EXIT_FAILURE, errno, "%s", infile);
     336             : 
     337           0 :       n_buffered += n_read;
     338           0 :       if (n_buffered != n_bytes)
     339             :         {
     340           0 :           if (n_buffered == 0)
     341           0 :             break;
     342           0 :           eof = true;
     343             :         }
     344             : 
     345             :       /* Find where to end this chunk.  */
     346           0 :       bp = buf + n_buffered;
     347           0 :       if (n_buffered == n_bytes)
     348             :         {
     349           0 :           while (bp > buf && bp[-1] != '\n')
     350           0 :             bp--;
     351             :         }
     352             : 
     353             :       /* If chunk has no newlines, use all the chunk.  */
     354           0 :       if (bp == buf)
     355           0 :         bp = buf + n_buffered;
     356             : 
     357             :       /* Output the chars as one output file.  */
     358           0 :       cwrite (true, buf, bp - buf);
     359             : 
     360             :       /* Discard the chars we just output; move rest of chunk
     361             :          down to be the start of the next chunk.  Source and
     362             :          destination probably overlap.  */
     363           0 :       n_buffered -= bp - buf;
     364           0 :       if (n_buffered > 0)
     365           0 :         memmove (buf, bp, n_buffered);
     366             :     }
     367           0 :   while (!eof);
     368           0 :   free (buf);
     369           0 : }
     370             : 
     371             : #define FAIL_ONLY_ONE_WAY()                                     \
     372             :   do                                                            \
     373             :     {                                                           \
     374             :       error (0, 0, _("cannot split in more than one way"));   \
     375             :       usage (EXIT_FAILURE);                                     \
     376             :     }                                                           \
     377             :   while (0)
     378             : 
     379             : int
     380          79 : main (int argc, char **argv)
     381             : {
     382             :   struct stat stat_buf;
     383             :   enum
     384             :     {
     385             :       type_undef, type_bytes, type_byteslines, type_lines, type_digits
     386          79 :     } split_type = type_undef;
     387             :   size_t in_blk_size;           /* optimal block size of input file device */
     388             :   char *buf;                    /* file i/o buffer */
     389          79 :   size_t page_size = getpagesize ();
     390             :   uintmax_t n_units;
     391             :   static char const multipliers[] = "bEGKkMmPTYZ0";
     392             :   int c;
     393          79 :   int digits_optind = 0;
     394             : 
     395             :   initialize_main (&argc, &argv);
     396          79 :   program_name = argv[0];
     397          79 :   setlocale (LC_ALL, "");
     398             :   bindtextdomain (PACKAGE, LOCALEDIR);
     399             :   textdomain (PACKAGE);
     400             : 
     401          79 :   atexit (close_stdout);
     402             : 
     403             :   /* Parse command line options.  */
     404             : 
     405          79 :   infile = "-";
     406          79 :   outbase = "x";
     407             : 
     408             :   while (1)
     409          26 :     {
     410             :       /* This is the argv-index of the option we will read next.  */
     411         105 :       int this_optind = optind ? optind : 1;
     412             : 
     413         105 :       c = getopt_long (argc, argv, "0123456789C:a:b:dl:", longopts, NULL);
     414         105 :       if (c == -1)
     415          50 :         break;
     416             : 
     417          55 :       switch (c)
     418             :         {
     419           7 :         case 'a':
     420             :           {
     421             :             unsigned long tmp;
     422           7 :             if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
     423           2 :                 || SIZE_MAX / sizeof (size_t) < tmp)
     424             :               {
     425           5 :                 error (0, 0, _("%s: invalid suffix length"), optarg);
     426           5 :                 usage (EXIT_FAILURE);
     427             :               }
     428           2 :             suffix_length = tmp;
     429             :           }
     430           2 :           break;
     431             : 
     432          21 :         case 'b':
     433          21 :           if (split_type != type_undef)
     434           0 :             FAIL_ONLY_ONE_WAY ();
     435          21 :           split_type = type_bytes;
     436          21 :           if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
     437          12 :               || n_units == 0)
     438             :             {
     439          10 :               error (0, 0, _("%s: invalid number of bytes"), optarg);
     440          10 :               usage (EXIT_FAILURE);
     441             :             }
     442          11 :           break;
     443             : 
     444           1 :         case 'l':
     445           1 :           if (split_type != type_undef)
     446           0 :             FAIL_ONLY_ONE_WAY ();
     447           1 :           split_type = type_lines;
     448           1 :           if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
     449           0 :               || n_units == 0)
     450             :             {
     451           1 :               error (0, 0, _("%s: invalid number of lines"), optarg);
     452           1 :               usage (EXIT_FAILURE);
     453             :             }
     454           0 :           break;
     455             : 
     456           2 :         case 'C':
     457           2 :           if (split_type != type_undef)
     458           1 :             FAIL_ONLY_ONE_WAY ();
     459           1 :           split_type = type_byteslines;
     460           1 :           if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
     461           0 :               || n_units == 0 || SIZE_MAX < n_units)
     462             :             {
     463           1 :               error (0, 0, _("%s: invalid number of bytes"), optarg);
     464           1 :               usage (EXIT_FAILURE);
     465             :             }
     466           0 :           break;
     467             : 
     468          12 :         case '0':
     469             :         case '1':
     470             :         case '2':
     471             :         case '3':
     472             :         case '4':
     473             :         case '5':
     474             :         case '6':
     475             :         case '7':
     476             :         case '8':
     477             :         case '9':
     478          12 :           if (split_type == type_undef)
     479             :             {
     480           8 :               split_type = type_digits;
     481           8 :               n_units = 0;
     482             :             }
     483          12 :           if (split_type != type_undef && split_type != type_digits)
     484           1 :             FAIL_ONLY_ONE_WAY ();
     485          11 :           if (digits_optind != 0 && digits_optind != this_optind)
     486           1 :             n_units = 0;        /* More than one number given; ignore other. */
     487          11 :           digits_optind = this_optind;
     488          11 :           if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
     489             :             {
     490             :               char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
     491           0 :               error (EXIT_FAILURE, 0,
     492             :                      _("line count option -%s%c... is too large"),
     493             :                      umaxtostr (n_units, buffer), c);
     494             :             }
     495          11 :           break;
     496             : 
     497           1 :         case 'd':
     498           1 :           suffix_alphabet = "0123456789";
     499           1 :           break;
     500             : 
     501           1 :         case VERBOSE_OPTION:
     502           1 :           verbose = true;
     503           1 :           break;
     504             : 
     505           1 :         case_GETOPT_HELP_CHAR;
     506             : 
     507           1 :         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
     508             : 
     509           8 :         default:
     510           8 :           usage (EXIT_FAILURE);
     511             :         }
     512             :     }
     513             : 
     514             :   /* Handle default case.  */
     515          50 :   if (split_type == type_undef)
     516             :     {
     517          33 :       split_type = type_lines;
     518          33 :       n_units = 1000;
     519             :     }
     520             : 
     521          50 :   if (n_units == 0)
     522             :     {
     523           5 :       error (0, 0, _("invalid number of lines: 0"));
     524           5 :       usage (EXIT_FAILURE);
     525             :     }
     526             : 
     527             :   /* Get out the filename arguments.  */
     528             : 
     529          45 :   if (optind < argc)
     530          30 :     infile = argv[optind++];
     531             : 
     532          45 :   if (optind < argc)
     533          26 :     outbase = argv[optind++];
     534             : 
     535          45 :   if (optind < argc)
     536             :     {
     537          14 :       error (0, 0, _("extra operand %s"), quote (argv[optind]));
     538          14 :       usage (EXIT_FAILURE);
     539             :     }
     540             : 
     541             :   /* Open the input file.  */
     542          31 :   if (! STREQ (infile, "-")
     543           7 :       && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
     544           3 :     error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
     545             :            quote (infile));
     546             : 
     547             :   /* Binary I/O is safer when byte counts are used.  */
     548             :   if (O_BINARY && ! isatty (STDIN_FILENO))
     549             :     freopen (NULL, "rb", stdin);
     550             : 
     551             :   /* No output file is open now.  */
     552          28 :   output_desc = -1;
     553             : 
     554             :   /* Get the optimal block size of input device and make a buffer.  */
     555             : 
     556          28 :   if (fstat (STDIN_FILENO, &stat_buf) != 0)
     557           0 :     error (EXIT_FAILURE, errno, "%s", infile);
     558          28 :   in_blk_size = ST_BLKSIZE (stat_buf);
     559             : 
     560          28 :   buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size);
     561             : 
     562          28 :   switch (split_type)
     563             :     {
     564          18 :     case type_digits:
     565             :     case type_lines:
     566          18 :       lines_split (n_units, buf, in_blk_size);
     567          16 :       break;
     568             : 
     569          10 :     case type_bytes:
     570          10 :       bytes_split (n_units, buf, in_blk_size);
     571          10 :       break;
     572             : 
     573           0 :     case type_byteslines:
     574           0 :       line_bytes_split (n_units);
     575           0 :       break;
     576             : 
     577           0 :     default:
     578           0 :       abort ();
     579             :     }
     580             : 
     581          26 :   if (close (STDIN_FILENO) != 0)
     582           0 :     error (EXIT_FAILURE, errno, "%s", infile);
     583          26 :   if (output_desc >= 0 && close (output_desc) < 0)
     584           0 :     error (EXIT_FAILURE, errno, "%s", outfile);
     585             : 
     586          26 :   exit (EXIT_SUCCESS);
     587             : }

Generated by: LCOV version 1.10