LCOV - coreutils.info

LCOV - code coverage report

Current view:	top level - src - sort.c (source / functions)		Hit	Total	Coverage
Test:	coreutils.info	Lines:	359	1232	29.1 %
Date:	2018-01-30	Functions:	24	57	42.1 %

          Line data    Source code

       1             : /* sort - sort lines of text (with all kinds of options).
       2             :    Copyright (C) 1988, 1991-2008 Free Software Foundation, Inc.
       3             : 
       4             :    This program is free software: you can redistribute it and/or modify
       5             :    it under the terms of the GNU General Public License as published by
       6             :    the Free Software Foundation, either version 3 of the License, or
       7             :    (at your option) any later version.
       8             : 
       9             :    This program is distributed in the hope that it will be useful,
      10             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :    GNU General Public License for more details.
      13             : 
      14             :    You should have received a copy of the GNU General Public License
      15             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.
      16             : 
      17             :    Written December 1988 by Mike Haertel.
      18             :    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
      19             :    or (US mail) as Mike Haertel c/o Free Software Foundation.
      20             : 
      21             :    Ørn E. Hansen added NLS support in 1997.  */
      22             : 
      23             : #include <config.h>
      24             : 
      25             : #include <getopt.h>
      26             : #include <sys/types.h>
      27             : #include <sys/wait.h>
      28             : #include <signal.h>
      29             : #include "system.h"
      30             : #include "argmatch.h"
      31             : #include "error.h"
      32             : #include "hard-locale.h"
      33             : #include "hash.h"
      34             : #include "inttostr.h"
      35             : #include "md5.h"
      36             : #include "physmem.h"
      37             : #include "posixver.h"
      38             : #include "quote.h"
      39             : #include "randread.h"
      40             : #include "stdio--.h"
      41             : #include "stdlib--.h"
      42             : #include "strnumcmp.h"
      43             : #include "xmemcoll.h"
      44             : #include "xmemxfrm.h"
      45             : #include "xstrtol.h"
      46             : 
      47             : #if HAVE_SYS_RESOURCE_H
      48             : # include <sys/resource.h>
      49             : #endif
      50             : #ifndef RLIMIT_DATA
      51             : struct rlimit { size_t rlim_cur; };
      52             : # define getrlimit(Resource, Rlp) (-1)
      53             : #endif
      54             : 
      55             : /* The official name of this program (e.g., no `g' prefix).  */
      56             : #define PROGRAM_NAME "sort"
      57             : 
      58             : #define AUTHORS "Mike Haertel", "Paul Eggert"
      59             : 
      60             : #if HAVE_LANGINFO_CODESET
      61             : # include <langinfo.h>
      62             : #endif
      63             : 
      64             : /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
      65             :    present.  */
      66             : #ifndef SA_NOCLDSTOP
      67             : # define SA_NOCLDSTOP 0
      68             : /* No sigprocmask.  Always 'return' zero. */
      69             : # define sigprocmask(How, Set, Oset) (0)
      70             : # define sigset_t int
      71             : # if ! HAVE_SIGINTERRUPT
      72             : #  define siginterrupt(sig, flag) /* empty */
      73             : # endif
      74             : #endif
      75             : 
      76             : #ifndef STDC_HEADERS
      77             : double strtod ();
      78             : #endif
      79             : 
      80             : #define UCHAR_LIM (UCHAR_MAX + 1)
      81             : 
      82             : #ifndef DEFAULT_TMPDIR
      83             : # define DEFAULT_TMPDIR "/tmp"
      84             : #endif
      85             : 
      86             : /* Exit statuses.  */
      87             : enum
      88             :   {
      89             :     /* POSIX says to exit with status 1 if invoked with -c and the
      90             :        input is not properly sorted.  */
      91             :     SORT_OUT_OF_ORDER = 1,
      92             : 
      93             :     /* POSIX says any other irregular exit must exit with a status
      94             :        code greater than 1.  */
      95             :     SORT_FAILURE = 2
      96             :   };
      97             : 
      98             : enum
      99             :   {
     100             :     /* The number of times we should try to fork a compression process
     101             :        (we retry if the fork call fails).  We don't _need_ to compress
     102             :        temp files, this is just to reduce disk access, so this number
     103             :        can be small.  */
     104             :     MAX_FORK_TRIES_COMPRESS = 2,
     105             : 
     106             :     /* The number of times we should try to fork a decompression process.
     107             :        If we can't fork a decompression process, we can't sort, so this
     108             :        number should be big.  */
     109             :     MAX_FORK_TRIES_DECOMPRESS = 8
     110             :   };
     111             : 
     112             : /* The representation of the decimal point in the current locale.  */
     113             : static int decimal_point;
     114             : 
     115             : /* Thousands separator; if -1, then there isn't one.  */
     116             : static int thousands_sep;
     117             : 
     118             : /* Nonzero if the corresponding locales are hard.  */
     119             : static bool hard_LC_COLLATE;
     120             : #if HAVE_NL_LANGINFO
     121             : static bool hard_LC_TIME;
     122             : #endif
     123             : 
     124             : #define NONZERO(x) ((x) != 0)
     125             : 
     126             : /* The kind of blanks for '-b' to skip in various options. */
     127             : enum blanktype { bl_start, bl_end, bl_both };
     128             : 
     129             : /* The character marking end of line. Default to \n. */
     130             : static char eolchar = '\n';
     131             : 
     132             : /* Lines are held in core as counted strings. */
     133             : struct line
     134             : {
     135             :   char *text;                   /* Text of the line. */
     136             :   size_t length;                /* Length including final newline. */
     137             :   char *keybeg;                 /* Start of first key. */
     138             :   char *keylim;                 /* Limit of first key. */
     139             : };
     140             : 
     141             : /* Input buffers. */
     142             : struct buffer
     143             : {
     144             :   char *buf;                    /* Dynamically allocated buffer,
     145             :                                    partitioned into 3 regions:
     146             :                                    - input data;
     147             :                                    - unused area;
     148             :                                    - an array of lines, in reverse order.  */
     149             :   size_t used;                  /* Number of bytes used for input data.  */
     150             :   size_t nlines;                /* Number of lines in the line array.  */
     151             :   size_t alloc;                 /* Number of bytes allocated. */
     152             :   size_t left;                  /* Number of bytes left from previous reads. */
     153             :   size_t line_bytes;            /* Number of bytes to reserve for each line. */
     154             :   bool eof;                     /* An EOF has been read.  */
     155             : };
     156             : 
     157             : struct keyfield
     158             : {
     159             :   size_t sword;                 /* Zero-origin 'word' to start at. */
     160             :   size_t schar;                 /* Additional characters to skip. */
     161             :   size_t eword;                 /* Zero-origin first word after field. */
     162             :   size_t echar;                 /* Additional characters in field. */
     163             :   bool const *ignore;           /* Boolean array of characters to ignore. */
     164             :   char const *translate;        /* Translation applied to characters. */
     165             :   bool skipsblanks;             /* Skip leading blanks when finding start.  */
     166             :   bool skipeblanks;             /* Skip leading blanks when finding end.  */
     167             :   bool numeric;                 /* Flag for numeric comparison.  Handle
     168             :                                    strings of digits with optional decimal
     169             :                                    point, but no exponential notation. */
     170             :   bool random;                  /* Sort by random hash of key.  */
     171             :   bool general_numeric;         /* Flag for general, numeric comparison.
     172             :                                    Handle numbers in exponential notation. */
     173             :   bool month;                   /* Flag for comparison by month name. */
     174             :   bool reverse;                 /* Reverse the sense of comparison. */
     175             :   struct keyfield *next;        /* Next keyfield to try. */
     176             : };
     177             : 
     178             : struct month
     179             : {
     180             :   char const *name;
     181             :   int val;
     182             : };
     183             : 
     184             : /* The name this program was run with. */
     185             : char *program_name;
     186             : 
     187             : /* FIXME: None of these tables work with multibyte character sets.
     188             :    Also, there are many other bugs when handling multibyte characters.
     189             :    One way to fix this is to rewrite `sort' to use wide characters
     190             :    internally, but doing this with good performance is a bit
     191             :    tricky.  */
     192             : 
     193             : /* Table of blanks.  */
     194             : static bool blanks[UCHAR_LIM];
     195             : 
     196             : /* Table of non-printing characters. */
     197             : static bool nonprinting[UCHAR_LIM];
     198             : 
     199             : /* Table of non-dictionary characters (not letters, digits, or blanks). */
     200             : static bool nondictionary[UCHAR_LIM];
     201             : 
     202             : /* Translation table folding lower case to upper.  */
     203             : static char fold_toupper[UCHAR_LIM];
     204             : 
     205             : #define MONTHS_PER_YEAR 12
     206             : 
     207             : /* Table mapping month names to integers.
     208             :    Alphabetic order allows binary search. */
     209             : static struct month monthtab[] =
     210             : {
     211             :   {"APR", 4},
     212             :   {"AUG", 8},
     213             :   {"DEC", 12},
     214             :   {"FEB", 2},
     215             :   {"JAN", 1},
     216             :   {"JUL", 7},
     217             :   {"JUN", 6},
     218             :   {"MAR", 3},
     219             :   {"MAY", 5},
     220             :   {"NOV", 11},
     221             :   {"OCT", 10},
     222             :   {"SEP", 9}
     223             : };
     224             : 
     225             : /* During the merge phase, the number of files to merge at once. */
     226             : #define NMERGE 16
     227             : 
     228             : /* Minimum size for a merge or check buffer.  */
     229             : #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
     230             : 
     231             : /* Minimum sort size; the code might not work with smaller sizes.  */
     232             : #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
     233             : 
     234             : /* The number of bytes needed for a merge or check buffer, which can
     235             :    function relatively efficiently even if it holds only one line.  If
     236             :    a longer line is seen, this value is increased.  */
     237             : static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
     238             : 
     239             : /* The approximate maximum number of bytes of main memory to use, as
     240             :    specified by the user.  Zero if the user has not specified a size.  */
     241             : static size_t sort_size;
     242             : 
     243             : /* The guessed size for non-regular files.  */
     244             : #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
     245             : 
     246             : /* Array of directory names in which any temporary files are to be created. */
     247             : static char const **temp_dirs;
     248             : 
     249             : /* Number of temporary directory names used.  */
     250             : static size_t temp_dir_count;
     251             : 
     252             : /* Number of allocated slots in temp_dirs.  */
     253             : static size_t temp_dir_alloc;
     254             : 
     255             : /* Flag to reverse the order of all comparisons. */
     256             : static bool reverse;
     257             : 
     258             : /* Flag for stable sort.  This turns off the last ditch bytewise
     259             :    comparison of lines, and instead leaves lines in the same order
     260             :    they were read if all keys compare equal.  */
     261             : static bool stable;
     262             : 
     263             : /* If TAB has this value, blanks separate fields.  */
     264             : enum { TAB_DEFAULT = CHAR_MAX + 1 };
     265             : 
     266             : /* Tab character separating fields.  If TAB_DEFAULT, then fields are
     267             :    separated by the empty string between a non-blank character and a blank
     268             :    character. */
     269             : static int tab = TAB_DEFAULT;
     270             : 
     271             : /* Flag to remove consecutive duplicate lines from the output.
     272             :    Only the last of a sequence of equal lines will be output. */
     273             : static bool unique;
     274             : 
     275             : /* Nonzero if any of the input files are the standard input. */
     276             : static bool have_read_stdin;
     277             : 
     278             : /* List of key field comparisons to be tried.  */
     279             : static struct keyfield *keylist;
     280             : 
     281             : /* Program used to (de)compress temp files.  Must accept -d.  */
     282             : static char const *compress_program;
     283             : 
     284             : static void sortlines_temp (struct line *, size_t, struct line *);
     285             : 
     286             : /* Report MESSAGE for FILE, then clean up and exit.
     287             :    If FILE is null, it represents standard output.  */
     288             : 
     289             : static void die (char const *, char const *) ATTRIBUTE_NORETURN;
     290             : static void
     291          21 : die (char const *message, char const *file)
     292             : {
     293          21 :   error (0, errno, "%s: %s", message, file ? file : _("standard output"));
     294          21 :   exit (SORT_FAILURE);
     295             : }
     296             : 
     297             : void
     298           7 : usage (int status)
     299             : {
     300           7 :   if (status != EXIT_SUCCESS)
     301           7 :     fprintf (stderr, _("Try `%s --help' for more information.\n"),
     302             :              program_name);
     303             :   else
     304             :     {
     305           0 :       printf (_("\
     306             : Usage: %s [OPTION]... [FILE]...\n\
     307             : "),
     308             :               program_name);
     309           0 :       fputs (_("\
     310             : Write sorted concatenation of all FILE(s) to standard output.\n\
     311             : \n\
     312             : "), stdout);
     313           0 :       fputs (_("\
     314             : Mandatory arguments to long options are mandatory for short options too.\n\
     315             : "), stdout);
     316           0 :       fputs (_("\
     317             : Ordering options:\n\
     318             : \n\
     319             : "), stdout);
     320           0 :       fputs (_("\
     321             :   -b, --ignore-leading-blanks  ignore leading blanks\n\
     322             :   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
     323             :   -f, --ignore-case           fold lower case to upper case characters\n\
     324             : "), stdout);
     325           0 :       fputs (_("\
     326             :   -g, --general-numeric-sort  compare according to general numerical value\n\
     327             :   -i, --ignore-nonprinting    consider only printable characters\n\
     328             :   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
     329             :   -n, --numeric-sort          compare according to string numerical value\n\
     330             :   -R, --random-sort           sort by random hash of keys\n\
     331             :       --random-source=FILE    get random bytes from FILE (default /dev/urandom)\n\
     332             :       --sort=WORD             sort according to WORD:\n\
     333             :                                 general-numeric -g, month -M, numeric -n,\n\
     334             :                                 random -R\n\
     335             :   -r, --reverse               reverse the result of comparisons\n\
     336             : \n\
     337             : "), stdout);
     338           0 :       fputs (_("\
     339             : Other options:\n\
     340             : \n\
     341             :   -c, --check, --check=diagnose-first  check for sorted input; do not sort\n\
     342             :   -C, --check=quiet, --check=silent  like -c, but do not report first bad line\n\
     343             :       --compress-program=PROG  compress temporaries with PROG;\n\
     344             :                               decompress them with PROG -d\n\
     345             :   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS2 (origin 1)\n\
     346             :   -m, --merge               merge already sorted files; do not sort\n\
     347             : "), stdout);
     348           0 :       fputs (_("\
     349             :   -o, --output=FILE         write result to FILE instead of standard output\n\
     350             :   -s, --stable              stabilize sort by disabling last-resort comparison\n\
     351             :   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
     352             : "), stdout);
     353           0 :       printf (_("\
     354             :   -t, --field-separator=SEP  use SEP instead of non-blank to blank transition\n\
     355             :   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
     356             :                               multiple options specify multiple directories\n\
     357             :   -u, --unique              with -c, check for strict ordering;\n\
     358             :                               without -c, output only the first of an equal run\n\
     359             : "), DEFAULT_TMPDIR);
     360           0 :       fputs (_("\
     361             :   -z, --zero-terminated     end lines with 0 byte, not newline\n\
     362             : "), stdout);
     363           0 :       fputs (HELP_OPTION_DESCRIPTION, stdout);
     364           0 :       fputs (VERSION_OPTION_DESCRIPTION, stdout);
     365           0 :       fputs (_("\
     366             : \n\
     367             : POS is F[.C][OPTS], where F is the field number and C the character position\n\
     368             : in the field; both are origin 1.  If neither -t nor -b is in effect, characters\n\
     369             : in a field are counted from the beginning of the preceding whitespace.  OPTS is\n\
     370             : one or more single-letter ordering options, which override global ordering\n\
     371             : options for that key.  If no key is given, use the entire line as the key.\n\
     372             : \n\
     373             : SIZE may be followed by the following multiplicative suffixes:\n\
     374             : "), stdout);
     375           0 :       fputs (_("\
     376             : % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
     377             : \n\
     378             : With no FILE, or when FILE is -, read standard input.\n\
     379             : \n\
     380             : *** WARNING ***\n\
     381             : The locale specified by the environment affects sort order.\n\
     382             : Set LC_ALL=C to get the traditional sort order that uses\n\
     383             : native byte values.\n\
     384             : "), stdout );
     385           0 :       emit_bug_reporting_address ();
     386             :     }
     387             : 
     388           7 :   exit (status);
     389             : }
     390             : 
     391             : /* For long options that have no equivalent short option, use a
     392             :    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
     393             : enum
     394             : {
     395             :   CHECK_OPTION = CHAR_MAX + 1,
     396             :   COMPRESS_PROGRAM_OPTION,
     397             :   RANDOM_SOURCE_OPTION,
     398             :   SORT_OPTION
     399             : };
     400             : 
     401             : static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uy:z";
     402             : 
     403             : static struct option const long_options[] =
     404             : {
     405             :   {"ignore-leading-blanks", no_argument, NULL, 'b'},
     406             :   {"check", optional_argument, NULL, CHECK_OPTION},
     407             :   {"compress-program", required_argument, NULL, COMPRESS_PROGRAM_OPTION},
     408             :   {"dictionary-order", no_argument, NULL, 'd'},
     409             :   {"ignore-case", no_argument, NULL, 'f'},
     410             :   {"general-numeric-sort", no_argument, NULL, 'g'},
     411             :   {"ignore-nonprinting", no_argument, NULL, 'i'},
     412             :   {"key", required_argument, NULL, 'k'},
     413             :   {"merge", no_argument, NULL, 'm'},
     414             :   {"month-sort", no_argument, NULL, 'M'},
     415             :   {"numeric-sort", no_argument, NULL, 'n'},
     416             :   {"random-sort", no_argument, NULL, 'R'},
     417             :   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
     418             :   {"sort", required_argument, NULL, SORT_OPTION},
     419             :   {"output", required_argument, NULL, 'o'},
     420             :   {"reverse", no_argument, NULL, 'r'},
     421             :   {"stable", no_argument, NULL, 's'},
     422             :   {"buffer-size", required_argument, NULL, 'S'},
     423             :   {"field-separator", required_argument, NULL, 't'},
     424             :   {"temporary-directory", required_argument, NULL, 'T'},
     425             :   {"unique", no_argument, NULL, 'u'},
     426             :   {"zero-terminated", no_argument, NULL, 'z'},
     427             :   {GETOPT_HELP_OPTION_DECL},
     428             :   {GETOPT_VERSION_OPTION_DECL},
     429             :   {NULL, 0, NULL, 0},
     430             : };
     431             : 
     432             : static char const *const check_args[] =
     433             : {
     434             :   "quiet", "silent", "diagnose-first", NULL
     435             : };
     436             : static char const check_types[] =
     437             : {
     438             :   'C', 'C', 'c'
     439             : };
     440             : ARGMATCH_VERIFY (check_args, check_types);
     441             : 
     442             : static char const *const sort_args[] =
     443             : {
     444             :   "general-numeric", "month", "numeric", "random", NULL
     445             : };
     446             : static char const sort_types[] =
     447             : {
     448             :   'g', 'M', 'n', 'R'
     449             : };
     450             : ARGMATCH_VERIFY (sort_args, sort_types);
     451             : 
     452             : /* The set of signals that are caught.  */
     453             : static sigset_t caught_signals;
     454             : 
     455             : /* Critical section status.  */
     456             : struct cs_status
     457             : {
     458             :   bool valid;
     459             :   sigset_t sigs;
     460             : };
     461             : 
     462             : /* Enter a critical section.  */
     463             : static struct cs_status
     464           0 : cs_enter (void)
     465             : {
     466             :   struct cs_status status;
     467           0 :   status.valid = (sigprocmask (SIG_BLOCK, &caught_signals, &status.sigs) == 0);
     468           0 :   return status;
     469             : }
     470             : 
     471             : /* Leave a critical section.  */
     472             : static void
     473           0 : cs_leave (struct cs_status status)
     474             : {
     475           0 :   if (status.valid)
     476             :     {
     477             :       /* Ignore failure when restoring the signal mask. */
     478           0 :       sigprocmask (SIG_SETMASK, &status.sigs, NULL);
     479             :     }
     480           0 : }
     481             : 
     482             : /* The list of temporary files. */
     483             : struct tempnode
     484             : {
     485             :   struct tempnode *volatile next;
     486             :   pid_t pid;     /* If compressed, the pid of compressor, else zero */
     487             :   char name[1];  /* Actual size is 1 + file name length.  */
     488             : };
     489             : static struct tempnode *volatile temphead;
     490             : static struct tempnode *volatile *temptail = &temphead;
     491             : 
     492             : struct sortfile
     493             : {
     494             :   char const *name;
     495             :   pid_t pid;     /* If compressed, the pid of compressor, else zero */
     496             : };
     497             : 
     498             : /* A table where we store compression process states.  We clean up all
     499             :    processes in a timely manner so as not to exhaust system resources,
     500             :    so we store the info on whether the process is still running, or has
     501             :    been reaped here.  */
     502             : static Hash_table *proctab;
     503             : 
     504             : enum { INIT_PROCTAB_SIZE = 47 };
     505             : 
     506             : enum procstate { ALIVE, ZOMBIE };
     507             : 
     508             : /* A proctab entry.  The COUNT field is there in case we fork a new
     509             :    compression process that has the same PID as an old zombie process
     510             :    that is still in the table (because the process to decompress the
     511             :    temp file it was associated with hasn't started yet).  */
     512             : struct procnode
     513             : {
     514             :   pid_t pid;
     515             :   enum procstate state;
     516             :   size_t count;
     517             : };
     518             : 
     519             : static size_t
     520           0 : proctab_hasher (const void *entry, size_t tabsize)
     521             : {
     522           0 :   const struct procnode *node = entry;
     523           0 :   return node->pid % tabsize;
     524             : }
     525             : 
     526             : static bool
     527           0 : proctab_comparator (const void *e1, const void *e2)
     528             : {
     529           0 :   const struct procnode *n1 = e1, *n2 = e2;
     530           0 :   return n1->pid == n2->pid;
     531             : }
     532             : 
     533             : /* The total number of forked processes (compressors and decompressors)
     534             :    that have not been reaped yet. */
     535             : static size_t nprocs;
     536             : 
     537             : /* The number of child processes we'll allow before we try to reap some. */
     538             : enum { MAX_PROCS_BEFORE_REAP = 2 };
     539             : 
     540             : /* If 0 < PID, wait for the child process with that PID to exit.
     541             :    If PID is -1, clean up a random child process which has finished and
     542             :    return the process ID of that child.  If PID is -1 and no processes
     543             :    have quit yet, return 0 without waiting.  */
     544             : 
     545             : static pid_t
     546           0 : reap (pid_t pid)
     547             : {
     548             :   int status;
     549           0 :   pid_t cpid = waitpid (pid, &status, pid < 0 ? WNOHANG : 0);
     550             : 
     551           0 :   if (cpid < 0)
     552           0 :     error (SORT_FAILURE, errno, _("waiting for %s [-d]"),
     553             :            compress_program);
     554           0 :   else if (0 < cpid)
     555             :     {
     556           0 :       if (! WIFEXITED (status) || WEXITSTATUS (status))
     557           0 :         error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"),
     558             :                compress_program);
     559           0 :       --nprocs;
     560             :     }
     561             : 
     562           0 :   return cpid;
     563             : }
     564             : 
     565             : /* Add the PID of a running compression process to proctab, or update
     566             :    the entry COUNT and STATE fields if it's already there.  This also
     567             :    creates the table for us the first time it's called.  */
     568             : 
     569             : static void
     570           0 : register_proc (pid_t pid)
     571             : {
     572             :   struct procnode test, *node;
     573             : 
     574           0 :   if (! proctab)
     575             :     {
     576           0 :       proctab = hash_initialize (INIT_PROCTAB_SIZE, NULL,
     577             :                                  proctab_hasher,
     578             :                                  proctab_comparator,
     579             :                                  free);
     580           0 :       if (! proctab)
     581           0 :         xalloc_die ();
     582             :     }
     583             : 
     584           0 :   test.pid = pid;
     585           0 :   node = hash_lookup (proctab, &test);
     586           0 :   if (node)
     587             :     {
     588           0 :       node->state = ALIVE;
     589           0 :       ++node->count;
     590             :     }
     591             :   else
     592             :     {
     593           0 :       node = xmalloc (sizeof *node);
     594           0 :       node->pid = pid;
     595           0 :       node->state = ALIVE;
     596           0 :       node->count = 1;
     597           0 :       hash_insert (proctab, node);
     598             :     }
     599           0 : }
     600             : 
     601             : /* This is called when we reap a random process.  We don't know
     602             :    whether we have reaped a compression process or a decompression
     603             :    process until we look in the table.  If there's an ALIVE entry for
     604             :    it, then we have reaped a compression process, so change the state
     605             :    to ZOMBIE.  Otherwise, it's a decompression processes, so ignore it.  */
     606             : 
     607             : static void
     608           0 : update_proc (pid_t pid)
     609             : {
     610             :   struct procnode test, *node;
     611             : 
     612           0 :   test.pid = pid;
     613           0 :   node = hash_lookup (proctab, &test);
     614           0 :   if (node)
     615           0 :     node->state = ZOMBIE;
     616           0 : }
     617             : 
     618             : /* This is for when we need to wait for a compression process to exit.
     619             :    If it has a ZOMBIE entry in the table then it's already dead and has
     620             :    been reaped.  Note that if there's an ALIVE entry for it, it still may
     621             :    already have died and been reaped if a second process was created with
     622             :    the same PID.  This is probably exceedingly rare, but to be on the safe
     623             :    side we will have to wait for any compression process with this PID.  */
     624             : 
     625             : static void
     626           0 : wait_proc (pid_t pid)
     627             : {
     628             :   struct procnode test, *node;
     629             : 
     630           0 :   test.pid = pid;
     631           0 :   node = hash_lookup (proctab, &test);
     632           0 :   if (node->state == ALIVE)
     633           0 :     reap (pid);
     634             : 
     635           0 :   node->state = ZOMBIE;
     636           0 :   if (! --node->count)
     637             :     {
     638           0 :       hash_delete (proctab, node);
     639           0 :       free (node);
     640             :     }
     641           0 : }
     642             : 
     643             : /* Keep reaping finished children as long as there are more to reap.
     644             :    This doesn't block waiting for any of them, it only reaps those
     645             :    that are already dead.  */
     646             : 
     647             : static void
     648           0 : reap_some (void)
     649             : {
     650             :   pid_t pid;
     651             : 
     652           0 :   while (0 < nprocs && (pid = reap (-1)))
     653           0 :     update_proc (pid);
     654           0 : }
     655             : 
     656             : /* Clean up any remaining temporary files.  */
     657             : 
     658             : static void
     659           0 : cleanup (void)
     660             : {
     661             :   struct tempnode const *node;
     662             : 
     663           0 :   for (node = temphead; node; node = node->next)
     664           0 :     unlink (node->name);
     665           0 :   temphead = NULL;
     666           0 : }
     667             : 
     668             : /* Cleanup actions to take when exiting.  */
     669             : 
     670             : static void
     671          41 : exit_cleanup (void)
     672             : {
     673          41 :   if (temphead)
     674             :     {
     675             :       /* Clean up any remaining temporary files in a critical section so
     676             :          that a signal handler does not try to clean them too.  */
     677           0 :       struct cs_status cs = cs_enter ();
     678           0 :       cleanup ();
     679           0 :       cs_leave (cs);
     680             :     }
     681             : 
     682          41 :   close_stdout ();
     683          41 : }
     684             : 
     685             : /* Create a new temporary file, returning its newly allocated tempnode.
     686             :    Store into *PFD the file descriptor open for writing.  */
     687             : 
     688             : static struct tempnode *
     689           0 : create_temp_file (int *pfd)
     690             : {
     691             :   static char const slashbase[] = "/sortXXXXXX";
     692             :   static size_t temp_dir_index;
     693             :   int fd;
     694             :   int saved_errno;
     695           0 :   char const *temp_dir = temp_dirs[temp_dir_index];
     696           0 :   size_t len = strlen (temp_dir);
     697           0 :   struct tempnode *node =
     698           0 :     xmalloc (offsetof (struct tempnode, name) + len + sizeof slashbase);
     699           0 :   char *file = node->name;
     700             :   struct cs_status cs;
     701             : 
     702           0 :   memcpy (file, temp_dir, len);
     703           0 :   memcpy (file + len, slashbase, sizeof slashbase);
     704           0 :   node->next = NULL;
     705           0 :   node->pid = 0;
     706           0 :   if (++temp_dir_index == temp_dir_count)
     707           0 :     temp_dir_index = 0;
     708             : 
     709             :   /* Create the temporary file in a critical section, to avoid races.  */
     710           0 :   cs = cs_enter ();
     711           0 :   fd = mkstemp (file);
     712           0 :   if (0 <= fd)
     713             :     {
     714           0 :       *temptail = node;
     715           0 :       temptail = &node->next;
     716             :     }
     717           0 :   saved_errno = errno;
     718           0 :   cs_leave (cs);
     719           0 :   errno = saved_errno;
     720             : 
     721           0 :   if (fd < 0)
     722           0 :     die (_("cannot create temporary file"), file);
     723             : 
     724           0 :   *pfd = fd;
     725           0 :   return node;
     726             : }
     727             : 
     728             : /* Return a stream for FILE, opened with mode HOW.  A null FILE means
     729             :    standard output; HOW should be "w".  When opening for input, "-"
     730             :    means standard input.  To avoid confusion, do not return file
     731             :    descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when
     732             :    opening an ordinary FILE.  */
     733             : 
     734             : static FILE *
     735          57 : xfopen (const char *file, const char *how)
     736             : {
     737             :   FILE *fp;
     738             : 
     739          57 :   if (!file)
     740          13 :     fp = stdout;
     741          44 :   else if (STREQ (file, "-") && *how == 'r')
     742             :     {
     743          24 :       have_read_stdin = true;
     744          24 :       fp = stdin;
     745             :     }
     746             :   else
     747             :     {
     748          20 :       fp = fopen (file, how);
     749          20 :       if (! fp)
     750           8 :         die (_("open failed"), file);
     751             :     }
     752             : 
     753          49 :   return fp;
     754             : }
     755             : 
     756             : /* Close FP, whose name is FILE, and report any errors.  */
     757             : 
     758             : static void
     759          36 : xfclose (FILE *fp, char const *file)
     760             : {
     761          36 :   switch (fileno (fp))
     762             :     {
     763          22 :     case STDIN_FILENO:
     764             :       /* Allow reading stdin from tty more than once.  */
     765          22 :       if (feof (fp))
     766          22 :         clearerr (fp);
     767          22 :       break;
     768             : 
     769          13 :     case STDOUT_FILENO:
     770             :       /* Don't close stdout just yet.  close_stdout does that.  */
     771          13 :       if (fflush (fp) != 0)
     772           0 :         die (_("fflush failed"), file);
     773          13 :       break;
     774             : 
     775           1 :     default:
     776           1 :       if (fclose (fp) != 0)
     777           0 :         die (_("close failed"), file);
     778           1 :       break;
     779             :     }
     780          36 : }
     781             : 
     782             : static void
     783           0 : dup2_or_die (int oldfd, int newfd)
     784             : {
     785           0 :   if (dup2 (oldfd, newfd) < 0)
     786           0 :     error (SORT_FAILURE, errno, _("dup2 failed"));
     787           0 : }
     788             : 
     789             : /* Fork a child process for piping to and do common cleanup.  The
     790             :    TRIES parameter tells us how many times to try to fork before
     791             :    giving up.  Return the PID of the child or -1 if fork failed.  */
     792             : 
     793             : static pid_t
     794           0 : pipe_fork (int pipefds[2], size_t tries)
     795             : {
     796             : #if HAVE_WORKING_FORK
     797             :   struct tempnode *saved_temphead;
     798             :   int saved_errno;
     799           0 :   unsigned int wait_retry = 1;
     800             :   pid_t pid IF_LINT (= -1);
     801             :   struct cs_status cs;
     802             : 
     803           0 :   if (pipe (pipefds) < 0)
     804           0 :     return -1;
     805             : 
     806           0 :   while (tries--)
     807             :     {
     808             :       /* This is so the child process won't delete our temp files
     809             :          if it receives a signal before exec-ing.  */
     810           0 :       cs = cs_enter ();
     811           0 :       saved_temphead = temphead;
     812           0 :       temphead = NULL;
     813             : 
     814           0 :       pid = fork ();
     815           0 :       saved_errno = errno;
     816           0 :       if (pid)
     817           0 :         temphead = saved_temphead;
     818             : 
     819           0 :       cs_leave (cs);
     820           0 :       errno = saved_errno;
     821             : 
     822           0 :       if (0 <= pid || errno != EAGAIN)
     823             :         break;
     824             :       else
     825             :         {
     826           0 :           sleep (wait_retry);
     827           0 :           wait_retry *= 2;
     828           0 :           reap_some ();
     829             :         }
     830             :     }
     831             : 
     832           0 :   if (pid < 0)
     833             :     {
     834           0 :       close (pipefds[0]);
     835           0 :       close (pipefds[1]);
     836             :     }
     837           0 :   else if (pid == 0)
     838             :     {
     839           0 :       close (STDIN_FILENO);
     840           0 :       close (STDOUT_FILENO);
     841             :     }
     842             :   else
     843           0 :     ++nprocs;
     844             : 
     845           0 :   return pid;
     846             : 
     847             : #else  /* ! HAVE_WORKING_FORK */
     848             :   return -1;
     849             : #endif
     850             : }
     851             : 
     852             : /* Create a temporary file and start a compression program to filter output
     853             :    to that file.  Set *PFP to the file handle and if *PPID is non-NULL,
     854             :    set it to the PID of the newly-created process.  */
     855             : 
     856             : static char *
     857           0 : create_temp (FILE **pfp, pid_t *ppid)
     858             : {
     859             :   int tempfd;
     860           0 :   struct tempnode *node = create_temp_file (&tempfd);
     861           0 :   char *name = node->name;
     862             : 
     863           0 :   if (compress_program)
     864             :     {
     865             :       int pipefds[2];
     866             : 
     867           0 :       node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS);
     868           0 :       if (0 < node->pid)
     869             :         {
     870           0 :           close (tempfd);
     871           0 :           close (pipefds[0]);
     872           0 :           tempfd = pipefds[1];
     873             : 
     874           0 :           register_proc (node->pid);
     875             :         }
     876           0 :       else if (node->pid == 0)
     877             :         {
     878           0 :           close (pipefds[1]);
     879           0 :           dup2_or_die (tempfd, STDOUT_FILENO);
     880           0 :           close (tempfd);
     881           0 :           dup2_or_die (pipefds[0], STDIN_FILENO);
     882           0 :           close (pipefds[0]);
     883             : 
     884           0 :           if (execlp (compress_program, compress_program, (char *) NULL) < 0)
     885           0 :             error (SORT_FAILURE, errno, _("couldn't execute %s"),
     886             :                    compress_program);
     887             :         }
     888             :       else
     889           0 :         node->pid = 0;
     890             :     }
     891             : 
     892           0 :   *pfp = fdopen (tempfd, "w");
     893           0 :   if (! *pfp)
     894           0 :     die (_("couldn't create temporary file"), name);
     895             : 
     896           0 :   if (ppid)
     897           0 :     *ppid = node->pid;
     898             : 
     899           0 :   return name;
     900             : }
     901             : 
     902             : /* Open a compressed temp file and start a decompression process through
     903             :    which to filter the input.  PID must be the valid processes ID of the
     904             :    process used to compress the file.  */
     905             : 
     906             : static FILE *
     907           0 : open_temp (const char *name, pid_t pid)
     908             : {
     909             :   int tempfd, pipefds[2];
     910             :   pid_t child_pid;
     911             :   FILE *fp;
     912             : 
     913           0 :   wait_proc (pid);
     914             : 
     915           0 :   tempfd = open (name, O_RDONLY);
     916           0 :   if (tempfd < 0)
     917           0 :     die (_("couldn't open temporary file"), name);
     918             : 
     919           0 :   child_pid = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS);
     920           0 :   if (0 < child_pid)
     921             :     {
     922           0 :       close (tempfd);
     923           0 :       close (pipefds[1]);
     924             :     }
     925           0 :   else if (child_pid == 0)
     926             :     {
     927           0 :       close (pipefds[0]);
     928           0 :       dup2_or_die (tempfd, STDIN_FILENO);
     929           0 :       close (tempfd);
     930           0 :       dup2_or_die (pipefds[1], STDOUT_FILENO);
     931           0 :       close (pipefds[1]);
     932             : 
     933           0 :       if (execlp (compress_program, compress_program, "-d", (char *) NULL) < 0)
     934           0 :         error (SORT_FAILURE, errno, _("couldn't execute %s -d"),
     935             :                compress_program);
     936             :     }
     937             :   else
     938           0 :     error (SORT_FAILURE, errno, _("couldn't create process for %s -d"),
     939             :            compress_program);
     940             : 
     941           0 :   fp = fdopen (pipefds[0], "r");
     942           0 :   if (! fp)
     943           0 :     die (_("couldn't create temporary file"), name);
     944             : 
     945           0 :   return fp;
     946             : }
     947             : 
     948             : static void
     949         102 : write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
     950             : {
     951         102 :   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
     952           0 :     die (_("write failed"), output_file);
     953         102 : }
     954             : 
     955             : /* Append DIR to the array of temporary directory names.  */
     956             : static void
     957          34 : add_temp_dir (char const *dir)
     958             : {
     959          34 :   if (temp_dir_count == temp_dir_alloc)
     960          34 :     temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc);
     961             : 
     962          34 :   temp_dirs[temp_dir_count++] = dir;
     963          34 : }
     964             : 
     965             : /* Remove NAME from the list of temporary files.  */
     966             : 
     967             : static void
     968           0 : zaptemp (const char *name)
     969             : {
     970             :   struct tempnode *volatile *pnode;
     971             :   struct tempnode *node;
     972             :   struct tempnode *next;
     973             :   int unlink_status;
     974           0 :   int unlink_errno = 0;
     975             :   struct cs_status cs;
     976             : 
     977           0 :   for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next)
     978           0 :     continue;
     979             : 
     980             :   /* Unlink the temporary file in a critical section to avoid races.  */
     981           0 :   next = node->next;
     982           0 :   cs = cs_enter ();
     983           0 :   unlink_status = unlink (name);
     984           0 :   unlink_errno = errno;
     985           0 :   *pnode = next;
     986           0 :   cs_leave (cs);
     987             : 
     988           0 :   if (unlink_status != 0)
     989           0 :     error (0, unlink_errno, _("warning: cannot remove: %s"), name);
     990           0 :   if (! next)
     991           0 :     temptail = pnode;
     992           0 :   free (node);
     993           0 : }
     994             : 
     995             : #if HAVE_NL_LANGINFO
     996             : 
     997             : static int
     998             : struct_month_cmp (const void *m1, const void *m2)
     999             : {
    1000             :   struct month const *month1 = m1;
    1001             :   struct month const *month2 = m2;
    1002             :   return strcmp (month1->name, month2->name);
    1003             : }
    1004             : 
    1005             : #endif
    1006             : 
    1007             : /* Initialize the character class tables. */
    1008             : 
    1009             : static void
    1010          41 : inittables (void)
    1011             : {
    1012             :   size_t i;
    1013             : 
    1014       10537 :   for (i = 0; i < UCHAR_LIM; ++i)
    1015             :     {
    1016       10496 :       blanks[i] = !! isblank (i);
    1017       10496 :       nonprinting[i] = ! isprint (i);
    1018       10496 :       nondictionary[i] = ! isalnum (i) && ! isblank (i);
    1019       10496 :       fold_toupper[i] = toupper (i);
    1020             :     }
    1021             : 
    1022             : #if HAVE_NL_LANGINFO
    1023             :   /* If we're not in the "C" locale, read different names for months.  */
    1024             :   if (hard_LC_TIME)
    1025             :     {
    1026             :       for (i = 0; i < MONTHS_PER_YEAR; i++)
    1027             :         {
    1028             :           char const *s;
    1029             :           size_t s_len;
    1030             :           size_t j;
    1031             :           char *name;
    1032             : 
    1033             :           s = (char *) nl_langinfo (ABMON_1 + i);
    1034             :           s_len = strlen (s);
    1035             :           monthtab[i].name = name = xmalloc (s_len + 1);
    1036             :           monthtab[i].val = i + 1;
    1037             : 
    1038             :           for (j = 0; j < s_len; j++)
    1039             :             name[j] = fold_toupper[to_uchar (s[j])];
    1040             :           name[j] = '\0';
    1041             :         }
    1042             :       qsort ((void *) monthtab, MONTHS_PER_YEAR,
    1043             :              sizeof *monthtab, struct_month_cmp);
    1044             :     }
    1045             : #endif
    1046          41 : }
    1047             : 
    1048             : /* Specify the amount of main memory to use when sorting.  */
    1049             : static void
    1050           0 : specify_sort_size (int oi, char c, char const *s)
    1051             : {
    1052             :   uintmax_t n;
    1053             :   char *suffix;
    1054           0 :   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
    1055             : 
    1056             :   /* The default unit is KiB.  */
    1057           0 :   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
    1058             :     {
    1059           0 :       if (n <= UINTMAX_MAX / 1024)
    1060           0 :         n *= 1024;
    1061             :       else
    1062           0 :         e = LONGINT_OVERFLOW;
    1063             :     }
    1064             : 
    1065             :   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
    1066           0 :   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
    1067           0 :     switch (suffix[0])
    1068             :       {
    1069           0 :       case 'b':
    1070           0 :         e = LONGINT_OK;
    1071           0 :         break;
    1072             : 
    1073           0 :       case '%':
    1074             :         {
    1075           0 :           double mem = physmem_total () * n / 100;
    1076             : 
    1077             :           /* Use "<", not "<=", to avoid problems with rounding.  */
    1078           0 :           if (mem < UINTMAX_MAX)
    1079             :             {
    1080           0 :               n = mem;
    1081           0 :               e = LONGINT_OK;
    1082             :             }
    1083             :           else
    1084           0 :             e = LONGINT_OVERFLOW;
    1085             :         }
    1086           0 :         break;
    1087             :       }
    1088             : 
    1089           0 :   if (e == LONGINT_OK)
    1090             :     {
    1091             :       /* If multiple sort sizes are specified, take the maximum, so
    1092             :          that option order does not matter.  */
    1093           0 :       if (n < sort_size)
    1094           0 :         return;
    1095             : 
    1096           0 :       sort_size = n;
    1097           0 :       if (sort_size == n)
    1098             :         {
    1099           0 :           sort_size = MAX (sort_size, MIN_SORT_SIZE);
    1100           0 :           return;
    1101             :         }
    1102             : 
    1103           0 :       e = LONGINT_OVERFLOW;
    1104             :     }
    1105             : 
    1106           0 :   xstrtol_fatal (e, oi, c, long_options, s);
    1107             : }
    1108             : 
    1109             : /* Return the default sort size.  */
    1110             : static size_t
    1111          26 : default_sort_size (void)
    1112             : {
    1113             :   /* Let MEM be available memory or 1/8 of total memory, whichever
    1114             :      is greater.  */
    1115          26 :   double avail = physmem_available ();
    1116          26 :   double total = physmem_total ();
    1117          26 :   double mem = MAX (avail, total / 8);
    1118             :   struct rlimit rlimit;
    1119             : 
    1120             :   /* Let SIZE be MEM, but no more than the maximum object size or
    1121             :      system resource limits.  Avoid the MIN macro here, as it is not
    1122             :      quite right when only one argument is floating point.  Don't
    1123             :      bother to check for values like RLIM_INFINITY since in practice
    1124             :      they are not much less than SIZE_MAX.  */
    1125          26 :   size_t size = SIZE_MAX;
    1126          26 :   if (mem < size)
    1127          26 :     size = mem;
    1128          26 :   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
    1129           0 :     size = rlimit.rlim_cur;
    1130             : #ifdef RLIMIT_AS
    1131          26 :   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
    1132           0 :     size = rlimit.rlim_cur;
    1133             : #endif
    1134             : 
    1135             :   /* Leave a large safety margin for the above limits, as failure can
    1136             :      occur when they are exceeded.  */
    1137          26 :   size /= 2;
    1138             : 
    1139             : #ifdef RLIMIT_RSS
    1140             :   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
    1141             :      Exceeding RSS is not fatal, but can be quite slow.  */
    1142          26 :   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
    1143           0 :     size = rlimit.rlim_cur / 16 * 15;
    1144             : #endif
    1145             : 
    1146             :   /* Use no less than the minimum.  */
    1147          26 :   return MAX (size, MIN_SORT_SIZE);
    1148             : }
    1149             : 
    1150             : /* Return the sort buffer size to use with the input files identified
    1151             :    by FPS and FILES, which are alternate names of the same files.
    1152             :    NFILES gives the number of input files; NFPS may be less.  Assume
    1153             :    that each input line requires LINE_BYTES extra bytes' worth of line
    1154             :    information.  Do not exceed the size bound specified by the user
    1155             :    (or a default size bound, if the user does not specify one).  */
    1156             : 
    1157             : static size_t
    1158          26 : sort_buffer_size (FILE *const *fps, size_t nfps,
    1159             :                   char *const *files, size_t nfiles,
    1160             :                   size_t line_bytes)
    1161             : {
    1162             :   /* A bound on the input size.  If zero, the bound hasn't been
    1163             :      determined yet.  */
    1164             :   static size_t size_bound;
    1165             : 
    1166             :   /* In the worst case, each input byte is a newline.  */
    1167          26 :   size_t worst_case_per_input_byte = line_bytes + 1;
    1168             : 
    1169             :   /* Keep enough room for one extra input line and an extra byte.
    1170             :      This extra room might be needed when preparing to read EOF.  */
    1171          26 :   size_t size = worst_case_per_input_byte + 1;
    1172             : 
    1173             :   size_t i;
    1174             : 
    1175         126 :   for (i = 0; i < nfiles; i++)
    1176             :     {
    1177             :       struct stat st;
    1178             :       off_t file_size;
    1179             :       size_t worst_case;
    1180             : 
    1181          66 :       if ((i < nfps ? fstat (fileno (fps[i]), &st)
    1182          25 :            : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
    1183           3 :            : stat (files[i], &st))
    1184          94 :           != 0)
    1185           3 :         die (_("stat failed"), files[i]);
    1186             : 
    1187          37 :       if (S_ISREG (st.st_mode))
    1188          24 :         file_size = st.st_size;
    1189             :       else
    1190             :         {
    1191             :           /* The file has unknown size.  If the user specified a sort
    1192             :              buffer size, use that; otherwise, guess the size.  */
    1193          13 :           if (sort_size)
    1194           0 :             return sort_size;
    1195          13 :           file_size = INPUT_FILE_SIZE_GUESS;
    1196             :         }
    1197             : 
    1198          37 :       if (! size_bound)
    1199             :         {
    1200          26 :           size_bound = sort_size;
    1201          26 :           if (! size_bound)
    1202          26 :             size_bound = default_sort_size ();
    1203             :         }
    1204             : 
    1205             :       /* Add the amount of memory needed to represent the worst case
    1206             :          where the input consists entirely of newlines followed by a
    1207             :          single non-newline.  Check for overflow.  */
    1208          37 :       worst_case = file_size * worst_case_per_input_byte + 1;
    1209          37 :       if (file_size != worst_case / worst_case_per_input_byte
    1210          37 :           || size_bound - size <= worst_case)
    1211           0 :         return size_bound;
    1212          37 :       size += worst_case;
    1213             :     }
    1214             : 
    1215          23 :   return size;
    1216             : }
    1217             : 
    1218             : /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
    1219             :    must be at least sizeof (struct line).  Allocate ALLOC bytes
    1220             :    initially.  */
    1221             : 
    1222             : static void
    1223          23 : initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
    1224             : {
    1225             :   /* Ensure that the line array is properly aligned.  If the desired
    1226             :      size cannot be allocated, repeatedly halve it until allocation
    1227             :      succeeds.  The smaller allocation may hurt overall performance,
    1228             :      but that's better than failing.  */
    1229             :   for (;;)
    1230             :     {
    1231          23 :       alloc += sizeof (struct line) - alloc % sizeof (struct line);
    1232          23 :       buf->buf = malloc (alloc);
    1233          23 :       if (buf->buf)
    1234          23 :         break;
    1235           0 :       alloc /= 2;
    1236           0 :       if (alloc <= line_bytes + 1)
    1237           0 :         xalloc_die ();
    1238             :     }
    1239             : 
    1240          23 :   buf->line_bytes = line_bytes;
    1241          23 :   buf->alloc = alloc;
    1242          23 :   buf->used = buf->left = buf->nlines = 0;
    1243          23 :   buf->eof = false;
    1244          23 : }
    1245             : 
    1246             : /* Return one past the limit of the line array.  */
    1247             : 
    1248             : static inline struct line *
    1249          69 : buffer_linelim (struct buffer const *buf)
    1250             : {
    1251          69 :   return (struct line *) (buf->buf + buf->alloc);
    1252             : }
    1253             : 
    1254             : /* Return a pointer to the first character of the field specified
    1255             :    by KEY in LINE. */
    1256             : 
    1257             : static char *
    1258           0 : begfield (const struct line *line, const struct keyfield *key)
    1259             : {
    1260           0 :   char *ptr = line->text, *lim = ptr + line->length - 1;
    1261           0 :   size_t sword = key->sword;
    1262           0 :   size_t schar = key->schar;
    1263             :   size_t remaining_bytes;
    1264             : 
    1265             :   /* The leading field separator itself is included in a field when -t
    1266             :      is absent.  */
    1267             : 
    1268           0 :   if (tab != TAB_DEFAULT)
    1269           0 :     while (ptr < lim && sword--)
    1270             :       {
    1271           0 :         while (ptr < lim && *ptr != tab)
    1272           0 :           ++ptr;
    1273           0 :         if (ptr < lim)
    1274           0 :           ++ptr;
    1275             :       }
    1276             :   else
    1277           0 :     while (ptr < lim && sword--)
    1278             :       {
    1279           0 :         while (ptr < lim && blanks[to_uchar (*ptr)])
    1280           0 :           ++ptr;
    1281           0 :         while (ptr < lim && !blanks[to_uchar (*ptr)])
    1282           0 :           ++ptr;
    1283             :       }
    1284             : 
    1285           0 :   if (key->skipsblanks)
    1286           0 :     while (ptr < lim && blanks[to_uchar (*ptr)])
    1287           0 :       ++ptr;
    1288             : 
    1289             :   /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
    1290           0 :   remaining_bytes = lim - ptr;
    1291           0 :   if (schar < remaining_bytes)
    1292           0 :     ptr += schar;
    1293             :   else
    1294           0 :     ptr = lim;
    1295             : 
    1296           0 :   return ptr;
    1297             : }
    1298             : 
    1299             : /* Return the limit of (a pointer to the first character after) the field
    1300             :    in LINE specified by KEY. */
    1301             : 
    1302             : static char *
    1303           0 : limfield (const struct line *line, const struct keyfield *key)
    1304             : {
    1305           0 :   char *ptr = line->text, *lim = ptr + line->length - 1;
    1306           0 :   size_t eword = key->eword, echar = key->echar;
    1307             :   size_t remaining_bytes;
    1308             : 
    1309             :   /* Move PTR past EWORD fields or to one past the last byte on LINE,
    1310             :      whichever comes first.  If there are more than EWORD fields, leave
    1311             :      PTR pointing at the beginning of the field having zero-based index,
    1312             :      EWORD.  If a delimiter character was specified (via -t), then that
    1313             :      `beginning' is the first character following the delimiting TAB.
    1314             :      Otherwise, leave PTR pointing at the first `blank' character after
    1315             :      the preceding field.  */
    1316           0 :   if (tab != TAB_DEFAULT)
    1317           0 :     while (ptr < lim && eword--)
    1318             :       {
    1319           0 :         while (ptr < lim && *ptr != tab)
    1320           0 :           ++ptr;
    1321           0 :         if (ptr < lim && (eword | echar))
    1322           0 :           ++ptr;
    1323             :       }
    1324             :   else
    1325           0 :     while (ptr < lim && eword--)
    1326             :       {
    1327           0 :         while (ptr < lim && blanks[to_uchar (*ptr)])
    1328           0 :           ++ptr;
    1329           0 :         while (ptr < lim && !blanks[to_uchar (*ptr)])
    1330           0 :           ++ptr;
    1331             :       }
    1332             : 
    1333             : #ifdef POSIX_UNSPECIFIED
    1334             :   /* The following block of code makes GNU sort incompatible with
    1335             :      standard Unix sort, so it's ifdef'd out for now.
    1336             :      The POSIX spec isn't clear on how to interpret this.
    1337             :      FIXME: request clarification.
    1338             : 
    1339             :      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
    1340             :      Date: Thu, 30 May 96 12:20:41 -0400
    1341             :      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
    1342             : 
    1343             :      [...]I believe I've found another bug in `sort'.
    1344             : 
    1345             :      $ cat /tmp/sort.in
    1346             :      a b c 2 d
    1347             :      pq rs 1 t
    1348             :      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
    1349             :      a b c 2 d
    1350             :      pq rs 1 t
    1351             :      $ /bin/sort -k1.7,1.7 </tmp/sort.in
    1352             :      pq rs 1 t
    1353             :      a b c 2 d
    1354             : 
    1355             :      Unix sort produced the answer I expected: sort on the single character
    1356             :      in column 7.  GNU sort produced different results, because it disagrees
    1357             :      on the interpretation of the key-end spec "M.N".  Unix sort reads this
    1358             :      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
    1359             :      "skip M-1 fields, then either N-1 characters or the rest of the current
    1360             :      field, whichever comes first".  This extra clause applies only to
    1361             :      key-ends, not key-starts.
    1362             :      */
    1363             : 
    1364             :   /* Make LIM point to the end of (one byte past) the current field.  */
    1365             :   if (tab != TAB_DEFAULT)
    1366             :     {
    1367             :       char *newlim;
    1368             :       newlim = memchr (ptr, tab, lim - ptr);
    1369             :       if (newlim)
    1370             :         lim = newlim;
    1371             :     }
    1372             :   else
    1373             :     {
    1374             :       char *newlim;
    1375             :       newlim = ptr;
    1376             :       while (newlim < lim && blanks[to_uchar (*newlim)])
    1377             :         ++newlim;
    1378             :       while (newlim < lim && !blanks[to_uchar (*newlim)])
    1379             :         ++newlim;
    1380             :       lim = newlim;
    1381             :     }
    1382             : #endif
    1383             : 
    1384             :   /* If we're ignoring leading blanks when computing the End
    1385             :      of the field, don't start counting bytes until after skipping
    1386             :      past any leading blanks. */
    1387           0 :   if (key->skipeblanks)
    1388           0 :     while (ptr < lim && blanks[to_uchar (*ptr)])
    1389           0 :       ++ptr;
    1390             : 
    1391             :   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
    1392           0 :   remaining_bytes = lim - ptr;
    1393           0 :   if (echar < remaining_bytes)
    1394           0 :     ptr += echar;
    1395             :   else
    1396           0 :     ptr = lim;
    1397             : 
    1398           0 :   return ptr;
    1399             : }
    1400             : 
    1401             : /* Fill BUF reading from FP, moving buf->left bytes from the end
    1402             :    of buf->buf to the beginning first.  If EOF is reached and the
    1403             :    file wasn't terminated by a newline, supply one.  Set up BUF's line
    1404             :    table too.  FILE is the name of the file corresponding to FP.
    1405             :    Return true if some input was read.  */
    1406             : 
    1407             : static bool
    1408          33 : fillbuf (struct buffer *buf, FILE *fp, char const *file)
    1409             : {
    1410          33 :   struct keyfield const *key = keylist;
    1411          33 :   char eol = eolchar;
    1412          33 :   size_t line_bytes = buf->line_bytes;
    1413          33 :   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
    1414             : 
    1415          33 :   if (buf->eof)
    1416           0 :     return false;
    1417             : 
    1418          33 :   if (buf->used != buf->left)
    1419             :     {
    1420           0 :       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
    1421           0 :       buf->used = buf->left;
    1422           0 :       buf->nlines = 0;
    1423             :     }
    1424             : 
    1425             :   for (;;)
    1426           0 :     {
    1427          33 :       char *ptr = buf->buf + buf->used;
    1428          33 :       struct line *linelim = buffer_linelim (buf);
    1429          33 :       struct line *line = linelim - buf->nlines;
    1430          33 :       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
    1431          33 :       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
    1432             : 
    1433          66 :       while (line_bytes + 1 < avail)
    1434             :         {
    1435             :           /* Read as many bytes as possible, but do not read so many
    1436             :              bytes that there might not be enough room for the
    1437             :              corresponding line array.  The worst case is when the
    1438             :              rest of the input file consists entirely of newlines,
    1439             :              except that the last byte is not a newline.  */
    1440          33 :           size_t readsize = (avail - 1) / (line_bytes + 1);
    1441          33 :           size_t bytes_read = fread (ptr, 1, readsize, fp);
    1442          33 :           char *ptrlim = ptr + bytes_read;
    1443             :           char *p;
    1444          33 :           avail -= bytes_read;
    1445             : 
    1446          33 :           if (bytes_read != readsize)
    1447             :             {
    1448          33 :               if (ferror (fp))
    1449          10 :                 die (_("read failed"), file);
    1450          23 :               if (feof (fp))
    1451             :                 {
    1452          23 :                   buf->eof = true;
    1453          23 :                   if (buf->buf == ptrlim)
    1454           0 :                     return false;
    1455          23 :                   if (ptrlim[-1] != eol)
    1456           1 :                     *ptrlim++ = eol;
    1457             :                 }
    1458             :             }
    1459             : 
    1460             :           /* Find and record each line in the just-read input.  */
    1461         148 :           while ((p = memchr (ptr, eol, ptrlim - ptr)))
    1462             :             {
    1463         102 :               ptr = p + 1;
    1464         102 :               line--;
    1465         102 :               line->text = line_start;
    1466         102 :               line->length = ptr - line_start;
    1467         102 :               mergesize = MAX (mergesize, line->length);
    1468         102 :               avail -= line_bytes;
    1469             : 
    1470         102 :               if (key)
    1471             :                 {
    1472             :                   /* Precompute the position of the first key for
    1473             :                      efficiency.  */
    1474           0 :                   line->keylim = (key->eword == SIZE_MAX
    1475             :                                   ? p
    1476           0 :                                   : limfield (line, key));
    1477             : 
    1478           0 :                   if (key->sword != SIZE_MAX)
    1479           0 :                     line->keybeg = begfield (line, key);
    1480             :                   else
    1481             :                     {
    1482           0 :                       if (key->skipsblanks)
    1483           0 :                         while (blanks[to_uchar (*line_start)])
    1484           0 :                           line_start++;
    1485           0 :                       line->keybeg = line_start;
    1486             :                     }
    1487             :                 }
    1488             : 
    1489         102 :               line_start = ptr;
    1490             :             }
    1491             : 
    1492          23 :           ptr = ptrlim;
    1493          23 :           if (buf->eof)
    1494          23 :             break;
    1495             :         }
    1496             : 
    1497          23 :       buf->used = ptr - buf->buf;
    1498          23 :       buf->nlines = buffer_linelim (buf) - line;
    1499          23 :       if (buf->nlines != 0)
    1500             :         {
    1501          23 :           buf->left = ptr - line_start;
    1502          23 :           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
    1503          23 :           return true;
    1504             :         }
    1505             : 
    1506             :       {
    1507             :         /* The current input line is too long to fit in the buffer.
    1508             :            Double the buffer size and try again, keeping it properly
    1509             :            aligned.  */
    1510           0 :         size_t line_alloc = buf->alloc / sizeof (struct line);
    1511           0 :         buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line));
    1512           0 :         buf->alloc = line_alloc * sizeof (struct line);
    1513             :       }
    1514             :     }
    1515             : }
    1516             : 
    1517             : /* Compare strings A and B as numbers without explicitly converting them to
    1518             :    machine numbers.  Comparatively slow for short strings, but asymptotically
    1519             :    hideously fast. */
    1520             : 
    1521             : static int
    1522           0 : numcompare (const char *a, const char *b)
    1523             : {
    1524           0 :   while (blanks[to_uchar (*a)])
    1525           0 :     a++;
    1526           0 :   while (blanks[to_uchar (*b)])
    1527           0 :     b++;
    1528             : 
    1529           0 :   return strnumcmp (a, b, decimal_point, thousands_sep);
    1530             : }
    1531             : 
    1532             : static int
    1533           0 : general_numcompare (const char *sa, const char *sb)
    1534             : {
    1535             :   /* FIXME: add option to warn about failed conversions.  */
    1536             :   /* FIXME: maybe add option to try expensive FP conversion
    1537             :      only if A and B can't be compared more cheaply/accurately.  */
    1538             : 
    1539             :   char *ea;
    1540             :   char *eb;
    1541           0 :   double a = strtod (sa, &ea);
    1542           0 :   double b = strtod (sb, &eb);
    1543             : 
    1544             :   /* Put conversion errors at the start of the collating sequence.  */
    1545           0 :   if (sa == ea)
    1546           0 :     return sb == eb ? 0 : -1;
    1547           0 :   if (sb == eb)
    1548           0 :     return 1;
    1549             : 
    1550             :   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
    1551             :      conversion errors but before numbers; sort them by internal
    1552             :      bit-pattern, for lack of a more portable alternative.  */
    1553           0 :   return (a < b ? -1
    1554           0 :           : a > b ? 1
    1555           0 :           : a == b ? 0
    1556           0 :           : b == b ? -1
    1557           0 :           : a == a ? 1
    1558           0 :           : memcmp ((char *) &a, (char *) &b, sizeof a));
    1559             : }
    1560             : 
    1561             : /* Return an integer in 1..12 of the month name MONTH with length LEN.
    1562             :    Return 0 if the name in S is not recognized.  */
    1563             : 
    1564             : static int
    1565           0 : getmonth (char const *month, size_t len)
    1566             : {
    1567           0 :   size_t lo = 0;
    1568           0 :   size_t hi = MONTHS_PER_YEAR;
    1569           0 :   char const *monthlim = month + len;
    1570             : 
    1571             :   for (;;)
    1572             :     {
    1573           0 :       if (month == monthlim)
    1574           0 :         return 0;
    1575           0 :       if (!blanks[to_uchar (*month)])
    1576           0 :         break;
    1577           0 :       ++month;
    1578             :     }
    1579             : 
    1580             :   do
    1581             :     {
    1582           0 :       size_t ix = (lo + hi) / 2;
    1583           0 :       char const *m = month;
    1584           0 :       char const *n = monthtab[ix].name;
    1585             : 
    1586           0 :       for (;; m++, n++)
    1587             :         {
    1588           0 :           if (!*n)
    1589           0 :             return monthtab[ix].val;
    1590           0 :           if (m == monthlim || fold_toupper[to_uchar (*m)] < to_uchar (*n))
    1591             :             {
    1592           0 :               hi = ix;
    1593           0 :               break;
    1594             :             }
    1595           0 :           else if (fold_toupper[to_uchar (*m)] > to_uchar (*n))
    1596             :             {
    1597           0 :               lo = ix + 1;
    1598           0 :               break;
    1599             :             }
    1600             :         }
    1601             :     }
    1602           0 :   while (lo < hi);
    1603             : 
    1604           0 :   return 0;
    1605             : }
    1606             : 
    1607             : /* A source of random data.  */
    1608             : static struct randread_source *randread_source;
    1609             : 
    1610             : /* Return the Ith randomly-generated state.  The caller must invoke
    1611             :    random_state (H) for all H less than I before invoking random_state
    1612             :    (I).  */
    1613             : 
    1614             : static struct md5_ctx
    1615           0 : random_state (size_t i)
    1616             : {
    1617             :   /* An array of states resulting from the random data, and counts of
    1618             :      its used and allocated members.  */
    1619             :   static struct md5_ctx *state;
    1620             :   static size_t used;
    1621             :   static size_t allocated;
    1622             : 
    1623           0 :   struct md5_ctx *s = &state[i];
    1624             : 
    1625           0 :   if (used <= i)
    1626             :     {
    1627             :       unsigned char buf[MD5_DIGEST_SIZE];
    1628             : 
    1629           0 :       used++;
    1630             : 
    1631           0 :       if (allocated <= i)
    1632             :         {
    1633           0 :           state = X2NREALLOC (state, &allocated);
    1634           0 :           s = &state[i];
    1635             :         }
    1636             : 
    1637           0 :       randread (randread_source, buf, sizeof buf);
    1638           0 :       md5_init_ctx (s);
    1639           0 :       md5_process_bytes (buf, sizeof buf, s);
    1640             :     }
    1641             : 
    1642           0 :   return *s;
    1643             : }
    1644             : 
    1645             : /* Compare the hashes of TEXTA with length LENGTHA to those of TEXTB
    1646             :    with length LENGTHB.  Return negative if less, zero if equal,
    1647             :    positive if greater.  */
    1648             : 
    1649             : static int
    1650           0 : cmp_hashes (char const *texta, size_t lena,
    1651             :             char const *textb, size_t lenb)
    1652             : {
    1653             :   /* Try random hashes until a pair of hashes disagree.  But if the
    1654             :      first pair of random hashes agree, check whether the keys are
    1655             :      identical and if so report no difference.  */
    1656             :   int diff;
    1657             :   size_t i;
    1658           0 :   for (i = 0; ; i++)
    1659           0 :     {
    1660             :       uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
    1661             :       struct md5_ctx s[2];
    1662           0 :       s[0] = s[1] = random_state (i);
    1663           0 :       md5_process_bytes (texta, lena, &s[0]);  md5_finish_ctx (&s[0], dig[0]);
    1664           0 :       md5_process_bytes (textb, lenb, &s[1]);  md5_finish_ctx (&s[1], dig[1]);
    1665           0 :       diff = memcmp (dig[0], dig[1], sizeof dig[0]);
    1666           0 :       if (diff != 0)
    1667           0 :         break;
    1668           0 :       if (i == 0 && lena == lenb && memcmp (texta, textb, lena) == 0)
    1669           0 :         break;
    1670             :     }
    1671             : 
    1672           0 :   return diff;
    1673             : }
    1674             : 
    1675             : /* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB)
    1676             :    using one or more random hash functions.  */
    1677             : 
    1678             : static int
    1679           0 : compare_random (char *restrict texta, size_t lena,
    1680             :                 char *restrict textb, size_t lenb)
    1681             : {
    1682             :   int diff;
    1683             : 
    1684           0 :   if (! hard_LC_COLLATE)
    1685           0 :     diff = cmp_hashes (texta, lena, textb, lenb);
    1686             :   else
    1687             :     {
    1688             :       /* Transform the text into the basis of comparison, so that byte
    1689             :          strings that would otherwise considered to be equal are
    1690             :          considered equal here even if their bytes differ.  */
    1691             : 
    1692           0 :       char *buf = NULL;
    1693             :       char stackbuf[4000];
    1694           0 :       size_t tlena = xmemxfrm (stackbuf, sizeof stackbuf, texta, lena);
    1695           0 :       bool a_fits = tlena <= sizeof stackbuf;
    1696           0 :       size_t tlenb = xmemxfrm ((a_fits ? stackbuf + tlena : NULL),
    1697             :                                (a_fits ? sizeof stackbuf - tlena : 0),
    1698             :                                textb, lenb);
    1699             : 
    1700           0 :       if (a_fits && tlena + tlenb <= sizeof stackbuf)
    1701           0 :         buf = stackbuf;
    1702             :       else
    1703             :         {
    1704             :           /* Adding 1 to the buffer size lets xmemxfrm run a bit
    1705             :              faster by avoiding the need for an extra buffer copy.  */
    1706           0 :           buf = xmalloc (tlena + tlenb + 1);
    1707           0 :           xmemxfrm (buf, tlena + 1, texta, lena);
    1708           0 :           xmemxfrm (buf + tlena, tlenb + 1, textb, lenb);
    1709             :         }
    1710             : 
    1711           0 :       diff = cmp_hashes (buf, tlena, buf + tlena, tlenb);
    1712             : 
    1713           0 :       if (buf != stackbuf)
    1714           0 :         free (buf);
    1715             :     }
    1716             : 
    1717           0 :   return diff;
    1718             : }
    1719             : 
    1720             : /* Compare two lines A and B trying every key in sequence until there
    1721             :    are no more keys or a difference is found. */
    1722             : 
    1723             : static int
    1724           0 : keycompare (const struct line *a, const struct line *b)
    1725             : {
    1726           0 :   struct keyfield const *key = keylist;
    1727             : 
    1728             :   /* For the first iteration only, the key positions have been
    1729             :      precomputed for us. */
    1730           0 :   char *texta = a->keybeg;
    1731           0 :   char *textb = b->keybeg;
    1732           0 :   char *lima = a->keylim;
    1733           0 :   char *limb = b->keylim;
    1734             : 
    1735             :   int diff;
    1736             : 
    1737             :   for (;;)
    1738           0 :     {
    1739           0 :       char const *translate = key->translate;
    1740           0 :       bool const *ignore = key->ignore;
    1741             : 
    1742             :       /* Find the lengths. */
    1743           0 :       size_t lena = lima <= texta ? 0 : lima - texta;
    1744           0 :       size_t lenb = limb <= textb ? 0 : limb - textb;
    1745             : 
    1746             :       /* Actually compare the fields. */
    1747             : 
    1748           0 :       if (key->random)
    1749           0 :         diff = compare_random (texta, lena, textb, lenb);
    1750           0 :       else if (key->numeric | key->general_numeric)
    1751             :         {
    1752           0 :           char savea = *lima, saveb = *limb;
    1753             : 
    1754           0 :           *lima = *limb = '\0';
    1755           0 :           diff = ((key->numeric ? numcompare : general_numcompare)
    1756             :                   (texta, textb));
    1757           0 :           *lima = savea, *limb = saveb;
    1758             :         }
    1759           0 :       else if (key->month)
    1760           0 :         diff = getmonth (texta, lena) - getmonth (textb, lenb);
    1761             :       /* Sorting like this may become slow, so in a simple locale the user
    1762             :          can select a faster sort that is similar to ascii sort.  */
    1763           0 :       else if (hard_LC_COLLATE)
    1764             :         {
    1765           0 :           if (ignore || translate)
    1766           0 :             {
    1767             :               char buf[4000];
    1768           0 :               size_t size = lena + 1 + lenb + 1;
    1769           0 :               char *copy_a = (size <= sizeof buf ? buf : xmalloc (size));
    1770           0 :               char *copy_b = copy_a + lena + 1;
    1771             :               size_t new_len_a, new_len_b, i;
    1772             : 
    1773             :               /* Ignore and/or translate chars before comparing.  */
    1774           0 :               for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
    1775             :                 {
    1776           0 :                   if (i < lena)
    1777             :                     {
    1778           0 :                       copy_a[new_len_a] = (translate
    1779           0 :                                            ? translate[to_uchar (texta[i])]
    1780           0 :                                            : texta[i]);
    1781           0 :                       if (!ignore || !ignore[to_uchar (texta[i])])
    1782           0 :                         ++new_len_a;
    1783             :                     }
    1784           0 :                   if (i < lenb)
    1785             :                     {
    1786           0 :                       copy_b[new_len_b] = (translate
    1787           0 :                                            ? translate[to_uchar (textb[i])]
    1788           0 :                                            : textb [i]);
    1789           0 :                       if (!ignore || !ignore[to_uchar (textb[i])])
    1790           0 :                         ++new_len_b;
    1791             :                     }
    1792             :                 }
    1793             : 
    1794           0 :               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
    1795             : 
    1796           0 :               if (sizeof buf < size)
    1797           0 :                 free (copy_a);
    1798             :             }
    1799           0 :           else if (lena == 0)
    1800           0 :             diff = - NONZERO (lenb);
    1801           0 :           else if (lenb == 0)
    1802           0 :             goto greater;
    1803             :           else
    1804           0 :             diff = xmemcoll (texta, lena, textb, lenb);
    1805             :         }
    1806           0 :       else if (ignore)
    1807             :         {
    1808             : #define CMP_WITH_IGNORE(A, B)                                           \
    1809             :   do                                                                    \
    1810             :     {                                                                   \
    1811             :           for (;;)                                                      \
    1812             :             {                                                           \
    1813             :               while (texta < lima && ignore[to_uchar (*texta)])              \
    1814             :                 ++texta;                                                \
    1815             :               while (textb < limb && ignore[to_uchar (*textb)])              \
    1816             :                 ++textb;                                                \
    1817             :               if (! (texta < lima && textb < limb))                       \
    1818             :                 break;                                                  \
    1819             :               diff = to_uchar (A) - to_uchar (B);                       \
    1820             :               if (diff)                                                 \
    1821             :                 goto not_equal;                                         \
    1822             :               ++texta;                                                  \
    1823             :               ++textb;                                                  \
    1824             :             }                                                           \
    1825             :                                                                         \
    1826             :           diff = (texta < lima) - (textb < limb);                 \
    1827             :     }                                                                   \
    1828             :   while (0)
    1829             : 
    1830           0 :           if (translate)
    1831           0 :             CMP_WITH_IGNORE (translate[to_uchar (*texta)],
    1832             :                              translate[to_uchar (*textb)]);
    1833             :           else
    1834           0 :             CMP_WITH_IGNORE (*texta, *textb);
    1835             :         }
    1836           0 :       else if (lena == 0)
    1837           0 :         diff = - NONZERO (lenb);
    1838           0 :       else if (lenb == 0)
    1839           0 :         goto greater;
    1840             :       else
    1841             :         {
    1842           0 :           if (translate)
    1843             :             {
    1844           0 :               while (texta < lima && textb < limb)
    1845             :                 {
    1846           0 :                   diff = (to_uchar (translate[to_uchar (*texta++)])
    1847           0 :                           - to_uchar (translate[to_uchar (*textb++)]));
    1848           0 :                   if (diff)
    1849           0 :                     goto not_equal;
    1850             :                 }
    1851             :             }
    1852             :           else
    1853             :             {
    1854           0 :               diff = memcmp (texta, textb, MIN (lena, lenb));
    1855           0 :               if (diff)
    1856           0 :                 goto not_equal;
    1857             :             }
    1858           0 :           diff = lena < lenb ? -1 : lena != lenb;
    1859             :         }
    1860             : 
    1861           0 :       if (diff)
    1862           0 :         goto not_equal;
    1863             : 
    1864           0 :       key = key->next;
    1865           0 :       if (! key)
    1866           0 :         break;
    1867             : 
    1868             :       /* Find the beginning and limit of the next field.  */
    1869           0 :       if (key->eword != SIZE_MAX)
    1870           0 :         lima = limfield (a, key), limb = limfield (b, key);
    1871             :       else
    1872           0 :         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
    1873             : 
    1874           0 :       if (key->sword != SIZE_MAX)
    1875           0 :         texta = begfield (a, key), textb = begfield (b, key);
    1876             :       else
    1877             :         {
    1878           0 :           texta = a->text, textb = b->text;
    1879           0 :           if (key->skipsblanks)
    1880             :             {
    1881           0 :               while (texta < lima && blanks[to_uchar (*texta)])
    1882           0 :                 ++texta;
    1883           0 :               while (textb < limb && blanks[to_uchar (*textb)])
    1884           0 :                 ++textb;
    1885             :             }
    1886             :         }
    1887             :     }
    1888             : 
    1889           0 :   return 0;
    1890             : 
    1891           0 :  greater:
    1892           0 :   diff = 1;
    1893           0 :  not_equal:
    1894           0 :   return key->reverse ? -diff : diff;
    1895             : }
    1896             : 
    1897             : /* Compare two lines A and B, returning negative, zero, or positive
    1898             :    depending on whether A compares less than, equal to, or greater than B. */
    1899             : 
    1900             : static int
    1901         165 : compare (const struct line *a, const struct line *b)
    1902             : {
    1903             :   int diff;
    1904             :   size_t alen, blen;
    1905             : 
    1906             :   /* First try to compare on the specified keys (if any).
    1907             :      The only two cases with no key at all are unadorned sort,
    1908             :      and unadorned sort -r. */
    1909         165 :   if (keylist)
    1910             :     {
    1911           0 :       diff = keycompare (a, b);
    1912           0 :       if (diff | unique | stable)
    1913           0 :         return diff;
    1914             :     }
    1915             : 
    1916             :   /* If the keys all compare equal (or no keys were specified)
    1917             :      fall through to the default comparison.  */
    1918         165 :   alen = a->length - 1, blen = b->length - 1;
    1919             : 
    1920         165 :   if (alen == 0)
    1921         139 :     diff = - NONZERO (blen);
    1922          26 :   else if (blen == 0)
    1923          23 :     diff = 1;
    1924           3 :   else if (hard_LC_COLLATE)
    1925           0 :     diff = xmemcoll (a->text, alen, b->text, blen);
    1926           3 :   else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
    1927           2 :     diff = alen < blen ? -1 : alen != blen;
    1928             : 
    1929         165 :   return reverse ? -diff : diff;
    1930             : }
    1931             : 
    1932             : /* Check that the lines read from FILE_NAME come in order.  Return
    1933             :    true if they are in order.  If CHECKONLY == 'c', also print a
    1934             :    diagnostic (FILE_NAME, line number, contents of line) to stderr if
    1935             :    they are not in order.  */
    1936             : 
    1937             : static bool
    1938           0 : check (char const *file_name, char checkonly)
    1939             : {
    1940           0 :   FILE *fp = xfopen (file_name, "r");
    1941             :   struct buffer buf;            /* Input buffer. */
    1942             :   struct line temp;             /* Copy of previous line. */
    1943           0 :   size_t alloc = 0;
    1944           0 :   uintmax_t line_number = 0;
    1945           0 :   struct keyfield const *key = keylist;
    1946           0 :   bool nonunique = ! unique;
    1947           0 :   bool ordered = true;
    1948             : 
    1949           0 :   initbuf (&buf, sizeof (struct line),
    1950             :            MAX (merge_buffer_size, sort_size));
    1951           0 :   temp.text = NULL;
    1952             : 
    1953           0 :   while (fillbuf (&buf, fp, file_name))
    1954             :     {
    1955           0 :       struct line const *line = buffer_linelim (&buf);
    1956           0 :       struct line const *linebase = line - buf.nlines;
    1957             : 
    1958             :       /* Make sure the line saved from the old buffer contents is
    1959             :          less than or equal to the first line of the new buffer. */
    1960           0 :       if (alloc && nonunique <= compare (&temp, line - 1))
    1961             :         {
    1962           0 :         found_disorder:
    1963             :           {
    1964           0 :             if (checkonly == 'c')
    1965             :               {
    1966           0 :                 struct line const *disorder_line = line - 1;
    1967           0 :                 uintmax_t disorder_line_number =
    1968           0 :                   buffer_linelim (&buf) - disorder_line + line_number;
    1969             :                 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
    1970           0 :                 fprintf (stderr, _("%s: %s:%s: disorder: "),
    1971             :                          program_name, file_name,
    1972             :                          umaxtostr (disorder_line_number, hr_buf));
    1973           0 :                 write_bytes (disorder_line->text, disorder_line->length,
    1974             :                              stderr, _("standard error"));
    1975             :               }
    1976             : 
    1977           0 :             ordered = false;
    1978           0 :             break;
    1979             :           }
    1980             :         }
    1981             : 
    1982             :       /* Compare each line in the buffer with its successor.  */
    1983           0 :       while (linebase < --line)
    1984           0 :         if (nonunique <= compare (line, line - 1))
    1985           0 :           goto found_disorder;
    1986             : 
    1987           0 :       line_number += buf.nlines;
    1988             : 
    1989             :       /* Save the last line of the buffer.  */
    1990           0 :       if (alloc < line->length)
    1991             :         {
    1992             :           do
    1993             :             {
    1994           0 :               alloc *= 2;
    1995           0 :               if (! alloc)
    1996             :                 {
    1997           0 :                   alloc = line->length;
    1998           0 :                   break;
    1999             :                 }
    2000             :             }
    2001           0 :           while (alloc < line->length);
    2002             : 
    2003           0 :           temp.text = xrealloc (temp.text, alloc);
    2004             :         }
    2005           0 :       memcpy (temp.text, line->text, line->length);
    2006           0 :       temp.length = line->length;
    2007           0 :       if (key)
    2008             :         {
    2009           0 :           temp.keybeg = temp.text + (line->keybeg - line->text);
    2010           0 :           temp.keylim = temp.text + (line->keylim - line->text);
    2011             :         }
    2012             :     }
    2013             : 
    2014           0 :   xfclose (fp, file_name);
    2015           0 :   free (buf.buf);
    2016           0 :   free (temp.text);
    2017           0 :   return ordered;
    2018             : }
    2019             : 
    2020             : /* Merge lines from FILES onto OFP.  NTEMPS is the number of temporary
    2021             :    files (all of which are at the start of the FILES array), and
    2022             :    NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE.
    2023             :    Close input and output files before returning.
    2024             :    OUTPUT_FILE gives the name of the output file.  If it is NULL,
    2025             :    the output file is standard output.  If OFP is NULL, the output
    2026             :    file has not been opened yet (or written to, if standard output).  */
    2027             : 
    2028             : static void
    2029           0 : mergefps (struct sortfile *files, size_t ntemps, size_t nfiles,
    2030             :           FILE *ofp, char const *output_file)
    2031             : {
    2032             :   FILE *fps[NMERGE];            /* Input streams for each file.  */
    2033             :   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
    2034             :   struct line saved;            /* Saved line storage for unique check. */
    2035           0 :   struct line const *savedline = NULL;
    2036             :                                 /* &saved if there is a saved line. */
    2037           0 :   size_t savealloc = 0;         /* Size allocated for the saved line. */
    2038             :   struct line const *cur[NMERGE]; /* Current line in each line table. */
    2039             :   struct line const *base[NMERGE]; /* Base of each line table.  */
    2040             :   size_t ord[NMERGE];           /* Table representing a permutation of fps,
    2041             :                                    such that cur[ord[0]] is the smallest line
    2042             :                                    and will be next output. */
    2043             :   size_t i;
    2044             :   size_t j;
    2045             :   size_t t;
    2046           0 :   struct keyfield const *key = keylist;
    2047           0 :   saved.text = NULL;
    2048             : 
    2049             :   /* Read initial lines from each input file. */
    2050           0 :   for (i = 0; i < nfiles; )
    2051             :     {
    2052           0 :       fps[i] = (files[i].pid
    2053           0 :                 ? open_temp (files[i].name, files[i].pid)
    2054           0 :                 : xfopen (files[i].name, "r"));
    2055           0 :       initbuf (&buffer[i], sizeof (struct line),
    2056           0 :                MAX (merge_buffer_size, sort_size / nfiles));
    2057           0 :       if (fillbuf (&buffer[i], fps[i], files[i].name))
    2058             :         {
    2059           0 :           struct line const *linelim = buffer_linelim (&buffer[i]);
    2060           0 :           cur[i] = linelim - 1;
    2061           0 :           base[i] = linelim - buffer[i].nlines;
    2062           0 :           i++;
    2063             :         }
    2064             :       else
    2065             :         {
    2066             :           /* fps[i] is empty; eliminate it from future consideration.  */
    2067           0 :           xfclose (fps[i], files[i].name);
    2068           0 :           if (i < ntemps)
    2069             :             {
    2070           0 :               ntemps--;
    2071           0 :               zaptemp (files[i].name);
    2072             :             }
    2073           0 :           free (buffer[i].buf);
    2074           0 :           --nfiles;
    2075           0 :           for (j = i; j < nfiles; ++j)
    2076           0 :             files[j] = files[j + 1];
    2077             :         }
    2078             :     }
    2079             : 
    2080           0 :   if (! ofp)
    2081           0 :     ofp = xfopen (output_file, "w");
    2082             : 
    2083             :   /* Set up the ord table according to comparisons among input lines.
    2084             :      Since this only reorders two items if one is strictly greater than
    2085             :      the other, it is stable. */
    2086           0 :   for (i = 0; i < nfiles; ++i)
    2087           0 :     ord[i] = i;
    2088           0 :   for (i = 1; i < nfiles; ++i)
    2089           0 :     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
    2090           0 :       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
    2091             : 
    2092             :   /* Repeatedly output the smallest line until no input remains. */
    2093           0 :   while (nfiles)
    2094             :     {
    2095           0 :       struct line const *smallest = cur[ord[0]];
    2096             : 
    2097             :       /* If uniquified output is turned on, output only the first of
    2098             :          an identical series of lines. */
    2099           0 :       if (unique)
    2100             :         {
    2101           0 :           if (savedline && compare (savedline, smallest))
    2102             :             {
    2103           0 :               savedline = NULL;
    2104           0 :               write_bytes (saved.text, saved.length, ofp, output_file);
    2105             :             }
    2106           0 :           if (!savedline)
    2107             :             {
    2108           0 :               savedline = &saved;
    2109           0 :               if (savealloc < smallest->length)
    2110             :                 {
    2111             :                   do
    2112           0 :                     if (! savealloc)
    2113             :                       {
    2114           0 :                         savealloc = smallest->length;
    2115           0 :                         break;
    2116             :                       }
    2117           0 :                   while ((savealloc *= 2) < smallest->length);
    2118             : 
    2119           0 :                   saved.text = xrealloc (saved.text, savealloc);
    2120             :                 }
    2121           0 :               saved.length = smallest->length;
    2122           0 :               memcpy (saved.text, smallest->text, saved.length);
    2123           0 :               if (key)
    2124             :                 {
    2125           0 :                   saved.keybeg =
    2126           0 :                     saved.text + (smallest->keybeg - smallest->text);
    2127           0 :                   saved.keylim =
    2128           0 :                     saved.text + (smallest->keylim - smallest->text);
    2129             :                 }
    2130             :             }
    2131             :         }
    2132             :       else
    2133           0 :         write_bytes (smallest->text, smallest->length, ofp, output_file);
    2134             : 
    2135             :       /* Check if we need to read more lines into core. */
    2136           0 :       if (base[ord[0]] < smallest)
    2137           0 :         cur[ord[0]] = smallest - 1;
    2138             :       else
    2139             :         {
    2140           0 :           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name))
    2141             :             {
    2142           0 :               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
    2143           0 :               cur[ord[0]] = linelim - 1;
    2144           0 :               base[ord[0]] = linelim - buffer[ord[0]].nlines;
    2145             :             }
    2146             :           else
    2147             :             {
    2148             :               /* We reached EOF on fps[ord[0]].  */
    2149           0 :               for (i = 1; i < nfiles; ++i)
    2150           0 :                 if (ord[i] > ord[0])
    2151           0 :                   --ord[i];
    2152           0 :               --nfiles;
    2153           0 :               xfclose (fps[ord[0]], files[ord[0]].name);
    2154           0 :               if (ord[0] < ntemps)
    2155             :                 {
    2156           0 :                   ntemps--;
    2157           0 :                   zaptemp (files[ord[0]].name);
    2158             :                 }
    2159           0 :               free (buffer[ord[0]].buf);
    2160           0 :               for (i = ord[0]; i < nfiles; ++i)
    2161             :                 {
    2162           0 :                   fps[i] = fps[i + 1];
    2163           0 :                   files[i] = files[i + 1];
    2164           0 :                   buffer[i] = buffer[i + 1];
    2165           0 :                   cur[i] = cur[i + 1];
    2166           0 :                   base[i] = base[i + 1];
    2167             :                 }
    2168           0 :               for (i = 0; i < nfiles; ++i)
    2169           0 :                 ord[i] = ord[i + 1];
    2170           0 :               continue;
    2171             :             }
    2172             :         }
    2173             : 
    2174             :       /* The new line just read in may be larger than other lines
    2175             :          already in main memory; push it back in the queue until we
    2176             :          encounter a line larger than it.  Optimize for the common
    2177             :          case where the new line is smallest.  */
    2178             :       {
    2179           0 :         size_t lo = 1;
    2180           0 :         size_t hi = nfiles;
    2181           0 :         size_t probe = lo;
    2182           0 :         size_t ord0 = ord[0];
    2183             :         size_t count_of_smaller_lines;
    2184             : 
    2185           0 :         while (lo < hi)
    2186             :           {
    2187           0 :             int cmp = compare (cur[ord0], cur[ord[probe]]);
    2188           0 :             if (cmp < 0 || (cmp == 0 && ord0 < ord[probe]))
    2189           0 :               hi = probe;
    2190             :             else
    2191           0 :               lo = probe + 1;
    2192           0 :             probe = (lo + hi) / 2;
    2193             :           }
    2194             : 
    2195           0 :         count_of_smaller_lines = lo - 1;
    2196           0 :         for (j = 0; j < count_of_smaller_lines; j++)
    2197           0 :           ord[j] = ord[j + 1];
    2198           0 :         ord[count_of_smaller_lines] = ord0;
    2199             :       }
    2200             : 
    2201             :       /* Free up some resources every once in a while.  */
    2202           0 :       if (MAX_PROCS_BEFORE_REAP < nprocs)
    2203           0 :         reap_some ();
    2204             :     }
    2205             : 
    2206           0 :   if (unique && savedline)
    2207             :     {
    2208           0 :       write_bytes (saved.text, saved.length, ofp, output_file);
    2209           0 :       free (saved.text);
    2210             :     }
    2211             : 
    2212           0 :   xfclose (ofp, output_file);
    2213           0 : }
    2214             : 
    2215             : /* Merge into T the two sorted arrays of lines LO (with NLO members)
    2216             :    and HI (with NHI members).  T, LO, and HI point just past their
    2217             :    respective arrays, and the arrays are in reverse order.  NLO and
    2218             :    NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
    2219             : 
    2220             : static inline void
    2221         119 : mergelines (struct line *t,
    2222             :             struct line const *lo, size_t nlo,
    2223             :             struct line const *hi, size_t nhi)
    2224             : {
    2225             :   for (;;)
    2226         195 :     if (compare (lo - 1, hi - 1) <= 0)
    2227             :       {
    2228          98 :         *--t = *--lo;
    2229          98 :         if (! --nlo)
    2230             :           {
    2231             :             /* HI - NHI equalled T - (NLO + NHI) when this function
    2232             :                began.  Therefore HI must equal T now, and there is no
    2233             :                need to copy from HI to T.  */
    2234          38 :             return;
    2235             :           }
    2236             :       }
    2237             :     else
    2238             :       {
    2239          21 :         *--t = *--hi;
    2240          21 :         if (! --nhi)
    2241             :           {
    2242             :             do
    2243           6 :               *--t = *--lo;
    2244           6 :             while (--nlo);
    2245             : 
    2246           5 :             return;
    2247             :           }
    2248             :       }
    2249             : }
    2250             : 
    2251             : /* Sort the array LINES with NLINES members, using TEMP for temporary space.
    2252             :    NLINES must be at least 2.
    2253             :    The input and output arrays are in reverse order, and LINES and
    2254             :    TEMP point just past the end of their respective arrays.
    2255             : 
    2256             :    Use a recursive divide-and-conquer algorithm, in the style
    2257             :    suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
    2258             :    the optimization suggested by exercise 5.2.4-10; this requires room
    2259             :    for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
    2260             :    writes that this memory optimization was originally published by
    2261             :    D. A. Bell, Comp J. 1 (1958), 75.  */
    2262             : 
    2263             : static void
    2264          50 : sortlines (struct line *lines, size_t nlines, struct line *temp)
    2265             : {
    2266          50 :   if (nlines == 2)
    2267             :     {
    2268          22 :       if (0 < compare (&lines[-1], &lines[-2]))
    2269             :         {
    2270           1 :           struct line tmp = lines[-1];
    2271           1 :           lines[-1] = lines[-2];
    2272           1 :           lines[-2] = tmp;
    2273             :         }
    2274             :     }
    2275             :   else
    2276             :     {
    2277          28 :       size_t nlo = nlines / 2;
    2278          28 :       size_t nhi = nlines - nlo;
    2279          28 :       struct line *lo = lines;
    2280          28 :       struct line *hi = lines - nlo;
    2281          28 :       struct line *sorted_lo = temp;
    2282             : 
    2283          28 :       sortlines (hi, nhi, temp);
    2284          28 :       if (1 < nlo)
    2285          24 :         sortlines_temp (lo, nlo, sorted_lo);
    2286             :       else
    2287           4 :         sorted_lo[-1] = lo[-1];
    2288             : 
    2289          28 :       mergelines (lines, sorted_lo, nlo, hi, nhi);
    2290             :     }
    2291          50 : }
    2292             : 
    2293             : /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
    2294             :    rather than sorting in place.  */
    2295             : 
    2296             : static void
    2297          39 : sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
    2298             : {
    2299          39 :   if (nlines == 2)
    2300             :     {
    2301             :       /* Declare `swap' as int, not bool, to work around a bug
    2302             :          <http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html>
    2303             :          in the IBM xlc 6.0.0.0 compiler in 64-bit mode.  */
    2304          24 :       int swap = (0 < compare (&lines[-1], &lines[-2]));
    2305          24 :       temp[-1] = lines[-1 - swap];
    2306          24 :       temp[-2] = lines[-2 + swap];
    2307             :     }
    2308             :   else
    2309             :     {
    2310          15 :       size_t nlo = nlines / 2;
    2311          15 :       size_t nhi = nlines - nlo;
    2312          15 :       struct line *lo = lines;
    2313          15 :       struct line *hi = lines - nlo;
    2314          15 :       struct line *sorted_hi = temp - nlo;
    2315             : 
    2316          15 :       sortlines_temp (hi, nhi, sorted_hi);
    2317          15 :       if (1 < nlo)
    2318           9 :         sortlines (lo, nlo, temp);
    2319             : 
    2320          15 :       mergelines (temp, lo, nlo, sorted_hi, nhi);
    2321             :     }
    2322          39 : }
    2323             : 
    2324             : /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is
    2325             :    the same as OUTFILE.  If found, merge the found instances (and perhaps
    2326             :    some other files) into a temporary file so that it can in turn be
    2327             :    merged into OUTFILE without destroying OUTFILE before it is completely
    2328             :    read.  Return the new value of NFILES, which differs from the old if
    2329             :    some merging occurred.
    2330             : 
    2331             :    This test ensures that an otherwise-erroneous use like
    2332             :    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
    2333             :    It's not clear that POSIX requires this nicety.
    2334             :    Detect common error cases, but don't try to catch obscure cases like
    2335             :    "cat ... FILE ... | sort -m -o FILE"
    2336             :    where traditional "sort" doesn't copy the input and where
    2337             :    people should know that they're getting into trouble anyway.
    2338             :    Catching these obscure cases would slow down performance in
    2339             :    common cases.  */
    2340             : 
    2341             : static size_t
    2342           0 : avoid_trashing_input (struct sortfile *files, size_t ntemps,
    2343             :                       size_t nfiles, char const *outfile)
    2344             : {
    2345             :   size_t i;
    2346           0 :   bool got_outstat = false;
    2347             :   struct stat outstat;
    2348             : 
    2349           0 :   for (i = ntemps; i < nfiles; i++)
    2350             :     {
    2351           0 :       bool is_stdin = STREQ (files[i].name, "-");
    2352             :       bool same;
    2353             :       struct stat instat;
    2354             : 
    2355           0 :       if (outfile && STREQ (outfile, files[i].name) && !is_stdin)
    2356           0 :         same = true;
    2357             :       else
    2358             :         {
    2359           0 :           if (! got_outstat)
    2360             :             {
    2361           0 :               if ((outfile
    2362           0 :                    ? stat (outfile, &outstat)
    2363           0 :                    : fstat (STDOUT_FILENO, &outstat))
    2364           0 :                   != 0)
    2365           0 :                 break;
    2366           0 :               got_outstat = true;
    2367             :             }
    2368             : 
    2369           0 :           same = (((is_stdin
    2370           0 :                     ? fstat (STDIN_FILENO, &instat)
    2371           0 :                     : stat (files[i].name, &instat))
    2372           0 :                    == 0)
    2373           0 :                   && SAME_INODE (instat, outstat));
    2374             :         }
    2375             : 
    2376           0 :       if (same)
    2377             :         {
    2378             :           FILE *tftp;
    2379             :           pid_t pid;
    2380           0 :           char *temp = create_temp (&tftp, &pid);
    2381           0 :           mergefps (&files[i],0, nfiles - i, tftp, temp);
    2382           0 :           files[i].name = temp;
    2383           0 :           files[i].pid = pid;
    2384           0 :           return i + 1;
    2385             :         }
    2386             :     }
    2387             : 
    2388           0 :   return nfiles;
    2389             : }
    2390             : 
    2391             : /* Merge the input FILES.  NTEMPS is the number of files at the
    2392             :    start of FILES that are temporary; it is zero at the top level.
    2393             :    NFILES is the total number of files.  Put the output in
    2394             :    OUTPUT_FILE; a null OUTPUT_FILE stands for standard output.  */
    2395             : 
    2396             : static void
    2397           0 : merge (struct sortfile *files, size_t ntemps, size_t nfiles,
    2398             :        char const *output_file)
    2399             : {
    2400           0 :   while (NMERGE < nfiles)
    2401             :     {
    2402             :       /* Number of input files processed so far.  */
    2403             :       size_t in;
    2404             : 
    2405             :       /* Number of output files generated so far.  */
    2406             :       size_t out;
    2407             : 
    2408             :       /* nfiles % NMERGE; this counts input files that are left over
    2409             :          after all full-sized merges have been done.  */
    2410             :       size_t remainder;
    2411             : 
    2412             :       /* Number of easily-available slots at the next loop iteration.  */
    2413             :       size_t cheap_slots;
    2414             : 
    2415             :       /* Do as many NMERGE-size merges as possible.  */
    2416           0 :       for (out = in = 0; out < nfiles / NMERGE; out++, in += NMERGE)
    2417             :         {
    2418             :           FILE *tfp;
    2419             :           pid_t pid;
    2420           0 :           char *temp = create_temp (&tfp, &pid);
    2421           0 :           size_t nt = MIN (ntemps, NMERGE);
    2422           0 :           ntemps -= nt;
    2423           0 :           mergefps (&files[in], nt, NMERGE, tfp, temp);
    2424           0 :           files[out].name = temp;
    2425           0 :           files[out].pid = pid;
    2426             :         }
    2427             : 
    2428           0 :       remainder = nfiles - in;
    2429           0 :       cheap_slots = NMERGE - out % NMERGE;
    2430             : 
    2431           0 :       if (cheap_slots < remainder)
    2432             :         {
    2433             :           /* So many files remain that they can't all be put into the last
    2434             :              NMERGE-sized output window.  Do one more merge.  Merge as few
    2435             :              files as possible, to avoid needless I/O.  */
    2436           0 :           size_t nshortmerge = remainder - cheap_slots + 1;
    2437             :           FILE *tfp;
    2438             :           pid_t pid;
    2439           0 :           char *temp = create_temp (&tfp, &pid);
    2440           0 :           size_t nt = MIN (ntemps, nshortmerge);
    2441           0 :           ntemps -= nt;
    2442           0 :           mergefps (&files[in], nt, nshortmerge, tfp, temp);
    2443           0 :           files[out].name = temp;
    2444           0 :           files[out++].pid = pid;
    2445           0 :           in += nshortmerge;
    2446             :         }
    2447             : 
    2448             :       /* Put the remaining input files into the last NMERGE-sized output
    2449             :          window, so they will be merged in the next pass.  */
    2450           0 :       memmove(&files[out], &files[in], (nfiles - in) * sizeof *files);
    2451           0 :       ntemps += out;
    2452           0 :       nfiles -= in - out;
    2453             :     }
    2454             : 
    2455           0 :   nfiles = avoid_trashing_input (files, ntemps, nfiles, output_file);
    2456           0 :   mergefps (files, ntemps, nfiles, NULL, output_file);
    2457           0 : }
    2458             : 
    2459             : /* Sort NFILES FILES onto OUTPUT_FILE. */
    2460             : 
    2461             : static void
    2462          34 : sort (char * const *files, size_t nfiles, char const *output_file)
    2463             : {
    2464             :   struct buffer buf;
    2465          34 :   size_t ntemps = 0;
    2466          34 :   bool output_file_created = false;
    2467             : 
    2468          34 :   buf.alloc = 0;
    2469             : 
    2470          78 :   while (nfiles)
    2471             :     {
    2472             :       char const *temp_output;
    2473          44 :       char const *file = *files;
    2474          44 :       FILE *fp = xfopen (file, "r");
    2475             :       FILE *tfp;
    2476          36 :       size_t bytes_per_line = (2 * sizeof (struct line)
    2477             :                                - sizeof (struct line) / 2);
    2478             : 
    2479          36 :       if (! buf.alloc)
    2480          26 :         initbuf (&buf, bytes_per_line,
    2481             :                  sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
    2482          33 :       buf.eof = false;
    2483          33 :       files++;
    2484          33 :       nfiles--;
    2485             : 
    2486          66 :       while (fillbuf (&buf, fp, file))
    2487             :         {
    2488             :           struct line *line;
    2489             :           struct line *linebase;
    2490             : 
    2491          23 :           if (buf.eof && nfiles
    2492          20 :               && (bytes_per_line + 1
    2493          10 :                   < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
    2494             :             {
    2495             :               /* End of file, but there is more input and buffer room.
    2496             :                  Concatenate the next input file; this is faster in
    2497             :                  the usual case.  */
    2498          10 :               buf.left = buf.used;
    2499          10 :               break;
    2500             :             }
    2501             : 
    2502          13 :           line = buffer_linelim (&buf);
    2503          13 :           linebase = line - buf.nlines;
    2504          13 :           if (1 < buf.nlines)
    2505          13 :             sortlines (line, buf.nlines, linebase);
    2506          13 :           if (buf.eof && !nfiles && !ntemps && !buf.left)
    2507             :             {
    2508          13 :               xfclose (fp, file);
    2509          13 :               tfp = xfopen (output_file, "w");
    2510          13 :               temp_output = output_file;
    2511          13 :               output_file_created = true;
    2512             :             }
    2513             :           else
    2514             :             {
    2515           0 :               ++ntemps;
    2516           0 :               temp_output = create_temp (&tfp, NULL);
    2517             :             }
    2518             : 
    2519             :           do
    2520             :             {
    2521         102 :               line--;
    2522         102 :               write_bytes (line->text, line->length, tfp, temp_output);
    2523         102 :               if (unique)
    2524           0 :                 while (linebase < line && compare (line, line - 1) == 0)
    2525           0 :                   line--;
    2526             :             }
    2527         102 :           while (linebase < line);
    2528             : 
    2529          13 :           xfclose (tfp, temp_output);
    2530             : 
    2531             :           /* Free up some resources every once in a while.  */
    2532          13 :           if (MAX_PROCS_BEFORE_REAP < nprocs)
    2533           0 :             reap_some ();
    2534             : 
    2535          13 :           if (output_file_created)
    2536          26 :             goto finish;
    2537             :         }
    2538          10 :       xfclose (fp, file);
    2539             :     }
    2540             : 
    2541           0 :  finish:
    2542          13 :   free (buf.buf);
    2543             : 
    2544          13 :   if (! output_file_created)
    2545             :     {
    2546             :       size_t i;
    2547           0 :       struct tempnode *node = temphead;
    2548           0 :       struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles);
    2549           0 :       for (i = 0; node; i++)
    2550             :         {
    2551           0 :           tempfiles[i].name = node->name;
    2552           0 :           tempfiles[i].pid = node->pid;
    2553           0 :           node = node->next;
    2554             :         }
    2555           0 :       merge (tempfiles, ntemps, ntemps, output_file);
    2556           0 :       free (tempfiles);
    2557             :     }
    2558          13 : }
    2559             : 
    2560             : /* Insert a malloc'd copy of key KEY_ARG at the end of the key list.  */
    2561             : 
    2562             : static void
    2563           8 : insertkey (struct keyfield *key_arg)
    2564             : {
    2565             :   struct keyfield **p;
    2566           8 :   struct keyfield *key = xmemdup (key_arg, sizeof *key);
    2567             : 
    2568           8 :   for (p = &keylist; *p; p = &(*p)->next)
    2569           0 :     continue;
    2570           8 :   *p = key;
    2571           8 :   key->next = NULL;
    2572           8 : }
    2573             : 
    2574             : /* Report a bad field specification SPEC, with extra info MSGID.  */
    2575             : 
    2576             : static void badfieldspec (char const *, char const *)
    2577             :      ATTRIBUTE_NORETURN;
    2578             : static void
    2579           0 : badfieldspec (char const *spec, char const *msgid)
    2580             : {
    2581           0 :   error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
    2582             :          _(msgid), quote (spec));
    2583           0 :   abort ();
    2584             : }
    2585             : 
    2586             : /* Report incompatible options.  */
    2587             : 
    2588             : static void incompatible_options (char const *) ATTRIBUTE_NORETURN;
    2589             : static void
    2590           0 : incompatible_options (char const *opts)
    2591             : {
    2592           0 :   error (SORT_FAILURE, 0, _("options `-%s' are incompatible"), opts);
    2593           0 :   abort ();
    2594             : }
    2595             : 
    2596             : /* Check compatibility of ordering options.  */
    2597             : 
    2598             : static void
    2599          34 : check_ordering_compatibility (void)
    2600             : {
    2601             :   struct keyfield const *key;
    2602             : 
    2603          42 :   for (key = keylist; key; key = key->next)
    2604          16 :     if ((1 < (key->random + key->numeric + key->general_numeric + key->month
    2605           8 :               + !!key->ignore))
    2606           8 :         || (key->random && key->translate))
    2607             :       {
    2608             :         char opts[7];
    2609           0 :         char *p = opts;
    2610           0 :         if (key->ignore == nondictionary)
    2611           0 :           *p++ = 'd';
    2612           0 :         if (key->translate)
    2613           0 :           *p++ = 'f';
    2614           0 :         if (key->general_numeric)
    2615           0 :           *p++ = 'g';
    2616           0 :         if (key->ignore == nonprinting)
    2617           0 :           *p++ = 'i';
    2618           0 :         if (key->month)
    2619           0 :           *p++ = 'M';
    2620           0 :         if (key->numeric)
    2621           0 :           *p++ = 'n';
    2622           0 :         if (key->random)
    2623           0 :           *p++ = 'R';
    2624           0 :         *p = '\0';
    2625           0 :         incompatible_options (opts);
    2626             :       }
    2627          34 : }
    2628             : 
    2629             : /* Parse the leading integer in STRING and store the resulting value
    2630             :    (which must fit into size_t) into *VAL.  Return the address of the
    2631             :    suffix after the integer.  If the value is too large, silently
    2632             :    substitute SIZE_MAX.  If MSGID is NULL, return NULL after
    2633             :    failure; otherwise, report MSGID and exit on failure.  */
    2634             : 
    2635             : static char const *
    2636           3 : parse_field_count (char const *string, size_t *val, char const *msgid)
    2637             : {
    2638             :   char *suffix;
    2639             :   uintmax_t n;
    2640             : 
    2641           3 :   switch (xstrtoumax (string, &suffix, 10, &n, ""))
    2642             :     {
    2643           0 :     case LONGINT_OK:
    2644             :     case LONGINT_INVALID_SUFFIX_CHAR:
    2645           0 :       *val = n;
    2646           0 :       if (*val == n)
    2647           0 :         break;
    2648             :       /* Fall through.  */
    2649             :     case LONGINT_OVERFLOW:
    2650             :     case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
    2651           0 :       *val = SIZE_MAX;
    2652           0 :       break;
    2653             : 
    2654           3 :     case LONGINT_INVALID:
    2655           3 :       if (msgid)
    2656           0 :         error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"),
    2657             :                _(msgid), quote (string));
    2658           3 :       return NULL;
    2659             :     }
    2660             : 
    2661           0 :   return suffix;
    2662             : }
    2663             : 
    2664             : /* Handle interrupts and hangups. */
    2665             : 
    2666             : static void
    2667           0 : sighandler (int sig)
    2668             : {
    2669             :   if (! SA_NOCLDSTOP)
    2670             :     signal (sig, SIG_IGN);
    2671             : 
    2672           0 :   cleanup ();
    2673             : 
    2674           0 :   signal (sig, SIG_DFL);
    2675           0 :   raise (sig);
    2676           0 : }
    2677             : 
    2678             : /* Set the ordering options for KEY specified in S.
    2679             :    Return the address of the first character in S that
    2680             :    is not a valid ordering option.
    2681             :    BLANKTYPE is the kind of blanks that 'b' should skip. */
    2682             : 
    2683             : static char *
    2684           8 : set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
    2685             : {
    2686          24 :   while (*s)
    2687             :     {
    2688           8 :       switch (*s)
    2689             :         {
    2690           8 :         case 'b':
    2691           8 :           if (blanktype == bl_start || blanktype == bl_both)
    2692           8 :             key->skipsblanks = true;
    2693           8 :           if (blanktype == bl_end || blanktype == bl_both)
    2694           8 :             key->skipeblanks = true;
    2695           8 :           break;
    2696           0 :         case 'd':
    2697           0 :           key->ignore = nondictionary;
    2698           0 :           break;
    2699           0 :         case 'f':
    2700           0 :           key->translate = fold_toupper;
    2701           0 :           break;
    2702           0 :         case 'g':
    2703           0 :           key->general_numeric = true;
    2704           0 :           break;
    2705           0 :         case 'i':
    2706             :           /* Option order should not matter, so don't let -i override
    2707             :              -d.  -d implies -i, but -i does not imply -d.  */
    2708           0 :           if (! key->ignore)
    2709           0 :             key->ignore = nonprinting;
    2710           0 :           break;
    2711           0 :         case 'M':
    2712           0 :           key->month = true;
    2713           0 :           break;
    2714           0 :         case 'n':
    2715           0 :           key->numeric = true;
    2716           0 :           break;
    2717           0 :         case 'R':
    2718           0 :           key->random = true;
    2719           0 :           break;
    2720           0 :         case 'r':
    2721           0 :           key->reverse = true;
    2722           0 :           break;
    2723           0 :         default:
    2724           0 :           return (char *) s;
    2725             :         }
    2726           8 :       ++s;
    2727             :     }
    2728           8 :   return (char *) s;
    2729             : }
    2730             : 
    2731             : static struct keyfield *
    2732           3 : key_init (struct keyfield *key)
    2733             : {
    2734           3 :   memset (key, 0, sizeof *key);
    2735           3 :   key->eword = SIZE_MAX;
    2736           3 :   return key;
    2737             : }
    2738             : 
    2739             : int
    2740          41 : main (int argc, char **argv)
    2741             : {
    2742             :   struct keyfield *key;
    2743             :   struct keyfield key_buf;
    2744             :   struct keyfield gkey;
    2745             :   char const *s;
    2746          41 :   int c = 0;
    2747          41 :   char checkonly = 0;
    2748          41 :   bool mergeonly = false;
    2749          41 :   char *random_source = NULL;
    2750          41 :   bool need_random = false;
    2751          41 :   size_t nfiles = 0;
    2752          41 :   bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
    2753          41 :   bool obsolete_usage = (posix2_version () < 200112);
    2754             :   char **files;
    2755          41 :   char const *outfile = NULL;
    2756             : 
    2757             :   initialize_main (&argc, &argv);
    2758          41 :   program_name = argv[0];
    2759          41 :   setlocale (LC_ALL, "");
    2760             :   bindtextdomain (PACKAGE, LOCALEDIR);
    2761             :   textdomain (PACKAGE);
    2762             : 
    2763          41 :   initialize_exit_failure (SORT_FAILURE);
    2764             : 
    2765          41 :   hard_LC_COLLATE = hard_locale (LC_COLLATE);
    2766             : #if HAVE_NL_LANGINFO
    2767             :   hard_LC_TIME = hard_locale (LC_TIME);
    2768             : #endif
    2769             : 
    2770             :   /* Get locale's representation of the decimal point.  */
    2771             :   {
    2772          41 :     struct lconv const *locale = localeconv ();
    2773             : 
    2774             :     /* If the locale doesn't define a decimal point, or if the decimal
    2775             :        point is multibyte, use the C locale's decimal point.  FIXME:
    2776             :        add support for multibyte decimal points.  */
    2777          41 :     decimal_point = to_uchar (locale->decimal_point[0]);
    2778          41 :     if (! decimal_point || locale->decimal_point[1])
    2779           0 :       decimal_point = '.';
    2780             : 
    2781             :     /* FIXME: add support for multibyte thousands separators.  */
    2782          41 :     thousands_sep = to_uchar (*locale->thousands_sep);
    2783          41 :     if (! thousands_sep || locale->thousands_sep[1])
    2784          41 :       thousands_sep = -1;
    2785             :   }
    2786             : 
    2787          41 :   have_read_stdin = false;
    2788          41 :   inittables ();
    2789             : 
    2790             :   {
    2791             :     size_t i;
    2792             :     static int const sig[] =
    2793             :       {
    2794             :         /* The usual suspects.  */
    2795             :         SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
    2796             : #ifdef SIGPOLL
    2797             :         SIGPOLL,
    2798             : #endif
    2799             : #ifdef SIGPROF
    2800             :         SIGPROF,
    2801             : #endif
    2802             : #ifdef SIGVTALRM
    2803             :         SIGVTALRM,
    2804             : #endif
    2805             : #ifdef SIGXCPU
    2806             :         SIGXCPU,
    2807             : #endif
    2808             : #ifdef SIGXFSZ
    2809             :         SIGXFSZ,
    2810             : #endif
    2811             :       };
    2812             :     enum { nsigs = sizeof sig / sizeof sig[0] };
    2813             : 
    2814             : #if SA_NOCLDSTOP
    2815             :     struct sigaction act;
    2816             : 
    2817          41 :     sigemptyset (&caught_signals);
    2818         492 :     for (i = 0; i < nsigs; i++)
    2819             :       {
    2820         451 :         sigaction (sig[i], NULL, &act);
    2821         451 :         if (act.sa_handler != SIG_IGN)
    2822         451 :           sigaddset (&caught_signals, sig[i]);
    2823             :       }
    2824             : 
    2825          41 :     act.sa_handler = sighandler;
    2826          41 :     act.sa_mask = caught_signals;
    2827          41 :     act.sa_flags = 0;
    2828             : 
    2829         492 :     for (i = 0; i < nsigs; i++)
    2830         451 :       if (sigismember (&caught_signals, sig[i]))
    2831         451 :         sigaction (sig[i], &act, NULL);
    2832             : #else
    2833             :     for (i = 0; i < nsigs; i++)
    2834             :       if (signal (sig[i], SIG_IGN) != SIG_IGN)
    2835             :         {
    2836             :           signal (sig[i], sighandler);
    2837             :           siginterrupt (sig[i], 1);
    2838             :         }
    2839             : #endif
    2840             :   }
    2841             : 
    2842             :   /* The signal mask is known, so it is safe to invoke exit_cleanup.  */
    2843          41 :   atexit (exit_cleanup);
    2844             : 
    2845          41 :   gkey.sword = gkey.eword = SIZE_MAX;
    2846          41 :   gkey.ignore = NULL;
    2847          41 :   gkey.translate = NULL;
    2848          41 :   gkey.numeric = gkey.general_numeric = gkey.random = false;
    2849          41 :   gkey.month = gkey.reverse = false;
    2850          41 :   gkey.skipsblanks = gkey.skipeblanks = false;
    2851             : 
    2852          41 :   files = xnmalloc (argc, sizeof *files);
    2853             : 
    2854             :   for (;;)
    2855          62 :     {
    2856             :       /* Parse an operand as a file after "--" was seen; or if
    2857             :          pedantic and a file was seen, unless the POSIX version
    2858             :          predates 1003.1-2001 and -c was not seen and the operand is
    2859             :          "-o FILE" or "-oFILE".  */
    2860         103 :       int oi = -1;
    2861             : 
    2862         103 :       if (c == -1
    2863          76 :           || (posixly_correct && nfiles != 0
    2864           0 :               && ! (obsolete_usage
    2865           0 :                     && ! checkonly
    2866           0 :                     && optind != argc
    2867           0 :                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
    2868           0 :                     && (argv[optind][2] || optind + 1 != argc)))
    2869          76 :           || ((c = getopt_long (argc, argv, short_options,
    2870             :                                 long_options, &oi))
    2871             :               == -1))
    2872             :         {
    2873          61 :           if (argc <= optind)
    2874          34 :             break;
    2875          27 :           files[nfiles++] = argv[optind++];
    2876             :         }
    2877          42 :       else switch (c)
    2878             :         {
    2879          27 :         case 1:
    2880          27 :           key = NULL;
    2881          27 :           if (optarg[0] == '+')
    2882             :             {
    2883          26 :               bool minus_pos_usage = (optind != argc && argv[optind][0] == '-'
    2884          16 :                                       && ISDIGIT (argv[optind][1]));
    2885           9 :               obsolete_usage |= minus_pos_usage & ~posixly_correct;
    2886           9 :               if (obsolete_usage)
    2887             :                 {
    2888             :                   /* Treat +POS1 [-POS2] as a key if possible; but silently
    2889             :                      treat an operand as a file if it is not a valid +POS1.  */
    2890           3 :                   key = key_init (&key_buf);
    2891           3 :                   s = parse_field_count (optarg + 1, &key->sword, NULL);
    2892           3 :                   if (s && *s == '.')
    2893           0 :                     s = parse_field_count (s + 1, &key->schar, NULL);
    2894           3 :                   if (! (key->sword | key->schar))
    2895           3 :                     key->sword = SIZE_MAX;
    2896           3 :                   if (! s || *set_ordering (s, key, bl_start))
    2897           3 :                     key = NULL;
    2898             :                   else
    2899             :                     {
    2900           0 :                       if (minus_pos_usage)
    2901             :                         {
    2902           0 :                           char const *optarg1 = argv[optind++];
    2903           0 :                           s = parse_field_count (optarg1 + 1, &key->eword,
    2904             :                                              N_("invalid number after `-'"));
    2905           0 :                           if (*s == '.')
    2906           0 :                             s = parse_field_count (s + 1, &key->echar,
    2907             :                                                N_("invalid number after `.'"));
    2908           0 :                           if (*set_ordering (s, key, bl_end))
    2909           0 :                             badfieldspec (optarg1,
    2910             :                                       N_("stray character in field spec"));
    2911             :                         }
    2912           0 :                       insertkey (key);
    2913             :                     }
    2914             :                 }
    2915             :             }
    2916          27 :           if (! key)
    2917          27 :             files[nfiles++] = optarg;
    2918          27 :           break;
    2919             : 
    2920           0 :         case SORT_OPTION:
    2921           0 :           c = XARGMATCH ("--sort", optarg, sort_args, sort_types);
    2922             :           /* Fall through. */
    2923           8 :         case 'b':
    2924             :         case 'd':
    2925             :         case 'f':
    2926             :         case 'g':
    2927             :         case 'i':
    2928             :         case 'M':
    2929             :         case 'n':
    2930             :         case 'r':
    2931             :         case 'R':
    2932             :           {
    2933             :             char str[2];
    2934           8 :             str[0] = c;
    2935           8 :             str[1] = '\0';
    2936           8 :             set_ordering (str, &gkey, bl_both);
    2937             :           }
    2938           8 :           break;
    2939             : 
    2940           0 :         case CHECK_OPTION:
    2941           0 :           c = (optarg
    2942           0 :                ? XARGMATCH ("--check", optarg, check_args, check_types)
    2943           0 :                : 'c');
    2944             :           /* Fall through.  */
    2945           0 :         case 'c':
    2946             :         case 'C':
    2947           0 :           if (checkonly && checkonly != c)
    2948           0 :             incompatible_options ("cC");
    2949           0 :           checkonly = c;
    2950           0 :           break;
    2951             : 
    2952           0 :         case COMPRESS_PROGRAM_OPTION:
    2953           0 :           if (compress_program && !STREQ (compress_program, optarg))
    2954           0 :             error (SORT_FAILURE, 0, _("multiple compress programs specified"));
    2955           0 :           compress_program = optarg;
    2956           0 :           break;
    2957             : 
    2958           0 :         case 'k':
    2959           0 :           key = key_init (&key_buf);
    2960             : 
    2961             :           /* Get POS1. */
    2962           0 :           s = parse_field_count (optarg, &key->sword,
    2963             :                                  N_("invalid number at field start"));
    2964           0 :           if (! key->sword--)
    2965             :             {
    2966             :               /* Provoke with `sort -k0' */
    2967           0 :               badfieldspec (optarg, N_("field number is zero"));
    2968             :             }
    2969           0 :           if (*s == '.')
    2970             :             {
    2971           0 :               s = parse_field_count (s + 1, &key->schar,
    2972             :                                      N_("invalid number after `.'"));
    2973           0 :               if (! key->schar--)
    2974             :                 {
    2975             :                   /* Provoke with `sort -k1.0' */
    2976           0 :                   badfieldspec (optarg, N_("character offset is zero"));
    2977             :                 }
    2978             :             }
    2979           0 :           if (! (key->sword | key->schar))
    2980           0 :             key->sword = SIZE_MAX;
    2981           0 :           s = set_ordering (s, key, bl_start);
    2982           0 :           if (*s != ',')
    2983             :             {
    2984           0 :               key->eword = SIZE_MAX;
    2985           0 :               key->echar = 0;
    2986             :             }
    2987             :           else
    2988             :             {
    2989             :               /* Get POS2. */
    2990           0 :               s = parse_field_count (s + 1, &key->eword,
    2991             :                                      N_("invalid number after `,'"));
    2992           0 :               if (! key->eword--)
    2993             :                 {
    2994             :                   /* Provoke with `sort -k1,0' */
    2995           0 :                   badfieldspec (optarg, N_("field number is zero"));
    2996             :                 }
    2997           0 :               if (*s == '.')
    2998           0 :                 s = parse_field_count (s + 1, &key->echar,
    2999             :                                        N_("invalid number after `.'"));
    3000             :               else
    3001             :                 {
    3002             :                   /* `-k 2,3' is equivalent to `+1 -3'.  */
    3003           0 :                   key->eword++;
    3004             :                 }
    3005           0 :               s = set_ordering (s, key, bl_end);
    3006             :             }
    3007           0 :           if (*s)
    3008           0 :             badfieldspec (optarg, N_("stray character in field spec"));
    3009           0 :           insertkey (key);
    3010           0 :           break;
    3011             : 
    3012           0 :         case 'm':
    3013           0 :           mergeonly = true;
    3014           0 :           break;
    3015             : 
    3016           0 :         case 'o':
    3017           0 :           if (outfile && !STREQ (outfile, optarg))
    3018           0 :             error (SORT_FAILURE, 0, _("multiple output files specified"));
    3019           0 :           outfile = optarg;
    3020           0 :           break;
    3021             : 
    3022           0 :         case RANDOM_SOURCE_OPTION:
    3023           0 :           if (random_source && !STREQ (random_source, optarg))
    3024           0 :             error (SORT_FAILURE, 0, _("multiple random sources specified"));
    3025           0 :           random_source = optarg;
    3026           0 :           break;
    3027             : 
    3028           0 :         case 's':
    3029           0 :           stable = true;
    3030           0 :           break;
    3031             : 
    3032           0 :         case 'S':
    3033           0 :           specify_sort_size (oi, c, optarg);
    3034           0 :           break;
    3035             : 
    3036           0 :         case 't':
    3037             :           {
    3038           0 :             char newtab = optarg[0];
    3039           0 :             if (! newtab)
    3040           0 :               error (SORT_FAILURE, 0, _("empty tab"));
    3041           0 :             if (optarg[1])
    3042             :               {
    3043           0 :                 if (STREQ (optarg, "\\0"))
    3044           0 :                   newtab = '\0';
    3045             :                 else
    3046             :                   {
    3047             :                     /* Provoke with `sort -txx'.  Complain about
    3048             :                        "multi-character tab" instead of "multibyte tab", so
    3049             :                        that the diagnostic's wording does not need to be
    3050             :                        changed once multibyte characters are supported.  */
    3051           0 :                     error (SORT_FAILURE, 0, _("multi-character tab %s"),
    3052             :                            quote (optarg));
    3053             :                   }
    3054             :               }
    3055           0 :             if (tab != TAB_DEFAULT && tab != newtab)
    3056           0 :               error (SORT_FAILURE, 0, _("incompatible tabs"));
    3057           0 :             tab = newtab;
    3058             :           }
    3059           0 :           break;
    3060             : 
    3061           0 :         case 'T':
    3062           0 :           add_temp_dir (optarg);
    3063           0 :           break;
    3064             : 
    3065           0 :         case 'u':
    3066           0 :           unique = true;
    3067           0 :           break;
    3068             : 
    3069           0 :         case 'y':
    3070             :           /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x
    3071             :              through Solaris 7.  It is also accepted by many non-Solaris
    3072             :              "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5.
    3073             :              -y is marked as obsolete starting with Solaris 8 (1999), but is
    3074             :              still accepted as of Solaris 10 prerelease (2004).
    3075             : 
    3076             :              Solaris 2.5.1 "sort -y 100" reads the input file "100", but
    3077             :              emulate Solaris 8 and 9 "sort -y 100" which ignores the "100",
    3078             :              and which in general ignores the argument after "-y" if it
    3079             :              consists entirely of digits (it can even be empty).  */
    3080           0 :           if (optarg == argv[optind - 1])
    3081             :             {
    3082             :               char const *p;
    3083           0 :               for (p = optarg; ISDIGIT (*p); p++)
    3084           0 :                 continue;
    3085           0 :               optind -= (*p != '\0');
    3086             :             }
    3087           0 :           break;
    3088             : 
    3089           0 :         case 'z':
    3090           0 :           eolchar = 0;
    3091           0 :           break;
    3092             : 
    3093           0 :         case_GETOPT_HELP_CHAR;
    3094             : 
    3095           0 :         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
    3096             : 
    3097           7 :         default:
    3098           7 :           usage (SORT_FAILURE);
    3099             :         }
    3100             :     }
    3101             : 
    3102             :   /* Inheritance of global options to individual keys. */
    3103          34 :   for (key = keylist; key; key = key->next)
    3104             :     {
    3105           0 :       if (! (key->ignore || key->translate
    3106           0 :              || (key->skipsblanks | key->reverse
    3107           0 :                  | key->skipeblanks | key->month | key->numeric
    3108           0 :                  | key->general_numeric
    3109           0 :                  | key->random)))
    3110             :         {
    3111           0 :           key->ignore = gkey.ignore;
    3112           0 :           key->translate = gkey.translate;
    3113           0 :           key->skipsblanks = gkey.skipsblanks;
    3114           0 :           key->skipeblanks = gkey.skipeblanks;
    3115           0 :           key->month = gkey.month;
    3116           0 :           key->numeric = gkey.numeric;
    3117           0 :           key->general_numeric = gkey.general_numeric;
    3118           0 :           key->random = gkey.random;
    3119           0 :           key->reverse = gkey.reverse;
    3120             :         }
    3121             : 
    3122           0 :       need_random |= key->random;
    3123             :     }
    3124             : 
    3125          34 :   if (!keylist && (gkey.ignore || gkey.translate
    3126          68 :                    || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
    3127          34 :                        | gkey.numeric | gkey.general_numeric
    3128          34 :                        | gkey.random)))
    3129             :     {
    3130           8 :       insertkey (&gkey);
    3131           8 :       need_random |= gkey.random;
    3132             :     }
    3133             : 
    3134          34 :   check_ordering_compatibility ();
    3135             : 
    3136          34 :   reverse = gkey.reverse;
    3137             : 
    3138          34 :   if (need_random)
    3139             :     {
    3140           0 :       randread_source = randread_new (random_source, MD5_DIGEST_SIZE);
    3141           0 :       if (! randread_source)
    3142           0 :         die (_("open failed"), random_source);
    3143             :     }
    3144             : 
    3145          34 :   if (temp_dir_count == 0)
    3146             :     {
    3147          34 :       char const *tmp_dir = getenv ("TMPDIR");
    3148          34 :       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
    3149             :     }
    3150             : 
    3151          34 :   if (nfiles == 0)
    3152             :     {
    3153             :       static char *minus = "-";
    3154           3 :       nfiles = 1;
    3155           3 :       free (files);
    3156           3 :       files = &minus;
    3157             :     }
    3158             : 
    3159          34 :   if (checkonly)
    3160             :     {
    3161           0 :       if (nfiles > 1)
    3162           0 :         error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"),
    3163           0 :                quote (files[1]), checkonly);
    3164             : 
    3165           0 :       if (outfile)
    3166             :         {
    3167             :           static char opts[] = {0, 'o', 0};
    3168           0 :           opts[0] = checkonly;
    3169           0 :           incompatible_options (opts);
    3170             :         }
    3171             : 
    3172             :       /* POSIX requires that sort return 1 IFF invoked with -c or -C and the
    3173             :          input is not properly sorted.  */
    3174           0 :       exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
    3175             :     }
    3176             : 
    3177          34 :   if (mergeonly)
    3178             :     {
    3179           0 :       struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles);
    3180             :       size_t i;
    3181             : 
    3182           0 :       for (i = 0; i < nfiles; ++i)
    3183           0 :         sortfiles[i].name = files[i];
    3184             : 
    3185           0 :       merge (sortfiles, 0, nfiles, outfile);
    3186             :       IF_LINT (free (sortfiles));
    3187             :     }
    3188             :   else
    3189          34 :     sort (files, nfiles, outfile);
    3190             : 
    3191          13 :   if (have_read_stdin && fclose (stdin) == EOF)
    3192           0 :     die (_("close failed"), "-");
    3193             : 
    3194          13 :   exit (EXIT_SUCCESS);
    3195             : }

Generated by: LCOV version 1.10