LCOV - code coverage report
Current view: top level - src - tr.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 496 724 68.5 %
Date: 2018-01-30 Functions: 30 35 85.7 %

          Line data    Source code
       1             : /* tr -- a filter to translate characters
       2             :    Copyright (C) 91, 1995-2008 Free Software Foundation, Inc.
       3             : 
       4             :    This program is free software: you can redistribute it and/or modify
       5             :    it under the terms of the GNU General Public License as published by
       6             :    the Free Software Foundation, either version 3 of the License, or
       7             :    (at your option) any later version.
       8             : 
       9             :    This program is distributed in the hope that it will be useful,
      10             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :    GNU General Public License for more details.
      13             : 
      14             :    You should have received a copy of the GNU General Public License
      15             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      16             : 
      17             : /* Written by Jim Meyering */
      18             : 
      19             : #include <config.h>
      20             : 
      21             : #include <stdio.h>
      22             : #include <assert.h>
      23             : #include <sys/types.h>
      24             : #include <getopt.h>
      25             : 
      26             : #include "system.h"
      27             : #include "error.h"
      28             : #include "quote.h"
      29             : #include "safe-read.h"
      30             : #include "xstrtol.h"
      31             : 
      32             : /* The official name of this program (e.g., no `g' prefix).  */
      33             : #define PROGRAM_NAME "tr"
      34             : 
      35             : #define AUTHORS "Jim Meyering"
      36             : 
      37             : enum { N_CHARS = UCHAR_MAX + 1 };
      38             : 
      39             : /* An unsigned integer type big enough to hold a repeat count or an
      40             :    unsigned character.  POSIX requires support for repeat counts as
      41             :    high as 2**31 - 1.  Since repeat counts might need to expand to
      42             :    match the length of an argument string, we need at least size_t to
      43             :    avoid arbitrary internal limits.  It doesn't cost much to use
      44             :    uintmax_t, though.  */
      45             : typedef uintmax_t count;
      46             : 
      47             : /* The value for Spec_list->state that indicates to
      48             :    get_next that it should initialize the tail pointer.
      49             :    Its value should be as large as possible to avoid conflict
      50             :    a valid value for the state field -- and that may be as
      51             :    large as any valid repeat_count.  */
      52             : #define BEGIN_STATE (UINTMAX_MAX - 1)
      53             : 
      54             : /* The value for Spec_list->state that indicates to
      55             :    get_next that the element pointed to by Spec_list->tail is
      56             :    being considered for the first time on this pass through the
      57             :    list -- it indicates that get_next should make any necessary
      58             :    initializations.  */
      59             : #define NEW_ELEMENT (BEGIN_STATE + 1)
      60             : 
      61             : /* The maximum possible repeat count.  Due to how the states are
      62             :    implemented, it can be as much as BEGIN_STATE.  */
      63             : #define REPEAT_COUNT_MAXIMUM BEGIN_STATE
      64             : 
      65             : /* The following (but not CC_NO_CLASS) are indices into the array of
      66             :    valid character class strings.  */
      67             : enum Char_class
      68             :   {
      69             :     CC_ALNUM = 0, CC_ALPHA = 1, CC_BLANK = 2, CC_CNTRL = 3,
      70             :     CC_DIGIT = 4, CC_GRAPH = 5, CC_LOWER = 6, CC_PRINT = 7,
      71             :     CC_PUNCT = 8, CC_SPACE = 9, CC_UPPER = 10, CC_XDIGIT = 11,
      72             :     CC_NO_CLASS = 9999
      73             :   };
      74             : 
      75             : /* Character class to which a character (returned by get_next) belonged;
      76             :    but it is set only if the construct from which the character was obtained
      77             :    was one of the character classes [:upper:] or [:lower:].  The value
      78             :    is used only when translating and then, only to make sure that upper
      79             :    and lower class constructs have the same relative positions in string1
      80             :    and string2.  */
      81             : enum Upper_Lower_class
      82             :   {
      83             :     UL_LOWER,
      84             :     UL_UPPER,
      85             :     UL_NONE
      86             :   };
      87             : 
      88             : /* The type of a List_element.  See build_spec_list for more details.  */
      89             : enum Range_element_type
      90             :   {
      91             :     RE_NORMAL_CHAR,
      92             :     RE_RANGE,
      93             :     RE_CHAR_CLASS,
      94             :     RE_EQUIV_CLASS,
      95             :     RE_REPEATED_CHAR
      96             :   };
      97             : 
      98             : /* One construct in one of tr's argument strings.
      99             :    For example, consider the POSIX version of the classic tr command:
     100             :        tr -cs 'a-zA-Z_' '[\n*]'
     101             :    String1 has 3 constructs, two of which are ranges (a-z and A-Z),
     102             :    and a single normal character, `_'.  String2 has one construct.  */
     103             : struct List_element
     104             :   {
     105             :     enum Range_element_type type;
     106             :     struct List_element *next;
     107             :     union
     108             :       {
     109             :         unsigned char normal_char;
     110             :         struct                  /* unnamed */
     111             :           {
     112             :             unsigned char first_char;
     113             :             unsigned char last_char;
     114             :           }
     115             :         range;
     116             :         enum Char_class char_class;
     117             :         unsigned char equiv_code;
     118             :         struct                  /* unnamed */
     119             :           {
     120             :             unsigned char the_repeated_char;
     121             :             count repeat_count;
     122             :           }
     123             :         repeated_char;
     124             :       }
     125             :     u;
     126             :   };
     127             : 
     128             : /* Each of tr's argument strings is parsed into a form that is easier
     129             :    to work with: a linked list of constructs (struct List_element).
     130             :    Each Spec_list structure also encapsulates various attributes of
     131             :    the corresponding argument string.  The attributes are used mainly
     132             :    to verify that the strings are valid in the context of any options
     133             :    specified (like -s, -d, or -c).  The main exception is the member
     134             :    `tail', which is first used to construct the list.  After construction,
     135             :    it is used by get_next to save its state when traversing the list.
     136             :    The member `state' serves a similar function.  */
     137             : struct Spec_list
     138             :   {
     139             :     /* Points to the head of the list of range elements.
     140             :        The first struct is a dummy; its members are never used.  */
     141             :     struct List_element *head;
     142             : 
     143             :     /* When appending, points to the last element.  When traversing via
     144             :        get_next(), points to the element to process next.  Setting
     145             :        Spec_list.state to the value BEGIN_STATE before calling get_next
     146             :        signals get_next to initialize tail to point to head->next.  */
     147             :     struct List_element *tail;
     148             : 
     149             :     /* Used to save state between calls to get_next.  */
     150             :     count state;
     151             : 
     152             :     /* Length, in the sense that length ('a-z[:digit:]123abc')
     153             :        is 42 ( = 26 + 10 + 6).  */
     154             :     count length;
     155             : 
     156             :     /* The number of [c*] and [c*0] constructs that appear in this spec.  */
     157             :     size_t n_indefinite_repeats;
     158             : 
     159             :     /* If n_indefinite_repeats is nonzero, this points to the List_element
     160             :        corresponding to the last [c*] or [c*0] construct encountered in
     161             :        this spec.  Otherwise it is undefined.  */
     162             :     struct List_element *indefinite_repeat_element;
     163             : 
     164             :     /* True if this spec contains at least one equivalence
     165             :        class construct e.g. [=c=].  */
     166             :     bool has_equiv_class;
     167             : 
     168             :     /* True if this spec contains at least one character class
     169             :        construct.  E.g. [:digit:].  */
     170             :     bool has_char_class;
     171             : 
     172             :     /* True if this spec contains at least one of the character class
     173             :        constructs (all but upper and lower) that aren't allowed in s2.  */
     174             :     bool has_restricted_char_class;
     175             :   };
     176             : 
     177             : /* A representation for escaped string1 or string2.  As a string is parsed,
     178             :    any backslash-escaped characters (other than octal or \a, \b, \f, \n,
     179             :    etc.) are marked as such in this structure by setting the corresponding
     180             :    entry in the ESCAPED vector.  */
     181             : struct E_string
     182             : {
     183             :   char *s;
     184             :   bool *escaped;
     185             :   size_t len;
     186             : };
     187             : 
     188             : /* Return nonzero if the Ith character of escaped string ES matches C
     189             :    and is not escaped itself.  */
     190             : static inline bool
     191          85 : es_match (struct E_string const *es, size_t i, char c)
     192             : {
     193          85 :   return es->s[i] == c && !es->escaped[i];
     194             : }
     195             : 
     196             : /* The name by which this program was run.  */
     197             : char *program_name;
     198             : 
     199             : /* When true, each sequence in the input of a repeated character
     200             :    (call it c) is replaced (in the output) by a single occurrence of c
     201             :    for every c in the squeeze set.  */
     202             : static bool squeeze_repeats = false;
     203             : 
     204             : /* When true, removes characters in the delete set from input.  */
     205             : static bool delete = false;
     206             : 
     207             : /* Use the complement of set1 in place of set1.  */
     208             : static bool complement = false;
     209             : 
     210             : /* When tr is performing translation and string1 is longer than string2,
     211             :    POSIX says that the result is unspecified.  That gives the implementor
     212             :    of a POSIX conforming version of tr two reasonable choices for the
     213             :    semantics of this case.
     214             : 
     215             :    * The BSD tr pads string2 to the length of string1 by
     216             :    repeating the last character in string2.
     217             : 
     218             :    * System V tr ignores characters in string1 that have no
     219             :    corresponding character in string2.  That is, string1 is effectively
     220             :    truncated to the length of string2.
     221             : 
     222             :    When nonzero, this flag causes GNU tr to imitate the behavior
     223             :    of System V tr when translating with string1 longer than string2.
     224             :    The default is to emulate BSD tr.  This flag is ignored in modes where
     225             :    no translation is performed.  Emulating the System V tr
     226             :    in this exceptional case causes the relatively common BSD idiom:
     227             : 
     228             :        tr -cs A-Za-z0-9 '\012'
     229             : 
     230             :    to break (it would convert only zero bytes, rather than all
     231             :    non-alphanumerics, to newlines).
     232             : 
     233             :    WARNING: This switch does not provide general BSD or System V
     234             :    compatibility.  For example, it doesn't disable the interpretation
     235             :    of the POSIX constructs [:alpha:], [=c=], and [c*10], so if by
     236             :    some unfortunate coincidence you use such constructs in scripts
     237             :    expecting to use some other version of tr, the scripts will break.  */
     238             : static bool truncate_set1 = false;
     239             : 
     240             : /* An alias for (!delete && non_option_args == 2).
     241             :    It is set in main and used there and in validate().  */
     242             : static bool translating;
     243             : 
     244             : static char io_buf[BUFSIZ];
     245             : 
     246             : static char const *const char_class_name[] =
     247             : {
     248             :   "alnum", "alpha", "blank", "cntrl", "digit", "graph",
     249             :   "lower", "print", "punct", "space", "upper", "xdigit"
     250             : };
     251             : enum { N_CHAR_CLASSES = sizeof char_class_name / sizeof char_class_name[0] };
     252             : 
     253             : /* Array of boolean values.  A character `c' is a member of the
     254             :    squeeze set if and only if in_squeeze_set[c] is true.  The squeeze
     255             :    set is defined by the last (possibly, the only) string argument
     256             :    on the command line when the squeeze option is given.  */
     257             : static bool in_squeeze_set[N_CHARS];
     258             : 
     259             : /* Array of boolean values.  A character `c' is a member of the
     260             :    delete set if and only if in_delete_set[c] is true.  The delete
     261             :    set is defined by the first (or only) string argument on the
     262             :    command line when the delete option is given.  */
     263             : static bool in_delete_set[N_CHARS];
     264             : 
     265             : /* Array of character values defining the translation (if any) that
     266             :    tr is to perform.  Translation is performed only when there are
     267             :    two specification strings and the delete switch is not given.  */
     268             : static char xlate[N_CHARS];
     269             : 
     270             : static struct option const long_options[] =
     271             : {
     272             :   {"complement", no_argument, NULL, 'c'},
     273             :   {"delete", no_argument, NULL, 'd'},
     274             :   {"squeeze-repeats", no_argument, NULL, 's'},
     275             :   {"truncate-set1", no_argument, NULL, 't'},
     276             :   {GETOPT_HELP_OPTION_DECL},
     277             :   {GETOPT_VERSION_OPTION_DECL},
     278             :   {NULL, 0, NULL, 0}
     279             : };
     280             : 
     281             : void
     282          37 : usage (int status)
     283             : {
     284          37 :   if (status != EXIT_SUCCESS)
     285          35 :     fprintf (stderr, _("Try `%s --help' for more information.\n"),
     286             :              program_name);
     287             :   else
     288             :     {
     289           2 :       printf (_("\
     290             : Usage: %s [OPTION]... SET1 [SET2]\n\
     291             : "),
     292             :               program_name);
     293           2 :       fputs (_("\
     294             : Translate, squeeze, and/or delete characters from standard input,\n\
     295             : writing to standard output.\n\
     296             : \n\
     297             :   -c, -C, --complement    first complement SET1\n\
     298             :   -d, --delete            delete characters in SET1, do not translate\n\
     299             :   -s, --squeeze-repeats   replace each input sequence of a repeated character\n\
     300             :                             that is listed in SET1 with a single occurrence\n\
     301             :                             of that character\n\
     302             :   -t, --truncate-set1     first truncate SET1 to length of SET2\n\
     303             : "), stdout);
     304           2 :       fputs (HELP_OPTION_DESCRIPTION, stdout);
     305           2 :       fputs (VERSION_OPTION_DESCRIPTION, stdout);
     306           2 :       fputs (_("\
     307             : \n\
     308             : SETs are specified as strings of characters.  Most represent themselves.\n\
     309             : Interpreted sequences are:\n\
     310             : \n\
     311             :   \\NNN            character with octal value NNN (1 to 3 octal digits)\n\
     312             :   \\\\              backslash\n\
     313             :   \\a              audible BEL\n\
     314             :   \\b              backspace\n\
     315             :   \\f              form feed\n\
     316             :   \\n              new line\n\
     317             :   \\r              return\n\
     318             :   \\t              horizontal tab\n\
     319             : "), stdout);
     320           2 :      fputs (_("\
     321             :   \\v              vertical tab\n\
     322             :   CHAR1-CHAR2     all characters from CHAR1 to CHAR2 in ascending order\n\
     323             :   [CHAR*]         in SET2, copies of CHAR until length of SET1\n\
     324             :   [CHAR*REPEAT]   REPEAT copies of CHAR, REPEAT octal if starting with 0\n\
     325             :   [:alnum:]       all letters and digits\n\
     326             :   [:alpha:]       all letters\n\
     327             :   [:blank:]       all horizontal whitespace\n\
     328             :   [:cntrl:]       all control characters\n\
     329             :   [:digit:]       all digits\n\
     330             : "), stdout);
     331           2 :      fputs (_("\
     332             :   [:graph:]       all printable characters, not including space\n\
     333             :   [:lower:]       all lower case letters\n\
     334             :   [:print:]       all printable characters, including space\n\
     335             :   [:punct:]       all punctuation characters\n\
     336             :   [:space:]       all horizontal or vertical whitespace\n\
     337             :   [:upper:]       all upper case letters\n\
     338             :   [:xdigit:]      all hexadecimal digits\n\
     339             :   [=CHAR=]        all characters which are equivalent to CHAR\n\
     340             : "), stdout);
     341           2 :      fputs (_("\
     342             : \n\
     343             : Translation occurs if -d is not given and both SET1 and SET2 appear.\n\
     344             : -t may be used only when translating.  SET2 is extended to length of\n\
     345             : SET1 by repeating its last character as necessary.  \
     346             : "), stdout);
     347           2 :      fputs (_("\
     348             : Excess characters\n\
     349             : of SET2 are ignored.  Only [:lower:] and [:upper:] are guaranteed to\n\
     350             : expand in ascending order; used in SET2 while translating, they may\n\
     351             : only be used in pairs to specify case conversion.  \
     352             : "), stdout);
     353           2 :      fputs (_("\
     354             : -s uses SET1 if not\n\
     355             : translating nor deleting; else squeezing uses SET2 and occurs after\n\
     356             : translation or deletion.\n\
     357             : "), stdout);
     358           2 :       emit_bug_reporting_address ();
     359             :     }
     360          37 :   exit (status);
     361             : }
     362             : 
     363             : /* Return nonzero if the character C is a member of the
     364             :    equivalence class containing the character EQUIV_CLASS.  */
     365             : 
     366             : static inline bool
     367           0 : is_equiv_class_member (unsigned char equiv_class, unsigned char c)
     368             : {
     369           0 :   return (equiv_class == c);
     370             : }
     371             : 
     372             : /* Return true if the character C is a member of the
     373             :    character class CHAR_CLASS.  */
     374             : 
     375             : static bool
     376           0 : is_char_class_member (enum Char_class char_class, unsigned char c)
     377             : {
     378             :   int result;
     379             : 
     380           0 :   switch (char_class)
     381             :     {
     382           0 :     case CC_ALNUM:
     383           0 :       result = isalnum (c);
     384           0 :       break;
     385           0 :     case CC_ALPHA:
     386           0 :       result = isalpha (c);
     387           0 :       break;
     388           0 :     case CC_BLANK:
     389           0 :       result = isblank (c);
     390           0 :       break;
     391           0 :     case CC_CNTRL:
     392           0 :       result = iscntrl (c);
     393           0 :       break;
     394           0 :     case CC_DIGIT:
     395           0 :       result = isdigit (c);
     396           0 :       break;
     397           0 :     case CC_GRAPH:
     398           0 :       result = isgraph (c);
     399           0 :       break;
     400           0 :     case CC_LOWER:
     401           0 :       result = islower (c);
     402           0 :       break;
     403           0 :     case CC_PRINT:
     404           0 :       result = isprint (c);
     405           0 :       break;
     406           0 :     case CC_PUNCT:
     407           0 :       result = ispunct (c);
     408           0 :       break;
     409           0 :     case CC_SPACE:
     410           0 :       result = isspace (c);
     411           0 :       break;
     412           0 :     case CC_UPPER:
     413           0 :       result = isupper (c);
     414           0 :       break;
     415           0 :     case CC_XDIGIT:
     416           0 :       result = isxdigit (c);
     417           0 :       break;
     418           0 :     default:
     419           0 :       abort ();
     420             :       break;
     421             :     }
     422             : 
     423           0 :   return !! result;
     424             : }
     425             : 
     426             : static void
     427         109 : es_free (struct E_string *es)
     428             : {
     429         109 :   free (es->s);
     430         109 :   free (es->escaped);
     431         109 : }
     432             : 
     433             : /* Perform the first pass over each range-spec argument S, converting all
     434             :    \c and \ddd escapes to their one-byte representations.  If an invalid
     435             :    quote sequence is found print an error message and return false;
     436             :    Otherwise set *ES to the resulting string and return true.
     437             :    The resulting array of characters may contain zero-bytes;
     438             :    however, on input, S is assumed to be null-terminated, and hence
     439             :    cannot contain actual (non-escaped) zero bytes.  */
     440             : 
     441             : static bool
     442         109 : unquote (char const *s, struct E_string *es)
     443             : {
     444             :   size_t i, j;
     445         109 :   size_t len = strlen (s);
     446             : 
     447         109 :   es->s = xmalloc (len);
     448         109 :   es->escaped = xcalloc (len, sizeof es->escaped[0]);
     449             : 
     450         109 :   j = 0;
     451         233 :   for (i = 0; s[i]; i++)
     452             :     {
     453             :       unsigned char c;
     454             :       int oct_digit;
     455             : 
     456         124 :       switch (s[i])
     457             :         {
     458          48 :         case '\\':
     459          48 :           es->escaped[j] = true;
     460          48 :           switch (s[i + 1])
     461             :             {
     462           1 :             case '\\':
     463           1 :               c = '\\';
     464           1 :               break;
     465           1 :             case 'a':
     466           1 :               c = '\a';
     467           1 :               break;
     468           1 :             case 'b':
     469           1 :               c = '\b';
     470           1 :               break;
     471           2 :             case 'f':
     472           2 :               c = '\f';
     473           2 :               break;
     474           1 :             case 'n':
     475           1 :               c = '\n';
     476           1 :               break;
     477           1 :             case 'r':
     478           1 :               c = '\r';
     479           1 :               break;
     480           1 :             case 't':
     481           1 :               c = '\t';
     482           1 :               break;
     483           2 :             case 'v':
     484           2 :               c = '\v';
     485           2 :               break;
     486          13 :             case '0':
     487             :             case '1':
     488             :             case '2':
     489             :             case '3':
     490             :             case '4':
     491             :             case '5':
     492             :             case '6':
     493             :             case '7':
     494          13 :               c = s[i + 1] - '0';
     495          13 :               oct_digit = s[i + 2] - '0';
     496          13 :               if (0 <= oct_digit && oct_digit <= 7)
     497             :                 {
     498           6 :                   c = 8 * c + oct_digit;
     499           6 :                   ++i;
     500           6 :                   oct_digit = s[i + 2] - '0';
     501           6 :                   if (0 <= oct_digit && oct_digit <= 7)
     502             :                     {
     503           5 :                       if (8 * c + oct_digit < N_CHARS)
     504             :                         {
     505           2 :                           c = 8 * c + oct_digit;
     506           2 :                           ++i;
     507             :                         }
     508             :                       else
     509             :                         {
     510             :                           /* A 3-digit octal number larger than \377 won't
     511             :                              fit in 8 bits.  So we stop when adding the
     512             :                              next digit would put us over the limit and
     513             :                              give a warning about the ambiguity.  POSIX
     514             :                              isn't clear on this, and we interpret this
     515             :                              lack of clarity as meaning the resulting behavior
     516             :                              is undefined, which means we're allowed to issue
     517             :                              a warning.  */
     518          18 :                           error (0, 0, _("warning: the ambiguous octal escape \
     519             : \\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, %c"),
     520           9 :                                  s[i], s[i + 1], s[i + 2],
     521           9 :                                  s[i], s[i + 1], s[i + 2]);
     522             :                         }
     523             :                     }
     524             :                 }
     525          13 :               break;
     526          23 :             case '\0':
     527          23 :               error (0, 0, _("warning: an unescaped backslash "
     528             :                              "at end of string is not portable"));
     529             :               /* POSIX is not clear about this.  */
     530          23 :               es->escaped[j] = false;
     531          23 :               i--;
     532          23 :               c = '\\';
     533          23 :               break;
     534           2 :             default:
     535           2 :               c = s[i + 1];
     536           2 :               break;
     537             :             }
     538          48 :           ++i;
     539          48 :           es->s[j++] = c;
     540          48 :           break;
     541          76 :         default:
     542          76 :           es->s[j++] = s[i];
     543          76 :           break;
     544             :         }
     545             :     }
     546         109 :   es->len = j;
     547         109 :   return true;
     548             : }
     549             : 
     550             : /* If CLASS_STR is a valid character class string, return its index
     551             :    in the global char_class_name array.  Otherwise, return CC_NO_CLASS.  */
     552             : 
     553             : static enum Char_class
     554           2 : look_up_char_class (char const *class_str, size_t len)
     555             : {
     556             :   enum Char_class i;
     557             : 
     558          26 :   for (i = 0; i < N_CHAR_CLASSES; i++)
     559          24 :     if (strncmp (class_str, char_class_name[i], len) == 0
     560           2 :         && strlen (char_class_name[i]) == len)
     561           0 :       return i;
     562           2 :   return CC_NO_CLASS;
     563             : }
     564             : 
     565             : /* Return a newly allocated string with a printable version of C.
     566             :    This function is used solely for formatting error messages.  */
     567             : 
     568             : static char *
     569           6 : make_printable_char (unsigned char c)
     570             : {
     571           6 :   char *buf = xmalloc (5);
     572             : 
     573           6 :   if (isprint (c))
     574             :     {
     575           3 :       buf[0] = c;
     576           3 :       buf[1] = '\0';
     577             :     }
     578             :   else
     579             :     {
     580           3 :       sprintf (buf, "\\%03o", c);
     581             :     }
     582           6 :   return buf;
     583             : }
     584             : 
     585             : /* Return a newly allocated copy of S which is suitable for printing.
     586             :    LEN is the number of characters in S.  Most non-printing
     587             :    (isprint) characters are represented by a backslash followed by
     588             :    3 octal digits.  However, the characters represented by \c escapes
     589             :    where c is one of [abfnrtv] are represented by their 2-character \c
     590             :    sequences.  This function is used solely for printing error messages.  */
     591             : 
     592             : static char *
     593           2 : make_printable_str (char const *s, size_t len)
     594             : {
     595             :   /* Worst case is that every character expands to a backslash
     596             :      followed by a 3-character octal escape sequence.  */
     597           2 :   char *printable_buf = xnmalloc (len + 1, 4);
     598           2 :   char *p = printable_buf;
     599             :   size_t i;
     600             : 
     601           4 :   for (i = 0; i < len; i++)
     602             :     {
     603             :       char buf[5];
     604           2 :       char const *tmp = NULL;
     605           2 :       unsigned char c = s[i];
     606             : 
     607           2 :       switch (c)
     608             :         {
     609           0 :         case '\\':
     610           0 :           tmp = "\\";
     611           0 :           break;
     612           0 :         case '\a':
     613           0 :           tmp = "\\a";
     614           0 :           break;
     615           0 :         case '\b':
     616           0 :           tmp = "\\b";
     617           0 :           break;
     618           0 :         case '\f':
     619           0 :           tmp = "\\f";
     620           0 :           break;
     621           0 :         case '\n':
     622           0 :           tmp = "\\n";
     623           0 :           break;
     624           0 :         case '\r':
     625           0 :           tmp = "\\r";
     626           0 :           break;
     627           0 :         case '\t':
     628           0 :           tmp = "\\t";
     629           0 :           break;
     630           0 :         case '\v':
     631           0 :           tmp = "\\v";
     632           0 :           break;
     633           2 :         default:
     634           2 :           if (isprint (c))
     635             :             {
     636           2 :               buf[0] = c;
     637           2 :               buf[1] = '\0';
     638             :             }
     639             :           else
     640           0 :             sprintf (buf, "\\%03o", c);
     641           2 :           tmp = buf;
     642           2 :           break;
     643             :         }
     644           2 :       p = stpcpy (p, tmp);
     645             :     }
     646           2 :   return printable_buf;
     647             : }
     648             : 
     649             : /* Append a newly allocated structure representing a
     650             :    character C to the specification list LIST.  */
     651             : 
     652             : static void
     653          83 : append_normal_char (struct Spec_list *list, unsigned char c)
     654             : {
     655             :   struct List_element *new;
     656             : 
     657          83 :   new = xmalloc (sizeof *new);
     658          83 :   new->next = NULL;
     659          83 :   new->type = RE_NORMAL_CHAR;
     660          83 :   new->u.normal_char = c;
     661          83 :   assert (list->tail);
     662          83 :   list->tail->next = new;
     663          83 :   list->tail = new;
     664          83 : }
     665             : 
     666             : /* Append a newly allocated structure representing the range
     667             :    of characters from FIRST to LAST to the specification list LIST.
     668             :    Return false if LAST precedes FIRST in the collating sequence,
     669             :    true otherwise.  This means that '[c-c]' is acceptable.  */
     670             : 
     671             : static bool
     672           6 : append_range (struct Spec_list *list, unsigned char first, unsigned char last)
     673             : {
     674             :   struct List_element *new;
     675             : 
     676           6 :   if (last < first)
     677             :     {
     678           3 :       char *tmp1 = make_printable_char (first);
     679           3 :       char *tmp2 = make_printable_char (last);
     680             : 
     681           3 :       error (0, 0,
     682             :        _("range-endpoints of `%s-%s' are in reverse collating sequence order"),
     683             :              tmp1, tmp2);
     684           3 :       free (tmp1);
     685           3 :       free (tmp2);
     686           3 :       return false;
     687             :     }
     688           3 :   new = xmalloc (sizeof *new);
     689           3 :   new->next = NULL;
     690           3 :   new->type = RE_RANGE;
     691           3 :   new->u.range.first_char = first;
     692           3 :   new->u.range.last_char = last;
     693           3 :   assert (list->tail);
     694           3 :   list->tail->next = new;
     695           3 :   list->tail = new;
     696           3 :   return true;
     697             : }
     698             : 
     699             : /* If CHAR_CLASS_STR is a valid character class string, append a
     700             :    newly allocated structure representing that character class to the end
     701             :    of the specification list LIST and return true.  If CHAR_CLASS_STR is not
     702             :    a valid string return false.  */
     703             : 
     704             : static bool
     705           2 : append_char_class (struct Spec_list *list,
     706             :                    char const *char_class_str, size_t len)
     707             : {
     708             :   enum Char_class char_class;
     709             :   struct List_element *new;
     710             : 
     711           2 :   char_class = look_up_char_class (char_class_str, len);
     712           2 :   if (char_class == CC_NO_CLASS)
     713           2 :     return false;
     714           0 :   new = xmalloc (sizeof *new);
     715           0 :   new->next = NULL;
     716           0 :   new->type = RE_CHAR_CLASS;
     717           0 :   new->u.char_class = char_class;
     718           0 :   assert (list->tail);
     719           0 :   list->tail->next = new;
     720           0 :   list->tail = new;
     721           0 :   return true;
     722             : }
     723             : 
     724             : /* Append a newly allocated structure representing a [c*n]
     725             :    repeated character construct to the specification list LIST.
     726             :    THE_CHAR is the single character to be repeated, and REPEAT_COUNT
     727             :    is a non-negative repeat count.  */
     728             : 
     729             : static void
     730           6 : append_repeated_char (struct Spec_list *list, unsigned char the_char,
     731             :                       count repeat_count)
     732             : {
     733             :   struct List_element *new;
     734             : 
     735           6 :   new = xmalloc (sizeof *new);
     736           6 :   new->next = NULL;
     737           6 :   new->type = RE_REPEATED_CHAR;
     738           6 :   new->u.repeated_char.the_repeated_char = the_char;
     739           6 :   new->u.repeated_char.repeat_count = repeat_count;
     740           6 :   assert (list->tail);
     741           6 :   list->tail->next = new;
     742           6 :   list->tail = new;
     743           6 : }
     744             : 
     745             : /* Given a string, EQUIV_CLASS_STR, from a [=str=] context and
     746             :    the length of that string, LEN, if LEN is exactly one, append
     747             :    a newly allocated structure representing the specified
     748             :    equivalence class to the specification list, LIST and return true.
     749             :    If LEN is not 1, return false.  */
     750             : 
     751             : static bool
     752           0 : append_equiv_class (struct Spec_list *list,
     753             :                     char const *equiv_class_str, size_t len)
     754             : {
     755             :   struct List_element *new;
     756             : 
     757           0 :   if (len != 1)
     758           0 :     return false;
     759           0 :   new = xmalloc (sizeof *new);
     760           0 :   new->next = NULL;
     761           0 :   new->type = RE_EQUIV_CLASS;
     762           0 :   new->u.equiv_code = *equiv_class_str;
     763           0 :   assert (list->tail);
     764           0 :   list->tail->next = new;
     765           0 :   list->tail = new;
     766           0 :   return true;
     767             : }
     768             : 
     769             : /* Search forward starting at START_IDX for the 2-char sequence
     770             :    (PRE_BRACKET_CHAR,']') in the string P of length P_LEN.  If such
     771             :    a sequence is found, set *RESULT_IDX to the index of the first
     772             :    character and return true.  Otherwise return false.  P may contain
     773             :    zero bytes.  */
     774             : 
     775             : static bool
     776          11 : find_closing_delim (const struct E_string *es, size_t start_idx,
     777             :                     char pre_bracket_char, size_t *result_idx)
     778             : {
     779             :   size_t i;
     780             : 
     781          19 :   for (i = start_idx; i < es->len - 1; i++)
     782          12 :     if (es->s[i] == pre_bracket_char && es->s[i + 1] == ']'
     783           5 :         && !es->escaped[i] && !es->escaped[i + 1])
     784             :       {
     785           4 :         *result_idx = i;
     786           4 :         return true;
     787             :       }
     788           7 :   return false;
     789             : }
     790             : 
     791             : /* Parse the bracketed repeat-char syntax.  If the P_LEN characters
     792             :    beginning with P[ START_IDX ] comprise a valid [c*n] construct,
     793             :    then set *CHAR_TO_REPEAT, *REPEAT_COUNT, and *CLOSING_BRACKET_IDX
     794             :    and return zero. If the second character following
     795             :    the opening bracket is not `*' or if no closing bracket can be
     796             :    found, return -1.  If a closing bracket is found and the
     797             :    second char is `*', but the string between the `*' and `]' isn't
     798             :    empty, an octal number, or a decimal number, print an error message
     799             :    and return -2.  */
     800             : 
     801             : static int
     802          11 : find_bracketed_repeat (const struct E_string *es, size_t start_idx,
     803             :                        unsigned char *char_to_repeat, count *repeat_count,
     804             :                        size_t *closing_bracket_idx)
     805             : {
     806             :   size_t i;
     807             : 
     808          11 :   assert (start_idx + 1 < es->len);
     809          11 :   if (!es_match (es, start_idx + 1, '*'))
     810           7 :     return -1;
     811             : 
     812           5 :   for (i = start_idx + 2; i < es->len && !es->escaped[i]; i++)
     813             :     {
     814           2 :       if (es->s[i] == ']')
     815             :         {
     816           1 :           size_t digit_str_len = i - start_idx - 2;
     817             : 
     818           1 :           *char_to_repeat = es->s[start_idx];
     819           1 :           if (digit_str_len == 0)
     820             :             {
     821             :               /* We've matched [c*] -- no explicit repeat count.  */
     822           1 :               *repeat_count = 0;
     823             :             }
     824             :           else
     825             :             {
     826             :               /* Here, we have found [c*s] where s should be a string
     827             :                  of octal (if it starts with `0') or decimal digits.  */
     828           0 :               char const *digit_str = &es->s[start_idx + 2];
     829             :               char *d_end;
     830           0 :               if ((xstrtoumax (digit_str, &d_end, *digit_str == '0' ? 8 : 10,
     831             :                                repeat_count, NULL)
     832             :                    != LONGINT_OK)
     833           0 :                   || REPEAT_COUNT_MAXIMUM < *repeat_count
     834           0 :                   || digit_str + digit_str_len != d_end)
     835             :                 {
     836           0 :                   char *tmp = make_printable_str (digit_str, digit_str_len);
     837           0 :                   error (0, 0,
     838             :                          _("invalid repeat count %s in [c*n] construct"),
     839             :                          quote (tmp));
     840           0 :                   free (tmp);
     841           0 :                   return -2;
     842             :                 }
     843             :             }
     844           1 :           *closing_bracket_idx = i;
     845           1 :           return 0;
     846             :         }
     847             :     }
     848           3 :   return -1;                    /* No bracket found.  */
     849             : }
     850             : 
     851             : /* Return true if the string at ES->s[IDX] matches the regular
     852             :    expression `\*[0-9]*\]', false otherwise.  The string does not
     853             :    match if any of its characters are escaped.  */
     854             : 
     855             : static bool
     856           2 : star_digits_closebracket (const struct E_string *es, size_t idx)
     857             : {
     858             :   size_t i;
     859             : 
     860           2 :   if (!es_match (es, idx, '*'))
     861           2 :     return false;
     862             : 
     863           0 :   for (i = idx + 1; i < es->len; i++)
     864           0 :     if (!ISDIGIT (to_uchar (es->s[i])) || es->escaped[i])
     865           0 :       return es_match (es, i, ']');
     866           0 :   return false;
     867             : }
     868             : 
     869             : /* Convert string UNESCAPED_STRING (which has been preprocessed to
     870             :    convert backslash-escape sequences) of length LEN characters into
     871             :    a linked list of the following 5 types of constructs:
     872             :       - [:str:] Character class where `str' is one of the 12 valid strings.
     873             :       - [=c=] Equivalence class where `c' is any single character.
     874             :       - [c*n] Repeat the single character `c' `n' times. n may be omitted.
     875             :           However, if `n' is present, it must be a non-negative octal or
     876             :           decimal integer.
     877             :       - r-s Range of characters from `r' to `s'.  The second endpoint must
     878             :           not precede the first in the current collating sequence.
     879             :       - c Any other character is interpreted as itself.  */
     880             : 
     881             : static bool
     882         109 : build_spec_list (const struct E_string *es, struct Spec_list *result)
     883             : {
     884             :   char const *p;
     885             :   size_t i;
     886             : 
     887         109 :   p = es->s;
     888             : 
     889             :   /* The main for-loop below recognizes the 4 multi-character constructs.
     890             :      A character that matches (in its context) none of the multi-character
     891             :      constructs is classified as `normal'.  Since all multi-character
     892             :      constructs have at least 3 characters, any strings of length 2 or
     893             :      less are composed solely of normal characters.  Hence, the index of
     894             :      the outer for-loop runs only as far as LEN-2.  */
     895             : 
     896         239 :   for (i = 0; i + 2 < es->len; /* empty */)
     897             :     {
     898          28 :       if (es_match (es, i, '['))
     899             :         {
     900             :           bool matched_multi_char_construct;
     901             :           size_t closing_bracket_idx;
     902             :           unsigned char char_to_repeat;
     903             :           count repeat_count;
     904             :           int err;
     905             : 
     906          15 :           matched_multi_char_construct = true;
     907          15 :           if (es_match (es, i + 1, ':') || es_match (es, i + 1, '='))
     908             :             {
     909             :               size_t closing_delim_idx;
     910             : 
     911          11 :               if (find_closing_delim (es, i + 2, p[i + 1], &closing_delim_idx))
     912             :                 {
     913           4 :                   size_t opnd_str_len = closing_delim_idx - 1 - (i + 2) + 1;
     914           4 :                   char const *opnd_str = p + i + 2;
     915             : 
     916           4 :                   if (opnd_str_len == 0)
     917             :                     {
     918           2 :                       if (p[i + 1] == ':')
     919           1 :                         error (0, 0, _("missing character class name `[::]'"));
     920             :                       else
     921           1 :                         error (0, 0,
     922             :                                _("missing equivalence class character `[==]'"));
     923           6 :                       return false;
     924             :                     }
     925             : 
     926           2 :                   if (p[i + 1] == ':')
     927             :                     {
     928             :                       /* FIXME: big comment.  */
     929           2 :                       if (!append_char_class (result, opnd_str, opnd_str_len))
     930             :                         {
     931           2 :                           if (star_digits_closebracket (es, i + 2))
     932           0 :                             goto try_bracketed_repeat;
     933             :                           else
     934             :                             {
     935           2 :                               char *tmp = make_printable_str (opnd_str,
     936             :                                                               opnd_str_len);
     937           2 :                               error (0, 0, _("invalid character class %s"),
     938             :                                      quote (tmp));
     939           2 :                               free (tmp);
     940           2 :                               return false;
     941             :                             }
     942             :                         }
     943             :                     }
     944             :                   else
     945             :                     {
     946             :                       /* FIXME: big comment.  */
     947           0 :                       if (!append_equiv_class (result, opnd_str, opnd_str_len))
     948             :                         {
     949           0 :                           if (star_digits_closebracket (es, i + 2))
     950           0 :                             goto try_bracketed_repeat;
     951             :                           else
     952             :                             {
     953           0 :                               char *tmp = make_printable_str (opnd_str,
     954             :                                                               opnd_str_len);
     955           0 :                               error (0, 0,
     956             :                _("%s: equivalence class operand must be a single character"),
     957             :                                      tmp);
     958           0 :                               free (tmp);
     959           0 :                               return false;
     960             :                             }
     961             :                         }
     962             :                     }
     963             : 
     964           0 :                   i = closing_delim_idx + 2;
     965           0 :                   continue;
     966             :                 }
     967             :               /* Else fall through.  This could be [:*] or [=*].  */
     968             :             }
     969             : 
     970          15 :         try_bracketed_repeat:
     971             : 
     972             :           /* Determine whether this is a bracketed repeat range
     973             :              matching the RE \[.\*(dec_or_oct_number)?\].  */
     974          11 :           err = find_bracketed_repeat (es, i + 1, &char_to_repeat,
     975             :                                        &repeat_count,
     976             :                                        &closing_bracket_idx);
     977          11 :           if (err == 0)
     978             :             {
     979           1 :               append_repeated_char (result, char_to_repeat, repeat_count);
     980           1 :               i = closing_bracket_idx + 1;
     981             :             }
     982          10 :           else if (err == -1)
     983             :             {
     984          10 :               matched_multi_char_construct = false;
     985             :             }
     986             :           else
     987             :             {
     988             :               /* Found a string that looked like [c*n] but the
     989             :                  numeric part was invalid.  */
     990           0 :               return false;
     991             :             }
     992             : 
     993          11 :           if (matched_multi_char_construct)
     994           1 :             continue;
     995             : 
     996             :           /* We reach this point if P does not match [:str:], [=c=],
     997             :              [c*n], or [c*].  Now, see if P looks like a range `[-c'
     998             :              (from `[' to `c').  */
     999             :         }
    1000             : 
    1001             :       /* Look ahead one char for ranges like a-z.  */
    1002          23 :       if (es_match (es, i + 1, '-'))
    1003             :         {
    1004           6 :           if (!append_range (result, p[i], p[i + 2]))
    1005           3 :             return false;
    1006           3 :           i += 3;
    1007             :         }
    1008             :       else
    1009             :         {
    1010          17 :           append_normal_char (result, p[i]);
    1011          17 :           ++i;
    1012             :         }
    1013             :     }
    1014             : 
    1015             :   /* Now handle the (2 or fewer) remaining characters p[i]..p[es->len - 1].  */
    1016         168 :   for (; i < es->len; i++)
    1017          66 :     append_normal_char (result, p[i]);
    1018             : 
    1019         102 :   return true;
    1020             : }
    1021             : 
    1022             : /* Advance past the current construct.
    1023             :    S->tail must be non-NULL.  */
    1024             : static void
    1025           0 : skip_construct (struct Spec_list *s)
    1026             : {
    1027           0 :   s->tail = s->tail->next;
    1028           0 :   s->state = NEW_ELEMENT;
    1029           0 : }
    1030             : 
    1031             : /* Given a Spec_list S (with its saved state implicit in the values
    1032             :    of its members `tail' and `state'), return the next single character
    1033             :    in the expansion of S's constructs.  If the last character of S was
    1034             :    returned on the previous call or if S was empty, this function
    1035             :    returns -1.  For example, successive calls to get_next where S
    1036             :    represents the spec-string 'a-d[y*3]' will return the sequence
    1037             :    of values a, b, c, d, y, y, y, -1.  Finally, if the construct from
    1038             :    which the returned character comes is [:upper:] or [:lower:], the
    1039             :    parameter CLASS is given a value to indicate which it was.  Otherwise
    1040             :    CLASS is set to UL_NONE.  This value is used only when constructing
    1041             :    the translation table to verify that any occurrences of upper and
    1042             :    lower class constructs in the spec-strings appear in the same relative
    1043             :    positions.  */
    1044             : 
    1045             : static int
    1046         851 : get_next (struct Spec_list *s, enum Upper_Lower_class *class)
    1047             : {
    1048             :   struct List_element *p;
    1049             :   int return_val;
    1050             :   int i;
    1051             : 
    1052         851 :   if (class)
    1053          36 :     *class = UL_NONE;
    1054             : 
    1055         851 :   if (s->state == BEGIN_STATE)
    1056             :     {
    1057          60 :       s->tail = s->head->next;
    1058          60 :       s->state = NEW_ELEMENT;
    1059             :     }
    1060             : 
    1061         851 :   p = s->tail;
    1062         851 :   if (p == NULL)
    1063          49 :     return -1;
    1064             : 
    1065         802 :   switch (p->type)
    1066             :     {
    1067          34 :     case RE_NORMAL_CHAR:
    1068          34 :       return_val = p->u.normal_char;
    1069          34 :       s->state = NEW_ELEMENT;
    1070          34 :       s->tail = p->next;
    1071          34 :       break;
    1072             : 
    1073           2 :     case RE_RANGE:
    1074           2 :       if (s->state == NEW_ELEMENT)
    1075           1 :         s->state = p->u.range.first_char;
    1076             :       else
    1077           1 :         ++(s->state);
    1078           2 :       return_val = s->state;
    1079           2 :       if (s->state == p->u.range.last_char)
    1080             :         {
    1081           1 :           s->tail = p->next;
    1082           1 :           s->state = NEW_ELEMENT;
    1083             :         }
    1084           2 :       break;
    1085             : 
    1086           0 :     case RE_CHAR_CLASS:
    1087           0 :       if (class)
    1088             :         {
    1089           0 :           switch (p->u.char_class)
    1090             :             {
    1091           0 :             case CC_LOWER:
    1092           0 :               *class = UL_LOWER;
    1093           0 :               break;
    1094           0 :             case CC_UPPER:
    1095           0 :               *class = UL_UPPER;
    1096           0 :               break;
    1097           0 :             default:
    1098           0 :               break;
    1099             :             }
    1100           0 :         }
    1101             : 
    1102           0 :       if (s->state == NEW_ELEMENT)
    1103             :         {
    1104           0 :           for (i = 0; i < N_CHARS; i++)
    1105           0 :             if (is_char_class_member (p->u.char_class, i))
    1106           0 :               break;
    1107           0 :           assert (i < N_CHARS);
    1108           0 :           s->state = i;
    1109             :         }
    1110           0 :       assert (is_char_class_member (p->u.char_class, s->state));
    1111           0 :       return_val = s->state;
    1112           0 :       for (i = s->state + 1; i < N_CHARS; i++)
    1113           0 :         if (is_char_class_member (p->u.char_class, i))
    1114           0 :           break;
    1115           0 :       if (i < N_CHARS)
    1116           0 :         s->state = i;
    1117             :       else
    1118             :         {
    1119           0 :           s->tail = p->next;
    1120           0 :           s->state = NEW_ELEMENT;
    1121             :         }
    1122           0 :       break;
    1123             : 
    1124           0 :     case RE_EQUIV_CLASS:
    1125             :       /* FIXME: this assumes that each character is alone in its own
    1126             :          equivalence class (which appears to be correct for my
    1127             :          LC_COLLATE.  But I don't know of any function that allows
    1128             :          one to determine a character's equivalence class.  */
    1129             : 
    1130           0 :       return_val = p->u.equiv_code;
    1131           0 :       s->state = NEW_ELEMENT;
    1132           0 :       s->tail = p->next;
    1133           0 :       break;
    1134             : 
    1135         766 :     case RE_REPEATED_CHAR:
    1136             :       /* Here, a repeat count of n == 0 means don't repeat at all.  */
    1137         766 :       if (p->u.repeated_char.repeat_count == 0)
    1138             :         {
    1139           0 :           s->tail = p->next;
    1140           0 :           s->state = NEW_ELEMENT;
    1141           0 :           return_val = get_next (s, class);
    1142             :         }
    1143             :       else
    1144             :         {
    1145         766 :           if (s->state == NEW_ELEMENT)
    1146             :             {
    1147           5 :               s->state = 0;
    1148             :             }
    1149         766 :           ++(s->state);
    1150         766 :           return_val = p->u.repeated_char.the_repeated_char;
    1151         766 :           if (s->state == p->u.repeated_char.repeat_count)
    1152             :             {
    1153           5 :               s->tail = p->next;
    1154           5 :               s->state = NEW_ELEMENT;
    1155             :             }
    1156             :         }
    1157         766 :       break;
    1158             : 
    1159           0 :     default:
    1160           0 :       abort ();
    1161             :       break;
    1162             :     }
    1163             : 
    1164         802 :   return return_val;
    1165             : }
    1166             : 
    1167             : /* This is a minor kludge.  This function is called from
    1168             :    get_spec_stats to determine the cardinality of a set derived
    1169             :    from a complemented string.  It's a kludge in that some of the
    1170             :    same operations are (duplicated) performed in set_initialize.  */
    1171             : 
    1172             : static int
    1173           9 : card_of_complement (struct Spec_list *s)
    1174             : {
    1175             :   int c;
    1176           9 :   int cardinality = N_CHARS;
    1177           9 :   bool in_set[N_CHARS] = { 0, };
    1178             : 
    1179           9 :   s->state = BEGIN_STATE;
    1180          22 :   while ((c = get_next (s, NULL)) != -1)
    1181             :     {
    1182           4 :       cardinality -= (!in_set[c]);
    1183           4 :       in_set[c] = true;
    1184             :     }
    1185           9 :   return cardinality;
    1186             : }
    1187             : 
    1188             : /* Gather statistics about the spec-list S in preparation for the tests
    1189             :    in validate that determine the consistency of the specs.  This function
    1190             :    is called at most twice; once for string1, and again for any string2.
    1191             :    LEN_S1 < 0 indicates that this is the first call and that S represents
    1192             :    string1.  When LEN_S1 >= 0, it is the length of the expansion of the
    1193             :    constructs in string1, and we can use its value to resolve any
    1194             :    indefinite repeat construct in S (which represents string2).  Hence,
    1195             :    this function has the side-effect that it converts a valid [c*]
    1196             :    construct in string2 to [c*n] where n is large enough (or 0) to give
    1197             :    string2 the same length as string1.  For example, with the command
    1198             :    tr a-z 'A[\n*]Z' on the second call to get_spec_stats, LEN_S1 would
    1199             :    be 26 and S (representing string2) would be converted to 'A[\n*24]Z'.  */
    1200             : 
    1201             : static void
    1202         101 : get_spec_stats (struct Spec_list *s)
    1203             : {
    1204             :   struct List_element *p;
    1205         101 :   count length = 0;
    1206             : 
    1207         101 :   s->n_indefinite_repeats = 0;
    1208         101 :   s->has_equiv_class = false;
    1209         101 :   s->has_restricted_char_class = false;
    1210         101 :   s->has_char_class = false;
    1211         188 :   for (p = s->head->next; p; p = p->next)
    1212             :     {
    1213             :       int i;
    1214          87 :       count len = 0;
    1215             :       count new_length;
    1216             : 
    1217          87 :       switch (p->type)
    1218             :         {
    1219          83 :         case RE_NORMAL_CHAR:
    1220          83 :           len = 1;
    1221          83 :           break;
    1222             : 
    1223           3 :         case RE_RANGE:
    1224           3 :           assert (p->u.range.last_char >= p->u.range.first_char);
    1225           3 :           len = p->u.range.last_char - p->u.range.first_char + 1;
    1226           3 :           break;
    1227             : 
    1228           0 :         case RE_CHAR_CLASS:
    1229           0 :           s->has_char_class = true;
    1230           0 :           for (i = 0; i < N_CHARS; i++)
    1231           0 :             if (is_char_class_member (p->u.char_class, i))
    1232           0 :               ++len;
    1233           0 :           switch (p->u.char_class)
    1234             :             {
    1235           0 :             case CC_UPPER:
    1236             :             case CC_LOWER:
    1237           0 :               break;
    1238           0 :             default:
    1239           0 :               s->has_restricted_char_class = true;
    1240           0 :               break;
    1241             :             }
    1242           0 :           break;
    1243             : 
    1244           0 :         case RE_EQUIV_CLASS:
    1245           0 :           for (i = 0; i < N_CHARS; i++)
    1246           0 :             if (is_equiv_class_member (p->u.equiv_code, i))
    1247           0 :               ++len;
    1248           0 :           s->has_equiv_class = true;
    1249           0 :           break;
    1250             : 
    1251           1 :         case RE_REPEATED_CHAR:
    1252           1 :           if (p->u.repeated_char.repeat_count > 0)
    1253           0 :             len = p->u.repeated_char.repeat_count;
    1254             :           else
    1255             :             {
    1256           1 :               s->indefinite_repeat_element = p;
    1257           1 :               ++(s->n_indefinite_repeats);
    1258             :             }
    1259           1 :           break;
    1260             : 
    1261           0 :         default:
    1262           0 :           abort ();
    1263             :           break;
    1264             :         }
    1265             : 
    1266             :       /* Check for arithmetic overflow in computing length.  Also, reject
    1267             :          any length greater than the maximum repeat count, in case the
    1268             :          length is later used to compute the repeat count for an
    1269             :          indefinite element.  */
    1270          87 :       new_length = length + len;
    1271          87 :       if (! (length <= new_length && new_length <= REPEAT_COUNT_MAXIMUM))
    1272           0 :         error (EXIT_FAILURE, 0, _("too many characters in set"));
    1273          87 :       length = new_length;
    1274             :     }
    1275             : 
    1276         101 :   s->length = length;
    1277         101 : }
    1278             : 
    1279             : static void
    1280          58 : get_s1_spec_stats (struct Spec_list *s1)
    1281             : {
    1282          58 :   get_spec_stats (s1);
    1283          58 :   if (complement)
    1284           9 :     s1->length = card_of_complement (s1);
    1285          58 : }
    1286             : 
    1287             : static void
    1288          43 : get_s2_spec_stats (struct Spec_list *s2, count len_s1)
    1289             : {
    1290          43 :   get_spec_stats (s2);
    1291          43 :   if (len_s1 >= s2->length && s2->n_indefinite_repeats == 1)
    1292             :     {
    1293           0 :       s2->indefinite_repeat_element->u.repeated_char.repeat_count =
    1294           0 :         len_s1 - s2->length;
    1295           0 :       s2->length = len_s1;
    1296             :     }
    1297          43 : }
    1298             : 
    1299             : static void
    1300         109 : spec_init (struct Spec_list *spec_list)
    1301             : {
    1302         109 :   struct List_element *new = xmalloc (sizeof *new);
    1303         109 :   spec_list->head = spec_list->tail = new;
    1304         109 :   spec_list->head->next = NULL;
    1305         109 : }
    1306             : 
    1307             : /* This function makes two passes over the argument string S.  The first
    1308             :    one converts all \c and \ddd escapes to their one-byte representations.
    1309             :    The second constructs a linked specification list, SPEC_LIST, of the
    1310             :    characters and constructs that comprise the argument string.  If either
    1311             :    of these passes detects an error, this function returns false.  */
    1312             : 
    1313             : static bool
    1314         109 : parse_str (char const *s, struct Spec_list *spec_list)
    1315             : {
    1316             :   struct E_string es;
    1317         109 :   bool ok = unquote (s, &es) && build_spec_list (&es, spec_list);
    1318         109 :   es_free (&es);
    1319         109 :   return ok;
    1320             : }
    1321             : 
    1322             : /* Given two specification lists, S1 and S2, and assuming that
    1323             :    S1->length > S2->length, append a single [c*n] element to S2 where c
    1324             :    is the last character in the expansion of S2 and n is the difference
    1325             :    between the two lengths.
    1326             :    Upon successful completion, S2->length is set to S1->length.  The only
    1327             :    way this function can fail to make S2 as long as S1 is when S2 has
    1328             :    zero-length, since in that case, there is no last character to repeat.
    1329             :    So S2->length is required to be at least 1.
    1330             : 
    1331             :    Providing this functionality allows the user to do some pretty
    1332             :    non-BSD (and non-portable) things:  For example, the command
    1333             :        tr -cs '[:upper:]0-9' '[:lower:]'
    1334             :    is almost guaranteed to give results that depend on your collating
    1335             :    sequence.  */
    1336             : 
    1337             : static void
    1338           5 : string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
    1339             : {
    1340             :   struct List_element *p;
    1341             :   unsigned char char_to_repeat;
    1342             :   int i;
    1343             : 
    1344           5 :   assert (translating);
    1345           5 :   assert (s1->length > s2->length);
    1346           5 :   assert (s2->length > 0);
    1347             : 
    1348           5 :   p = s2->tail;
    1349           5 :   switch (p->type)
    1350             :     {
    1351           5 :     case RE_NORMAL_CHAR:
    1352           5 :       char_to_repeat = p->u.normal_char;
    1353           5 :       break;
    1354           0 :     case RE_RANGE:
    1355           0 :       char_to_repeat = p->u.range.last_char;
    1356           0 :       break;
    1357           0 :     case RE_CHAR_CLASS:
    1358           0 :       for (i = N_CHARS - 1; i >= 0; i--)
    1359           0 :         if (is_char_class_member (p->u.char_class, i))
    1360           0 :           break;
    1361           0 :       assert (i >= 0);
    1362           0 :       char_to_repeat = i;
    1363           0 :       break;
    1364             : 
    1365           0 :     case RE_REPEATED_CHAR:
    1366           0 :       char_to_repeat = p->u.repeated_char.the_repeated_char;
    1367           0 :       break;
    1368             : 
    1369           0 :     case RE_EQUIV_CLASS:
    1370             :       /* This shouldn't happen, because validate exits with an error
    1371             :          if it finds an equiv class in string2 when translating.  */
    1372           0 :       abort ();
    1373             :       break;
    1374             : 
    1375           0 :     default:
    1376           0 :       abort ();
    1377             :       break;
    1378             :     }
    1379             : 
    1380           5 :   append_repeated_char (s2, char_to_repeat, s1->length - s2->length);
    1381           5 :   s2->length = s1->length;
    1382           5 : }
    1383             : 
    1384             : /* Return true if S is a non-empty list in which exactly one
    1385             :    character (but potentially, many instances of it) appears.
    1386             :    E.g., [X*] or xxxxxxxx.  */
    1387             : 
    1388             : static bool
    1389           0 : homogeneous_spec_list (struct Spec_list *s)
    1390             : {
    1391             :   int b, c;
    1392             : 
    1393           0 :   s->state = BEGIN_STATE;
    1394             : 
    1395           0 :   if ((b = get_next (s, NULL)) == -1)
    1396           0 :     return false;
    1397             : 
    1398           0 :   while ((c = get_next (s, NULL)) != -1)
    1399           0 :     if (c != b)
    1400           0 :       return false;
    1401             : 
    1402           0 :   return true;
    1403             : }
    1404             : 
    1405             : /* Die with an error message if S1 and S2 describe strings that
    1406             :    are not valid with the given command line switches.
    1407             :    A side effect of this function is that if a valid [c*] or
    1408             :    [c*0] construct appears in string2, it is converted to [c*n]
    1409             :    with a value for n that makes s2->length == s1->length.  By
    1410             :    the same token, if the --truncate-set1 option is not
    1411             :    given, S2 may be extended.  */
    1412             : 
    1413             : static void
    1414          58 : validate (struct Spec_list *s1, struct Spec_list *s2)
    1415             : {
    1416          58 :   get_s1_spec_stats (s1);
    1417          58 :   if (s1->n_indefinite_repeats > 0)
    1418             :     {
    1419           1 :       error (EXIT_FAILURE, 0,
    1420             :              _("the [c*] repeat construct may not appear in string1"));
    1421             :     }
    1422             : 
    1423          57 :   if (s2)
    1424             :     {
    1425          43 :       get_s2_spec_stats (s2, s1->length);
    1426             : 
    1427          43 :       if (s2->n_indefinite_repeats > 1)
    1428             :         {
    1429           0 :           error (EXIT_FAILURE, 0,
    1430             :                  _("only one [c*] repeat construct may appear in string2"));
    1431             :         }
    1432             : 
    1433          43 :       if (translating)
    1434             :         {
    1435          42 :           if (s2->has_equiv_class)
    1436             :             {
    1437           0 :               error (EXIT_FAILURE, 0,
    1438             :                      _("[=c=] expressions may not appear in string2 \
    1439             : when translating"));
    1440             :             }
    1441             : 
    1442          42 :           if (s1->length > s2->length)
    1443             :             {
    1444          32 :               if (!truncate_set1)
    1445             :                 {
    1446             :                   /* string2 must be non-empty unless --truncate-set1 is
    1447             :                      given or string1 is empty.  */
    1448             : 
    1449          30 :                   if (s2->length == 0)
    1450          25 :                     error (EXIT_FAILURE, 0,
    1451             :                      _("when not truncating set1, string2 must be non-empty"));
    1452           5 :                   string2_extend (s1, s2);
    1453             :                 }
    1454             :             }
    1455             : 
    1456          17 :           if (complement && s1->has_char_class
    1457           0 :               && ! (s2->length == s1->length && homogeneous_spec_list (s2)))
    1458             :             {
    1459           0 :               error (EXIT_FAILURE, 0,
    1460             :                      _("when translating with complemented character classes,\
    1461             : \nstring2 must map all characters in the domain to one"));
    1462             :             }
    1463             : 
    1464          17 :           if (s2->has_restricted_char_class)
    1465             :             {
    1466           0 :               error (EXIT_FAILURE, 0,
    1467             :                      _("when translating, the only character classes that may \
    1468             : appear in\nstring2 are `upper' and `lower'"));
    1469             :             }
    1470             :         }
    1471             :       else
    1472             :         /* Not translating.  */
    1473             :         {
    1474           1 :           if (s2->n_indefinite_repeats > 0)
    1475           0 :             error (EXIT_FAILURE, 0,
    1476             :                    _("the [c*] construct may appear in string2 only \
    1477             : when translating"));
    1478             :         }
    1479             :     }
    1480          32 : }
    1481             : 
    1482             : /* Read buffers of SIZE bytes via the function READER (if READER is
    1483             :    NULL, read from stdin) until EOF.  When non-NULL, READER is either
    1484             :    read_and_delete or read_and_xlate.  After each buffer is read, it is
    1485             :    processed and written to stdout.  The buffers are processed so that
    1486             :    multiple consecutive occurrences of the same character in the input
    1487             :    stream are replaced by a single occurrence of that character if the
    1488             :    character is in the squeeze set.  */
    1489             : 
    1490             : static void
    1491          11 : squeeze_filter (char *buf, size_t size, size_t (*reader) (char *, size_t))
    1492             : {
    1493             :   /* A value distinct from any character that may have been stored in a
    1494             :      buffer as the result of a block-read in the function squeeze_filter.  */
    1495             :   enum { NOT_A_CHAR = CHAR_MAX + 1 };
    1496             : 
    1497          11 :   int char_to_squeeze = NOT_A_CHAR;
    1498          11 :   size_t i = 0;
    1499          11 :   size_t nr = 0;
    1500             : 
    1501             :   for (;;)
    1502          13 :     {
    1503             :       size_t begin;
    1504             : 
    1505          24 :       if (i >= nr)
    1506             :         {
    1507          22 :           nr = reader (buf, size);
    1508          22 :           if (nr == 0)
    1509          11 :             break;
    1510          11 :           i = 0;
    1511             :         }
    1512             : 
    1513          13 :       begin = i;
    1514             : 
    1515          13 :       if (char_to_squeeze == NOT_A_CHAR)
    1516             :         {
    1517             :           size_t out_len;
    1518             :           /* Here, by being a little tricky, we can get a significant
    1519             :              performance increase in most cases when the input is
    1520             :              reasonably large.  Since tr will modify the input only
    1521             :              if two consecutive (and identical) input characters are
    1522             :              in the squeeze set, we can step by two through the data
    1523             :              when searching for a character in the squeeze set.  This
    1524             :              means there may be a little more work in a few cases and
    1525             :              perhaps twice as much work in the worst cases where most
    1526             :              of the input is removed by squeezing repeats.  But most
    1527             :              uses of this functionality seem to remove less than 20-30%
    1528             :              of the input.  */
    1529          43 :           for (; i < nr && !in_squeeze_set[to_uchar (buf[i])]; i += 2)
    1530          30 :             continue;
    1531             : 
    1532             :           /* There is a special case when i == nr and we've just
    1533             :              skipped a character (the last one in buf) that is in
    1534             :              the squeeze set.  */
    1535          13 :           if (i == nr && in_squeeze_set[to_uchar (buf[i - 1])])
    1536           2 :             --i;
    1537             : 
    1538          13 :           if (i >= nr)
    1539           6 :             out_len = nr - begin;
    1540             :           else
    1541             :             {
    1542           7 :               char_to_squeeze = buf[i];
    1543             :               /* We're about to output buf[begin..i].  */
    1544           7 :               out_len = i - begin + 1;
    1545             : 
    1546             :               /* But since we stepped by 2 in the loop above,
    1547             :                  out_len may be one too large.  */
    1548           7 :               if (i > 0 && buf[i - 1] == char_to_squeeze)
    1549           2 :                 --out_len;
    1550             : 
    1551             :               /* Advance i to the index of first character to be
    1552             :                  considered when looking for a char different from
    1553             :                  char_to_squeeze.  */
    1554           7 :               ++i;
    1555             :             }
    1556          13 :           if (out_len > 0
    1557          13 :               && fwrite (&buf[begin], 1, out_len, stdout) != out_len)
    1558           0 :             error (EXIT_FAILURE, errno, _("write error"));
    1559             :         }
    1560             : 
    1561          13 :       if (char_to_squeeze != NOT_A_CHAR)
    1562             :         {
    1563             :           /* Advance i to index of first char != char_to_squeeze
    1564             :              (or to nr if all the rest of the characters in this
    1565             :              buffer are the same as char_to_squeeze).  */
    1566          32 :           for (; i < nr && buf[i] == char_to_squeeze; i++)
    1567          25 :             continue;
    1568           7 :           if (i < nr)
    1569           2 :             char_to_squeeze = NOT_A_CHAR;
    1570             :           /* If (i >= nr) we've squeezed the last character in this buffer.
    1571             :              So now we have to read a new buffer and continue comparing
    1572             :              characters against char_to_squeeze.  */
    1573             :         }
    1574             :     }
    1575          11 : }
    1576             : 
    1577             : static size_t
    1578          64 : plain_read (char *buf, size_t size)
    1579             : {
    1580          64 :   size_t nr = safe_read (STDIN_FILENO, buf, size);
    1581          64 :   if (nr == SAFE_READ_ERROR)
    1582           0 :     error (EXIT_FAILURE, errno, _("read error"));
    1583          64 :   return nr;
    1584             : }
    1585             : 
    1586             : /* Read buffers of SIZE bytes from stdin until one is found that
    1587             :    contains at least one character not in the delete set.  Store
    1588             :    in the array BUF, all characters from that buffer that are not
    1589             :    in the delete set, and return the number of characters saved
    1590             :    or 0 upon EOF.  */
    1591             : 
    1592             : static size_t
    1593          12 : read_and_delete (char *buf, size_t size)
    1594             : {
    1595             :   size_t n_saved;
    1596             : 
    1597             :   /* This enclosing do-while loop is to make sure that
    1598             :      we don't return zero (indicating EOF) when we've
    1599             :      just deleted all the characters in a buffer.  */
    1600             :   do
    1601             :     {
    1602             :       size_t i;
    1603          12 :       size_t nr = plain_read (buf, size);
    1604             : 
    1605          12 :       if (nr == 0)
    1606           6 :         return 0;
    1607             : 
    1608             :       /* This first loop may be a waste of code, but gives much
    1609             :          better performance when no characters are deleted in
    1610             :          the beginning of a buffer.  It just avoids the copying
    1611             :          of buf[i] into buf[n_saved] when it would be a NOP.  */
    1612             : 
    1613          32 :       for (i = 0; i < nr && !in_delete_set[to_uchar (buf[i])]; i++)
    1614          26 :         continue;
    1615           6 :       n_saved = i;
    1616             : 
    1617          24 :       for (++i; i < nr; i++)
    1618          18 :         if (!in_delete_set[to_uchar (buf[i])])
    1619          10 :           buf[n_saved++] = buf[i];
    1620             :     }
    1621           6 :   while (n_saved == 0);
    1622             : 
    1623           5 :   return n_saved;
    1624             : }
    1625             : 
    1626             : /* Read at most SIZE bytes from stdin into the array BUF.  Then
    1627             :    perform the in-place and one-to-one mapping specified by the global
    1628             :    array `xlate'.  Return the number of characters read, or 0 upon EOF.  */
    1629             : 
    1630             : static size_t
    1631          34 : read_and_xlate (char *buf, size_t size)
    1632             : {
    1633          34 :   size_t bytes_read = plain_read (buf, size);
    1634             :   size_t i;
    1635             : 
    1636         170 :   for (i = 0; i < bytes_read; i++)
    1637         136 :     buf[i] = xlate[to_uchar (buf[i])];
    1638             : 
    1639          34 :   return bytes_read;
    1640             : }
    1641             : 
    1642             : /* Initialize a boolean membership set, IN_SET, with the character
    1643             :    values obtained by traversing the linked list of constructs S
    1644             :    using the function `get_next'.  IN_SET is expected to have been
    1645             :    initialized to all zeros by the caller.  If COMPLEMENT_THIS_SET
    1646             :    is true the resulting set is complemented.  */
    1647             : 
    1648             : static void
    1649          21 : set_initialize (struct Spec_list *s, bool complement_this_set, bool *in_set)
    1650             : {
    1651             :   int c;
    1652             :   size_t i;
    1653             : 
    1654          21 :   s->state = BEGIN_STATE;
    1655          55 :   while ((c = get_next (s, NULL)) != -1)
    1656          13 :     in_set[c] = true;
    1657          21 :   if (complement_this_set)
    1658         514 :     for (i = 0; i < N_CHARS; i++)
    1659         512 :       in_set[i] = (!in_set[i]);
    1660          21 : }
    1661             : 
    1662             : int
    1663         103 : main (int argc, char **argv)
    1664             : {
    1665             :   int c;
    1666             :   int non_option_args;
    1667             :   int min_operands;
    1668             :   int max_operands;
    1669             :   struct Spec_list buf1, buf2;
    1670         103 :   struct Spec_list *s1 = &buf1;
    1671         103 :   struct Spec_list *s2 = &buf2;
    1672             : 
    1673             :   initialize_main (&argc, &argv);
    1674         103 :   program_name = argv[0];
    1675         103 :   setlocale (LC_ALL, "");
    1676             :   bindtextdomain (PACKAGE, LOCALEDIR);
    1677             :   textdomain (PACKAGE);
    1678             : 
    1679         103 :   atexit (close_stdout);
    1680             : 
    1681         245 :   while ((c = getopt_long (argc, argv, "+cCdst", long_options, NULL)) != -1)
    1682             :     {
    1683          48 :       switch (c)
    1684             :         {
    1685          15 :         case 'c':
    1686             :         case 'C':
    1687          15 :           complement = true;
    1688          15 :           break;
    1689             : 
    1690           9 :         case 'd':
    1691           9 :           delete = true;
    1692           9 :           break;
    1693             : 
    1694          12 :         case 's':
    1695          12 :           squeeze_repeats = true;
    1696          12 :           break;
    1697             : 
    1698           3 :         case 't':
    1699           3 :           truncate_set1 = true;
    1700           3 :           break;
    1701             : 
    1702           2 :         case_GETOPT_HELP_CHAR;
    1703             : 
    1704           1 :         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
    1705             : 
    1706           6 :         default:
    1707           6 :           usage (EXIT_FAILURE);
    1708           0 :           break;
    1709             :         }
    1710             :     }
    1711             : 
    1712          94 :   non_option_args = argc - optind;
    1713          94 :   translating = (non_option_args == 2 && !delete);
    1714          94 :   min_operands = 1 + (delete == squeeze_repeats);
    1715          94 :   max_operands = 1 + (delete <= squeeze_repeats);
    1716             : 
    1717          94 :   if (non_option_args < min_operands)
    1718             :     {
    1719          22 :       if (non_option_args == 0)
    1720           8 :         error (0, 0, _("missing operand"));
    1721             :       else
    1722             :         {
    1723          14 :           error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
    1724          14 :           fprintf (stderr, "%s\n",
    1725          14 :                    _(squeeze_repeats
    1726             :                      ? ("Two strings must be given when "
    1727             :                         "both deleting and squeezing repeats.")
    1728             :                      : "Two strings must be given when translating."));
    1729             :         }
    1730          22 :       usage (EXIT_FAILURE);
    1731             :     }
    1732             : 
    1733          72 :   if (max_operands < non_option_args)
    1734             :     {
    1735           7 :       error (0, 0, _("extra operand %s"), quote (argv[optind + max_operands]));
    1736           7 :       if (non_option_args == 2)
    1737           1 :         fprintf (stderr, "%s\n",
    1738             :                  _("Only one string may be given when "
    1739             :                    "deleting without squeezing repeats."));
    1740           7 :       usage (EXIT_FAILURE);
    1741             :     }
    1742             : 
    1743          65 :   spec_init (s1);
    1744          65 :   if (!parse_str (argv[optind], s1))
    1745           7 :     exit (EXIT_FAILURE);
    1746             : 
    1747          58 :   if (non_option_args == 2)
    1748             :     {
    1749          44 :       spec_init (s2);
    1750          44 :       if (!parse_str (argv[optind + 1], s2))
    1751           0 :         exit (EXIT_FAILURE);
    1752             :     }
    1753             :   else
    1754          14 :     s2 = NULL;
    1755             : 
    1756          58 :   validate (s1, s2);
    1757             : 
    1758             :   /* Use binary I/O, since `tr' is sometimes used to transliterate
    1759             :      non-printable characters, or characters which are stripped away
    1760             :      by text-mode reads (like CR and ^Z).  */
    1761             :   if (O_BINARY && ! isatty (STDIN_FILENO))
    1762             :     freopen (NULL, "rb", stdin);
    1763             :   if (O_BINARY && ! isatty (STDOUT_FILENO))
    1764             :     freopen (NULL, "wb", stdout);
    1765             : 
    1766          32 :   if (squeeze_repeats && non_option_args == 1)
    1767             :     {
    1768           9 :       set_initialize (s1, complement, in_squeeze_set);
    1769           9 :       squeeze_filter (io_buf, sizeof io_buf, plain_read);
    1770             :     }
    1771          28 :   else if (delete && non_option_args == 1)
    1772             :     {
    1773           5 :       set_initialize (s1, complement, in_delete_set);
    1774             : 
    1775             :       for (;;)
    1776           4 :         {
    1777           9 :           size_t nr = read_and_delete (io_buf, sizeof io_buf);
    1778           9 :           if (nr == 0)
    1779           5 :             break;
    1780           4 :           if (fwrite (io_buf, 1, nr, stdout) != nr)
    1781           0 :             error (EXIT_FAILURE, errno, _("write error"));
    1782             :         }
    1783             :     }
    1784          18 :   else if (squeeze_repeats && delete && non_option_args == 2)
    1785             :     {
    1786           1 :       set_initialize (s1, complement, in_delete_set);
    1787           1 :       set_initialize (s2, false, in_squeeze_set);
    1788           1 :       squeeze_filter (io_buf, sizeof io_buf, read_and_delete);
    1789             :     }
    1790          17 :   else if (translating)
    1791             :     {
    1792          17 :       if (complement)
    1793             :         {
    1794             :           int i;
    1795           4 :           bool *in_s1 = in_delete_set;
    1796             : 
    1797           4 :           set_initialize (s1, false, in_s1);
    1798           4 :           s2->state = BEGIN_STATE;
    1799        1028 :           for (i = 0; i < N_CHARS; i++)
    1800        1024 :             xlate[i] = i;
    1801         772 :           for (i = 0; i < N_CHARS; i++)
    1802             :             {
    1803         769 :               if (!in_s1[i])
    1804             :                 {
    1805         768 :                   int ch = get_next (s2, NULL);
    1806         768 :                   assert (ch != -1 || truncate_set1);
    1807         768 :                   if (ch == -1)
    1808             :                     {
    1809             :                       /* This will happen when tr is invoked like e.g.
    1810             :                          tr -cs A-Za-z0-9 '\012'.  */
    1811           1 :                       break;
    1812             :                     }
    1813         767 :                   xlate[i] = ch;
    1814             :                 }
    1815             :             }
    1816             :         }
    1817             :       else
    1818             :         {
    1819             :           int c1, c2;
    1820             :           int i;
    1821          13 :           bool case_convert = false;
    1822             :           enum Upper_Lower_class class_s1;
    1823             :           enum Upper_Lower_class class_s2;
    1824             : 
    1825        3341 :           for (i = 0; i < N_CHARS; i++)
    1826        3328 :             xlate[i] = i;
    1827          13 :           s1->state = BEGIN_STATE;
    1828          13 :           s2->state = BEGIN_STATE;
    1829             :           for (;;)
    1830             :             {
    1831             :               /* When the previous pair identified case-converting classes,
    1832             :                  advance S1 and S2 so that each points to the following
    1833             :                  construct.  */
    1834          23 :               if (case_convert)
    1835             :                 {
    1836           0 :                   skip_construct (s1);
    1837           0 :                   skip_construct (s2);
    1838           0 :                   case_convert = false;
    1839             :                 }
    1840             : 
    1841          18 :               c1 = get_next (s1, &class_s1);
    1842          18 :               c2 = get_next (s2, &class_s2);
    1843             : 
    1844             :               /* When translating and there is an [:upper:] or [:lower:]
    1845             :                  class in SET2, then there must be a corresponding [:lower:]
    1846             :                  or [:upper:] class in SET1.  */
    1847          18 :               if (class_s1 == UL_NONE
    1848          18 :                   && (class_s2 == UL_LOWER || class_s2 == UL_UPPER))
    1849           0 :                 error (EXIT_FAILURE, 0,
    1850             :                        _("misaligned [:upper:] and/or [:lower:] construct"));
    1851             : 
    1852          18 :               if (class_s1 == UL_LOWER && class_s2 == UL_UPPER)
    1853             :                 {
    1854           0 :                   case_convert = true;
    1855           0 :                   for (i = 0; i < N_CHARS; i++)
    1856           0 :                     if (islower (i))
    1857           0 :                       xlate[i] = toupper (i);
    1858             :                 }
    1859          18 :               else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER)
    1860             :                 {
    1861           0 :                   case_convert = true;
    1862           0 :                   for (i = 0; i < N_CHARS; i++)
    1863           0 :                     if (isupper (i))
    1864           0 :                       xlate[i] = tolower (i);
    1865             :                 }
    1866          18 :               else if ((class_s1 == UL_LOWER && class_s2 == UL_LOWER)
    1867          18 :                        || (class_s1 == UL_UPPER && class_s2 == UL_UPPER))
    1868             :                 {
    1869             :                   /* POSIX says the behavior of `tr "[:upper:]" "[:upper:]"'
    1870             :                      is undefined.  Treat it as a no-op.  */
    1871             :                 }
    1872             :               else
    1873             :                 {
    1874             :                   /* The following should have been checked by validate...  */
    1875          18 :                   if (c1 == -1 || c2 == -1)
    1876             :                     break;
    1877           5 :                   xlate[c1] = c2;
    1878             :                 }
    1879             :             }
    1880          13 :           assert (c1 == -1 || truncate_set1);
    1881             :         }
    1882          17 :       if (squeeze_repeats)
    1883             :         {
    1884           1 :           set_initialize (s2, false, in_squeeze_set);
    1885           1 :           squeeze_filter (io_buf, sizeof io_buf, read_and_xlate);
    1886             :         }
    1887             :       else
    1888             :         {
    1889             :           for (;;)
    1890          16 :             {
    1891          32 :               size_t bytes_read = read_and_xlate (io_buf, sizeof io_buf);
    1892          32 :               if (bytes_read == 0)
    1893          16 :                 break;
    1894          16 :               if (fwrite (io_buf, 1, bytes_read, stdout) != bytes_read)
    1895           0 :                 error (EXIT_FAILURE, errno, _("write error"));
    1896             :             }
    1897             :         }
    1898             :     }
    1899             : 
    1900          32 :   if (close (STDIN_FILENO) != 0)
    1901           0 :     error (EXIT_FAILURE, errno, _("standard input"));
    1902             : 
    1903          32 :   exit (EXIT_SUCCESS);
    1904             : }

Generated by: LCOV version 1.10