LCOV - code coverage report
Current view: top level - src - ptx.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 440 779 56.5 %
Date: 2018-01-30 Functions: 18 22 81.8 %

          Line data    Source code
       1             : /* Permuted index for GNU, with keywords in their context.
       2             :    Copyright (C) 1990, 1991, 1993, 1998-2008 Free Software Foundation, Inc.
       3             :    François Pinard <pinard@iro.umontreal.ca>, 1988.
       4             : 
       5             :    This program is free software: you can redistribute it and/or modify
       6             :    it under the terms of the GNU General Public License as published by
       7             :    the Free Software Foundation, either version 3 of the License, or
       8             :    (at your option) any later version.
       9             : 
      10             :    This program is distributed in the hope that it will be useful,
      11             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :    GNU General Public License for more details.
      14             : 
      15             :    You should have received a copy of the GNU General Public License
      16             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.
      17             : 
      18             :    François Pinard <pinard@iro.umontreal.ca> */
      19             : 
      20             : #include <config.h>
      21             : 
      22             : #include <stdio.h>
      23             : #include <getopt.h>
      24             : #include <sys/types.h>
      25             : #include "system.h"
      26             : #include "argmatch.h"
      27             : #include "diacrit.h"
      28             : #include "error.h"
      29             : #include "quote.h"
      30             : #include "quotearg.h"
      31             : #include "regex.h"
      32             : #include "xstrtol.h"
      33             : 
      34             : /* The official name of this program (e.g., no `g' prefix).  */
      35             : #define PROGRAM_NAME "ptx"
      36             : 
      37             : /* Note to translator: Please translate "F. Pinard" to "François
      38             :    Pinard" if "ç" (c-with-cedilla) is available in the
      39             :    translation's character set and encoding.  */
      40             : #define AUTHORS _("F. Pinard")
      41             : 
      42             : /* Number of possible characters in a byte.  */
      43             : #define CHAR_SET_SIZE 256
      44             : 
      45             : #define ISODIGIT(C) ((C) >= '0' && (C) <= '7')
      46             : #define HEXTOBIN(C) ((C) >= 'a' && (C) <= 'f' ? (C)-'a'+10 \
      47             :                      : (C) >= 'A' && (C) <= 'F' ? (C)-'A'+10 : (C)-'0')
      48             : #define OCTTOBIN(C) ((C) - '0')
      49             : 
      50             : /* Debugging the memory allocator.  */
      51             : 
      52             : #if WITH_DMALLOC
      53             : # define MALLOC_FUNC_CHECK 1
      54             : # include <dmalloc.h>
      55             : #endif
      56             : 
      57             : /* Global definitions.  */
      58             : 
      59             : /* FIXME: There are many unchecked integer overflows in this file,
      60             :    that will cause this command to misbehave given large inputs or
      61             :    options.  Many of the "int" values below should be "size_t" or
      62             :    something else like that.  */
      63             : 
      64             : /* Reallocation step when swallowing non regular files.  The value is not
      65             :    the actual reallocation step, but its base two logarithm.  */
      66             : #define SWALLOW_REALLOC_LOG 12
      67             : 
      68             : /* Imported from "regex.c".  */
      69             : #define Sword 1
      70             : 
      71             : /* The name this program was run with. */
      72             : char *program_name;
      73             : 
      74             : /* Program options.  */
      75             : 
      76             : enum Format
      77             : {
      78             :   UNKNOWN_FORMAT,               /* output format still unknown */
      79             :   DUMB_FORMAT,                  /* output for a dumb terminal */
      80             :   ROFF_FORMAT,                  /* output for `troff' or `nroff' */
      81             :   TEX_FORMAT                    /* output for `TeX' or `LaTeX' */
      82             : };
      83             : 
      84             : static bool gnu_extensions = true;      /* trigger all GNU extensions */
      85             : static bool auto_reference = false;     /* refs are `file_name:line_number:' */
      86             : static bool input_reference = false;    /* refs at beginning of input lines */
      87             : static bool right_reference = false;    /* output refs after right context  */
      88             : static int line_width = 72;     /* output line width in characters */
      89             : static int gap_size = 3;        /* number of spaces between output fields */
      90             : static const char *truncation_string = "/";
      91             :                                 /* string used to mark line truncations */
      92             : static const char *macro_name = "xx"; /* macro name for roff or TeX output */
      93             : static enum Format output_format = UNKNOWN_FORMAT;
      94             :                                 /* output format */
      95             : 
      96             : static bool ignore_case = false;        /* fold lower to upper for sorting */
      97             : static const char *break_file = NULL;   /* name of the `Break characters' file */
      98             : static const char *only_file = NULL;    /* name of the `Only words' file */
      99             : static const char *ignore_file = NULL;  /* name of the `Ignore words' file */
     100             : 
     101             : /* Options that use regular expressions.  */
     102             : struct regex_data
     103             : {
     104             :   /* The original regular expression, as a string.  */
     105             :   char const *string;
     106             : 
     107             :   /* The compiled regular expression, and its fastmap.  */
     108             :   struct re_pattern_buffer pattern;
     109             :   char fastmap[UCHAR_MAX + 1];
     110             : };
     111             : 
     112             : static struct regex_data context_regex; /* end of context */
     113             : static struct regex_data word_regex;    /* keyword */
     114             : 
     115             : /* A BLOCK delimits a region in memory of arbitrary size, like the copy of a
     116             :    whole file.  A WORD is something smaller, its length should fit in a
     117             :    short integer.  A WORD_TABLE may contain several WORDs.  */
     118             : 
     119             : typedef struct
     120             :   {
     121             :     char *start;                /* pointer to beginning of region */
     122             :     char *end;                  /* pointer to end + 1 of region */
     123             :   }
     124             : BLOCK;
     125             : 
     126             : typedef struct
     127             :   {
     128             :     char *start;                /* pointer to beginning of region */
     129             :     short int size;             /* length of the region */
     130             :   }
     131             : WORD;
     132             : 
     133             : typedef struct
     134             :   {
     135             :     WORD *start;                /* array of WORDs */
     136             :     size_t alloc;               /* allocated length */
     137             :     size_t length;              /* number of used entries */
     138             :   }
     139             : WORD_TABLE;
     140             : 
     141             : /* Pattern description tables.  */
     142             : 
     143             : /* For each character, provide its folded equivalent.  */
     144             : static unsigned char folded_chars[CHAR_SET_SIZE];
     145             : 
     146             : /* End of context pattern register indices.  */
     147             : static struct re_registers context_regs;
     148             : 
     149             : /* Keyword pattern register indices.  */
     150             : static struct re_registers word_regs;
     151             : 
     152             : /* A word characters fastmap is used only when no word regexp has been
     153             :    provided.  A word is then made up of a sequence of one or more characters
     154             :    allowed by the fastmap.  Contains !0 if character allowed in word.  Not
     155             :    only is this faster in most cases, but it also simplifies the implementation
     156             :    of the Break files.  */
     157             : static char word_fastmap[CHAR_SET_SIZE];
     158             : 
     159             : /* Maximum length of any word read.  */
     160             : static int maximum_word_length;
     161             : 
     162             : /* Maximum width of any reference used.  */
     163             : static int reference_max_width;
     164             : 
     165             : /* Ignore and Only word tables.  */
     166             : 
     167             : static WORD_TABLE ignore_table; /* table of words to ignore */
     168             : static WORD_TABLE only_table;           /* table of words to select */
     169             : 
     170             : /* Source text table, and scanning macros.  */
     171             : 
     172             : static int number_input_files;  /* number of text input files */
     173             : static int total_line_count;    /* total number of lines seen so far */
     174             : static const char **input_file_name;    /* array of text input file names */
     175             : static int *file_line_count;    /* array of `total_line_count' values at end */
     176             : 
     177             : static BLOCK text_buffer;       /* file to study */
     178             : 
     179             : /* SKIP_NON_WHITE used only for getting or skipping the reference.  */
     180             : 
     181             : #define SKIP_NON_WHITE(cursor, limit) \
     182             :   while (cursor < limit && ! isspace (to_uchar (*cursor)))           \
     183             :     cursor++
     184             : 
     185             : #define SKIP_WHITE(cursor, limit) \
     186             :   while (cursor < limit && isspace (to_uchar (*cursor)))             \
     187             :     cursor++
     188             : 
     189             : #define SKIP_WHITE_BACKWARDS(cursor, start) \
     190             :   while (cursor > start && isspace (to_uchar (cursor[-1])))          \
     191             :     cursor--
     192             : 
     193             : #define SKIP_SOMETHING(cursor, limit) \
     194             :   if (word_regex.string)                                                \
     195             :     {                                                                   \
     196             :       regoff_t count;                                                   \
     197             :       count = re_match (&word_regex.pattern, cursor, limit - cursor, 0, NULL); \
     198             :       if (count == -2)                                                  \
     199             :         matcher_error ();                                               \
     200             :       cursor += count == -1 ? 1 : count;                                \
     201             :     }                                                                   \
     202             :   else if (word_fastmap[to_uchar (*cursor)])                            \
     203             :     while (cursor < limit && word_fastmap[to_uchar (*cursor)])               \
     204             :       cursor++;                                                         \
     205             :   else                                                                  \
     206             :     cursor++
     207             : 
     208             : /* Occurrences table.
     209             : 
     210             :    The `keyword' pointer provides the central word, which is surrounded
     211             :    by a left context and a right context.  The `keyword' and `length'
     212             :    field allow full 8-bit characters keys, even including NULs.  At other
     213             :    places in this program, the name `keyafter' refers to the keyword
     214             :    followed by its right context.
     215             : 
     216             :    The left context does not extend, towards the beginning of the file,
     217             :    further than a distance given by the `left' value.  This value is
     218             :    relative to the keyword beginning, it is usually negative.  This
     219             :    ensures that, except for white space, we will never have to backward
     220             :    scan the source text, when it is time to generate the final output
     221             :    lines.
     222             : 
     223             :    The right context, indirectly attainable through the keyword end, does
     224             :    not extend, towards the end of the file, further than a distance given
     225             :    by the `right' value.  This value is relative to the keyword
     226             :    beginning, it is usually positive.
     227             : 
     228             :    When automatic references are used, the `reference' value is the
     229             :    overall line number in all input files read so far, in this case, it
     230             :    is of type (int).  When input references are used, the `reference'
     231             :    value indicates the distance between the keyword beginning and the
     232             :    start of the reference field, it is of type (DELTA) and usually
     233             :    negative.  */
     234             : 
     235             : typedef short int DELTA;        /* to hold displacement within one context */
     236             : 
     237             : typedef struct
     238             :   {
     239             :     WORD key;                   /* description of the keyword */
     240             :     DELTA left;                 /* distance to left context start */
     241             :     DELTA right;                /* distance to right context end */
     242             :     int reference;              /* reference descriptor */
     243             :   }
     244             : OCCURS;
     245             : 
     246             : /* The various OCCURS tables are indexed by the language.  For the time
     247             :    being, there is no such multiple language support.  */
     248             : 
     249             : static OCCURS *occurs_table[1]; /* all words retained from the read text */
     250             : static size_t occurs_alloc[1];  /* allocated size of occurs_table */
     251             : static size_t number_of_occurs[1]; /* number of used slots in occurs_table */
     252             : 
     253             : 
     254             : /* Communication among output routines.  */
     255             : 
     256             : /* Indicate if special output processing is requested for each character.  */
     257             : static char edited_flag[CHAR_SET_SIZE];
     258             : 
     259             : static int half_line_width;     /* half of line width, reference excluded */
     260             : static int before_max_width;    /* maximum width of before field */
     261             : static int keyafter_max_width;  /* maximum width of keyword-and-after field */
     262             : static int truncation_string_length;/* length of string used to flag truncation */
     263             : 
     264             : /* When context is limited by lines, wraparound may happen on final output:
     265             :    the `head' pointer gives access to some supplementary left context which
     266             :    will be seen at the end of the output line, the `tail' pointer gives
     267             :    access to some supplementary right context which will be seen at the
     268             :    beginning of the output line. */
     269             : 
     270             : static BLOCK tail;              /* tail field */
     271             : static int tail_truncation;     /* flag truncation after the tail field */
     272             : 
     273             : static BLOCK before;            /* before field */
     274             : static int before_truncation;   /* flag truncation before the before field */
     275             : 
     276             : static BLOCK keyafter;          /* keyword-and-after field */
     277             : static int keyafter_truncation; /* flag truncation after the keyafter field */
     278             : 
     279             : static BLOCK head;              /* head field */
     280             : static int head_truncation;     /* flag truncation before the head field */
     281             : 
     282             : static BLOCK reference;         /* reference field for input reference mode */
     283             : 
     284             : /* Miscellaneous routines.  */
     285             : 
     286             : /* Diagnose an error in the regular expression matcher.  Then exit.  */
     287             : 
     288             : static void ATTRIBUTE_NORETURN
     289           0 : matcher_error (void)
     290             : {
     291           0 :   error (0, errno, _("error in regular expression matcher"));
     292           0 :   exit (EXIT_FAILURE);
     293             : }
     294             : 
     /*----------------------------------------------------------------------.
     | Return a newly allocated copy of STRING with its \-escapes expanded.  |
     `----------------------------------------------------------------------*/

     /* Loosely adapted from GNU sh-utils printf.c code.

        Escapes recognized after a backslash:
          a b f n r t v   the corresponding C control character;
          x               followed by up to three hexadecimal digits, yields the
                          character having that value; when no hexadecimal digit
                          follows, the two characters `\x' are copied as is;
          0               followed by up to three octal digits, yields the
                          character having that value;
          c               drops everything that remains in STRING;
          end of string   a lone trailing backslash is dropped.
        Any other character following a backslash is copied together with
        the backslash.  The result is obtained from xmalloc, sized
        strlen (STRING) + 1, and is always NUL terminated.  */

     static char *
     copy_unescaped_string (const char *string)
     {
       /* Single-letter escapes and, at the same index, their translation.  */
       static char const simple_letters[] = "abfnrtv";
       static char const simple_values[] = "\a\b\f\n\r\t\v";

       char *const result = xmalloc (strlen (string) + 1);
       char *out = result;           /* next free position in RESULT */
       const char *in = string;      /* next unread position in STRING */

       while (*in)
         {
           char escape;              /* character following the backslash */
           const char *hit;          /* position of ESCAPE in simple_letters */

           /* Ordinary characters are copied through untouched.  */
           if (*in != '\\')
             {
               *out++ = *in++;
               continue;
             }

           in++;
           escape = *in;

           /* A backslash ending the string is silently ignored.  */
           if (escape == '\0')
             break;

           if (escape == 'x')
             {
               /* \xhhh escape, 3 chars maximum.  */
               int digits = 0;
               int value = 0;

               in++;
               while (digits < 3 && isxdigit (to_uchar (*in)))
                 {
                   value = value * 16 + HEXTOBIN (*in);
                   digits++;
                   in++;
                 }

               if (digits > 0)
                 *out++ = value;
               else
                 {
                   /* Not a numeric escape after all: keep it verbatim.  */
                   *out++ = '\\';
                   *out++ = 'x';
                 }
             }
           else if (escape == '0')
             {
               /* \0ooo escape, 3 chars maximum.  */
               int digits = 0;
               int value = 0;

               in++;
               while (digits < 3 && ISODIGIT (*in))
                 {
                   value = value * 8 + OCTTOBIN (*in);
                   digits++;
                   in++;
                 }
               *out++ = value;
             }
           else if (escape == 'c')
             {
               /* Cancel the rest of the output.  */
               in += strlen (in);
             }
           else if ((hit = strchr (simple_letters, escape)) != NULL)
             {
               *out++ = simple_values[hit - simple_letters];
               in++;
             }
           else
             {
               /* Unknown escape: copy the backslash and the character.  */
               *out++ = '\\';
               *out++ = *in++;
             }
         }

       *out = '\0';
       return result;
     }
     408             : 
     409             : /*--------------------------------------------------------------------------.
     410             : | Compile the regex represented by REGEX, diagnose and abort if any error.  |
     411             : `--------------------------------------------------------------------------*/
     412             : 
     413             : static void
     414          73 : compile_regex (struct regex_data *regex)
     415             : {
     416          73 :   struct re_pattern_buffer *pattern = &regex->pattern;
     417          73 :   char const *string = regex->string;
     418             :   char const *message;
     419             : 
     420          73 :   pattern->buffer = NULL;
     421          73 :   pattern->allocated = 0;
     422          73 :   pattern->fastmap = regex->fastmap;
     423          73 :   pattern->translate = ignore_case ? folded_chars : NULL;
     424             : 
     425          73 :   message = re_compile_pattern (string, strlen (string), pattern);
     426          73 :   if (message)
     427           0 :     error (EXIT_FAILURE, 0, _("%s (for regexp %s)"), message, quote (string));
     428             : 
     429             :   /* The fastmap should be compiled before `re_match'.  The following
     430             :      call is not mandatory, because `re_search' is always called sooner,
     431             :      and it compiles the fastmap if this has not been done yet.  */
     432             : 
     433          73 :   re_compile_fastmap (pattern);
     434          73 : }
     435             : 
     436             : /*------------------------------------------------------------------------.
     437             : | This will initialize various tables for pattern match and compiles some |
     438             : | regexps.                                                                |
     439             : `------------------------------------------------------------------------*/
     440             : 
     441             : static void
     442          73 : initialize_regex (void)
     443             : {
     444             :   int character;                /* character value */
     445             : 
     446             :   /* Initialize the case folding table.  */
     447             : 
     448          73 :   if (ignore_case)
     449           0 :     for (character = 0; character < CHAR_SET_SIZE; character++)
     450           0 :       folded_chars[character] = toupper (character);
     451             : 
     452             :   /* Unless the user already provided a description of the end of line or
     453             :      end of sentence sequence, select an end of line sequence to compile.
     454             :      If the user provided an empty definition, thus disabling end of line
     455             :      or sentence feature, make it NULL to speed up tests.  If GNU
     456             :      extensions are enabled, use end of sentence like in GNU emacs.  If
     457             :      disabled, use end of lines.  */
     458             : 
     459          73 :   if (context_regex.string)
     460             :     {
     461           0 :       if (!*context_regex.string)
     462           0 :         context_regex.string = NULL;
     463             :     }
     464          73 :   else if (gnu_extensions & !input_reference)
     465          70 :     context_regex.string = "[.?!][]\"')}]*\\($\\|\t\\|  \\)[ \t\n]*";
     466             :   else
     467           3 :     context_regex.string = "\n";
     468             : 
     469          73 :   if (context_regex.string)
     470          73 :     compile_regex (&context_regex);
     471             : 
     472             :   /* If the user has already provided a non-empty regexp to describe
     473             :      words, compile it.  Else, unless this has already been done through
     474             :      a user provided Break character file, construct a fastmap of
     475             :      characters that may appear in a word.  If GNU extensions enabled,
     476             :      include only letters of the underlying character set.  If disabled,
     477             :      include almost everything, even punctuations; stop only on white
     478             :      space.  */
     479             : 
     480          73 :   if (word_regex.string)
     481           0 :     compile_regex (&word_regex);
     482          73 :   else if (!break_file)
     483             :     {
     484          71 :       if (gnu_extensions)
     485             :         {
     486             : 
     487             :           /* Simulate \w+.  */
     488             : 
     489       17476 :           for (character = 0; character < CHAR_SET_SIZE; character++)
     490       17408 :             word_fastmap[character] = !! isalpha (character);
     491             :         }
     492             :       else
     493             :         {
     494             : 
     495             :           /* Simulate [^ \t\n]+.  */
     496             : 
     497           3 :           memset (word_fastmap, 1, CHAR_SET_SIZE);
     498           3 :           word_fastmap[' '] = 0;
     499           3 :           word_fastmap['\t'] = 0;
     500           3 :           word_fastmap['\n'] = 0;
     501             :         }
     502             :     }
     503          73 : }
     504             : 
     505             : /*------------------------------------------------------------------------.
     506             : | This routine will attempt to swallow a whole file name FILE_NAME into a |
     507             : | contiguous region of memory and return a description of it into BLOCK.  |
     508             : | Standard input is assumed whenever FILE_NAME is NULL, empty or "-".   |
     509             : |                                                                         |
     510             : | Previously, in some cases, white space compression was attempted while  |
     511             : | inputting text.  This was defeating some regexps like default end of    |
     512             : | sentence, which checks for two consecutive spaces.  If white space      |
     513             : | compression is ever reinstated, it should be in output routines.        |
     514             : `------------------------------------------------------------------------*/
     515             : 
     516             : static void
     517          86 : swallow_file_in_memory (const char *file_name, BLOCK *block)
     518             : {
     519             :   int file_handle;              /* file descriptor number */
     520             :   struct stat stat_block;       /* stat block for file */
     521             :   size_t allocated_length;      /* allocated length of memory buffer */
     522             :   size_t used_length;           /* used length in memory buffer */
     523             :   int read_length;              /* result of the last read call (< 0 on error) */
     524             : 
     525             :   /* As special cases, a file name which is NULL, empty or "-" indicates
     526             :      standard input, which is already opened.  In all other cases, open the
     527             :      file from its name.  */
     528          86 :   bool using_stdin = !file_name || !*file_name || STREQ (file_name, "-");
     529          86 :   if (using_stdin)
     530          82 :     file_handle = STDIN_FILENO;
     531             :   else
     532           4 :     if ((file_handle = open (file_name, O_RDONLY)) < 0)
     533           0 :       error (EXIT_FAILURE, errno, "%s", file_name);
     534             : 
     535             :   /* If the file is a plain, regular file, allocate st_size bytes at once
     536             :      and swallow the file with a single read call; outside MSDOS, any other
     537             :      byte count is fatal (NOTE(review): errno may be stale on a short
     538             :      read).  Otherwise read in chunks, growing the buffer when it fills.  */
     539             : 
     540          86 :   if (fstat (file_handle, &stat_block) < 0)
     541           0 :     error (EXIT_FAILURE, errno, "%s", file_name);
     542             : 
     543          86 :   if (S_ISREG (stat_block.st_mode))
     544             :     {
     545             :       size_t in_memory_size;
     546             : 
     547          61 :       block->start = xmalloc ((size_t) stat_block.st_size);
     548             : 
     549         183 :       if ((in_memory_size = read (file_handle,
     550         122 :                                   block->start, (size_t) stat_block.st_size))
     551          61 :           != stat_block.st_size)
     552             :         {
     553             : #if MSDOS
     554             :           /* On MSDOS, in memory size may be smaller than the file
     555             :              size, because of end of line conversions.  But it can
     556             :              never be smaller than half the file size, because the
     557             :              minimum is when all lines are empty and terminated by
     558             :              CR+LF.  */
     559             :           if (in_memory_size != (size_t)-1
     560             :               && in_memory_size >= stat_block.st_size / 2)
     561             :             block->start = xrealloc (block->start, in_memory_size);
     562             :           else
     563             : #endif /* MSDOS */
     564             : 
     565          11 :             error (EXIT_FAILURE, errno, "%s", file_name);
     566             :         }
     567          50 :       block->end = block->start + in_memory_size;
     568             :     }
     569             :   else
     570             :     {
     571          25 :       block->start = xmalloc ((size_t) 1 << SWALLOW_REALLOC_LOG);
     572          25 :       used_length = 0;
     573          25 :       allocated_length = (1 << SWALLOW_REALLOC_LOG);
     574             : 
     575         115 :       while (read_length = read (file_handle,
     576          45 :                                  block->start + used_length,
     577             :                                  allocated_length - used_length),
     578             :              read_length > 0)
     579             :         {
     580          20 :           used_length += read_length;
     581          20 :           if (used_length == allocated_length)
     582             :             {
     583           0 :               allocated_length += (1 << SWALLOW_REALLOC_LOG);
     584             :               block->start
     585           0 :                 = xrealloc (block->start, allocated_length);
     586             :             }
     587             :         }
     588             : 
     589          25 :       if (read_length < 0)
     590           3 :         error (EXIT_FAILURE, errno, "%s", file_name);
     591             : 
     592          22 :       block->end = block->start + used_length;
     593             :     }
     594             : 
     595             :   /* Close the file, but only if it was not the standard input.  */
     596             : 
     597          72 :   if (! using_stdin && close (file_handle) != 0)
     598           0 :     error (EXIT_FAILURE, errno, "%s", file_name);
     599          72 : }
     600             : 
     601             : /* Sort and search routines.  */
     602             : 
     603             : /*--------------------------------------------------------------------------.
     604             : | Compare two words, FIRST and SECOND, and return 0 if they are identical.  |
     605             : | Return less than 0 if the first word goes before the second; return       |
     606             : | greater than 0 if the first word goes after the second.                   |
     607             : | Bytes go through to_uchar, then folded_chars when ignore_case is set.     |
     608             : | If a word is indeed a prefix of the other, the shorter should go first.   |
     609             : `--------------------------------------------------------------------------*/
     610             : 
     611             : static int
     612          15 : compare_words (const void *void_first, const void *void_second)
     613             : {
     614             : #define first ((const WORD *) void_first)
     615             : #define second ((const WORD *) void_second)
     616             :   int length;                   /* minimum of the two word sizes */
     617             :   int counter;                  /* index of the byte being compared */
     618             :   int value;                    /* difference of the current byte pair */
     619             : 
     620          15 :   length = first->size < second->size ? first->size : second->size;
     621             : 
     622          15 :   if (ignore_case)
     623             :     {
     624           0 :       for (counter = 0; counter < length; counter++)
     625             :         {
     626           0 :           value = (folded_chars [to_uchar (first->start[counter])]
     627           0 :                    - folded_chars [to_uchar (second->start[counter])]);
     628           0 :           if (value != 0)
     629           0 :             return value;
     630             :         }
     631             :     }
     632             :   else
     633             :     {
     634          26 :       for (counter = 0; counter < length; counter++)
     635             :         {
     636          32 :           value = (to_uchar (first->start[counter])
     637          16 :                    - to_uchar (second->start[counter]));
     638          16 :           if (value != 0)
     639           5 :             return value;
     640             :         }
     641             :     }
     642             : 
     643          10 :   return first->size - second->size;
     644             : #undef first
     645             : #undef second
     646             : }
     647             : 
     648             : /*-----------------------------------------------------------------------.
     649             : | Decides which of two OCCURS, FIRST or SECOND, should lexicographically |
     650             : | go first.  On a tie, keep input order by comparing key start pointers. |
     651             : | NOTE(review): the pointer difference is returned as int; may truncate. |
     652             : `-----------------------------------------------------------------------*/
     653             : 
     654             : static int
     655          15 : compare_occurs (const void *void_first, const void *void_second)
     656             : {
     657             : #define first ((const OCCURS *) void_first)
     658             : #define second ((const OCCURS *) void_second)
     659             :   int value;                    /* result of comparing the two keys */
     660             : 
     661          15 :   value = compare_words (&first->key, &second->key);
     662          15 :   return value == 0 ? first->key.start - second->key.start : value;
     663             : #undef first
     664             : #undef second
     665             : }
     666             : 
     667             : /*------------------------------------------------------------.
     668             : | Return 1 if WORD appears in TABLE, else 0.  Binary search.  |
     669             : `------------------------------------------------------------*/
     670             : 
     671             : static int
     672           0 : search_table (WORD *word, WORD_TABLE *table)
     673             : {
     674             :   int lowest;                   /* current lowest possible index */
     675             :   int highest;                  /* current highest possible index */
     676             :   int middle;                   /* current middle index */
     677             :   int value;                    /* compare_words result for WORD vs. middle */
     678             : 
     679           0 :   lowest = 0;
     680           0 :   highest = table->length - 1;
     681           0 :   while (lowest <= highest)
     682             :     {
     683           0 :       middle = (lowest + highest) / 2;
     684           0 :       value = compare_words (word, table->start + middle);
     685           0 :       if (value < 0)
     686           0 :         highest = middle - 1;
     687           0 :       else if (value > 0)
     688           0 :         lowest = middle + 1;
     689             :       else
     690           0 :         return 1;
     691             :     }
     692           0 :   return 0;
     693             : }
     694             : 
     695             : /*---------------------------------------------------------------------.
     696             : | Sort the whole occurs table in memory.  Presumably, `qsort' does not |
     697             : | take intermediate copies of table elements, so the sort will be      |
     698             : | stabilized throughout the comparison routine.                        |
     699             : `---------------------------------------------------------------------*/
     700             : 
     701             : static void
     702          59 : sort_found_occurs (void)
     703             : {
     704             : 
     705             :   /* Only one language for the time being.  */
     706             : 
     707          59 :   qsort (occurs_table[0], number_of_occurs[0], sizeof **occurs_table,
     708             :          compare_occurs);
     709          59 : }
     710             : 
     711             : /* Parameter files reading routines.  */
     712             : 
     713             : /*----------------------------------------------------------------------.
     714             : | Read a file named FILE_NAME, containing a set of break characters.    |
     715             : | Fill the array word_fastmap so that every character is marked as a    |
     716             : | word constituent except those found in the file.  Repeats are fine.   |
     717             : `----------------------------------------------------------------------*/
     718             : 
     719             : static void
     720           2 : digest_break_file (const char *file_name)
     721             : {
     722             :   BLOCK file_contents;          /* to receive a copy of the file */
     723             :   char *cursor;                 /* cursor in file copy */
     724             : 
     725           2 :   swallow_file_in_memory (file_name, &file_contents);
     726             : 
     727             :   /* Make the fastmap and record the file contents in it.  */
     728             : 
     729           2 :   memset (word_fastmap, 1, CHAR_SET_SIZE);
     730          18 :   for (cursor = file_contents.start; cursor < file_contents.end; cursor++)
     731          16 :     word_fastmap[to_uchar (*cursor)] = 0;
     732             : 
     733           2 :   if (!gnu_extensions)
     734             :     {
     735             : 
     736             :       /* If GNU extensions are enabled, the only way to avoid newline as
     737             :          a break character is to write all the break characters in the
     738             :          file with no newline at all, not even at the end of the file.
     739             :          If disabled, spaces, tabs and newlines are always considered as
     740             :          break characters even if not included in the break file.  */
     741             : 
     742           0 :       word_fastmap[' '] = 0;
     743           0 :       word_fastmap['\t'] = 0;
     744           0 :       word_fastmap['\n'] = 0;
     745             :     }
     746             : 
     747             :   /* Release the copy of the file, which is no longer needed.  */
     748             : 
     749           2 :   free (file_contents.start);
     750           2 : }
     751             : 
     752             : /*-----------------------------------------------------------------------.
     753             : | Read a file named FILE_NAME, containing one word per line, then        |
     754             : | construct in TABLE a table of WORD descriptors for them.  The routine  |
     755             : | swallows the whole file in memory and keeps it, as the descriptors     |
     756             : | point into that copy.  The table is finally sorted with compare_words. |
     757             : `-----------------------------------------------------------------------*/
     758             : 
     759             : static void
     760           0 : digest_word_file (const char *file_name, WORD_TABLE *table)
     761             : {
     762             :   BLOCK file_contents;          /* to receive a copy of the file */
     763             :   char *cursor;                 /* cursor in file copy */
     764             :   char *word_start;             /* start of the current word */
     765             : 
     766           0 :   swallow_file_in_memory (file_name, &file_contents);
     767             : 
     768           0 :   table->start = NULL;
     769           0 :   table->alloc = 0;
     770           0 :   table->length = 0;
     771             : 
     772             :   /* Read the whole file.  */
     773             : 
     774           0 :   cursor = file_contents.start;
     775           0 :   while (cursor < file_contents.end)
     776             :     {
     777             : 
     778             :       /* Read one line, and save the word it contains.  */
     779             : 
     780           0 :       word_start = cursor;
     781           0 :       while (cursor < file_contents.end && *cursor != '\n')
     782           0 :         cursor++;
     783             : 
     784             :       /* Record the word in table if it is not empty.  */
     785             : 
     786           0 :       if (cursor > word_start)
     787             :         {
     788           0 :           if (table->length == table->alloc)
     789             :             {
     790           0 :               if ((SIZE_MAX / sizeof *table->start - 1) / 2 < table->alloc)
     791           0 :                 xalloc_die ();
     792           0 :               table->alloc = table->alloc * 2 + 1;
     793           0 :               table->start = xrealloc (table->start,
     794           0 :                                        table->alloc * sizeof *table->start);
     795             :             }
     796             : 
     797           0 :           table->start[table->length].start = word_start;
     798           0 :           table->start[table->length].size = cursor - word_start;
     799           0 :           table->length++;
     800             :         }
     801             : 
     802             :       /* This test allows for an incomplete line at end of file.  */
     803             : 
     804           0 :       if (cursor < file_contents.end)
     805           0 :         cursor++;
     806             :     }
     807             : 
     808             :   /* Finally, sort all the words read.  */
     809             : 
     810           0 :   qsort (table->start, table->length, sizeof table->start[0], compare_words);
     811           0 : }
     812             : 
     813             : /* Keyword recognition and selection.  */
     814             : 
     815             : /*----------------------------------------------------------------------.
     816             : | For each keyword in the source text, constructs an OCCURS structure.  |
     817             : `----------------------------------------------------------------------*/
     818             : 
     819             : static void
     820          70 : find_occurs_in_text (void)
     821             : {
     822             :   char *cursor;                 /* for scanning the source text */
     823             :   char *scan;                   /* for scanning the source text also */
     824             :   char *line_start;             /* start of the current input line */
     825             :   char *line_scan;              /* newlines scanned until this point */
     826             :   int reference_length;         /* length of reference in input mode */
     827             :   WORD possible_key;            /* possible key, to ease searches */
     828             :   OCCURS *occurs_cursor;        /* current OCCURS under construction */
     829             : 
     830             :   char *context_start;          /* start of left context */
     831             :   char *context_end;            /* end of right context */
     832             :   char *word_start;             /* start of word */
     833             :   char *word_end;               /* end of word */
     834             :   char *next_context_start;     /* next start of left context */
     835             : 
     836             :   /* reference_length is always used within `if (input_reference)'.
     837             :      However, GNU C diagnoses that it may be used uninitialized.  The
     838             :      following assignment is merely to shut it up.  */
     839             : 
     840          70 :   reference_length = 0;
     841             : 
     842             :   /* Tracking where lines start is helpful for reference processing.  In
     843             :      auto reference mode, this allows counting lines.  In input reference
     844             :      mode, this permits finding the beginning of the references.
     845             : 
     846             :      The first line begins with the file, skip immediately this very first
     847             :      reference in input reference mode, to help later rejection of any word
     848             :      found inside it.  Also, unconditionally assigning these variables has
     849             :      the happy effect of shutting up lint.  */
     850             : 
     851          70 :   line_start = text_buffer.start;
     852          70 :   line_scan = line_start;
     853          70 :   if (input_reference)
     854             :     {
     855           0 :       SKIP_NON_WHITE (line_scan, text_buffer.end);
     856           0 :       reference_length = line_scan - line_start;
     857           0 :       SKIP_WHITE (line_scan, text_buffer.end);
     858             :     }
     859             : 
     860             :   /* Process the whole buffer, one line or one sentence at a time.  */
     861             : 
     862         213 :   for (cursor = text_buffer.start;
     863         143 :        cursor < text_buffer.end;
     864          73 :        cursor = next_context_start)
     865             :     {
     866             : 
     867             :       /* `context_start' gets initialized before the processing of each
     868             :          line, or once for the whole buffer if no end of line or sentence
     869             :          sequence separator.  */
     870             : 
     871          73 :       context_start = cursor;
     872             : 
     873             :       /* If an end of line or end of sentence sequence is defined and
     874             :          non-empty, `next_context_start' will be recomputed to be the end of
     875             :          each line or sentence, before each one is processed.  If no such
     876             :          sequence, then `next_context_start' is set at the end of the whole
     877             :          buffer, which is then considered to be a single line or sentence.
     878             :          This test also accounts for the case of an incomplete line or
     879             :          sentence at the end of the buffer.  */
     880             : 
     881          73 :       next_context_start = text_buffer.end;
     882          73 :       if (context_regex.string)
     883         146 :         switch (re_search (&context_regex.pattern, cursor,
     884          73 :                            text_buffer.end - cursor,
     885          73 :                            0, text_buffer.end - cursor, &context_regs))
     886             :           {
     887           0 :           case -2:
     888           0 :             matcher_error ();   /* NOTE(review): no break; falls into case -1 if this returns */
     889             : 
     890          67 :           case -1:
     891          67 :             break;
     892             : 
     893           6 :           default:
     894           6 :             next_context_start = cursor + context_regs.end[0];
     895           6 :             break;
     896             :           }
     897             : 
     898             :       /* Include the separator into the right context, but not any suffix
     899             :          white space in this separator; this ensures it will be seen in
     900             :          output and will not take more space than necessary.  */
     901             : 
     902          73 :       context_end = next_context_start;
     903          73 :       SKIP_WHITE_BACKWARDS (context_end, context_start);
     904             : 
     905             :       /* Read and process a single input line or sentence, one word at a
     906             :          time.  */
     907             : 
     908             :       while (1)
     909             :         {
     910         149 :           if (word_regex.string)
     911             : 
     912             :             /* If a word regexp has been compiled, use it to skip at the
     913             :                beginning of the next word.  If there is no such word, exit
     914             :                the loop.  */
     915             : 
     916             :             {
     917           0 :               regoff_t r = re_search (&word_regex.pattern, cursor,
     918           0 :                                       context_end - cursor,
     919             :                                       0, context_end - cursor, &word_regs);
     920           0 :               if (r == -2)
     921           0 :                 matcher_error ();
     922           0 :               if (r == -1)
     923           0 :                 break;
     924           0 :               word_start = cursor + word_regs.start[0];
     925           0 :               word_end = cursor + word_regs.end[0];
     926             :             }
     927             :           else
     928             : 
     929             :             /* Avoid re_search and use the fastmap to skip to the
     930             :                beginning of the next word.  If there is no more word in
     931             :                the buffer, exit the loop.  */
     932             : 
     933             :             {
     934         111 :               scan = cursor;
     935         688 :               while (scan < context_end
     936         504 :                      && !word_fastmap[to_uchar (*scan)])
     937         466 :                 scan++;
     938             : 
     939         111 :               if (scan == context_end)
     940          73 :                 break;
     941             : 
     942          38 :               word_start = scan;
     943             : 
     944         139 :               while (scan < context_end
     945          92 :                      && word_fastmap[to_uchar (*scan)])
     946          63 :                 scan++;
     947             : 
     948          38 :               word_end = scan;
     949             :             }
     950             : 
     951             :           /* Skip right to the beginning of the found word.  */
     952             : 
     953          38 :           cursor = word_start;
     954             : 
     955             :           /* Skip any zero length word.  Just advance a single position,
     956             :              then go fetch the next word.  */
     957             : 
     958          38 :           if (word_end == word_start)
     959             :             {
     960           0 :               cursor++;
     961           0 :               continue;
     962             :             }
     963             : 
     964             :           /* This is a genuine, non empty word, so save it as a possible
     965             :              key.  Then skip over it.  Also, maintain the maximum length of
     966             :              all words read so far.  It is mandatory to take the maximum
     967             :              length of all words in the file, without considering if they
     968             :              are actually kept or rejected, because backward jumps at output
     969             :              generation time may fall in *any* word.  */
     970             : 
     971          38 :           possible_key.start = cursor;
     972          38 :           possible_key.size = word_end - word_start;
     973          38 :           cursor += possible_key.size;
     974             : 
     975          38 :           if (possible_key.size > maximum_word_length)
     976          27 :             maximum_word_length = possible_key.size;
     977             : 
     978             :           /* In input reference mode, update `line_start' from its previous
     979             :              value.  Count the lines just in case auto reference mode is
     980             :              also selected.  If it happens that the word just matched is
     981             :              indeed part of a reference, just ignore it.  */
     982             : 
     983          38 :           if (input_reference)
     984             :             {
     985           0 :               while (line_scan < possible_key.start)
     986           0 :                 if (*line_scan == '\n')
     987             :                   {
     988           0 :                     total_line_count++;
     989           0 :                     line_scan++;
     990           0 :                     line_start = line_scan;
     991           0 :                     SKIP_NON_WHITE (line_scan, text_buffer.end);
     992           0 :                     reference_length = line_scan - line_start;
     993             :                   }
     994             :                 else
     995           0 :                   line_scan++;
     996           0 :               if (line_scan > possible_key.start)
     997           0 :                 continue;
     998             :             }
     999             : 
    1000             :           /* Ignore the word if an `Ignore words' table exists and if it is
    1001             :              part of it.  Also ignore the word if an `Only words' table exists
    1002             :              and if it is *not* part of it.
    1003             : 
    1004             :              It is allowed that both tables be used at once, even if this
    1005             :              may look strange for now.  Just ignore a word that would appear
    1006             :              in both.  If regexps are eventually implemented for these
    1007             :              tables, the Ignore table could then reject words that would
    1008             :              have been previously accepted by the Only table.  */
    1009             : 
    1010          38 :           if (ignore_file && search_table (&possible_key, &ignore_table))
    1011           0 :             continue;
    1012          38 :           if (only_file && !search_table (&possible_key, &only_table))
    1013           0 :             continue;
    1014             : 
    1015             :           /* A non-empty word has been found.  First of all, ensure
    1016             :              proper allocation of the next OCCURS, and make a pointer to
    1017             :              where it will be constructed.  */
    1018             : 
    1019          38 :           if (number_of_occurs[0] == occurs_alloc[0])
    1020             :             {
    1021          34 :               if ((SIZE_MAX / sizeof *occurs_table[0] - 1) / 2
    1022          34 :                   < occurs_alloc[0])
    1023           0 :                 xalloc_die ();
    1024          34 :               occurs_alloc[0] = occurs_alloc[0] * 2 + 1;
    1025          34 :               occurs_table[0] = xrealloc (occurs_table[0],
    1026          34 :                                           occurs_alloc[0] * sizeof *occurs_table[0]);
    1027             :             }
    1028             : 
    1029          38 :           occurs_cursor = occurs_table[0] + number_of_occurs[0];
    1030             : 
    1031             :           /* Define the reference field, if any.  */
    1032             : 
    1033          38 :           if (auto_reference)
    1034             :             {
    1035             : 
    1036             :               /* While auto referencing, update `line_start' from its
    1037             :                  previous value, counting lines as we go.  If input
    1038             :                  referencing at the same time, `line_start' has been
    1039             :                  advanced earlier, and the following loop is never really
    1040             :                  executed.  */
    1041             : 
    1042           4 :               while (line_scan < possible_key.start)
    1043           0 :                 if (*line_scan == '\n')
    1044             :                   {
    1045           0 :                     total_line_count++;
    1046           0 :                     line_scan++;
    1047           0 :                     line_start = line_scan;
    1048           0 :                     SKIP_NON_WHITE (line_scan, text_buffer.end);
    1049             :                   }
    1050             :                 else
    1051           0 :                   line_scan++;
    1052             : 
    1053           2 :               occurs_cursor->reference = total_line_count;
    1054             :             }
    1055          36 :           else if (input_reference)
    1056             :             {
    1057             : 
    1058             :               /* If only input referencing, `line_start' has been computed
    1059             :                  earlier to detect the case the word matched would be part
    1060             :                  of the reference.  The reference position is simply the
    1061             :                  value of `line_start'.  */
    1062             : 
    1063             :               occurs_cursor->reference
    1064           0 :                 = (DELTA) (line_start - possible_key.start);
    1065           0 :               if (reference_length > reference_max_width)
    1066           0 :                 reference_max_width = reference_length;
    1067             :             }
    1068             : 
    1069             :           /* Exclude the reference from the context in simple cases.  */
    1070             : 
    1071          38 :           if (input_reference && line_start == context_start)
    1072             :             {
    1073           0 :               SKIP_NON_WHITE (context_start, context_end);
    1074           0 :               SKIP_WHITE (context_start, context_end);
    1075             :             }
    1076             : 
    1077             :           /* Completes the OCCURS structure.  */
    1078             : 
    1079          38 :           occurs_cursor->key = possible_key;
    1080          38 :           occurs_cursor->left = context_start - possible_key.start;
    1081          38 :           occurs_cursor->right = context_end - possible_key.start;
    1082             : 
    1083          38 :           number_of_occurs[0]++;
    1084             :         }
    1085             :     }
    1086          70 : }
    1087             : 
    1088             : /* Formatting and actual output - service routines.  */
    1089             : 
    1090             : /*-------------------------------------------------------------.
    1091             : | Prints some NUMBER of spaces on stdout.  A NUMBER of zero or |
                     : | less prints nothing, because the loop only runs while its    |
                     : | counter is positive.                                         |
    1092             : `-------------------------------------------------------------*/
    1093             : 
    1094             : static void
    1095         105 : print_spaces (int number)
    1096             : {
    1097             :   int counter;                  /* spaces still to be printed */
    1098             : 
    1099        1366 :   for (counter = number; counter > 0; counter--)
    1100        1261 :     putchar (' ');
    1101         105 : }
    1102             : 
    1103             : /*-------------------------------------------------------------.
    1104             : | Prints the field provided by FIELD: every character from     |
                     : | FIELD.start up to, but not including, FIELD.end.  Characters |
                     : | flagged in edited_flag are replaced by an escape sequence or |
                     : | by one space; all other characters are copied unchanged.     |
    1105             : `-------------------------------------------------------------*/
    1106             : 
    1107             : static void
    1108         119 : print_field (BLOCK field)
    1109             : {
    1110             :   char *cursor;                 /* Cursor in field to print */
    1111             :   int base;                     /* Base character, without diacritic */
    1112             :   int diacritic;                /* Diacritic code for the character */
    1113             : 
    1114             :   /* Whitespace is not really compressed.  Instead, each white space
    1115             :      character (tab, vt, ff etc.) is printed as one single space.  */
    1116             : 
    1117         406 :   for (cursor = field.start; cursor < field.end; cursor++)
    1118             :     {
    1119         287 :       unsigned char character = *cursor;
    1120         287 :       if (edited_flag[character])
    1121             :         {
    1122             : 
    1123             :           /* First check if this is a diacriticized character.
    1124             : 
    1125             :              This works only for TeX.  I do not know how diacriticized
    1126             :              letters work with `roff'.  Please someone explain it to me!
                     : 
                     :              For the accent codes 2 to 6, a base letter `i' is preceded
                     :              by a backslash.  Codes outside 1 to 9 match no case of the
                     :              switch below, so nothing at all is printed for them.  */
    1127             : 
    1128           2 :           diacritic = todiac (character);
    1129           2 :           if (diacritic != 0 && output_format == TEX_FORMAT)
    1130             :             {
    1131           0 :               base = tobase (character);
    1132           0 :               switch (diacritic)
    1133             :                 {
    1134             : 
    1135           0 :                 case 1:         /* Latin diphthongs */
    1136           0 :                   switch (base)
    1137             :                     {
    1138           0 :                     case 'o':
    1139           0 :                       fputs ("\\oe{}", stdout);
    1140           0 :                       break;
    1141             : 
    1142           0 :                     case 'O':
    1143           0 :                       fputs ("\\OE{}", stdout);
    1144           0 :                       break;
    1145             : 
    1146           0 :                     case 'a':
    1147           0 :                       fputs ("\\ae{}", stdout);
    1148           0 :                       break;
    1149             : 
    1150           0 :                     case 'A':
    1151           0 :                       fputs ("\\AE{}", stdout);
    1152           0 :                       break;
    1153             : 
    1154           0 :                     default:    /* Any other base prints as a space */
    1155           0 :                       putchar (' ');
    1156             :                     }
    1157           0 :                   break;
    1158             : 
    1159           0 :                 case 2:         /* Acute accent */
    1160           0 :                   printf ("\\'%s%c", (base == 'i' ? "\\" : ""), base);
    1161           0 :                   break;
    1162             : 
    1163           0 :                 case 3:         /* Grave accent */
    1164           0 :                   printf ("\\`%s%c", (base == 'i' ? "\\" : ""), base);
    1165           0 :                   break;
    1166             : 
    1167           0 :                 case 4:         /* Circumflex accent */
    1168           0 :                   printf ("\\^%s%c", (base == 'i' ? "\\" : ""), base);
    1169           0 :                   break;
    1170             : 
    1171           0 :                 case 5:         /* Diaeresis */
    1172           0 :                   printf ("\\\"%s%c", (base == 'i' ? "\\" : ""), base);
    1173           0 :                   break;
    1174             : 
    1175           0 :                 case 6:         /* Tilde accent */
    1176           0 :                   printf ("\\~%s%c", (base == 'i' ? "\\" : ""), base);
    1177           0 :                   break;
    1178             : 
    1179           0 :                 case 7:         /* Cedilla */
    1180           0 :                   printf ("\\c{%c}", base);
    1181           0 :                   break;
    1182             : 
    1183           0 :                 case 8:         /* Small circle beneath */
    1184           0 :                   switch (base)
    1185             :                     {
    1186           0 :                     case 'a':
    1187           0 :                       fputs ("\\aa{}", stdout);
    1188           0 :                       break;
    1189             : 
    1190           0 :                     case 'A':
    1191           0 :                       fputs ("\\AA{}", stdout);
    1192           0 :                       break;
    1193             : 
    1194           0 :                     default:    /* Any other base prints as a space */
    1195           0 :                       putchar (' ');
    1196             :                     }
    1197           0 :                   break;
    1198             : 
    1199           0 :                 case 9:         /* Strike through */
    1200           0 :                   switch (base)
    1201             :                     {
    1202           0 :                     case 'o':
    1203           0 :                       fputs ("\\o{}", stdout);
    1204           0 :                       break;
    1205             : 
    1206           0 :                     case 'O':
    1207           0 :                       fputs ("\\O{}", stdout);
    1208           0 :                       break;
    1209             : 
    1210           0 :                     default:    /* Any other base prints as a space */
    1211           0 :                       putchar (' ');
    1212             :                     }
    1213           0 :                   break;
    1214             :                 }
    1215           0 :             }
    1216             :           else
    1217             : 
    1218             :             /* This is not a diacritic character, so handle cases which are
    1219             :                really specific to `roff' or TeX.  All white space processing
    1220             :                is done as the default case of this switch.  */
    1221             : 
    1222           2 :             switch (character)
    1223             :               {
    1224           0 :               case '"':
    1225             :                 /* In roff output format, double any quote.  */
    1226           0 :                 putchar ('"');
    1227           0 :                 putchar ('"');
    1228           0 :                 break;
    1229             : 
    1230           0 :               case '$':
    1231             :               case '%':
    1232             :               case '&':
    1233             :               case '#':
    1234             :               case '_':
    1235             :                 /* In TeX output format, precede these with a backslash.  */
    1236           0 :                 putchar ('\\');
    1237           0 :                 putchar (character);
    1238           0 :                 break;
    1239             : 
    1240           0 :               case '{':
    1241             :               case '}':
    1242             :                 /* In TeX output format, precede these with a backslash and
    1243             :                    force mathematical mode.  */
    1244           0 :                 printf ("$\\%c$", character);
    1245           0 :                 break;
    1246             : 
    1247           0 :               case '\\':
    1248             :                 /* In TeX output mode, request production of a backslash.  */
    1249           0 :                 fputs ("\\backslash{}", stdout);
    1250           0 :                 break;
    1251             : 
    1252           2 :               default:
    1253             :                 /* Any other flagged character produces a single space.  */
    1254           2 :                 putchar (' ');
    1255             :               }
    1256             :         }
    1257             :       else
    1258         285 :         putchar (*cursor);
    1259             :     }
    1260         119 : }
    1261             : 
    1262             : /* Formatting and actual output - planning routines.  */
    1263             : 
    1264             : /*--------------------------------------------------------------------.
    1265             : | From information collected from command line options and input file |
    1266             : | readings, compute and fix some output parameter values.             |
                     : | Sets the reference width and buffer (auto reference mode), the      |
                     : | line and field widths, the truncation string, and edited_flag.      |
    1267             : `--------------------------------------------------------------------*/
    1268             : 
    1269             : static void
    1270          59 : fix_output_parameters (void)
    1271             : {
    1272             :   int file_index;               /* index in text input file arrays */
    1273             :   int line_ordinal;             /* line ordinal value for reference */
    1274             :   char ordinal_string[12];      /* edited line ordinal for reference */
    1275             :   int reference_width;          /* width for the whole reference */
    1276             :   int character;                /* character ordinal */
    1277             :   const char *cursor;           /* cursor in some constant strings */
    1278             : 
    1279             :   /* In auto reference mode, the maximum width of this field is
    1280             :      precomputed and subtracted from the overall line width.  Add one for
    1281             :      the column which separates the file name from the line number.
                     : 
                     :      Each file's line count is taken as the difference between its
                     :      file_line_count entry and the previous one, so that array
                     :      presumably holds running totals -- TODO confirm.
                     : 
                     :      NOTE(review): ordinal_string is filled by an unchecked sprintf;
                     :      its 12 bytes are only enough while int is at most 32 bits wide.  */
    1282             : 
    1283          59 :   if (auto_reference)
    1284             :     {
    1285           5 :       reference_max_width = 0;
    1286          10 :       for (file_index = 0; file_index < number_input_files; file_index++)
    1287             :         {
    1288           5 :           line_ordinal = file_line_count[file_index] + 1;
    1289           5 :           if (file_index > 0)
    1290           0 :             line_ordinal -= file_line_count[file_index - 1];
    1291           5 :           sprintf (ordinal_string, "%d", line_ordinal);
    1292           5 :           reference_width = strlen (ordinal_string);
    1293           5 :           if (input_file_name[file_index])
    1294           0 :             reference_width += strlen (input_file_name[file_index]);
    1295           5 :           if (reference_width > reference_max_width)
    1296           5 :             reference_max_width = reference_width;
    1297             :         }
    1298           5 :       reference_max_width++;
    1299           5 :       reference.start = xmalloc ((size_t) reference_max_width + 1);
    1300             :     }
    1301             : 
    1302             :   /* If the reference appears to the left of the output line, reserve some
    1303             :      space for it right away, including one gap size.
                     : 
                     :      NOTE(review): the test uses the bitwise `|' and `&' operators, which
                     :      is only equivalent to `||' and `&&' if both reference flags are
                     :      always 0 or 1 -- confirm their type.  Also, nothing here keeps
                     :      line_width from going negative when the reference is wide.  */
    1304             : 
    1305          59 :   if ((auto_reference | input_reference) & !right_reference)
    1306           5 :     line_width -= reference_max_width + gap_size;
    1307             : 
    1308             :   /* The output lines, minimally, will contain from left to right a left
    1309             :      context, a gap, and a keyword followed by the right context with no
    1310             :      special intervening gap.  Half of the line width is dedicated to the
    1311             :      left context and the gap, the other half is dedicated to the keyword
    1312             :      and the right context; these values are computed once and for all here.
    1313             :      There also are tail and head wrap around fields, used when the keyword
    1314             :      is near the beginning or the end of the line, or when some long word
    1315             :      cannot fit in, but leave place from wrapped around shorter words.  The
    1316             :      maximum width of these fields are recomputed separately for each line,
    1317             :      on a case by case basis.  It is worth noting that it cannot happen that
    1318             :      both the tail and head fields are used at once.  */
    1319             : 
    1320          59 :   half_line_width = line_width / 2;
    1321          59 :   before_max_width = half_line_width - gap_size;
    1322          59 :   keyafter_max_width = half_line_width;
    1323             : 
    1324             :   /* If truncation_string is the empty string, make it NULL to speed up
    1325             :      tests.  In this case truncation_string_length is not set here, yet
    1326             :      it is still read by the width adjustments just below.
                     :      NOTE(review): presumably it is zero by default -- confirm.  */
    1327             : 
    1328          59 :   if (truncation_string && *truncation_string)
    1329          53 :     truncation_string_length = strlen (truncation_string);
    1330             :   else
    1331           6 :     truncation_string = NULL;
    1332             : 
    1333          59 :   if (gnu_extensions)
    1334             :     {
    1335             : 
    1336             :       /* When flagging truncation at the left of the keyword, the
    1337             :          truncation mark goes at the beginning of the before field,
    1338             :          unless there is a head field, in which case the mark goes at the
    1339             :          left of the head field.  When flagging truncation at the right
    1340             :          of the keyword, the mark goes at the end of the keyafter field,
    1341             :          unless there is a tail field, in which case the mark goes at the
    1342             :          end of the tail field.  Only eight combination cases could arise
    1343             :          for truncation marks:
    1344             : 
    1345             :          . None.
    1346             :          . One beginning the before field.
    1347             :          . One beginning the head field.
    1348             :          . One ending the keyafter field.
    1349             :          . One ending the tail field.
    1350             :          . One beginning the before field, another ending the keyafter field.
    1351             :          . One ending the tail field, another beginning the before field.
    1352             :          . One ending the keyafter field, another beginning the head field.
    1353             : 
    1354             :          So, there are at most two truncation marks, which could appear both
    1355             :          on the left side of the center of the output line, both on the
    1356             :          right side, or one on either side.
                     : 
                     :          NOTE(review): the two widths are not clamped after the
                     :          subtraction, so a long truncation string or a narrow line can
                     :          leave them negative -- confirm that later code tolerates it.  */
    1357             : 
    1358          56 :       before_max_width -= 2 * truncation_string_length;
    1359          56 :       keyafter_max_width -= 2 * truncation_string_length;
    1360             :     }
    1361             :   else
    1362             :     {
    1363             : 
    1364             :       /* I never figured out exactly how UNIX' ptx plans the output width
    1365             :          of its various fields.  If GNU extensions are disabled, do not
    1366             :          try computing the field widths correctly; instead, use the
    1367             :          following formula, which does not completely imitate UNIX' ptx,
    1368             :          but almost.  */
    1369             : 
    1370           3 :       keyafter_max_width -= 2 * truncation_string_length + 1;
    1371             :     }
    1372             : 
    1373             :   /* Compute which characters need special output processing.  Initialize
    1374             :      by flagging any white space character.  Some systems do not consider
    1375             :      form feed as a space character, but we do.  */
    1376             : 
    1377       15163 :   for (character = 0; character < CHAR_SET_SIZE; character++)
    1378       15104 :     edited_flag[character] = !! isspace (character);
    1379          59 :   edited_flag['\f'] = 1;
    1380             : 
    1381             :   /* Complete the special character flagging according to selected output
    1382             :      format.  */
    1383             : 
    1384          59 :   switch (output_format)
    1385             :     {
    1386          56 :     case UNKNOWN_FORMAT:
    1387             :       /* Should never happen; falls through to DUMB_FORMAT.  */
    1388             : 
    1389             :     case DUMB_FORMAT:
    1390          56 :       break;
    1391             : 
    1392           3 :     case ROFF_FORMAT:
    1393             : 
    1394             :       /* `Quote' characters should be doubled.  */
    1395             : 
    1396           3 :       edited_flag['"'] = 1;
    1397           3 :       break;
    1398             : 
    1399           0 :     case TEX_FORMAT:
    1400             : 
    1401             :       /* Various characters need special processing.  */
    1402             : 
    1403           0 :       for (cursor = "$%&#_{}\\"; *cursor; cursor++)
    1404           0 :         edited_flag[to_uchar (*cursor)] = 1;
    1405             : 
    1406             :       /* For every character with the 8th bit set, replace the flag set
    1407             :          above by whether the character is diacriticized.
                     :          NOTE(review): this leaves the other 8-bit characters unflagged,
                     :          so print_field copies them unchanged rather than printing a
                     :          single space -- confirm which of the two is intended.  */
    1408             : 
    1409           0 :       for (character = 0200; character < CHAR_SET_SIZE; character++)
    1410           0 :         edited_flag[character] = todiac (character) != 0;
    1411           0 :       break;
    1412             :     }
    1413          59 : }
    1414             : 
    1415             : /*------------------------------------------------------------------.
    1416             : | Compute the position and length of all the output fields, given a |
    1417             : | pointer to some OCCURS.  Sets keyafter, before, tail and head,    |
                     : | their truncation flags and, in a reference mode, reference.       |
    1418             : `------------------------------------------------------------------*/
    1419             : 
    1420             : static void
    1421          38 : define_all_fields (OCCURS *occurs)
    1422             : {
    1423             :   int tail_max_width;           /* allowable width of tail field */
    1424             :   int head_max_width;           /* allowable width of head field */
    1425             :   char *cursor;                 /* running cursor in source text */
    1426             :   char *left_context_start;     /* start of left context */
    1427             :   char *right_context_end;      /* end of right context */
    1428             :   char *left_field_start;       /* conservative start for `head'/`before' */
    1429             :   int file_index;               /* index in text input file arrays */
    1430             :   const char *file_name;        /* file name for reference */
    1431             :   int line_ordinal;             /* line ordinal for reference */
    1432             : 
    1433             :   /* Define `keyafter', start of left context and end of right context.
    1434             :      `keyafter' starts at the saved position for keyword and extends to the
    1435             :      right from the end of the keyword, eating separators or full words, but
    1436             :      not beyond maximum allowed width for `keyafter' field or limit for the
    1437             :      right context.  Suffix spaces will be removed afterwards.  */
    1438             : 
    1439          38 :   keyafter.start = occurs->key.start;
    1440          38 :   keyafter.end = keyafter.start + occurs->key.size;
    1441          38 :   left_context_start = keyafter.start + occurs->left;
    1442          38 :   right_context_end = keyafter.start + occurs->right;
    1443             : 
    1444          38 :   cursor = keyafter.end;
    1445         185 :   while (cursor < right_context_end
    1446         109 :          && cursor <= keyafter.start + keyafter_max_width)
    1447             :     {
    1448         109 :       keyafter.end = cursor;
    1449         109 :       SKIP_SOMETHING (cursor, right_context_end);
    1450             :     }
    1451          38 :   if (cursor <= keyafter.start + keyafter_max_width)
    1452          38 :     keyafter.end = cursor;
    1453             : 
    1454          38 :   keyafter_truncation = truncation_string && keyafter.end < right_context_end;
    1455             : 
    1456          38 :   SKIP_WHITE_BACKWARDS (keyafter.end, keyafter.start);
    1457             : 
    1458             :   /* When the left context is wide, it might take some time to catch up from
    1459             :      the left context boundary to the beginning of the `head' or `before'
    1460             :      fields.  So, in this case, to speed the catchup, we jump back from the
    1461             :      keyword, using some secure distance, possibly falling in the middle of
    1462             :      a word.  A secure backward jump would be at least half the maximum
    1463             :      width of a line, plus the size of the longest word met in the whole
    1464             :      input.  We conclude this backward jump by a skip forward of at least
    1465             :      one word.  In this manner, we should not inadvertently accept only part
    1466             :      of a word.  From the reached point, when it will be time to fix the
    1467             :      beginning of `head' or `before' fields, we will skip forward words or
    1468             :      delimiters until we get sufficiently near.  */
    1469             : 
    1470          38 :   if (-occurs->left > half_line_width + maximum_word_length)
    1471             :     {
    1472             :       left_field_start
    1473           0 :         = keyafter.start - (half_line_width + maximum_word_length);
    1474           0 :       SKIP_SOMETHING (left_field_start, keyafter.start);
    1475             :     }
    1476             :   else
    1477          38 :     left_field_start = keyafter.start + occurs->left;
    1478             : 
    1479             :   /* `before' certainly ends at the keyword, but not including separating
    1480             :      spaces.  It starts later than the saved value for the left context, by
    1481             :      advancing it until it falls inside the maximum allowed width for the
    1482             :      before field.  There will be no prefix spaces either.  `before' only
    1483             :      advances by skipping single separators or whole words. */
    1484             : 
    1485          38 :   before.start = left_field_start;
    1486          38 :   before.end = keyafter.start;
    1487          38 :   SKIP_WHITE_BACKWARDS (before.end, before.start);
    1488             : 
    1489          76 :   while (before.start + before_max_width < before.end)
    1490           0 :     SKIP_SOMETHING (before.start, before.end);
    1491             : 
    1492          38 :   if (truncation_string)
    1493             :     {
    1494          38 :       cursor = before.start;
    1495          38 :       SKIP_WHITE_BACKWARDS (cursor, text_buffer.start);
    1496          38 :       before_truncation = cursor > left_context_start;
    1497             :     }
    1498             :   else
    1499           0 :     before_truncation = 0;
    1500             : 
    1501          38 :   SKIP_WHITE (before.start, text_buffer.end);
    1502             : 
    1503             :   /* The tail could not take more columns than what has been left in the
    1504             :      left context field, and a gap is mandatory.  It starts after the
    1505             :      right context, and does not contain prefixed spaces.  It ends at
    1506             :      the end of line, the end of buffer or when the tail field is full,
    1507             :      whichever comes first.  It cannot contain only part of a word, and
    1508             :      has no suffixed spaces.  */
    1509             : 
    1510             :   tail_max_width
    1511          38 :     = before_max_width - (before.end - before.start) - gap_size;
    1512             : 
    1513          38 :   if (tail_max_width > 0)
    1514             :     {
    1515          38 :       tail.start = keyafter.end;
    1516          38 :       SKIP_WHITE (tail.start, text_buffer.end);
    1517             : 
    1518          38 :       tail.end = tail.start;
    1519          38 :       cursor = tail.end;
    1520          76 :       while (cursor < right_context_end
    1521           0 :              && cursor < tail.start + tail_max_width)
    1522             :         {
    1523           0 :           tail.end = cursor;
    1524           0 :           SKIP_SOMETHING (cursor, right_context_end);
    1525             :         }
    1526             : 
    1527          38 :       if (cursor < tail.start + tail_max_width)
    1528          38 :         tail.end = cursor;
    1529             : 
    1530          38 :       if (tail.end > tail.start)
    1531             :         {
    1532           0 :           keyafter_truncation = 0;
    1533           0 :           tail_truncation = truncation_string && tail.end < right_context_end;
    1534             :         }
    1535             :       else
    1536          38 :         tail_truncation = 0;
    1537             : 
    1538          38 :       SKIP_WHITE_BACKWARDS (tail.end, tail.start);
    1539             :     }
    1540             :   else
    1541             :     {
    1542             : 
    1543             :       /* No place left for a tail field.  */
    1544             : 
    1545           0 :       tail.start = NULL;
    1546           0 :       tail.end = NULL;
    1547           0 :       tail_truncation = 0;
    1548             :     }
    1549             : 
    1550             :   /* `head' could not take more columns than what has been left in the right
    1551             :      context field, and a gap is mandatory.  It ends before the left
    1552             :      context, and does not contain suffixed spaces.  Its pointer is advanced
    1553             :      until the head field has shrunk to its allowed width.  It cannot
    1554             :      contain only part of a word, and has no prefixed spaces.  */
    1555             : 
    1556             :   head_max_width
    1557          38 :     = keyafter_max_width - (keyafter.end - keyafter.start) - gap_size;
    1558             : 
    1559          38 :   if (head_max_width > 0)
    1560             :     {
    1561          38 :       head.end = before.start;
    1562          38 :       SKIP_WHITE_BACKWARDS (head.end, text_buffer.start);
    1563             : 
    1564          38 :       head.start = left_field_start;
    1565          76 :       while (head.start + head_max_width < head.end)
    1566           0 :         SKIP_SOMETHING (head.start, head.end);
    1567             : 
    1568          38 :       if (head.end > head.start)
    1569             :         {
    1570           0 :           before_truncation = 0;
    1571           0 :           head_truncation = (truncation_string
    1572           0 :                              && head.start > left_context_start);
    1573             :         }
    1574             :       else
    1575          38 :         head_truncation = 0;
    1576             : 
    1577          38 :       SKIP_WHITE (head.start, head.end);
    1578             :     }
    1579             :   else
    1580             :     {
    1581             : 
    1582             :       /* No place left for a head field.  */
    1583             : 
    1584           0 :       head.start = NULL;
    1585           0 :       head.end = NULL;
    1586           0 :       head_truncation = 0;
    1587             :     }
    1588             : 
    1589          38 :   if (auto_reference)
    1590             :     {
    1591             : 
    1592             :       /* Construct the reference text in preallocated space from the file
    1593             :          name and the line number.  Find out in which file the reference
    1594             :          occurred.  Standard input yields an empty file name.  Ensure line
    1595             :          numbers are one based, even if they are computed zero based.
                     : 
                     :          NOTE(review): the file search below has no upper bound on
                     :          file_index, and the sprintf is unchecked; both rely on
                     :          occurs->reference lying within the recorded line counts and
                     :          on reference.start being large enough -- confirm.  */
    1596             : 
    1597           2 :       file_index = 0;
    1598           4 :       while (file_line_count[file_index] < occurs->reference)
    1599           0 :         file_index++;
    1600             : 
    1601           2 :       file_name = input_file_name[file_index];
    1602           2 :       if (!file_name)
    1603           2 :         file_name = "";
    1604             : 
    1605           2 :       line_ordinal = occurs->reference + 1;
    1606           2 :       if (file_index > 0)
    1607           0 :         line_ordinal -= file_line_count[file_index - 1];
    1608             : 
    1609           2 :       sprintf (reference.start, "%s:%d", file_name, line_ordinal);
    1610           2 :       reference.end = reference.start + strlen (reference.start);
    1611             :     }
    1612          36 :   else if (input_reference)
    1613             :     {
    1614             : 
    1615             :       /* Reference starts at saved position for reference and extends right
    1616             :          until some white space is met.  */
    1617             : 
    1618           0 :       reference.start = keyafter.start + (DELTA) occurs->reference;
    1619           0 :       reference.end = reference.start;
    1620           0 :       SKIP_NON_WHITE (reference.end, right_context_end);
    1621             :     }
    1622          38 : }
    1623             : 
    1624             : /* Formatting and actual output - control routines.  */
    1625             : 
    1626             : /*----------------------------------------------------------------------.
    1627             : | Output the current output fields as one line for `troff' or `nroff'.  |
                     : | The line is a call to macro_name followed by the quoted tail,         |
                     : | before, keyafter and head fields, then the reference if enabled.      |
    1628             : `----------------------------------------------------------------------*/
    1629             : 
    1630             : static void
    1631           3 : output_one_roff_line (void)
    1632             : {
    1633             :   /* Output the macro request, then the `tail' field.  */
    1634             : 
    1635           3 :   printf (".%s \"", macro_name);
    1636           3 :   print_field (tail);
    1637           3 :   if (tail_truncation)
    1638           0 :     fputs (truncation_string, stdout);
    1639           3 :   putchar ('"');
    1640             : 
    1641             :   /* Output the `before' field.  */
    1642             : 
    1643           3 :   fputs (" \"", stdout);
    1644           3 :   if (before_truncation)
    1645           0 :     fputs (truncation_string, stdout);
    1646           3 :   print_field (before);
    1647           3 :   putchar ('"');
    1648             : 
    1649             :   /* Output the `keyafter' field.  */
    1650             : 
    1651           3 :   fputs (" \"", stdout);
    1652           3 :   print_field (keyafter);
    1653           3 :   if (keyafter_truncation)
    1654           0 :     fputs (truncation_string, stdout);
    1655           3 :   putchar ('"');
    1656             : 
    1657             :   /* Output the `head' field.  */
    1658             : 
    1659           3 :   fputs (" \"", stdout);
    1660           3 :   if (head_truncation)
    1661           0 :     fputs (truncation_string, stdout);
    1662           3 :   print_field (head);
    1663           3 :   putchar ('"');
    1664             : 
    1665             :   /* Conditionally output the `reference' field, when either reference
    1666             :      mode is enabled.  */
    1667           3 :   if (auto_reference | input_reference)
    1668             :     {
    1669           2 :       fputs (" \"", stdout);
    1670           2 :       print_field (reference);
    1671           2 :       putchar ('"');
    1672             :     }
    1673             : 
    1674           3 :   putchar ('\n');
    1675           3 : }
    1676             : 
    1677             : /*---------------------------------------------------------.
    1678             : | Output the current output fields as one line for `TeX'.  |
    1679             : `---------------------------------------------------------*/
    1680             : 
    1681             : static void
    1682           0 : output_one_tex_line (void)
    1683             : {
    1684             :   BLOCK key;                    /* key field, isolated */
    1685             :   BLOCK after;                  /* after field, isolated */
    1686             :   char *cursor;                 /* running cursor in source text */
    1687             : 
    1688           0 :   printf ("\\%s ", macro_name);
    1689           0 :   putchar ('{');
    1690           0 :   print_field (tail);
    1691           0 :   fputs ("}{", stdout);
    1692           0 :   print_field (before);
    1693           0 :   fputs ("}{", stdout);
    1694           0 :   key.start = keyafter.start;
    1695           0 :   after.end = keyafter.end;
    1696           0 :   cursor = keyafter.start;
    1697           0 :   SKIP_SOMETHING (cursor, keyafter.end);
    1698           0 :   key.end = cursor;
    1699           0 :   after.start = cursor;
    1700           0 :   print_field (key);
    1701           0 :   fputs ("}{", stdout);
    1702           0 :   print_field (after);
    1703           0 :   fputs ("}{", stdout);
    1704           0 :   print_field (head);
    1705           0 :   putchar ('}');
    1706           0 :   if (auto_reference | input_reference)
    1707             :     {
    1708           0 :       putchar ('{');
    1709           0 :       print_field (reference);
    1710           0 :       putchar ('}');
    1711             :     }
    1712           0 :   putchar ('\n');
    1713           0 : }
    1714             : 
    1715             : /*-------------------------------------------------------------------.
    1716             : | Output the current output fields as one line for a dumb terminal.  |
    1717             : `-------------------------------------------------------------------*/
    1718             : 
    1719             : static void
    1720          35 : output_one_dumb_line (void)
    1721             : {
    1722          35 :   if (!right_reference)
    1723             :     {
    1724          35 :       if (auto_reference)
    1725             :         {
    1726             : 
    1727             :           /* Output the `reference' field, in such a way that GNU emacs
    1728             :              next-error will handle it.  The ending colon is taken from the
    1729             :              gap which follows.  */
    1730             : 
    1731           0 :           print_field (reference);
    1732           0 :           putchar (':');
    1733           0 :           print_spaces (reference_max_width
    1734           0 :                         + gap_size
    1735           0 :                         - (reference.end - reference.start)
    1736           0 :                         - 1);
    1737             :         }
    1738             :       else
    1739             :         {
    1740             : 
    1741             :           /* Output the `reference' field and its following gap.  */
    1742             : 
    1743          35 :           print_field (reference);
    1744          35 :           print_spaces (reference_max_width
    1745          35 :                         + gap_size
    1746          35 :                         - (reference.end - reference.start));
    1747             :         }
    1748             :     }
    1749             : 
    1750          35 :   if (tail.start < tail.end)
    1751             :     {
    1752             :       /* Output the `tail' field.  */
    1753             : 
    1754           0 :       print_field (tail);
    1755           0 :       if (tail_truncation)
    1756           0 :         fputs (truncation_string, stdout);
    1757             : 
    1758           0 :       print_spaces (half_line_width - gap_size
    1759           0 :                     - (before.end - before.start)
    1760           0 :                     - (before_truncation ? truncation_string_length : 0)
    1761           0 :                     - (tail.end - tail.start)
    1762           0 :                     - (tail_truncation ? truncation_string_length : 0));
    1763             :     }
    1764             :   else
    1765          70 :     print_spaces (half_line_width - gap_size
    1766          35 :                   - (before.end - before.start)
    1767          35 :                   - (before_truncation ? truncation_string_length : 0));
    1768             : 
    1769             :   /* Output the `before' field.  */
    1770             : 
    1771          35 :   if (before_truncation)
    1772           0 :     fputs (truncation_string, stdout);
    1773          35 :   print_field (before);
    1774             : 
    1775          35 :   print_spaces (gap_size);
    1776             : 
    1777             :   /* Output the `keyafter' field.  */
    1778             : 
    1779          35 :   print_field (keyafter);
    1780          35 :   if (keyafter_truncation)
    1781           0 :     fputs (truncation_string, stdout);
    1782             : 
    1783          35 :   if (head.start < head.end)
    1784             :     {
    1785             :       /* Output the `head' field.  */
    1786             : 
    1787           0 :       print_spaces (half_line_width
    1788           0 :                     - (keyafter.end - keyafter.start)
    1789           0 :                     - (keyafter_truncation ? truncation_string_length : 0)
    1790           0 :                     - (head.end - head.start)
    1791           0 :                     - (head_truncation ? truncation_string_length : 0));
    1792           0 :       if (head_truncation)
    1793           0 :         fputs (truncation_string, stdout);
    1794           0 :       print_field (head);
    1795             :     }
    1796             :   else
    1797             : 
    1798          35 :     if ((auto_reference | input_reference) & right_reference)
    1799           0 :       print_spaces (half_line_width
    1800           0 :                     - (keyafter.end - keyafter.start)
    1801           0 :                     - (keyafter_truncation ? truncation_string_length : 0));
    1802             : 
    1803          35 :   if ((auto_reference | input_reference) & right_reference)
    1804             :     {
    1805             :       /* Output the `reference' field.  */
    1806             : 
    1807           0 :       print_spaces (gap_size);
    1808           0 :       print_field (reference);
    1809             :     }
    1810             : 
    1811          35 :   putchar ('\n');
    1812          35 : }
    1813             : 
    1814             : /*------------------------------------------------------------------------.
    1815             : | Scan the whole occurs table and, for each entry, output one line in the |
    1816             : | appropriate format.                                                     |
    1817             : `------------------------------------------------------------------------*/
    1818             : 
    1819             : static void
    1820          59 : generate_all_output (void)
    1821             : {
    1822             :   size_t occurs_index;          /* index of keyword entry being processed */
    1823             :   OCCURS *occurs_cursor;        /* current keyword entry being processed */
    1824             : 
    1825             :   /* The following assignments are useful to provide default values in case
    1826             :      line contexts or references are not used, in which case these variables
    1827             :      would never be computed.  */
    1828             : 
    1829          59 :   tail.start = NULL;
    1830          59 :   tail.end = NULL;
    1831          59 :   tail_truncation = 0;
    1832             : 
    1833          59 :   head.start = NULL;
    1834          59 :   head.end = NULL;
    1835          59 :   head_truncation = 0;
    1836             : 
    1837             :   /* Loop over all keyword occurrences.  */
    1838             : 
    1839          59 :   occurs_cursor = occurs_table[0];
    1840             : 
    1841          97 :   for (occurs_index = 0; occurs_index < number_of_occurs[0]; occurs_index++)
    1842             :     {
    1843             :       /* Compute the exact size of every field and whenever truncation flags
    1844             :          are present or not.  */
    1845             : 
    1846          38 :       define_all_fields (occurs_cursor);
    1847             : 
    1848             :       /* Produce one output line according to selected format.  */
    1849             : 
    1850          38 :       switch (output_format)
    1851             :         {
    1852          35 :         case UNKNOWN_FORMAT:
    1853             :           /* Should never happen.  */
    1854             : 
    1855             :         case DUMB_FORMAT:
    1856          35 :           output_one_dumb_line ();
    1857          35 :           break;
    1858             : 
    1859           3 :         case ROFF_FORMAT:
    1860           3 :           output_one_roff_line ();
    1861           3 :           break;
    1862             : 
    1863           0 :         case TEX_FORMAT:
    1864           0 :           output_one_tex_line ();
    1865           0 :           break;
    1866             :         }
    1867             : 
    1868             :       /* Advance the cursor into the occurs table.  */
    1869             : 
    1870          38 :       occurs_cursor++;
    1871             :     }
    1872          59 : }
    1873             : 
    1874             : /* Option decoding and main program.  */
    1875             : 
    1876             : /*------------------------------------------------------.
    1877             : | Print program identification and options, then exit.  |
    1878             : `------------------------------------------------------*/
    1879             : 
    1880             : void
    1881           7 : usage (int status)
    1882             : {
    1883           7 :   if (status != EXIT_SUCCESS)
    1884           7 :     fprintf (stderr, _("Try `%s --help' for more information.\n"),
    1885             :              program_name);
    1886             :   else
    1887             :     {
    1888           0 :       printf (_("\
    1889             : Usage: %s [OPTION]... [INPUT]...   (without -G)\n\
    1890             :   or:  %s -G [OPTION]... [INPUT [OUTPUT]]\n"),
    1891             :               program_name, program_name);
    1892           0 :       fputs (_("\
    1893             : Output a permuted index, including context, of the words in the input files.\n\
    1894             : \n\
    1895             : "), stdout);
    1896           0 :       fputs (_("\
    1897             : Mandatory arguments to long options are mandatory for short options too.\n\
    1898             : "), stdout);
    1899           0 :       fputs (_("\
    1900             :   -A, --auto-reference           output automatically generated references\n\
    1901             :   -G, --traditional              behave more like System V `ptx'\n\
    1902             :   -F, --flag-truncation=STRING   use STRING for flagging line truncations\n\
    1903             : "), stdout);
    1904           0 :       fputs (_("\
    1905             :   -M, --macro-name=STRING        macro name to use instead of `xx'\n\
    1906             :   -O, --format=roff              generate output as roff directives\n\
    1907             :   -R, --right-side-refs          put references at right, not counted in -w\n\
    1908             :   -S, --sentence-regexp=REGEXP   for end of lines or end of sentences\n\
    1909             :   -T, --format=tex               generate output as TeX directives\n\
    1910             : "), stdout);
    1911           0 :       fputs (_("\
    1912             :   -W, --word-regexp=REGEXP       use REGEXP to match each keyword\n\
    1913             :   -b, --break-file=FILE          word break characters in this FILE\n\
    1914             :   -f, --ignore-case              fold lower case to upper case for sorting\n\
    1915             :   -g, --gap-size=NUMBER          gap size in columns between output fields\n\
    1916             :   -i, --ignore-file=FILE         read ignore word list from FILE\n\
    1917             :   -o, --only-file=FILE           read only word list from this FILE\n\
    1918             : "), stdout);
    1919           0 :       fputs (_("\
    1920             :   -r, --references               first field of each line is a reference\n\
    1921             :   -t, --typeset-mode               - not implemented -\n\
    1922             :   -w, --width=NUMBER             output width in columns, reference excluded\n\
    1923             : "), stdout);
    1924           0 :       fputs (HELP_OPTION_DESCRIPTION, stdout);
    1925           0 :       fputs (VERSION_OPTION_DESCRIPTION, stdout);
    1926           0 :       fputs (_("\
    1927             : \n\
    1928             : With no FILE or if FILE is -, read Standard Input.  `-F /' by default.\n\
    1929             : "), stdout);
    1930           0 :       emit_bug_reporting_address ();
    1931             :     }
    1932           7 :   exit (status);
    1933             : }
    1934             : 
    1935             : /*----------------------------------------------------------------------.
    1936             : | Main program.  Decode ARGC arguments passed through the ARGV array of |
    1937             : | strings, then launch execution.                                       |
    1938             : `----------------------------------------------------------------------*/
    1939             : 
    1940             : /* Long options equivalences.  */
    1941             : static const struct option long_options[] =
    1942             : {
    1943             :   {"auto-reference", no_argument, NULL, 'A'},
    1944             :   {"break-file", required_argument, NULL, 'b'},
    1945             :   {"flag-truncation", required_argument, NULL, 'F'},
    1946             :   {"ignore-case", no_argument, NULL, 'f'},
    1947             :   {"gap-size", required_argument, NULL, 'g'},
    1948             :   {"ignore-file", required_argument, NULL, 'i'},
    1949             :   {"macro-name", required_argument, NULL, 'M'},
    1950             :   {"only-file", required_argument, NULL, 'o'},
    1951             :   {"references", no_argument, NULL, 'r'},
    1952             :   {"right-side-refs", no_argument, NULL, 'R'},
    1953             :   {"format", required_argument, NULL, 10},
    1954             :   {"sentence-regexp", required_argument, NULL, 'S'},
    1955             :   {"traditional", no_argument, NULL, 'G'},
    1956             :   {"typeset-mode", no_argument, NULL, 't'},
    1957             :   {"width", required_argument, NULL, 'w'},
    1958             :   {"word-regexp", required_argument, NULL, 'W'},
    1959             :   {GETOPT_HELP_OPTION_DECL},
    1960             :   {GETOPT_VERSION_OPTION_DECL},
    1961             :   {NULL, 0, NULL, 0},
    1962             : };
    1963             : 
    1964             : static char const* const format_args[] =
    1965             : {
    1966             :   "roff", "tex", NULL
    1967             : };
    1968             : 
    1969             : static enum Format const format_vals[] =
    1970             : {
    1971             :   ROFF_FORMAT, TEX_FORMAT
    1972             : };
    1973             : 
    1974             : int
    1975          83 : main (int argc, char **argv)
    1976             : {
    1977             :   int optchar;                  /* argument character */
    1978             :   int file_index;               /* index in text input file arrays */
    1979             : 
    1980             :   /* Decode program options.  */
    1981             : 
    1982             :   initialize_main (&argc, &argv);
    1983          83 :   program_name = argv[0];
    1984          83 :   setlocale (LC_ALL, "");
    1985             :   bindtextdomain (PACKAGE, LOCALEDIR);
    1986             :   textdomain (PACKAGE);
    1987             : 
    1988          83 :   atexit (close_stdout);
    1989             : 
    1990             : #if HAVE_SETCHRCLASS
    1991             :   setchrclass (NULL);
    1992             : #endif
    1993             : 
    1994          83 :   while (optchar = getopt_long (argc, argv, "AF:GM:ORS:TW:b:i:fg:o:trw:",
    1995             :                                 long_options, NULL),
    1996             :          optchar != EOF)
    1997             :     {
    1998          44 :       switch (optchar)
    1999             :         {
    2000           7 :         default:
    2001           7 :           usage (EXIT_FAILURE);
    2002             : 
    2003           3 :         case 'G':
    2004           3 :           gnu_extensions = false;
    2005           3 :           break;
    2006             : 
    2007           2 :         case 'b':
    2008           2 :           break_file = optarg;
    2009           2 :           break;
    2010             : 
    2011           0 :         case 'f':
    2012           0 :           ignore_case = true;
    2013           0 :           break;
    2014             : 
    2015           3 :         case 'g':
    2016             :           {
    2017             :             unsigned long int tmp_ulong;
    2018           3 :             if (xstrtoul (optarg, NULL, 0, &tmp_ulong, NULL) != LONGINT_OK
    2019           0 :                 || ! (0 < tmp_ulong && tmp_ulong <= INT_MAX))
    2020           3 :               error (EXIT_FAILURE, 0, _("invalid gap width: %s"),
    2021             :                      quotearg (optarg));
    2022           0 :             gap_size = tmp_ulong;
    2023           0 :             break;
    2024             :           }
    2025             : 
    2026           0 :         case 'i':
    2027           0 :           ignore_file = optarg;
    2028           0 :           break;
    2029             : 
    2030           0 :         case 'o':
    2031           0 :           only_file = optarg;
    2032           0 :           break;
    2033             : 
    2034           0 :         case 'r':
    2035           0 :           input_reference = true;
    2036           0 :           break;
    2037             : 
    2038           0 :         case 't':
    2039             :           /* Yet to understand...  */
    2040           0 :           break;
    2041             : 
    2042           0 :         case 'w':
    2043             :           {
    2044             :             unsigned long int tmp_ulong;
    2045           0 :             if (xstrtoul (optarg, NULL, 0, &tmp_ulong, NULL) != LONGINT_OK
    2046           0 :                 || ! (0 < tmp_ulong && tmp_ulong <= INT_MAX))
    2047           0 :               error (EXIT_FAILURE, 0, _("invalid line width: %s"),
    2048             :                      quotearg (optarg));
    2049           0 :             line_width = tmp_ulong;
    2050           0 :             break;
    2051             :           }
    2052             : 
    2053          11 :         case 'A':
    2054          11 :           auto_reference = true;
    2055          11 :           break;
    2056             : 
    2057          18 :         case 'F':
    2058          18 :           truncation_string = copy_unescaped_string (optarg);
    2059          18 :           break;
    2060             : 
    2061           0 :         case 'M':
    2062           0 :           macro_name = optarg;
    2063           0 :           break;
    2064             : 
    2065           0 :         case 'O':
    2066           0 :           output_format = ROFF_FORMAT;
    2067           0 :           break;
    2068             : 
    2069           0 :         case 'R':
    2070           0 :           right_reference = true;
    2071           0 :           break;
    2072             : 
    2073           0 :         case 'S':
    2074           0 :           context_regex.string = copy_unescaped_string (optarg);
    2075           0 :           break;
    2076             : 
    2077           0 :         case 'T':
    2078           0 :           output_format = TEX_FORMAT;
    2079           0 :           break;
    2080             : 
    2081           0 :         case 'W':
    2082           0 :           word_regex.string = copy_unescaped_string (optarg);
    2083           0 :           if (!*word_regex.string)
    2084           0 :             word_regex.string = NULL;
    2085           0 :           break;
    2086             : 
    2087           0 :         case 10:
    2088           0 :           output_format = XARGMATCH ("--format", optarg,
    2089             :                                      format_args, format_vals);
    2090           0 :         case_GETOPT_HELP_CHAR;
    2091             : 
    2092           0 :         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
    2093             :         }
    2094             :     }
    2095             : 
    2096             :   /* Process remaining arguments.  If GNU extensions are enabled, process
    2097             :      all arguments as input parameters.  If disabled, accept at most two
    2098             :      arguments, the second of which is an output parameter.  */
    2099             : 
    2100          73 :   if (optind == argc)
    2101             :     {
    2102             : 
    2103             :       /* No more argument simply means: read standard input.  */
    2104             : 
    2105          39 :       input_file_name = xmalloc (sizeof *input_file_name);
    2106          39 :       file_line_count = xmalloc (sizeof *file_line_count);
    2107          39 :       number_input_files = 1;
    2108          39 :       input_file_name[0] = NULL;
    2109             :     }
    2110          34 :   else if (gnu_extensions)
    2111             :     {
    2112          32 :       number_input_files = argc - optind;
    2113          32 :       input_file_name = xmalloc (number_input_files * sizeof *input_file_name);
    2114          32 :       file_line_count = xmalloc (number_input_files * sizeof *file_line_count);
    2115             : 
    2116          78 :       for (file_index = 0; file_index < number_input_files; file_index++)
    2117             :         {
    2118          46 :           input_file_name[file_index] = argv[optind];
    2119          46 :           if (!*argv[optind] || STREQ (argv[optind], "-"))
    2120          42 :             input_file_name[0] = NULL;
    2121             :           else
    2122           4 :             input_file_name[0] = argv[optind];
    2123          46 :           optind++;
    2124             :         }
    2125             :     }
    2126             :   else
    2127             :     {
    2128             : 
    2129             :       /* There is one necessary input file.  */
    2130             : 
    2131           2 :       number_input_files = 1;
    2132           2 :       input_file_name = xmalloc (sizeof *input_file_name);
    2133           2 :       file_line_count = xmalloc (sizeof *file_line_count);
    2134           2 :       if (!*argv[optind] || STREQ (argv[optind], "-"))
    2135           2 :         input_file_name[0] = NULL;
    2136             :       else
    2137           0 :         input_file_name[0] = argv[optind];
    2138           2 :       optind++;
    2139             : 
    2140             :       /* Redirect standard output, only if requested.  */
    2141             : 
    2142           2 :       if (optind < argc)
    2143             :         {
    2144           0 :           if (! freopen (argv[optind], "w", stdout))
    2145           0 :             error (EXIT_FAILURE, errno, "%s", argv[optind]);
    2146           0 :           optind++;
    2147             :         }
    2148             : 
    2149             :       /* Diagnose any other argument as an error.  */
    2150             : 
    2151           2 :       if (optind < argc)
    2152             :         {
    2153           0 :           error (0, 0, _("extra operand %s"), quote (argv[optind]));
    2154           0 :           usage (EXIT_FAILURE);
    2155             :         }
    2156             :     }
    2157             : 
    2158             :   /* If the output format has not been explicitly selected, choose dumb
    2159             :      terminal format if GNU extensions are enabled, else `roff' format.  */
    2160             : 
    2161          73 :   if (output_format == UNKNOWN_FORMAT)
    2162          73 :     output_format = gnu_extensions ? DUMB_FORMAT : ROFF_FORMAT;
    2163             : 
    2164             :   /* Initialize the main tables.  */
    2165             : 
    2166          73 :   initialize_regex ();
    2167             : 
    2168             :   /* Read `Break character' file, if any.  */
    2169             : 
    2170          73 :   if (break_file)
    2171           2 :     digest_break_file (break_file);
    2172             : 
    2173             :   /* Read `Ignore words' file and `Only words' files, if any.  If any of
    2174             :      these files is empty, reset the name of the file to NULL, to avoid
    2175             :      unnecessary calls to search_table. */
    2176             : 
    2177          73 :   if (ignore_file)
    2178             :     {
    2179           0 :       digest_word_file (ignore_file, &ignore_table);
    2180           0 :       if (ignore_table.length == 0)
    2181           0 :         ignore_file = NULL;
    2182             :     }
    2183             : 
    2184          73 :   if (only_file)
    2185             :     {
    2186           0 :       digest_word_file (only_file, &only_table);
    2187           0 :       if (only_table.length == 0)
    2188           0 :         only_file = NULL;
    2189             :     }
    2190             : 
    2191             :   /* Prepare to study all the input files.  */
    2192             : 
    2193          73 :   number_of_occurs[0] = 0;
    2194          73 :   total_line_count = 0;
    2195          73 :   maximum_word_length = 0;
    2196          73 :   reference_max_width = 0;
    2197             : 
    2198         143 :   for (file_index = 0; file_index < number_input_files; file_index++)
    2199             :     {
    2200             : 
    2201             :       /* Read the file in core, than study it.  */
    2202             : 
    2203          84 :       swallow_file_in_memory (input_file_name[file_index], &text_buffer);
    2204          70 :       find_occurs_in_text ();
    2205             : 
    2206             :       /* Maintain for each file how many lines has been read so far when its
    2207             :          end is reached.  Incrementing the count first is a simple kludge to
    2208             :          handle a possible incomplete line at end of file.  */
    2209             : 
    2210          70 :       total_line_count++;
    2211          70 :       file_line_count[file_index] = total_line_count;
    2212             :     }
    2213             : 
    2214             :   /* Do the output process phase.  */
    2215             : 
    2216          59 :   sort_found_occurs ();
    2217          59 :   fix_output_parameters ();
    2218          59 :   generate_all_output ();
    2219             : 
    2220             :   /* All done.  */
    2221             : 
    2222          59 :   exit (EXIT_SUCCESS);
    2223             : }

Generated by: LCOV version 1.10