LCOV - code coverage report
Current view: top level - src - fmt.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 284 325 87.4 %
Date: 2018-01-30 Functions: 19 20 95.0 %

          Line data    Source code
       1             : /* GNU fmt -- simple text formatter.
       2             :    Copyright (C) 1994-2006 Free Software Foundation, Inc.
       3             : 
       4             :    This program is free software: you can redistribute it and/or modify
       5             :    it under the terms of the GNU General Public License as published by
       6             :    the Free Software Foundation, either version 3 of the License, or
       7             :    (at your option) any later version.
       8             : 
       9             :    This program is distributed in the hope that it will be useful,
      10             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :    GNU General Public License for more details.
      13             : 
      14             :    You should have received a copy of the GNU General Public License
      15             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      16             : 
      17             : /* Written by Ross Paterson <rap@doc.ic.ac.uk>.  */
      18             : 
      19             : #include <config.h>
      20             : #include <stdio.h>
      21             : #include <sys/types.h>
      22             : #include <getopt.h>
      23             : 
      24             : /* Redefine `word'.  Otherwise, systems (Unicos for one) whose headers
      25             :    define it as a type get syntax errors for the `word' array below.  */
      26             : #define word unused_word_type
      27             : 
      28             : #include "system.h"
      29             : #include "error.h"
      30             : #include "quote.h"
      31             : #include "xstrtol.h"
      32             : 
      33             : /* The official name of this program (e.g., no `g' prefix).  */
      34             : #define PROGRAM_NAME "fmt"
      35             : 
      36             : #define AUTHORS "Ross Paterson"
      37             : 
      38             : /* The following parameters represent the program's idea of what is
      39             :    "best".  Adjust to taste, subject to the caveats given.  */
      40             : 
      41             : /* Default longest permitted line length (max_width).  */
      42             : #define WIDTH   75
      43             : 
      44             : /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
      45             :    room for optimization.  */
      46             : #define LEEWAY  7
      47             : 
      48             : /* The default secondary indent of tagged paragraph used for unindented
      49             :    one-line paragraphs not preceded by any multi-line paragraphs.  */
      50             : #define DEF_INDENT 3
      51             : 
      52             : /* Costs and bonuses are expressed as the equivalent departure from the
      53             :    optimal line length, multiplied by 10.  e.g. assigning something a
      54             :    cost of 50 means that it is as bad as a line 5 characters too short
      55             :    or too long.  The definition of SHORT_COST(n) should not be changed.
      56             :    However, EQUIV(n) may need tuning.  */
      57             : 
      58             : /* FIXME: "fmt" misbehaves given large inputs or options.  One
      59             :    possible workaround for part of the problem is to change COST to be
      60             :    a floating-point type.  There are other problems besides COST,
      61             :    though; see MAXWORDS below.  */
      62             : 
      63             : typedef long int COST;
      64             : 
      65             : #define MAXCOST TYPE_MAXIMUM (COST)
      66             : 
      67             : #define SQR(n)          ((n) * (n))
      68             : #define EQUIV(n)        SQR ((COST) (n))
      69             : 
      70             : /* Cost of a filled line n chars longer or shorter than best_width.  */
      71             : #define SHORT_COST(n)   EQUIV ((n) * 10)
      72             : 
      73             : /* Cost of the difference between adjacent filled lines.  */
      74             : #define RAGGED_COST(n)  (SHORT_COST (n) / 2)
      75             : 
      76             : /* Basic cost per line.  */
      77             : #define LINE_COST       EQUIV (70)
      78             : 
      79             : /* Cost of breaking a line after the first word of a sentence, where
      80             :    the length of the word is N.  */
      81             : #define WIDOW_COST(n)   (EQUIV (200) / ((n) + 2))
      82             : 
      83             : /* Cost of breaking a line before the last word of a sentence, where
      84             :    the length of the word is N.  */
      85             : #define ORPHAN_COST(n)  (EQUIV (150) / ((n) + 2))
      86             : 
      87             : /* Bonus for breaking a line at the end of a sentence.  */
      88             : #define SENTENCE_BONUS  EQUIV (50)
      89             : 
      90             : /* Cost of breaking a line after a period not marking end of a sentence.
      91             :    With the definition of sentence we are using (borrowed from emacs, see
      92             :    get_line()) such a break would then look like a sentence break.  Hence
      93             :    we assign a very high cost -- it should be avoided unless things are
      94             :    really bad.  */
      95             : #define NOBREAK_COST    EQUIV (600)
      96             : 
      97             : /* Bonus for breaking a line before open parenthesis.  */
      98             : #define PAREN_BONUS     EQUIV (40)
      99             : 
     100             : /* Bonus for breaking a line after other punctuation.  */
     101             : #define PUNCT_BONUS     EQUIV(40)
     102             : 
     103             : /* Credit for breaking a long paragraph one line later.  */
     104             : #define LINE_CREDIT     EQUIV(3)
     105             : 
     106             : /* Size of paragraph buffer, in words and characters.  Longer paragraphs
     107             :    are handled neatly (cf. flush_paragraph()), so long as these values
     108             :    are considerably greater than required by the width.  These values
     109             :    cannot be extended indefinitely: doing so would run into size limits
     110             :    and/or cause more overflows in cost calculations.  FIXME: Remove these
     111             :    arbitrary limits.  */
     112             : 
     113             : #define MAXWORDS        1000
     114             : #define MAXCHARS        5000
     115             : 
     116             : /* Extra ctype(3)-style macros.  */
     117             : 
     118             : #define isopen(c)       (strchr ("([`'\"", c) != NULL)
     119             : #define isclose(c)      (strchr (")]'\"", c) != NULL)
     120             : #define isperiod(c)     (strchr (".?!", c) != NULL)
     121             : 
     122             : /* Size of a tab stop, for expansion on input and re-introduction on
     123             :    output.  */
     124             : #define TABWIDTH        8
     125             : 
     126             : /* Word descriptor structure.  */
     127             : 
     128             : typedef struct Word WORD;
     129             : 
     130             : struct Word
     131             :   {
     132             : 
     133             :     /* Static attributes determined during input.  */
     134             : 
     135             :     const char *text;           /* the text of the word */
     136             :     int length;                 /* length of this word */
     137             :     int space;                  /* the size of the following space */
     138             :     unsigned int paren:1;       /* starts with open paren */
     139             :     unsigned int period:1;      /* ends in [.?!])* */
     140             :     unsigned int punct:1;       /* ends in punctuation */
     141             :     unsigned int final:1;       /* end of sentence */
     142             : 
     143             :     /* The remaining fields are computed during the optimization.  */
     144             : 
     145             :     int line_length;            /* length of the best line starting here */
     146             :     COST best_cost;             /* cost of best paragraph starting here */
     147             :     WORD *next_break;           /* break which achieves best_cost */
     148             :   };
     149             : 
     150             : /* Forward declarations.  */
     151             : 
     152             : static void set_prefix (char *p);
     153             : static void fmt (FILE *f);
     154             : static bool get_paragraph (FILE *f);
     155             : static int get_line (FILE *f, int c);
     156             : static int get_prefix (FILE *f);
     157             : static int get_space (FILE *f, int c);
     158             : static int copy_rest (FILE *f, int c);
     159             : static bool same_para (int c);
     160             : static void flush_paragraph (void);
     161             : static void fmt_paragraph (void);
     162             : static void check_punctuation (WORD *w);
     163             : static COST base_cost (WORD *this);
     164             : static COST line_cost (WORD *next, int len);
     165             : static void put_paragraph (WORD *finish);
     166             : static void put_line (WORD *w, int indent);
     167             : static void put_word (WORD *w);
     168             : static void put_space (int space);
     169             : 
     170             : /* The name this program was run with.  */
     171             : const char *program_name;
     172             : 
     173             : /* Option values.  */
     174             : 
     175             : /* If true, first 2 lines may have different indent (default false).  */
     176             : static bool crown;
     177             : 
     178             : /* If true, first 2 lines _must_ have different indent (default false).  */
     179             : static bool tagged;
     180             : 
     181             : /* If true, each line is a paragraph on its own (default false).  */
     182             : static bool split;
     183             : 
     184             : /* If true, don't preserve inter-word spacing (default false).  */
     185             : static bool uniform;
     186             : 
     187             : /* Prefix minus leading and trailing spaces (default "").  */
     188             : static const char *prefix;
     189             : 
     190             : /* User-supplied maximum line width (default WIDTH).  The only output
     191             :    lines longer than this will each comprise a single word.  */
     192             : static int max_width;
     193             : 
     194             : /* Values derived from the option values.  */
     195             : 
     196             : /* The length of prefix minus leading space.  */
     197             : static int prefix_full_length;
     198             : 
     199             : /* The length of the leading space trimmed from the prefix.  */
     200             : static int prefix_lead_space;
     201             : 
     202             : /* The length of prefix minus leading and trailing space.  */
     203             : static int prefix_length;
     204             : 
     205             : /* The preferred width of text lines, set to LEEWAY % less than max_width.  */
     206             : static int best_width;
     207             : 
     208             : /* Dynamic variables.  */
     209             : 
     210             : /* Start column of the character most recently read from the input file.  */
     211             : static int in_column;
     212             : 
     213             : /* Start column of the next character to be written to stdout.  */
     214             : static int out_column;
     215             : 
     216             : /* Space for the paragraph text -- longer paragraphs are handled neatly
     217             :    (cf. flush_paragraph()).  */
     218             : static char parabuf[MAXCHARS];
     219             : 
     220             : /* A pointer into parabuf, indicating the first unused character position.  */
     221             : static char *wptr;
     222             : 
     223             : /* The words of a paragraph -- longer paragraphs are handled neatly
     224             :    (cf. flush_paragraph()).  */
     225             : static WORD word[MAXWORDS];
     226             : 
     227             : /* A pointer into the above word array, indicating the first position
     228             :    after the last complete word.  Sometimes it will point at an incomplete
     229             :    word.  */
     230             : static WORD *word_limit;
     231             : 
     232             : /* If true, current input file contains tab characters, and so tabs can be
     233             :    used for white space on output.  */
     234             : static bool tabs;
     235             : 
     236             : /* Space before trimmed prefix on each line of the current paragraph.  */
     237             : static int prefix_indent;
     238             : 
     239             : /* Indentation of the first line of the current paragraph.  */
     240             : static int first_indent;
     241             : 
     242             : /* Indentation of other lines of the current paragraph.  */
     243             : static int other_indent;
     244             : 
     245             : /* To detect the end of a paragraph, we need to look ahead to the first
     246             :    non-blank character after the prefix on the next line, or the first
     247             :    character on the following line that failed to match the prefix.
     248             :    We can reconstruct the lookahead from that character (next_char), its
     249             :    position on the line (in_column) and the amount of space before the
     250             :    prefix (next_prefix_indent).  See get_paragraph() and copy_rest().  */
     251             : 
     252             : /* The last character read from the input file.  */
     253             : static int next_char;
     254             : 
     255             : /* The space before the trimmed prefix (or part of it) on the next line
     256             :    after the current paragraph.  */
     257             : static int next_prefix_indent;
     258             : 
     259             : /* If nonzero, the length of the last line output in the current
     260             :    paragraph, used to charge for raggedness at the split point for long
     261             :    paragraphs chosen by fmt_paragraph().  */
     262             : static int last_line_length;
     263             : 
     264             : void
     265          10 : usage (int status)
     266             : {
     267          10 :   if (status != EXIT_SUCCESS)
     268          10 :     fprintf (stderr, _("Try `%s --help' for more information.\n"),
     269             :              program_name);
     270             :   else
     271             :     {
     272           0 :       printf (_("Usage: %s [-DIGITS] [OPTION]... [FILE]...\n"), program_name);
     273           0 :       fputs (_("\
     274             : Reformat each paragraph in the FILE(s), writing to standard output.\n\
     275             : If no FILE or if FILE is `-', read standard input.\n\
     276             : \n\
     277             : "), stdout);
     278           0 :       fputs (_("\
     279             : Mandatory arguments to long options are mandatory for short options too.\n\
     280             : "), stdout);
     281           0 :       fputs (_("\
     282             :   -c, --crown-margin        preserve indentation of first two lines\n\
     283             :   -p, --prefix=STRING       reformat only lines beginning with STRING,\n\
     284             :                               reattaching the prefix to reformatted lines\n\
     285             :   -s, --split-only          split long lines, but do not refill\n\
     286             : "),
     287             :              stdout);
     288           0 :       fputs (_("\
     289             :   -t, --tagged-paragraph    indentation of first line different from second\n\
     290             :   -u, --uniform-spacing     one space between words, two after sentences\n\
     291             :   -w, --width=WIDTH         maximum line width (default of 75 columns)\n\
     292             : "), stdout);
     293           0 :       fputs (HELP_OPTION_DESCRIPTION, stdout);
     294           0 :       fputs (VERSION_OPTION_DESCRIPTION, stdout);
     295           0 :       fputs (_("\
     296             : \n\
     297             : With no FILE, or when FILE is -, read standard input.\n"),
     298             :              stdout);
     299           0 :       emit_bug_reporting_address ();
     300             :     }
     301          10 :   exit (status);
     302             : }
     303             : 
     304             : /* Decode options and launch execution.  */
     305             : 
     306             : static const struct option long_options[] =
     307             : {
     308             :   {"crown-margin", no_argument, NULL, 'c'},
     309             :   {"prefix", required_argument, NULL, 'p'},
     310             :   {"split-only", no_argument, NULL, 's'},
     311             :   {"tagged-paragraph", no_argument, NULL, 't'},
     312             :   {"uniform-spacing", no_argument, NULL, 'u'},
     313             :   {"width", required_argument, NULL, 'w'},
     314             :   {GETOPT_HELP_OPTION_DECL},
     315             :   {GETOPT_VERSION_OPTION_DECL},
     316             :   {NULL, 0, NULL, 0},
     317             : };
     318             : 
     319             : int
     320          99 : main (int argc, char **argv)
     321             : {
     322             :   int optchar;
     323          99 :   bool ok = true;
     324          99 :   char const *max_width_option = NULL;
     325             : 
     326             :   initialize_main (&argc, &argv);
     327          99 :   program_name = argv[0];
     328          99 :   setlocale (LC_ALL, "");
     329             :   bindtextdomain (PACKAGE, LOCALEDIR);
     330             :   textdomain (PACKAGE);
     331             : 
     332          99 :   atexit (close_stdout);
     333             : 
     334          99 :   crown = tagged = split = uniform = false;
     335          99 :   max_width = WIDTH;
     336          99 :   prefix = "";
     337          99 :   prefix_length = prefix_lead_space = prefix_full_length = 0;
     338             : 
     339          99 :   if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
     340             :     {
     341             :       /* Old option syntax; a dash followed by one or more digits.  */
     342          26 :       max_width_option = argv[1] + 1;
     343             : 
     344             :       /* Make the option we just parsed invisible to getopt.  */
     345          26 :       argv[1] = argv[0];
     346          26 :       argv++;
     347          26 :       argc--;
     348             :     }
     349             : 
     350         231 :   while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
     351             :                                  long_options, NULL))
     352             :          != -1)
     353          43 :     switch (optchar)
     354             :       {
     355          10 :       default:
     356          10 :         if (ISDIGIT (optchar))
     357           4 :           error (0, 0, _("invalid option -- %c; -WIDTH is recognized\
     358             :  only when it is the first\noption; use -w N instead"),
     359             :                  optchar);
     360          10 :         usage (EXIT_FAILURE);
     361             : 
     362           8 :       case 'c':
     363           8 :         crown = true;
     364           8 :         break;
     365             : 
     366           2 :       case 's':
     367           2 :         split = true;
     368           2 :         break;
     369             : 
     370           4 :       case 't':
     371           4 :         tagged = true;
     372           4 :         break;
     373             : 
     374           2 :       case 'u':
     375           2 :         uniform = true;
     376           2 :         break;
     377             : 
     378           5 :       case 'w':
     379           5 :         max_width_option = optarg;
     380           5 :         break;
     381             : 
     382          12 :       case 'p':
     383          12 :         set_prefix (optarg);
     384          12 :         break;
     385             : 
     386           0 :       case_GETOPT_HELP_CHAR;
     387             : 
     388           0 :       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
     389             : 
     390             :       }
     391             : 
     392          89 :   if (max_width_option)
     393             :     {
     394             :       /* Limit max_width to MAXCHARS / 2; otherwise, the resulting
     395             :          output can be quite ugly.  */
     396             :       unsigned long int tmp;
     397          40 :       if (! (xstrtoul (max_width_option, NULL, 10, &tmp, "") == LONGINT_OK
     398          11 :              && tmp <= MAXCHARS / 2))
     399          19 :         error (EXIT_FAILURE, 0, _("invalid width: %s"),
     400             :                quote (max_width_option));
     401          10 :       max_width = tmp;
     402             :     }
     403             : 
     404          70 :   best_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
     405             : 
     406          70 :   if (optind == argc)
     407          37 :     fmt (stdin);
     408             :   else
     409             :     {
     410          81 :       for (; optind < argc; optind++)
     411             :         {
     412          48 :           char *file = argv[optind];
     413          48 :           if (STREQ (file, "-"))
     414          26 :             fmt (stdin);
     415             :           else
     416             :             {
     417             :               FILE *in_stream;
     418          22 :               in_stream = fopen (file, "r");
     419          22 :               if (in_stream != NULL)
     420             :                 {
     421          11 :                   fmt (in_stream);
     422          11 :                   if (fclose (in_stream) == EOF)
     423             :                     {
     424           0 :                       error (0, errno, "%s", file);
     425           0 :                       ok = false;
     426             :                     }
     427             :                 }
     428             :               else
     429             :                 {
     430          11 :                   error (0, errno, _("cannot open %s for reading"),
     431             :                          quote (file));
     432          11 :                   ok = false;
     433             :                 }
     434             :             }
     435             :         }
     436             :     }
     437             : 
     438          70 :   exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
     439             : }
     440             : 
     441             : /* Trim space from the front and back of the string P, yielding the prefix,
     442             :    and record the lengths of the prefix and the space trimmed.  */
     443             : 
     444             : static void
     445          12 : set_prefix (char *p)
     446             : {
     447             :   char *s;
     448             : 
     449          12 :   prefix_lead_space = 0;
     450          33 :   while (*p == ' ')
     451             :     {
     452           9 :       prefix_lead_space++;
     453           9 :       p++;
     454             :     }
     455          12 :   prefix = p;
     456          12 :   prefix_full_length = strlen (p);
     457          12 :   s = p + prefix_full_length;
     458          24 :   while (s > p && s[-1] == ' ')
     459           0 :     s--;
     460          12 :   *s = '\0';
     461          12 :   prefix_length = s - p;
     462          12 : }
     463             : 
     464             : /* Read file F and send formatted output to stdout.  */
     465             : 
     466             : static void
     467          74 : fmt (FILE *f)
     468             : {
     469          74 :   tabs = false;
     470          74 :   other_indent = 0;
     471          74 :   next_char = get_prefix (f);
     472         182 :   while (get_paragraph (f))
     473             :     {
     474          34 :       fmt_paragraph ();
     475          34 :       put_paragraph (word_limit);
     476             :     }
     477          74 : }
     478             : 
     479             : /* Set the global variable `other_indent' according to SAME_PARAGRAPH
     480             :    and other global variables.  */
     481             : 
     482             : static void
     483          34 : set_other_indent (bool same_paragraph)
     484             : {
     485          34 :   if (split)
     486           1 :     other_indent = first_indent;
     487          33 :   else if (crown)
     488             :     {
     489           3 :       other_indent = (same_paragraph ? in_column : first_indent);
     490             :     }
     491          30 :   else if (tagged)
     492             :     {
     493           3 :       if (same_paragraph && in_column != first_indent)
     494             :         {
     495           1 :           other_indent = in_column;
     496             :         }
     497             : 
     498             :       /* Only one line: use the secondary indent from last time if it
     499             :          splits, or 0 if there have been no multi-line paragraphs in the
     500             :          input so far.  But if these rules make the two indents the same,
     501             :          pick a new secondary indent.  */
     502             : 
     503           2 :       else if (other_indent == first_indent)
     504           1 :         other_indent = first_indent == 0 ? DEF_INDENT : 0;
     505             :     }
     506             :   else
     507             :     {
     508          27 :       other_indent = first_indent;
     509             :     }
     510          34 : }
     511             : 
     512             : /* Read a paragraph from input file F.  A paragraph consists of a
     513             :    maximal number of non-blank (excluding any prefix) lines subject to:
     514             :    * In split mode, a paragraph is a single non-blank line.
     515             :    * In crown mode, the second and subsequent lines must have the
     516             :    same indentation, but possibly different from the indent of the
     517             :    first line.
     518             :    * Tagged mode is similar, but the first and second lines must have
     519             :    different indentations.
     520             :    * Otherwise, all lines of a paragraph must have the same indent.
     521             :    If a prefix is in effect, it must be present at the same indent for
     522             :    each line in the paragraph.
     523             : 
     524             :    Return false if end-of-file was encountered before the start of a
     525             :    paragraph, else true.  */
     526             : 
     527             : static bool
     528         108 : get_paragraph (FILE *f)
     529             : {
     530             :   int c;
     531             : 
     532         108 :   last_line_length = 0;
     533         108 :   c = next_char;
     534             : 
     535             :   /* Scan (and copy) blank lines, and lines not introduced by the prefix.  */
     536             : 
     537         223 :   while (c == '\n' || c == EOF
     538          36 :          || next_prefix_indent < prefix_lead_space
     539          35 :          || in_column < next_prefix_indent + prefix_full_length)
     540             :     {
     541          81 :       c = copy_rest (f, c);
     542          81 :       if (c == EOF)
     543             :         {
     544          74 :           next_char = EOF;
     545          74 :           return false;
     546             :         }
     547           7 :       putchar ('\n');
     548           7 :       c = get_prefix (f);
     549             :     }
     550             : 
     551             :   /* Got a suitable first line for a paragraph.  */
     552             : 
     553          34 :   prefix_indent = next_prefix_indent;
     554          34 :   first_indent = in_column;
     555          34 :   wptr = parabuf;
     556          34 :   word_limit = word;
     557          34 :   c = get_line (f, c);
     558          34 :   set_other_indent (same_para (c));
     559             : 
     560             :   /* Read rest of paragraph (unless split is specified).  */
     561             : 
     562          34 :   if (split)
     563             :     {
     564             :       /* empty */
     565             :     }
     566          33 :   else if (crown)
     567             :     {
     568           3 :       if (same_para (c))
     569             :         {
     570             :           do
     571             :             {                   /* for each line till the end of the para */
     572           3 :               c = get_line (f, c);
     573             :             }
     574           3 :           while (same_para (c) && in_column == other_indent);
     575             :         }
     576             :     }
     577          30 :   else if (tagged)
     578             :     {
     579           3 :       if (same_para (c) && in_column != first_indent)
     580             :         {
     581             :           do
     582             :             {                   /* for each line till the end of the para */
     583           1 :               c = get_line (f, c);
     584             :             }
     585           1 :           while (same_para (c) && in_column == other_indent);
     586             :         }
     587             :     }
     588             :   else
     589             :     {
     590          58 :       while (same_para (c) && in_column == other_indent)
     591           4 :         c = get_line (f, c);
     592             :     }
     593          34 :   (word_limit - 1)->period = (word_limit - 1)->final = true;
     594          34 :   next_char = c;
     595          34 :   return true;
     596             : }
     597             : 
     598             : /* Copy to the output a line that failed to match the prefix, or that
     599             :    was blank after the prefix.  In the former case, C is the character
     600             :    that failed to match the prefix.  In the latter, C is \n or EOF.
     601             :    Return the character (\n or EOF) ending the line.  */
     602             : 
     603             : static int
     604          81 : copy_rest (FILE *f, int c)
     605             : {
     606             :   const char *s;
     607             : 
     608          81 :   out_column = 0;
     609          81 :   if (in_column > next_prefix_indent || (c != '\n' && c != EOF))
     610             :     {
     611          39 :       put_space (next_prefix_indent);
     612          42 :       for (s = prefix; out_column != in_column && *s; out_column++)
     613           3 :         putchar (*s++);
     614          39 :       if (c != EOF && c != '\n')
     615           2 :         put_space (in_column - out_column);
     616          39 :       if (c == EOF && in_column >= next_prefix_indent + prefix_length)
     617          34 :         putchar ('\n');
     618             :     }
     619         164 :   while (c != '\n' && c != EOF)
     620             :     {
     621           2 :       putchar (c);
     622           2 :       c = getc (f);
     623             :     }
     624          81 :   return c;
     625             : }
     626             : 
     627             : /* Return true if a line whose first non-blank character after the
     628             :    prefix (if any) is C could belong to the current paragraph,
     629             :    otherwise false.  */
     630             : 
     631             : static bool
     632          75 : same_para (int c)
     633             : {
     634          75 :   return (next_prefix_indent == prefix_indent
     635          71 :           && in_column >= next_prefix_indent + prefix_full_length
     636         146 :           && c != '\n' && c != EOF);
     637             : }
     638             : 
     639             : /* Read a line from input file F, given first non-blank character C
     640             :    after the prefix, and the following indent, and break it into words.
     641             :    A word is a maximal non-empty string of non-white characters.  A word
     642             :    ending in [.?!]["')\]]* and followed by end-of-line or at least two
     643             :    spaces ends a sentence, as in emacs.
     644             : 
     645             :    Return the first non-blank character of the next line.  */
     646             : 
     647             : static int
     648          42 : get_line (FILE *f, int c)
     649             : {
     650             :   int start;
     651             :   char *end_of_parabuf;
     652             :   WORD *end_of_word;
     653             : 
     654          42 :   end_of_parabuf = &parabuf[MAXCHARS];
     655          42 :   end_of_word = &word[MAXWORDS - 2];
     656             : 
     657             :   do
     658             :     {                           /* for each word in a line */
     659             : 
     660             :       /* Scan word.  */
     661             : 
     662          55 :       word_limit->text = wptr;
     663             :       do
     664             :         {
     665         161 :           if (wptr == end_of_parabuf)
     666             :             {
     667           0 :               set_other_indent (true);
     668           0 :               flush_paragraph ();
     669             :             }
     670         161 :           *wptr++ = c;
     671         161 :           c = getc (f);
     672             :         }
     673         161 :       while (c != EOF && !isspace (c));
     674          55 :       in_column += word_limit->length = wptr - word_limit->text;
     675          55 :       check_punctuation (word_limit);
     676             : 
     677             :       /* Scan inter-word space.  */
     678             : 
     679          55 :       start = in_column;
     680          55 :       c = get_space (f, c);
     681          55 :       word_limit->space = in_column - start;
     682         110 :       word_limit->final = (c == EOF
     683         142 :                            || (word_limit->period
     684           4 :                                && (c == '\n' || word_limit->space > 1)));
     685          55 :       if (c == '\n' || c == EOF || uniform)
     686          43 :         word_limit->space = word_limit->final ? 2 : 1;
     687          55 :       if (word_limit == end_of_word)
     688             :         {
     689           0 :           set_other_indent (true);
     690           0 :           flush_paragraph ();
     691             :         }
     692          55 :       word_limit++;
     693             :     }
     694          55 :   while (c != '\n' && c != EOF);
     695          42 :   return get_prefix (f);
     696             : }
     697             : 
     698             : /* Read a prefix from input file F.  Return either first non-matching
     699             :    character, or first non-blank character after the prefix.  */
     700             : 
     701             : static int
     702         123 : get_prefix (FILE *f)
     703             : {
     704             :   int c;
     705             : 
     706         123 :   in_column = 0;
     707         123 :   c = get_space (f, getc (f));
     708         123 :   if (prefix_length == 0)
     709         115 :     next_prefix_indent = prefix_lead_space < in_column ?
     710         115 :       prefix_lead_space : in_column;
     711             :   else
     712             :     {
     713             :       const char *p;
     714           8 :       next_prefix_indent = in_column;
     715          12 :       for (p = prefix; *p != '\0'; p++)
     716             :         {
     717           8 :           unsigned char pc = *p;
     718           8 :           if (c != pc)
     719           4 :             return c;
     720           4 :           in_column++;
     721           4 :           c = getc (f);
     722             :         }
     723           4 :       c = get_space (f, c);
     724             :     }
     725         119 :   return c;
     726             : }
     727             : 
     728             : /* Read blank characters from input file F, starting with C, and keeping
     729             :    in_column up-to-date.  Return first non-blank character.  */
     730             : 
     731             : static int
     732         524 : get_space (FILE *f, int c)
     733             : {
     734             :   for (;;)
     735             :     {
     736         866 :       if (c == ' ')
     737          21 :         in_column++;
     738         503 :       else if (c == '\t')
     739             :         {
     740         321 :           tabs = true;
     741         321 :           in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
     742             :         }
     743             :       else
     744         364 :         return c;
     745         342 :       c = getc (f);
     746             :     }
     747             : }
     748             : 
     749             : /* Set extra fields in word W describing any attached punctuation.  */
     750             : 
     751             : static void
     752          55 : check_punctuation (WORD *w)
     753             : {
     754          55 :   char const *start = w->text;
     755          55 :   char const *finish = start + (w->length - 1);
     756          55 :   unsigned char fin = *finish;
     757             : 
     758          55 :   w->paren = isopen (*start);
     759          55 :   w->punct = !! ispunct (fin);
     760         214 :   while (start < finish && isclose (*finish))
     761         104 :     finish--;
     762          55 :   w->period = isperiod (*finish);
     763          55 : }
     764             : 
     765             : /* Flush part of the paragraph to make room.  This function is called on
     766             :    hitting the limit on the number of words or characters.  */
     767             : 
     768             : static void
     769           0 : flush_paragraph (void)
     770             : {
     771             :   WORD *split_point;
     772             :   WORD *w;
     773             :   int shift;
     774             :   COST best_break;
     775             : 
     776             :   /* In the special case where it's all one word, just flush it.  */
     777             : 
     778           0 :   if (word_limit == word)
     779             :     {
     780           0 :       fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
     781           0 :       wptr = parabuf;
     782           0 :       return;
     783             :     }
     784             : 
     785             :   /* Otherwise:
     786             :      - format what you have so far as a paragraph,
     787             :      - find a low-cost line break near the end,
     788             :      - output to there,
     789             :      - make that the start of the paragraph.  */
     790             : 
     791           0 :   fmt_paragraph ();
     792             : 
     793             :   /* Choose a good split point.  */
     794             : 
     795           0 :   split_point = word_limit;
     796           0 :   best_break = MAXCOST;
     797           0 :   for (w = word->next_break; w != word_limit; w = w->next_break)
     798             :     {
     799           0 :       if (w->best_cost - w->next_break->best_cost < best_break)
     800             :         {
     801           0 :           split_point = w;
     802           0 :           best_break = w->best_cost - w->next_break->best_cost;
     803             :         }
     804           0 :       if (best_break <= MAXCOST - LINE_CREDIT)
     805           0 :         best_break += LINE_CREDIT;
     806             :     }
     807           0 :   put_paragraph (split_point);
     808             : 
     809             :   /* Copy text of words down to start of parabuf -- we use memmove because
     810             :      the source and target may overlap.  */
     811             : 
     812           0 :   memmove (parabuf, split_point->text, wptr - split_point->text);
     813           0 :   shift = split_point->text - parabuf;
     814           0 :   wptr -= shift;
     815             : 
     816             :   /* Adjust text pointers.  */
     817             : 
     818           0 :   for (w = split_point; w <= word_limit; w++)
     819           0 :     w->text -= shift;
     820             : 
     821             :   /* Copy words from split_point down to word -- we use memmove because
     822             :      the source and target may overlap.  */
     823             : 
     824           0 :   memmove (word, split_point, (word_limit - split_point + 1) * sizeof *word);
     825           0 :   word_limit -= split_point - word;
     826             : }
     827             : 
     828             : /* Compute the optimal formatting for the whole paragraph by computing
     829             :    and remembering the optimal formatting for each suffix from the empty
     830             :    one to the whole paragraph.  */
     831             : 
     832             : static void
     833          34 : fmt_paragraph (void)
     834             : {
     835             :   WORD *start, *w;
     836             :   int len;
     837             :   COST wcost, best;
     838             :   int saved_length;
     839             : 
     840          34 :   word_limit->best_cost = 0;
     841          34 :   saved_length = word_limit->length;
     842          34 :   word_limit->length = max_width;    /* sentinel */
     843             : 
     844          89 :   for (start = word_limit - 1; start >= word; start--)
     845             :     {
     846          55 :       best = MAXCOST;
     847          55 :       len = start == word ? first_indent : other_indent;
     848             : 
     849             :       /* At least one word, however long, in the line.  */
     850             : 
     851          55 :       w = start;
     852          55 :       len += w->length;
     853             :       do
     854             :         {
     855          77 :           w++;
     856             : 
     857             :           /* Consider breaking before w.  */
     858             : 
     859          77 :           wcost = line_cost (w, len) + w->best_cost;
     860          77 :           if (start == word && last_line_length > 0)
     861           0 :             wcost += RAGGED_COST (len - last_line_length);
     862          77 :           if (wcost < best)
     863             :             {
     864          76 :               best = wcost;
     865          76 :               start->next_break = w;
     866          76 :               start->line_length = len;
     867             :             }
     868             : 
     869             :           /* This is a kludge to keep us from computing `len' as the
     870             :              sum of the sentinel length and some non-zero number.
     871             :              Since the sentinel w->length may be INT_MAX, adding
     872             :              to that would give a negative result.  */
     873          77 :           if (w == word_limit)
     874          51 :             break;
     875             : 
     876          26 :           len += (w - 1)->space + w->length;      /* w > start >= word */
     877             :         }
     878          26 :       while (len < max_width);
     879          55 :       start->best_cost = best + base_cost (start);
     880             :     }
     881             : 
     882          34 :   word_limit->length = saved_length;
     883          34 : }
     884             : 
     885             : /* Return the constant component of the cost of breaking before the
     886             :    word THIS.  */
     887             : 
     888             : static COST
     889          55 : base_cost (WORD *this)
     890             : {
     891             :   COST cost;
     892             : 
     893          55 :   cost = LINE_COST;
     894             : 
     895          55 :   if (this > word)
     896             :     {
     897          21 :       if ((this - 1)->period)
     898             :         {
     899           3 :           if ((this - 1)->final)
     900           1 :             cost -= SENTENCE_BONUS;
     901             :           else
     902           2 :             cost += NOBREAK_COST;
     903             :         }
     904          18 :       else if ((this - 1)->punct)
     905          14 :         cost -= PUNCT_BONUS;
     906           4 :       else if (this > word + 1 && (this - 2)->final)
     907           1 :         cost += WIDOW_COST ((this - 1)->length);
     908             :     }
     909             : 
     910          55 :   if (this->paren)
     911          43 :     cost -= PAREN_BONUS;
     912          12 :   else if (this->final)
     913           9 :     cost += ORPHAN_COST (this->length);
     914             : 
     915          55 :   return cost;
     916             : }
     917             : 
     918             : /* Return the component of the cost of breaking before word NEXT that
     919             :    depends on LEN, the length of the line beginning there.  */
     920             : 
     921             : static COST
     922          77 : line_cost (WORD *next, int len)
     923             : {
     924             :   int n;
     925             :   COST cost;
     926             : 
     927          77 :   if (next == word_limit)
     928          51 :     return 0;
     929          26 :   n = best_width - len;
     930          26 :   cost = SHORT_COST (n);
     931          26 :   if (next->next_break != word_limit)
     932             :     {
     933           1 :       n = len - next->line_length;
     934           1 :       cost += RAGGED_COST (n);
     935             :     }
     936          26 :   return cost;
     937             : }
     938             : 
     939             : /* Output to stdout a paragraph from word up to (but not including)
     940             :    FINISH, which must be in the next_break chain from word.  */
     941             : 
     942             : static void
     943          34 : put_paragraph (WORD *finish)
     944             : {
     945             :   WORD *w;
     946             : 
     947          34 :   put_line (word, first_indent);
     948          38 :   for (w = word->next_break; w != finish; w = w->next_break)
     949           4 :     put_line (w, other_indent);
     950          34 : }
     951             : 
     952             : /* Output to stdout the line beginning with word W, beginning in column
     953             :    INDENT, including the prefix (if any).  */
     954             : 
     955             : static void
     956          38 : put_line (WORD *w, int indent)
     957             : {
     958             :   WORD *endline;
     959             : 
     960          38 :   out_column = 0;
     961          38 :   put_space (prefix_indent);
     962          38 :   fputs (prefix, stdout);
     963          38 :   out_column += prefix_length;
     964          38 :   put_space (indent - out_column);
     965             : 
     966          38 :   endline = w->next_break - 1;
     967          55 :   for (; w != endline; w++)
     968             :     {
     969          17 :       put_word (w);
     970          17 :       put_space (w->space);
     971             :     }
     972          38 :   put_word (w);
     973          38 :   last_line_length = out_column;
     974          38 :   putchar ('\n');
     975          38 : }
     976             : 
     977             : /* Output to stdout the word W.  */
     978             : 
     979             : static void
     980          55 : put_word (WORD *w)
     981             : {
     982             :   const char *s;
     983             :   int n;
     984             : 
     985          55 :   s = w->text;
     986         216 :   for (n = w->length; n != 0; n--)
     987         161 :     putchar (*s++);
     988          55 :   out_column += w->length;
     989          55 : }
     990             : 
     991             : /* Output to stdout SPACE spaces, or equivalent tabs.  */
     992             : 
     993             : static void
     994         134 : put_space (int space)
     995             : {
     996             :   int space_target, tab_target;
     997             : 
     998         134 :   space_target = out_column + space;
     999         134 :   if (tabs)
    1000             :     {
    1001          99 :       tab_target = space_target / TABWIDTH * TABWIDTH;
    1002          99 :       if (out_column + 1 < tab_target)
    1003          81 :         while (out_column < tab_target)
    1004             :           {
    1005          37 :             putchar ('\t');
    1006          37 :             out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
    1007             :           }
    1008             :     }
    1009         299 :   while (out_column < space_target)
    1010             :     {
    1011          31 :       putchar (' ');
    1012          31 :       out_column++;
    1013             :     }
    1014         134 : }

Generated by: LCOV version 1.10