Line data Source code
1 : /* GNU fmt -- simple text formatter.
2 : Copyright (C) 1994-2006 Free Software Foundation, Inc.
3 :
4 : This program is free software: you can redistribute it and/or modify
5 : it under the terms of the GNU General Public License as published by
6 : the Free Software Foundation, either version 3 of the License, or
7 : (at your option) any later version.
8 :
9 : This program is distributed in the hope that it will be useful,
10 : but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : GNU General Public License for more details.
13 :
14 : You should have received a copy of the GNU General Public License
15 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
16 :
17 : /* Written by Ross Paterson <rap@doc.ic.ac.uk>. */
18 :
19 : #include <config.h>
20 : #include <stdio.h>
21 : #include <sys/types.h>
22 : #include <getopt.h>
23 :
24 : /* Redefine. Otherwise, systems (Unicos for one) with headers that define
25 : it to be a type get syntax errors for the variable declaration below. */
26 : #define word unused_word_type
27 :
28 : #include "system.h"
29 : #include "error.h"
30 : #include "quote.h"
31 : #include "xstrtol.h"
32 :
33 : /* The official name of this program (e.g., no `g' prefix). */
34 : #define PROGRAM_NAME "fmt"
35 :
36 : #define AUTHORS "Ross Paterson"
37 :
38 : /* The following parameters represent the program's idea of what is
39 : "best". Adjust to taste, subject to the caveats given. */
40 :
41 : /* Default longest permitted line length (max_width). */
42 : #define WIDTH 75
43 :
44 : /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
45 : room for optimization. */
46 : #define LEEWAY 7
47 :
48 : /* The default secondary indent of tagged paragraph used for unindented
49 : one-line paragraphs not preceded by any multi-line paragraphs. */
50 : #define DEF_INDENT 3
51 :
52 : /* Costs and bonuses are expressed as the equivalent departure from the
53 : optimal line length, multiplied by 10. e.g. assigning something a
54 : cost of 50 means that it is as bad as a line 5 characters too short
55 : or too long. The definition of SHORT_COST(n) should not be changed.
56 : However, EQUIV(n) may need tuning. */
57 :
58 : /* FIXME: "fmt" misbehaves given large inputs or options. One
59 : possible workaround for part of the problem is to change COST to be
60 : a floating-point type. There are other problems besides COST,
61 : though; see MAXWORDS below. */
62 :
63 : typedef long int COST;
64 :
65 : #define MAXCOST TYPE_MAXIMUM (COST)
66 :
67 : #define SQR(n) ((n) * (n))
68 : #define EQUIV(n) SQR ((COST) (n))
69 :
70 : /* Cost of a filled line n chars longer or shorter than best_width. */
71 : #define SHORT_COST(n) EQUIV ((n) * 10)
72 :
73 : /* Cost of the difference between adjacent filled lines. */
74 : #define RAGGED_COST(n) (SHORT_COST (n) / 2)
75 :
76 : /* Basic cost per line. */
77 : #define LINE_COST EQUIV (70)
78 :
79 : /* Cost of breaking a line after the first word of a sentence, where
80 : the length of the word is N. */
81 : #define WIDOW_COST(n) (EQUIV (200) / ((n) + 2))
82 :
83 : /* Cost of breaking a line before the last word of a sentence, where
84 : the length of the word is N. */
85 : #define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2))
86 :
87 : /* Bonus for breaking a line at the end of a sentence. */
88 : #define SENTENCE_BONUS EQUIV (50)
89 :
90 : /* Cost of breaking a line after a period not marking end of a sentence.
91 : With the definition of sentence we are using (borrowed from emacs, see
92 : get_line()) such a break would then look like a sentence break. Hence
93 : we assign a very high cost -- it should be avoided unless things are
94 : really bad. */
95 : #define NOBREAK_COST EQUIV (600)
96 :
97 : /* Bonus for breaking a line before open parenthesis. */
98 : #define PAREN_BONUS EQUIV (40)
99 :
100 : /* Bonus for breaking a line after other punctuation. */
101 : #define PUNCT_BONUS EQUIV(40)
102 :
103 : /* Credit for breaking a long paragraph one line later. */
104 : #define LINE_CREDIT EQUIV(3)
105 :
106 : /* Size of paragraph buffer, in words and characters. Longer paragraphs
107 : are handled neatly (cf. flush_paragraph()), so long as these values
108 : are considerably greater than required by the width. These values
109 : cannot be extended indefinitely: doing so would run into size limits
110 : and/or cause more overflows in cost calculations. FIXME: Remove these
111 : arbitrary limits. */
112 :
113 : #define MAXWORDS 1000
114 : #define MAXCHARS 5000
115 :
/* Extra ctype(3)-style macros.  Each one tests whether character C is
   a member of a small fixed set of punctuation characters.

   strchr treats the terminating NUL of the set string as part of the
   string, so a NUL byte would otherwise be reported as a member of
   every set; it is rejected explicitly.  C is evaluated twice, so it
   must not have side effects.  */

#define isopen(c) ((c) != '\0' && strchr ("([`'\"", (c)) != NULL)
#define isclose(c) ((c) != '\0' && strchr (")]'\"", (c)) != NULL)
#define isperiod(c) ((c) != '\0' && strchr (".?!", (c)) != NULL)
121 :
122 : /* Size of a tab stop, for expansion on input and re-introduction on
123 : output. */
124 : #define TABWIDTH 8
125 :
126 : /* Word descriptor structure. */
127 :
128 : typedef struct Word WORD;
129 :
130 : struct Word
131 : {
132 :
133 : /* Static attributes determined during input. */
134 :
135 : const char *text; /* the text of the word */
136 : int length; /* length of this word */
137 : int space; /* the size of the following space */
138 : unsigned int paren:1; /* starts with open paren */
139 : unsigned int period:1; /* ends in [.?!])* */
140 : unsigned int punct:1; /* ends in punctuation */
141 : unsigned int final:1; /* end of sentence */
142 :
143 : /* The remaining fields are computed during the optimization. */
144 :
145 : int line_length; /* length of the best line starting here */
146 : COST best_cost; /* cost of best paragraph starting here */
147 : WORD *next_break; /* break which achieves best_cost */
148 : };
149 :
150 : /* Forward declarations. */
151 :
152 : static void set_prefix (char *p);
153 : static void fmt (FILE *f);
154 : static bool get_paragraph (FILE *f);
155 : static int get_line (FILE *f, int c);
156 : static int get_prefix (FILE *f);
157 : static int get_space (FILE *f, int c);
158 : static int copy_rest (FILE *f, int c);
159 : static bool same_para (int c);
160 : static void flush_paragraph (void);
161 : static void fmt_paragraph (void);
162 : static void check_punctuation (WORD *w);
163 : static COST base_cost (WORD *this);
164 : static COST line_cost (WORD *next, int len);
165 : static void put_paragraph (WORD *finish);
166 : static void put_line (WORD *w, int indent);
167 : static void put_word (WORD *w);
168 : static void put_space (int space);
169 :
170 : /* The name this program was run with. */
171 : const char *program_name;
172 :
173 : /* Option values. */
174 :
175 : /* If true, first 2 lines may have different indent (default false). */
176 : static bool crown;
177 :
178 : /* If true, first 2 lines _must_ have different indent (default false). */
179 : static bool tagged;
180 :
181 : /* If true, each line is a paragraph on its own (default false). */
182 : static bool split;
183 :
184 : /* If true, don't preserve inter-word spacing (default false). */
185 : static bool uniform;
186 :
187 : /* Prefix minus leading and trailing spaces (default ""). */
188 : static const char *prefix;
189 :
190 : /* User-supplied maximum line width (default WIDTH). The only output
191 : lines longer than this will each comprise a single word. */
192 : static int max_width;
193 :
194 : /* Values derived from the option values. */
195 :
196 : /* The length of prefix minus leading space. */
197 : static int prefix_full_length;
198 :
199 : /* The length of the leading space trimmed from the prefix. */
200 : static int prefix_lead_space;
201 :
202 : /* The length of prefix minus leading and trailing space. */
203 : static int prefix_length;
204 :
205 : /* The preferred width of text lines, set to LEEWAY % less than max_width. */
206 : static int best_width;
207 :
208 : /* Dynamic variables. */
209 :
210 : /* Start column of the character most recently read from the input file. */
211 : static int in_column;
212 :
213 : /* Start column of the next character to be written to stdout. */
214 : static int out_column;
215 :
216 : /* Space for the paragraph text -- longer paragraphs are handled neatly
217 : (cf. flush_paragraph()). */
218 : static char parabuf[MAXCHARS];
219 :
220 : /* A pointer into parabuf, indicating the first unused character position. */
221 : static char *wptr;
222 :
223 : /* The words of a paragraph -- longer paragraphs are handled neatly
224 : (cf. flush_paragraph()). */
225 : static WORD word[MAXWORDS];
226 :
227 : /* A pointer into the above word array, indicating the first position
228 : after the last complete word. Sometimes it will point at an incomplete
229 : word. */
230 : static WORD *word_limit;
231 :
232 : /* If true, current input file contains tab characters, and so tabs can be
233 : used for white space on output. */
234 : static bool tabs;
235 :
236 : /* Space before trimmed prefix on each line of the current paragraph. */
237 : static int prefix_indent;
238 :
239 : /* Indentation of the first line of the current paragraph. */
240 : static int first_indent;
241 :
242 : /* Indentation of other lines of the current paragraph. */
243 : static int other_indent;
244 :
245 : /* To detect the end of a paragraph, we need to look ahead to the first
246 : non-blank character after the prefix on the next line, or the first
247 : character on the following line that failed to match the prefix.
248 : We can reconstruct the lookahead from that character (next_char), its
249 : position on the line (in_column) and the amount of space before the
250 : prefix (next_prefix_indent). See get_paragraph() and copy_rest(). */
251 :
252 : /* The last character read from the input file. */
253 : static int next_char;
254 :
255 : /* The space before the trimmed prefix (or part of it) on the next line
256 : after the current paragraph. */
257 : static int next_prefix_indent;
258 :
259 : /* If nonzero, the length of the last line output in the current
260 : paragraph, used to charge for raggedness at the split point for long
261 : paragraphs chosen by fmt_paragraph(). */
262 : static int last_line_length;
263 :
264 : void
265 10 : usage (int status)
266 : {
267 10 : if (status != EXIT_SUCCESS)
268 10 : fprintf (stderr, _("Try `%s --help' for more information.\n"),
269 : program_name);
270 : else
271 : {
272 0 : printf (_("Usage: %s [-DIGITS] [OPTION]... [FILE]...\n"), program_name);
273 0 : fputs (_("\
274 : Reformat each paragraph in the FILE(s), writing to standard output.\n\
275 : If no FILE or if FILE is `-', read standard input.\n\
276 : \n\
277 : "), stdout);
278 0 : fputs (_("\
279 : Mandatory arguments to long options are mandatory for short options too.\n\
280 : "), stdout);
281 0 : fputs (_("\
282 : -c, --crown-margin preserve indentation of first two lines\n\
283 : -p, --prefix=STRING reformat only lines beginning with STRING,\n\
284 : reattaching the prefix to reformatted lines\n\
285 : -s, --split-only split long lines, but do not refill\n\
286 : "),
287 : stdout);
288 0 : fputs (_("\
289 : -t, --tagged-paragraph indentation of first line different from second\n\
290 : -u, --uniform-spacing one space between words, two after sentences\n\
291 : -w, --width=WIDTH maximum line width (default of 75 columns)\n\
292 : "), stdout);
293 0 : fputs (HELP_OPTION_DESCRIPTION, stdout);
294 0 : fputs (VERSION_OPTION_DESCRIPTION, stdout);
295 0 : fputs (_("\
296 : \n\
297 : With no FILE, or when FILE is -, read standard input.\n"),
298 : stdout);
299 0 : emit_bug_reporting_address ();
300 : }
301 10 : exit (status);
302 : }
303 :
304 : /* Decode options and launch execution. */
305 :
306 : static const struct option long_options[] =
307 : {
308 : {"crown-margin", no_argument, NULL, 'c'},
309 : {"prefix", required_argument, NULL, 'p'},
310 : {"split-only", no_argument, NULL, 's'},
311 : {"tagged-paragraph", no_argument, NULL, 't'},
312 : {"uniform-spacing", no_argument, NULL, 'u'},
313 : {"width", required_argument, NULL, 'w'},
314 : {GETOPT_HELP_OPTION_DECL},
315 : {GETOPT_VERSION_OPTION_DECL},
316 : {NULL, 0, NULL, 0},
317 : };
318 :
319 : int
320 99 : main (int argc, char **argv)
321 : {
322 : int optchar;
323 99 : bool ok = true;
324 99 : char const *max_width_option = NULL;
325 :
326 : initialize_main (&argc, &argv);
327 99 : program_name = argv[0];
328 99 : setlocale (LC_ALL, "");
329 : bindtextdomain (PACKAGE, LOCALEDIR);
330 : textdomain (PACKAGE);
331 :
332 99 : atexit (close_stdout);
333 :
334 99 : crown = tagged = split = uniform = false;
335 99 : max_width = WIDTH;
336 99 : prefix = "";
337 99 : prefix_length = prefix_lead_space = prefix_full_length = 0;
338 :
339 99 : if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
340 : {
341 : /* Old option syntax; a dash followed by one or more digits. */
342 26 : max_width_option = argv[1] + 1;
343 :
344 : /* Make the option we just parsed invisible to getopt. */
345 26 : argv[1] = argv[0];
346 26 : argv++;
347 26 : argc--;
348 : }
349 :
350 231 : while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
351 : long_options, NULL))
352 : != -1)
353 43 : switch (optchar)
354 : {
355 10 : default:
356 10 : if (ISDIGIT (optchar))
357 4 : error (0, 0, _("invalid option -- %c; -WIDTH is recognized\
358 : only when it is the first\noption; use -w N instead"),
359 : optchar);
360 10 : usage (EXIT_FAILURE);
361 :
362 8 : case 'c':
363 8 : crown = true;
364 8 : break;
365 :
366 2 : case 's':
367 2 : split = true;
368 2 : break;
369 :
370 4 : case 't':
371 4 : tagged = true;
372 4 : break;
373 :
374 2 : case 'u':
375 2 : uniform = true;
376 2 : break;
377 :
378 5 : case 'w':
379 5 : max_width_option = optarg;
380 5 : break;
381 :
382 12 : case 'p':
383 12 : set_prefix (optarg);
384 12 : break;
385 :
386 0 : case_GETOPT_HELP_CHAR;
387 :
388 0 : case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
389 :
390 : }
391 :
392 89 : if (max_width_option)
393 : {
394 : /* Limit max_width to MAXCHARS / 2; otherwise, the resulting
395 : output can be quite ugly. */
396 : unsigned long int tmp;
397 40 : if (! (xstrtoul (max_width_option, NULL, 10, &tmp, "") == LONGINT_OK
398 11 : && tmp <= MAXCHARS / 2))
399 19 : error (EXIT_FAILURE, 0, _("invalid width: %s"),
400 : quote (max_width_option));
401 10 : max_width = tmp;
402 : }
403 :
404 70 : best_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
405 :
406 70 : if (optind == argc)
407 37 : fmt (stdin);
408 : else
409 : {
410 81 : for (; optind < argc; optind++)
411 : {
412 48 : char *file = argv[optind];
413 48 : if (STREQ (file, "-"))
414 26 : fmt (stdin);
415 : else
416 : {
417 : FILE *in_stream;
418 22 : in_stream = fopen (file, "r");
419 22 : if (in_stream != NULL)
420 : {
421 11 : fmt (in_stream);
422 11 : if (fclose (in_stream) == EOF)
423 : {
424 0 : error (0, errno, "%s", file);
425 0 : ok = false;
426 : }
427 : }
428 : else
429 : {
430 11 : error (0, errno, _("cannot open %s for reading"),
431 : quote (file));
432 11 : ok = false;
433 : }
434 : }
435 : }
436 : }
437 :
438 70 : exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
439 : }
440 :
441 : /* Trim space from the front and back of the string P, yielding the prefix,
442 : and record the lengths of the prefix and the space trimmed. */
443 :
444 : static void
445 12 : set_prefix (char *p)
446 : {
447 : char *s;
448 :
449 12 : prefix_lead_space = 0;
450 33 : while (*p == ' ')
451 : {
452 9 : prefix_lead_space++;
453 9 : p++;
454 : }
455 12 : prefix = p;
456 12 : prefix_full_length = strlen (p);
457 12 : s = p + prefix_full_length;
458 24 : while (s > p && s[-1] == ' ')
459 0 : s--;
460 12 : *s = '\0';
461 12 : prefix_length = s - p;
462 12 : }
463 :
464 : /* read file F and send formatted output to stdout. */
465 :
466 : static void
467 74 : fmt (FILE *f)
468 : {
469 74 : tabs = false;
470 74 : other_indent = 0;
471 74 : next_char = get_prefix (f);
472 182 : while (get_paragraph (f))
473 : {
474 34 : fmt_paragraph ();
475 34 : put_paragraph (word_limit);
476 : }
477 74 : }
478 :
479 : /* Set the global variable `other_indent' according to SAME_PARAGRAPH
480 : and other global variables. */
481 :
482 : static void
483 34 : set_other_indent (bool same_paragraph)
484 : {
485 34 : if (split)
486 1 : other_indent = first_indent;
487 33 : else if (crown)
488 : {
489 3 : other_indent = (same_paragraph ? in_column : first_indent);
490 : }
491 30 : else if (tagged)
492 : {
493 3 : if (same_paragraph && in_column != first_indent)
494 : {
495 1 : other_indent = in_column;
496 : }
497 :
498 : /* Only one line: use the secondary indent from last time if it
499 : splits, or 0 if there have been no multi-line paragraphs in the
500 : input so far. But if these rules make the two indents the same,
501 : pick a new secondary indent. */
502 :
503 2 : else if (other_indent == first_indent)
504 1 : other_indent = first_indent == 0 ? DEF_INDENT : 0;
505 : }
506 : else
507 : {
508 27 : other_indent = first_indent;
509 : }
510 34 : }
511 :
512 : /* Read a paragraph from input file F. A paragraph consists of a
513 : maximal number of non-blank (excluding any prefix) lines subject to:
514 : * In split mode, a paragraph is a single non-blank line.
515 : * In crown mode, the second and subsequent lines must have the
516 : same indentation, but possibly different from the indent of the
517 : first line.
518 : * Tagged mode is similar, but the first and second lines must have
519 : different indentations.
520 : * Otherwise, all lines of a paragraph must have the same indent.
521 : If a prefix is in effect, it must be present at the same indent for
522 : each line in the paragraph.
523 :
524 : Return false if end-of-file was encountered before the start of a
525 : paragraph, else true. */
526 :
527 : static bool
528 108 : get_paragraph (FILE *f)
529 : {
530 : int c;
531 :
532 108 : last_line_length = 0;
533 108 : c = next_char;
534 :
535 : /* Scan (and copy) blank lines, and lines not introduced by the prefix. */
536 :
537 223 : while (c == '\n' || c == EOF
538 36 : || next_prefix_indent < prefix_lead_space
539 35 : || in_column < next_prefix_indent + prefix_full_length)
540 : {
541 81 : c = copy_rest (f, c);
542 81 : if (c == EOF)
543 : {
544 74 : next_char = EOF;
545 74 : return false;
546 : }
547 7 : putchar ('\n');
548 7 : c = get_prefix (f);
549 : }
550 :
551 : /* Got a suitable first line for a paragraph. */
552 :
553 34 : prefix_indent = next_prefix_indent;
554 34 : first_indent = in_column;
555 34 : wptr = parabuf;
556 34 : word_limit = word;
557 34 : c = get_line (f, c);
558 34 : set_other_indent (same_para (c));
559 :
560 : /* Read rest of paragraph (unless split is specified). */
561 :
562 34 : if (split)
563 : {
564 : /* empty */
565 : }
566 33 : else if (crown)
567 : {
568 3 : if (same_para (c))
569 : {
570 : do
571 : { /* for each line till the end of the para */
572 3 : c = get_line (f, c);
573 : }
574 3 : while (same_para (c) && in_column == other_indent);
575 : }
576 : }
577 30 : else if (tagged)
578 : {
579 3 : if (same_para (c) && in_column != first_indent)
580 : {
581 : do
582 : { /* for each line till the end of the para */
583 1 : c = get_line (f, c);
584 : }
585 1 : while (same_para (c) && in_column == other_indent);
586 : }
587 : }
588 : else
589 : {
590 58 : while (same_para (c) && in_column == other_indent)
591 4 : c = get_line (f, c);
592 : }
593 34 : (word_limit - 1)->period = (word_limit - 1)->final = true;
594 34 : next_char = c;
595 34 : return true;
596 : }
597 :
598 : /* Copy to the output a line that failed to match the prefix, or that
599 : was blank after the prefix. In the former case, C is the character
600 : that failed to match the prefix. In the latter, C is \n or EOF.
601 : Return the character (\n or EOF) ending the line. */
602 :
603 : static int
604 81 : copy_rest (FILE *f, int c)
605 : {
606 : const char *s;
607 :
608 81 : out_column = 0;
609 81 : if (in_column > next_prefix_indent || (c != '\n' && c != EOF))
610 : {
611 39 : put_space (next_prefix_indent);
612 42 : for (s = prefix; out_column != in_column && *s; out_column++)
613 3 : putchar (*s++);
614 39 : if (c != EOF && c != '\n')
615 2 : put_space (in_column - out_column);
616 39 : if (c == EOF && in_column >= next_prefix_indent + prefix_length)
617 34 : putchar ('\n');
618 : }
619 164 : while (c != '\n' && c != EOF)
620 : {
621 2 : putchar (c);
622 2 : c = getc (f);
623 : }
624 81 : return c;
625 : }
626 :
627 : /* Return true if a line whose first non-blank character after the
628 : prefix (if any) is C could belong to the current paragraph,
629 : otherwise false. */
630 :
631 : static bool
632 75 : same_para (int c)
633 : {
634 75 : return (next_prefix_indent == prefix_indent
635 71 : && in_column >= next_prefix_indent + prefix_full_length
636 146 : && c != '\n' && c != EOF);
637 : }
638 :
639 : /* Read a line from input file F, given first non-blank character C
640 : after the prefix, and the following indent, and break it into words.
641 : A word is a maximal non-empty string of non-white characters. A word
642 : ending in [.?!]["')\]]* and followed by end-of-line or at least two
643 : spaces ends a sentence, as in emacs.
644 :
645 : Return the first non-blank character of the next line. */
646 :
647 : static int
648 42 : get_line (FILE *f, int c)
649 : {
650 : int start;
651 : char *end_of_parabuf;
652 : WORD *end_of_word;
653 :
654 42 : end_of_parabuf = ¶buf[MAXCHARS];
655 42 : end_of_word = &word[MAXWORDS - 2];
656 :
657 : do
658 : { /* for each word in a line */
659 :
660 : /* Scan word. */
661 :
662 55 : word_limit->text = wptr;
663 : do
664 : {
665 161 : if (wptr == end_of_parabuf)
666 : {
667 0 : set_other_indent (true);
668 0 : flush_paragraph ();
669 : }
670 161 : *wptr++ = c;
671 161 : c = getc (f);
672 : }
673 161 : while (c != EOF && !isspace (c));
674 55 : in_column += word_limit->length = wptr - word_limit->text;
675 55 : check_punctuation (word_limit);
676 :
677 : /* Scan inter-word space. */
678 :
679 55 : start = in_column;
680 55 : c = get_space (f, c);
681 55 : word_limit->space = in_column - start;
682 110 : word_limit->final = (c == EOF
683 142 : || (word_limit->period
684 4 : && (c == '\n' || word_limit->space > 1)));
685 55 : if (c == '\n' || c == EOF || uniform)
686 43 : word_limit->space = word_limit->final ? 2 : 1;
687 55 : if (word_limit == end_of_word)
688 : {
689 0 : set_other_indent (true);
690 0 : flush_paragraph ();
691 : }
692 55 : word_limit++;
693 : }
694 55 : while (c != '\n' && c != EOF);
695 42 : return get_prefix (f);
696 : }
697 :
698 : /* Read a prefix from input file F. Return either first non-matching
699 : character, or first non-blank character after the prefix. */
700 :
701 : static int
702 123 : get_prefix (FILE *f)
703 : {
704 : int c;
705 :
706 123 : in_column = 0;
707 123 : c = get_space (f, getc (f));
708 123 : if (prefix_length == 0)
709 115 : next_prefix_indent = prefix_lead_space < in_column ?
710 115 : prefix_lead_space : in_column;
711 : else
712 : {
713 : const char *p;
714 8 : next_prefix_indent = in_column;
715 12 : for (p = prefix; *p != '\0'; p++)
716 : {
717 8 : unsigned char pc = *p;
718 8 : if (c != pc)
719 4 : return c;
720 4 : in_column++;
721 4 : c = getc (f);
722 : }
723 4 : c = get_space (f, c);
724 : }
725 119 : return c;
726 : }
727 :
728 : /* Read blank characters from input file F, starting with C, and keeping
729 : in_column up-to-date. Return first non-blank character. */
730 :
731 : static int
732 524 : get_space (FILE *f, int c)
733 : {
734 : for (;;)
735 : {
736 866 : if (c == ' ')
737 21 : in_column++;
738 503 : else if (c == '\t')
739 : {
740 321 : tabs = true;
741 321 : in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
742 : }
743 : else
744 364 : return c;
745 342 : c = getc (f);
746 : }
747 : }
748 :
749 : /* Set extra fields in word W describing any attached punctuation. */
750 :
751 : static void
752 55 : check_punctuation (WORD *w)
753 : {
754 55 : char const *start = w->text;
755 55 : char const *finish = start + (w->length - 1);
756 55 : unsigned char fin = *finish;
757 :
758 55 : w->paren = isopen (*start);
759 55 : w->punct = !! ispunct (fin);
760 214 : while (start < finish && isclose (*finish))
761 104 : finish--;
762 55 : w->period = isperiod (*finish);
763 55 : }
764 :
765 : /* Flush part of the paragraph to make room. This function is called on
766 : hitting the limit on the number of words or characters. */
767 :
768 : static void
769 0 : flush_paragraph (void)
770 : {
771 : WORD *split_point;
772 : WORD *w;
773 : int shift;
774 : COST best_break;
775 :
776 : /* In the special case where it's all one word, just flush it. */
777 :
778 0 : if (word_limit == word)
779 : {
780 0 : fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
781 0 : wptr = parabuf;
782 0 : return;
783 : }
784 :
785 : /* Otherwise:
786 : - format what you have so far as a paragraph,
787 : - find a low-cost line break near the end,
788 : - output to there,
789 : - make that the start of the paragraph. */
790 :
791 0 : fmt_paragraph ();
792 :
793 : /* Choose a good split point. */
794 :
795 0 : split_point = word_limit;
796 0 : best_break = MAXCOST;
797 0 : for (w = word->next_break; w != word_limit; w = w->next_break)
798 : {
799 0 : if (w->best_cost - w->next_break->best_cost < best_break)
800 : {
801 0 : split_point = w;
802 0 : best_break = w->best_cost - w->next_break->best_cost;
803 : }
804 0 : if (best_break <= MAXCOST - LINE_CREDIT)
805 0 : best_break += LINE_CREDIT;
806 : }
807 0 : put_paragraph (split_point);
808 :
809 : /* Copy text of words down to start of parabuf -- we use memmove because
810 : the source and target may overlap. */
811 :
812 0 : memmove (parabuf, split_point->text, wptr - split_point->text);
813 0 : shift = split_point->text - parabuf;
814 0 : wptr -= shift;
815 :
816 : /* Adjust text pointers. */
817 :
818 0 : for (w = split_point; w <= word_limit; w++)
819 0 : w->text -= shift;
820 :
821 : /* Copy words from split_point down to word -- we use memmove because
822 : the source and target may overlap. */
823 :
824 0 : memmove (word, split_point, (word_limit - split_point + 1) * sizeof *word);
825 0 : word_limit -= split_point - word;
826 : }
827 :
828 : /* Compute the optimal formatting for the whole paragraph by computing
829 : and remembering the optimal formatting for each suffix from the empty
830 : one to the whole paragraph. */
831 :
832 : static void
833 34 : fmt_paragraph (void)
834 : {
835 : WORD *start, *w;
836 : int len;
837 : COST wcost, best;
838 : int saved_length;
839 :
840 34 : word_limit->best_cost = 0;
841 34 : saved_length = word_limit->length;
842 34 : word_limit->length = max_width; /* sentinel */
843 :
844 89 : for (start = word_limit - 1; start >= word; start--)
845 : {
846 55 : best = MAXCOST;
847 55 : len = start == word ? first_indent : other_indent;
848 :
849 : /* At least one word, however long, in the line. */
850 :
851 55 : w = start;
852 55 : len += w->length;
853 : do
854 : {
855 77 : w++;
856 :
857 : /* Consider breaking before w. */
858 :
859 77 : wcost = line_cost (w, len) + w->best_cost;
860 77 : if (start == word && last_line_length > 0)
861 0 : wcost += RAGGED_COST (len - last_line_length);
862 77 : if (wcost < best)
863 : {
864 76 : best = wcost;
865 76 : start->next_break = w;
866 76 : start->line_length = len;
867 : }
868 :
869 : /* This is a kludge to keep us from computing `len' as the
870 : sum of the sentinel length and some non-zero number.
871 : Since the sentinel w->length may be INT_MAX, adding
872 : to that would give a negative result. */
873 77 : if (w == word_limit)
874 51 : break;
875 :
876 26 : len += (w - 1)->space + w->length; /* w > start >= word */
877 : }
878 26 : while (len < max_width);
879 55 : start->best_cost = best + base_cost (start);
880 : }
881 :
882 34 : word_limit->length = saved_length;
883 34 : }
884 :
885 : /* Return the constant component of the cost of breaking before the
886 : word THIS. */
887 :
888 : static COST
889 55 : base_cost (WORD *this)
890 : {
891 : COST cost;
892 :
893 55 : cost = LINE_COST;
894 :
895 55 : if (this > word)
896 : {
897 21 : if ((this - 1)->period)
898 : {
899 3 : if ((this - 1)->final)
900 1 : cost -= SENTENCE_BONUS;
901 : else
902 2 : cost += NOBREAK_COST;
903 : }
904 18 : else if ((this - 1)->punct)
905 14 : cost -= PUNCT_BONUS;
906 4 : else if (this > word + 1 && (this - 2)->final)
907 1 : cost += WIDOW_COST ((this - 1)->length);
908 : }
909 :
910 55 : if (this->paren)
911 43 : cost -= PAREN_BONUS;
912 12 : else if (this->final)
913 9 : cost += ORPHAN_COST (this->length);
914 :
915 55 : return cost;
916 : }
917 :
918 : /* Return the component of the cost of breaking before word NEXT that
919 : depends on LEN, the length of the line beginning there. */
920 :
921 : static COST
922 77 : line_cost (WORD *next, int len)
923 : {
924 : int n;
925 : COST cost;
926 :
927 77 : if (next == word_limit)
928 51 : return 0;
929 26 : n = best_width - len;
930 26 : cost = SHORT_COST (n);
931 26 : if (next->next_break != word_limit)
932 : {
933 1 : n = len - next->line_length;
934 1 : cost += RAGGED_COST (n);
935 : }
936 26 : return cost;
937 : }
938 :
939 : /* Output to stdout a paragraph from word up to (but not including)
940 : FINISH, which must be in the next_break chain from word. */
941 :
942 : static void
943 34 : put_paragraph (WORD *finish)
944 : {
945 : WORD *w;
946 :
947 34 : put_line (word, first_indent);
948 38 : for (w = word->next_break; w != finish; w = w->next_break)
949 4 : put_line (w, other_indent);
950 34 : }
951 :
952 : /* Output to stdout the line beginning with word W, beginning in column
953 : INDENT, including the prefix (if any). */
954 :
955 : static void
956 38 : put_line (WORD *w, int indent)
957 : {
958 : WORD *endline;
959 :
960 38 : out_column = 0;
961 38 : put_space (prefix_indent);
962 38 : fputs (prefix, stdout);
963 38 : out_column += prefix_length;
964 38 : put_space (indent - out_column);
965 :
966 38 : endline = w->next_break - 1;
967 55 : for (; w != endline; w++)
968 : {
969 17 : put_word (w);
970 17 : put_space (w->space);
971 : }
972 38 : put_word (w);
973 38 : last_line_length = out_column;
974 38 : putchar ('\n');
975 38 : }
976 :
977 : /* Output to stdout the word W. */
978 :
979 : static void
980 55 : put_word (WORD *w)
981 : {
982 : const char *s;
983 : int n;
984 :
985 55 : s = w->text;
986 216 : for (n = w->length; n != 0; n--)
987 161 : putchar (*s++);
988 55 : out_column += w->length;
989 55 : }
990 :
991 : /* Output to stdout SPACE spaces, or equivalent tabs. */
992 :
993 : static void
994 134 : put_space (int space)
995 : {
996 : int space_target, tab_target;
997 :
998 134 : space_target = out_column + space;
999 134 : if (tabs)
1000 : {
1001 99 : tab_target = space_target / TABWIDTH * TABWIDTH;
1002 99 : if (out_column + 1 < tab_target)
1003 81 : while (out_column < tab_target)
1004 : {
1005 37 : putchar ('\t');
1006 37 : out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
1007 : }
1008 : }
1009 299 : while (out_column < space_target)
1010 : {
1011 31 : putchar (' ');
1012 31 : out_column++;
1013 : }
1014 134 : }
|