1 /* GNU fmt -- simple text formatter.
2    Copyright (C) 1994-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Ross Paterson <rap@doc.ic.ac.uk>.  */
18 
19 #include <config.h>
20 #include <stdio.h>
21 #include <sys/types.h>
22 #include <getopt.h>
23 
24 /* Redefine.  Otherwise, systems (Unicos for one) with headers that define
25    it to be a type get syntax errors for the variable declaration below.  */
26 #define word unused_word_type
27 
28 #include "c-ctype.h"
29 #include "system.h"
30 #include "fadvise.h"
31 #include "xdectoint.h"
32 
33 /* The official name of this program (e.g., no 'g' prefix).  */
34 #define PROGRAM_NAME "fmt"
35 
36 #define AUTHORS proper_name ("Ross Paterson")
37 
38 /* The following parameters represent the program's idea of what is
39    "best".  Adjust to taste, subject to the caveats given.  */
40 
41 /* Default longest permitted line length (max_width).  */
42 #define WIDTH	75
43 
44 /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
45    room for optimization.  */
46 #define LEEWAY	7
47 
48 /* The default secondary indent of tagged paragraph used for unindented
49    one-line paragraphs not preceded by any multi-line paragraphs.  */
50 #define DEF_INDENT 3
51 
52 /* Costs and bonuses are expressed as the equivalent departure from the
53    optimal line length, multiplied by 10.  e.g. assigning something a
54    cost of 50 means that it is as bad as a line 5 characters too short
55    or too long.  The definition of SHORT_COST(n) should not be changed.
56    However, EQUIV(n) may need tuning.  */
57 
58 /* FIXME: "fmt" misbehaves given large inputs or options.  One
59    possible workaround for part of the problem is to change COST to be
60    a floating-point type.  There are other problems besides COST,
61    though; see MAXWORDS below.  */
62 
63 typedef long int COST;
64 
65 #define MAXCOST	TYPE_MAXIMUM (COST)
66 
67 #define SQR(n)		((n) * (n))
68 #define EQUIV(n)	SQR ((COST) (n))
69 
70 /* Cost of a filled line n chars longer or shorter than goal_width.  */
71 #define SHORT_COST(n)	EQUIV ((n) * 10)
72 
73 /* Cost of the difference between adjacent filled lines.  */
74 #define RAGGED_COST(n)	(SHORT_COST (n) / 2)
75 
76 /* Basic cost per line.  */
77 #define LINE_COST	EQUIV (70)
78 
79 /* Cost of breaking a line after the first word of a sentence, where
80    the length of the word is N.  */
81 #define WIDOW_COST(n)	(EQUIV (200) / ((n) + 2))
82 
83 /* Cost of breaking a line before the last word of a sentence, where
84    the length of the word is N.  */
85 #define ORPHAN_COST(n)	(EQUIV (150) / ((n) + 2))
86 
87 /* Bonus for breaking a line at the end of a sentence.  */
88 #define SENTENCE_BONUS	EQUIV (50)
89 
90 /* Cost of breaking a line after a period not marking end of a sentence.
91    With the definition of sentence we are using (borrowed from emacs, see
92    get_line()) such a break would then look like a sentence break.  Hence
93    we assign a very high cost -- it should be avoided unless things are
94    really bad.  */
95 #define NOBREAK_COST	EQUIV (600)
96 
97 /* Bonus for breaking a line before open parenthesis.  */
98 #define PAREN_BONUS	EQUIV (40)
99 
100 /* Bonus for breaking a line after other punctuation.  */
101 #define PUNCT_BONUS	EQUIV(40)
102 
103 /* Credit for breaking a long paragraph one line later.  */
104 #define LINE_CREDIT	EQUIV(3)
105 
106 /* Size of paragraph buffer, in words and characters.  Longer paragraphs
107    are handled neatly (cf. flush_paragraph()), so long as these values
108    are considerably greater than required by the width.  These values
109    cannot be extended indefinitely: doing so would run into size limits
110    and/or cause more overflows in cost calculations.  FIXME: Remove these
111    arbitrary limits.  */
112 
113 #define MAXWORDS	1000
114 #define MAXCHARS	5000
115 
116 /* Extra ctype(3)-style macros.  */
117 
118 #define isopen(c)	(strchr ("(['`\"", c) != nullptr)
119 #define isclose(c)	(strchr (")]'\"", c) != nullptr)
120 #define isperiod(c)	(strchr (".?!", c) != nullptr)
121 
122 /* Size of a tab stop, for expansion on input and re-introduction on
123    output.  */
124 #define TABWIDTH	8
125 
126 /* Word descriptor structure.  */
127 
128 typedef struct Word WORD;
129 
130 struct Word
131   {
132 
133     /* Static attributes determined during input.  */
134 
135     char const *text;		/* the text of the word */
136     int length;			/* length of this word */
137     int space;			/* the size of the following space */
138     unsigned int paren:1;	/* starts with open paren */
139     unsigned int period:1;	/* ends in [.?!])* */
140     unsigned int punct:1;	/* ends in punctuation */
141     unsigned int final:1;	/* end of sentence */
142 
143     /* The remaining fields are computed during the optimization.  */
144 
145     int line_length;		/* length of the best line starting here */
146     COST best_cost;		/* cost of best paragraph starting here */
147     WORD *next_break;		/* break which achieves best_cost */
148   };
149 
150 /* Forward declarations.  */
151 
152 static void set_prefix (char *p);
153 static bool fmt (FILE *f, char const *);
154 static bool get_paragraph (FILE *f);
155 static int get_line (FILE *f, int c);
156 static int get_prefix (FILE *f);
157 static int get_space (FILE *f, int c);
158 static int copy_rest (FILE *f, int c);
159 static bool same_para (int c);
160 static void flush_paragraph (void);
161 static void fmt_paragraph (void);
162 static void check_punctuation (WORD *w);
163 static COST base_cost (WORD *this);
164 static COST line_cost (WORD *next, int len);
165 static void put_paragraph (WORD *finish);
166 static void put_line (WORD *w, int indent);
167 static void put_word (WORD *w);
168 static void put_space (int space);
169 
170 /* Option values.  */
171 
172 /* If true, first 2 lines may have different indent (default false).  */
173 static bool crown;
174 
175 /* If true, first 2 lines _must_ have different indent (default false).  */
176 static bool tagged;
177 
178 /* If true, each line is a paragraph on its own (default false).  */
179 static bool split;
180 
181 /* If true, don't preserve inter-word spacing (default false).  */
182 static bool uniform;
183 
184 /* Prefix minus leading and trailing spaces (default "").  */
185 static char const *prefix;
186 
187 /* User-supplied maximum line width (default WIDTH).  The only output
188    lines longer than this will each comprise a single word.  */
189 static int max_width;
190 
191 /* Values derived from the option values.  */
192 
193 /* The length of prefix minus leading space.  */
194 static int prefix_full_length;
195 
196 /* The length of the leading space trimmed from the prefix.  */
197 static int prefix_lead_space;
198 
199 /* The length of prefix minus leading and trailing space.  */
200 static int prefix_length;
201 
202 /* The preferred width of text lines, set to LEEWAY % less than max_width.  */
203 static int goal_width;
204 
205 /* Dynamic variables.  */
206 
207 /* Start column of the character most recently read from the input file.  */
208 static int in_column;
209 
210 /* Start column of the next character to be written to stdout.  */
211 static int out_column;
212 
213 /* Space for the paragraph text -- longer paragraphs are handled neatly
214    (cf. flush_paragraph()).  */
215 static char parabuf[MAXCHARS];
216 
217 /* A pointer into parabuf, indicating the first unused character position.  */
218 static char *wptr;
219 
220 /* The words of a paragraph -- longer paragraphs are handled neatly
221    (cf. flush_paragraph()).  */
222 static WORD word[MAXWORDS];
223 
224 /* A pointer into the above word array, indicating the first position
225    after the last complete word.  Sometimes it will point at an incomplete
226    word.  */
227 static WORD *word_limit;
228 
229 /* If true, current input file contains tab characters, and so tabs can be
230    used for white space on output.  */
231 static bool tabs;
232 
233 /* Space before trimmed prefix on each line of the current paragraph.  */
234 static int prefix_indent;
235 
236 /* Indentation of the first line of the current paragraph.  */
237 static int first_indent;
238 
239 /* Indentation of other lines of the current paragraph */
240 static int other_indent;
241 
242 /* To detect the end of a paragraph, we need to look ahead to the first
243    non-blank character after the prefix on the next line, or the first
244    character on the following line that failed to match the prefix.
245    We can reconstruct the lookahead from that character (next_char), its
246    position on the line (in_column) and the amount of space before the
247    prefix (next_prefix_indent).  See get_paragraph() and copy_rest().  */
248 
249 /* The last character read from the input file.  */
250 static int next_char;
251 
252 /* The space before the trimmed prefix (or part of it) on the next line
253    after the current paragraph.  */
254 static int next_prefix_indent;
255 
256 /* If nonzero, the length of the last line output in the current
257    paragraph, used to charge for raggedness at the split point for long
258    paragraphs chosen by fmt_paragraph().  */
259 static int last_line_length;
260 
261 void
usage(int status)262 usage (int status)
263 {
264   if (status != EXIT_SUCCESS)
265     emit_try_help ();
266   else
267     {
268       printf (_("Usage: %s [-WIDTH] [OPTION]... [FILE]...\n"), program_name);
269       fputs (_("\
270 Reformat each paragraph in the FILE(s), writing to standard output.\n\
271 The option -WIDTH is an abbreviated form of --width=DIGITS.\n\
272 "), stdout);
273 
274       emit_stdin_note ();
275       emit_mandatory_arg_note ();
276 
277       fputs (_("\
278   -c, --crown-margin        preserve indentation of first two lines\n\
279   -p, --prefix=STRING       reformat only lines beginning with STRING,\n\
280                               reattaching the prefix to reformatted lines\n\
281   -s, --split-only          split long lines, but do not refill\n\
282 "),
283              stdout);
284       /* Tell xgettext that the "% o" below is not a printf-style
285          format string:  xgettext:no-c-format */
286       fputs (_("\
287   -t, --tagged-paragraph    indentation of first line different from second\n\
288   -u, --uniform-spacing     one space between words, two after sentences\n\
289   -w, --width=WIDTH         maximum line width (default of 75 columns)\n\
290   -g, --goal=WIDTH          goal width (default of 93% of width)\n\
291 "), stdout);
292       fputs (HELP_OPTION_DESCRIPTION, stdout);
293       fputs (VERSION_OPTION_DESCRIPTION, stdout);
294       emit_ancillary_info (PROGRAM_NAME);
295     }
296   exit (status);
297 }
298 
299 /* Decode options and launch execution.  */
300 
301 static struct option const long_options[] =
302 {
303   {"crown-margin", no_argument, nullptr, 'c'},
304   {"prefix", required_argument, nullptr, 'p'},
305   {"split-only", no_argument, nullptr, 's'},
306   {"tagged-paragraph", no_argument, nullptr, 't'},
307   {"uniform-spacing", no_argument, nullptr, 'u'},
308   {"width", required_argument, nullptr, 'w'},
309   {"goal", required_argument, nullptr, 'g'},
310   {GETOPT_HELP_OPTION_DECL},
311   {GETOPT_VERSION_OPTION_DECL},
312   {nullptr, 0, nullptr, 0},
313 };
314 
315 int
main(int argc,char ** argv)316 main (int argc, char **argv)
317 {
318   int optchar;
319   bool ok = true;
320   char const *max_width_option = nullptr;
321   char const *goal_width_option = nullptr;
322 
323   initialize_main (&argc, &argv);
324   set_program_name (argv[0]);
325   setlocale (LC_ALL, "");
326   bindtextdomain (PACKAGE, LOCALEDIR);
327   textdomain (PACKAGE);
328 
329   atexit (close_stdout);
330 
331   crown = tagged = split = uniform = false;
332   max_width = WIDTH;
333   prefix = "";
334   prefix_length = prefix_lead_space = prefix_full_length = 0;
335 
336   if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
337     {
338       /* Old option syntax; a dash followed by one or more digits.  */
339       max_width_option = argv[1] + 1;
340 
341       /* Make the option we just parsed invisible to getopt.  */
342       argv[1] = argv[0];
343       argv++;
344       argc--;
345     }
346 
347   while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:g:",
348                                  long_options, nullptr))
349          != -1)
350     switch (optchar)
351       {
352       default:
353         if (ISDIGIT (optchar))
354           error (0, 0, _("invalid option -- %c; -WIDTH is recognized\
355  only when it is the first\noption; use -w N instead"),
356                  optchar);
357         usage (EXIT_FAILURE);
358 
359       case 'c':
360         crown = true;
361         break;
362 
363       case 's':
364         split = true;
365         break;
366 
367       case 't':
368         tagged = true;
369         break;
370 
371       case 'u':
372         uniform = true;
373         break;
374 
375       case 'w':
376         max_width_option = optarg;
377         break;
378 
379       case 'g':
380         goal_width_option = optarg;
381         break;
382 
383       case 'p':
384         set_prefix (optarg);
385         break;
386 
387       case_GETOPT_HELP_CHAR;
388 
389       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
390 
391       }
392 
393   if (max_width_option)
394     {
395       /* Limit max_width to MAXCHARS / 2; otherwise, the resulting
396          output can be quite ugly.  */
397       max_width = xdectoumax (max_width_option, 0, MAXCHARS / 2, "",
398                               _("invalid width"), 0);
399     }
400 
401   if (goal_width_option)
402     {
403       /* Limit goal_width to max_width.  */
404       goal_width = xdectoumax (goal_width_option, 0, max_width, "",
405                                _("invalid width"), 0);
406       if (max_width_option == nullptr)
407         max_width = goal_width + 10;
408     }
409   else
410     {
411       goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
412     }
413 
414   bool have_read_stdin = false;
415 
416   if (optind == argc)
417     {
418       have_read_stdin = true;
419       ok = fmt (stdin, "-");
420     }
421   else
422     {
423       for (; optind < argc; optind++)
424         {
425           char *file = argv[optind];
426           if (STREQ (file, "-"))
427             {
428               ok &= fmt (stdin, file);
429               have_read_stdin = true;
430             }
431           else
432             {
433               FILE *in_stream;
434               in_stream = fopen (file, "r");
435               if (in_stream != nullptr)
436                 ok &= fmt (in_stream, file);
437               else
438                 {
439                   error (0, errno, _("cannot open %s for reading"),
440                          quoteaf (file));
441                   ok = false;
442                 }
443             }
444         }
445     }
446 
447   if (have_read_stdin && fclose (stdin) != 0)
448     error (EXIT_FAILURE, errno, "%s", _("closing standard input"));
449 
450   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
451 }
452 
453 /* Trim space from the front and back of the string P, yielding the prefix,
454    and record the lengths of the prefix and the space trimmed.  */
455 
456 static void
set_prefix(char * p)457 set_prefix (char *p)
458 {
459   char *s;
460 
461   prefix_lead_space = 0;
462   while (*p == ' ')
463     {
464       prefix_lead_space++;
465       p++;
466     }
467   prefix = p;
468   prefix_full_length = strlen (p);
469   s = p + prefix_full_length;
470   while (s > p && s[-1] == ' ')
471     s--;
472   *s = '\0';
473   prefix_length = s - p;
474 }
475 
476 /* Read F and send formatted output to stdout.
477    Close F when done, unless F is stdin.  Diagnose input errors, using FILE.
478    If !F, assume F resulted from an fopen failure and diagnose that.
479    Return true if successful.  */
480 
481 static bool
fmt(FILE * f,char const * file)482 fmt (FILE *f, char const *file)
483 {
484   fadvise (f, FADVISE_SEQUENTIAL);
485   tabs = false;
486   other_indent = 0;
487   next_char = get_prefix (f);
488   while (get_paragraph (f))
489     {
490       fmt_paragraph ();
491       put_paragraph (word_limit);
492     }
493 
494   int err = ferror (f) ? 0 : -1;
495   if (f == stdin)
496     clearerr (f);
497   else if (fclose (f) != 0 && err < 0)
498     err = errno;
499   if (0 <= err)
500     error (0, err, err ? "%s" : _("read error"), quotef (file));
501   return err < 0;
502 }
503 
504 /* Set the global variable 'other_indent' according to SAME_PARAGRAPH
505    and other global variables.  */
506 
507 static void
set_other_indent(bool same_paragraph)508 set_other_indent (bool same_paragraph)
509 {
510   if (split)
511     other_indent = first_indent;
512   else if (crown)
513     {
514       other_indent = (same_paragraph ? in_column : first_indent);
515     }
516   else if (tagged)
517     {
518       if (same_paragraph && in_column != first_indent)
519         {
520           other_indent = in_column;
521         }
522 
523       /* Only one line: use the secondary indent from last time if it
524          splits, or 0 if there have been no multi-line paragraphs in the
525          input so far.  But if these rules make the two indents the same,
526          pick a new secondary indent.  */
527 
528       else if (other_indent == first_indent)
529         other_indent = first_indent == 0 ? DEF_INDENT : 0;
530     }
531   else
532     {
533       other_indent = first_indent;
534     }
535 }
536 
537 /* Read a paragraph from input file F.  A paragraph consists of a
538    maximal number of non-blank (excluding any prefix) lines subject to:
539    * In split mode, a paragraph is a single non-blank line.
540    * In crown mode, the second and subsequent lines must have the
541    same indentation, but possibly different from the indent of the
542    first line.
543    * Tagged mode is similar, but the first and second lines must have
544    different indentations.
545    * Otherwise, all lines of a paragraph must have the same indent.
546    If a prefix is in effect, it must be present at the same indent for
547    each line in the paragraph.
548 
549    Return false if end-of-file was encountered before the start of a
550    paragraph, else true.  */
551 
552 static bool
get_paragraph(FILE * f)553 get_paragraph (FILE *f)
554 {
555   int c;
556 
557   last_line_length = 0;
558   c = next_char;
559 
560   /* Scan (and copy) blank lines, and lines not introduced by the prefix.  */
561 
562   while (c == '\n' || c == EOF
563          || next_prefix_indent < prefix_lead_space
564          || in_column < next_prefix_indent + prefix_full_length)
565     {
566       c = copy_rest (f, c);
567       if (c == EOF)
568         {
569           next_char = EOF;
570           return false;
571         }
572       putchar ('\n');
573       c = get_prefix (f);
574     }
575 
576   /* Got a suitable first line for a paragraph.  */
577 
578   prefix_indent = next_prefix_indent;
579   first_indent = in_column;
580   wptr = parabuf;
581   word_limit = word;
582   c = get_line (f, c);
583   set_other_indent (same_para (c));
584 
585   /* Read rest of paragraph (unless split is specified).  */
586 
587   if (split)
588     {
589       /* empty */
590     }
591   else if (crown)
592     {
593       if (same_para (c))
594         {
595           do
596             {			/* for each line till the end of the para */
597               c = get_line (f, c);
598             }
599           while (same_para (c) && in_column == other_indent);
600         }
601     }
602   else if (tagged)
603     {
604       if (same_para (c) && in_column != first_indent)
605         {
606           do
607             {			/* for each line till the end of the para */
608               c = get_line (f, c);
609             }
610           while (same_para (c) && in_column == other_indent);
611         }
612     }
613   else
614     {
615       while (same_para (c) && in_column == other_indent)
616         c = get_line (f, c);
617     }
618 
619   (word_limit - 1)->period = (word_limit - 1)->final = true;
620   next_char = c;
621   return true;
622 }
623 
624 /* Copy to the output a line that failed to match the prefix, or that
625    was blank after the prefix.  In the former case, C is the character
626    that failed to match the prefix.  In the latter, C is \n or EOF.
627    Return the character (\n or EOF) ending the line.  */
628 
629 static int
copy_rest(FILE * f,int c)630 copy_rest (FILE *f, int c)
631 {
632   char const *s;
633 
634   out_column = 0;
635   if (in_column > next_prefix_indent || (c != '\n' && c != EOF))
636     {
637       put_space (next_prefix_indent);
638       for (s = prefix; out_column != in_column && *s; out_column++)
639         putchar (*s++);
640       if (c != EOF && c != '\n')
641         put_space (in_column - out_column);
642       if (c == EOF && in_column >= next_prefix_indent + prefix_length)
643         putchar ('\n');
644     }
645   while (c != '\n' && c != EOF)
646     {
647       putchar (c);
648       c = getc (f);
649     }
650   return c;
651 }
652 
653 /* Return true if a line whose first non-blank character after the
654    prefix (if any) is C could belong to the current paragraph,
655    otherwise false.  */
656 
657 static bool
same_para(int c)658 same_para (int c)
659 {
660   return (next_prefix_indent == prefix_indent
661           && in_column >= next_prefix_indent + prefix_full_length
662           && c != '\n' && c != EOF);
663 }
664 
665 /* Read a line from input file F, given first non-blank character C
666    after the prefix, and the following indent, and break it into words.
667    A word is a maximal non-empty string of non-white characters.  A word
668    ending in [.?!][])"']* and followed by end-of-line or at least two
669    spaces ends a sentence, as in emacs.
670 
671    Return the first non-blank character of the next line.  */
672 
673 static int
get_line(FILE * f,int c)674 get_line (FILE *f, int c)
675 {
676   int start;
677   char *end_of_parabuf;
678   WORD *end_of_word;
679 
680   end_of_parabuf = &parabuf[MAXCHARS];
681   end_of_word = &word[MAXWORDS - 2];
682 
683   do
684     {				/* for each word in a line */
685 
686       /* Scan word.  */
687 
688       word_limit->text = wptr;
689       do
690         {
691           if (wptr == end_of_parabuf)
692             {
693               set_other_indent (true);
694               flush_paragraph ();
695             }
696           *wptr++ = c;
697           c = getc (f);
698         }
699       while (c != EOF && !c_isspace (c));
700       in_column += word_limit->length = wptr - word_limit->text;
701       check_punctuation (word_limit);
702 
703       /* Scan inter-word space.  */
704 
705       start = in_column;
706       c = get_space (f, c);
707       word_limit->space = in_column - start;
708       word_limit->final = (c == EOF
709                            || (word_limit->period
710                                && (c == '\n' || word_limit->space > 1)));
711       if (c == '\n' || c == EOF || uniform)
712         word_limit->space = word_limit->final ? 2 : 1;
713       if (word_limit == end_of_word)
714         {
715           set_other_indent (true);
716           flush_paragraph ();
717         }
718       word_limit++;
719     }
720   while (c != '\n' && c != EOF);
721   return get_prefix (f);
722 }
723 
724 /* Read a prefix from input file F.  Return either first non-matching
725    character, or first non-blank character after the prefix.  */
726 
727 static int
get_prefix(FILE * f)728 get_prefix (FILE *f)
729 {
730   int c;
731 
732   in_column = 0;
733   c = get_space (f, getc (f));
734   if (prefix_length == 0)
735     next_prefix_indent = prefix_lead_space < in_column ?
736       prefix_lead_space : in_column;
737   else
738     {
739       char const *p;
740       next_prefix_indent = in_column;
741       for (p = prefix; *p != '\0'; p++)
742         {
743           unsigned char pc = *p;
744           if (c != pc)
745             return c;
746           in_column++;
747           c = getc (f);
748         }
749       c = get_space (f, c);
750     }
751   return c;
752 }
753 
754 /* Read blank characters from input file F, starting with C, and keeping
755    in_column up-to-date.  Return first non-blank character.  */
756 
757 static int
get_space(FILE * f,int c)758 get_space (FILE *f, int c)
759 {
760   while (true)
761     {
762       if (c == ' ')
763         in_column++;
764       else if (c == '\t')
765         {
766           tabs = true;
767           in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
768         }
769       else
770         return c;
771       c = getc (f);
772     }
773 }
774 
775 /* Set extra fields in word W describing any attached punctuation.  */
776 
777 static void
check_punctuation(WORD * w)778 check_punctuation (WORD *w)
779 {
780   char const *start = w->text;
781   char const *finish = start + (w->length - 1);
782   unsigned char fin = *finish;
783 
784   w->paren = isopen (*start);
785   w->punct = !! ispunct (fin);
786   while (start < finish && isclose (*finish))
787     finish--;
788   w->period = isperiod (*finish);
789 }
790 
791 /* Flush part of the paragraph to make room.  This function is called on
792    hitting the limit on the number of words or characters.  */
793 
794 static void
flush_paragraph(void)795 flush_paragraph (void)
796 {
797   WORD *split_point;
798   WORD *w;
799   int shift;
800   COST best_break;
801 
802   /* In the special case where it's all one word, just flush it.  */
803 
804   if (word_limit == word)
805     {
806       fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
807       wptr = parabuf;
808       return;
809     }
810 
811   /* Otherwise:
812      - format what you have so far as a paragraph,
813      - find a low-cost line break near the end,
814      - output to there,
815      - make that the start of the paragraph.  */
816 
817   fmt_paragraph ();
818 
819   /* Choose a good split point.  */
820 
821   split_point = word_limit;
822   best_break = MAXCOST;
823   for (w = word->next_break; w != word_limit; w = w->next_break)
824     {
825       if (w->best_cost - w->next_break->best_cost < best_break)
826         {
827           split_point = w;
828           best_break = w->best_cost - w->next_break->best_cost;
829         }
830       if (best_break <= MAXCOST - LINE_CREDIT)
831         best_break += LINE_CREDIT;
832     }
833   put_paragraph (split_point);
834 
835   /* Copy text of words down to start of parabuf -- we use memmove because
836      the source and target may overlap.  */
837 
838   memmove (parabuf, split_point->text, wptr - split_point->text);
839   shift = split_point->text - parabuf;
840   wptr -= shift;
841 
842   /* Adjust text pointers.  */
843 
844   for (w = split_point; w <= word_limit; w++)
845     w->text -= shift;
846 
847   /* Copy words from split_point down to word -- we use memmove because
848      the source and target may overlap.  */
849 
850   memmove (word, split_point, (word_limit - split_point + 1) * sizeof *word);
851   word_limit -= split_point - word;
852 }
853 
854 /* Compute the optimal formatting for the whole paragraph by computing
855    and remembering the optimal formatting for each suffix from the empty
856    one to the whole paragraph.  */
857 
858 static void
fmt_paragraph(void)859 fmt_paragraph (void)
860 {
861   WORD *start, *w;
862   int len;
863   COST wcost, best;
864   int saved_length;
865 
866   word_limit->best_cost = 0;
867   saved_length = word_limit->length;
868   word_limit->length = max_width;	/* sentinel */
869 
870   for (start = word_limit - 1; start >= word; start--)
871     {
872       best = MAXCOST;
873       len = start == word ? first_indent : other_indent;
874 
875       /* At least one word, however long, in the line.  */
876 
877       w = start;
878       len += w->length;
879       do
880         {
881           w++;
882 
883           /* Consider breaking before w.  */
884 
885           wcost = line_cost (w, len) + w->best_cost;
886           if (start == word && last_line_length > 0)
887             wcost += RAGGED_COST (len - last_line_length);
888           if (wcost < best)
889             {
890               best = wcost;
891               start->next_break = w;
892               start->line_length = len;
893             }
894 
895           /* This is a kludge to keep us from computing 'len' as the
896              sum of the sentinel length and some non-zero number.
897              Since the sentinel w->length may be INT_MAX, adding
898              to that would give a negative result.  */
899           if (w == word_limit)
900             break;
901 
902           len += (w - 1)->space + w->length;	/* w > start >= word */
903         }
904       while (len < max_width);
905       start->best_cost = best + base_cost (start);
906     }
907 
908   word_limit->length = saved_length;
909 }
910 
911 /* Work around <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109628>.  */
912 #if 13 <= __GNUC__
913 # pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value"
914 #endif
915 
916 /* Return the constant component of the cost of breaking before the
917    word THIS.  */
918 
919 static COST
base_cost(WORD * this)920 base_cost (WORD *this)
921 {
922   COST cost;
923 
924   cost = LINE_COST;
925 
926   if (this > word)
927     {
928       if ((this - 1)->period)
929         {
930           if ((this - 1)->final)
931             cost -= SENTENCE_BONUS;
932           else
933             cost += NOBREAK_COST;
934         }
935       else if ((this - 1)->punct)
936         cost -= PUNCT_BONUS;
937       else if (this > word + 1 && (this - 2)->final)
938         cost += WIDOW_COST ((this - 1)->length);
939     }
940 
941   if (this->paren)
942     cost -= PAREN_BONUS;
943   else if (this->final)
944     cost += ORPHAN_COST (this->length);
945 
946   return cost;
947 }
948 
949 /* Return the component of the cost of breaking before word NEXT that
950    depends on LEN, the length of the line beginning there.  */
951 
952 static COST
line_cost(WORD * next,int len)953 line_cost (WORD *next, int len)
954 {
955   int n;
956   COST cost;
957 
958   if (next == word_limit)
959     return 0;
960   n = goal_width - len;
961   cost = SHORT_COST (n);
962   if (next->next_break != word_limit)
963     {
964       n = len - next->line_length;
965       cost += RAGGED_COST (n);
966     }
967   return cost;
968 }
969 
970 /* Output to stdout a paragraph from word up to (but not including)
971    FINISH, which must be in the next_break chain from word.  */
972 
973 static void
put_paragraph(WORD * finish)974 put_paragraph (WORD *finish)
975 {
976   WORD *w;
977 
978   put_line (word, first_indent);
979   for (w = word->next_break; w != finish; w = w->next_break)
980     put_line (w, other_indent);
981 }
982 
983 /* Output to stdout the line beginning with word W, beginning in column
984    INDENT, including the prefix (if any).  */
985 
986 static void
put_line(WORD * w,int indent)987 put_line (WORD *w, int indent)
988 {
989   WORD *endline;
990 
991   out_column = 0;
992   put_space (prefix_indent);
993   fputs (prefix, stdout);
994   out_column += prefix_length;
995   put_space (indent - out_column);
996 
997   endline = w->next_break - 1;
998   for (; w != endline; w++)
999     {
1000       put_word (w);
1001       put_space (w->space);
1002     }
1003   put_word (w);
1004   last_line_length = out_column;
1005   putchar ('\n');
1006 }
1007 
1008 /* Output to stdout the word W.  */
1009 
1010 static void
put_word(WORD * w)1011 put_word (WORD *w)
1012 {
1013   char const *s;
1014   int n;
1015 
1016   s = w->text;
1017   for (n = w->length; n != 0; n--)
1018     putchar (*s++);
1019   out_column += w->length;
1020 }
1021 
1022 /* Output to stdout SPACE spaces, or equivalent tabs.  */
1023 
1024 static void
put_space(int space)1025 put_space (int space)
1026 {
1027   int space_target, tab_target;
1028 
1029   space_target = out_column + space;
1030   if (tabs)
1031     {
1032       tab_target = space_target / TABWIDTH * TABWIDTH;
1033       if (out_column + 1 < tab_target)
1034         while (out_column < tab_target)
1035           {
1036             putchar ('\t');
1037             out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
1038           }
1039     }
1040   while (out_column < space_target)
1041     {
1042       putchar (' ');
1043       out_column++;
1044     }
1045 }
1046