1 /* tsort - topological sort.
2    Copyright (C) 1998-2023 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Mark Kettenis <kettenis@phys.uva.nl>.  */
18 
19 /* The topological sort is done according to Algorithm T (Topological
20    sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21    1/Fundamental Algorithms, page 262.  */
22 
23 #include <config.h>
24 
25 #include <sys/types.h>
26 
27 #include "system.h"
28 #include "assure.h"
29 #include "long-options.h"
30 #include "fadvise.h"
31 #include "readtokens.h"
32 #include "stdio--.h"
33 #include "quote.h"
34 
35 /* The official name of this program (e.g., no 'g' prefix).  */
36 #define PROGRAM_NAME "tsort"
37 
38 #define AUTHORS proper_name ("Mark Kettenis")
39 
40 /* Token delimiters when reading from a file.  */
41 #define DELIM " \t\n"
42 
43 /* Members of the list of successors.  */
44 struct successor
45 {
46   struct item *suc;
47   struct successor *next;
48 };
49 
50 /* Each string is held in memory as the head of a list of successors.  */
51 struct item
52 {
53   char const *str;
54   struct item *left, *right;
55   signed char balance; /* -1, 0, or +1 */
56   bool printed;
57   size_t count;
58   struct item *qlink;
59   struct successor *top;
60 };
61 
62 /* The head of the sorted list.  */
63 static struct item *head = nullptr;
64 
65 /* The tail of the list of 'zeros', strings that have no predecessors.  */
66 static struct item *zeros = nullptr;
67 
68 /* Used for loop detection.  */
69 static struct item *loop = nullptr;
70 
71 /* The number of strings to sort.  */
72 static size_t n_strings = 0;
73 
74 void
usage(int status)75 usage (int status)
76 {
77   if (status != EXIT_SUCCESS)
78     emit_try_help ();
79   else
80     {
81       printf (_("\
82 Usage: %s [OPTION] [FILE]\n\
83 Write totally ordered list consistent with the partial ordering in FILE.\n\
84 "), program_name);
85 
86       emit_stdin_note ();
87 
88       fputs (_("\
89 \n\
90 "), stdout);
91       fputs (HELP_OPTION_DESCRIPTION, stdout);
92       fputs (VERSION_OPTION_DESCRIPTION, stdout);
93       emit_ancillary_info (PROGRAM_NAME);
94     }
95 
96   exit (status);
97 }
98 
99 /* Create a new item/node for STR.  */
100 static struct item *
new_item(char const * str)101 new_item (char const *str)
102 {
103   /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^).  */
104   struct item *k = xzalloc (sizeof *k);
105   if (str)
106     k->str = xstrdup (str);
107   return k;
108 }
109 
110 /* Search binary tree rooted at *ROOT for STR.  Allocate a new tree if
111    *ROOT is null.  Insert a node/item for STR if not found.  Return
112    the node/item found/created for STR.
113 
114    This is done according to Algorithm A (Balanced tree search and
115    insertion) in Donald E. Knuth, The Art of Computer Programming,
116    Volume 3/Searching and Sorting, pages 455--457.  */
117 
118 static struct item *
search_item(struct item * root,char const * str)119 search_item (struct item *root, char const *str)
120 {
121   struct item *p, *q, *r, *s, *t;
122   int a;
123 
124   /* Make sure the tree is not empty, since that is what the algorithm
125      below expects.  */
126   if (root->right == nullptr)
127     return (root->right = new_item (str));
128 
129   /* A1. Initialize.  */
130   t = root;
131   s = p = root->right;
132 
133   while (true)
134     {
135       /* A2. Compare.  */
136       a = strcmp (str, p->str);
137       if (a == 0)
138         return p;
139 
140       /* A3 & A4.  Move left & right.  */
141       if (a < 0)
142         q = p->left;
143       else
144         q = p->right;
145 
146       if (q == nullptr)
147         {
148           /* A5. Insert.  */
149           q = new_item (str);
150 
151           /* A3 & A4.  (continued).  */
152           if (a < 0)
153             p->left = q;
154           else
155             p->right = q;
156 
157           /* A6. Adjust balance factors.  */
158           a = strcmp (str, s->str);
159           if (a < 0)
160             {
161               r = p = s->left;
162               a = -1;
163             }
164           else
165             {
166               affirm (0 < a);
167               r = p = s->right;
168               a = 1;
169             }
170 
171           while (p != q)
172             {
173               int cmp = strcmp (str, p->str);
174               if (cmp < 0)
175                 {
176                   p->balance = -1;
177                   p = p->left;
178                 }
179               else
180                 {
181                   affirm (0 < cmp);
182                   p->balance = 1;
183                   p = p->right;
184                 }
185             }
186 
187           /* A7. Balancing act.  */
188           if (s->balance == 0 || s->balance == -a)
189             {
190               s->balance += a;
191               return q;
192             }
193 
194           if (r->balance == a)
195             {
196               /* A8. Single Rotation.  */
197               p = r;
198               if (a < 0)
199                 {
200                   s->left = r->right;
201                   r->right = s;
202                 }
203               else
204                 {
205                   s->right = r->left;
206                   r->left = s;
207                 }
208               s->balance = r->balance = 0;
209             }
210           else
211             {
212               /* A9. Double rotation.  */
213               if (a < 0)
214                 {
215                   p = r->right;
216                   r->right = p->left;
217                   p->left = r;
218                   s->left = p->right;
219                   p->right = s;
220                 }
221               else
222                 {
223                   p = r->left;
224                   r->left = p->right;
225                   p->right = r;
226                   s->right = p->left;
227                   p->left = s;
228                 }
229 
230               s->balance = 0;
231               r->balance = 0;
232               if (p->balance == a)
233                 s->balance = -a;
234               else if (p->balance == -a)
235                 r->balance = a;
236               p->balance = 0;
237             }
238 
239           /* A10. Finishing touch.  */
240           if (s == t->right)
241             t->right = p;
242           else
243             t->left = p;
244 
245           return q;
246         }
247 
248       /* A3 & A4.  (continued).  */
249       if (q->balance)
250         {
251           t = p;
252           s = q;
253         }
254 
255       p = q;
256     }
257 
258   /* NOTREACHED */
259 }
260 
261 /* Record the fact that J precedes K.  */
262 
263 static void
record_relation(struct item * j,struct item * k)264 record_relation (struct item *j, struct item *k)
265 {
266   struct successor *p;
267 
268   if (!STREQ (j->str, k->str))
269     {
270       k->count++;
271       p = xmalloc (sizeof *p);
272       p->suc = k;
273       p->next = j->top;
274       j->top = p;
275     }
276 }
277 
278 static bool
count_items(MAYBE_UNUSED struct item * unused)279 count_items (MAYBE_UNUSED struct item *unused)
280 {
281   n_strings++;
282   return false;
283 }
284 
285 static bool
scan_zeros(struct item * k)286 scan_zeros (struct item *k)
287 {
288   /* Ignore strings that have already been printed.  */
289   if (k->count == 0 && !k->printed)
290     {
291       if (head == nullptr)
292         head = k;
293       else
294         zeros->qlink = k;
295 
296       zeros = k;
297     }
298 
299   return false;
300 }
301 
302 /* Try to detect the loop.  If we have detected that K is part of a
303    loop, print the loop on standard error, remove a relation to break
304    the loop, and return true.
305 
306    The loop detection strategy is as follows: Realize that what we're
307    dealing with is essentially a directed graph.  If we find an item
308    that is part of a graph that contains a cycle we traverse the graph
309    in backwards direction.  In general there is no unique way to do
310    this, but that is no problem.  If we encounter an item that we have
311    encountered before, we know that we've found a cycle.  All we have
312    to do now is retrace our steps, printing out the items until we
313    encounter that item again.  (This is not necessarily the item that
314    we started from originally.)  Since the order in which the items
315    are stored in the tree is not related to the specified partial
316    ordering, we may need to walk the tree several times before the
317    loop has completely been constructed.  If the loop was found, the
318    global variable LOOP will be null.  */
319 
320 static bool
detect_loop(struct item * k)321 detect_loop (struct item *k)
322 {
323   if (k->count > 0)
324     {
325       /* K does not have to be part of a cycle.  It is however part of
326          a graph that contains a cycle.  */
327 
328       if (loop == nullptr)
329         /* Start traversing the graph at K.  */
330         loop = k;
331       else
332         {
333           struct successor **p = &k->top;
334 
335           while (*p)
336             {
337               if ((*p)->suc == loop)
338                 {
339                   if (k->qlink)
340                     {
341                       /* We have found a loop.  Retrace our steps.  */
342                       while (loop)
343                         {
344                           struct item *tmp = loop->qlink;
345 
346                           error (0, 0, "%s", (loop->str));
347 
348                           /* Until we encounter K again.  */
349                           if (loop == k)
350                             {
351                               /* Remove relation.  */
352                               struct successor *s = *p;
353                               s->suc->count--;
354                               *p = s->next;
355                               IF_LINT (free (s));
356                               break;
357                             }
358 
359                           /* Tidy things up since we might have to
360                              detect another loop.  */
361                           loop->qlink = nullptr;
362                           loop = tmp;
363                         }
364 
365                       while (loop)
366                         {
367                           struct item *tmp = loop->qlink;
368 
369                           loop->qlink = nullptr;
370                           loop = tmp;
371                         }
372 
373                       /* Since we have found the loop, stop walking
374                          the tree.  */
375                       return true;
376                     }
377                   else
378                     {
379                       k->qlink = loop;
380                       loop = k;
381                       break;
382                     }
383                 }
384 
385               p = &(*p)->next;
386             }
387         }
388     }
389 
390   return false;
391 }
392 
393 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
394    Stop when ACTION returns true.  */
395 
396 static bool
recurse_tree(struct item * root,bool (* action)(struct item *))397 recurse_tree (struct item *root, bool (*action) (struct item *))
398 {
399   if (root->left == nullptr && root->right == nullptr)
400     return (*action) (root);
401   else
402     {
403       if (root->left != nullptr)
404         if (recurse_tree (root->left, action))
405           return true;
406       if ((*action) (root))
407         return true;
408       if (root->right != nullptr)
409         if (recurse_tree (root->right, action))
410           return true;
411     }
412 
413   return false;
414 }
415 
416 /* Walk the tree specified by the head ROOT, calling ACTION for
417    each node.  */
418 
419 static void
walk_tree(struct item * root,bool (* action)(struct item *))420 walk_tree (struct item *root, bool (*action) (struct item *))
421 {
422   if (root->right)
423     recurse_tree (root->right, action);
424 }
425 
426 /* Do a topological sort on FILE.  Exit with appropriate exit status.  */
427 
428 static _Noreturn void
tsort(char const * file)429 tsort (char const *file)
430 {
431   bool ok = true;
432   struct item *j = nullptr;
433   struct item *k = nullptr;
434   token_buffer tokenbuffer;
435   bool is_stdin = STREQ (file, "-");
436 
437   /* Initialize the head of the tree holding the strings we're sorting.  */
438   struct item *root = new_item (nullptr);
439 
440   if (!is_stdin && ! freopen (file, "r", stdin))
441     error (EXIT_FAILURE, errno, "%s", quotef (file));
442 
443   fadvise (stdin, FADVISE_SEQUENTIAL);
444 
445   init_tokenbuffer (&tokenbuffer);
446 
447   while (true)
448     {
449       /* T2. Next Relation.  */
450       size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
451       if (len == (size_t) -1)
452         {
453           if (ferror (stdin))
454             error (EXIT_FAILURE, errno, _("%s: read error"), quotef (file));
455           break;
456         }
457 
458       affirm (len != 0);
459 
460       k = search_item (root, tokenbuffer.buffer);
461       if (j)
462         {
463           /* T3. Record the relation.  */
464           record_relation (j, k);
465           k = nullptr;
466         }
467 
468       j = k;
469     }
470 
471   if (k != nullptr)
472     error (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
473            quotef (file));
474 
475   /* T1. Initialize (N <- n).  */
476   walk_tree (root, count_items);
477 
478   while (n_strings > 0)
479     {
480       /* T4. Scan for zeros.  */
481       walk_tree (root, scan_zeros);
482 
483       while (head)
484         {
485           struct successor *p = head->top;
486 
487           /* T5. Output front of queue.  */
488           puts (head->str);
489           head->printed = true;
490           n_strings--;
491 
492           /* T6. Erase relations.  */
493           while (p)
494             {
495               p->suc->count--;
496               if (p->suc->count == 0)
497                 {
498                   zeros->qlink = p->suc;
499                   zeros = p->suc;
500                 }
501 
502               p = p->next;
503             }
504 
505           /* T7. Remove from queue.  */
506           head = head->qlink;
507         }
508 
509       /* T8.  End of process.  */
510       if (n_strings > 0)
511         {
512           /* The input contains a loop.  */
513           error (0, 0, _("%s: input contains a loop:"), quotef (file));
514           ok = false;
515 
516           /* Print the loop and remove a relation to break it.  */
517           do
518             walk_tree (root, detect_loop);
519           while (loop);
520         }
521     }
522 
523   if (fclose (stdin) != 0)
524     error (EXIT_FAILURE, errno, "%s",
525            is_stdin ? _("standard input") : quotef (file));
526 
527   exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
528 }
529 
530 int
main(int argc,char ** argv)531 main (int argc, char **argv)
532 {
533   initialize_main (&argc, &argv);
534   set_program_name (argv[0]);
535   setlocale (LC_ALL, "");
536   bindtextdomain (PACKAGE, LOCALEDIR);
537   textdomain (PACKAGE);
538 
539   atexit (close_stdout);
540 
541   parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME,
542                                    Version, true, usage, AUTHORS,
543                                    (char const *) nullptr);
544 
545   if (1 < argc - optind)
546     {
547       error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
548       usage (EXIT_FAILURE);
549     }
550 
551   tsort (optind == argc ? "-" : argv[optind]);
552 }
553