1 /* tsort - topological sort.
2 Copyright (C) 1998-2023 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
18
19 /* The topological sort is done according to Algorithm T (Topological
20 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21 1/Fundamental Algorithms, page 262. */
22
23 #include <config.h>
24
25 #include <sys/types.h>
26
27 #include "system.h"
28 #include "assure.h"
29 #include "long-options.h"
30 #include "fadvise.h"
31 #include "readtokens.h"
32 #include "stdio--.h"
33 #include "quote.h"
34
35 /* The official name of this program (e.g., no 'g' prefix). */
36 #define PROGRAM_NAME "tsort"
37
38 #define AUTHORS proper_name ("Mark Kettenis")
39
40 /* Token delimiters when reading from a file. */
41 #define DELIM " \t\n"
42
43 /* Members of the list of successors. */
44 struct successor
45 {
46 struct item *suc;
47 struct successor *next;
48 };
49
50 /* Each string is held in memory as the head of a list of successors. */
51 struct item
52 {
53 char const *str;
54 struct item *left, *right;
55 signed char balance; /* -1, 0, or +1 */
56 bool printed;
57 size_t count;
58 struct item *qlink;
59 struct successor *top;
60 };
61
62 /* The head of the sorted list. */
63 static struct item *head = nullptr;
64
65 /* The tail of the list of 'zeros', strings that have no predecessors. */
66 static struct item *zeros = nullptr;
67
68 /* Used for loop detection. */
69 static struct item *loop = nullptr;
70
71 /* The number of strings to sort. */
72 static size_t n_strings = 0;
73
74 void
usage(int status)75 usage (int status)
76 {
77 if (status != EXIT_SUCCESS)
78 emit_try_help ();
79 else
80 {
81 printf (_("\
82 Usage: %s [OPTION] [FILE]\n\
83 Write totally ordered list consistent with the partial ordering in FILE.\n\
84 "), program_name);
85
86 emit_stdin_note ();
87
88 fputs (_("\
89 \n\
90 "), stdout);
91 fputs (HELP_OPTION_DESCRIPTION, stdout);
92 fputs (VERSION_OPTION_DESCRIPTION, stdout);
93 emit_ancillary_info (PROGRAM_NAME);
94 }
95
96 exit (status);
97 }
98
99 /* Create a new item/node for STR. */
100 static struct item *
new_item(char const * str)101 new_item (char const *str)
102 {
103 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
104 struct item *k = xzalloc (sizeof *k);
105 if (str)
106 k->str = xstrdup (str);
107 return k;
108 }
109
110 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
111 *ROOT is null. Insert a node/item for STR if not found. Return
112 the node/item found/created for STR.
113
114 This is done according to Algorithm A (Balanced tree search and
115 insertion) in Donald E. Knuth, The Art of Computer Programming,
116 Volume 3/Searching and Sorting, pages 455--457. */
117
118 static struct item *
search_item(struct item * root,char const * str)119 search_item (struct item *root, char const *str)
120 {
121 struct item *p, *q, *r, *s, *t;
122 int a;
123
124 /* Make sure the tree is not empty, since that is what the algorithm
125 below expects. */
126 if (root->right == nullptr)
127 return (root->right = new_item (str));
128
129 /* A1. Initialize. */
130 t = root;
131 s = p = root->right;
132
133 while (true)
134 {
135 /* A2. Compare. */
136 a = strcmp (str, p->str);
137 if (a == 0)
138 return p;
139
140 /* A3 & A4. Move left & right. */
141 if (a < 0)
142 q = p->left;
143 else
144 q = p->right;
145
146 if (q == nullptr)
147 {
148 /* A5. Insert. */
149 q = new_item (str);
150
151 /* A3 & A4. (continued). */
152 if (a < 0)
153 p->left = q;
154 else
155 p->right = q;
156
157 /* A6. Adjust balance factors. */
158 a = strcmp (str, s->str);
159 if (a < 0)
160 {
161 r = p = s->left;
162 a = -1;
163 }
164 else
165 {
166 affirm (0 < a);
167 r = p = s->right;
168 a = 1;
169 }
170
171 while (p != q)
172 {
173 int cmp = strcmp (str, p->str);
174 if (cmp < 0)
175 {
176 p->balance = -1;
177 p = p->left;
178 }
179 else
180 {
181 affirm (0 < cmp);
182 p->balance = 1;
183 p = p->right;
184 }
185 }
186
187 /* A7. Balancing act. */
188 if (s->balance == 0 || s->balance == -a)
189 {
190 s->balance += a;
191 return q;
192 }
193
194 if (r->balance == a)
195 {
196 /* A8. Single Rotation. */
197 p = r;
198 if (a < 0)
199 {
200 s->left = r->right;
201 r->right = s;
202 }
203 else
204 {
205 s->right = r->left;
206 r->left = s;
207 }
208 s->balance = r->balance = 0;
209 }
210 else
211 {
212 /* A9. Double rotation. */
213 if (a < 0)
214 {
215 p = r->right;
216 r->right = p->left;
217 p->left = r;
218 s->left = p->right;
219 p->right = s;
220 }
221 else
222 {
223 p = r->left;
224 r->left = p->right;
225 p->right = r;
226 s->right = p->left;
227 p->left = s;
228 }
229
230 s->balance = 0;
231 r->balance = 0;
232 if (p->balance == a)
233 s->balance = -a;
234 else if (p->balance == -a)
235 r->balance = a;
236 p->balance = 0;
237 }
238
239 /* A10. Finishing touch. */
240 if (s == t->right)
241 t->right = p;
242 else
243 t->left = p;
244
245 return q;
246 }
247
248 /* A3 & A4. (continued). */
249 if (q->balance)
250 {
251 t = p;
252 s = q;
253 }
254
255 p = q;
256 }
257
258 /* NOTREACHED */
259 }
260
261 /* Record the fact that J precedes K. */
262
263 static void
record_relation(struct item * j,struct item * k)264 record_relation (struct item *j, struct item *k)
265 {
266 struct successor *p;
267
268 if (!STREQ (j->str, k->str))
269 {
270 k->count++;
271 p = xmalloc (sizeof *p);
272 p->suc = k;
273 p->next = j->top;
274 j->top = p;
275 }
276 }
277
278 static bool
count_items(MAYBE_UNUSED struct item * unused)279 count_items (MAYBE_UNUSED struct item *unused)
280 {
281 n_strings++;
282 return false;
283 }
284
285 static bool
scan_zeros(struct item * k)286 scan_zeros (struct item *k)
287 {
288 /* Ignore strings that have already been printed. */
289 if (k->count == 0 && !k->printed)
290 {
291 if (head == nullptr)
292 head = k;
293 else
294 zeros->qlink = k;
295
296 zeros = k;
297 }
298
299 return false;
300 }
301
302 /* Try to detect the loop. If we have detected that K is part of a
303 loop, print the loop on standard error, remove a relation to break
304 the loop, and return true.
305
306 The loop detection strategy is as follows: Realize that what we're
307 dealing with is essentially a directed graph. If we find an item
308 that is part of a graph that contains a cycle we traverse the graph
309 in backwards direction. In general there is no unique way to do
310 this, but that is no problem. If we encounter an item that we have
311 encountered before, we know that we've found a cycle. All we have
312 to do now is retrace our steps, printing out the items until we
313 encounter that item again. (This is not necessarily the item that
314 we started from originally.) Since the order in which the items
315 are stored in the tree is not related to the specified partial
316 ordering, we may need to walk the tree several times before the
317 loop has completely been constructed. If the loop was found, the
318 global variable LOOP will be null. */
319
320 static bool
detect_loop(struct item * k)321 detect_loop (struct item *k)
322 {
323 if (k->count > 0)
324 {
325 /* K does not have to be part of a cycle. It is however part of
326 a graph that contains a cycle. */
327
328 if (loop == nullptr)
329 /* Start traversing the graph at K. */
330 loop = k;
331 else
332 {
333 struct successor **p = &k->top;
334
335 while (*p)
336 {
337 if ((*p)->suc == loop)
338 {
339 if (k->qlink)
340 {
341 /* We have found a loop. Retrace our steps. */
342 while (loop)
343 {
344 struct item *tmp = loop->qlink;
345
346 error (0, 0, "%s", (loop->str));
347
348 /* Until we encounter K again. */
349 if (loop == k)
350 {
351 /* Remove relation. */
352 struct successor *s = *p;
353 s->suc->count--;
354 *p = s->next;
355 IF_LINT (free (s));
356 break;
357 }
358
359 /* Tidy things up since we might have to
360 detect another loop. */
361 loop->qlink = nullptr;
362 loop = tmp;
363 }
364
365 while (loop)
366 {
367 struct item *tmp = loop->qlink;
368
369 loop->qlink = nullptr;
370 loop = tmp;
371 }
372
373 /* Since we have found the loop, stop walking
374 the tree. */
375 return true;
376 }
377 else
378 {
379 k->qlink = loop;
380 loop = k;
381 break;
382 }
383 }
384
385 p = &(*p)->next;
386 }
387 }
388 }
389
390 return false;
391 }
392
393 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
394 Stop when ACTION returns true. */
395
396 static bool
recurse_tree(struct item * root,bool (* action)(struct item *))397 recurse_tree (struct item *root, bool (*action) (struct item *))
398 {
399 if (root->left == nullptr && root->right == nullptr)
400 return (*action) (root);
401 else
402 {
403 if (root->left != nullptr)
404 if (recurse_tree (root->left, action))
405 return true;
406 if ((*action) (root))
407 return true;
408 if (root->right != nullptr)
409 if (recurse_tree (root->right, action))
410 return true;
411 }
412
413 return false;
414 }
415
416 /* Walk the tree specified by the head ROOT, calling ACTION for
417 each node. */
418
419 static void
walk_tree(struct item * root,bool (* action)(struct item *))420 walk_tree (struct item *root, bool (*action) (struct item *))
421 {
422 if (root->right)
423 recurse_tree (root->right, action);
424 }
425
426 /* Do a topological sort on FILE. Exit with appropriate exit status. */
427
428 static _Noreturn void
tsort(char const * file)429 tsort (char const *file)
430 {
431 bool ok = true;
432 struct item *j = nullptr;
433 struct item *k = nullptr;
434 token_buffer tokenbuffer;
435 bool is_stdin = STREQ (file, "-");
436
437 /* Initialize the head of the tree holding the strings we're sorting. */
438 struct item *root = new_item (nullptr);
439
440 if (!is_stdin && ! freopen (file, "r", stdin))
441 error (EXIT_FAILURE, errno, "%s", quotef (file));
442
443 fadvise (stdin, FADVISE_SEQUENTIAL);
444
445 init_tokenbuffer (&tokenbuffer);
446
447 while (true)
448 {
449 /* T2. Next Relation. */
450 size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
451 if (len == (size_t) -1)
452 {
453 if (ferror (stdin))
454 error (EXIT_FAILURE, errno, _("%s: read error"), quotef (file));
455 break;
456 }
457
458 affirm (len != 0);
459
460 k = search_item (root, tokenbuffer.buffer);
461 if (j)
462 {
463 /* T3. Record the relation. */
464 record_relation (j, k);
465 k = nullptr;
466 }
467
468 j = k;
469 }
470
471 if (k != nullptr)
472 error (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
473 quotef (file));
474
475 /* T1. Initialize (N <- n). */
476 walk_tree (root, count_items);
477
478 while (n_strings > 0)
479 {
480 /* T4. Scan for zeros. */
481 walk_tree (root, scan_zeros);
482
483 while (head)
484 {
485 struct successor *p = head->top;
486
487 /* T5. Output front of queue. */
488 puts (head->str);
489 head->printed = true;
490 n_strings--;
491
492 /* T6. Erase relations. */
493 while (p)
494 {
495 p->suc->count--;
496 if (p->suc->count == 0)
497 {
498 zeros->qlink = p->suc;
499 zeros = p->suc;
500 }
501
502 p = p->next;
503 }
504
505 /* T7. Remove from queue. */
506 head = head->qlink;
507 }
508
509 /* T8. End of process. */
510 if (n_strings > 0)
511 {
512 /* The input contains a loop. */
513 error (0, 0, _("%s: input contains a loop:"), quotef (file));
514 ok = false;
515
516 /* Print the loop and remove a relation to break it. */
517 do
518 walk_tree (root, detect_loop);
519 while (loop);
520 }
521 }
522
523 if (fclose (stdin) != 0)
524 error (EXIT_FAILURE, errno, "%s",
525 is_stdin ? _("standard input") : quotef (file));
526
527 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
528 }
529
530 int
main(int argc,char ** argv)531 main (int argc, char **argv)
532 {
533 initialize_main (&argc, &argv);
534 set_program_name (argv[0]);
535 setlocale (LC_ALL, "");
536 bindtextdomain (PACKAGE, LOCALEDIR);
537 textdomain (PACKAGE);
538
539 atexit (close_stdout);
540
541 parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME,
542 Version, true, usage, AUTHORS,
543 (char const *) nullptr);
544
545 if (1 < argc - optind)
546 {
547 error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
548 usage (EXIT_FAILURE);
549 }
550
551 tsort (optind == argc ? "-" : argv[optind]);
552 }
553