Line data Source code
1 : /* tsort - topological sort.
2 : Copyright (C) 1998-2005, 2007 Free Software Foundation, Inc.
3 :
4 : This program is free software: you can redistribute it and/or modify
5 : it under the terms of the GNU General Public License as published by
6 : the Free Software Foundation, either version 3 of the License, or
7 : (at your option) any later version.
8 :
9 : This program is distributed in the hope that it will be useful,
10 : but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : GNU General Public License for more details.
13 :
14 : You should have received a copy of the GNU General Public License
15 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
16 :
17 : /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
18 :
19 : /* The topological sort is done according to Algorithm T (Topological
20 : sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21 : 1/Fundamental Algorithms, page 262. */
22 :
23 : #include <config.h>
24 :
25 : #include <stdio.h>
26 : #include <assert.h>
27 : #include <getopt.h>
28 : #include <sys/types.h>
29 :
30 : #include "system.h"
31 : #include "long-options.h"
32 : #include "error.h"
33 : #include "quote.h"
34 : #include "readtokens.h"
35 :
36 : /* The official name of this program (e.g., no `g' prefix). */
37 : #define PROGRAM_NAME "tsort"
38 :
39 : #define AUTHORS "Mark Kettenis"
40 :
41 : /* Token delimiters when reading from a file. */
42 : #define DELIM " \t\n"
43 :
44 : /* Members of the list of successors. */
45 : struct successor
46 : {
47 : struct item *suc;
48 : struct successor *next;
49 : };
50 :
51 : /* Each string is held in core as the head of a list of successors. */
52 : struct item
53 : {
54 : const char *str;
55 : struct item *left, *right;
56 : int balance; /* -1, 0, or +1 */
57 : size_t count;
58 : struct item *qlink;
59 : struct successor *top;
60 : };
61 :
62 : /* The name this program was run with. */
63 : char *program_name;
64 :
65 : /* The head of the sorted list. */
66 : static struct item *head = NULL;
67 :
68 : /* The tail of the list of `zeros', strings that have no predecessors. */
69 : static struct item *zeros = NULL;
70 :
71 : /* Used for loop detection. */
72 : static struct item *loop = NULL;
73 :
74 : /* The number of strings to sort. */
75 : static size_t n_strings = 0;
76 :
77 : void
78 26 : usage (int status)
79 : {
80 26 : if (status != EXIT_SUCCESS)
81 24 : fprintf (stderr, _("Try `%s --help' for more information.\n"),
82 : program_name);
83 : else
84 : {
85 2 : printf (_("\
86 : Usage: %s [OPTION] [FILE]\n\
87 : Write totally ordered list consistent with the partial ordering in FILE.\n\
88 : With no FILE, or when FILE is -, read standard input.\n\
89 : \n\
90 : "), program_name);
91 2 : fputs (HELP_OPTION_DESCRIPTION, stdout);
92 2 : fputs (VERSION_OPTION_DESCRIPTION, stdout);
93 2 : emit_bug_reporting_address ();
94 : }
95 :
96 26 : exit (status);
97 : }
98 :
99 : /* Create a new item/node for STR. */
100 : static struct item *
101 62 : new_item (const char *str)
102 : {
103 62 : struct item *k = xmalloc (sizeof *k);
104 :
105 62 : k->str = (str ? xstrdup (str): NULL);
106 62 : k->left = k->right = NULL;
107 62 : k->balance = 0;
108 :
109 : /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
110 62 : k->count = 0;
111 62 : k->qlink = NULL;
112 62 : k->top = NULL;
113 :
114 62 : return k;
115 : }
116 :
117 : /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
118 : *ROOT is NULL. Insert a node/item for STR if not found. Return
119 : the node/item found/created for STR.
120 :
121 : This is done according to Algorithm A (Balanced tree search and
122 : insertion) in Donald E. Knuth, The Art of Computer Programming,
123 : Volume 3/Searching and Sorting, pages 455--457. */
124 :
125 : static struct item *
126 42 : search_item (struct item *root, const char *str)
127 : {
128 : struct item *p, *q, *r, *s, *t;
129 : int a;
130 :
131 42 : assert (root);
132 :
133 : /* Make sure the tree is not empty, since that is what the algorithm
134 : below expects. */
135 42 : if (root->right == NULL)
136 22 : return (root->right = new_item (str));
137 :
138 : /* A1. Initialize. */
139 20 : t = root;
140 20 : s = p = root->right;
141 :
142 : for (;;)
143 : {
144 : /* A2. Compare. */
145 30 : a = strcmp (str, p->str);
146 25 : if (a == 0)
147 8 : return p;
148 :
149 : /* A3 & A4. Move left & right. */
150 17 : if (a < 0)
151 7 : q = p->left;
152 : else
153 10 : q = p->right;
154 :
155 17 : if (q == NULL)
156 : {
157 : /* A5. Insert. */
158 12 : q = new_item (str);
159 :
160 : /* A3 & A4. (continued). */
161 12 : if (a < 0)
162 5 : p->left = q;
163 : else
164 7 : p->right = q;
165 :
166 : /* A6. Adjust balance factors. */
167 12 : assert (!STREQ (str, s->str));
168 12 : if (strcmp (str, s->str) < 0)
169 : {
170 5 : r = p = s->left;
171 5 : a = -1;
172 : }
173 : else
174 : {
175 7 : r = p = s->right;
176 7 : a = 1;
177 : }
178 :
179 27 : while (p != q)
180 : {
181 3 : assert (!STREQ (str, p->str));
182 3 : if (strcmp (str, p->str) < 0)
183 : {
184 1 : p->balance = -1;
185 1 : p = p->left;
186 : }
187 : else
188 : {
189 2 : p->balance = 1;
190 2 : p = p->right;
191 : }
192 : }
193 :
194 : /* A7. Balancing act. */
195 12 : if (s->balance == 0 || s->balance == -a)
196 : {
197 9 : s->balance += a;
198 9 : return q;
199 : }
200 :
201 3 : if (r->balance == a)
202 : {
203 : /* A8. Single Rotation. */
204 1 : p = r;
205 1 : if (a < 0)
206 : {
207 0 : s->left = r->right;
208 0 : r->right = s;
209 : }
210 : else
211 : {
212 1 : s->right = r->left;
213 1 : r->left = s;
214 : }
215 1 : s->balance = r->balance = 0;
216 : }
217 : else
218 : {
219 : /* A9. Double rotation. */
220 2 : if (a < 0)
221 : {
222 1 : p = r->right;
223 1 : r->right = p->left;
224 1 : p->left = r;
225 1 : s->left = p->right;
226 1 : p->right = s;
227 : }
228 : else
229 : {
230 1 : p = r->left;
231 1 : r->left = p->right;
232 1 : p->right = r;
233 1 : s->right = p->left;
234 1 : p->left = s;
235 : }
236 :
237 2 : s->balance = 0;
238 2 : r->balance = 0;
239 2 : if (p->balance == a)
240 0 : s->balance = -a;
241 2 : else if (p->balance == -a)
242 0 : r->balance = a;
243 2 : p->balance = 0;
244 : }
245 :
246 : /* A10. Finishing touch. */
247 3 : if (s == t->right)
248 3 : t->right = p;
249 : else
250 0 : t->left = p;
251 :
252 3 : return q;
253 : }
254 :
255 : /* A3 & A4. (continued). */
256 5 : if (q->balance)
257 : {
258 0 : t = p;
259 0 : s = q;
260 : }
261 :
262 5 : p = q;
263 : }
264 :
265 : /* NOTREACHED */
266 : }
267 :
268 : /* Record the fact that J precedes K. */
269 :
270 : static void
271 13 : record_relation (struct item *j, struct item *k)
272 : {
273 : struct successor *p;
274 :
275 13 : if (!STREQ (j->str, k->str))
276 : {
277 9 : k->count++;
278 9 : p = xmalloc (sizeof *p);
279 9 : p->suc = k;
280 9 : p->next = j->top;
281 9 : j->top = p;
282 : }
283 13 : }
284 :
285 : static bool
286 9 : count_items (struct item *unused ATTRIBUTE_UNUSED)
287 : {
288 9 : n_strings++;
289 9 : return false;
290 : }
291 :
292 : static bool
293 9 : scan_zeros (struct item *k)
294 : {
295 : /* Ignore strings that have already been printed. */
296 9 : if (k->count == 0 && k->str)
297 : {
298 6 : if (head == NULL)
299 6 : head = k;
300 : else
301 0 : zeros->qlink = k;
302 :
303 6 : zeros = k;
304 : }
305 :
306 9 : return false;
307 : }
308 :
309 : /* Try to detect the loop. If we have detected that K is part of a
310 : loop, print the loop on standard error, remove a relation to break
311 : the loop, and return true.
312 :
313 : The loop detection strategy is as follows: Realise that what we're
314 : dealing with is essentially a directed graph. If we find an item
315 : that is part of a graph that contains a cycle we traverse the graph
316 : in backwards direction. In general there is no unique way to do
317 : this, but that is no problem. If we encounter an item that we have
318 : encountered before, we know that we've found a cycle. All we have
319 : to do now is retrace our steps, printing out the items until we
320 : encounter that item again. (This is not necessarily the item that
321 : we started from originally.) Since the order in which the items
322 : are stored in the tree is not related to the specified partial
323 : ordering, we may need to walk the tree several times before the
324 : loop has completely been constructed. If the loop was found, the
325 : global variable LOOP will be NULL. */
326 :
327 : static bool
328 0 : detect_loop (struct item *k)
329 : {
330 0 : if (k->count > 0)
331 : {
332 : /* K does not have to be part of a cycle. It is however part of
333 : a graph that contains a cycle. */
334 :
335 0 : if (loop == NULL)
336 : /* Start traversing the graph at K. */
337 0 : loop = k;
338 : else
339 : {
340 0 : struct successor **p = &k->top;
341 :
342 0 : while (*p)
343 : {
344 0 : if ((*p)->suc == loop)
345 : {
346 0 : if (k->qlink)
347 : {
348 : /* We have found a loop. Retrace our steps. */
349 0 : while (loop)
350 : {
351 0 : struct item *tmp = loop->qlink;
352 :
353 0 : fprintf (stderr, "%s: %s\n", program_name,
354 0 : loop->str);
355 :
356 : /* Until we encounter K again. */
357 0 : if (loop == k)
358 : {
359 : /* Remove relation. */
360 0 : (*p)->suc->count--;
361 0 : *p = (*p)->next;
362 0 : break;
363 : }
364 :
365 : /* Tidy things up since we might have to
366 : detect another loop. */
367 0 : loop->qlink = NULL;
368 0 : loop = tmp;
369 : }
370 :
371 0 : while (loop)
372 : {
373 0 : struct item *tmp = loop->qlink;
374 :
375 0 : loop->qlink = NULL;
376 0 : loop = tmp;
377 : }
378 :
379 : /* Since we have found the loop, stop walking
380 : the tree. */
381 0 : return true;
382 : }
383 : else
384 : {
385 0 : k->qlink = loop;
386 0 : loop = k;
387 0 : break;
388 : }
389 : }
390 :
391 0 : p = &(*p)->next;
392 : }
393 : }
394 : }
395 :
396 0 : return false;
397 : }
398 :
399 : /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
400 : Stop when ACTION returns true. */
401 :
402 : static bool
403 18 : recurse_tree (struct item *root, bool (*action) (struct item *))
404 : {
405 18 : if (root->left == NULL && root->right == NULL)
406 12 : return (*action) (root);
407 : else
408 : {
409 6 : if (root->left != NULL)
410 2 : if (recurse_tree (root->left, action))
411 0 : return true;
412 6 : if ((*action) (root))
413 0 : return true;
414 6 : if (root->right != NULL)
415 4 : if (recurse_tree (root->right, action))
416 0 : return true;
417 : }
418 :
419 6 : return false;
420 : }
421 :
422 : /* Walk the tree specified by the head ROOT, calling ACTION for
423 : each node. */
424 :
425 : static void
426 13 : walk_tree (struct item *root, bool (*action) (struct item *))
427 : {
428 13 : if (root->right)
429 12 : recurse_tree (root->right, action);
430 13 : }
431 :
432 : /* Do a topological sort on FILE. Return true if successful. */
433 :
434 : static bool
435 28 : tsort (const char *file)
436 : {
437 28 : bool ok = true;
438 : struct item *root;
439 28 : struct item *j = NULL;
440 28 : struct item *k = NULL;
441 : token_buffer tokenbuffer;
442 28 : bool is_stdin = STREQ (file, "-");
443 :
444 : /* Intialize the head of the tree will hold the strings we're sorting. */
445 28 : root = new_item (NULL);
446 :
447 28 : if (!is_stdin && ! freopen (file, "r", stdin))
448 5 : error (EXIT_FAILURE, errno, "%s", file);
449 :
450 23 : init_tokenbuffer (&tokenbuffer);
451 :
452 : while (1)
453 42 : {
454 : /* T2. Next Relation. */
455 65 : size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
456 65 : if (len == (size_t) -1)
457 23 : break;
458 :
459 42 : assert (len != 0);
460 :
461 42 : k = search_item (root, tokenbuffer.buffer);
462 42 : if (j)
463 : {
464 : /* T3. Record the relation. */
465 13 : record_relation (j, k);
466 13 : k = NULL;
467 : }
468 :
469 42 : j = k;
470 : }
471 :
472 23 : if (k != NULL)
473 16 : error (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
474 : file);
475 :
476 : /* T1. Initialize (N <- n). */
477 7 : walk_tree (root, count_items);
478 :
479 20 : while (n_strings > 0)
480 : {
481 : /* T4. Scan for zeros. */
482 6 : walk_tree (root, scan_zeros);
483 :
484 21 : while (head)
485 : {
486 9 : struct successor *p = head->top;
487 :
488 : /* T5. Output front of queue. */
489 9 : puts (head->str);
490 9 : head->str = NULL; /* Avoid printing the same string twice. */
491 9 : n_strings--;
492 :
493 : /* T6. Erase relations. */
494 22 : while (p)
495 : {
496 4 : p->suc->count--;
497 4 : if (p->suc->count == 0)
498 : {
499 3 : zeros->qlink = p->suc;
500 3 : zeros = p->suc;
501 : }
502 :
503 4 : p = p->next;
504 : }
505 :
506 : /* T7. Remove from queue. */
507 9 : head = head->qlink;
508 : }
509 :
510 : /* T8. End of process. */
511 6 : if (n_strings > 0)
512 : {
513 : /* The input contains a loop. */
514 0 : error (0, 0, _("%s: input contains a loop:"), file);
515 0 : ok = false;
516 :
517 : /* Print the loop and remove a relation to break it. */
518 : do
519 0 : walk_tree (root, detect_loop);
520 0 : while (loop);
521 : }
522 : }
523 :
524 7 : if (fclose (stdin) != 0)
525 0 : error (EXIT_FAILURE, errno, "%s",
526 : is_stdin ? _("standard input") : quote (file));
527 :
528 7 : return ok;
529 : }
530 :
531 : int
532 55 : main (int argc, char **argv)
533 : {
534 : bool ok;
535 :
536 : initialize_main (&argc, &argv);
537 55 : program_name = argv[0];
538 55 : setlocale (LC_ALL, "");
539 : bindtextdomain (PACKAGE, LOCALEDIR);
540 : textdomain (PACKAGE);
541 :
542 55 : atexit (close_stdout);
543 :
544 55 : parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE, VERSION,
545 : usage, AUTHORS, (char const *) NULL);
546 52 : if (getopt_long (argc, argv, "", NULL, NULL) != -1)
547 4 : usage (EXIT_FAILURE);
548 :
549 48 : if (1 < argc - optind)
550 : {
551 20 : error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
552 20 : usage (EXIT_FAILURE);
553 : }
554 :
555 28 : ok = tsort (optind == argc ? "-" : argv[optind]);
556 :
557 7 : exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
558 : }
|