[prev in list] [next in list] [prev in thread] [next in thread] 

List:       busybox
Subject:    Re: [PATCH] tsort: new applet
From:       David Leonard <d+busybox () adaptive-enterprises ! com>
Date:       2022-02-20 23:58:55
Message-ID: 788o3p8p-r677-sn2n-3p33-n33so48o97rs () nqncgvir-ragrecevfrf ! pbz
[Download RAW message or body]

Thanks. Patch fixed, attached.

By the way, is this the right default for config?
> +//config:      default n

David

On Sun, 20 Feb 2022, tito wrote:

> some minor fixes inline.
...
> +	/* binry search for name */
>
> Typo binry -> binary
...
> +#ifndef TINY
>
> 			bb_error_msg("cycle at %s", G.nodes[i]->name);
>
> +			fprintf(stderr, "tsort: cycle at %s\n", G.nodes[i]->name);
> +#endif
["tsort2.patch" (text/x-diff)]

From 354deec608184a9d82ad117328eff4b3c60f300d Mon Sep 17 00:00:00 2001
From: David Leonard <d+busybox@adaptive-enterprises.com>
Date: Sun, 20 Feb 2022 14:29:45 +1000
Subject: [PATCH] tsort: new applet

function                                         old     new   delta
packed_usage                                   34414   34433     +19
applet_main                                     3192    3200      +8
applet_names                                    2747    2753      +6
tsort_main                                               737    +737
---
 coreutils/tsort.c          | 196 +++++++++++++++++++++++++++++++++++++
 docs/posix_conformance.txt |   2 +-
 testsuite/tsort.tests      | 110 +++++++++++++++++++++
 3 files changed, 307 insertions(+), 1 deletion(-)
 create mode 100644 coreutils/tsort.c
 create mode 100755 testsuite/tsort.tests

diff --git a/coreutils/tsort.c b/coreutils/tsort.c
new file mode 100644
index 000000000..a75b4a54c
--- /dev/null
+++ b/coreutils/tsort.c
@@ -0,0 +1,196 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * tsort implementation for busybox
+ *
+ * public domain -- David Leonard, 2022
+ */
+//config:config TSORT
+//config:	bool "tsort (0.7 kb)"
+//config:	default n
+//config:	help
+//config:	tsort performs a topological sort.
+
+//applet:IF_TSORT(APPLET(tsort, BB_DIR_USR_BIN, BB_SUID_DROP))
+
+//kbuild:lib-$(CONFIG_TSORT) += tsort.o
+
+/* BB_AUDIT SUSv3 compliant */
+/* http://www.opengroup.org/onlinepubs/007904975/utilities/tsort.html */
+
+//usage:#define tsort_trivial_usage
+//usage:       "[FILE]"
+//usage:#define tsort_full_usage "\n\n"
+//usage:       "Topological sort\n"
+//usage:#define tsort_example_usage
+//usage:       "$ echo -e \"a b\\nb c\" | tsort\n"
+//usage:       "a\n"
+//usage:       "b\n"
+//usage:       "c\n"
+
+#include "libbb.h"
+#include "common_bufsiz.h"
+
+struct node {
+	unsigned int in_count;
+	unsigned int out_count;
+	struct node **out;
+	char name[];
+};
+
+struct globals {
+	struct node **nodes;
+	unsigned int nodes_len;
+};
+#define G (*(struct globals*)bb_common_bufsiz1)
+#define INIT_G() do { \
+        setup_common_bufsiz(); \
+        BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
+        G.nodes = NULL; \
+        G.nodes_len = 0; \
+} while (0)
+
+static struct node *
+get_node(const char *name)
+{
+	struct node *n;
+	unsigned int a = 0;
+	unsigned int b = G.nodes_len;
+
+	/* Binary search for name */
+	while (a != b) {
+		unsigned int m = (a + b) / 2;
+		int cmp = strcmp(name, G.nodes[m]->name);
+		if (cmp == 0) {
+			return G.nodes[m]; /* found */
+		} else if (cmp < 0) {
+			b = m;
+		} else {
+			a = m + 1;
+		}
+	}
+
+	/* Allocate new node */
+	n = xmalloc(sizeof *n + strlen(name) + 1);
+	n->in_count = 0;
+	n->out_count = 0;
+	n->out = NULL;
+	strcpy(n->name, name);
+
+	/* Insert to maintain sort */
+	G.nodes = xrealloc(G.nodes, (G.nodes_len + 1) * sizeof *G.nodes);
+	memmove(&G.nodes[a + 1], &G.nodes[a],
+		(G.nodes_len - a) * sizeof *G.nodes);
+	G.nodes[a] = n;
+	G.nodes_len++;
+	return n;
+}
+
+static void
+add_edge(struct node *a, struct node *b)
+{
+	a->out = xrealloc(a->out, (a->out_count + 1) * sizeof *a->out);
+	a->out[a->out_count++] = b;
+	b->in_count++;
+}
+
+int tsort_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
+int tsort_main(int argc UNUSED_PARAM, char **argv)
+{
+	const char *input_filename = "stdin";
+	FILE *in = stdin;
+	char *line;
+	size_t linesz;
+	ssize_t len;
+	struct node *a;
+	int cycles;
+
+	INIT_G();
+
+	if (argv[1]) {
+		if (argv[2]) {
+			bb_show_usage();
+		}
+		input_filename = argv[1];
+		if (input_filename[0] != '-' || input_filename[1]) {
+			close(STDIN_FILENO); /* == 0 */
+			xopen(input_filename, O_RDONLY); /* fd will be 0 */
+		}
+	}
+
+	/* Read in words separated by <blank>s */
+	a = NULL;
+	line = NULL;
+	linesz = 0;
+	while ((len = getline(&line, &linesz, in)) != -1) {
+		char *s = line;
+		while (*(s = skip_whitespace(s))) {
+			struct node *b;
+			char *word;
+
+			word = s;
+			s = skip_non_whitespace(s);
+			if (*s) {
+				*s++ = '\0';
+			}
+
+			/* Create nodes and edges for each word pair */
+			b = get_node(word);
+			if (!a) {
+				a = b;
+			} else {
+				if (a != b) {
+					add_edge(a, b);
+				}
+				a = NULL;
+			}
+		}
+	}
+	die_if_ferror(in, input_filename);
+	if (a) {
+		bb_simple_error_msg_and_die("odd input");
+	}
+	free(line);
+
+	/*
+	 * Kahn's algorithm:
+	 *   - find a node that has no incoming edges, print and remove it
+	 *   - repeat until the graph is empty
+	 *   - if any nodes are left, they form cycles.
+	 */
+	cycles = 0;
+	while (G.nodes_len) {
+		struct node *n;
+		unsigned int i;
+
+		/* Search for first node with no incoming edges */
+		for (i = 0; i < G.nodes_len; i++) {
+			if (!G.nodes[i]->in_count) {
+				break;
+			}
+		}
+		if (i == G.nodes_len) {
+			/* Must be a cycle; arbitraily break it at node 0 */
+			cycles++;
+			i = 0;
+#ifndef TINY
+			bb_error_msg("tsort: cycle at %s\n", G.nodes[i]->name);
+#endif
+		}
+
+		/* Remove the node (need no longer maintain sort) */
+		n = G.nodes[i];
+		G.nodes[i] = G.nodes[--G.nodes_len];
+
+		/* And remove its outgoing edges */
+		for (i = 0; i < n->out_count; i++) {
+			n->out[i]->in_count--;
+		}
+		free(n->out);
+
+		puts(n->name);
+		free(n);
+	}
+	free(G.nodes);
+
+	fflush_stdout_and_exit(cycles ? 1 : 0);
+}
diff --git a/docs/posix_conformance.txt b/docs/posix_conformance.txt
index 5e107d74d..8edbe3e15 100644
--- a/docs/posix_conformance.txt
+++ b/docs/posix_conformance.txt
@@ -24,7 +24,7 @@ POSIX Tools not supported:
   gencat, getconf, iconv, join, link, locale, localedef, lp, m4,
   mailx, newgrp, nl, pathchk, pax, pr, qalter, qdel, qhold, qmove,
   qmsg, qrerun, qrls, qselect, qsig, qstat, qsub, tabs, talk, tput,
-  tsort, unlink, uucp, uustat, uux
+  unlink, uucp, uustat, uux
 
 POSIX Tools not supported (DEVELOPMENT):
   admin, cflow, ctags, cxref, delta, fort77, get, lex, make, nm, prs, rmdel,
diff --git a/testsuite/tsort.tests b/testsuite/tsort.tests
new file mode 100755
index 000000000..c6fe78272
--- /dev/null
+++ b/testsuite/tsort.tests
@@ -0,0 +1,110 @@
+#!/bin/sh
+
+# SUSv3 compliant sort tests.
+# Public Domain, David Leonard 2022
+
+. ./testing.sh
+
+#       name cmd expected ./input stdin
+testing "" "tsort"       "a\n" "" "a a\n"
+testing "" "tsort -"     "a\n" "" "a a\n"
+testing "" "tsort input" "a\n" "a a\n" ""
+testing "tsort input (w/o eol)" "tsort input" "a\n" "a a" ""
+testing "" "tsort /dev/null" "" "" ""
+
+testing "tsort empty" tsort "" "" ""
+testing "tsort blank" tsort "" "" "\n"
+testing "tsort blanks" tsort "" "" "\n\n \t\n "
+
+# simple inputs having exactly one solution
+testing "tsort 1-edge" tsort "a\nb\n" "" "a b\n"
+testing "tsort 2-edge" tsort "a\nb\nc\n" "" "a b b c\n"
+
+
+# The following test helper accommodates future variable output because, as
+# tsort is allowed to emit any total ordering that satisfies its input,
+# should the implementation changes, these tests will remain valid.
+#
+# The idea is to verify that:
+# - each input word is present EXACTLY ONCE in tsort's output
+# - for each input pair 'a b', the occurrence of 'a' APPEARS BEFORE 'b'
+# - the exit code is 0
+
+tsort_test () {
+	fail=
+	name="$1"; shift
+	args="$*"
+	if [ $VERBOSE ]; then
+		echo "============"
+		echo "echo \"$args\" | tsort >actual"
+	fi
+	echo "$args" | tsort >actual
+	ec=$?
+	if [ $ec -ne 0 ]; then
+		fail "tsort exit $ec, expected 0"
+	fi
+	while [ $# -ne 0 ]; do
+		a=$1; shift
+		b=$1; shift
+		aline=$(grep -nxF "$a" <actual | cut -d: -f1)
+		bline=$(grep -nxF "$b" <actual | cut -d: -f1)
+		case $aline in
+			"") fail "word $a missing from output ($args)";;
+			*" "*) fail "word $a duplicated ($args)";;
+		esac
+		case $bline in
+			"") fail "word $b missing from output ($args)";;
+			*" "*) fail "word $b duplicated ($args)";;
+		esac
+		if [ $aline -gt $bline ]; then
+			fail "$a appears after $b ($args)"
+		fi
+	done
+	if [ $fail ] && [ $VERBOSE ]; then
+		echo "exit $ec, actual:"
+		cat actual
+	fi
+	rm actual
+	report "$name"
+}
+
+# Test that erroneous input causes an unsuccessful exit code
+# we don't test the output error message
+tsort_test_err () {
+	fail=
+	name="$1"; shift
+	echo "$*" | tsort >/dev/null 2>/dev/null
+	ec=$?
+	if [ $ec -eq 0 ]; then
+		fail "$name: unexpected exit 0 ($*)"
+	fi
+	report "$name"
+}
+
+fail () {
+	[ $VERBOSE ] && echo "ERROR: $*"
+	fail=1
+}
+
+report () {
+	if [ $fail ]; then
+		FAILCOUNT=$(($FAILCOUNT + 1))
+		echo "FAIL: $*"
+	else
+		echo "PASS: $*"
+	fi
+}
+
+tsort_test "tsort empty2"
+tsort_test "tsort singleton"    a a
+tsort_test "tsort simple"       a b b c
+tsort_test "tsort 2singleton"   a a b b
+tsort_test "tsort medium"       a b a b b c
+tsort_test "tsort std.example"  a b c c d e g g f g e f h h
+tsort_test "tsort prefixes"     a aa aa aaa aaaa aaaaa a aaaaa
+
+tsort_test_err "tsort odd"      a
+tsort_test_err "tsort odd2"     a b c
+tsort_test_err "tsort cycle"    a b b a
+
+exit $FAILCOUNT
-- 
2.30.2



_______________________________________________
busybox mailing list
busybox@busybox.net
http://lists.busybox.net/mailman/listinfo/busybox


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic