[prev in list] [next in list] [prev in thread] [next in thread] 

List:       openbsd-tech
Subject:    Re: cut: Fix segmentation fault
From:       Tobias Stoeckmann <tobias () stoeckmann ! org>
Date:       2018-03-28 7:23:30
Message-ID: 20180328072330.GA39757 () pepper ! home ! stoeckmann ! org
[Download RAW message or body]

Hi,

On Wed, Mar 28, 2018 at 12:53:47AM +0200, Ingo Schwarze wrote:
> Ouch, you are right.  But then, the code feels counter-intuitive
> and the error message confusing.

I agree. Talking about a zero out of nothing seems weird, even though
I have learned yesterday that "a=''; echo $((a))" is "0". ;)

As this special zero case is handled by strtonum already, how about we
just stick with "invalid list value" which is true for non-digits as
well?

	if (*p != '\0' || !stop || !start)
 		errx(1, "[-bcf] list: illegal list value");

I've extended the regression tests to cover the overflow part as well
as handling invalid list values.


Tobias

Index: usr.bin/cut/cut.c
===================================================================
RCS file: /cvs/src/usr.bin/cut/cut.c,v
retrieving revision 1.23
diff -u -p -u -p -r1.23 cut.c
--- usr.bin/cut/cut.c	2 Dec 2015 00:56:46 -0000	1.23
+++ usr.bin/cut/cut.c	28 Mar 2018 07:21:28 -0000
@@ -154,11 +154,32 @@ int autostart, autostop, maxval;
 
 char positions[_POSIX2_LINE_MAX + 1];
 
+int
+read_number(char **p)
+{
+	size_t pos;
+	int dash, n;
+	const char *errstr;
+	char *q;
+
+	q = *p + strcspn(*p, "-");
+	dash = *q == '-';
+	*q = '\0';
+	n = strtonum(*p, 1, _POSIX2_LINE_MAX, &errstr);
+	if (errstr != NULL)
+		errx(1, "[-bcf] list: %s %s (allowed 1-%d)", *p, errstr,
+		    _POSIX2_LINE_MAX);
+	if (dash)
+		*q = '-';
+	*p = q;
+
+	return n;
+}
+
 void
 get_list(char *list)
 {
 	int setautostart, start, stop;
-	char *pos;
 	char *p;
 
 	/*
@@ -176,30 +197,27 @@ get_list(char *list)
 			setautostart = 1;
 		}
 		if (isdigit((unsigned char)*p)) {
-			start = stop = strtol(p, &p, 10);
+			start = stop = read_number(&p);
 			if (setautostart && start > autostart)
 				autostart = start;
 		}
 		if (*p == '-') {
-			if (isdigit((unsigned char)p[1]))
-				stop = strtol(p + 1, &p, 10);
+			if (isdigit((unsigned char)p[1])) {
+				++p;
+				stop = read_number(&p);
+			}
 			if (*p == '-') {
 				++p;
 				if (!autostop || autostop > stop)
 					autostop = stop;
 			}
 		}
-		if (*p)
+		if (*p != '\0' || !stop || !start)
 			errx(1, "[-bcf] list: illegal list value");
-		if (!stop || !start)
-			errx(1, "[-bcf] list: values may not include zero");
-		if (stop > _POSIX2_LINE_MAX)
-			errx(1, "[-bcf] list: %d too large (max %d)",
-			    stop, _POSIX2_LINE_MAX);
 		if (maxval < stop)
 			maxval = stop;
-		for (pos = positions + start; start++ <= stop; *pos++ = 1)
-			;
+		if (start <= stop)
+			memset(positions + start, 1, stop - start + 1);
 	}
 
 	/* overlapping ranges */
Index: regress/usr.bin/cut/cut.sh
===================================================================
RCS file: /cvs/src/regress/usr.bin/cut/cut.sh,v
retrieving revision 1.1
diff -u -p -u -p -r1.1 cut.sh
--- regress/usr.bin/cut/cut.sh	7 Oct 2016 17:22:12 -0000	1.1
+++ regress/usr.bin/cut/cut.sh	28 Mar 2018 07:21:28 -0000
@@ -16,15 +16,26 @@
 
 unset LC_ALL
 
+: ${CUT=cut}
+
 test_cut()
 {
-	args=`echo "$1"`
-	stdin=$2
-	expected=`echo "$3"`
+	expected_retval=$1
+	args=`echo "$2"`
+	stdin=$3
+	expected=`echo "$4"`
 	export LC_CTYPE=en_US.UTF-8
-	result=`echo -n "$stdin" | cut $args`
+	result=`echo -n "$stdin" | $CUT $args 2>/dev/null`
+	retval=$?
+	if [ "$retval" -ne "${expected_retval}" ]; then
+		echo "echo -n \"$stdin\" | $CUT $args"
+		echo -n "$stdin" | hexdump -C
+		echo "expected return value: \"${expected_retval}\""
+		echo "actual return value: \"$retval\""
+		exit 1;
+	fi
 	if [ "$result" != "${expected}" ]; then
-		echo "echo -n \"$stdin\" | cut $args"
+		echo "echo -n \"$stdin\" | $CUT $args"
 		echo -n "$stdin" | hexdump -C
 		echo "expected: \"$expected\""
 		echo -n "$expected" | hexdump -C
@@ -33,13 +44,20 @@ test_cut()
 		exit 1;
 	fi
 
-	if [ -n "$4" ]; then
-		expected=`echo "$4"`
+	if [ -n "$5" ]; then
+		expected=`echo "$5"`
 	fi
 	export LC_CTYPE=C
-	result=`echo -n "$stdin" | cut $args`
+	result=`echo -n "$stdin" | $CUT $args 2>/dev/null`
+	if [ "$retval" -ne "${expected_retval}" ]; then
+		echo "echo -n \"$stdin\" | $CUT $args"
+		echo -n "$stdin" | hexdump -C
+		echo "expected return value: \"${expected_retval}\""
+		echo "actual return value: \"$retval\""
+		exit 1;
+	fi
 	if [ "$result" != "${expected}" ]; then
-		echo "[C] echo -n \"$stdin\" | cut $args"
+		echo "[C] echo -n \"$stdin\" | $CUT $args"
 		echo -n "$stdin" | hexdump -C
 		echo "expected: \"$expected\""
 		echo -n "$expected" | hexdump -C
@@ -50,41 +68,49 @@ test_cut()
 }
 
 # single byte characters
-test_cut "-b 4,2" "abcde" "bd"
-test_cut "-b 2-4" "abcde" "bcd"
-test_cut "-b 4-,-2" "abcde" "abde"
-test_cut "-nb 4,2" "abcde" "bd"
-test_cut "-nb 2-4" "abcde" "bcd"
-test_cut "-nb 4-,-2" "abcde" "abde"
-test_cut "-c 4,2" "abcde" "bd"
-test_cut "-c 2-4" "abcde" "bcd"
-test_cut "-c 4-,-2" "abcde" "abde"
+test_cut 0 "-b 4,2" "abcde" "bd"
+test_cut 0 "-b 2-4" "abcde" "bcd"
+test_cut 0 "-b 4-,-2" "abcde" "abde"
+test_cut 0 "-nb 4,2" "abcde" "bd"
+test_cut 0 "-nb 2-4" "abcde" "bcd"
+test_cut 0 "-nb 4-,-2" "abcde" "abde"
+test_cut 0 "-c 4,2" "abcde" "bd"
+test_cut 0 "-c 2-4" "abcde" "bcd"
+test_cut 0 "-c 4-,-2" "abcde" "abde"
 
 # multibyte characters
-test_cut "-b 2-3" "ax\0314\0200b" "x\0314"
-test_cut "-b 1,3" "ax\0314\0200b" "a\0314"
-test_cut "-nb 2-3" "ax\0314\0200b" "x" "x\0314"
-test_cut "-nb 1,3" "ax\0314\0200b" "a" "a\0314"
-test_cut "-nb 2,4" "ax\0314\0200b" "x\0314\0200" "x\0200"
-test_cut "-c 2-3" "ax\0314\0200b" "x\0314\0200" "x\0314"
-test_cut "-c 1,3" "ax\0314\0200b" "a\0314\0200" "a\0314"
+test_cut 0 "-b 2-3" "ax\0314\0200b" "x\0314"
+test_cut 0 "-b 1,3" "ax\0314\0200b" "a\0314"
+test_cut 0 "-nb 2-3" "ax\0314\0200b" "x" "x\0314"
+test_cut 0 "-nb 1,3" "ax\0314\0200b" "a" "a\0314"
+test_cut 0 "-nb 2,4" "ax\0314\0200b" "x\0314\0200" "x\0200"
+test_cut 0 "-c 2-3" "ax\0314\0200b" "x\0314\0200" "x\0314"
+test_cut 0 "-c 1,3" "ax\0314\0200b" "a\0314\0200" "a\0314"
 
 # double width multibyte characters
-test_cut "-b -3" "a\0354\0277\0277b" "a\0354\0277"
-test_cut "-nb 4-" "a\0354\0277\0277b" "\0354\0277\0277b" "\0277b"
-test_cut "-c 2" "a\0354\0277\0277b" "\0354\0277\0277" "\0354"
+test_cut 0 "-b -3" "a\0354\0277\0277b" "a\0354\0277"
+test_cut 0 "-nb 4-" "a\0354\0277\0277b" "\0354\0277\0277b" "\0277b"
+test_cut 0 "-c 2" "a\0354\0277\0277b" "\0354\0277\0277" "\0354"
 
 # invalid bytes
-test_cut "-b -2" "a\0377\0277b" "a\0377"
-test_cut "-b 3-" "a\0377\0277b" "\0277b"
-test_cut "-nb 2-5" "\0303\0251\0377\0277\0303\0251" "\0303\0251\0377\0277" \
+test_cut 0 "-b -2" "a\0377\0277b" "a\0377"
+test_cut 0 "-b 3-" "a\0377\0277b" "\0277b"
+test_cut 0 "-nb 2-5" "\0303\0251\0377\0277\0303\0251" "\0303\0251\0377\0277" \
 	"\0251\0377\0277\0303"
-test_cut "-c 4,1" "\0303\0251\0377\0277\0303\0250" "\0303\0251\0303\0250" \
+test_cut 0 "-c 4,1" "\0303\0251\0377\0277\0303\0250" "\0303\0251\0303\0250" \
 	"\0303\0277"
 
 # multibyte delimiter
-test_cut "-d \0302\0267 -f 2" "a\0302\0267b\0302\0267c" "b" "\0267b"
-test_cut "-d \0302\0267 -f 3,2" "a\0302\0267b\0302\0267c" "b\0302\0267c" \
+test_cut 0 "-d \0302\0267 -f 2" "a\0302\0267b\0302\0267c" "b" "\0267b"
+test_cut 0 "-d \0302\0267 -f 3,2" "a\0302\0267b\0302\0267c" "b\0302\0267c" \
 	"\0267b\0302\0267c"
+
+# invalid list values
+test_cut 1 "-b 2,-,4"
+test_cut 1 "-c 2,--,4"
+test_cut 1 "-f 2,---,4"
+test_cut 1 "-b 0-1"
+test_cut 1 "-c 2147483648"
+test_cut 1 "-f not,a-number"
 
 exit 0

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic