[prev in list] [next in list] [prev in thread] [next in thread] 

List:       openbsd-tech
Subject:    Re: dd(1): overhaul operand parsing
From:       Scott Cheloha <scottcheloha () gmail ! com>
Date:       2018-03-31 17:48:42
Message-ID: 20180331174842.l2xsmtgipclu7vvz () rascal ! austin ! ibm ! com
[Download RAW message or body]

Updated diff with changes from tobias@:

This patch now removes (u_int) casts from input/output buffer
allocation in dd.c, which was incorrect beforehand anyway, but
with the other changes here was manifesting as a segfault for
combinations of cbs and ibs/obs that overflowed u_int.

--
Scott Cheloha

Index: bin/dd/args.c
===================================================================
RCS file: /cvs/src/bin/dd/args.c,v
retrieving revision 1.29
diff -u -p -r1.29 args.c
--- bin/dd/args.c	3 Jan 2018 19:12:20 -0000	1.29
+++ bin/dd/args.c	31 Mar 2018 17:47:04 -0000
@@ -39,8 +39,10 @@
 
 #include <err.h>
 #include <errno.h>
+#include <ctype.h>
 #include <limits.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -62,6 +64,9 @@ static void	f_skip(char *);
 static void	f_status(char *);
 static size_t	get_bsz(char *);
 static off_t	get_off(char *);
+static unsigned long long
+		get_expr(const char *, unsigned long long, unsigned long long);
+static long long get_multiplier(const char *);
 
 static const struct arg {
 	const char *name;
@@ -94,6 +99,7 @@ jcl(char **argv)
 	char *arg;
 
 	in.dbsz = out.dbsz = 512;
+	files_cnt = 1;
 
 	while ((oper = *++argv) != NULL) {
 		if ((oper = strdup(oper)) == NULL)
@@ -138,8 +144,6 @@ jcl(char **argv)
 	if (ddflags & (C_BLOCK|C_UNBLOCK)) {
 		if (!(ddflags & C_CBS))
 			errx(1, "record operations require cbs");
-		if (cbsz == 0)
-			errx(1, "cbs cannot be zero");
 		cfunc = ddflags & C_BLOCK ? block : unblock;
 	} else if (ddflags & C_CBS) {
 		if (ddflags & (C_ASCII|C_EBCDIC)) {
@@ -152,23 +156,14 @@ jcl(char **argv)
 			}
 		} else
 			errx(1, "cbs meaningless if not doing record operations");
-		if (cbsz == 0)
-			errx(1, "cbs cannot be zero");
 	} else
 		cfunc = def;
 
-	if (in.dbsz == 0 || out.dbsz == 0)
-		errx(1, "buffer sizes cannot be zero");
-
-	/*
-	 * Read and write take size_t's as arguments.  Lseek, however,
-	 * takes an off_t.
-	 */
-	if (cbsz > SSIZE_MAX || in.dbsz > SSIZE_MAX || out.dbsz > SSIZE_MAX)
-		errx(1, "buffer sizes cannot be greater than %zd",
-		    (ssize_t)SSIZE_MAX);
-	if (in.offset > LLONG_MAX / in.dbsz || out.offset > LLONG_MAX / out.dbsz)
-		errx(1, "seek offsets cannot be larger than %lld", LLONG_MAX);
+	/* Ensure the seek won't overflow. */
+	if (in.offset != 0 && in.dbsz > LLONG_MAX / in.offset)
+		errx(1, "total skip is too large");
+	if (out.offset != 0 && out.dbsz > LLONG_MAX / out.offset)
+		errx(1, "total seek is too large");
 }
 
 static int
@@ -196,15 +191,14 @@ static void
 f_count(char *arg)
 {
 
-	if ((cpy_cnt = get_bsz(arg)) == 0)
-		cpy_cnt = (size_t)-1;
+	cpy_cnt = get_expr(arg, 0, ULLONG_MAX);
 }
 
 static void
 f_files(char *arg)
 {
 
-	files_cnt = get_bsz(arg);
+	files_cnt = get_expr(arg, 0, ULLONG_MAX);
 }
 
 static void
@@ -307,165 +301,108 @@ f_conv(char *arg)
 	}
 }
 
-/*
- * Convert an expression of the following forms to a size_t
- *	1) A positive decimal number, optionally followed by
- *		b - multiply by 512.
- *		k, m or g - multiply by 1024 each.
- *		w - multiply by sizeof int
- *	2) Two or more of the above, separated by x
- *	   (or * for backwards compatibility), specifying
- *	   the product of the indicated values.
- */
 static size_t
-get_bsz(char *val)
+get_bsz(char *arg)
 {
-	size_t num, t;
-	char *expr;
+	/* read(2) and write(2) can't handle more than SSIZE_MAX bytes. */
+	return get_expr(arg, 1, SSIZE_MAX);
+}
 
-	if (strchr(val, '-'))
-		errx(1, "%s: illegal numeric value", oper);
+static off_t
+get_off(char *arg)
+{
+	return get_expr(arg, 0, LLONG_MAX);
+}
 
-	errno = 0;
-	num = strtoul(val, &expr, 0);
-	if (num == ULONG_MAX && errno == ERANGE)	/* Overflow. */
-		err(1, "%s", oper);
-	if (expr == val)			/* No digits. */
-		errx(1, "%s: illegal numeric value", oper);
+/*
+ * An expression is composed of one or more terms separated by exes ('x').
+ * Terms separated by asterisks ('*') are supported for backwards
+ * compatibility.  The value of an expression is the product of its terms.
+ *
+ * A term is composed of a non-negative decimal integer and an optional
+ * suffix of a single character.  Suffixes are valued as follows:
+ *
+ *	b   - 512		("block")
+ *	G|g - 1073741824	("gigabyte")
+ *	K|k - 1024		("kilobyte")
+ *	M|m - 1048576		("megabyte")
+ *	w   - sizeof(int)	("word")
+ *
+ * The value of a term without a suffix is that of its integer.  The value
+ * of a term with a suffix is the product of its integer and its suffix.
+ */
+static unsigned long long
+get_expr(const char *arg, unsigned long long min, unsigned long long max)
+{
+	unsigned long long number, value;
+	long long multiplier;
+	char *expr, *term, *suffix;
 
-	switch(*expr) {
-	case 'b':
-		t = num;
-		num *= 512;
-		if (t > num)
-			goto erange;
-		++expr;
-		break;
-	case 'g':
-	case 'G':
-		t = num;
-		num *= 1024;
-		if (t > num)
-			goto erange;
-		/* fallthrough */
-	case 'm':
-	case 'M':
-		t = num;
-		num *= 1024;
-		if (t > num)
-			goto erange;
-		/* fallthrough */
-	case 'k':
-	case 'K':
-		t = num;
-		num *= 1024;
-		if (t > num)
-			goto erange;
-		++expr;
-		break;
-	case 'w':
-		t = num;
-		num *= sizeof(int);
-		if (t > num)
-			goto erange;
-		++expr;
-		break;
-	}
+	value = 1;
+
+	expr = strdup(arg);
+	if (expr == NULL)
+		err(1, NULL);
 
-	switch(*expr) {
-		case '\0':
-			break;
-		case '*':			/* Backward compatible. */
-		case 'x':
-			t = num;
-			num *= get_bsz(expr + 1);
-			if (t > num)
-				goto erange;
-			break;
-		default:
-			errx(1, "%s: illegal numeric value", oper);
+	while ((term = strsep(&expr, "*x")) != NULL) {
+		/*
+		 * strtoull(3) ignores leading whitespace and quietly
+		 * underflows terms prefixed with a '-', so we have to check.
+		 */
+		if (!isdigit((unsigned char)term[0]))
+			errx(1, "%s is invalid: %s", oper, arg);
+		errno = 0;
+		number = strtoull(term, &suffix, 10);
+		if (term[0] == '\0' || suffix == term)
+			errx(1, "%s is invalid: %s", oper, arg);
+		if (number < min)
+			errx(1, "%s is too small: %s", oper, arg);
+		if (number > max || (number == ULLONG_MAX && errno == ERANGE))
+			errx(1, "%s is too large: %s", oper, arg);
+		multiplier = get_multiplier(suffix);
+		if (multiplier == -1)
+			errx(1, "%s is invalid: %s", oper, arg);
+		if (number > max / multiplier ||
+		    (value > 0 && number * multiplier > max / value))
+			errx(1, "%s is too large: %s", oper, arg);
+		value *= number * multiplier;
 	}
-	return (num);
-erange:
-	errc(1, ERANGE, "%s", oper);
+	free(expr);
+	return (value);
 }
 
-/*
- * Convert an expression of the following forms to an off_t
- *	1) A positive decimal number, optionally followed by
- *		b - multiply by 512.
- *		k, m or g - multiply by 1024 each.
- *		w - multiply by sizeof int
- *	2) Two or more of the above, separated by x
- *	   (or * for backwards compatibility), specifying
- *	   the product of the indicated values.
- */
-static off_t
-get_off(char *val)
+static long long
+get_multiplier(const char *str)
 {
-	off_t num, t;
-	char *expr;
-
-	errno = 0;
-	num = strtoll(val, &expr, 0);
-	if (num == LLONG_MAX && errno == ERANGE)	/* Overflow. */
-		err(1, "%s", oper);
-	if (expr == val)			/* No digits. */
-		errx(1, "%s: illegal numeric value", oper);
+	long long value;
+	int unknown = 0;
 
-	switch(*expr) {
+	switch(str[0]) {
+	case '\0':
+		value = 1;
+		break;
 	case 'b':
-		t = num;
-		num *= 512;
-		if (t > num)
-			goto erange;
-		++expr;
+		value = 512;
 		break;
-	case 'g':
 	case 'G':
-		t = num;
-		num *= 1024;
-		if (t > num)
-			goto erange;
-		/* fallthrough */
-	case 'm':
-	case 'M':
-		t = num;
-		num *= 1024;
-		if (t > num)
-			goto erange;
-		/* fallthrough */
-	case 'k':
+	case 'g':
+		value = 1073741824LL;
+		break;
 	case 'K':
-		t = num;
-		num *= 1024;
-		if (t > num)
-			goto erange;
-		++expr;
+	case 'k':
+		value = 1024;
+		break;
+	case 'M':
+	case 'm':
+		value = 1048576;
 		break;
 	case 'w':
-		t = num;
-		num *= sizeof(int);
-		if (t > num)
-			goto erange;
-		++expr;
+		value = sizeof(int);
 		break;
+	default:
+		unknown = 1;
 	}
-
-	switch(*expr) {
-		case '\0':
-			break;
-		case '*':			/* Backward compatible. */
-		case 'x':
-			t = num;
-			num *= get_off(expr + 1);
-			if (t > num)
-				goto erange;
-			break;
-		default:
-			errx(1, "%s: illegal numeric value", oper);
-	}
-	return (num);
-erange:
-	errc(1, ERANGE, "%s", oper);
+	if (unknown || (str[0] != '\0' && str[1] != '\0'))
+		value = -1;
+	return (value);
 }
Index: bin/dd/dd.c
===================================================================
RCS file: /cvs/src/bin/dd/dd.c,v
retrieving revision 1.24
diff -u -p -r1.24 dd.c
--- bin/dd/dd.c	13 Aug 2017 02:06:42 -0000	1.24
+++ bin/dd/dd.c	31 Mar 2018 17:47:04 -0000
@@ -62,10 +62,10 @@ static void setup(void);
 IO	in, out;		/* input/output state */
 STAT	st;			/* statistics */
 void	(*cfunc)(void);		/* conversion function */
-size_t	cpy_cnt;		/* # of blocks to copy */
+unsigned long long cpy_cnt;	/* # of blocks to copy */
 u_int	ddflags;		/* conversion options */
 size_t	cbsz;			/* conversion block size */
-size_t	files_cnt = 1;		/* # of files to copy */
+unsigned long long files_cnt;	/* # of files to copy */
 const	u_char	*ctab;		/* conversion table */
 
 int
@@ -79,7 +79,7 @@ main(int argc, char *argv[])
 
 	atexit(summary);
 
-	if (cpy_cnt != (size_t)-1) {
+	if (!(ddflags & C_COUNT) || cpy_cnt > 0) {
 		while (files_cnt--)
 			dd_in();
 	}
@@ -102,7 +102,7 @@ setup(void)
 
 	getfdtype(&in);
 
-	if (files_cnt > 1 && !(in.flags & ISTAPE))
+	if ((ddflags & C_FILES) && !(in.flags & ISTAPE))
 		errx(1, "files is not supported for non-tape devices");
 
 	if (out.name == NULL) {
@@ -136,10 +136,14 @@ setup(void)
 		if ((in.db = malloc(out.dbsz + in.dbsz - 1)) == NULL)
 			err(1, "input buffer");
 		out.db = in.db;
-	} else if ((in.db =
-	    malloc((u_int)(MAXIMUM(in.dbsz, cbsz) + cbsz))) == NULL ||
-	    (out.db = malloc((u_int)(out.dbsz + cbsz))) == NULL)
-		err(1, "output buffer");
+	} else {
+		in.db = malloc(MAXIMUM(in.dbsz, cbsz) + cbsz);
+		if (in.db == NULL)
+			err(1, "input buffer");
+		out.db = malloc(out.dbsz + cbsz);
+		if (out.db == NULL)
+			err(1, "output buffer");
+	}
 	in.dbp = in.db;
 	out.dbp = out.db;
 
@@ -232,7 +236,7 @@ dd_in(void)
 	ssize_t n;
 
 	for (;;) {
-		if (cpy_cnt && (st.in_full + st.in_part) >= cpy_cnt)
+		if ((ddflags & C_COUNT) && st.in_full + st.in_part >= cpy_cnt)
 			return;
 
 		/*
Index: bin/dd/extern.h
===================================================================
RCS file: /cvs/src/bin/dd/extern.h,v
retrieving revision 1.9
diff -u -p -r1.9 extern.h
--- bin/dd/extern.h	27 Mar 2014 15:32:13 -0000	1.9
+++ bin/dd/extern.h	31 Mar 2018 17:47:04 -0000
@@ -53,10 +53,10 @@ void unblock_close(void);
 extern IO in, out;
 extern STAT st;
 extern void (*cfunc)(void);
-extern size_t cpy_cnt;
+extern unsigned long long cpy_cnt;
 extern size_t cbsz;
 extern u_int ddflags;
-extern size_t files_cnt;
+extern unsigned long long files_cnt;
 extern const u_char *ctab;
 extern const u_char a2e_POSIX[];
 extern const u_char e2a_POSIX[];

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic