[prev in list] [next in list] [prev in thread] [next in thread]
List: dash
Subject: Re: ${#a#a} bug?
From: "Vladimir N. Oleynik" <dzo () simtreas ! ru>
Date: 2021-02-10 9:31:13
Message-ID: 313e78ef-747a-2582-6cad-bdc2c9decd92 () simtreas ! ru
[Download RAW message or body]
Hello.
Vladimir N. Oleynik wrote:
> $ a=abc
> $ echo ${#a#a}
>
> dash-0.5.11.3 produce:
> 3a
> bash:
> ${#a#a}: bad substitution
OK. I rewrote the variable substitution parser.
It now:
* strictly detects bad substitutions
* supports indirection: ${!var}
* can skip over unsupported syntax: ${v/p/x} ${v//p/x} ${v:offs:l} ${v@trans}
${!prev*} ${!prev@} and mark it as "bad substitution".
My variant of parser.c is ready to parse this syntax,
but expand.c is too knotty for me to implement this syntax there.
* also slightly simplifies the expansion of $- :)
--w
vodz
["substitute.diff" (text/x-patch)]
--- expand.c.orig 2021-02-08 23:23:54.000000000 +0400
+++ expand.c 2021-02-10 12:58:11.508816065 +0400
@@ -854,6 +854,9 @@
(flags & EXP_DISCARD);
ssize_t len = 0;
char c;
+ char str_opts[NOPTS + 1];
+ int len_for_num = max_int_length(sizeof(int));
+ char str_num[len_for_num];
if (!subtype) {
if (discard)
@@ -866,6 +869,7 @@
flags &= discard ? ~QUOTES_ESC : ~0;
sep = (flags & EXP_FULL) << CHAR_BIT;
+again:
switch (*name) {
case '$':
num = rootpid;
@@ -881,18 +885,16 @@
if (num == 0)
return -1;
numvar:
- len = cvtnum(num, flags);
- break;
+ fmtstr(p = str_num, len_for_num, "%d", num);
+ goto value;
case '-':
- p = makestrspace(NOPTS, expdest);
for (i = NOPTS - 1; i >= 0; i--) {
- if (optlist[i] && optletters[i]) {
- USTPUTC(optletters[i], p);
- len++;
- }
+ if (optlist[i] && optletters[i])
+ str_opts[len++] = optletters[i];
}
- expdest = p;
- break;
+ str_opts[len] = 0;
+ p = str_opts;
+ goto value;
case '@':
if (quoted && sep)
goto param;
@@ -945,9 +947,37 @@
goto value;
default:
p = lookupvar(name);
-value:
if (!p)
return -1;
+value:
+ if (subtype == VSINDIRECT) {
+ char *v;
+ int ok = 0, alpha = 0;
+
+ for (v = p; *v; v++) {
+ if (is_name(*v)) {
+ if (alpha < 0)
+ goto errv;
+ else
+ alpha = 1;
+ } else if (isdigit(*v)) {
+ if (!alpha) alpha = -1;
+ else if (alpha == -2) goto errv;
+ } else if (ok == 0 && is_special(*v)) {
+ alpha = -2;
+ } else {
+ errv:
+ ok = 0; break;
+ }
+ ok = 1;
+ }
+
+ if (!ok)
+ sh_error("Bad variable name for indirect");
+ name = p;
+ subtype = 0;
+ goto again;
+ }
len = strtodest(p, flags);
break;
--- parser.h.orig 2021-02-10 01:47:34.000000000 +0400
+++ parser.h 2021-02-10 13:13:59.276818541 +0400
@@ -48,20 +48,25 @@
#define CTL_LAST -120 /* last 'special' character */
/* variable substitution byte (follows CTLVAR) */
-#define VSTYPE 0x0f /* type of variable substitution */
-#define VSNUL 0x10 /* colon--treat the empty string as unset */
-
+#define VSTYPE 0x0f /* type of variable substitution */
+#define VSNUL 0x10 /* colon+VSTYPE -- treat the empty string as unset,
+ or unrealized ${var:offset[:lenght]} */
+#define VSBADSUB 0 /* bad substitunion */
/* values of VSTYPE field */
-#define VSNORMAL 0x1 /* normal variable: $var or ${var} */
-#define VSMINUS 0x2 /* ${var-text} */
-#define VSPLUS 0x3 /* ${var+text} */
-#define VSQUESTION 0x4 /* ${var?message} */
-#define VSASSIGN 0x5 /* ${var=text} */
-#define VSTRIMRIGHT 0x6 /* ${var%pattern} */
-#define VSTRIMRIGHTMAX 0x7 /* ${var%%pattern} */
-#define VSTRIMLEFT 0x8 /* ${var#pattern} */
-#define VSTRIMLEFTMAX 0x9 /* ${var##pattern} */
-#define VSLENGTH 0xa /* ${#var} */
+#define VSNORMAL 0x1 /* normal variable: $var or ${var} */
+#define VSMINUS 0x2 /* ${var-text} */
+#define VSPLUS 0x3 /* ${var+text} */
+#define VSQUESTION 0x4 /* ${var?message} */
+#define VSASSIGN 0x5 /* ${var=text} */
+#define VSTRIMRIGHT 0x6 /* ${var%pattern} */
+#define VSTRIMRIGHTMAX 0x7 /* ${var%%pattern} */
+#define VSTRIMLEFT 0x8 /* ${var#pattern} */
+#define VSTRIMLEFTMAX 0x9 /* ${var##pattern} */
+#define VSLENGTH 0xa /* ${#var} */
+#define VSINDIRECT 0xb /* ${!var} */
+#define VSPATERN 0xc /* ${var/pattern/string} unrealized */
+#define VSPATERNALL 0xd /* ${var//pattern/string} unrealized */
+#define VSTRANS 0xe /* ${var@transformation} unrealized */
/* values of checkkwd variable */
#define CHKALIAS 0x1
--- parser.c.orig 2021-02-10 01:47:50.000000000 +0400
+++ parser.c 2021-02-10 12:47:27.609814383 +0400
@@ -1235,8 +1235,9 @@
parsesub: {
int subtype;
int typeloc;
- char *p;
- static const char types[] = "}-+?=";
+ const char *newsyn;
+ int lenght;
+ int alpha;
c = pgetc_eatbnl();
if (
@@ -1246,6 +1247,7 @@
) {
USTPUTC('$', out);
pungetc();
+ goto parsesub_return;
} else if (c == '(') { /* $(command) or $((arith)) */
if (pgetc_eatbnl() == '(') {
PARSEARITH();
@@ -1253,113 +1255,176 @@
pungetc();
PARSEBACKQNEW();
}
- } else {
- const char *newsyn = synstack->syntax;
+ goto parsesub_return;
+ }
+
+ newsyn = synstack->syntax;
+ USTPUTC(CTLVAR, out);
+ typeloc = out - (char *)stackblock();
+ STADJUST(1, out);
- USTPUTC(CTLVAR, out);
- typeloc = out - (char *)stackblock();
- STADJUST(1, out);
+ if (c != '{') {
subtype = VSNORMAL;
- if (likely(c == '{')) {
- c = pgetc_eatbnl();
- subtype = 0;
- }
-varname:
- if (is_name(c)) {
- do {
- STPUTC(c, out);
- c = pgetc_eatbnl();
- } while (is_in_name(c));
- } else if (is_digit(c)) {
+ if(is_special(c)) {
+ /* $[!#$*-0123456789?@] */
+ STPUTC(c, out);
+ } else {
+ /* found simple $var */
do {
STPUTC(c, out);
c = pgetc_eatbnl();
- } while (!subtype && is_digit(c));
- } else if (c != '}') {
- int cc = c;
+ } while(is_in_name(c));
+ pungetc ();
+ }
+ goto eov;
+ }
+ subtype = lenght = alpha = 0;
+ c = pgetc_eatbnl();
+ if (c == '#') {
+ /* very special */
+ c = pgetc_eatbnl();
+ if (is_in_name(c) || (is_special(c) && c != '#')) {
+ /* lenght($var) */
+ subtype = VSLENGTH;
+ goto varc;
+ }
+ if (c != '#') {
+ /* $# and may be mods */
+ var_is_num_arg:
+ pungetc ();
+ c = '#';
+ goto var_spec;
+ }
+ /* ${##... */
+ c = pgetc_eatbnl();
+ if (c == '}') {
+ /* ${##} as lenght($#) */
+ subtype = VSLENGTH;
+ goto var_is_num_arg;
+ }
+ /* ${##(#)[#]*...} as (max)stripleft($#, [#]*...) */
+ pungetc ();
+ STPUTC('#', out);
+ c = '#';
+ goto var_num_sub;
+ } else if (c == '!') {
+ c = pgetc_eatbnl();
+ if ((is_special(c) && c != '@' && c != '*') || is_in_name(c)) {
+ /* ${!var} */
+ subtype = VSINDIRECT;
+ goto varc;
+ } else {
+ /* $! and may be mods */
+ pungetc ();
+ c = '!';
+ goto var_spec;
+ }
+ }
+ while (1) {
+ if (c == '}') {
+ if (!lenght)
+ subtype = VSBADSUB; /* bad: ${} */
+ if (subtype)
+ pungetc ();
+ else
+ subtype = VSNORMAL;
+ goto eov;
+ }
+ varc:
+ if (is_name(c)) {
+ if (alpha < 0)
+ subtype = VSBADSUB; /* bad: ${ [!#]? [0-9]*|specvar and _alpha* */
+ else
+ alpha = 1;
+ } else if (isdigit(c)) {
+ if (!alpha) alpha = -1; /* ${ [!#]? [0-9]* and _alpha* -> bad */
+ else if (alpha == -2) subtype = VSBADSUB; /* bad: ${ [!#]? and specvar and [0-9]* */
+ } else if(!lenght && is_special(c)) {
+ var_spec:
+ alpha = -2; /* ${ [!#]? and specvar and is_in_name* -> bad */
+ } else
+ break;
+ STPUTC(c, out);
+ lenght = 1;
+ c = pgetc_eatbnl();
+ }
+ if (!subtype) {
+ var_num_sub:
+ lenght = 0;
+ switch (c) {
+ case '#': subtype = VSTRIMLEFT; lenght = 2; break;
+ case '%': subtype = VSTRIMRIGHT; lenght = 2; break;
+ case '/': subtype = VSPATERN; lenght = 2; break;
+ case '@': subtype = VSTRANS; lenght = 1; break;
+ }
+ if (lenght) {
+ if (lenght == 2) {
+ if (c == pgetc_eatbnl())
+ subtype |= 1;
+ else
+ pungetc ();
+ }
+ newsyn = BASESYNTAX;
+ goto eov;
+ }
+ if (c == ':') {
c = pgetc_eatbnl();
-
- if (!subtype && cc == '#') {
- subtype = VSLENGTH;
-
- if (c == '_' || isalnum(c))
- goto varname;
-
- cc = c;
- c = pgetc_eatbnl();
- if (cc == '}' || c != '}') {
- pungetc();
- subtype = 0;
- c = cc;
- cc = '#';
- }
+ if (c == '}') {
+ /* ${var:} bad: empty expr syntax eq 0,
+ but for ${@}/${*} default is 1
+ its a conflict, simple - set as error */
+ subtype = VSBADSUB;
+ pungetc ();
+ goto eov;
}
-
- if (!is_special(cc)) {
- if (subtype == VSLENGTH)
- subtype = 0;
- goto badsub;
+ subtype = VSNUL;
+ }
+ {
+ static const char types[] = "-+?=";
+ const char *p = strchr(types, c);
+
+ if (p != NULL) {
+ subtype |= VSMINUS + (p - types);
+ goto eov;
}
+ }
- USTPUTC(cc, out);
- } else
- goto badsub;
-
- if (subtype == 0) {
- int cc = c;
-
- switch (c) {
- case ':':
- subtype = VSNUL;
- c = pgetc_eatbnl();
- /*FALLTHROUGH*/
- default:
- p = strchr(types, c);
- if (p == NULL)
- break;
- subtype |= p - types + VSNORMAL;
- break;
- case '%':
- case '#':
- subtype = c == '#' ? VSTRIMLEFT :
- VSTRIMRIGHT;
- c = pgetc_eatbnl();
- if (c == cc)
- subtype++;
- else
- pungetc();
-
- newsyn = BASESYNTAX;
- break;
- }
+ pungetc ();
+ if (subtype == VSNUL) {
+ newsyn = ARISYNTAX; /* ${var:offset[:lenght]} */
} else {
-badsub:
- pungetc();
+ /* unknown char for subtype==VSNORMAL */
+ subtype = VSBADSUB;
}
+ } else {
+ /* expected '}' but unknown char for subtype==lenght|indirect */
+ subtype = VSBADSUB;
+ pungetc ();
+ }
- if (newsyn == ARISYNTAX)
- newsyn = DQSYNTAX;
+ eov:
+ if (subtype == VSBADSUB)
+ newsyn = BASESYNTAX;
- if ((newsyn != synstack->syntax || synstack->innerdq) &&
- subtype != VSNORMAL) {
- synstack_push(&synstack,
- synstack->prev ?:
- alloca(sizeof(*synstack)),
- newsyn);
-
- synstack->varpushed++;
- synstack->dblquote = newsyn != BASESYNTAX;
- }
-
- *((char *)stackblock() + typeloc) = subtype;
- if (subtype != VSNORMAL) {
- synstack->varnest++;
- if (synstack->dblquote)
- synstack->dqvarnest++;
- }
- STPUTC('=', out);
+ if ((newsyn != synstack->syntax || synstack->innerdq) && subtype != VSNORMAL) {
+ synstack_push(&synstack, synstack->prev ?: alloca(sizeof(*synstack)), newsyn);
+
+ synstack->varpushed++;
+ synstack->dblquote = newsyn != BASESYNTAX;
+ }
+
+ /* unrealized -> to VSBADSUB */
+ if (subtype == VSNUL || subtype == VSPATERN || subtype == VSPATERNALL || subtype == VSTRANS) {
+ subtype = VSBADSUB;
+ }
+ *((char *)stackblock() + typeloc) = subtype;
+ if (subtype != VSNORMAL) {
+ synstack->varnest++;
+ if (synstack->dblquote)
+ synstack->dqvarnest++;
}
+ STPUTC('=', out);
goto parsesub_return;
}
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic