SVN commit 636908 by porten: avoid syntax error on incomplete \u escapes in regexps. M +7 -0 ChangeLog M +26 -13 regexp.cpp --- branches/KDE/3.5/kdelibs/kjs/ChangeLog #636907:636908 @@ -1,3 +1,10 @@ +2007-02-24 Harri Porten + + * regexp.cpp: gracefully handle incomplete \u sequences in regular + expressions the IE way. Fixes the syntax error reported in bug + #141731 although the page might be relying on the equally + undefined FF way. + 2007-02-23 Harri Porten * number_object.cpp: applied patch by Justin that fixes --- branches/KDE/3.5/kdelibs/kjs/regexp.cpp #636907:636908 @@ -60,19 +60,33 @@ UChar c = p[i]; if (escape) { escape = false; - // we only care about \uxxxx - if (c == 'u' && i + 4 < p.size()) { - int c0 = p[i+1].unicode(); - int c1 = p[i+2].unicode(); - int c2 = p[i+3].unicode(); - int c3 = p[i+4].unicode(); - if (Lexer::isHexDigit(c0) && Lexer::isHexDigit(c1) && - Lexer::isHexDigit(c2) && Lexer::isHexDigit(c3)) { - c = Lexer::convertUnicode(c0, c1, c2, c3); - switch (c.unicode()) { + // we only care about \u + if (c == 'u') { + // standard unicode escape sequence looks like \uxxxx but + // other browsers also accept less than 4 hex digits + unsigned short u = 0; + int j = 0; + for (j = 0; j < 4; ++j) { + if (i + 1 < p.size() && Lexer::isHexDigit(p[i + 1].unicode())) { + u = (u << 4) + Lexer::convertHex(p[i + 1].unicode()); + ++i; + } else { + // sequence incomplete. restore index. + // TODO: cleaner way to propagate warning + fprintf(stderr, "KJS: saw %d digit \\u sequence.\n", j); + i -= j; + break; + } + } + if (j < 4) { + // sequence was incomplete. treat \u as u which IE always + // and FF sometimes does. 
+ intern.append(UString('u')); + } else { + c = UChar(u); + switch (u) { case 0: // Make sure to encode 0, to avoid terminating the string - fprintf(stderr, "NULL escape\n"); intern += UString(nil); break; case '^': @@ -93,9 +107,8 @@ intern += UString(&c, 1); break; } - i += 4; - continue; } + continue; } intern += UString('\\'); intern += UString(&c, 1);