SVN commit 636908 by porten:

avoid syntax error on incomplete \u escapes in regexps.


 M  +7 -0      ChangeLog  
 M  +26 -13    regexp.cpp  


--- branches/KDE/3.5/kdelibs/kjs/ChangeLog #636907:636908
@@ -1,3 +1,10 @@
+2007-02-24  Harri Porten  <porten@kde.org>
+
+	* regexp.cpp: gracefully handle incomplete \u sequences in regular
+	expressions the IE way. Fixes the syntax error reported in bug
+	#141731 although the page might be relying on the equally
+	undefined FF way.
+
 2007-02-23  Harri Porten  <porten@kde.org>
 
 	* number_object.cpp: applied patch by Justin that fixes
--- branches/KDE/3.5/kdelibs/kjs/regexp.cpp #636907:636908
@@ -60,19 +60,33 @@
       UChar c = p[i];
       if (escape) {
         escape = false;
-        // we only care about \uxxxx
-        if (c == 'u' && i + 4 < p.size()) {
-          int c0 = p[i+1].unicode();
-          int c1 = p[i+2].unicode();
-          int c2 = p[i+3].unicode();
-          int c3 = p[i+4].unicode();
-          if (Lexer::isHexDigit(c0) && Lexer::isHexDigit(c1) &&
-              Lexer::isHexDigit(c2) && Lexer::isHexDigit(c3)) {
-            c = Lexer::convertUnicode(c0, c1, c2, c3);
-            switch (c.unicode()) {
+        // we only care about \u
+        if (c == 'u') {
+	  // standard unicode escape sequence looks like \uxxxx but
+	  // other browsers also accept less then 4 hex digits
+	  unsigned short u = 0;
+	  int j = 0;
+	  for (j = 0; j < 4; ++j) {
+	    if (i + 1 < p.size() && Lexer::isHexDigit(p[i + 1].unicode())) {
+	      u = (u << 4) + Lexer::convertHex(p[i + 1].unicode());
+	      ++i;
+	    } else {
+	      // sequence incomplete. restore index.
+	      // TODO: cleaner way to propagate warning
+	      fprintf(stderr, "KJS: saw %d digit \\u sequence.\n", j);
+	      i -= j;
+	      break;
+	    }
+	  }
+	  if (j < 4) {
+	    // sequence was incomplete. treat \u as u which IE always
+	    // and FF sometimes does.
+	    intern.append(UString('u'));
+	  } else {
+            c = UChar(u);
+            switch (u) {
             case 0:
 	      // Make sure to encode 0, to avoid terminating the string
-	      fprintf(stderr, "NULL escape\n");
 	      intern += UString(nil);
 	      break;
             case '^':
@@ -93,9 +107,8 @@
 	      intern += UString(&c, 1);
 	      break;
 	    }
-            i += 4;
-            continue;
           }
+          continue;
         }
         intern += UString('\\');
         intern += UString(&c, 1);