[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    KDE/kdelibs/kjs
From:       Maks Orlovich <maksim () kde ! org>
Date:       2010-06-05 18:10:04
Message-ID: 20100605181004.8BD37AC8CA () svn ! kde ! org
[Download RAW message or body]

SVN commit 1134920 by orlovich:

Fix problems with reentry of operations on the same regexp object
from helper JS functions passed to its ops, by properly separating out 
per-match state out of RegExp.

BUG:225332
CCBUG: 213606
(#213606 no longer crashes, but I am concerned about an another warning in the vg log \
there)



 M  +20 -23    regexp.cpp  
 M  +27 -16    regexp.h  
 M  +7 -5      regexp_object.cpp  
 M  +2 -1      regexp_object.h  
 M  +11 -12    string_object.cpp  


--- trunk/KDE/kdelibs/kjs/regexp.cpp #1134919:1134920
@@ -261,7 +261,7 @@
 int RegExp::availableStackSize = 8*1024*1024;
 
 RegExp::RegExp(const UString &p, char flags)
-  : _pat(p), _flags(flags), _valid(true), _numSubPatterns(0), _buffer(0), \
_originalPos(0) +  : _pat(p), _flags(flags), _valid(true), _numSubPatterns(0)
 {
 #ifdef HAVE_PCREPOSIX
   // Determine whether libpcre has unicode support if need be..
@@ -297,11 +297,10 @@
   bool secondTry = false;
 
   while (1) {
-    // Fill our buffer with an encoded version, whether utf-8, or,
-    // if PCRE is incapable, truncated.
-    prepareMatch(intern);
-    _regex = pcre_compile(_buffer, options, &errorMessage, &errorOffset, NULL);
-    doneMatch(); //Cleanup buffers
+    RegExpStringContext converted(intern);
+    
+    _regex = pcre_compile(converted.buffer(), options, &errorMessage, &errorOffset, \
NULL); +
     if (!_regex) {
 #ifdef PCRE_JAVASCRIPT_COMPAT
       // The compilation failed. It is likely the pattern contains non-standard \
extensions. @@ -359,7 +358,6 @@
 
 RegExp::~RegExp()
 {
-  doneMatch(); // Be 100% sure buffers are freed
 #ifdef HAVE_PCREPOSIX
   pcre_free(_regex);
 #else
@@ -368,7 +366,7 @@
 #endif
 }
 
-void RegExp::prepareUtf8(const UString& s)
+void RegExpStringContext::prepareUtf8(const UString& s)
 {
   // Allocate a buffer big enough to hold all the characters plus \0
   const int length = s.size();
@@ -418,7 +416,7 @@
   *(posOut+1) = length+1;
 }
 
-void RegExp::prepareASCII (const UString& s)
+void RegExpStringContext::prepareASCII (const UString& s)
 {
   _originalPos = 0;
 
@@ -432,30 +430,28 @@
   _bufferSize = truncated.size();
 }
 
-void RegExp::prepareMatch(const UString &s)
+RegExpStringContext::RegExpStringContext(const UString &s)
 {
-  delete[] _originalPos; // Just to be sure..
-  delete[] _buffer;
-  if (utf8Support == Supported)
-    prepareUtf8(s);
-  else
-    prepareASCII(s);
-
 #ifndef NDEBUG
   _originalS = s;
 #endif
+
+  if (RegExp::utf8Support == RegExp::Supported)
+    prepareUtf8(s);
+  else
+    prepareASCII(s);
 }
 
-void RegExp::doneMatch()
+RegExpStringContext::~RegExpStringContext()
 {
   delete[] _originalPos; _originalPos = 0;
   delete[] _buffer;      _buffer      = 0;
 }
 
-UString RegExp::match(const UString &s, bool *error, int i, int *pos, int **ovector)
+UString RegExp::match(const RegExpStringContext& ctx, const UString &s, bool *error, \
int i, int *pos, int **ovector)  {
 #ifndef NDEBUG
-  assert(s.data() == _originalS.data()); // Make sure prepareMatch got called \
right.. +  assert(s.data() == ctx._originalS.data()); // Make sure the context is \
right..  #endif
 
   if (i < 0)
@@ -491,7 +487,7 @@
   int startPos;
   if (utf8Support == Supported) {
     startPos = i;
-    while (_originalPos[startPos] < i)
+    while (ctx.originalPos(startPos) < i)
       ++startPos;
   } else {
     startPos = i;
@@ -530,13 +526,14 @@
     limits.match_limit_recursion = (availableStackSize/1300)*3/4;
   }
   
-  const int numMatches = pcre_exec(_regex, stackGlutton ? &limits : 0, _buffer, \
_bufferSize, startPos, baseFlags, offsetVector, offsetVectorSize); +  const int \
numMatches = pcre_exec(_regex, stackGlutton ? &limits : 0, ctx.buffer(), +            \
ctx.bufferSize(), startPos, baseFlags, offsetVector, offsetVectorSize);  
   //Now go through and patch up the offsetVector
   if (utf8Support == Supported)
     for (int c = 0; c < 2 * numMatches; ++c)
       if (offsetVector[c] != -1)
-        offsetVector[c] = _originalPos[offsetVector[c]];
+        offsetVector[c] = ctx.originalPos(offsetVector[c]);
 
   if (numMatches < 0) {
 #ifndef NDEBUG
--- trunk/KDE/kdelibs/kjs/regexp.h #1134919:1134920
@@ -40,6 +40,31 @@
 
 namespace KJS {
 
+
+  // Represents a strign re-encoded to whatever PCRE can handle
+  class RegExpStringContext {
+  public:
+    explicit RegExpStringContext(const UString& pattern);
+    ~RegExpStringContext();
+
+    char* buffer() const { return _buffer; }
+    int   bufferSize() const { return _bufferSize; }
+    int   originalPos(int c) const { return _originalPos[c]; }
+
+  private:
+    // Cached encoding info...
+    char* _buffer;
+    int*  _originalPos;
+    int   _bufferSize;
+
+    void prepareUtf8  (const UString& s);
+    void prepareASCII (const UString& s);
+#ifndef NDEBUG
+  public:
+    UString _originalS; // the original string, used for sanity-checking
+#endif
+  };
+
   class RegExp {
   public:
     enum { None = 0, Global = 1, IgnoreCase = 2, Multiline = 4 };
@@ -50,14 +75,10 @@
     char flags() const { return _flags; }
     bool isValid() const { return _valid; }
 
-    UString match(const UString &s, bool *error, int i, int *pos = 0, int **ovector \
= 0); +    UString match(const RegExpStringContext& c, const UString& s, bool *error, \
int i, int *pos = 0, int **ovector = 0);  unsigned subPatterns() const { return \
_numSubPatterns; }  UString  pattern() const { return _pat; }
 
-    //These methods should be called around the match of the same string..
-    void prepareMatch(const UString &s);
-    void doneMatch();
-
     static bool tryGrowingMaxStackSize;
     static bool didIncreaseMaxStackSize;
     static int availableStackSize;
@@ -72,20 +93,10 @@
     bool _valid;
     unsigned _numSubPatterns;
     
-    // Cached encoding info...
-    char* _buffer;
-    int*  _originalPos;
-    int   _bufferSize;
-
-    void prepareUtf8  (const UString& s);
-    void prepareASCII (const UString& s);
-#ifndef NDEBUG
-    UString _originalS; // the original string, used for sanity-checking
-#endif
-
     RegExp(const RegExp &);
     RegExp &operator=(const RegExp &);
     
+  public:
     enum UTF8SupportState {
       Unknown,
       Supported,
--- trunk/KDE/kdelibs/kjs/regexp_object.cpp #1134919:1134920
@@ -105,9 +105,10 @@
     }
 
     int foundIndex;
-    regExp->prepareMatch(input);
-    UString match = regExpObj->performMatch(regExp, exec, input, \
                static_cast<int>(lastIndex), &foundIndex);
-    regExp->doneMatch();
+
+    RegExpStringContext ctx(input);
+    UString match = regExpObj->performMatch(regExp, exec, ctx, input, \
static_cast<int>(lastIndex), &foundIndex); +
     if (exec->hadException())
       return jsUndefined();
 
@@ -248,13 +249,14 @@
   expression matching through the performMatch function. We use cached results to \
calculate,  e.g., RegExp.lastMatch and RegExp.leftParen.
 */
-UString RegExpObjectImp::performMatch(RegExp* r, ExecState* exec, const UString& s,
+UString RegExpObjectImp::performMatch(RegExp* r, ExecState* exec, const \
RegExpStringContext& c, +                                      const UString& s,
                                       int startOffset, int *endOffset, int \
**ovector)  {
   int tmpOffset;
   int *tmpOvector;
   bool error = false;
-  UString match = r->match(s, &error, startOffset, &tmpOffset, &tmpOvector);
+  UString match = r->match(c, s, &error, startOffset, &tmpOffset, &tmpOvector);
   if (error) {
     if (endOffset)
       *endOffset = -1;
--- trunk/KDE/kdelibs/kjs/regexp_object.h #1134919:1134920
@@ -87,7 +87,8 @@
 
     // If resources are exhaused during a match, exec parameter will have an \
exception  // set, and endOffset will be -1
-    UString performMatch(RegExp *, ExecState *, const UString&, int startOffset = 0, \
int *endOffset = 0, int **ovector = 0); +    UString performMatch(RegExp *, ExecState \
*, const RegExpStringContext&, const UString&, +                         int \
                startOffset = 0, int *endOffset = 0, int **ovector = 0);
     JSObject *arrayOfMatches(ExecState *exec, const UString &result) const;
 
     static void throwRegExpError(ExecState *);
--- trunk/KDE/kdelibs/kjs/string_object.cpp #1134919:1134920
@@ -363,10 +363,10 @@
     int replacementCapacity = 0;
 
     // This is either a loop (if global is set) or a one-way (if not).
-    reg->prepareMatch(source);
+    RegExpStringContext ctx(source);
     do {
       int *ovector;
-      UString matchString = regExpObj->performMatch(reg, exec, source, \
startPosition, &matchIndex, &ovector); +      UString matchString = \
regExpObj->performMatch(reg, exec, ctx, source, startPosition, &matchIndex, \
&ovector);  if (matchIndex == -1)
         break;
       int matchLen = matchString.size();
@@ -407,7 +407,6 @@
       }
     } while (global);
 
-    reg->doneMatch();
 
     if (lastIndex < source.size())
       pushSourceRange(sourceRanges, sourceRangeCount, sourceRangeCapacity, \
UString::Range(lastIndex, source.size() - lastIndex)); @@ -563,9 +562,10 @@
       return throwError(exec, SyntaxError, "Invalid regular expression");
     }
     RegExpObjectImp* regExpObj = \
                static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp());
                
-    reg->prepareMatch(u);
-    UString mstr = regExpObj->performMatch(reg, exec, u, 0, &pos);
 
+    RegExpStringContext ctx(u);
+    UString mstr = regExpObj->performMatch(reg, exec, ctx, u, 0, &pos);
+
     if (id == Search) {
       result = jsNumber(pos);
     } else {
@@ -588,7 +588,7 @@
             list.append(jsString(mstr));
           lastIndex = pos;
           pos += mstr.isEmpty() ? 1 : mstr.size();
-          mstr = regExpObj->performMatch(reg, exec, u, pos, &pos);
+          mstr = regExpObj->performMatch(reg, exec, ctx, u, pos, &pos);
         }
         if (imp)
           imp->put(exec, "lastIndex", jsNumber(lastIndex), DontDelete|DontEnum);
@@ -602,7 +602,6 @@
         }
       }
     }
-    reg->doneMatch();
 
     delete tmpReg;
     break;
@@ -637,10 +636,10 @@
     uint32_t limit = a1->isUndefined() ? 0xFFFFFFFFU : a1->toUInt32(exec);
     if (a0->isObject() && static_cast<JSObject *>(a0)->inherits(&RegExpImp::info)) {
       RegExp *reg = static_cast<RegExpImp *>(a0)->regExp();
-      reg->prepareMatch(u);
+
+      RegExpStringContext ctx(u);
       bool error = false;
-      if (u.isEmpty() && !reg->match(u, &error, 0).isNull()) {
-        reg->doneMatch();
+      if (u.isEmpty() && !reg->match(ctx, u, &error, 0).isNull()) {
 
         // empty string matched by regexp -> empty array
         res->put(exec, exec->propertyNames().length, jsNumber(0));
@@ -651,7 +650,7 @@
         // TODO: back references
         int mpos;
         int *ovector = 0L;
-        UString mstr = reg->match(u, &error, pos, &mpos, &ovector);
+        UString mstr = reg->match(ctx, u, &error, pos, &mpos, &ovector);
         delete [] ovector; ovector = 0L;
         if (mpos < 0)
           break;
@@ -662,7 +661,7 @@
           i++;
         }
       }
-      reg->doneMatch();
+
       if (error)
         RegExpObjectImp::throwRegExpError(exec);
 


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic