SVN commit 1134920 by orlovich: Fix problems with reentry of operations on the same regexp object from helper JS functions passed to its ops, by properly separating out per-match state out of RegExp. BUG:225332 CCBUG: 213606 (#213606 no longer crashes, but I am concerned about an another warning in the vg log there) M +20 -23 regexp.cpp M +27 -16 regexp.h M +7 -5 regexp_object.cpp M +2 -1 regexp_object.h M +11 -12 string_object.cpp --- trunk/KDE/kdelibs/kjs/regexp.cpp #1134919:1134920 @@ -261,7 +261,7 @@ int RegExp::availableStackSize = 8*1024*1024; RegExp::RegExp(const UString &p, char flags) - : _pat(p), _flags(flags), _valid(true), _numSubPatterns(0), _buffer(0), _originalPos(0) + : _pat(p), _flags(flags), _valid(true), _numSubPatterns(0) { #ifdef HAVE_PCREPOSIX // Determine whether libpcre has unicode support if need be.. @@ -297,11 +297,10 @@ bool secondTry = false; while (1) { - // Fill our buffer with an encoded version, whether utf-8, or, - // if PCRE is incapable, truncated. - prepareMatch(intern); - _regex = pcre_compile(_buffer, options, &errorMessage, &errorOffset, NULL); - doneMatch(); //Cleanup buffers + RegExpStringContext converted(intern); + + _regex = pcre_compile(converted.buffer(), options, &errorMessage, &errorOffset, NULL); + if (!_regex) { #ifdef PCRE_JAVASCRIPT_COMPAT // The compilation failed. It is likely the pattern contains non-standard extensions. @@ -359,7 +358,6 @@ RegExp::~RegExp() { - doneMatch(); // Be 100% sure buffers are freed #ifdef HAVE_PCREPOSIX pcre_free(_regex); #else @@ -368,7 +366,7 @@ #endif } -void RegExp::prepareUtf8(const UString& s) +void RegExpStringContext::prepareUtf8(const UString& s) { // Allocate a buffer big enough to hold all the characters plus \0 const int length = s.size(); @@ -418,7 +416,7 @@ *(posOut+1) = length+1; } -void RegExp::prepareASCII (const UString& s) +void RegExpStringContext::prepareASCII (const UString& s) { _originalPos = 0; @@ -432,30 +430,28 @@ _bufferSize = truncated.size(); } -void RegExp::prepareMatch(const UString &s) +RegExpStringContext::RegExpStringContext(const UString &s) { - delete[] _originalPos; // Just to be sure.. - delete[] _buffer; - if (utf8Support == Supported) - prepareUtf8(s); - else - prepareASCII(s); - #ifndef NDEBUG _originalS = s; #endif + + if (RegExp::utf8Support == RegExp::Supported) + prepareUtf8(s); + else + prepareASCII(s); } -void RegExp::doneMatch() +RegExpStringContext::~RegExpStringContext() { delete[] _originalPos; _originalPos = 0; delete[] _buffer; _buffer = 0; } -UString RegExp::match(const UString &s, bool *error, int i, int *pos, int **ovector) +UString RegExp::match(const RegExpStringContext& ctx, const UString &s, bool *error, int i, int *pos, int **ovector) { #ifndef NDEBUG - assert(s.data() == _originalS.data()); // Make sure prepareMatch got called right.. + assert(s.data() == ctx._originalS.data()); // Make sure the context is right.. #endif if (i < 0) @@ -491,7 +487,7 @@ int startPos; if (utf8Support == Supported) { startPos = i; - while (_originalPos[startPos] < i) + while (ctx.originalPos(startPos) < i) ++startPos; } else { startPos = i; @@ -530,13 +526,14 @@ limits.match_limit_recursion = (availableStackSize/1300)*3/4; } - const int numMatches = pcre_exec(_regex, stackGlutton ? &limits : 0, _buffer, _bufferSize, startPos, baseFlags, offsetVector, offsetVectorSize); + const int numMatches = pcre_exec(_regex, stackGlutton ? &limits : 0, ctx.buffer(), + ctx.bufferSize(), startPos, baseFlags, offsetVector, offsetVectorSize); //Now go through and patch up the offsetVector if (utf8Support == Supported) for (int c = 0; c < 2 * numMatches; ++c) if (offsetVector[c] != -1) - offsetVector[c] = _originalPos[offsetVector[c]]; + offsetVector[c] = ctx.originalPos(offsetVector[c]); if (numMatches < 0) { #ifndef NDEBUG --- trunk/KDE/kdelibs/kjs/regexp.h #1134919:1134920 @@ -40,6 +40,31 @@ namespace KJS { + + // Represents a strign re-encoded to whatever PCRE can handle + class RegExpStringContext { + public: + explicit RegExpStringContext(const UString& pattern); + ~RegExpStringContext(); + + char* buffer() const { return _buffer; } + int bufferSize() const { return _bufferSize; } + int originalPos(int c) const { return _originalPos[c]; } + + private: + // Cached encoding info... + char* _buffer; + int* _originalPos; + int _bufferSize; + + void prepareUtf8 (const UString& s); + void prepareASCII (const UString& s); +#ifndef NDEBUG + public: + UString _originalS; // the original string, used for sanity-checking +#endif + }; + class RegExp { public: enum { None = 0, Global = 1, IgnoreCase = 2, Multiline = 4 }; @@ -50,14 +75,10 @@ char flags() const { return _flags; } bool isValid() const { return _valid; } - UString match(const UString &s, bool *error, int i, int *pos = 0, int **ovector = 0); + UString match(const RegExpStringContext& c, const UString& s, bool *error, int i, int *pos = 0, int **ovector = 0); unsigned subPatterns() const { return _numSubPatterns; } UString pattern() const { return _pat; } - //These methods should be called around the match of the same string.. - void prepareMatch(const UString &s); - void doneMatch(); - static bool tryGrowingMaxStackSize; static bool didIncreaseMaxStackSize; static int availableStackSize; @@ -72,20 +93,10 @@ bool _valid; unsigned _numSubPatterns; - // Cached encoding info... - char* _buffer; - int* _originalPos; - int _bufferSize; - - void prepareUtf8 (const UString& s); - void prepareASCII (const UString& s); -#ifndef NDEBUG - UString _originalS; // the original string, used for sanity-checking -#endif - RegExp(const RegExp &); RegExp &operator=(const RegExp &); + public: enum UTF8SupportState { Unknown, Supported, --- trunk/KDE/kdelibs/kjs/regexp_object.cpp #1134919:1134920 @@ -105,9 +105,10 @@ } int foundIndex; - regExp->prepareMatch(input); - UString match = regExpObj->performMatch(regExp, exec, input, static_cast(lastIndex), &foundIndex); - regExp->doneMatch(); + + RegExpStringContext ctx(input); + UString match = regExpObj->performMatch(regExp, exec, ctx, input, static_cast(lastIndex), &foundIndex); + if (exec->hadException()) return jsUndefined(); @@ -248,13 +249,14 @@ expression matching through the performMatch function. We use cached results to calculate, e.g., RegExp.lastMatch and RegExp.leftParen. */ -UString RegExpObjectImp::performMatch(RegExp* r, ExecState* exec, const UString& s, +UString RegExpObjectImp::performMatch(RegExp* r, ExecState* exec, const RegExpStringContext& c, + const UString& s, int startOffset, int *endOffset, int **ovector) { int tmpOffset; int *tmpOvector; bool error = false; - UString match = r->match(s, &error, startOffset, &tmpOffset, &tmpOvector); + UString match = r->match(c, s, &error, startOffset, &tmpOffset, &tmpOvector); if (error) { if (endOffset) *endOffset = -1; --- trunk/KDE/kdelibs/kjs/regexp_object.h #1134919:1134920 @@ -87,7 +87,8 @@ // If resources are exhaused during a match, exec parameter will have an exception // set, and endOffset will be -1 - UString performMatch(RegExp *, ExecState *, const UString&, int startOffset = 0, int *endOffset = 0, int **ovector = 0); + UString performMatch(RegExp *, ExecState *, const RegExpStringContext&, const UString&, + int startOffset = 0, int *endOffset = 0, int **ovector = 0); JSObject *arrayOfMatches(ExecState *exec, const UString &result) const; static void throwRegExpError(ExecState *); --- trunk/KDE/kdelibs/kjs/string_object.cpp #1134919:1134920 @@ -363,10 +363,10 @@ int replacementCapacity = 0; // This is either a loop (if global is set) or a one-way (if not). - reg->prepareMatch(source); + RegExpStringContext ctx(source); do { int *ovector; - UString matchString = regExpObj->performMatch(reg, exec, source, startPosition, &matchIndex, &ovector); + UString matchString = regExpObj->performMatch(reg, exec, ctx, source, startPosition, &matchIndex, &ovector); if (matchIndex == -1) break; int matchLen = matchString.size(); @@ -407,7 +407,6 @@ } } while (global); - reg->doneMatch(); if (lastIndex < source.size()) pushSourceRange(sourceRanges, sourceRangeCount, sourceRangeCapacity, UString::Range(lastIndex, source.size() - lastIndex)); @@ -563,9 +562,10 @@ return throwError(exec, SyntaxError, "Invalid regular expression"); } RegExpObjectImp* regExpObj = static_cast(exec->lexicalInterpreter()->builtinRegExp()); - reg->prepareMatch(u); - UString mstr = regExpObj->performMatch(reg, exec, u, 0, &pos); + RegExpStringContext ctx(u); + UString mstr = regExpObj->performMatch(reg, exec, ctx, u, 0, &pos); + if (id == Search) { result = jsNumber(pos); } else { @@ -588,7 +588,7 @@ list.append(jsString(mstr)); lastIndex = pos; pos += mstr.isEmpty() ? 1 : mstr.size(); - mstr = regExpObj->performMatch(reg, exec, u, pos, &pos); + mstr = regExpObj->performMatch(reg, exec, ctx, u, pos, &pos); } if (imp) imp->put(exec, "lastIndex", jsNumber(lastIndex), DontDelete|DontEnum); @@ -602,7 +602,6 @@ } } } - reg->doneMatch(); delete tmpReg; break; @@ -637,10 +636,10 @@ uint32_t limit = a1->isUndefined() ? 0xFFFFFFFFU : a1->toUInt32(exec); if (a0->isObject() && static_cast(a0)->inherits(&RegExpImp::info)) { RegExp *reg = static_cast(a0)->regExp(); - reg->prepareMatch(u); + + RegExpStringContext ctx(u); bool error = false; - if (u.isEmpty() && !reg->match(u, &error, 0).isNull()) { - reg->doneMatch(); + if (u.isEmpty() && !reg->match(ctx, u, &error, 0).isNull()) { // empty string matched by regexp -> empty array res->put(exec, exec->propertyNames().length, jsNumber(0)); @@ -651,7 +650,7 @@ // TODO: back references int mpos; int *ovector = 0L; - UString mstr = reg->match(u, &error, pos, &mpos, &ovector); + UString mstr = reg->match(ctx, u, &error, pos, &mpos, &ovector); delete [] ovector; ovector = 0L; if (mpos < 0) break; @@ -662,7 +661,7 @@ i++; } } - reg->doneMatch(); + if (error) RegExpObjectImp::throwRegExpError(exec);