'[kdevelop] languages/cpp: optimize: reduce memory consumption of Token class by 50% on 64Bit'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    [kdevelop] languages/cpp: optimize: reduce memory consumption of Token class by 50% on 64Bit
From:       Milian Wolff <mail () milianw ! de>
Date:       2012-03-01 0:23:38
Message-ID: 20120301002338.40A17A60BB () git ! kde ! org
[Download RAW message or body]

Git commit 79edb4115f9950a98dd395a05a45f391e19bd0e3 by Milian Wolff.
Committed on 01/03/2012 at 01:16.
Pushed by mwolff into branch 'master'.

optimize: reduce memory consumption of Token class by 50% on 64Bit

By removing the ParseSession pointer from it, we get rid of 8 bytes
and furthermore reduce the alignment size to 4. This way, we now
only require 12 bytes per token compared to 24 bytes previously.

This also allows us to define the Token class as a primitive type,
potentially speeding up the TokenStream even further.

The "cost" is a changed API: to get the string representation of
a token, one must now ask the TokenStream. In practice this is very
rarely a real pita, as before one often did

stream->token(i)->symbol()

now you just do

stream->symbol(i)

Furthermore I've consolidated the tons of custom "AST* node to QString"
functions into one central ParseSession::stringForNode.

Finally, I've replaced some costly token.symbol() == IndexedChar("somechar")
with the much faster token.kind == Token_xyz comparisons.

All in all, this should a) make our code faster and b) let it use much
less memory while at it. For the big resource file in the bug below,
the difference of 50% in the Token class results in ~250MB less memory
consumption.

CCBUG: 291248

M  +4    -4    languages/cpp/cppduchain/cppeditorintegrator.cpp
M  +2    -2    languages/cpp/cppduchain/declarationbuilder.cpp
M  +1    -7    languages/cpp/cppduchain/dumpchain.cpp
M  +7    -11   languages/cpp/cppduchain/expressionvisitor.cpp
M  +6    -23   languages/cpp/cppduchain/name_visitor.cpp
M  +3    -2    languages/cpp/parser/codegenerator.cpp
M  +3    -3    languages/cpp/parser/dumptree.cpp
M  +35   -38   languages/cpp/parser/lexer.cpp
M  +53   -21   languages/cpp/parser/lexer.h
M  +7    -22   languages/cpp/parser/name_compiler.cpp
M  +6    -11   languages/cpp/parser/parser.cpp
M  +15   -0    languages/cpp/parser/parsesession.cpp
M  +10   -0    languages/cpp/parser/parsesession.h
M  +15   -5    languages/cpp/parser/tests/test_generator.cpp
M  +2    -11   languages/cpp/parser/tests/test_parser.cpp
M  +0    -4    languages/cpp/parser/tests/test_parser.h
M  +2    -2    languages/cpp/parser/tests/test_parser_cpp2011.cpp
M  +3    -2    languages/cpp/tests/cpp-parser.cpp

http://commits.kde.org/kdevelop/79edb4115f9950a98dd395a05a45f391e19bd0e3

diff --git a/languages/cpp/cppduchain/cppeditorintegrator.cpp \
b/languages/cpp/cppduchain/cppeditorintegrator.cpp index 0be5718..7564829 100644
--- a/languages/cpp/cppduchain/cppeditorintegrator.cpp
+++ b/languages/cpp/cppduchain/cppeditorintegrator.cpp
@@ -53,7 +53,7 @@ CursorInRevision CppEditorIntegrator::findPosition( const Token & \
token, Edge ed  if(position.collapsed)
       return position;
     else {
-      uint length = token.symbolLength();
+      uint length = m_session->token_stream->symbolLength(token);
       if(a.second && length > a.second)
         length = a.second;
       //We have to check the following anchor in the location-table to make sure we \
don't make the range longer than possible @@ -81,7 +81,7 @@ RangeInRevision \
CppEditorIntegrator::findRangeForContext( size_t start_token, si  rpp::Anchor start = \
m_session->positionAt(tStart.position, true);  rpp::Anchor end = \
m_session->positionAt(tEnd.position, true);  if(!end.collapsed)
-    end.column += tEnd.symbolLength(); //We want the back edge
+    end.column += m_session->token_stream->symbolLength(tEnd); //We want the back \
edge  
   if(start.macroExpansion.isValid() && start.macroExpansion == end.macroExpansion)
     return RangeInRevision(start.macroExpansion, start.macroExpansion);
@@ -111,7 +111,7 @@ RangeInRevision CppEditorIntegrator::findRange( const Token & \
token )  
 QString CppEditorIntegrator::tokenToString(std::size_t token) const
 {
-  return m_session->token_stream->token(token).symbolString();
+  return m_session->token_stream->symbolString(token);
 }
 
 QString CppEditorIntegrator::tokensToStrings(std::size_t start, std::size_t end) \
const @@ -125,7 +125,7 @@ QString CppEditorIntegrator::tokensToStrings(std::size_t \
start, std::size_t end)  
 QByteArray CppEditorIntegrator::tokenToByteArray(std::size_t token) const
 {
-  return m_session->token_stream->token(token).symbolByteArray();
+  return m_session->token_stream->symbolByteArray(token);
 }
 
 QByteArray CppEditorIntegrator::tokensToByteArray(std::size_t start, std::size_t \
                end) const
diff --git a/languages/cpp/cppduchain/declarationbuilder.cpp \
b/languages/cpp/cppduchain/declarationbuilder.cpp index f21464f..80bee78 100644
--- a/languages/cpp/cppduchain/declarationbuilder.cpp
+++ b/languages/cpp/cppduchain/declarationbuilder.cpp
@@ -978,7 +978,7 @@ void DeclarationBuilder::visitEnumerator(EnumeratorAST* node)
   uint oldEndToken = node->end_token;
   node->end_token = node->id + 1;
 
-  Identifier id(editor()->parseSession()->token_stream->token(node->id).symbol());
+  Identifier id(editor()->parseSession()->token_stream->symbol(node->id));
   Declaration* decl = openNormalDeclaration(0, node, id);
 
   node->end_token = oldEndToken;
@@ -1317,7 +1317,7 @@ void \
DeclarationBuilder::visitNamespaceAliasDefinition(NamespaceAliasDefinitionA  if( \
compilingContexts() ) {  RangeInRevision range = \
editor()->findRange(node->namespace_name);  DUChainWriteLocker lock(DUChain::lock());
-    NamespaceAliasDeclaration* decl = \
openDeclarationReal<NamespaceAliasDeclaration>(0, 0, \
Identifier(editor()->parseSession()->token_stream->token(node->namespace_name).symbol()), \
false, false, &range); +    NamespaceAliasDeclaration* decl = \
openDeclarationReal<NamespaceAliasDeclaration>(0, 0, \
Identifier(editor()->parseSession()->token_stream->symbol(node->namespace_name)), \
false, false, &range);  {
       QualifiedIdentifier id;
       identifierForNode(node->alias_name, id);
diff --git a/languages/cpp/cppduchain/dumpchain.cpp \
b/languages/cpp/cppduchain/dumpchain.cpp index 9874fbd..ffdfe7d 100644
--- a/languages/cpp/cppduchain/dumpchain.cpp
+++ b/languages/cpp/cppduchain/dumpchain.cpp
@@ -67,13 +67,7 @@ void DumpChain::visit(AST *node)
 
   if (node) {
     if (m_editor) {
-      QString nodeText;
-      for( std::size_t a = node->start_token; a != node->end_token; a++ ) {
-        const Token& tok( m_editor->parseSession()->token_stream->token((int) a) );
-        if( !nodeText.isEmpty() )
-          nodeText += ' ';
-        nodeText += stringFromContents( tok.session->contentsVector(), tok.position, \
                tok.size );
-      }
+      QString nodeText = m_editor->parseSession()->stringForNode(node);
       if( !nodeText.isEmpty() ) nodeText = "\"" + nodeText + "\"";
 
 
diff --git a/languages/cpp/cppduchain/expressionvisitor.cpp \
b/languages/cpp/cppduchain/expressionvisitor.cpp index 77a0e0b..0ef76a6 100644
--- a/languages/cpp/cppduchain/expressionvisitor.cpp
+++ b/languages/cpp/cppduchain/expressionvisitor.cpp
@@ -622,9 +622,7 @@ void ExpressionVisitor::findMember( AST* node, AbstractType::Ptr \
base, const Ide  
       if( isNumber(startNumber) )
       {
-        QString num;
-        for( size_t a = node->start_token; a < node->end_token; a++ )
-          num += tokenFromIndex(a).symbolString();
+        QString num = m_session->stringForNode(node, true);
 
         LOCKDUCHAIN;
         if( num.indexOf('.') != -1 || num.endsWith('f') || num.endsWith('d') ) {
@@ -684,9 +682,9 @@ void ExpressionVisitor::findMember( AST* node, AbstractType::Ptr \
base, const Ide  LOCKDUCHAIN;
       ConstantIntegralType* charType = new \
ConstantIntegralType(IntegralType::TypeChar);  if ( token.size == 3 ) {
-        charType->setValue<char>( token.symbolByteArray().at(1) );
+        charType->setValue<char>( \
m_session->token_stream->symbolByteArray(token).at(1) );  } else {
-        QByteArray symbol = token.symbolByteArray();
+        QByteArray symbol = m_session->token_stream->symbolByteArray(token);
         if (symbol.startsWith('L')) {
           charType->setDataType(IntegralType::TypeWchar_t);
           symbol.right(symbol.size() - 1);
@@ -710,12 +708,12 @@ void ExpressionVisitor::findMember( AST* node, \
AbstractType::Ptr base, const Ide  
       m_lastType = AbstractType::Ptr(charType);
       m_lastInstance = Instance( true );
-    } else if(token.symbol() == True || token.symbol() == False) {
+    } else if(token.kind == Token_true || token.kind == Token_false) {
       ///We have a boolean constant, we need to catch that here
       LOCKDUCHAIN;
       m_lastType = AbstractType::Ptr(new \
ConstantIntegralType(IntegralType::TypeBoolean));  m_lastInstance = Instance( true );
-      static_cast<ConstantIntegralType*>(m_lastType.unsafeData())->setValue<qint64>( \
token.symbol() == True ); +      \
static_cast<ConstantIntegralType*>(m_lastType.unsafeData())->setValue<qint64>( \
token.kind == Token_true );  }
 
     //Respect "this" token
@@ -917,9 +915,7 @@ QString toString(AbstractType::Ptr t) {
 
 void ExpressionVisitor::createDelayedType( AST* node , bool expression ) {
   DelayedType::Ptr type(new DelayedType());
-  QString id;
-  for( size_t s = node->start_token; s < node->end_token; ++s )
-    id += m_session->token_stream->token(s).symbolString();
+  QString id = m_session->stringForNode(node, true);
 
   //We have  to prevent automatic parsing and splitting by QualifiedIdentifier and \
Identifier  Identifier idd;
@@ -2107,7 +2103,7 @@ void ExpressionVisitor::createDelayedType( AST* node , bool \
expression ) {  sig = sig.mid(1, sig.length()-2);
     }
 
-    Identifier id(tokenFromIndex(node->name->id).symbol());
+    Identifier id(m_session->token_stream->symbol(node->name->id));
 
     if(!id.isEmpty()) {
       foreach(Declaration* decl, container->findDeclarations(id, \
CursorInRevision::invalid(), m_topContext, \
                (DUContext::SearchFlags)(DUContext::DontSearchInParent | \
                DUContext::NoFiltering))) {
diff --git a/languages/cpp/cppduchain/name_visitor.cpp \
b/languages/cpp/cppduchain/name_visitor.cpp index d15769e..2457d50 100644
--- a/languages/cpp/cppduchain/name_visitor.cpp
+++ b/languages/cpp/cppduchain/name_visitor.cpp
@@ -44,23 +44,6 @@ NameASTVisitor::NameASTVisitor(ParseSession* session, \
Cpp::ExpressionVisitor* vi  m_stopSearch = false;
 }
 
-QString decode(ParseSession* session, AST* ast, bool without_spaces)
-{
-  QString ret;
-  if( without_spaces ) {
-    //Decode operator-names without spaces for now, since we rely on it in other \
                places.
-    ///@todo change this, here and in all the places that rely on it. Operators \
                should then by written like "operator [ ]"(space between each token)
-    for( size_t a = ast->start_token; a < ast->end_token; a++ ) {
-      ret += session->token_stream->token(a).symbolString();
-    }
-  } else {
-    for( size_t a = ast->start_token; a < ast->end_token; a++ ) {
-      ret += session->token_stream->token(a).symbolString() + " ";
-    }
-  }
-  return ret;
-}
-
 void NameASTVisitor::visitUnqualifiedName(UnqualifiedNameAST *node)
 {
   if(m_stopSearch)
@@ -68,7 +51,7 @@ void NameASTVisitor::visitUnqualifiedName(UnqualifiedNameAST *node)
   IndexedString tmp_name;
 
   if (node->id)
-    tmp_name = m_session->token_stream->token(node->id).symbol();
+    tmp_name = m_session->token_stream->symbol(node->id);
   if (node->tilde)
     tmp_name = IndexedString(QLatin1String("~") + tmp_name.str());
   if (OperatorFunctionIdAST *op_id = node->operator_id) {
@@ -80,7 +63,7 @@ void NameASTVisitor::visitUnqualifiedName(UnqualifiedNameAST *node)
     tmpString += QLatin1String("operator");
 
     if (op_id->op && op_id->op->op)
-      tmpString +=  decode(m_session, op_id->op, true);
+      tmpString +=  m_session->stringForNode(op_id->op, true);
     else
       tmpString += QLatin1String("{...cast...}");
 
@@ -94,7 +77,7 @@ void NameASTVisitor::visitUnqualifiedName(UnqualifiedNameAST *node)
   if (node->template_arguments) {
     visitNodes(this, node->template_arguments);
   } else if(node->end_token == node->start_token + 3 && node->id == \
                node->start_token
-            && m_session->token_stream->token(node->id+1).symbol() == \
KDevelop::IndexedString('<')) +            && \
m_session->token_stream->kind(node->id+1) == '<')  {
     ///@todo Represent this nicer in the AST
     ///It's probably a type-specifier with instantiation of the default-parameter, \
like "Bla<>". @@ -143,7 +126,7 @@ void \
NameASTVisitor::visitUnqualifiedName(UnqualifiedNameAST *node)  }
       
       if( m_debug )
-        kDebug( 9007 ) << "failed to find " << m_currentIdentifier << " as part of " \
<< decode( m_session, node ) << ", searched in " << m_find.describeLastContext(); +   \
kDebug( 9007 ) << "failed to find " << m_currentIdentifier << " as part of " << \
m_session->stringForNode(node) << ", searched in " << m_find.describeLastContext();  \
}  }
 
@@ -201,7 +184,7 @@ ExpressionEvaluationResult \
NameASTVisitor::processTemplateArgument(TemplateArgum  opened = true;
         
     }else if( m_debug ) {
-      kDebug(9007) << "Failed to resolve template-argument " << decode(m_session, \
node->expression); +      kDebug(9007) << "Failed to resolve template-argument " << \
m_session->stringForNode(node->expression);  }
     
     if(ownVisitor) {
@@ -234,7 +217,7 @@ ExpressionEvaluationResult \
NameASTVisitor::processTemplateArgument(TemplateArgum  }else{
     LOCKDUCHAIN;
     m_find.openQualifiedIdentifier(false);
-    m_find.openIdentifier(Identifier(decode(m_session, node)));
+    m_find.openIdentifier(Identifier(m_session->stringForNode(node)));
     m_find.closeIdentifier(false);
     opened = true;
   }
diff --git a/languages/cpp/parser/codegenerator.cpp \
b/languages/cpp/parser/codegenerator.cpp index 9e92d76..962fe5b 100644
--- a/languages/cpp/parser/codegenerator.cpp
+++ b/languages/cpp/parser/codegenerator.cpp
@@ -50,8 +50,9 @@ void CodeGenerator::outputToken(uint tokenPosition)
 {
   if (tokenPosition) {
     const Token& t = m_session->token_stream->token(tokenPosition);
-    m_output << t.symbolString();/*
-    if (t.kind == Token_identifier || t.kind == Token_string_literal || t.kind == \
Token_number_literal || t.kind == Token_char_literal) +    m_output << \
m_session->token_stream->symbolString(t); +
+    /* if (t.kind == Token_identifier || t.kind == Token_string_literal || t.kind == \
Token_number_literal || t.kind == Token_char_literal)  m_output << \
t.symbolString().str();  else
       m_output << token_text( t.kind );*/
diff --git a/languages/cpp/parser/dumptree.cpp b/languages/cpp/parser/dumptree.cpp
index 80fa2e3..5d0df8e 100644
--- a/languages/cpp/parser/dumptree.cpp
+++ b/languages/cpp/parser/dumptree.cpp
@@ -145,9 +145,9 @@ void DumpTree::visit(AST *node)
 
   QString nodeText;
   if( m_tokenStream ) {
-    for( std::size_t a = node->start_token; a < node->end_token; a++ ) {
-      const Token& tok( m_tokenStream->token((int) a) );
-      nodeText += tok.symbolString() + ' ';
+    ///TODO: reuse parsesession->stringForNode
+    for( uint a = node->start_token; a < node->end_token; a++ ) {
+      nodeText += m_tokenStream->symbolString(a) + ' ';
     }
   }
 
diff --git a/languages/cpp/parser/lexer.cpp b/languages/cpp/parser/lexer.cpp
index e40cb28..0cdad1e 100644
--- a/languages/cpp/parser/lexer.cpp
+++ b/languages/cpp/parser/lexer.cpp
@@ -46,6 +46,41 @@ void TokenStream::splitRightShift(uint index)
   insert(index+1, next_token);
 }
 
+KDevelop::IndexedString TokenStream::symbol(const Token& t) const
+{
+  if(t.size == 1)
+    return KDevelop::IndexedString::fromIndex(session->contents()[t.position]);
+  else
+    return KDevelop::IndexedString();
+}
+
+uint TokenStream::symbolIndex(const Token& t) const
+{
+  return session->contents()[t.position];
+}
+
+QByteArray TokenStream::symbolByteArray(const Token& t) const
+{
+  if (t.size == 0) // esp. for EOF
+    return QByteArray();
+
+  return stringFromContents(session->contentsVector(), t.position, t.size);
+}
+
+QString TokenStream::symbolString(const Token& t) const
+{
+  return QString::fromUtf8(symbolByteArray(t));
+}
+
+uint TokenStream::symbolLength(const Token& t) const
+{
+  uint ret = 0;
+  for(uint a = t.position; a < t.position+t.size; ++a) {
+    ret += KDevelop::IndexedString::lengthFromIndex(session->contents()[a]);
+  }
+  return ret;
+}
+
 QString Lexer::SpecialCursor::toString() const
 {
   return KDevelop::IndexedString::fromIndex(*current).str();
@@ -118,40 +153,6 @@ void Lexer::skipComment()
   return;
 }
 
-KDevelop::IndexedString Token::symbol() const {
-  if(size == 1)
-    return KDevelop::IndexedString::fromIndex(session->contents()[position]);
-  else
-    return KDevelop::IndexedString();
-}
-
-uint Token::symbolIndex() const
-{
-  return session->contents()[position];
-}
-
-QByteArray Token::symbolByteArray() const {
-  if (size == 0) // esp. for EOF
-    return QByteArray();
-
-  return stringFromContents(session->contentsVector(), position, size);
-}
-
-QString Token::symbolString() const {
-  if (size == 0) // esp. for EOF
-    return QString();
-
-  return QString::fromUtf8(stringFromContents(session->contentsVector(), position, \
                size));
-}
-
-uint Token::symbolLength() const {
-  uint ret = 0;
-  for(uint a = position; a < position+size; ++a) {
-    ret += KDevelop::IndexedString::lengthFromIndex(session->contents()[a]);
-  }
-  return ret;
-}
-
 const uint index_size = 200;
 
 KDevVarLengthArray<KDevVarLengthArray<QPair<uint, TOKEN_KIND>, 10 >, index_size > \
createIndicesForTokens() { @@ -290,7 +291,6 @@ void Lexer::tokenize(ParseSession* \
_session)  {
   Token eof;
   eof.kind = Token_EOF;
-  eof.session = session;
   eof.position = 0;
   eof.size = 0;
   stream->append(eof);
@@ -309,7 +309,6 @@ void Lexer::tokenize(ParseSession* _session)
 
     {
     Token token;
-    token.session = session;
     token.position = cursor.offsetIn( session->contents() );
     token.size = 0;
     stream->append(token);
@@ -367,7 +366,6 @@ void Lexer::tokenize(ParseSession* _session)
   {
   Token eof;
   eof.kind = Token_EOF;
-  eof.session = session;
   eof.position = cursor.offsetIn( session->contents() );
   eof.size = 0;
   stream->append(eof);
@@ -895,7 +893,6 @@ void Lexer::scan_divide()
           (*session->token_stream)[index++].kind = Token_comment;
           (*session->token_stream)[index-1].size = (size_t)(cursor - commentBegin);
           (*session->token_stream)[index-1].position = commentBegin.offsetIn( \
                session->contents() );
-          (*session->token_stream)[index-1].session = session;
         }else{
           //Merge with previous comment
           (*session->token_stream)[index-1].size = \
                cursor.offsetIn(session->contents()) - \
                (*session->token_stream)[index-1].position;
diff --git a/languages/cpp/parser/lexer.h b/languages/cpp/parser/lexer.h
index b370f89..49e365d 100644
--- a/languages/cpp/parser/lexer.h
+++ b/languages/cpp/parser/lexer.h
@@ -39,30 +39,15 @@ typedef void (Lexer::*scan_fun_ptr)();
 class KDEVCPPPARSER_EXPORT Token
 {
 public:
-  ///kind of the token @see TOKEN_KIND enum reference.
-  quint16 kind;
   ///position in the preprocessed buffer
   uint position;
   ///size of the token in the preprocessed buffer. Do not confuse this with \
symbolLength.  uint size;
-  ///pointer to the parse session.
-  const ParseSession* session;
-
-  //Symbol associated to the token. This only works if this is a simple symbol
-  //only consisting of one identifier(not comments), does not work for operators \
                like "->" or numbers like "50"
-  KDevelop::IndexedString symbol() const;
-  //The index of the symbol associated to the token.
-  //The notes from @c symbol() apply as well.
-  uint symbolIndex() const;
-
-  //This always works, but is expensive
-  QString symbolString() const;
-  QByteArray symbolByteArray() const;
-
-  uint symbolLength() const;
+  ///kind of the token @see TOKEN_KIND enum reference.
+  quint16 kind;
 };
 
-Q_DECLARE_TYPEINFO(Token, Q_MOVABLE_TYPE);
+Q_DECLARE_TYPEINFO(Token, Q_PRIMITIVE_TYPE);
 
 /**Stream of tokens found by lexer.
 Internally works like an array of @ref Token continuosly allocated.
@@ -76,7 +61,7 @@ TODO: reuse some pool / container class for the token array
 NOTE: token_count is actually the *size* of the token pool
       the last actually used token is lastToken
 */
-class TokenStream : public QVector<Token>
+class KDEVCPPPARSER_EXPORT TokenStream : public QVector<Token>
 {
 private:
   TokenStream(const TokenStream &);
@@ -84,8 +69,9 @@ private:
 
 public:
   /**Creates a token stream with the default reserved size of 1024 tokens.*/
-  inline TokenStream(uint size = 1024)
-    : index(0)
+  inline TokenStream(ParseSession* _session, uint size = 1024)
+    : session(_session)
+    , index(0)
   {
     reserve(size);
   }
@@ -121,6 +107,51 @@ public:
   { return at(i).position; }
 
   /**
+   * @return The symbol associated to the token.
+   *
+   * @note This only works if this is a simple symbol, i.e.
+   * only consisting of one identifier (not comments).
+   * does not work for operators like "->" or numbers like "50"
+   */
+  KDevelop::IndexedString symbol(const Token& t) const;
+  inline KDevelop::IndexedString symbol(uint i) const
+  { return symbol(token(i)); }
+
+  /**
+   * @return The index of the symbol associated to the token.
+   *
+   * @note The notes from @c symbol() apply as well.
+   */
+  uint symbolIndex(const Token& t) const;
+  inline uint symbolIndex(uint i) const
+  { return symbolIndex(token(i)); }
+
+  /**
+   * @return The string representation of the token.
+   *
+   * @note This always works but is expensive
+   */
+  QString symbolString(const Token& t) const;
+  inline QString symbolString(uint i) const
+  { return symbolString(token(i)); }
+
+  /**
+   * @return The bytearray representation of the token.
+   *
+   * @note This always works but is expensive
+   */
+  QByteArray symbolByteArray(const Token& t) const;
+  inline QByteArray symbolByteArray(uint i) const
+  { return symbolByteArray(token(i)); }
+
+  /**
+   * @return The length of this token's text representation
+   */
+  uint symbolLength(const Token& t) const;
+  inline uint symbolLength(uint i) const
+  { return symbolLength(token(i)); }
+
+  /**
    * Split the right shift token at @p index into two distinct right angle brackets.
    * 
    * Required to support 14.2/3 of the spec, see also:
@@ -129,6 +160,7 @@ public:
   void splitRightShift(uint index);
 
 private:
+  ParseSession* session;
   uint index;
 };
 
diff --git a/languages/cpp/parser/name_compiler.cpp \
b/languages/cpp/parser/name_compiler.cpp index ca72e66..6060c57 100644
--- a/languages/cpp/parser/name_compiler.cpp
+++ b/languages/cpp/parser/name_compiler.cpp
@@ -29,24 +29,6 @@
 
 using namespace KDevelop;
 
-///@todo this is very expensive
-QString decode(ParseSession* session, AST* ast, bool without_spaces = false)
-{
-  QString ret;
-  if( without_spaces ) {
-    //Decode operator-names without spaces for now, since we rely on it in other \
                places.
-    ///@todo change this, here and in all the places that rely on it. Operators \
                should then by written like "operator [ ]"(space between each token)
-    for( size_t a = ast->start_token; a < ast->end_token; a++ ) {
-      ret += session->token_stream->token(a).symbolString();
-    }
-  } else {
-    for( size_t a = ast->start_token; a < ast->end_token; a++ ) {
-      ret += session->token_stream->token(a).symbolString() + ' ';
-    }
-  }
-  return ret;
-}
-
 uint parseConstVolatile(ParseSession* session, const ListNode<uint> *cv)
 {
   uint ret = AbstractType::NoModifiers;
@@ -72,7 +54,7 @@ IndexedTypeIdentifier \
typeIdentifierFromTemplateArgument(ParseSession* session,  {
   IndexedTypeIdentifier id;
   if(node->expression) {
-    id = IndexedTypeIdentifier(decode(session, node), true);
+    id = IndexedTypeIdentifier(session->stringForNode(node), true);
   }else if(node->type_id) {
     //Parse the pointer operators
     TypeCompiler tc(session);
@@ -145,7 +127,7 @@ void NameCompiler::visitUnqualifiedName(UnqualifiedNameAST *node)
   IndexedString tmp_name;
 
   if (node->id)
-    tmp_name = m_session->token_stream->token(node->id).symbol();
+    tmp_name = m_session->token_stream->symbol(node->id);
 
   if (node->ellipsis)
     tmp_name = IndexedString("...");
@@ -162,7 +144,7 @@ void NameCompiler::visitUnqualifiedName(UnqualifiedNameAST *node)
       QString tmp = operatorString;
 
       if (op_id->op && op_id->op->op)
-        tmp +=  decode(m_session, op_id->op, true);
+        tmp +=  m_session->stringForNode(op_id->op, true);
       else
         tmp += QLatin1String("{...cast...}");
 
@@ -175,7 +157,10 @@ void NameCompiler::visitUnqualifiedName(UnqualifiedNameAST \
*node)  if (node->template_arguments)
     {
       visitNodes(this, node->template_arguments);
-    }else if(node->end_token == node->start_token + 3 && node->id == \
node->start_token && m_session->token_stream->token(node->id+1).symbol() == \
KDevelop::IndexedString('<')) { +    }
+  else if (node->end_token == node->start_token + 3 && node->id == node->start_token
+      && m_session->token_stream->kind(node->id+1) == '<')
+    {
       ///@todo Represent this nicer in the AST
       ///It's probably a type-specifier with instantiation of the default-parameter, \
                like "Bla<>".
       m_currentIdentifier.appendTemplateIdentifier( IndexedTypeIdentifier() );
diff --git a/languages/cpp/parser/parser.cpp b/languages/cpp/parser/parser.cpp
index 16bb199..4435cb3 100644
--- a/languages/cpp/parser/parser.cpp
+++ b/languages/cpp/parser/parser.cpp
@@ -197,7 +197,7 @@ TranslationUnitAST *Parser::parse(ParseSession* _session)
   session = _session;
 
   if (!session->token_stream)
-    session->token_stream = new TokenStream;
+    session->token_stream = new TokenStream(session);
 
   lexer.tokenize(session);
   advance(); // skip the first token
@@ -213,7 +213,7 @@ StatementAST *Parser::parseStatement(ParseSession* _session)
   session = _session;
 
   if (!session->token_stream)
-    session->token_stream = new TokenStream;
+    session->token_stream = new TokenStream(session);
 
   lexer.tokenize(session);
   advance(); // skip the first token
@@ -229,7 +229,7 @@ AST *Parser::parseTypeOrExpression(ParseSession* _session, bool \
forceExpression)  session = _session;
 
   if (!session->token_stream)
-    session->token_stream = new TokenStream;
+    session->token_stream = new TokenStream(session);
 
   lexer.tokenize(session);
   advance(); // skip the first token
@@ -280,7 +280,7 @@ bool Parser::parseWinDeclSpec(WinDeclSpecAST *&node)
 
   uint start = session->token_stream->cursor();
 
-  const uint tokenIndex = \
session->token_stream->token(session->token_stream->cursor()).symbolIndex(); +  const \
uint tokenIndex = session->token_stream->symbolIndex(session->token_stream->cursor());
  static const KDevelop::IndexedString declSpecString("__declspec");
   if (declSpecString.index() != tokenIndex)
     return false;
@@ -5184,8 +5184,7 @@ bool Parser::parseQProperty(DeclarationAST *&node)
     static KDevelop::IndexedString finalStr("FINAL");
 
     while(session->token_stream->lookAhead() != ')') {
-      const Token token = \
                session->token_stream->token(session->token_stream->cursor());
-      const KDevelop::IndexedString propertyField = token.symbol();
+      const KDevelop::IndexedString propertyField = \
session->token_stream->symbol(session->token_stream->cursor());  if(propertyField == \
readStr) {  advance(); // skip READ
         if(!parseName(ast->getter))
@@ -5513,9 +5512,5 @@ QString Parser::stringForNode(AST* node) const
     return "<invalid node>";
   }
 
-  QString str;
-  for(int i = node->start_token; i < node->end_token; ++i) {
-    str += session->token_stream->token(i).symbolString();
-  }
-  return str;
+  return session->stringForNode(node);
 }
diff --git a/languages/cpp/parser/parsesession.cpp \
b/languages/cpp/parser/parsesession.cpp index d3669ff..e1fcef5 100644
--- a/languages/cpp/parser/parsesession.cpp
+++ b/languages/cpp/parser/parsesession.cpp
@@ -190,6 +190,21 @@ const KDevelop::IndexedString& ParseSession::url() const
   return m_url;
 }
 
+QString ParseSession::stringForNode(AST* node, bool withoutSpaces) const
+{
+  QString ret;
+  for( uint a = node->start_token; a < node->end_token; a++ ) {
+    ret += token_stream->symbolString(a);
+    if (!withoutSpaces) {
+      // Decode operator-names without spaces for now, since we rely on it in other \
places. +      /// @todo change this, here and in all the places that rely on it.
+      /// Operators should then by written like "operator [ ]"(space between each \
token) +      ret += QLatin1Char(' ');
+    }
+  }
+  return ret;
+}
+
 void ParseSession::dumpNode(AST* node) const
 {
   DumpTree dumper;
diff --git a/languages/cpp/parser/parsesession.h \
b/languages/cpp/parser/parsesession.h index 2859a43..1f69da7 100644
--- a/languages/cpp/parser/parsesession.h
+++ b/languages/cpp/parser/parsesession.h
@@ -128,6 +128,16 @@ public:
 
   CommentFormatter m_commentFormatter;
 
+  /**
+   * Get the string representation of all tokens in the AST's token range
+   *
+   * @param withoutSpaces if set to true, no additional spaces are added
+   *                      between every token string
+   *
+   * @note This is very expensive
+   */
+  QString stringForNode(AST* node, bool withoutSpaces = false) const;
+
   /// useful for debugging, can be called from GDB
   void dumpNode(AST* node) const;
 
diff --git a/languages/cpp/parser/tests/test_generator.cpp \
b/languages/cpp/parser/tests/test_generator.cpp index 66eef94..440bd29 100644
--- a/languages/cpp/parser/tests/test_generator.cpp
+++ b/languages/cpp/parser/tests/test_generator.cpp
@@ -23,16 +23,29 @@
 #include <tests/autotestshell.h>
 #include <tests/testcore.h>
 
+static ParseSession* lastSession = 0;
+static ParseSession* lastGeneratedSession = 0;
+
+QString tokenString(const Token& t)
+{
+  if (lastSession->token_stream->contains(t)) {
+    return lastSession->token_stream->symbolString(t);
+  } else {
+    Q_ASSERT(lastGeneratedSession->token_stream->contains(t));
+    return lastGeneratedSession->token_stream->symbolString(t);
+  }
+}
+
 bool operator==(const Token& t1, const Token& t2)
 {
-  return t1.kind == t2.kind && t1.symbolString() == t2.symbolString();
+  return t1.kind == t2.kind && tokenString(t1) == tokenString(t2);
 }
 
 namespace QTest {
   template<>
   char* toString(const Token& t)
   {
-    return qstrdup(QString("%1 [ %2 \
]").arg(token_name(t.kind)).arg(t.symbolString()).toUtf8()); +    return \
qstrdup(QString("%1 [ %2 ]").arg(token_name(t.kind)).arg(tokenString(t)).toUtf8());  \
}  }
 
@@ -222,9 +235,6 @@ private slots:
   }
 
 private:
-  ParseSession* lastSession;
-  ParseSession* lastGeneratedSession;
-
   TranslationUnitAST* parseOriginal(const QByteArray& unit)
   {
     Parser parser(&control);
diff --git a/languages/cpp/parser/tests/test_parser.cpp \
b/languages/cpp/parser/tests/test_parser.cpp index 3bcf186..3c35618 100644
--- a/languages/cpp/parser/tests/test_parser.cpp
+++ b/languages/cpp/parser/tests/test_parser.cpp
@@ -870,11 +870,11 @@ void TestParser::testMultiByteCStrings()
   QVERIFY(control.problems().isEmpty());
   AST* str = getAST(ast, AST::Kind_StringLiteral);
   QVERIFY(str);
-  QCOMPARE(stringForNode(str), QString::fromUtf8("\"ä\""));
+  QCOMPARE(lastSession->stringForNode(str, true), QString::fromUtf8("\"ä\""));
   Token token = lastSession->token_stream->token(str->start_token);
   QEXPECT_FAIL("", "the wide ä-char takes two indizes in a QByteArray, which breaks \
our lexer", Abort);  QCOMPARE(token.size, 3u);
-  QCOMPARE(token.symbolLength(), 3u);
+  QCOMPARE(lastSession->token_stream->symbolLength(token), 3u);
   Token endToken = lastSession->token_stream->token(str->end_token);
   rpp::Anchor pos = lastSession->positionAt(endToken.position);
   // should end just before the semicolon
@@ -974,15 +974,6 @@ AST* TestParser::getAST(AST* ast, AST::NODE_KIND kind, int num)
   return visitor.ast;
 }
 
-QString TestParser::stringForNode(AST* node) const
-{
-  QString ret;
-  for(int i = node->start_token; i < node->end_token; ++i) {
-    ret += lastSession->token_stream->token(i).symbolString();
-  }
-  return ret;
-}
-
 #include "test_parser.moc"
 
 QTEST_MAIN(TestParser)
diff --git a/languages/cpp/parser/tests/test_parser.h \
b/languages/cpp/parser/tests/test_parser.h index 14e390d..43dae1a 100644
--- a/languages/cpp/parser/tests/test_parser.h
+++ b/languages/cpp/parser/tests/test_parser.h
@@ -133,10 +133,6 @@ protected:
    * dump @p node and print problems of @c control
    */
   void dump(AST* node);
-  /**
-   * Retrieve string representation of @p node.
-   */
-  QString stringForNode(AST* node) const;
 
   bool hasKind(AST*, AST::NODE_KIND);
   AST* getAST(AST*, AST::NODE_KIND, int num = 0);
diff --git a/languages/cpp/parser/tests/test_parser_cpp2011.cpp \
b/languages/cpp/parser/tests/test_parser_cpp2011.cpp index b566f40..4414f0f 100644
--- a/languages/cpp/parser/tests/test_parser_cpp2011.cpp
+++ b/languages/cpp/parser/tests/test_parser_cpp2011.cpp
@@ -433,9 +433,9 @@ void TestParser::testInitListFalsePositives()
   FunctionDefinitionAST* funcDef = \
static_cast<FunctionDefinitionAST*>(ast->declarations->at(0)->element);  \
QVERIFY(!funcDef->constructor_initializers);  QVERIFY(funcDef->type_specifier);
-  QCOMPARE(stringForNode(funcDef->type_specifier).trimmed(), QString("void"));
+  QCOMPARE(lastSession->stringForNode(funcDef->type_specifier, true).trimmed(), \
QString("void"));  QVERIFY(funcDef->function_body);
-  QCOMPARE(stringForNode(funcDef->function_body).trimmed(), QString("{}"));
+  QCOMPARE(lastSession->stringForNode(funcDef->function_body, true).trimmed(), \
QString("{}"));  // ;
   QVERIFY(!ast->declarations->at(1)->element);
   }
diff --git a/languages/cpp/tests/cpp-parser.cpp b/languages/cpp/tests/cpp-parser.cpp
index 6600697..c23ac72 100644
--- a/languages/cpp/tests/cpp-parser.cpp
+++ b/languages/cpp/tests/cpp-parser.cpp
@@ -90,8 +90,9 @@ private:
           qout << "token stream:" << endl;
           for(int i = 0; i < m_session.token_stream->count(); ++i) {
             const Token& t = m_session.token_stream->at(i);
-            Q_ASSERT(t.size || t.symbolString().isEmpty());
-            qout << token_name(t.kind) << ": " << t.symbolString() << endl;
+            const QString str = m_session.token_stream->symbolString(t);
+            Q_ASSERT(t.size || str.isEmpty());
+            qout << token_name(t.kind) << ": " << str << endl;
           }
         }
       }


[prev in list] [next in list] [prev in thread] [next in thread]
Configure | About | News | Add a list | Sponsored by KoreLogic