[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    KDE/kdesdk/strigi-analyzer/diff
From:       Jakub Stachowski <qbast () go2 ! pl>
Date:       2008-11-14 22:56:17
Message-ID: 1226703377.504519.1765.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 884430 by qbast:

Fix file counting for unified diffs.
Make the analyzer actually emit the collected data.
20x speedup. This analyzer was taking over 80% of indexing time, so
if you disabled Strigi because of its slowness, it is time to try again.



 M  +62 -58    difflineanalyzer.cpp  
 M  +23 -12    difflineanalyzer.h  


--- trunk/KDE/kdesdk/strigi-analyzer/diff/difflineanalyzer.cpp #884429:884430
@@ -34,6 +34,19 @@
 using namespace std;
 using namespace Strigi;
 
+DiffLineAnalyzer::DiffLineAnalyzer(const DiffLineAnalyzerFactory* f)
+    : factory(f),
+    eq3("==="), plus3("+++"), minus3("---"), asterisk3("***"), index("Index:"),
+    retrieving("retrieving revision"), diff("diff"), asterisks("***************"),
+    normalFormat("^[0-9]+[0-9,]*[acd][0-9]+[0-9,]*$"), contextFormat("^\\*\\*\\* [^\\t]+\\t"),
+    rcsFormat("^[acd][0-9]+ [0-9]+"), edFormat("^[0-9]+[0-9,]*[acd]"),
+    edAdd( "([0-9]+)(|,([0-9]+))a" ), edDel( "([0-9]+)(|,([0-9]+))d" ),
+    edMod( "([0-9]+)(|,([0-9]+))c" ), normalAdd( "[0-9]+a([0-9]+)(|,([0-9]+))" ),
+    normalDel( "([0-9]+)(|,([0-9]+))d(|[0-9]+)" ), normalMod( "([0-9]+)(|,([0-9]+))c([0-9]+)(|,([0-9]+))" ),
+    rcsAdd( "a[0-9]+ ([0-9]+)" ), rcsDel( "d[0-9]+ ([0-9]+)" )
+    {}
+
+
 void DiffLineAnalyzerFactory::registerFields(FieldRegister& reg) {
     nbFilesField = reg.registerField("diff.stats.modify_file_count" , FieldRegister::integerType, 1, 0);
     firstFileField = reg.registerField("diff.first_modify_file" , FieldRegister::stringType, 1, 0);
@@ -62,91 +75,80 @@
 void DiffLineAnalyzer::handleLine(const char* data, uint32_t length) {
     QString line(QString::fromUtf8(data, length));
 
-    if(line.startsWith( "Index:" ) && !indexFound)
+    if( !indexFound && line.startsWith(index) ) 
     {
-	if(m_firstFile.exactMatch( line))
-	{
-        	QString filename = m_firstFile.cap(1);
-        	analysisResult->addValue(factory->firstFileField, (const \
                char*)filename.toUtf8());
-	}
+        QString fileName=line.mid(7);
+        analysisResult->addValue(factory->firstFileField, (const \
char*)fileName.toUtf8().data());  indexFound = true;
     }
-    else if ( line.startsWith( "retrieving revision") )
+    else if ( line.startsWith(retrieving) )
 	diffProgram = DiffLineAnalyzer::CVSDiff;
-    else if ( m_diffRE.exactMatch( line ) )
+    else if ( line.startsWith(diff) && line[4]==' ' )
         diffProgram = DiffLineAnalyzer::Diff;
-    else if ( m_p4sRE.exactMatch( line ) )
+    else if ( line.startsWith(eq3) && data[3]==' ' )
         diffProgram = DiffLineAnalyzer::Perforce;
 
+    bool digit0=data[0]>='0' && data[0]<='9';
+    
     if(diffFormat == DiffLineAnalyzer::Unknown) //search format
     {
-        if ( QRegExp( "^[0-9]+[0-9,]*[acd][0-9]+[0-9,]*$" ).exactMatch( line ) )
+        if ( digit0 && normalFormat.exactMatch( line ) )
         {
             diffFormat = DiffLineAnalyzer::Normal;
         }
-        else if ( line.contains( QRegExp( "^--- " ) ) )
+        else if ( line[3]==' ' && line.startsWith(minus3) ) 
         {
             // unified has first a '^--- ' line, then a '^+++ ' line
             diffFormat = DiffLineAnalyzer::Unified;
         }
-        else if ( line.contains( QRegExp( "^\\*\\*\\* [^\\t]+\\t" ) ) )
+        else if ( line[0] == '*' && line.contains( contextFormat ) )
         {
             // context has first a '^*** ' line, then a '^--- ' line
             diffFormat = DiffLineAnalyzer::Context;
         }
-        else if ( line.contains( QRegExp( "^[acd][0-9]+ [0-9]+" ) ) )
+        else if ( (line[0]=='a' || line[0]=='c' || line[0]=='d') &&  line.contains( \
rcsFormat ) )  {
             diffFormat =  DiffLineAnalyzer::RCS;
         }
-        else if ( line.contains( QRegExp( "^[0-9]+[0-9,]*[acd]" ) ) )
+        else if ( digit0 && line.contains( edFormat ) )
         {
             diffFormat = DiffLineAnalyzer::Ed;
         }
     }
-    else //analyze files
+    
+    if (diffFormat != DiffLineAnalyzer::Unknown)
+    //analyze files
     {
-	QRegExp edAdd( "([0-9]+)(|,([0-9]+))a" );
-	QRegExp edDel( "([0-9]+)(|,([0-9]+))d" );
-	QRegExp edMod( "([0-9]+)(|,([0-9]+))c" );
-
-	QRegExp normalAdd( "[0-9]+a([0-9]+)(|,([0-9]+))" );
-	QRegExp normalDel( "([0-9]+)(|,([0-9]+))d(|[0-9]+)" );
-	QRegExp normalMod( "([0-9]+)(|,([0-9]+))c([0-9]+)(|,([0-9]+))" );
-
-	QRegExp rcsAdd( "a[0-9]+ ([0-9]+)" );
-	QRegExp rcsDel( "d[0-9]+ ([0-9]+)" );
-
-
 	switch( diffFormat )
 	{
 	case DiffLineAnalyzer::Context:
-		if ( line.startsWith("***************") )
+		if ( line.startsWith(asterisks) )
 		{
 			numberOfHunks++;
 			//kDebug(7034) << "Context Hunk      : " << line << endl;
 		}
-		else if ( line.startsWith("***") )
+		else if ( line.startsWith(asterisk3) )
 		{
 			numberOfFiles++;
 			//kDebug(7034) << "Context File      : " << line << endl;
 		}
-		else if ( line.startsWith("---") ) {} // ignore
-		else if ( line.startsWith("+") )
+		else if ( line.startsWith(minus3) ) {} // ignore
+		else if ( line[0]=='+' )
 		{
 			numberOfAdditions++;
 //			kDebug(7034) << "Context Insertion : " << line << endl;
 		}
-		else if ( line.startsWith("-") )
+		else if ( line[0]=='-' )
 		{
 			numberOfDeletions++;
 //			kDebug(7034) << "Context Deletion  : " << line << endl;
 		}
-		else if ( line.startsWith("!") )
+		else if ( line[0]=='!' )
 		{
 			numberOfChanges++;
 //			kDebug(7034) << "Context Modified  : " << line << endl;
 		}
-		else if ( line.startsWith(" ") )
+		else if ( line[0]==' ' )
 		{
 //				kDebug(7034) << "Context Context   : " << line << endl;
 		}
@@ -160,17 +162,17 @@
 #endif
 		break;
 	case DiffLineAnalyzer::Ed:
-		if ( line.startsWith( "diff" ) )
+		if ( line.startsWith( diff ) )
 		{
 			numberOfFiles++;
 //			kDebug(7034) << "Ed File         : " << line << endl;
 		}
-		else if ( edAdd.exactMatch( line ) )
+		else if ( digit0 && edAdd.exactMatch( line ) )
 		{
 //				kDebug(7034) << "Ed Insertion    : " << line << endl;
 			numberOfHunks++;
 #if 0
-				while( it != lines.end() && !(*it).startsWith(".") )
+				while( it != lines.end() && !(*it)[0]=='.' )
 				{
 					(*numberOfAdditions)++;
 //					kDebug(7034) << "Ed Insertion    : " << (*it) << endl;
@@ -178,14 +180,14 @@
 				}
 #endif
 		}
-		else if ( edDel.exactMatch( line ) )
+		else if ( digit0 && edDel.exactMatch( line ) )
 		{
 //				kDebug(7034) << "Ed Deletion     : " << line << endl;
 			numberOfHunks++;
 			numberOfDeletions += (edDel.cap(3).isEmpty() ? 1 : edDel.cap(3).toInt() - \
edDel.cap(1).toInt() + 1);  //			kDebug(7034) << "Ed noOfLines    : " << \
(edDel.cap(3).isEmpty() ? 1 : edDel.cap(3).toInt() - edDel.cap(1).toInt() + 1) << \
endl;  }
-		else if ( edMod.exactMatch( line ) )
+		else if ( digit0 && edMod.exactMatch( line ) )
 		{
 //				kDebug(7034) << "Ed Modification : " << line << endl;
 			if ( edMod.cap(3).isEmpty() )
@@ -210,12 +212,12 @@
 
 		break;
 	case DiffLineAnalyzer::Normal:
-		if ( line.startsWith( "diff" ) )
+		if ( line.startsWith( diff ) )
 		{
 			numberOfFiles++;
 //			kDebug(7034) << "Normal File         : " << line << endl;
 		}
-		else if ( normalAdd.exactMatch( line ) )
+		else if ( digit0 && normalAdd.exactMatch( line ) )
 		{
 //				kDebug(7034) << "Normal Insertion    : " << line << endl;
 			numberOfHunks++;
@@ -230,7 +232,7 @@
 //					kDebug(7034) << "Normal Addition : " << normalAdd.cap(3).toInt() - \
normalAdd.cap(1).toInt() + 1 << endl;  }
 		}
-		else if ( normalDel.exactMatch(line) )
+		else if ( digit0 && normalDel.exactMatch(line) )
 			{
 //				kDebug(7034) << "Normal Deletion     : " << line << endl;
 			numberOfHunks++;
@@ -245,7 +247,7 @@
 //					kDebug(7034) << "Normal Deletion : " << normalDel.cap(3).toInt() - \
normalDel.cap(1).toInt() + 1 << endl;  }
 		}
-		else if ( normalMod.exactMatch( line ) )
+		else if ( digit0 && normalMod.exactMatch( line ) )
 			{
 //				kDebug(7034) << "Normal Modification : " << line << endl;
 			numberOfHunks++;
@@ -270,12 +272,12 @@
 //					kDebug(7034) << "Normal Addition : " << normalMod.cap(6).toInt() - \
normalMod.cap(4).toInt() + 1 << endl;  }
 		}
-		else if ( line.startsWith(">") )
+		else if ( line[0]=='>' )
 		{
 //				numberOfAdditions++;
 //				kDebug(7034) << "Normal Insertion    : " << line << endl;
 		}
-		else if ( line.startsWith("<") )
+		else if ( line[0]=='<' )
 		{
 //				numberOfDeletions++;
 //				kDebug(7034) << "Normal Deletion     : " << line << endl;
@@ -286,7 +288,7 @@
 		}
 		break;
 	case DiffLineAnalyzer::RCS:
-		if ( line.startsWith( "diff" ) ) // works for cvs diff, have to test for normal \
diff +		if ( line.startsWith( diff ) ) // works for cvs diff, have to test for normal \
diff  {
 //				kDebug(7034) << "RCS File      : " << line << endl;
 			numberOfFiles++;
@@ -311,28 +313,28 @@
 		}
 		break;
 	case DiffLineAnalyzer::Unified:
-		if ( line.startsWith("@@ ") )
+		if ( line[0]=='@' && line[1]=='@' && line[2]==' ' )
 		{
 			numberOfHunks++;
 			//kDebug(7034) << "Unified Hunk      : " << line << endl;
 		}
-		else if ( line.startsWith("---") )
+		else if ( line.startsWith(minus3) )
 		{
 			numberOfFiles++;
 			//kDebug(7034) << "Unified File      : " << line << endl;
 		}
-		else if ( line.startsWith("+++") ) {} // ignore (don't count as insertion)
-		else if ( line.startsWith("+") )
+		else if ( line.startsWith(plus3) ) {} // ignore (don't count as insertion)
+		else if ( line[0]=='+' )
 		{
 			numberOfAdditions++;
 			//kDebug(7034) << "Unified Insertion : " << line << endl;
 		}
-		else if ( line.startsWith("-") )
+		else if ( line[0]=='-' )
 		{
 			numberOfDeletions++;
 			//kDebug(7034) << "Unified Deletion  : " << line << endl;
 		}
-		else if ( line.startsWith(" ") )
+		else if ( line[0]==' ' )
 		{
 			//kDebug(7034) << "Unified Context   : " << line << endl;
 		}
@@ -349,7 +351,7 @@
     }
 }
 
-void DiffLineAnalyzer::endAnalysis(){
+void DiffLineAnalyzer::endAnalysis(bool complete){
     //don't add info if we didn't know diff format
     if(diffFormat != DiffLineAnalyzer::Unknown)
     {
@@ -357,11 +359,13 @@
        if ( indexFound && diffProgram ==DiffLineAnalyzer::Undeterminable) // but no \
"retrieving revision" found like only cvs diff adds.  diffProgram = \
                DiffLineAnalyzer::SubVersion;
        analysisResult->addValue(factory->diffProgramField, (const \
                char*)determineI18nedProgram(diffProgram).toUtf8());
-       analysisResult->addValue(factory->nbFilesField, numberOfFiles);
-       analysisResult->addValue(factory->insertFilesField, numberOfAdditions);
-       analysisResult->addValue(factory->modifyFilesField, numberOfChanges);
-       analysisResult->addValue(factory->deleteFilesField, numberOfDeletions);
-       analysisResult->addValue(factory->hunksField, numberOfHunks);
+       if (complete) {
+        analysisResult->addValue(factory->nbFilesField, numberOfFiles);
+        analysisResult->addValue(factory->insertFilesField, numberOfAdditions);
+        analysisResult->addValue(factory->modifyFilesField, numberOfChanges);
+        analysisResult->addValue(factory->deleteFilesField, numberOfDeletions);
+        analysisResult->addValue(factory->hunksField, numberOfHunks);
+       }
     }
     ready = true;
 }
--- trunk/KDE/kdesdk/strigi-analyzer/diff/difflineanalyzer.h #884429:884430
@@ -49,23 +49,34 @@
     bool indexFound;
     Format diffFormat;
     DiffProgram diffProgram;
-
-    QRegExp m_diffRE;
-    QRegExp m_p4sRE;
-    QRegExp m_firstFile;
+    const QString eq3;
+    const QString plus3;
+    const QString minus3;
+    const QString asterisk3;
+    const QString index;
+    const QString retrieving;
+    const QString diff;
+    const QString asterisks;
+    const QRegExp normalFormat;
+    const QRegExp contextFormat;
+    const QRegExp rcsFormat;
+    const QRegExp edFormat;
+    QRegExp edAdd;    
+    QRegExp edDel;    
+    QRegExp edMod;    
+    QRegExp normalAdd;    
+    QRegExp normalDel;    
+    QRegExp normalMod;    
+    QRegExp rcsAdd;    
+    QRegExp rcsDel;    
+    
 public:
-    DiffLineAnalyzer(const DiffLineAnalyzerFactory* f)
-            : factory(f)
-            , m_diffRE( "^diff .*" )
-            , m_p4sRE("^==== ")
-            , m_firstFile( "^Index: (.*)" )
-            {}
+    DiffLineAnalyzer(const DiffLineAnalyzerFactory* f);
     ~DiffLineAnalyzer() {}
-    virtual void endAnalysis(bool /*complete*/) {} 
+    virtual void endAnalysis(bool complete); 
     const char* name() const { return "DiffLineAnalyzer"; }
     void startAnalysis(Strigi::AnalysisResult*);
     void handleLine(const char* data, uint32_t length);
-    void endAnalysis();
     bool isReadyWithStream();
 };
 


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic