[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    KDE/kdebase/runtime/nepomuk/services/strigi
From:       Sebastian Trueg <sebastian () trueg ! de>
Date:       2010-08-11 14:18:56
Message-ID: 20100811141856.607CEAC7E9 () svn ! kde ! org
[Download RAW message or body]

SVN commit 1162155 by trueg:

1. Remove all index graphs that contain resources without a nie:url (junk from buggy runs).
2. Remove graphs in batches to avoid the iterator breakage that comes with removing iterated items.

 M  +1 -0      CMakeLists.txt  
 M  +58 -31    indexscheduler.cpp  
 M  +1 -0      indexscheduler.h  


--- trunk/KDE/kdebase/runtime/nepomuk/services/strigi/CMakeLists.txt #1162154:1162155
@@ -60,6 +60,7 @@
   ${KDE4_KIO_LIBS}
   ${KDE4_SOLID_LIBS}
   ${KDE4_KIDLETIME_LIBS}
+  ${NEPOMUK_QUERY_LIBRARIES}
   ${NEPOMUK_LIBRARIES}
   ${SOPRANO_LIBRARIES}
   )
--- trunk/KDE/kdebase/runtime/nepomuk/services/strigi/indexscheduler.cpp \
#1162154:1162155 @@ -23,6 +23,7 @@
 #include "indexscheduler.h"
 #include "strigiserviceconfig.h"
 #include "nepomukindexer.h"
+#include "util.h"
 #include "nfo.h"
 #include "nie.h"
 
@@ -42,11 +43,15 @@
 #include <Nepomuk/Resource>
 #include <Nepomuk/ResourceManager>
 #include <Nepomuk/Variant>
+#include <Nepomuk/Query/Query>
+#include <Nepomuk/Query/ComparisonTerm>
+#include <Nepomuk/Query/ResourceTerm>
 
 #include <Soprano/Model>
 #include <Soprano/QueryResultIterator>
 #include <Soprano/NodeIterator>
 #include <Soprano/Node>
+#include <Soprano/Vocabulary/RDF>
 #include <Soprano/Vocabulary/Xesam>
 
 #include <map>
@@ -544,7 +549,7 @@
     // We query all files that should not be in the store
     // This for example excludes all filex:/ URLs.
     //
-    QString query = QString::fromLatin1( "select distinct ?g ?url where { "
+    QString query = QString::fromLatin1( "select distinct ?g where { "
                                          "?r %1 ?url . "
                                          "?g \
                <http://www.strigi.org/fields#indexGraphFor> ?r . "
                                          "FILTER(REGEX(STR(?url),'^file:/')) . "
@@ -552,21 +557,10 @@
                     .arg( Soprano::Node::resourceToN3( \
Nepomuk::Vocabulary::NIE::url() ),  folderFilter );
     kDebug() << query;
+    if ( !removeAllGraphsFromQuery( query ) )
+        return;
 
-    Soprano::QueryResultIterator it = \
ResourceManager::instance()->mainModel()->executeQuery( query, \
                Soprano::Query::QueryLanguageSparql );
-    while ( it.next() ) {
 
-        // wait for resume or stop (or simply continue)
-        if ( !waitForContinue() ) {
-            break;
-        }
-
-        const Soprano::Node& g = it[0];
-        kDebug() << "REMOVING" << it["url"].uri();
-        ResourceManager::instance()->mainModel()->removeContext( g );
-    }
-
-
     //
     // Build filter query for all exclude filters
     //
@@ -584,7 +578,7 @@
     else if( !includeExcludeFilters.isEmpty() )
         filters = QString::fromLatin1("FILTER(%1) .").arg( includeExcludeFilters );
     
-    query = QString::fromLatin1( "select distinct ?g ?url where { "
+    query = QString::fromLatin1( "select distinct ?g where { "
                                  "?r %1 ?url . "
                                  "?r %2 ?fn . "
                                  "?g <http://www.strigi.org/fields#indexGraphFor> ?r \
. " @@ -594,20 +588,10 @@
                   Soprano::Node::resourceToN3( Nepomuk::Vocabulary::NFO::fileName() \
),  filters );
     kDebug() << query;
-    it = ResourceManager::instance()->mainModel()->executeQuery( query, \
                Soprano::Query::QueryLanguageSparql );
-    while ( it.next() ) {
+    if ( !removeAllGraphsFromQuery( query ) )
+        return;
 
-        // wait for resume or stop (or simply continue)
-        if ( !waitForContinue() ) {
-            break;
-        }
 
-        const Soprano::Node& g = it[0];
-        kDebug() << "REMOVING" << it["url"].uri();
-        ResourceManager::instance()->mainModel()->removeContext( g );
-    }
-
-
     //
     // Remove all old data from Xesam-times. While we leave out the data created by \
                libnepomuk
     // there is no problem since libnepomuk still uses backwards compatible queries \
and we use @@ -620,19 +604,62 @@
                                  "{ graph ?g { ?r2 %1 ?u2 . } } "
                                  "}" )
             .arg( Soprano::Node::resourceToN3( Soprano::Vocabulary::Xesam::url() ) \
                );
-    it = ResourceManager::instance()->mainModel()->executeQuery( query, \
                Soprano::Query::QueryLanguageSparql );
-    while ( it.next() ) {
+    kDebug() << query;
+    if ( !removeAllGraphsFromQuery( query ) )
+        return;
 
+
+    //
+    // Remove data which is useless but still around from before. This could happen due to some buggy version of
+    // the indexer or the filewatch service or even some application messing up the data.
+    // We look for indexed files that do not have a nie:url defined and thus, will never be catched by any of the
+    // other queries.
+    //
+    query = Query::Query(
+        Strigi::Ontology::indexGraphFor() == ( Soprano::Vocabulary::RDF::type() == \
Query::ResourceTerm( Nepomuk::Vocabulary::NFO::FileDataObject() ) && +                \
!( Nepomuk::Vocabulary::NIE::url() == Query::Term() ) ) +        \
).toSparqlQuery(Query::Query::NoResultRestrictions); +    kDebug() << query;
+    removeAllGraphsFromQuery( query );
+}
+
+
+
+/**
+ * Runs the query using a limit until all graphs have been deleted. This is not done
+ * in one big loop to avoid the problems with messed up iterators when one of the iterated
+ * item is deleted.
+ */
+bool Nepomuk::IndexScheduler::removeAllGraphsFromQuery( const QString& query )
+{
+    while ( 1 ) {
+        // get the next batch of graphs
+        QList<Soprano::Node> graphs
+            = ResourceManager::instance()->mainModel()->executeQuery( query + \
QLatin1String( " LIMIT 200" ), +                                                      \
Soprano::Query::QueryLanguageSparql ).iterateBindings( 0 ).allNodes(); +
+        // remove all graphs in the batch
+        Q_FOREACH( const Soprano::Node& graph, graphs ) {
+
         // wait for resume or stop (or simply continue)
         if ( !waitForContinue() ) {
-            break;
+                return false;
         }
 
-        ResourceManager::instance()->mainModel()->removeContext( it[0] );
+            ResourceManager::instance()->mainModel()->removeContext( graph );
     }
+
+        // we are done when the last graphs are queried
+        if ( graphs.count() < 200 ) {
+            return true;
 }
+    }
 
+    // make gcc shut up
+    return true;
+}
 
+
 QDebug Nepomuk::operator<<( QDebug dbg, IndexScheduler::IndexingSpeed speed )
 {
     dbg << ( int )speed;
--- trunk/KDE/kdebase/runtime/nepomuk/services/strigi/indexscheduler.h \
#1162154:1162155 @@ -206,6 +206,7 @@
          * to index anymore.
          */
         void removeOldAndUnwantedEntries();
+        bool removeAllGraphsFromQuery( const QString& query_ );
 
         bool m_suspended;
         bool m_stopped;


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic