[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    branches/KDE/4.4/kdebase/runtime/nepomuk/services/strigi
From:       Sebastian Trueg <sebastian () trueg ! de>
Date:       2010-02-19 16:23:42
Message-ID: 1266596622.760459.12766.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 1092849 by trueg:

Backport: Query all entries to remove using one fancy query. This way we get them \
all.

CCMAIL: nlecureuil@mandriva.com


 M  +64 -62    indexscheduler.cpp  


--- branches/KDE/4.4/kdebase/runtime/nepomuk/services/strigi/indexscheduler.cpp \
#1092848:1092849 @@ -455,7 +455,6 @@
 
 void Nepomuk::IndexScheduler::slotConfigChanged()
 {
-    readConfig();
     if ( isRunning() )
         restart();
 }
@@ -533,82 +532,85 @@
 }
 
 
+namespace {
+    void insertSortFolders( const QStringList& folders, bool include, \
QList<QPair<QString, bool> >& result ) +    {
+        foreach( const QString& f, folders ) {
+            // insertion sort
+            int pos = 0;
+            while ( result.count() > pos &&
+                    result[pos].first < f )
+                ++pos;
+            result.insert( pos, qMakePair( f, include ) );
+        }
+    }
+
+    QString constructFolderFilter( const QList<QPair<QString, bool> > folders, int& \
index ) +    {
+        const QString path = folders[index].first;
+        const bool include = folders[index].second;
+
+        ++index;
+
+        QStringList subFilters;
+        while ( index < folders.count() &&
+                folders[index].first.startsWith( path ) ) {
+            subFilters << constructFolderFilter( folders, index );
+        }
+
+        QString thisFilter = QString::fromLatin1( "REGEX(STR(?url),'^file://%1')" \
).arg( path ); +
+        // we want all folders that should NOT be indexed
+        if ( include ) {
+            thisFilter.prepend( '!' );
+        }
+
+        subFilters.prepend( thisFilter );
+        if ( subFilters.count() > 1 ) {
+            return '(' + subFilters.join( include ? QLatin1String( " || " ) : \
QLatin1String( " && " ) ) + ')'; +        }
+        else {
+            return subFilters.first();
+        }
+    }
+}
+
 void Nepomuk::IndexScheduler::removeOldAndUnwantedEntries()
 {
-    kDebug();
     //
-    // Get all folders that are stored as parent folders of indexed files
+    // We now query all indexed files that are in folders that should not
+    // be indexed at once.
     //
-    QString query = QString::fromLatin1( "select distinct ?d ?dir where { "
-                                         "?r a %1 . "
-                                         "?g \
                <http://www.strigi.org/fields#indexGraphFor> ?r . "
-                                         "?r %2 ?d . "
-                                         "?d %3 ?dir . }" )
-                    .arg( Soprano::Node::resourceToN3( \
                Nepomuk::Vocabulary::NFO::FileDataObject() ) )
-                    .arg( Soprano::Node::resourceToN3( \
                Nepomuk::Vocabulary::NIE::isPartOf() ) )
-                    .arg( Soprano::Node::resourceToN3( \
                Nepomuk::Vocabulary::NIE::url() ) );
-    Soprano::QueryResultIterator it = \
ResourceManager::instance()->mainModel()->executeQuery( query, \
                Soprano::Query::QueryLanguageSparql );
-    QList<QPair<KUrl, QUrl> > storedFolders;
-    while ( it.next() && !m_stopped ) {
-        storedFolders << qMakePair( KUrl( it["dir"].uri() ), it["d"].uri() );
-    }
+    QList<QPair<QString, bool> > folders;
+    insertSortFolders( StrigiServiceConfig::self()->folders(), true, folders );
+    insertSortFolders( StrigiServiceConfig::self()->excludeFolders(), false, folders \
); +    int i = 0;
+    QString folderFilter = constructFolderFilter( folders, i );
 
     //
-    // Now compare that list with the configured folders and remove any
-    // entries that are children of folders that should not be indexed.
+    // We query all files that should not be in the store
+    // This for example excludes all filex:/ URLs.
     //
-    QList<QUrl> storedFoldersToRemove;
-    for ( int i = 0; i < storedFolders.count(); ++i ) {
-        const KUrl& url = storedFolders[i].first;
-        if ( !StrigiServiceConfig::self()->shouldFolderBeIndexed( url.path() ) ) {
-            storedFoldersToRemove << storedFolders[i].second;
-        }
-    }
+    QString query = QString::fromLatin1( "select distinct ?g where { "
+                                         "?r %1 ?url . "
+                                         "?g \
<http://www.strigi.org/fields#indexGraphFor> ?r . " +                                 \
"FILTER(REGEX(STR(?url),'^file:/')) . " +                                         \
"FILTER(%2) . }" ) +                    .arg( Soprano::Node::resourceToN3( \
Nepomuk::Vocabulary::NIE::url() ), +                          folderFilter );
+    kDebug() << query;
 
-    // cleanup
-    storedFolders.clear();
+    Soprano::QueryResultIterator it = \
ResourceManager::instance()->mainModel()->executeQuery( query, \
Soprano::Query::QueryLanguageSparql ); +    while ( it.next() ) {
 
-    //
-    // Now we gathered all the folders whose children we need to delete from the \
                storage.
-    // We now need to query all entries in those folders to get a list of graphs we \
                actually
-    // need to delete.
-    //
-    for ( int i = 0; i < storedFoldersToRemove.count() && !m_stopped; ++i ) {
-
         // wait for resume or stop (or simply continue)
         if ( !waitForContinue() ) {
             break;
         }
 
-        QString query = QString::fromLatin1( "select ?g where { "
-                                             "?r a %1 . "
-                                             "?r %2 %3 . "
-                                             "?g \
                <http://www.strigi.org/fields#indexGraphFor> ?r . }" )
-                        .arg( Soprano::Node::resourceToN3( \
                Nepomuk::Vocabulary::NFO::FileDataObject() ) )
-                        .arg( Soprano::Node::resourceToN3( \
                Nepomuk::Vocabulary::NIE::isPartOf() ) )
-                        .arg( Soprano::Node::resourceToN3( storedFoldersToRemove[i] \
                ) );
-        QList<Soprano::Node> entriesToRemove
-            = ResourceManager::instance()->mainModel()->executeQuery( query, \
                Soprano::Query::QueryLanguageSparql )
-            .iterateBindings( "g" )
-            .allNodes();
-
-        //
-        // Finally delete the entries. The corresponding metadata graphs will be \
                deleted automatically
-        // by the storage service.
-        //
-        for( int j = 0; j < entriesToRemove.count() && !m_stopped; ++j ) {
-
-            // wait for resume or stop (or simply continue)
-            if ( !waitForContinue() ) {
-                break;
-            }
-
-            const Soprano::Node& g = entriesToRemove[j];
-            kDebug() << "Removing old index entry graph" << g;
-            ResourceManager::instance()->mainModel()->removeContext( g );
-        }
+        const Soprano::Node& g = it[0];
+        ResourceManager::instance()->mainModel()->removeContext( g );
     }
-    kDebug() << "done";
 }
 
 


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic