[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: KDE/kdebase/runtime/nepomuk/services/strigi
From: Sebastian Trueg <sebastian () trueg ! de>
Date: 2010-08-11 14:18:56
Message-ID: 20100811141856.607CEAC7E9 () svn ! kde ! org
[Download RAW message or body]
SVN commit 1162155 by trueg:
1. Remove all index graphs that contain resources without a nie:url (junk from buggy runs).
2. Remove graphs in batches to avoid the iterator breakage that comes with removing iterated items.
M +1 -0 CMakeLists.txt
M +58 -31 indexscheduler.cpp
M +1 -0 indexscheduler.h
--- trunk/KDE/kdebase/runtime/nepomuk/services/strigi/CMakeLists.txt #1162154:1162155
@@ -60,6 +60,7 @@
${KDE4_KIO_LIBS}
${KDE4_SOLID_LIBS}
${KDE4_KIDLETIME_LIBS}
+ ${NEPOMUK_QUERY_LIBRARIES}
${NEPOMUK_LIBRARIES}
${SOPRANO_LIBRARIES}
)
--- trunk/KDE/kdebase/runtime/nepomuk/services/strigi/indexscheduler.cpp \
#1162154:1162155 @@ -23,6 +23,7 @@
#include "indexscheduler.h"
#include "strigiserviceconfig.h"
#include "nepomukindexer.h"
+#include "util.h"
#include "nfo.h"
#include "nie.h"
@@ -42,11 +43,15 @@
#include <Nepomuk/Resource>
#include <Nepomuk/ResourceManager>
#include <Nepomuk/Variant>
+#include <Nepomuk/Query/Query>
+#include <Nepomuk/Query/ComparisonTerm>
+#include <Nepomuk/Query/ResourceTerm>
#include <Soprano/Model>
#include <Soprano/QueryResultIterator>
#include <Soprano/NodeIterator>
#include <Soprano/Node>
+#include <Soprano/Vocabulary/RDF>
#include <Soprano/Vocabulary/Xesam>
#include <map>
@@ -544,7 +549,7 @@
// We query all files that should not be in the store
// This for example excludes all filex:/ URLs.
//
- QString query = QString::fromLatin1( "select distinct ?g ?url where { "
+ QString query = QString::fromLatin1( "select distinct ?g where { "
"?r %1 ?url . "
"?g \
<http://www.strigi.org/fields#indexGraphFor> ?r . "
"FILTER(REGEX(STR(?url),'^file:/')) . "
@@ -552,21 +557,10 @@
.arg( Soprano::Node::resourceToN3( \
Nepomuk::Vocabulary::NIE::url() ), folderFilter );
kDebug() << query;
+ if ( !removeAllGraphsFromQuery( query ) )
+ return;
- Soprano::QueryResultIterator it = \
ResourceManager::instance()->mainModel()->executeQuery( query, \
Soprano::Query::QueryLanguageSparql );
- while ( it.next() ) {
- // wait for resume or stop (or simply continue)
- if ( !waitForContinue() ) {
- break;
- }
-
- const Soprano::Node& g = it[0];
- kDebug() << "REMOVING" << it["url"].uri();
- ResourceManager::instance()->mainModel()->removeContext( g );
- }
-
-
//
// Build filter query for all exclude filters
//
@@ -584,7 +578,7 @@
else if( !includeExcludeFilters.isEmpty() )
filters = QString::fromLatin1("FILTER(%1) .").arg( includeExcludeFilters );
- query = QString::fromLatin1( "select distinct ?g ?url where { "
+ query = QString::fromLatin1( "select distinct ?g where { "
"?r %1 ?url . "
"?r %2 ?fn . "
"?g <http://www.strigi.org/fields#indexGraphFor> ?r \
. " @@ -594,20 +588,10 @@
Soprano::Node::resourceToN3( Nepomuk::Vocabulary::NFO::fileName() \
), filters );
kDebug() << query;
- it = ResourceManager::instance()->mainModel()->executeQuery( query, \
Soprano::Query::QueryLanguageSparql );
- while ( it.next() ) {
+ if ( !removeAllGraphsFromQuery( query ) )
+ return;
- // wait for resume or stop (or simply continue)
- if ( !waitForContinue() ) {
- break;
- }
- const Soprano::Node& g = it[0];
- kDebug() << "REMOVING" << it["url"].uri();
- ResourceManager::instance()->mainModel()->removeContext( g );
- }
-
-
//
// Remove all old data from Xesam-times. While we leave out the data created by libnepomuk
// there is no problem since libnepomuk still uses backwards compatible queries and we use
@@ -620,19 +604,62 @@
"{ graph ?g { ?r2 %1 ?u2 . } } "
"}" )
.arg( Soprano::Node::resourceToN3( Soprano::Vocabulary::Xesam::url() ) \
);
- it = ResourceManager::instance()->mainModel()->executeQuery( query, \
Soprano::Query::QueryLanguageSparql );
- while ( it.next() ) {
+ kDebug() << query;
+ if ( !removeAllGraphsFromQuery( query ) )
+ return;
+
+ //
+ // Remove data which is useless but still around from before. This could happen due to some buggy version of
+ // the indexer or the filewatch service or even some application messing up the data.
+ // We look for indexed files that do not have a nie:url defined and thus, will never be caught by any of the
+ // other queries.
+ //
+ query = Query::Query(
+ Strigi::Ontology::indexGraphFor() == ( Soprano::Vocabulary::RDF::type() == \
Query::ResourceTerm( Nepomuk::Vocabulary::NFO::FileDataObject() ) && + \
!( Nepomuk::Vocabulary::NIE::url() == Query::Term() ) ) + \
).toSparqlQuery(Query::Query::NoResultRestrictions); + kDebug() << query;
+ removeAllGraphsFromQuery( query );
+}
+
+
+
+/**
+ * Runs the query using a limit until all graphs have been deleted. This is not done
+ * in one big loop to avoid the problems with messed up iterators when one of the iterated
+ * items is deleted.
+ */
+bool Nepomuk::IndexScheduler::removeAllGraphsFromQuery( const QString& query )
+{
+ while ( 1 ) {
+ // get the next batch of graphs
+ QList<Soprano::Node> graphs
+ = ResourceManager::instance()->mainModel()->executeQuery( query + \
QLatin1String( " LIMIT 200" ), + \
Soprano::Query::QueryLanguageSparql ).iterateBindings( 0 ).allNodes(); +
+ // remove all graphs in the batch
+ Q_FOREACH( const Soprano::Node& graph, graphs ) {
+
// wait for resume or stop (or simply continue)
if ( !waitForContinue() ) {
- break;
+ return false;
}
- ResourceManager::instance()->mainModel()->removeContext( it[0] );
+ ResourceManager::instance()->mainModel()->removeContext( graph );
}
+
+ // we are done when the last graphs are queried
+ if ( graphs.count() < 200 ) {
+ return true;
}
+ }
+ // make gcc shut up
+ return true;
+}
+
QDebug Nepomuk::operator<<( QDebug dbg, IndexScheduler::IndexingSpeed speed )
{
dbg << ( int )speed;
--- trunk/KDE/kdebase/runtime/nepomuk/services/strigi/indexscheduler.h \
#1162154:1162155 @@ -206,6 +206,7 @@
* to index anymore.
*/
void removeOldAndUnwantedEntries();
+ bool removeAllGraphsFromQuery( const QString& query_ );
bool m_suspended;
bool m_stopped;
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic