[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: KDE/kdebase/runtime/nepomuk/strigibackend
From: Sebastian Trueg <sebastian () trueg ! de>
Date: 2009-01-07 11:18:24
Message-ID: 1231327104.579020.27028.nullmailer () svn ! kde ! org
[Download RAW message or body]
SVN commit 906990 by trueg:
Store both xesam:url and strigi:parentUrl as URIs rather than literals. The advantage
is that they won't be indexed (far fewer false positives in searches) and we can
perform relation queries between folders and files.
M +20 -6 sopranoindexreader.cpp
M +36 -27 sopranoindexwriter.cpp
M +6 -5 util.cpp
M +4 -0 util.h
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/sopranoindexreader.cpp \
#906989:906990 @@ -40,6 +40,7 @@
#include <QtCore/QDebug>
#include <QtCore/QString>
#include <QtCore/QLatin1String>
+#include <QtCore/QFile>
using namespace Soprano;
@@ -58,7 +59,7 @@
const Strigi::Query& query );
static lucene::search::Query* createMultiFieldQuery( const Strigi::Query& query );
-
+#if 0
static QString luceneQueryEscape( const QString& s )
{
/* Chars to escape: + - && || ! ( ) { } [ ] ^ " ~ : \ */
@@ -68,6 +69,7 @@
es.replace( rx, "\\\\1" );
return es;
}
+#endif
static lucene::index::Term* createWildCardTerm( const TString& name,
const string& value )
@@ -383,12 +385,17 @@
std::map<std::string, time_t>& \
children ) {
// qDebug() << "IndexReader::getChildren in thread" << QThread::currentThread();
- QString query = QString( "select distinct ?path ?mtime where { ?r <%1> \
\"%2\"^^<%3> . ?r <%4> ?mtime . ?r <%5> ?path . }") + QString query = QString( \
"select distinct ?path ?mtime where { " + "{ { ?r <%1> \
\"%2\"^^<%3> . } UNION { ?r <%1> %6 . } } . " + "?r <%4> \
?mtime . " + "?r <%5> ?path . "
+ "}")
.arg( Util::fieldUri( FieldRegister::parentLocationFieldName \
).toString(),
escapeLiteralForSparqlQuery( QString::fromUtf8( \
parent.c_str() ) ), Vocabulary::XMLSchema::string().toString(),
Util::fieldUri( FieldRegister::mtimeFieldName \
).toString(),
- Util::fieldUri( FieldRegister::pathFieldName ).toString() \
); + Util::fieldUri( FieldRegister::pathFieldName \
).toString(), + Node( QUrl::fromLocalFile( \
QFile::decodeName( parent.c_str() ) ) ).toN3() );
// qDebug() << "running getChildren query:" << query;
@@ -399,12 +406,19 @@
Node mTimeNode = result.binding( "mtime" );
// qDebug() << "file in index: " << pathNode.toString() << "mtime:" << \
mTimeNode.literal().toDateTime() << "(" << \
mTimeNode.literal().toDateTime().toTime_t() << ")";
- // FIXME: Sadly in Xesam sourceModified is not typed as DateTime but \
defaults to an int :( We try to be compatible + // be backwards compatible in \
case there are paths left encoded as literals + std::string path;
+ if ( pathNode.isLiteral() )
+ path = pathNode.toString().toUtf8().data();
+ else
+ path = QFile::encodeName( pathNode.uri().toLocalFile() ).data();
+
+ // Sadly in Xesam sourceModified is not typed as DateTime but defaults to an \
int :( We try to be compatible if ( mTimeNode.literal().isDateTime() ) {
- children[std::string( pathNode.toString().toUtf8().data() )] = \
mTimeNode.literal().toDateTime().toTime_t(); + children[path] = \
mTimeNode.literal().toDateTime().toTime_t(); }
else {
- children[std::string( pathNode.toString().toUtf8().data() )] = \
mTimeNode.literal().toUnsignedInt(); + children[path] = \
mTimeNode.literal().toUnsignedInt(); }
}
}
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/sopranoindexwriter.cpp \
#906989:906990 @@ -34,6 +34,7 @@
#include <QtCore/QDebug>
#include <QtCore/QThread>
#include <QtCore/QDateTime>
+#include <QtCore/QByteArray>
#include <QtCore/QUuid>
#include <KUrl>
@@ -213,15 +214,17 @@
for ( unsigned int i = 0; i < entries.size(); ++i ) {
QString path = QString::fromUtf8( entries[i].c_str() );
QString query = QString( "select ?g ?mg where { "
- "?r <%1> \"%2\"^^<%3> . "
- "?g <http://www.strigi.org/fields#indexGraphFor> ?r \
. "
- "OPTIONAL { ?mg <%4> ?g . } }" )
+ "{ { ?r <%1> \"%2\"^^<%3> . } UNION { ?r <%1> %6 . \
} } . " + "?g <%4> ?r . "
+ "OPTIONAL { ?mg <%5> ?g . } }" )
.arg( systemLocationUri )
.arg( path )
.arg( Vocabulary::XMLSchema::string().toString() )
- .arg( Vocabulary::NRL::coreGraphMetadataFor().toString() );
+ .arg( Strigi::Ontology::indexGraphFor().toString() )
+ .arg( Vocabulary::NRL::coreGraphMetadataFor().toString() )
+ .arg( Node( QUrl::fromLocalFile( path ) ).toN3() );
-// qDebug() << "deleteEntries query:" << query;
+ qDebug() << "deleteEntries query:" << query;
QueryResultIterator result = d->repository->executeQuery( query, \
::Soprano::Query::QueryLanguageSparql ); if ( result.next() ) {
@@ -248,7 +251,7 @@
// qDebug() << "IndexWriter::deleteAllEntries in thread" << \
QThread::currentThread();
// query all index graphs (FIXME: would a type derived from nrl:Graph be better \
than only the predicate?)
- QString query = QString( "select ?g where { ?g \
<http://www.strigi.org/fields#indexGraphFor> ?r . }" ); + QString query = QString( \
"select ?g where { ?g <%1> ?r . }" ).arg( \
Strigi::Ontology::indexGraphFor().toString() );
qDebug() << "deleteAllEntries query:" << query;
@@ -281,7 +284,7 @@
// let's check if we already have data on the file
StatementIterator it = d->repository->listStatements( Node(),
- QUrl::fromEncoded( \
"http://www.strigi.org/fields#indexGraphFor", QUrl::StrictMode ), // FIXME: put the \
URI somewhere else + \
Strigi::Ontology::indexGraphFor(),
data->fileUri );
if ( it.next() ) {
data->context = it.current().subject().uri();
@@ -320,29 +323,33 @@
FileMetaData* md = reinterpret_cast<FileMetaData*>( idx->writerData() );
RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( \
field->writerData() );
+ // Strigi uses rdf:type improperly since it stores the value as a string. We \
have to + // make sure it is a resource.
if ( rfd->isRdfType ) {
-
- // Strigi uses rdf:type improperly since it stores the value as a \
string. We have to
- // make sure it is a resource. The problem is that this results in the \
type not being
- // indexed properly. Thus, it cannot be searched with normal lucene \
queries.
- // That is why we need to introduce a stringType property
-
- d->repository->addStatement( Statement( md->fileUri,
- \
::Soprano::Vocabulary::RDF::type(),
- QUrl::fromEncoded( \
value.c_str(), QUrl::StrictMode ), // fromEncoded is faster than the plain \
constructor and all Xesam URIs work here
- md->context) );
- d->repository->addStatement( Statement( md->fileUri,
- QUrl::fromEncoded( \
"http://strigi.sourceforge.net/fields#rdf-string-type", \
QUrl::StrictMode ),
- LiteralValue( QString::fromUtf8( \
value.c_str() ) ),
- md->context) );
+ d->repository->addStatement( md->fileUri,
+ ::Soprano::Vocabulary::RDF::type(),
+ QUrl::fromEncoded( value.c_str(), \
QUrl::StrictMode ), + md->context );
}
-
else {
- d->repository->addStatement( Statement( md->fileUri,
- rfd->property,
- d->createLiteralValue( \
rfd->dataType, ( unsigned char* )value.c_str(), value.length() ),
- md->context) );
+ // we bend the plain strigi properties into something nicer, also \
because we do not want paths to be indexed, way too many false positives + \
// in standard desktop searches + if ( field->key() == \
FieldRegister::pathFieldName || + field->key() == \
FieldRegister::parentLocationFieldName ) { + \
d->repository->addStatement( md->fileUri, + \
rfd->property, + QUrl::fromLocalFile( \
QFile::decodeName( QByteArray::fromRawData( value.c_str(), value.length() ) ) ), + \
md->context ); + }
+ else {
+ d->repository->addStatement( Statement( md->fileUri,
+ rfd->property,
+ d->createLiteralValue( \
rfd->dataType, ( unsigned char* )value.c_str(), value.length() ), + \
md->context) ); + }
}
+ if ( d->repository->lastError() )
+ qDebug() << "Failed to add value" << value.c_str();
}
// qDebug() << "IndexWriter::addValue done in thread" << \
QThread::currentThread(); }
@@ -459,6 +466,8 @@
Vocabulary::Xesam::asText(),
LiteralValue( QString::fromUtf8( \
md->content.c_str() ) ), md->context ) );
+ if ( d->repository->lastError() )
+ qDebug() << "Failed to add" << md->fileUri << "as text" << \
QString::fromUtf8( md->content.c_str() ); }
// Strigi only indexes files and extractors mostly (if at all) store the \
xesam:DataObject type (i.e. the contents) @@ -487,7 +496,7 @@
LiteralValue( \
QDateTime::currentDateTime() ), metaDataContext ) );
d->repository->addStatement( Statement( md->context,
- QUrl::fromEncoded( \
"http://www.strigi.org/fields#indexGraphFor", QUrl::StrictMode ), // FIXME: put the \
URI somewhere else + \
Strigi::Ontology::indexGraphFor(), md->fileUri,
metaDataContext ) );
d->repository->addStatement( Statement( metaDataContext,
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/util.cpp #906989:906990
@@ -84,11 +84,6 @@
if ( QString::fromUtf8( field.c_str() ) == \
::Soprano::Index::CLuceneIndex::defaultSearchField() ) { return TString::fromUtf8( \
field.c_str() ); }
- else if ( QString( field.c_str() ) == \
::Soprano::Vocabulary::RDF::type().toString() ) {
- // see sopranoindexwriter:addValue for details on this conversion
- static TString strigiType( \
"http://strigi.sourceforge.net/fields#rdf-string-type" );
- return strigiType;
- }
else {
return fieldUri( field ).toString();
}
@@ -166,3 +161,9 @@
model->addStatement( metaDataType );
}
}
+
+
+QUrl Strigi::Ontology::indexGraphFor()
+{
+ return QUrl::fromEncoded( "http://www.strigi.org/fields#indexGraphFor", \
QUrl::StrictMode ); +}
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/util.h #906989:906990
@@ -49,6 +49,10 @@
void storeStrigiMiniOntology( ::Soprano::Model* model );
}
}
+
+ namespace Ontology {
+ QUrl indexGraphFor();
+ }
}
#endif
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic