[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: KDE/kdebase/runtime/nepomuk/strigibackend
From: Sebastian Trueg <sebastian () trueg ! de>
Date: 2009-07-21 9:26:07
Message-ID: 1248168367.905237.22015.nullmailer () svn ! kde ! org
[Download RAW message or body]
SVN commit 1000403 by trueg:
* nfo:fileUrl is now nie:url
* Strigi now uses NIE only, no more Xesam. This also means that addTriplet is used. \
Since its API does not provide context (the AnalysisResult), we need to do a little \
hacking. Also, anonymous nodes are passed as :NODENAME. We match these to URNs via \
an internal map.
CCMAIL: Evgeny Egorochkin <phreedom.stdin@gmail.com>
M +102 -33 sopranoindexwriter.cpp
M +55 -55 sopranoindexwriter.h
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/sopranoindexwriter.cpp \
#1000402:1000403 @@ -25,6 +25,8 @@
#include <Soprano/Soprano>
#include <Soprano/Vocabulary/RDF>
#include <Soprano/LiteralValue>
+#include <Soprano/Node>
+#include <Soprano/QueryResultIterator>
#include <QtCore/QList>
#include <QtCore/QHash>
@@ -49,6 +51,9 @@
#include <sstream>
#include <algorithm>
+#include <Nepomuk/Types/Property>
+#include <Nepomuk/Types/Class>
+#include <Nepomuk/Types/Literal>
// IMPORTANT: strings in Strigi are apparently UTF8! Except for file names. Those \
are in local encoding.
@@ -111,10 +116,6 @@
return uri;
}
- QUrl createGraphUri() {
- return QUrl( "urn:nepomuk:local:" + QUuid::createUuid().toString().remove( \
QRegExp( "[\\{\\}]" ) ) );
- }
-
class FileMetaData
{
public:
@@ -122,6 +123,9 @@
QUrl fileUri;
QUrl context;
std::string content;
+
+ // mapping from blank nodes used in addTriplet to our urns
+ QMap<std::string, QUrl> blankNodeMap;
};
class RegisteredFieldData
@@ -144,7 +148,8 @@
{
public:
Private()
- : indexTransactionID( 0 ) {
+ : indexTransactionID( 0 ),
+ currentResult( 0 ) {
literalTypes[FieldRegister::stringType] = QVariant::String;
literalTypes[FieldRegister::floatType] = QVariant::Double;
literalTypes[FieldRegister::integerType] = QVariant::Int;
@@ -179,9 +184,48 @@
}
}
+ QUrl createUrn() {
+ QUrl urn;
+ do {
+ urn = QUrl( "urn:nepomuk:local:" + \
QUuid::createUuid().toString().remove( QRegExp( "[\\{\\}]" ) ) ); + } while ( \
repository->executeQuery( QString("ask where { " + \
"{ %1 ?p1 ?o1 . } " + "UNION "
+ "{ ?r2 %1 ?o2 . } "
+ "UNION "
+ "{ ?r3 ?p3 %1 . } "
+ "}")
+ .arg( ::Soprano::Node::resourceToN3( urn \
) ), + \
::Soprano::Query::QueryLanguageSparql ).boolValue() ); + return urn;
+ }
+
+ QUrl mapNode( FileMetaData* fmd, const std::string& s ) {
+ if ( s[0] == ':' ) {
+ if( fmd->blankNodeMap.contains( s ) ) {
+ return fmd->blankNodeMap[s];
+ }
+ else {
+ QUrl urn = createUrn();
+ fmd->blankNodeMap.insert( s, urn );
+ return urn;
+ }
+ }
+ else {
+ return QUrl::fromEncoded( s.c_str() );
+ }
+ }
+
::Soprano::Model* repository;
int indexTransactionID;
+ //
+ // The Strigi API does not provide context information in addTriplet, i.e. the \
AnalysisResult. + // However, we only use one thread, only one AnalysisResult at \
a time. + // Thus, we can just remember that and use it in addTriplet.
+ //
+ const Strigi::AnalysisResult* currentResult;
+
private:
QHash<std::string, QVariant::Type> literalTypes;
};
@@ -294,12 +338,14 @@
data->context = it.current().subject().uri();
}
else {
- data->context = createGraphUri();
+ data->context = d->createUrn();
}
// qDebug() << "Starting analysis for" << data->fileUri << "in thread" << \
QThread::currentThread();
idx->setWriterData( data );
+
+ d->currentResult = idx;
}
@@ -322,41 +368,52 @@
return;
}
-// qDebug() << "IndexWriter::addValue in thread" << QThread::currentThread();
if ( value.length() > 0 ) {
FileMetaData* md = reinterpret_cast<FileMetaData*>( idx->writerData() );
RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( \
field->writerData() );
+ // the statement we will create, we will determine the object below
+ ::Soprano::Statement statement( md->fileUri, rfd->property, \
::Soprano::Node(), md->context ); +
+ //
// Strigi uses rdf:type improperly since it stores the value as a string. We \
have to // make sure it is a resource.
+ //
if ( rfd->isRdfType ) {
- d->repository->addStatement( md->fileUri,
- ::Soprano::Vocabulary::RDF::type(),
- QUrl::fromEncoded( value.c_str(), \
QUrl::StrictMode ),
- md->context );
+ statement.setPredicate( ::Soprano::Vocabulary::RDF::type() );
+ statement.setObject( QUrl::fromEncoded( value.c_str(), QUrl::StrictMode \
) ); }
+
else {
- // we bend the plain strigi properties into something nicer, also \
because we do not want paths to be indexed, way too many false positives + \
// + // we bend the plain strigi properties into something nicer, also \
because we + // do not want paths to be indexed, way too many false \
positives // in standard desktop searches
+ //
if ( field->key() == FieldRegister::pathFieldName ||
field->key() == FieldRegister::parentLocationFieldName ) {
-#warning FIXME: this is where relative file URLs are to be generated and our new \
fancy file system class should provide us with the file system URI
- d->repository->addStatement( md->fileUri,
- rfd->property,
- QUrl::fromLocalFile( QFile::decodeName( \
QByteArray::fromRawData( value.c_str(), value.length() ) ) ),
- md->context );
+ // TODO: this is where relative file URLs are to be generated and \
our new fancy file system + // class should provide us with the file \
system URI + statement.setObject( QUrl::fromLocalFile( \
QFile::decodeName( QByteArray::fromRawData( value.c_str(), value.length() ) ) ) ); }
else {
- d->repository->addStatement( Statement( md->fileUri,
- rfd->property,
- d->createLiteralValue( \
rfd->dataType, ( unsigned char* )value.c_str(), value.length() ),
- md->context) );
+ statement.setObject( d->createLiteralValue( rfd->dataType, ( \
unsigned char* )value.c_str(), value.length() ) ); }
+
+ //
+ // Strigi uses anonymous nodes prefixed with ':'. However, it is \
possible that literals + // start with a ':'. Thus, we also check the \
range of the property + //
+ if ( value[0] == ':' ) {
+ Nepomuk::Types::Property property( rfd->property );
+ if ( property.range().isValid() ) {
+ statement.setObject( d->mapNode( md, value ) );
+ }
+ }
}
- if ( d->repository->lastError() )
- qDebug() << "Failed to add value" << value.c_str();
+
+ d->repository->addStatement( statement );
}
-// qDebug() << "IndexWriter::addValue done in thread" << \
QThread::currentThread(); }
@@ -442,23 +499,35 @@
}
-void Strigi::Soprano::IndexWriter::addTriplet( const std::string& subject,
- const std::string& predicate, const \
std::string& object ) +void Strigi::Soprano::IndexWriter::addTriplet( const \
std::string& s, + const std::string& p,
+ const std::string& o )
{
- // PROBLEM: which named graph (context) should we use here? Create a new one for \
each triple? Use one until the
- // next commit()?
+ //
+ // The Strigi API does not provide context information here, i.e. the \
AnalysisResult this triple + // belongs to. However, we only use one thread, only \
one AnalysisResult at a time. + // Thus, we can just remember that and use it \
here. + //
- // FIXME: create an NRL metadata graph
- d->repository->addStatement( Statement( Node( QUrl( QString::fromUtf8( \
subject.c_str() ) ) ),
- Node( QUrl( QString::fromUtf8( \
predicate.c_str() ) ) ),
- Node( QUrl( QString::fromUtf8( \
object.c_str() ) ) ),
- Node() ) );
+ FileMetaData* md = static_cast<FileMetaData*>( d->currentResult->writerData() );
+
+ QUrl subject = d->mapNode( md, s );
+ Nepomuk::Types::Property property( d->mapNode( md, p ) );
+ ::Soprano::Node object;
+ if ( property.range().isValid() )
+ object = d->mapNode( md, o );
+ else
+ object = ::Soprano::LiteralValue::fromString( QString::fromUtf8( o.c_str() \
), property.literalRangeType().dataTypeUri() ); +
+ d->repository->addStatement( subject, property.uri(), object, md->context );
}
// called after each indexed file
void Strigi::Soprano::IndexWriter::finishAnalysis( const AnalysisResult* idx )
{
+ d->currentResult = 0;
+
if ( idx->depth() > 0 ) {
return;
}
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/sopranoindexwriter.h \
#1000402:1000403 @@ -1,22 +1,22 @@
/*
- Copyright (C) 2007-2008 Sebastian Trueg <trueg@kde.org>
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of
- the License, or (at your option) any later version.
+ Copyright (C) 2007-2009 Sebastian Trueg <trueg@kde.org>
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of
+ the License, or (at your option) any later version.
- You should have received a copy of the GNU General Public License
- along with this library; see the file COPYING. If not, write to
- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA.
- */
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+ You should have received a copy of the GNU General Public License
+ along with this library; see the file COPYING. If not, write to
+ the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
+*/
+
#ifndef _SOPRANO_STRIGI_INDEX_WRITER_H_
#define _SOPRANO_STRIGI_INDEX_WRITER_H_
@@ -27,7 +27,7 @@
namespace Soprano {
class Model;
namespace Index {
- class IndexFilterModel;
+ class IndexFilterModel;
}
}
@@ -35,51 +35,51 @@
namespace Strigi {
namespace Soprano {
- class IndexWriter : public Strigi::IndexWriter
- {
- public:
- IndexWriter( ::Soprano::Model* );
- ~IndexWriter();
+ class IndexWriter : public Strigi::IndexWriter
+ {
+ public:
+ IndexWriter( ::Soprano::Model* );
+ ~IndexWriter();
- void commit();
+ void commit();
- /**
- * Delete the entries with the given paths from the index.
- *
- * @param entries the paths of the files that should be deleted
- **/
- void deleteEntries( const std::vector<std::string>& entries );
+ /**
+ * Delete the entries with the given paths from the index.
+ *
+ * @param entries the paths of the files that should be deleted
+ **/
+ void deleteEntries( const std::vector<std::string>& entries );
- /**
- * Delete all indexed documents from the index.
- **/
- void deleteAllEntries();
+ /**
+ * Delete all indexed documents from the index.
+ **/
+ void deleteAllEntries();
- void initWriterData( const Strigi::FieldRegister& );
- void releaseWriterData( const Strigi::FieldRegister& );
+ void initWriterData( const Strigi::FieldRegister& );
+ void releaseWriterData( const Strigi::FieldRegister& );
- void startAnalysis( const AnalysisResult* );
- void addText( const AnalysisResult*, const char* text, int32_t length );
- void addValue( const AnalysisResult*, const RegisteredField* field,
- const std::string& value );
- void addValue( const AnalysisResult*, const RegisteredField* field,
- const unsigned char* data, uint32_t size );
- void addValue( const AnalysisResult*, const RegisteredField* field,
- int32_t value );
- void addValue( const AnalysisResult*, const RegisteredField* field,
- uint32_t value );
- void addValue( const AnalysisResult*, const RegisteredField* field,
- double value );
- void addTriplet( const std::string& subject,
- const std::string& predicate, const std::string& object );
- void addValue( const AnalysisResult*, const RegisteredField* field,
- const std::string& name, const std::string& value );
- void finishAnalysis( const AnalysisResult* );
+ void startAnalysis( const AnalysisResult* );
+ void addText( const AnalysisResult*, const char* text, int32_t length );
+ void addValue( const AnalysisResult*, const RegisteredField* field,
+ const std::string& value );
+ void addValue( const AnalysisResult*, const RegisteredField* field,
+ const unsigned char* data, uint32_t size );
+ void addValue( const AnalysisResult*, const RegisteredField* field,
+ int32_t value );
+ void addValue( const AnalysisResult*, const RegisteredField* field,
+ uint32_t value );
+ void addValue( const AnalysisResult*, const RegisteredField* field,
+ double value );
+ void addTriplet( const std::string& subject,
+ const std::string& predicate, const std::string& object \
); + void addValue( const AnalysisResult*, const RegisteredField* field,
+ const std::string& name, const std::string& value );
+ void finishAnalysis( const AnalysisResult* );
- private:
- class Private;
- Private* d;
- };
+ private:
+ class Private;
+ Private* d;
+ };
}
}
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic