[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: KDE/kdebase/runtime/nepomuk/strigibackend
From: Vishesh Handa <handa.vish () gmail ! com>
Date: 2010-07-14 12:33:11
Message-ID: 20100714123311.A2248AC736 () svn ! kde ! org
[Download RAW message or body]
SVN commit 1149834 by vhanda:
Created the NepomukIndexFeeder. It takes all the statements from the StrigiAnalyzer, \
checks for duplicates and accordingly creates new resources. This gives us the added \
benefit that all Resources will be connected with the same additional resources \
instead of creating duplicates.
Please refer to the Nepomuk mailing list (StrgiFeeder), 13-14 July 2010, for a more \
in-depth discussion of the problem.
M +1 -0 CMakeLists.txt
A nepomukindexfeeder.cpp [License: GPL (v2+)]
A nepomukindexfeeder.h [License: GPL (v2+)]
M +76 -112 nepomukindexwriter.cpp
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/CMakeLists.txt #1149833:1149834
@@ -11,6 +11,7 @@
nepomukindexmanager.cpp
nepomukindexreader.cpp
nepomukindexwriter.cpp
+ nepomukindexfeeder.cpp
util.cpp
)
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/nepomukindexwriter.cpp \
#1149833:1149834 @@ -1,5 +1,6 @@
/*
Copyright (C) 2007-2010 Sebastian Trueg <trueg@kde.org>
+ Copyright (C) 2010 Vishesh Handa <handa.vish@gmail.com>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
@@ -22,6 +23,7 @@
#include "nfo.h"
#include "nie.h"
#include "nrl.h"
+#include "nepomukindexfeeder.h"
#include <Soprano/Soprano>
#include <Soprano/Vocabulary/RDF>
@@ -118,6 +120,25 @@
return uri;
}
+ class RegisteredFieldData
+ {
+ public:
+ RegisteredFieldData( const QUrl& prop, QVariant::Type t )
+ : property( prop ),
+ dataType( t ),
+ isRdfType( prop == Vocabulary::RDF::type() ) {
+ }
+
+ /// The actual property URI
+ QUrl property;
+
+ /// the literal range of the property (if applicable)
+ QVariant::Type dataType;
+
+ /// caching QUrl comparison
+ bool isRdfType;
+ };
+
/**
* Data objects that are used to store information relative to one
* indexing run.
@@ -128,7 +149,7 @@
FileMetaData( const Strigi::AnalysisResult* idx );
/// stores basic data including the nie:url and the nrl:GraphMetadata in \p \
model
- void storeBasicData( Soprano::Model* model );
+ void storeBasicData( Nepomuk::NepomukIndexFeeder* feeder );
/// map a blank node to a resource
QUrl mapNode( const std::string& s );
@@ -142,39 +163,14 @@
/// The file info - saved to prevent multiple stats
QFileInfo fileInfo;
- /// The URI of the graph that contains all indexed statements
- QUrl context;
-
/// a buffer for all plain-text content generated by strigi
std::string content;
private:
/// The Strigi result
const Strigi::AnalysisResult* m_analysisResult;
-
- /// mapping from blank nodes used in addTriplet to our urns
- QMap<std::string, QUrl> m_blankNodeMap;
};
- class RegisteredFieldData
- {
- public:
- RegisteredFieldData( const QUrl& prop, QVariant::Type t )
- : property( prop ),
- dataType( t ),
- isRdfType( prop == Vocabulary::RDF::type() ) {
- }
-
- /// The actual property URI
- QUrl property;
-
- /// the literal range of the property (if applicable)
- QVariant::Type dataType;
-
- /// caching QUrl comparison
- bool isRdfType;
- };
-
FileMetaData::FileMetaData( const Strigi::AnalysisResult* idx )
: m_analysisResult( idx )
{
@@ -185,94 +181,68 @@
// this will automatically find previous uses of the file in question
// with backwards compatibility
resourceUri = Nepomuk::Resource( fileUrl ).resourceUri();
-
- // use a new random context URI
- context = Nepomuk::ResourceManager::instance()->generateUniqueUri( "ctx" );
}
- QUrl FileMetaData::mapNode( const std::string& s )
+ void FileMetaData::storeBasicData( Nepomuk::NepomukIndexFeeder * feeder )
{
- if ( s[0] == ':' ) {
- if( m_blankNodeMap.contains( s ) ) {
- return m_blankNodeMap[s];
- }
- else {
- QUrl urn = Nepomuk::ResourceManager::instance()->generateUniqueUri( \
QString() );
- m_blankNodeMap.insert( s, urn );
- return urn;
- }
- }
- // special case to properly handle nie:isPartOf relations created for \
containers
- else if ( s == m_analysisResult->path() ) {
- return resourceUri;
- }
- else {
- return QUrl::fromEncoded( s.c_str() );
- }
- }
+ feeder->addStatement( resourceUri, Nepomuk::Vocabulary::NIE::url(), fileUrl \
);
- void FileMetaData::storeBasicData( Soprano::Model* model )
- {
- model->addStatement( resourceUri, Nepomuk::Vocabulary::NIE::url(), fileUrl, \
context );
-
// Strigi only indexes files and extractors mostly (if at all) store the \
nie:DataObject type (i.e. the contents)
// Thus, here we go the easy way and mark each indexed file as a \
nfo:FileDataObject.
- model->addStatement( resourceUri,
+ feeder->addStatement( resourceUri,
Vocabulary::RDF::type(),
- Nepomuk::Vocabulary::NFO::FileDataObject(),
- context );
+ Nepomuk::Vocabulary::NFO::FileDataObject() );
if ( fileInfo.isDir() ) {
- model->addStatement( resourceUri,
+ feeder->addStatement( resourceUri,
Vocabulary::RDF::type(),
- Nepomuk::Vocabulary::NFO::Folder(),
- context );
+ Nepomuk::Vocabulary::NFO::Folder() );
}
-
-
- // create the provedance data for the data graph
- // TODO: add more data at some point when it becomes of interest
- QUrl metaDataContext = \
Nepomuk::ResourceManager::instance()->generateUniqueUri( "ctx" );
- model->addStatement( context,
- Vocabulary::RDF::type(),
- Nepomuk::Vocabulary::NRL::DiscardableInstanceBase(),
- metaDataContext );
- model->addStatement( context,
- Vocabulary::NAO::created(),
- LiteralValue( QDateTime::currentDateTime() ),
- metaDataContext );
- model->addStatement( context,
- Strigi::Ontology::indexGraphFor(),
- resourceUri,
- metaDataContext );
- model->addStatement( metaDataContext,
- Vocabulary::RDF::type(),
- Nepomuk::Vocabulary::NRL::GraphMetadata(),
- metaDataContext );
- model->addStatement( metaDataContext,
- Nepomuk::Vocabulary::NRL::coreGraphMetadataFor(),
- context,
- metaDataContext );
}
FileMetaData* fileDataForResult( const Strigi::AnalysisResult* idx )
{
return static_cast<FileMetaData*>( idx->writerData() );
}
+
+ /**
+ * Creates a Blank or Resource Node based on the contents of the string \
provided. + * If the string is of the form ':identifier', a Blank node is \
created. + * Otherwise a Resource Node is returned.
+ */
+ Soprano::Node createBlankOrResourceNode( const std::string & str ) {
+ QString identifier = QString::fromUtf8( str.c_str() );
+
+ if( !identifier.isEmpty() && identifier[0] == ':' ) {
+ identifier.remove( 0, 1 );
+ return Soprano::Node::createBlankNode( identifier );
}
+ //Not a blank node
+ return Soprano::Node( QUrl(identifier) );
+ }
+}
+
class Strigi::NepomukIndexWriter::Private
{
public:
- Private()
+ Private( Soprano::Model * model )
+ : repository( model )
{
literalTypes[FieldRegister::stringType] = QVariant::String;
literalTypes[FieldRegister::floatType] = QVariant::Double;
literalTypes[FieldRegister::integerType] = QVariant::Int;
literalTypes[FieldRegister::binaryType] = QVariant::ByteArray;
literalTypes[FieldRegister::datetimeType] = QVariant::DateTime; // Strigi \
encodes datetime as unsigned integer, i.e. addValue( ..., uint ) +
+ feeder = new Nepomuk::NepomukIndexFeeder( model );
}
+ ~Private()
+ {
+ delete feeder;
+ }
+
QVariant::Type literalType( const Strigi::FieldProperties& strigiType ) {
// it looks as if the typeUri can contain arbitrary values, URIs or stuff \
like "string"
QHash<std::string, QVariant::Type>::const_iterator it = \
literalTypes.constFind( strigiType.typeUri() ); @@ -310,6 +280,8 @@
QStack<const Strigi::AnalysisResult*> currentResultStack;
+ Nepomuk::NepomukIndexFeeder* feeder;
+
private:
QHash<std::string, QVariant::Type> literalTypes;
};
@@ -318,8 +290,7 @@
Strigi::NepomukIndexWriter::NepomukIndexWriter( Soprano::Model* model )
: Strigi::IndexWriter()
{
- d = new Private;
- d->repository = model;
+ d = new Private( model );
Util::storeStrigiMiniOntology( d->repository );
}
@@ -387,8 +358,11 @@
if ( data->resourceUri.isEmpty() )
data->resourceUri = Nepomuk::ResourceManager::instance()->generateUniqueUri( \
QString() );
+ // Initialize the feeder to accept statements
+ d->feeder->begin( data->resourceUri );
+
// store initial data to make sure newly created URIs are reused directly by \
libnepomuk
- data->storeBasicData( d->repository );
+ data->storeBasicData( d->feeder );
// remember the file data
idx->setWriterData( data );
@@ -419,7 +393,7 @@
RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( \
field->writerData() );
// the statement we will create, we will determine the object below
- Soprano::Statement statement( md->resourceUri, rfd->property, \
Soprano::Node(), md->context ); + Soprano::Statement statement( \
md->resourceUri, rfd->property, Soprano::Node() );
//
// Strigi uses rdf:type improperly since it stores the value as a string. We \
have to @@ -461,12 +435,12 @@
if ( value[0] == ':' ) {
Nepomuk::Types::Property property( rfd->property );
if ( property.range().isValid() ) {
- statement.setObject( md->mapNode( value ) );
+ statement.setObject( createBlankOrResourceNode( value ) );
}
}
}
- d->repository->addStatement( statement );
+ d->feeder->addStatement( statement );
}
}
@@ -504,10 +478,7 @@
val = QDateTime::fromTime_t( value );
}
- d->repository->addStatement( Statement( md->resourceUri,
- rfd->property,
- val,
- md->context) );
+ d->feeder->addStatement( md->resourceUri, rfd->property, val);
}
@@ -522,10 +493,7 @@
FileMetaData* md = fileDataForResult( idx );
RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( \
field->writerData() );
- d->repository->addStatement( Statement( md->resourceUri,
- rfd->property,
- LiteralValue( value ),
- md->context) );
+ d->repository->addStatement( md->resourceUri, rfd->property, LiteralValue( value \
) ); }
@@ -540,10 +508,7 @@
FileMetaData* md = fileDataForResult( idx );
RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( \
field->writerData() );
- d->repository->addStatement( Statement( md->resourceUri,
- rfd->property,
- LiteralValue( value ),
- md->context) );
+ d->repository->addStatement( md->resourceUri, rfd->property, LiteralValue( value \
) ); }
@@ -555,17 +520,17 @@
return;
}
- FileMetaData* md = fileDataForResult( d->currentResultStack.top() );
+ //FileMetaData* md = fileDataForResult( d->currentResultStack.top() );
- QUrl subject = md->mapNode( s );
- Nepomuk::Types::Property property( md->mapNode( p ) );
+ Soprano::Node subject( createBlankOrResourceNode( s ) );
+ Nepomuk::Types::Property property( QUrl( QString::fromUtf8(p.c_str()) ) ); // \
Was mapped earlier Soprano::Node object;
if ( property.range().isValid() )
- object = md->mapNode( o );
+ object = Soprano::Node( createBlankOrResourceNode( o ) );
else
object = Soprano::LiteralValue::fromString( QString::fromUtf8( o.c_str() ), \
property.literalRangeType().dataTypeUri() );
- d->repository->addStatement( subject, property.uri(), object, md->context );
+ d->feeder->addStatement( subject, property.uri(), object );
}
@@ -582,14 +547,13 @@
// store the full text of the file
if ( md->content.length() > 0 ) {
- d->repository->addStatement( Statement( md->resourceUri,
+ d->feeder->addStatement( md->resourceUri,
\
Nepomuk::Vocabulary::NIE::plainTextContent(),
- LiteralValue( QString::fromUtf8( \
md->content.c_str() ) ),
- md->context ) );
- if ( d->repository->lastError() )
- kDebug() << "Failed to add" << md->resourceUri << "as text" << \
QString::fromUtf8( md->content.c_str() ); + \
LiteralValue( QString::fromUtf8( md->content.c_str() ) ) ); }
+ d->feeder->end();
+
// cleanup
delete md;
idx->setWriterData( 0 );
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic