[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    KDE/kdebase/runtime/nepomuk/strigibackend
From:       Sebastian Trueg <sebastian () trueg ! de>
Date:       2009-07-21 9:26:07
Message-ID: 1248168367.905237.22015.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 1000403 by trueg:

* nfo:fileUrl is now nie:url
* Strigi now uses NIE only, no more Xesam. This also means that addTriplet is used. \
Since its API does not provide context (the AnalysisResult), we need to do a little \
hacking. Also, anonymous nodes are passed as :NODENAME. We match these to urns via \
an internal map.

CCMAIL: Evgeny Egorochkin <phreedom.stdin@gmail.com>


 M  +102 -33   sopranoindexwriter.cpp  
 M  +55 -55    sopranoindexwriter.h  


--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/sopranoindexwriter.cpp \
#1000402:1000403 @@ -25,6 +25,8 @@
 #include <Soprano/Soprano>
 #include <Soprano/Vocabulary/RDF>
 #include <Soprano/LiteralValue>
+#include <Soprano/Node>
+#include <Soprano/QueryResultIterator>
 
 #include <QtCore/QList>
 #include <QtCore/QHash>
@@ -49,6 +51,9 @@
 #include <sstream>
 #include <algorithm>
 
+#include <Nepomuk/Types/Property>
+#include <Nepomuk/Types/Class>
+#include <Nepomuk/Types/Literal>
 
 // IMPORTANT: strings in Strigi are apparently UTF8! Except for file names. Those \
are in local encoding.  
@@ -111,10 +116,6 @@
         return uri;
     }
 
-    QUrl createGraphUri() {
-        return QUrl( "urn:nepomuk:local:" + QUuid::createUuid().toString().remove( \
                QRegExp( "[\\{\\}]" ) ) );
-    }
-
     class FileMetaData
     {
     public:
@@ -122,6 +123,9 @@
         QUrl fileUri;
         QUrl context;
         std::string content;
+
+        // mapping from blank nodes used in addTriplet to our urns
+        QMap<std::string, QUrl> blankNodeMap;
     };
 
     class RegisteredFieldData
@@ -144,7 +148,8 @@
 {
 public:
     Private()
-        : indexTransactionID( 0 ) {
+        : indexTransactionID( 0 ),
+          currentResult( 0 ) {
         literalTypes[FieldRegister::stringType] = QVariant::String;
         literalTypes[FieldRegister::floatType] = QVariant::Double;
         literalTypes[FieldRegister::integerType] = QVariant::Int;
@@ -179,9 +184,48 @@
         }
     }
 
+    QUrl createUrn() {
+        QUrl urn;
+        do {
+            urn = QUrl( "urn:nepomuk:local:" + \
QUuid::createUuid().toString().remove( QRegExp( "[\\{\\}]" ) ) ); +        } while ( \
repository->executeQuery( QString("ask where { " +                                    \
"{ %1 ?p1 ?o1 . } " +                                                    "UNION "
+                                                    "{ ?r2 %1 ?o2 . } "
+                                                    "UNION "
+                                                    "{ ?r3 ?p3 %1 . } "
+                                                    "}")
+                                            .arg( ::Soprano::Node::resourceToN3( urn \
) ), +                                            \
::Soprano::Query::QueryLanguageSparql ).boolValue() ); +        return urn;
+    }
+
+    QUrl mapNode( FileMetaData* fmd, const std::string& s ) {
+        if ( s[0] == ':' ) {
+            if( fmd->blankNodeMap.contains( s ) ) {
+                return fmd->blankNodeMap[s];
+            }
+            else {
+                QUrl urn = createUrn();
+                fmd->blankNodeMap.insert( s, urn );
+                return urn;
+            }
+        }
+        else {
+            return QUrl::fromEncoded( s.c_str() );
+        }
+    }
+
     ::Soprano::Model* repository;
     int indexTransactionID;
 
+    //
+    // The Strigi API does not provide context information in addTriplet, i.e. the \
AnalysisResult. +    // However, we only use one thread, only one AnalysisResult at \
the time. +    // Thus, we can just remember that and use it in addTriplet.
+    //
+    const Strigi::AnalysisResult* currentResult;
+
 private:
     QHash<std::string, QVariant::Type> literalTypes;
 };
@@ -294,12 +338,14 @@
         data->context = it.current().subject().uri();
     }
     else {
-        data->context = createGraphUri();
+        data->context = d->createUrn();
     }
 
 //    qDebug() << "Starting analysis for" << data->fileUri << "in thread" << \
QThread::currentThread();  
     idx->setWriterData( data );
+
+    d->currentResult = idx;
 }
 
 
@@ -322,41 +368,52 @@
         return;
     }
 
-//    qDebug() << "IndexWriter::addValue in thread" << QThread::currentThread();
     if ( value.length() > 0 ) {
         FileMetaData* md = reinterpret_cast<FileMetaData*>( idx->writerData() );
         RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( \
field->writerData() );  
+        // the statement we will create, we will determine the object below
+        ::Soprano::Statement statement( md->fileUri, rfd->property, \
::Soprano::Node(), md->context ); +
+        //
         // Strigi uses rdf:type improperly since it stores the value as a string. We \
have to  // make sure it is a resource.
+        //
         if ( rfd->isRdfType ) {
-            d->repository->addStatement( md->fileUri,
-                                         ::Soprano::Vocabulary::RDF::type(),
-                                         QUrl::fromEncoded( value.c_str(), \
                QUrl::StrictMode ),
-                                         md->context );
+            statement.setPredicate( ::Soprano::Vocabulary::RDF::type() );
+            statement.setObject( QUrl::fromEncoded( value.c_str(), QUrl::StrictMode \
) );  }
+
         else {
-            // we bend the plain strigi properties into something nicer, also \
because we do not want paths to be indexed, way too many false positives +            \
// +            // we bend the plain strigi properties into something nicer, also \
because we +            // do not want paths to be indexed, way too many false \
positives  // in standard desktop searches
+            //
             if ( field->key() == FieldRegister::pathFieldName ||
                  field->key() == FieldRegister::parentLocationFieldName ) {
-#warning FIXME: this is where relative file URLs are to be generated and our new \
                fancy file system class should provide us with the file system URI
-                d->repository->addStatement( md->fileUri,
-                                             rfd->property,
-                                             QUrl::fromLocalFile( QFile::decodeName( \
                QByteArray::fromRawData( value.c_str(), value.length() ) ) ),
-                                             md->context );
+                // TODO: this is where relative file URLs are to be generated and \
our new fancy file system +                // class should provide us with the file \
system URI +                statement.setObject( QUrl::fromLocalFile( \
QFile::decodeName( QByteArray::fromRawData( value.c_str(), value.length() ) ) ) );  }
             else {
-                d->repository->addStatement( Statement( md->fileUri,
-                                                        rfd->property,
-                                                        d->createLiteralValue( \
                rfd->dataType, ( unsigned char* )value.c_str(), value.length() ),
-                                                        md->context) );
+                statement.setObject( d->createLiteralValue( rfd->dataType, ( \
unsigned char* )value.c_str(), value.length() ) );  }
+
+            //
+            // Strigi uses anonymeous nodes prefixed with ':'. However, it is \
possible that literals +            // start with a ':'. Thus, we also check the \
range of the property +            //
+            if ( value[0] == ':' ) {
+                Nepomuk::Types::Property property( rfd->property );
+                if ( property.range().isValid() ) {
+                    statement.setObject( d->mapNode( md, value ) );
+                }
+            }
         }
-        if ( d->repository->lastError() )
-            qDebug() << "Failed to add value" << value.c_str();
+
+        d->repository->addStatement( statement );
     }
-//    qDebug() << "IndexWriter::addValue done in thread" << \
QThread::currentThread();  }
 
 
@@ -442,23 +499,35 @@
 }
 
 
-void Strigi::Soprano::IndexWriter::addTriplet( const std::string& subject,
-                                               const std::string& predicate, const \
std::string& object ) +void Strigi::Soprano::IndexWriter::addTriplet( const \
std::string& s, +                                               const std::string& p,
+                                               const std::string& o )
 {
-    // PROBLEM: which named graph (context) should we use here? Create a new one for \
                each triple? Use one until the
-    // next commit()?
+    //
+    // The Strigi API does not provide context information here, i.e. the \
AnalysisResult this triple +    // belongs to. However, we only use one thread, only \
one AnalysisResult at the time. +    // Thus, we can just remember that and use it \
here. +    //
 
-    // FIXME: create an NRL metadata graph
-    d->repository->addStatement( Statement( Node( QUrl( QString::fromUtf8( \
                subject.c_str() ) ) ),
-                                            Node( QUrl( QString::fromUtf8( \
                predicate.c_str() ) ) ),
-                                            Node( QUrl( QString::fromUtf8( \
                object.c_str() ) ) ),
-                                            Node() ) );
+    FileMetaData* md = static_cast<FileMetaData*>( d->currentResult->writerData() );
+
+    QUrl subject = d->mapNode( md, s );
+    Nepomuk::Types::Property property( d->mapNode( md, p ) );
+    ::Soprano::Node object;
+    if ( property.range().isValid() )
+        object = d->mapNode( md, o );
+    else
+        object = ::Soprano::LiteralValue::fromString( QString::fromUtf8( o.c_str() \
), property.literalRangeType().dataTypeUri() ); +
+    d->repository->addStatement( subject, property.uri(), object, md->context );
 }
 
 
 // called after each indexed file
 void Strigi::Soprano::IndexWriter::finishAnalysis( const AnalysisResult* idx )
 {
+    d->currentResult = 0;
+
     if ( idx->depth() > 0 ) {
         return;
     }
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/sopranoindexwriter.h \
#1000402:1000403 @@ -1,22 +1,22 @@
 /*
-   Copyright (C) 2007-2008 Sebastian Trueg <trueg@kde.org>
- 
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of
-   the License, or (at your option) any later version.
+  Copyright (C) 2007-2009 Sebastian Trueg <trueg@kde.org>
 
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
 
-   You should have received a copy of the GNU General Public License
-   along with this library; see the file COPYING.  If not, write to
-   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-   Boston, MA 02110-1301, USA.
- */
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Library General Public License for more details.
 
+  You should have received a copy of the GNU General Public License
+  along with this library; see the file COPYING.  If not, write to
+  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+  Boston, MA 02110-1301, USA.
+*/
+
 #ifndef _SOPRANO_STRIGI_INDEX_WRITER_H_
 #define _SOPRANO_STRIGI_INDEX_WRITER_H_
 
@@ -27,7 +27,7 @@
 namespace Soprano {
     class Model;
     namespace Index {
-	class IndexFilterModel;
+        class IndexFilterModel;
     }
 }
 
@@ -35,51 +35,51 @@
 namespace Strigi {
     namespace Soprano {
 
-	class IndexWriter : public Strigi::IndexWriter
-	{
-	public:
-	    IndexWriter( ::Soprano::Model* );
-	    ~IndexWriter();
+        class IndexWriter : public Strigi::IndexWriter
+        {
+        public:
+            IndexWriter( ::Soprano::Model* );
+            ~IndexWriter();
 
-	    void commit();
+            void commit();
 
-	    /**
-	     * Delete the entries with the given paths from the index.
-	     *
-	     * @param entries the paths of the files that should be deleted
-	     **/
-	    void deleteEntries( const std::vector<std::string>& entries );
+            /**
+             * Delete the entries with the given paths from the index.
+             *
+             * @param entries the paths of the files that should be deleted
+             **/
+            void deleteEntries( const std::vector<std::string>& entries );
 
-	    /**
-	     * Delete all indexed documents from the index.
-	     **/
-	    void deleteAllEntries();
+            /**
+             * Delete all indexed documents from the index.
+             **/
+            void deleteAllEntries();
 
-	    void initWriterData( const Strigi::FieldRegister& );
-	    void releaseWriterData( const Strigi::FieldRegister& );
+            void initWriterData( const Strigi::FieldRegister& );
+            void releaseWriterData( const Strigi::FieldRegister& );
 
-	    void startAnalysis( const AnalysisResult* );
-	    void addText( const AnalysisResult*, const char* text, int32_t length );
-	    void addValue( const AnalysisResult*, const RegisteredField* field,
-			   const std::string& value );
-	    void addValue( const AnalysisResult*, const RegisteredField* field,
-			   const unsigned char* data, uint32_t size );
-	    void addValue( const AnalysisResult*, const RegisteredField* field,
-			   int32_t value );
-	    void addValue( const AnalysisResult*, const RegisteredField* field,
-			   uint32_t value );
-	    void addValue( const AnalysisResult*, const RegisteredField* field,
-			   double value );
-	    void addTriplet( const std::string& subject,
-			     const std::string& predicate, const std::string& object );
-	    void addValue( const AnalysisResult*, const RegisteredField* field,
-			   const std::string& name, const std::string& value );
-	    void finishAnalysis( const AnalysisResult* );
+            void startAnalysis( const AnalysisResult* );
+            void addText( const AnalysisResult*, const char* text, int32_t length );
+            void addValue( const AnalysisResult*, const RegisteredField* field,
+                           const std::string& value );
+            void addValue( const AnalysisResult*, const RegisteredField* field,
+                           const unsigned char* data, uint32_t size );
+            void addValue( const AnalysisResult*, const RegisteredField* field,
+                           int32_t value );
+            void addValue( const AnalysisResult*, const RegisteredField* field,
+                           uint32_t value );
+            void addValue( const AnalysisResult*, const RegisteredField* field,
+                           double value );
+            void addTriplet( const std::string& subject,
+                             const std::string& predicate, const std::string& object \
); +            void addValue( const AnalysisResult*, const RegisteredField* field,
+                           const std::string& name, const std::string& value );
+            void finishAnalysis( const AnalysisResult* );
 
-	private:
-	    class Private;
-	    Private* d;
-	};
+        private:
+            class Private;
+            Private* d;
+        };
     }
 }
 


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic