[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    KDE/kdebase/runtime/nepomuk/strigibackend
From:       Sebastian Trueg <sebastian () trueg ! de>
Date:       2009-01-07 11:18:24
Message-ID: 1231327104.579020.27028.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 906990 by trueg:

Store both xesam:url and strigi:parentUrl as URIs rather than literals. The advantage \
is that they won't be indexed (far fewer false positives in searches) and we can \
perform relation queries between folders and files.

 M  +20 -6     sopranoindexreader.cpp  
 M  +36 -27    sopranoindexwriter.cpp  
 M  +6 -5      util.cpp  
 M  +4 -0      util.h  


--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/sopranoindexreader.cpp \
#906989:906990 @@ -40,6 +40,7 @@
 #include <QtCore/QDebug>
 #include <QtCore/QString>
 #include <QtCore/QLatin1String>
+#include <QtCore/QFile>
 
 
 using namespace Soprano;
@@ -58,7 +59,7 @@
                                                       const Strigi::Query& query );
 static lucene::search::Query* createMultiFieldQuery( const Strigi::Query& query );
 
-
+#if 0
 static QString luceneQueryEscape( const QString& s )
 {
     /* Chars to escape: + - && || ! ( ) { } [ ] ^ " ~  : \ */
@@ -68,6 +69,7 @@
     es.replace( rx, "\\\\1" );
     return es;
 }
+#endif
 
 static lucene::index::Term* createWildCardTerm( const TString& name,
                                                 const string& value )
@@ -383,12 +385,17 @@
                                                 std::map<std::string, time_t>& \
children )  {
 //    qDebug() << "IndexReader::getChildren in thread" << QThread::currentThread();
-    QString query = QString( "select distinct ?path ?mtime where { ?r <%1> \
\"%2\"^^<%3> . ?r <%4> ?mtime . ?r <%5> ?path . }") +    QString query = QString( \
"select distinct ?path ?mtime where { " +                             "{ { ?r <%1> \
\"%2\"^^<%3> . } UNION { ?r <%1> %6 . } } . " +                             "?r <%4> \
?mtime . " +                             "?r <%5> ?path . "
+                             "}")
                     .arg( Util::fieldUri( FieldRegister::parentLocationFieldName \
                ).toString(),
                           escapeLiteralForSparqlQuery( QString::fromUtf8( \
parent.c_str() ) ),  Vocabulary::XMLSchema::string().toString(),
                           Util::fieldUri( FieldRegister::mtimeFieldName \
                ).toString(),
-                          Util::fieldUri( FieldRegister::pathFieldName ).toString() \
); +                          Util::fieldUri( FieldRegister::pathFieldName \
).toString(), +                          Node( QUrl::fromLocalFile( \
QFile::decodeName( parent.c_str() ) ) ).toN3() );  
 //    qDebug() << "running getChildren query:" << query;
 
@@ -399,12 +406,19 @@
         Node mTimeNode = result.binding( "mtime" );
 //        qDebug() << "file in index: " << pathNode.toString() << "mtime:" << \
mTimeNode.literal().toDateTime() << "(" << \
mTimeNode.literal().toDateTime().toTime_t() << ")";  
-        // FIXME: Sadly in Xesam sourceModified is not typed as DateTime but \
defaults to an int :( We try to be compatible +        // be backwards compatible in \
case there are paths left encoded as literals +        std::string path;
+        if ( pathNode.isLiteral() )
+            path = pathNode.toString().toUtf8().data();
+        else
+            path = QFile::encodeName( pathNode.uri().toLocalFile() ).data();
+
+        // Sadly in Xesam sourceModified is not typed as DateTime but defaults to an \
int :( We try to be compatible  if ( mTimeNode.literal().isDateTime() ) {
-            children[std::string( pathNode.toString().toUtf8().data() )] = \
mTimeNode.literal().toDateTime().toTime_t(); +            children[path] = \
mTimeNode.literal().toDateTime().toTime_t();  }
         else {
-            children[std::string( pathNode.toString().toUtf8().data() )] = \
mTimeNode.literal().toUnsignedInt(); +            children[path] = \
mTimeNode.literal().toUnsignedInt();  }
     }
 }
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/sopranoindexwriter.cpp \
#906989:906990 @@ -34,6 +34,7 @@
 #include <QtCore/QDebug>
 #include <QtCore/QThread>
 #include <QtCore/QDateTime>
+#include <QtCore/QByteArray>
 #include <QtCore/QUuid>
 
 #include <KUrl>
@@ -213,15 +214,17 @@
     for ( unsigned int i = 0; i < entries.size(); ++i ) {
         QString path = QString::fromUtf8( entries[i].c_str() );
         QString query = QString( "select ?g ?mg where { "
-                                 "?r <%1> \"%2\"^^<%3> . "
-                                 "?g <http://www.strigi.org/fields#indexGraphFor> ?r \
                . "
-                                 "OPTIONAL { ?mg <%4> ?g . } }" )
+                                 "{ { ?r <%1> \"%2\"^^<%3> . } UNION { ?r <%1> %6 . \
} } . " +                                 "?g <%4> ?r . "
+                                 "OPTIONAL { ?mg <%5> ?g . } }" )
                         .arg( systemLocationUri )
                         .arg( path )
                         .arg( Vocabulary::XMLSchema::string().toString() )
-                        .arg( Vocabulary::NRL::coreGraphMetadataFor().toString() );
+                        .arg( Strigi::Ontology::indexGraphFor().toString() )
+                        .arg( Vocabulary::NRL::coreGraphMetadataFor().toString() )
+                        .arg( Node( QUrl::fromLocalFile( path ) ).toN3() );
 
-//        qDebug() << "deleteEntries query:" << query;
+        qDebug() << "deleteEntries query:" << query;
 
         QueryResultIterator result = d->repository->executeQuery( query, \
::Soprano::Query::QueryLanguageSparql );  if ( result.next() ) {
@@ -248,7 +251,7 @@
 //    qDebug() << "IndexWriter::deleteAllEntries in thread" << \
QThread::currentThread();  
     // query all index graphs (FIXME: would a type derived from nrl:Graph be better \
                than only the predicate?)
-    QString query = QString( "select ?g where { ?g \
<http://www.strigi.org/fields#indexGraphFor> ?r . }" ); +    QString query = QString( \
"select ?g where { ?g <%1> ?r . }" ).arg( \
Strigi::Ontology::indexGraphFor().toString() );  
     qDebug() << "deleteAllEntries query:" << query;
 
@@ -281,7 +284,7 @@
 
     // let's check if we already have data on the file
     StatementIterator it = d->repository->listStatements( Node(),
-                                                          QUrl::fromEncoded( \
"http://www.strigi.org/fields#indexGraphFor", QUrl::StrictMode ), // FIXME: put the \
URI somewhere else +                                                          \
                Strigi::Ontology::indexGraphFor(),
                                                           data->fileUri );
     if ( it.next() ) {
         data->context = it.current().subject().uri();
@@ -320,29 +323,33 @@
         FileMetaData* md = reinterpret_cast<FileMetaData*>( idx->writerData() );
         RegisteredFieldData* rfd = reinterpret_cast<RegisteredFieldData*>( \
field->writerData() );  
+        // Strigi uses rdf:type improperly since it stores the value as a string. We \
have to +        // make sure it is a resource.
         if ( rfd->isRdfType ) {
-
-            // Strigi uses rdf:type improperly since it stores the value as a \
                string. We have to
-            // make sure it is a resource. The problem is that this results in the \
                type not being
-            // indexed properly. Thus, it cannot be searched with normal lucene \
                queries.
-            // That is why we need to introduce a stringType property
-
-            d->repository->addStatement( Statement( md->fileUri,
-                                                    \
                ::Soprano::Vocabulary::RDF::type(),
-                                                    QUrl::fromEncoded( \
value.c_str(), QUrl::StrictMode ), // fromEncoded is faster than the plain \
                constructor and all Xesam URIs work here
-                                                    md->context) );
-            d->repository->addStatement( Statement( md->fileUri,
-                                                    QUrl::fromEncoded( \
                "http://strigi.sourceforge.net/fields#rdf-string-type", \
                QUrl::StrictMode ),
-                                                    LiteralValue( QString::fromUtf8( \
                value.c_str() ) ),
-                                                    md->context) );
+            d->repository->addStatement( md->fileUri,
+                                         ::Soprano::Vocabulary::RDF::type(),
+                                         QUrl::fromEncoded( value.c_str(), \
QUrl::StrictMode ), +                                         md->context );
         }
-
         else {
-            d->repository->addStatement( Statement( md->fileUri,
-                                                    rfd->property,
-                                                    d->createLiteralValue( \
                rfd->dataType, ( unsigned char* )value.c_str(), value.length() ),
-                                                    md->context) );
+            // we bend the plain strigi properties into something nicer, also \
because we do not want paths to be indexed, way too many false positives +            \
// in standard desktop searches +            if ( field->key() == \
FieldRegister::pathFieldName || +                 field->key() == \
FieldRegister::parentLocationFieldName ) { +                \
d->repository->addStatement( md->fileUri, +                                           \
rfd->property, +                                             QUrl::fromLocalFile( \
QFile::decodeName( QByteArray::fromRawData( value.c_str(), value.length() ) ) ), +    \
md->context ); +            }
+            else {
+                d->repository->addStatement( Statement( md->fileUri,
+                                                        rfd->property,
+                                                        d->createLiteralValue( \
rfd->dataType, ( unsigned char* )value.c_str(), value.length() ), +                   \
md->context) ); +            }
         }
+        if ( d->repository->lastError() )
+            qDebug() << "Failed to add value" << value.c_str();
     }
 //    qDebug() << "IndexWriter::addValue done in thread" << \
QThread::currentThread();  }
@@ -459,6 +466,8 @@
                                                 Vocabulary::Xesam::asText(),
                                                 LiteralValue( QString::fromUtf8( \
md->content.c_str() ) ),  md->context ) );
+        if ( d->repository->lastError() )
+            qDebug() << "Failed to add" << md->fileUri << "as text" << \
QString::fromUtf8( md->content.c_str() );  }
 
     // Strigi only indexes files and extractors mostly (if at all) store the \
xesam:DataObject type (i.e. the contents) @@ -487,7 +496,7 @@
                                             LiteralValue( \
QDateTime::currentDateTime() ),  metaDataContext ) );
     d->repository->addStatement( Statement( md->context,
-                                            QUrl::fromEncoded( \
"http://www.strigi.org/fields#indexGraphFor", QUrl::StrictMode ), // FIXME: put the \
URI somewhere else +                                            \
Strigi::Ontology::indexGraphFor(),  md->fileUri,
                                             metaDataContext ) );
     d->repository->addStatement( Statement( metaDataContext,
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/util.cpp #906989:906990
@@ -84,11 +84,6 @@
     if ( QString::fromUtf8( field.c_str() ) == \
::Soprano::Index::CLuceneIndex::defaultSearchField() ) {  return TString::fromUtf8( \
field.c_str() );  }
-    else if ( QString( field.c_str() ) == \
                ::Soprano::Vocabulary::RDF::type().toString() ) {
-        // see sopranoindexwriter:addValue for details on this conversion
-        static TString strigiType( \
                "http://strigi.sourceforge.net/fields#rdf-string-type" );
-        return strigiType;
-    }
     else {
         return fieldUri( field ).toString();
     }
@@ -166,3 +161,9 @@
         model->addStatement( metaDataType );
     }
 }
+
+
+QUrl Strigi::Ontology::indexGraphFor()
+{
+    return QUrl::fromEncoded( "http://www.strigi.org/fields#indexGraphFor", \
QUrl::StrictMode ); +}
--- trunk/KDE/kdebase/runtime/nepomuk/strigibackend/util.h #906989:906990
@@ -49,6 +49,10 @@
             void storeStrigiMiniOntology( ::Soprano::Model* model );
         }
     }
+
+    namespace Ontology {
+        QUrl indexGraphFor();
+    }
 }
 
 #endif


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic