[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    KDE/kdelibs/nepomuk
From:       Sebastian Trueg <sebastian () trueg ! de>
Date:       2011-01-05 14:21:45
Message-ID: 20110105142145.721ACAC8B0 () svn ! kde ! org
[Download RAW message or body]

SVN commit 1212019 by trueg:

Use a regex filter instead of bif:contains if wildcards are used without the \
necessary leading chars.

BUG: 258693

 M  +7 -19     query/comparisonterm.cpp  
 M  +46 -13    query/literalterm.cpp  
 M  +1 -2      query/literalterm_p.h  
 M  +22 -0     test/querytest.cpp  


--- trunk/KDE/kdelibs/nepomuk/query/comparisonterm.cpp #1212018:1212019
@@ -159,18 +159,11 @@
         }
         else if ( m_comparator == ComparisonTerm::Contains ) {
             const QString v = getMainVariableName(qbd);
-            QString scoringPattern;
-            if( qbd->query()->m_fullTextScoringEnabled ) {
-                scoringPattern = QString::fromLatin1("OPTION (score %1) \
                ").arg(qbd->createScoringVariable());
-            }
-            const QString text = static_cast<const \
                LiteralTermPrivate*>(m_subTerm.toLiteralTerm().d_ptr.constData())->queryText();
                
-            qbd->addFullTextSearchTerm( v, text );
-            return QString::fromLatin1( "%1 %2 %3 . %3 bif:contains \"%4\" %5. " )
+            return QString::fromLatin1( "%1 %2 %3 . " )
                 .arg( resourceVarName,
                       propertyToString( qbd ),
-                      v,
-                      text,
-                      scoringPattern );
+                          v )
+                    + LiteralTermPrivate::createContainsPattern( v, \
m_subTerm.toLiteralTerm().value().toString(), qbd );  }
         else if ( m_comparator == ComparisonTerm::Regexp ) {
             QString v = getMainVariableName(qbd);
@@ -247,16 +240,11 @@
             }
             else if ( m_comparator == ComparisonTerm::Contains ) {
                 QString v3 = qbd->uniqueVarName();
-                QString scoringPattern;
-                if( qbd->query()->m_fullTextScoringEnabled ) {
-                    scoringPattern = QString::fromLatin1("OPTION (score %1) \
                ").arg(qbd->createScoringVariable());
-                }
                 // since this is not a "real" full text search but rather a match on \
                resource "names" we do not call \
                QueryBuilderData::addFullTextSearchTerm
-                return QString::fromLatin1( "%1%2 bif:contains \"%3\"  %4. " )
-                    .arg( pattern.arg(v3),
-                          v3,
-                          static_cast<const \
                LiteralTermPrivate*>(m_subTerm.toLiteralTerm().d_ptr.constData())->queryText(),
                
-                          scoringPattern );
+                return pattern.arg(v3)
+                        + LiteralTermPrivate::createContainsPattern( v3,
+                                                                     \
m_subTerm.toLiteralTerm().value().toString(), +                                       \
qbd );  }
             else if ( m_comparator == ComparisonTerm::Regexp ) {
                 QString v3 = qbd->uniqueVarName();
--- trunk/KDE/kdelibs/nepomuk/query/literalterm.cpp #1212018:1212019
@@ -56,29 +56,24 @@
     const QString v2 = qbd->uniqueVarName();
     const QString v3 = qbd->uniqueVarName();
     const QString v4 = qbd->uniqueVarName();
-    const QString text = queryText();
-    QString scoringPattern;
-    if( qbd->query()->m_fullTextScoringEnabled ) {
-        scoringPattern = QString::fromLatin1("OPTION (score %1) \
                ").arg(qbd->createScoringVariable());
-    }
-    qbd->addFullTextSearchTerm( v2, text );
+    const QString containsPattern = createContainsPattern( v2, m_value.toString(), \
qbd );  
-    return QString::fromLatin1( "{ %1 %2 %3 . %3 bif:contains \"%4\" %9. } "
+    return QString::fromLatin1( "{ %1 %2 %3 . %4 } "
                                 "UNION "
-                                "{ %1 %2 %5 . %5 %6 %3 . %6 %7 %8 . %3 bif:contains \
\"%4\" %9. } . " ) +                                "{ %1 %2 %5 . %5 %6 %3 . %6 %7 %8 \
                . %4 } . " )
         .arg( resourceVarName,
               v1,
               v2,
-              text,
+              containsPattern,
               v3,
               v4,
               Soprano::Node::resourceToN3(Soprano::Vocabulary::RDFS::subPropertyOf()),
                
-              Soprano::Node::resourceToN3(Soprano::Vocabulary::RDFS::label()),
-              scoringPattern );
+              Soprano::Node::resourceToN3(Soprano::Vocabulary::RDFS::label()) );
 }
 
 
-QString Nepomuk::Query::LiteralTermPrivate::queryText() const
+namespace {
+QString prepareQueryText( const QString& text )
 {
     //
     // we try to be a little smart about creating the query text
@@ -90,7 +85,7 @@
     // [4. wildcards can only be used if they are preceded by at least 4 chars]
     //
 
-    QString s = m_value.toString().simplified();
+    QString s = text.simplified();
     if( s.isEmpty() )
         return s;
 
@@ -113,7 +108,45 @@
     return s;
 }
 
+QString prepareRegexText( const QString& text )
+{
+    QString filterRxStr = QRegExp::escape( text );
+    filterRxStr.replace( "\\*", QLatin1String( ".*" ) );
+    filterRxStr.replace( "\\?", QLatin1String( "." ) );
+    filterRxStr.replace( '\\',"\\\\" );
+    return filterRxStr;
+}
+}
 
+
+QString Nepomuk::Query::LiteralTermPrivate::createContainsPattern( const QString& \
varName, const QString& text, Nepomuk::Query::QueryBuilderData* qbd ) +{
+    const int i = text.indexOf( QRegExp(QLatin1String("[\\?\\*]")) );
+
+    //
+    // Virtuoso needs four leading chars when using wildcards. Thus, if there are \
fewer (this includes 0) we fall back to the slower regex filter +    //
+    if( i < 0 || i > 3 ) {
+        const QString finalText = prepareQueryText( text );
+
+        QString scoringPattern;
+        if( qbd->query()->m_fullTextScoringEnabled ) {
+            scoringPattern = QString::fromLatin1("OPTION (score %1) \
").arg(qbd->createScoringVariable()); +        }
+        qbd->addFullTextSearchTerm( varName, finalText );
+
+        return QString::fromLatin1( "%1 bif:contains \"%2\" %3. " )
+                .arg( varName,
+                     finalText,
+                     scoringPattern );
+    }
+    else {
+        return QString::fromLatin1( "FILTER(REGEX(%1, \"%2\")) . " )
+                .arg( varName, prepareRegexText(text) );
+    }
+}
+
+
 Nepomuk::Query::LiteralTerm::LiteralTerm( const LiteralTerm& term )
     : Term( term )
 {
--- trunk/KDE/kdelibs/nepomuk/query/literalterm_p.h #1212018:1212019
@@ -39,8 +39,7 @@
             bool isValid() const { return m_value.isValid(); }
             QString toSparqlGraphPattern( const QString& resourceVarName, \
QueryBuilderData* qbd ) const;  
-            /// The text that is to be used in the bif:contains pattern.
-            QString queryText() const;
+            static QString createContainsPattern( const QString& varName, const \
QString& text, QueryBuilderData* qbd );  
             Soprano::LiteralValue m_value;
         };
--- trunk/KDE/kdelibs/nepomuk/test/querytest.cpp #1212018:1212019
@@ -88,6 +88,28 @@
         << Query( LiteralTerm( "\"Hello World\"" ) )
         << helloWorldQuery;
 
+    QTest::newRow( "simple literal query with wildcard 1" )
+        << Query( LiteralTerm( "Hello*" ) )
+        << QString::fromLatin1( "select distinct ?r where { { ?r ?v1 ?v2 . ?v2 \
bif:contains \"'Hello*'\" . } " +                                "UNION "
+                                "{ ?r ?v1 ?v3 . ?v3 ?v4 ?v2 . ?v4 %1 %2 . ?v2 \
bif:contains \"'Hello*'\" . } . }" ) +           .arg( \
Soprano::Node::resourceToN3(Soprano::Vocabulary::RDFS::subPropertyOf()), +            \
Soprano::Node::resourceToN3(Soprano::Vocabulary::RDFS::label()) ); +    \
QTest::newRow( "simple literal query with wildcard 2" ) +        << Query( \
LiteralTerm( "*Hello" ) ) +        << QString::fromLatin1( "select distinct ?r where \
{ { ?r ?v1 ?v2 . FILTER(REGEX(?v2, \".*Hello\")) . } " +                              \
"UNION " +                                "{ ?r ?v1 ?v3 . ?v3 ?v4 ?v2 . ?v4 %1 %2 . \
FILTER(REGEX(?v2, \".*Hello\")) . } . }" ) +           .arg( \
Soprano::Node::resourceToN3(Soprano::Vocabulary::RDFS::subPropertyOf()), +            \
Soprano::Node::resourceToN3(Soprano::Vocabulary::RDFS::label()) ); +    \
QTest::newRow( "simple literal query with wildcard 3" ) +        << Query( \
LiteralTerm( "Hel?o" ) ) +        << QString::fromLatin1( "select distinct ?r where { \
{ ?r ?v1 ?v2 . FILTER(REGEX(?v2, \"Hel.o\")) . } " +                                \
"UNION " +                                "{ ?r ?v1 ?v3 . ?v3 ?v4 ?v2 . ?v4 %1 %2 . \
FILTER(REGEX(?v2, \"Hel.o\")) . } . }" ) +           .arg( \
Soprano::Node::resourceToN3(Soprano::Vocabulary::RDFS::subPropertyOf()), +            \
Soprano::Node::resourceToN3(Soprano::Vocabulary::RDFS::label()) ); +
     Query literalQueryWithDepth2(
         AndTerm( LiteralTerm("foo"),
                  ComparisonTerm( Soprano::Vocabulary::NAO::hasTag(),


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic