[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    branches/KDE/4.3/kdebase/runtime/nepomuk/services/storage
From:       Sebastian Trueg <sebastian () trueg ! de>
Date:       2009-08-03 14:08:14
Message-ID: 1249308494.590514.3750.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 1006336 by trueg:

Backported the improved CLucene query analyzer, replacing the plain whitespace one. \
This fixes, for example, querying for filenames.

 M  +14 -13    cluceneanalyzer.cpp  
 M  +3 -5      cluceneanalyzer.h  
 M  +0 -10     repository.cpp  


--- branches/KDE/4.3/kdebase/runtime/nepomuk/services/storage/cluceneanalyzer.cpp \
#1006335:1006336 @@ -38,17 +38,12 @@
 namespace Nepomuk {
 
 	CLuceneAnalyzer::CLuceneAnalyzer()
-        : stopSet(false)
+        : stopSet(false),
+          m_rdfType( L"http://www.w3.org/1999/02/22-rdf-syntax-ns#type" )
 	{
         CL_NS(analysis)::StopFilter::fillStopTable( \
                &stopSet,CL_NS(analysis)::StopAnalyzer::ENGLISH_STOP_WORDS);
-	}
+    }
 
-	CLuceneAnalyzer::CLuceneAnalyzer( const TCHAR** stopWords):
-		stopSet(false)
-	{
-		CL_NS(analysis)::StopFilter::fillStopTable( &stopSet,stopWords );
-	}
-
 	CLuceneAnalyzer::~CLuceneAnalyzer()
     {
 	}
@@ -56,10 +51,16 @@
 
 	CL_NS(analysis)::TokenStream* CLuceneAnalyzer::tokenStream(const TCHAR* fieldName, \
Reader* reader)  {
-		CL_NS(analysis)::TokenStream* ret = _CLNEW CLuceneTokenizer(reader);
-		ret = _CLNEW CLuceneFilter(ret,true);
-		ret = _CLNEW CL_NS(analysis)::LowerCaseFilter(ret,true);
-		ret = _CLNEW CL_NS(analysis)::StopFilter(ret,true, &stopSet);
-		return ret;
+        if ( !::wcscmp( fieldName, m_rdfType ) ) {
+            // never tokenize the type URIs
+            return _CLNEW CL_NS(analysis)::WhitespaceTokenizer(reader);
+        }
+        else {
+            CL_NS(analysis)::TokenStream* ret = _CLNEW CLuceneTokenizer(reader);
+            ret = _CLNEW CLuceneFilter(ret,true);
+            ret = _CLNEW CL_NS(analysis)::LowerCaseFilter(ret,true);
+            ret = _CLNEW CL_NS(analysis)::StopFilter(ret,true, &stopSet);
+            return ret;
+        }
 	}
 }
--- branches/KDE/4.3/kdebase/runtime/nepomuk/services/storage/cluceneanalyzer.h \
#1006335:1006336 @@ -37,25 +37,23 @@
 namespace Nepomuk {
 
 	/** Represents a standard analyzer. */
-	class CLuceneAnalyzer : public CL_NS(analysis)::Analyzer 
+	class CLuceneAnalyzer : public CL_NS(analysis)::Analyzer
 	{
 	public:
 		/** Builds an analyzer.*/
 		CLuceneAnalyzer();
 
-		/** Builds an analyzer with the given stop words. */
-		CLuceneAnalyzer( const TCHAR** stopWords);
-
 		~CLuceneAnalyzer();
 
 		/**
-         * Constructs a StandardTokenizer filtered by a 
+         * Constructs a StandardTokenizer filtered by a
          * StandardFilter, a LowerCaseFilter and a StopFilter.
          */
 		CL_NS(analysis)::TokenStream* tokenStream(const TCHAR* fieldName, \
CL_NS(util)::Reader* reader);  
  	private:
 		CL_NS(util)::CLSetList<const TCHAR*> stopSet;
+        TCHAR* m_rdfType;
 	};
 }
 
--- branches/KDE/4.3/kdebase/runtime/nepomuk/services/storage/repository.cpp \
#1006335:1006336 @@ -97,9 +97,6 @@
     if ( m_state == OPEN ) {
 #ifdef HAVE_SOPRANO_INDEX
         delete m_indexModel;
-#if SOPRANO_IS_VERSION(2,2,66)
-        delete m_index->queryAnalyzer();
-#endif
         delete m_index;
         m_indexModel = 0;
         m_index = 0;
@@ -173,13 +170,6 @@
     m_analyzer = new CLuceneAnalyzer();
     m_index = new Soprano::Index::CLuceneIndex( m_analyzer );
 
-#if SOPRANO_IS_VERSION(2,2,66)
-    // in order to be able to query the resource type through clucene we need to \
                avoid having
-    // URLs borked by the standard analyzer. By using the WhitespaceAnalyzer we \
                shift resposibility
-    // for proper query term encoding to the client.
-    m_index->setQueryAnalyzer( new lucene::analysis::WhitespaceAnalyzer() );
-#endif
-
     if ( m_index->open( indexPath, true ) ) {
         kDebug() << "Successfully created new index for repository" << name();
         m_indexModel = new Soprano::Index::IndexFilterModel( m_index, m_model );


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic