[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: branches/KDE/4.3/kdebase/runtime/nepomuk/services/storage
From: Sebastian Trueg <sebastian () trueg ! de>
Date: 2009-08-03 14:08:14
Message-ID: 1249308494.590514.3750.nullmailer () svn ! kde ! org
[Download RAW message or body]
SVN commit 1006336 by trueg:
Backported the improved CLucene query analyzer, replacing the plain whitespace one. \
This fixes, for example, querying for filenames.
M +14 -13 cluceneanalyzer.cpp
M +3 -5 cluceneanalyzer.h
M +0 -10 repository.cpp
--- branches/KDE/4.3/kdebase/runtime/nepomuk/services/storage/cluceneanalyzer.cpp \
#1006335:1006336 @@ -38,17 +38,12 @@
namespace Nepomuk {
CLuceneAnalyzer::CLuceneAnalyzer()
- : stopSet(false)
+ : stopSet(false),
+ m_rdfType( L"http://www.w3.org/1999/02/22-rdf-syntax-ns#type" )
{
CL_NS(analysis)::StopFilter::fillStopTable( \
&stopSet,CL_NS(analysis)::StopAnalyzer::ENGLISH_STOP_WORDS);
- }
+ }
- CLuceneAnalyzer::CLuceneAnalyzer( const TCHAR** stopWords):
- stopSet(false)
- {
- CL_NS(analysis)::StopFilter::fillStopTable( &stopSet,stopWords );
- }
-
CLuceneAnalyzer::~CLuceneAnalyzer()
{
}
@@ -56,10 +51,16 @@
CL_NS(analysis)::TokenStream* CLuceneAnalyzer::tokenStream(const TCHAR* fieldName, \
Reader* reader) {
- CL_NS(analysis)::TokenStream* ret = _CLNEW CLuceneTokenizer(reader);
- ret = _CLNEW CLuceneFilter(ret,true);
- ret = _CLNEW CL_NS(analysis)::LowerCaseFilter(ret,true);
- ret = _CLNEW CL_NS(analysis)::StopFilter(ret,true, &stopSet);
- return ret;
+ if ( !::wcscmp( fieldName, m_rdfType ) ) {
+ // never tokenize the type URIs
+ return _CLNEW CL_NS(analysis)::WhitespaceTokenizer(reader);
+ }
+ else {
+ CL_NS(analysis)::TokenStream* ret = _CLNEW CLuceneTokenizer(reader);
+ ret = _CLNEW CLuceneFilter(ret,true);
+ ret = _CLNEW CL_NS(analysis)::LowerCaseFilter(ret,true);
+ ret = _CLNEW CL_NS(analysis)::StopFilter(ret,true, &stopSet);
+ return ret;
+ }
}
}
--- branches/KDE/4.3/kdebase/runtime/nepomuk/services/storage/cluceneanalyzer.h \
#1006335:1006336 @@ -37,25 +37,23 @@
namespace Nepomuk {
/** Represents a standard analyzer. */
- class CLuceneAnalyzer : public CL_NS(analysis)::Analyzer
+ class CLuceneAnalyzer : public CL_NS(analysis)::Analyzer
{
public:
/** Builds an analyzer.*/
CLuceneAnalyzer();
- /** Builds an analyzer with the given stop words. */
- CLuceneAnalyzer( const TCHAR** stopWords);
-
~CLuceneAnalyzer();
/**
- * Constructs a StandardTokenizer filtered by a
+ * Constructs a StandardTokenizer filtered by a
* StandardFilter, a LowerCaseFilter and a StopFilter.
*/
CL_NS(analysis)::TokenStream* tokenStream(const TCHAR* fieldName, \
CL_NS(util)::Reader* reader);
private:
CL_NS(util)::CLSetList<const TCHAR*> stopSet;
+ TCHAR* m_rdfType;
};
}
--- branches/KDE/4.3/kdebase/runtime/nepomuk/services/storage/repository.cpp \
#1006335:1006336 @@ -97,9 +97,6 @@
if ( m_state == OPEN ) {
#ifdef HAVE_SOPRANO_INDEX
delete m_indexModel;
-#if SOPRANO_IS_VERSION(2,2,66)
- delete m_index->queryAnalyzer();
-#endif
delete m_index;
m_indexModel = 0;
m_index = 0;
@@ -173,13 +170,6 @@
m_analyzer = new CLuceneAnalyzer();
m_index = new Soprano::Index::CLuceneIndex( m_analyzer );
-#if SOPRANO_IS_VERSION(2,2,66)
- // in order to be able to query the resource type through clucene we need to \
avoid having
- // URLs borked by the standard analyzer. By using the WhitespaceAnalyzer we \
shift resposibility
- // for proper query term encoding to the client.
- m_index->setQueryAnalyzer( new lucene::analysis::WhitespaceAnalyzer() );
-#endif
-
if ( m_index->open( indexPath, true ) ) {
kDebug() << "Successfully created new index for repository" << name();
m_indexModel = new Soprano::Index::IndexFilterModel( m_index, m_model );
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic