[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    playground/base/strigi/src
From:       Jos van den Oever <jos () vandenoever ! info>
Date:       2006-11-30 23:36:05
Message-ID: 1164929765.452049.9412.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 609482 by vandenoever:

Remove the big strigi lock! In the CLucene backend, I was being conservative about
allowing concurrent reads and writes to the index, hence making indexing slower if
you were looking at the status. This has now been fixed and it makes a huge speed
difference if you are indexing and searching at the same time.

 M  +44 -61    luceneindexer/cluceneindexmanager.cpp  
 M  +8 -11     luceneindexer/cluceneindexmanager.h  
 M  +62 -30    luceneindexer/cluceneindexreader.cpp  
 M  +16 -3     luceneindexer/cluceneindexreader.h  
 M  +6 -8      luceneindexer/cluceneindexwriter.cpp  
 M  +0 -1      streamindexer/indexerconfiguration.cpp  


--- trunk/playground/base/strigi/src/luceneindexer/cluceneindexmanager.cpp #609481:609482
@@ -23,14 +23,14 @@
 #include "cluceneindexwriter.h"
 #include "cluceneindexreader.h"
 #include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
 #include "stgdirent.h" //our dirent compatibility header... uses native if available
 
 using namespace lucene::index;
 using lucene::analysis::standard::StandardAnalyzer;
 using lucene::store::FSDirectory;
 
-StrigiMutex CLuceneIndexManager::lock;
-
 jstreams::IndexManager*
 createCLuceneIndexManager(const char* path) {
     return new CLuceneIndexManager(path);
@@ -41,14 +41,11 @@
 CLuceneIndexManager::CLuceneIndexManager(const std::string& path)
         {//: bitsets(this) {
     ++numberOfManagers;
-    dblock = &lock;
     dbdir = path;
-    indexreader = 0;
     indexwriter = 0;
-    version = 0;
     writer = new CLuceneIndexWriter(this);
-    reader = new CLuceneIndexReader(this);
     analyzer = new StandardAnalyzer();
+    mtime = 0;
 
     //remove any old segments lying around from crashes, etc
     //writer->cleanUp();
@@ -58,8 +55,10 @@
 CLuceneIndexManager::~CLuceneIndexManager() {
     // close the writer and analyzer
     delete writer;
-    delete reader;
-    closeReader();
+    std::map<pthread_t, CLuceneIndexReader*>::iterator r;
+    for (r = readers.begin(); r != readers.end(); ++r) {
+        delete r->second;
+    }
     closeWriter();
     delete analyzer;
     if (--numberOfManagers == 0) {
@@ -69,8 +68,24 @@
 }
 jstreams::IndexReader*
 CLuceneIndexManager::getIndexReader() {
-    return reader;
+    return getReader();
 }
+CLuceneIndexReader*
+CLuceneIndexManager::getReader() {
+    // TODO check if we should update/reopen the reader
+    pthread_t self = pthread_self();
+    CLuceneIndexReader* r;
+    STRIGI_MUTEX_LOCK(&lock.lock);
+    r = readers[self];
+    STRIGI_MUTEX_UNLOCK(&lock.lock);
+    if (r == 0) {
+        r = new CLuceneIndexReader(this, dbdir);
+        STRIGI_MUTEX_LOCK(&lock.lock);
+        readers[self] = r;
+        STRIGI_MUTEX_UNLOCK(&lock.lock);
+    }
+    return r;
+}
 jstreams::IndexWriter*
 CLuceneIndexManager::getIndexWriter() {
     return writer;
@@ -81,53 +96,18 @@
 }*/
 IndexWriter*
 CLuceneIndexManager::refWriter() {
-    STRIGI_MUTEX_LOCK(&dblock->lock);
+    STRIGI_MUTEX_LOCK(&writelock.lock);
     if (indexwriter == 0) {
-        closeReader();
         openWriter();
     }
     return indexwriter;
 }
 void
 CLuceneIndexManager::derefWriter() {
-    STRIGI_MUTEX_UNLOCK(&dblock->lock);
+    STRIGI_MUTEX_UNLOCK(&writelock.lock);
 }
-IndexReader*
-CLuceneIndexManager::refReader() {
-    STRIGI_MUTEX_LOCK(&dblock->lock);
-    if (indexreader == 0) {
-        closeWriter();
-        openReader();
-    }
-    return indexreader;
-}
 void
-CLuceneIndexManager::derefReader() {
-    STRIGI_MUTEX_UNLOCK(&dblock->lock);
-}
-void
-CLuceneIndexManager::openReader() {
-    try {
-//        printf("reader at %s\n", dbdir.c_str());
-        indexreader = IndexReader::open(dbdir.c_str());
-    } catch (CLuceneError& err) {
-        printf("could not create reader: %s\n", err.what());
-    }
-}
-void
-CLuceneIndexManager::closeReader() {
-    if (indexreader == 0) return;
-    try {
-        indexreader->close();
-    } catch (CLuceneError& err) {
-        printf("could not close clucene: %s\n", err.what());
-    }
-    delete indexreader;
-    indexreader = 0;
-}
-void
 CLuceneIndexManager::openWriter(bool truncate) {
-    version++;
     try {
         if (!truncate && IndexReader::indexExists(dbdir.c_str())) {
             if (IndexReader::isLocked(dbdir.c_str())) {
@@ -152,20 +132,7 @@
 }
 int
 CLuceneIndexManager::docCount() {
-    int count = 0;
-    STRIGI_MUTEX_LOCK(&dblock->lock);
-    if (indexwriter) {
-        count = indexwriter->docCount();
-    } else {
-        if (indexreader == 0) {
-            openReader();
-        }
-        if (indexreader) {
-            count = indexreader->numDocs();
-        }
-    }
-    STRIGI_MUTEX_UNLOCK(&dblock->lock);
-    return count;
+    return getReader()->reader->numDocs();
 }
 int64_t
 CLuceneIndexManager::getIndexSize() {
@@ -196,10 +163,26 @@
 }
 void
 CLuceneIndexManager::deleteIndex() {
-    closeReader();
+    // todo: close all readers
     closeWriter();
     openWriter(true);
 }
+time_t
+CLuceneIndexManager::getIndexMTime() {
+    time_t t;
+    STRIGI_MUTEX_LOCK(&lock.lock);
+    t = mtime;
+    STRIGI_MUTEX_UNLOCK(&lock.lock);
+    return t;
+}
+void
+CLuceneIndexManager::setIndexMTime() {
+    struct timeval t;
+    gettimeofday(&t, 0);
+    STRIGI_MUTEX_LOCK(&lock.lock);
+    mtime = t.tv_sec;
+    STRIGI_MUTEX_UNLOCK(&lock.lock);
+}
 std::wstring
 utf8toucs2(const char*p, const char*e) {
     wstring ucs2;
--- trunk/playground/base/strigi/src/luceneindexer/cluceneindexmanager.h #609481:609482
@@ -46,37 +46,34 @@
 class CLuceneIndexWriter;
 class CLuceneIndexManager : public jstreams::IndexManager {
 private:
-    StrigiMutex* dblock;
-    static StrigiMutex lock;
+    StrigiMutex writelock;
+    StrigiMutex lock;
     std::string dbdir;
-    CLuceneIndexReader* reader;
+    std::map<pthread_t, CLuceneIndexReader*> readers;
     CLuceneIndexWriter* writer;
     lucene::index::IndexWriter* indexwriter;
-    lucene::index::IndexReader* indexreader;
     //jstreams::QueryBitsetCache bitsets;
     lucene::analysis::Analyzer* analyzer;
-    int version;
+    time_t mtime;
     static int numberOfManagers;
 
-    void openReader();
-    void closeReader();
     void openWriter(bool truncate=false);
-    void closeWriter();
 public:
     explicit CLuceneIndexManager(const std::string& path);
     ~CLuceneIndexManager();
 
     lucene::index::IndexWriter* refWriter();
     void derefWriter();
-    lucene::index::IndexReader* refReader();
-    void derefReader();
     jstreams::IndexReader* getIndexReader();
     jstreams::IndexWriter* getIndexWriter();
+    CLuceneIndexReader* getReader();
 //    jstreams::QueryBitsetCache* getBitSets();
     int32_t docCount();
     int64_t getIndexSize();
-    int getVersion() const { return version; }
     void deleteIndex();
+    void closeWriter();
+    time_t getIndexMTime();
+    void setIndexMTime();
 };
 
 jstreams::IndexManager*
--- trunk/playground/base/strigi/src/luceneindexer/cluceneindexreader.cpp #609481:609482
@@ -64,12 +64,53 @@
         jstreams::IndexedDocument&);
 };
 
-CLuceneIndexReader::CLuceneIndexReader(CLuceneIndexManager* m)
-        :manager(m), countversion(-1) {
+CLuceneIndexReader::CLuceneIndexReader(CLuceneIndexManager* m,
+    const string& dir) :manager(m), dbdir(dir), otime(0), reader(0) {
+    openReader();
 }
 
 CLuceneIndexReader::~CLuceneIndexReader() {
+    closeReader();
 }
+void
+CLuceneIndexReader::openReader() {
+    doccount = -1;
+    wordcount = -1;
+    try {
+//        printf("reader at %s\n", dbdir.c_str());
+        reader = lucene::index::IndexReader::open(dbdir.c_str());
+    } catch (CLuceneError& err) {
+        printf("could not create reader: %s\n", err.what());
+        reader = 0;
+    }
+}
+void
+CLuceneIndexReader::closeReader() {
+    if (reader == 0) return;
+    try {
+        reader->close();
+    } catch (CLuceneError& err) {
+        printf("could not close clucene: %s\n", err.what());
+    }
+    delete reader;
+    reader = 0;
+}
+bool
+CLuceneIndexReader::checkReader(bool enforceCurrent) {
+    if (manager->getIndexMTime() > otime) {
+        struct timeval t;
+        gettimeofday(&t, 0);
+        if (enforceCurrent || t.tv_sec-otime > 60) {
+            fprintf(stderr, "reopening reader.\n");
+            otime = t.tv_sec;
+            closeReader();
+        }
+    }
+    if (reader == 0) {
+        openReader();
+    }
+    return reader;
+}
 
 #ifdef _UCS2
 typedef map<wstring, wstring> CLuceneIndexReaderFieldMapType;
@@ -207,11 +248,10 @@
 }
 int32_t
 CLuceneIndexReader::countHits(const Query& q) {
+    if (!checkReader()) return -1;
     BooleanQuery bq;
     Private::createBooleanQuery(q, bq);
-    lucene::index::IndexReader* reader = manager->refReader();
     if (reader == 0) {
-        manager->derefReader();
         return 0;
     }
     IndexSearcher searcher(reader);
@@ -250,7 +290,6 @@
         delete hits;
     }
     searcher.close();
-    manager->derefReader();
     return s;
 }
 std::vector<IndexedDocument>
@@ -258,9 +297,7 @@
     BooleanQuery bq;
     Private::createBooleanQuery(q, bq);
     std::vector<IndexedDocument> results;
-    lucene::index::IndexReader* reader = manager->refReader();
-    if (reader == 0) {
-        manager->derefReader();
+    if (!checkReader()) {
         return results;
     }
     IndexSearcher searcher(reader);
@@ -293,15 +330,12 @@
         _CLDELETE(hits);
     }
     searcher.close();
-    manager->derefReader();
     return results;
 }
 std::map<std::string, time_t>
 CLuceneIndexReader::getFiles(char depth) {
     std::map<std::string, time_t> files;
-    lucene::index::IndexReader* reader = manager->refReader();
-    if (reader == 0) {
-        manager->derefReader();
+    if (!checkReader()) {
         return files;
     }
 
@@ -325,28 +359,28 @@
         _CLDELETE(d);
     }
     _CLDELETE(docs);
-    manager->derefReader();
     return files;
 }
 int32_t
 CLuceneIndexReader::countDocuments() {
-    return manager->docCount();
+    if (!checkReader()) return -1;
+    if (doccount == -1) {
+        doccount = manager->docCount();
+    }
+    return doccount;
 }
 int32_t
 CLuceneIndexReader::countWords() {
-    if (manager->getVersion() == countversion) {
-        return count;
+    if (!checkReader()) return -1;
+    if (wordcount == -1) {
+        if (reader) {
+            wordcount = 0;
+            lucene::index::TermEnum *terms = reader->terms();
+            while (terms->next()) wordcount++;
+            _CLDELETE(terms);
+        }
     }
-    count = 0;
-    countversion = manager->getVersion();
-    lucene::index::IndexReader* reader = manager->refReader();
-    if (reader) {
-        lucene::index::TermEnum *terms = reader->terms();
-        while (terms->next()) count++;
-        _CLDELETE(terms);
-    }
-    manager->derefReader();
-    return count;
+    return wordcount;
 }
 int64_t
 CLuceneIndexReader::getIndexSize() {
@@ -354,7 +388,7 @@
 }
 int64_t
 CLuceneIndexReader::getDocumentId(const std::string& uri) {
-    lucene::index::IndexReader* reader = manager->refReader();
+    if (!checkReader()) return -1;
     int64_t id = -1;
 
     TCHAR tstr[CL_MAX_DIR];
@@ -370,7 +404,6 @@
         id = -1;
     }
 
-    manager->derefReader();
     return id;
 }
 /**
@@ -380,7 +413,7 @@
 time_t
 CLuceneIndexReader::getMTime(int64_t docid) {
     if (docid < 0) return 0;
-    lucene::index::IndexReader* reader = manager->refReader();
+    if (!checkReader(true)) return 0;
     time_t mtime = 0;
     Document *d = reader->document(docid);
     if (d) {
@@ -390,6 +423,5 @@
         mtime = atoi(cstr);
         delete d;
     }
-    manager->derefReader();
     return mtime;
 }
--- trunk/playground/base/strigi/src/luceneindexer/cluceneindexreader.h #609481:609482
@@ -22,6 +22,13 @@
 
 #include "indexreader.h"
 #include <map>
+#include <sys/time.h>
+#include <time.h>
+namespace lucene {
+    namespace index {
+        class IndexReader;
+    }
+}
 
 class CLuceneIndexManager;
 class CLuceneIndexReader : public jstreams::IndexReader {
@@ -29,13 +36,18 @@
 private:
     CLuceneIndexManager* manager;
     class Private;
-    int countversion;
-    int32_t count;
+    int32_t wordcount;
+    int32_t doccount;
+    const std::string dbdir;
+    time_t otime;
 
-    CLuceneIndexReader(CLuceneIndexManager* m);
+    CLuceneIndexReader(CLuceneIndexManager* m, const std::string& dbdir);
     ~CLuceneIndexReader();
     static const TCHAR* mapId(const wchar_t* id);
 	static std::wstring mapId(const char* id);
+    void openReader();
+    void closeReader();
+    bool checkReader(bool ensureCurrent = false);
 
     friend class CLuceneIndexReader::Private;
 public:
@@ -48,6 +60,7 @@
     int64_t getDocumentId(const std::string& uri);
     time_t getMTime(int64_t docid);
     static void addMapping(const TCHAR* from, const TCHAR* to);
+    lucene::index::IndexReader* reader;
 };
 
 #endif
--- trunk/playground/base/strigi/src/luceneindexer/cluceneindexwriter.cpp #609481:609482
@@ -21,6 +21,7 @@
 #include <CLucene.h>
 #include <CLucene/store/Lock.h>
 #include "cluceneindexwriter.h"
+#include "cluceneindexreader.h"
 #include "cluceneindexmanager.h"
 #include "stringreader.h"
 #include "inputstreamreader.h"
@@ -178,16 +179,19 @@
     delete doc;
     if ( sr )
         delete sr;
+    manager->setIndexMTime();
 }
 void
 CLuceneIndexWriter::deleteEntries(const std::vector<std::string>& entries) {
+    manager->closeWriter();
     for (uint i=0; i<entries.size(); ++i) {
         deleteEntry(entries[i]);
     }
+    manager->setIndexMTime();
 }
 void
 CLuceneIndexWriter::deleteEntry(const string& entry) {
-    lucene::index::IndexReader* reader = manager->refReader();
+    lucene::index::IndexReader* reader = manager->getReader()->reader;
 
     wstring tstr(utf8toucs2(entry));
     Term term(_T("path"), tstr.c_str());
@@ -208,8 +212,6 @@
         }
         _CLDELETE(bits);
     }
-
-    manager->derefReader();
 }
 void
 CLuceneIndexWriter::deleteAllEntries() {
@@ -275,9 +277,8 @@
     //remove all unused lucene file elements... unused elements are the result of unexpected shutdowns...
     //this can add up to a lot of after a while.
 
-    lucene::index::IndexReader* reader = manager->refReader();
+    lucene::index::IndexReader* reader = manager->getReader()->reader;
     if (!reader) {
-        manager->derefReader();
         return;
     }
     lucene::store::Directory* directory = reader->getDirectory();
@@ -291,7 +292,6 @@
     bool locked = lock->obtain(lucene::index::IndexWriter::COMMIT_LOCK_TIMEOUT);
 #endif
     if (!locked) {
-        manager->derefReader();
         return;
     }
     lucene::index::SegmentInfos infos;
@@ -300,7 +300,6 @@
         infos.read(directory);
     } catch(...) {
         lock->release();
-        manager->derefReader();
         return; //todo: this may suggest an error...
     }
     lock->release();
@@ -344,6 +343,5 @@
     }
     _CLDELETE_ARRAY(files)
 
-    manager->derefReader();
 }
 
--- trunk/playground/base/strigi/src/streamindexer/indexerconfiguration.cpp #609481:609482
@@ -69,7 +69,6 @@
                 FNM_PERIOD);
         }
         if (match) {
-            printf("dir '%s' %i\n", path, i->include);
             return i->include;
         }
     }


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic