[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    [baloo] src: Balooctl: Add checkDb command
From:       Vishesh Handa <me () vhanda ! in>
Date:       2015-09-30 16:41:39
Message-ID: E1ZhKRn-0001io-UO () scm ! kde ! org
[Download RAW message or body]

Git commit 9da558f643eaac1e6c37aba5a2ff1dfe2c84bf98 by Vishesh Handa.
Committed on 30/09/2015 at 16:05.
Pushed by vhanda into branch 'master'.

Balooctl: Add checkDb command

This command can be used by developers to check whether their PostingDB and
DocumentUrlDB are in a valid state. We have a few bugs caused by the
databases not all being updated correctly. This will help us diagnose them
better.

REVIEW: 125424

M  +137  -0    src/engine/transaction.cpp
M  +5    -0    src/engine/transaction.h
M  +15   -0    src/tools/balooctl/main.cpp

http://commits.kde.org/baloo/9da558f643eaac1e6c37aba5a2ff1dfe2c84bf98

diff --git a/src/engine/transaction.cpp b/src/engine/transaction.cpp
index 8e9700c..b670662 100644
--- a/src/engine/transaction.cpp
+++ b/src/engine/transaction.cpp
@@ -467,3 +467,140 @@ DatabaseSize Transaction::dbSize()
 
     return dbSize;
 }
+
+//
+// Debugging
+//
+// Reports every document id that is referenced by the PostingDB but has no
+// file path in the DocumentUrlDB.
+//
+// For each such id the terms known for it are written to stdout: the terms
+// reverse-engineered from the PostingDB, followed by the contents of the
+// DocumentTermsDB, FileNameTermsDB and XAttrTermsDB. A final line gives the
+// number of invalid entries and their share of all ids seen in the PostingDB.
+void Transaction::checkFsTree()
+{
+    DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
+    DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
+    DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
+    DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
+    PostingDB postingDb(m_dbis.postingDbi, m_txn);
+
+    const auto map = postingDb.toTestMap();
+
+    // Collect every document id that appears in any posting list.
+    const QList<PostingList> allLists = map.values();
+    QSet<quint64> allIds;
+    for (const PostingList& list : allLists) {
+        for (quint64 id : list) {
+            allIds << id;
+        }
+    }
+
+    QTextStream out(stdout);
+    out << "Total Document IDs: " << allIds.size() << endl;
+
+    int count = 0;
+    for (quint64 id : allIds) {
+        const QByteArray url = docUrlDb.get(id);
+        if (!url.isEmpty()) {
+            continue;
+        }
+
+        const auto terms = documentTermsDB.get(id);
+        const auto fileNameTerms = documentFileNameTermsDB.get(id);
+        const auto xAttrTerms = documentXattrTermsDB.get(id);
+
+        // Reverse engineer the terms: every key of the posting map whose
+        // posting list contains this id.
+        QList<QByteArray> newTerms;
+        QMapIterator<QByteArray, PostingList> it(map);
+        while (it.hasNext()) {
+            it.next();
+            if (it.value().contains(id)) {
+                newTerms << it.key();
+            }
+        }
+
+        out << "Missing filePath for " << id << endl;
+        out << "\tPostingDB Terms: ";
+        for (const QByteArray& term : newTerms) {
+            out << term << " ";
+        }
+        out << endl;
+
+        out << "\tDocumentTermsDB: ";
+        for (const QByteArray& term : terms) {
+            out << term << " ";
+        }
+        out << endl;
+
+        // Each section must print its own database's terms; printing `terms`
+        // here would just repeat the DocumentTermsDB line.
+        out << "\tFileNameTermsDB: ";
+        for (const QByteArray& term : fileNameTerms) {
+            out << term << " ";
+        }
+        out << endl;
+
+        out << "\tXAttrTermsDB: ";
+        for (const QByteArray& term : xAttrTerms) {
+            out << term << " ";
+        }
+        out << endl;
+
+        count++;
+    }
+
+    // Avoid 0/0 (printed as "nan") when the PostingDB holds no ids at all.
+    const double percent = allIds.isEmpty() ? 0.0 : count * 100.0 / allIds.size();
+    out << "Invalid Entries: " << count << " (" << percent << "%)" << endl;
+}
+
+// Checks the document term databases against the PostingDB.
+//
+// For every document id found in the PostingDB, the terms stored for that id
+// in the DocumentTermsDB, XAttrTermsDB and FileNameTermsDB are looked up in
+// the PostingDB. Each term whose posting list lacks the id is reported on
+// stdout.
+void Transaction::checkTermsDbinPostingDb()
+{
+    DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
+    DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
+    DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
+    PostingDB postingDb(m_dbis.postingDbi, m_txn);
+
+    // Gather the ids of all documents mentioned in any posting list.
+    const auto postings = postingDb.toTestMap();
+    QSet<quint64> docIds;
+    for (auto it = postings.constBegin(); it != postings.constEnd(); ++it) {
+        for (quint64 docId : it.value()) {
+            docIds.insert(docId);
+        }
+    }
+
+    QTextStream out(stdout);
+    out << "PostingDB check .." << endl;
+    for (quint64 docId : docIds) {
+        // All terms recorded for this document, across the three term databases.
+        const QVector<QByteArray> docTerms = documentTermsDB.get(docId)
+                                           + documentXattrTermsDB.get(docId)
+                                           + documentFileNameTermsDB.get(docId);
+
+        for (const QByteArray& term : docTerms) {
+            if (postingDb.get(term).contains(docId)) {
+                continue;
+            }
+            out << docId << " is missing term " << term << endl;
+        }
+    }
+}
+
+// Checks the PostingDB against the document term databases.
+//
+// For every (term, document id) pair in the PostingDB, the term is expected
+// in the combined contents of the DocumentTermsDB, FileNameTermsDB and
+// XAttrTermsDB for that id. Each pair for which it is absent is reported on
+// stdout.
+void Transaction::checkPostingDbinTermsDb()
+{
+    DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
+    DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
+    DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
+    PostingDB postingDb(m_dbis.postingDbi, m_txn);
+
+    const QMap<QByteArray, PostingList> postings = postingDb.toTestMap();
+
+    QTextStream out(stdout);
+    out << "DocumentTermsDB check .." << endl;
+    for (auto entry = postings.constBegin(); entry != postings.constEnd(); ++entry) {
+        const QByteArray& term = entry.key();
+        for (quint64 docId : entry.value()) {
+            QVector<QByteArray> docTerms = documentTermsDB.get(docId);
+            docTerms += documentFileNameTermsDB.get(docId);
+            docTerms += documentXattrTermsDB.get(docId);
+
+            if (docTerms.contains(term)) {
+                continue;
+            }
+            out << docId << " is missing " << QString::fromUtf8(term) << " from document terms db" << endl;
+        }
+    }
+}
+
diff --git a/src/engine/transaction.h b/src/engine/transaction.h
index 845d9f0..77e9ac0 100644
--- a/src/engine/transaction.h
+++ b/src/engine/transaction.h
@@ -119,6 +119,11 @@ public:
 
     void renameFilePath(quint64 id, const Document& newDoc);
 
+    // Debugging
+    void checkFsTree();
+    void checkTermsDbinPostingDb();
+    void checkPostingDbinTermsDb();
+
 private:
     Transaction(const Transaction& rhs) = delete;
 
diff --git a/src/tools/balooctl/main.cpp b/src/tools/balooctl/main.cpp
index 0e74761..305c82b 100644
--- a/src/tools/balooctl/main.cpp
+++ b/src/tools/balooctl/main.cpp
@@ -389,5 +389,20 @@ int main(int argc, char* argv[])
         app.exec();
     }
 
+    if (command == QStringLiteral("checkDb")) {
+        Database *db = globalDatabaseInstance();
+        if (!db->open(Database::OpenDatabase)) {
+            out << "Baloo Index could not be opened\n";
+            return 1;
+        }
+
+        Transaction tr(db, Transaction::ReadOnly);
+        tr.checkPostingDbinTermsDb();
+        tr.checkTermsDbinPostingDb();
+        out << "Checking file paths .. " << endl;
+        tr.checkFsTree();
+        return 0;
+    }
+
     return 0;
 }


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic