[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: [baloo] src: Balooctl: Add checkDb command
From: Vishesh Handa <me () vhanda ! in>
Date: 2015-09-30 16:41:39
Message-ID: E1ZhKRn-0001io-UO () scm ! kde ! org
[Download RAW message or body]
Git commit 9da558f643eaac1e6c37aba5a2ff1dfe2c84bf98 by Vishesh Handa.
Committed on 30/09/2015 at 16:05.
Pushed by vhanda into branch 'master'.
Balooctl: Add checkDb command
This command can be used by developers to check whether their PostingDB and
DocumentUrlDB are in a valid state. We have a few bugs caused by the
databases not all being updated correctly. This will help us diagnose them
better.
REVIEW: 125424
M +137 -0 src/engine/transaction.cpp
M +5 -0 src/engine/transaction.h
M +15 -0 src/tools/balooctl/main.cpp
http://commits.kde.org/baloo/9da558f643eaac1e6c37aba5a2ff1dfe2c84bf98
diff --git a/src/engine/transaction.cpp b/src/engine/transaction.cpp
index 8e9700c..b670662 100644
--- a/src/engine/transaction.cpp
+++ b/src/engine/transaction.cpp
@@ -467,3 +467,140 @@ DatabaseSize Transaction::dbSize()
return dbSize;
}
+
+//
+// Debugging
+//
+/**
+ * Debugging helper: lists every document id that is referenced by the
+ * PostingDB but for which the DocumentUrlDB returns an empty file path.
+ *
+ * For each such id, the terms associated with it are printed to stdout from
+ * four places: the PostingDB (reconstructed by scanning every posting list),
+ * and the DocumentTerms, FileNameTerms and XAttrTerms databases. A summary
+ * with the number and percentage of invalid entries is printed at the end.
+ */
+void Transaction::checkFsTree()
+{
+    DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
+    DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
+    DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
+    DocumentUrlDB docUrlDb(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
+    PostingDB postingDb(m_dbis.postingDbi, m_txn);
+
+    // Snapshot of the PostingDB: term -> list of document ids.
+    const QMap<QByteArray, PostingList> map = postingDb.toTestMap();
+
+    // Every document id that appears in at least one posting list.
+    // Iterating the map directly visits the values in key order without
+    // building an intermediate list or copying each posting list.
+    QSet<quint64> allIds;
+    for (const PostingList& list : map) {
+        for (quint64 id : list) {
+            allIds << id;
+        }
+    }
+
+    QTextStream out(stdout);
+    out << "Total Document IDs: " << allIds.size() << endl;
+
+    int count = 0;
+    for (quint64 id : allIds) {
+        const QByteArray url = docUrlDb.get(id);
+        if (!url.isEmpty()) {
+            continue;
+        }
+
+        const auto terms = documentTermsDB.get(id);
+        const auto fileNameTerms = documentFileNameTermsDB.get(id);
+        const auto xAttrTerms = documentXattrTermsDB.get(id);
+
+        // Let's reverse engineer the terms: collect every term whose posting
+        // list mentions this id.
+        QList<QByteArray> newTerms;
+        for (auto it = map.constBegin(); it != map.constEnd(); ++it) {
+            if (it.value().contains(id)) {
+                newTerms << it.key();
+            }
+        }
+
+        out << "Missing filePath for " << id << endl;
+        out << "\tPostingDB Terms: ";
+        for (const QByteArray& term : newTerms) {
+            out << term << " ";
+        }
+        out << endl;
+
+        out << "\tDocumentTermsDB: ";
+        for (const QByteArray& term : terms) {
+            out << term << " ";
+        }
+        out << endl;
+
+        // Print the terms of the matching database in each section below;
+        // previously both sections repeated the DocumentTermsDB terms.
+        out << "\tFileNameTermsDB: ";
+        for (const QByteArray& term : fileNameTerms) {
+            out << term << " ";
+        }
+        out << endl;
+
+        out << "\tXAttrTermsDB: ";
+        for (const QByteArray& term : xAttrTerms) {
+            out << term << " ";
+        }
+        out << endl;
+
+        count++;
+    }
+
+    // Avoid 0/0 (which would print "nan") when the PostingDB holds no ids.
+    const double percent = allIds.isEmpty() ? 0.0 : count * 100.0 / allIds.size();
+    out << "Invalid Entries: " << count << " (" << percent << "%)" << endl;
+}
+
+/**
+ * Debugging helper: for every document id found in the PostingDB, fetches the
+ * terms stored for it in the DocumentTerms, XAttrTerms and FileNameTerms
+ * databases and checks that the posting list of each of those terms contains
+ * the id. Every (id, term) pair that fails the check is printed to stdout.
+ */
+void Transaction::checkTermsDbinPostingDb()
+{
+    DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
+    DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
+    DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
+    PostingDB postingDb(m_dbis.postingDbi, m_txn);
+
+    // Gather the set of document ids referenced by any posting list.
+    const QMap<QByteArray, PostingList> postings = postingDb.toTestMap();
+    QSet<quint64> knownIds;
+    for (auto entry = postings.constBegin(); entry != postings.constEnd(); ++entry) {
+        for (quint64 docId : entry.value()) {
+            knownIds.insert(docId);
+        }
+    }
+
+    QTextStream out(stdout);
+    out << "PostingDB check .." << endl;
+
+    for (quint64 docId : knownIds) {
+        // All terms recorded for this document, across the three term DBs.
+        QVector<QByteArray> docTerms = documentTermsDB.get(docId);
+        docTerms += documentXattrTermsDB.get(docId);
+        docTerms += documentFileNameTermsDB.get(docId);
+
+        for (const QByteArray& term : docTerms) {
+            // Each term must map back to this document in the posting db.
+            const PostingList postingList = postingDb.get(term);
+            if (postingList.contains(docId)) {
+                continue;
+            }
+            out << docId << " is missing term " << term << endl;
+        }
+    }
+}
+
+/**
+ * Debugging helper: walks every (term, document id) pair stored in the
+ * PostingDB and checks that the term is also listed for that document in the
+ * DocumentTerms, FileNameTerms or XAttrTerms database. Every pair that is
+ * missing from all three is printed to stdout.
+ */
+void Transaction::checkPostingDbinTermsDb()
+{
+    DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
+    DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
+    DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
+    PostingDB postingDb(m_dbis.postingDbi, m_txn);
+
+    const QMap<QByteArray, PostingList> postings = postingDb.toTestMap();
+
+    QTextStream out(stdout);
+    out << "DocumentTermsDB check .." << endl;
+
+    for (auto entry = postings.constBegin(); entry != postings.constEnd(); ++entry) {
+        const QByteArray term = entry.key();
+        const PostingList docIds = entry.value();
+
+        for (quint64 docId : docIds) {
+            // Union of the terms known for this document in the three term DBs.
+            QVector<QByteArray> docTerms = documentTermsDB.get(docId);
+            docTerms += documentFileNameTermsDB.get(docId);
+            docTerms += documentXattrTermsDB.get(docId);
+
+            if (docTerms.contains(term)) {
+                continue;
+            }
+            out << docId << " is missing " << QString::fromUtf8(term)
+                << " from document terms db" << endl;
+        }
+    }
+}
+
diff --git a/src/engine/transaction.h b/src/engine/transaction.h
index 845d9f0..77e9ac0 100644
--- a/src/engine/transaction.h
+++ b/src/engine/transaction.h
@@ -119,6 +119,11 @@ public:
void renameFilePath(quint64 id, const Document& newDoc);
+ // Debugging
+ // The three checks below print their findings to stdout.
+ // Reports document ids found in the PostingDB for which the DocumentUrlDB
+ // returns an empty path.
+ void checkFsTree();
+ // Reports (id, term) pairs from the document term DBs where the term's
+ // posting list does not contain the id.
+ void checkTermsDbinPostingDb();
+ // Reports (id, term) pairs from the PostingDB where the term is not
+ // listed for the id in any of the document term DBs.
+ void checkPostingDbinTermsDb();
+
private:
Transaction(const Transaction& rhs) = delete;
diff --git a/src/tools/balooctl/main.cpp b/src/tools/balooctl/main.cpp
index 0e74761..305c82b 100644
--- a/src/tools/balooctl/main.cpp
+++ b/src/tools/balooctl/main.cpp
@@ -389,5 +389,20 @@ int main(int argc, char* argv[])
app.exec();
}
+ if (command == QStringLiteral("checkDb")) {
+ Database *db = globalDatabaseInstance();
+ if (!db->open(Database::OpenDatabase)) {
+ out << "Baloo Index could not be opened\n";
+ return 1;
+ }
+
+ Transaction tr(db, Transaction::ReadOnly);
+ tr.checkPostingDbinTermsDb();
+ tr.checkTermsDbinPostingDb();
+ out << "Checking file paths .. " << endl;
+ tr.checkFsTree();
+ return 0;
+ }
+
return 0;
}
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic