commit e480a49cf90ad61c3f2c371e7f4551e4d1ae0ed4 branch console
Merge: 901f1fa 08f1765
Author: Sebastian Trueg
Date: Sun Nov 21 13:51:09 2010 +0100

    Merge branch 'console' of git.kde.org:nepomuk-web-extractor into console

    Conflicts:
        runtime/service/plugins/olena/olenadatappreply.cpp

diff --cc runtime/service/plugins/olena/olenadatappreply.cpp
index d2ff062,216c72f..14b7e8b
--- a/runtime/service/plugins/olena/olenadatappreply.cpp
+++ b/runtime/service/plugins/olena/olenadatappreply.cpp
@@@ -22,20 -22,19 +22,43 @@@
  #include "text_extraction.hh"
++#include
  #include
--#include
++#include "decisionfactory.h"
++#include "decisioncreator.h"
++
  #include
  #include
  #include
  #include
 +#include
  namespace {
--// TODO: perform some useful checks
++/**
++ * We check two basic things:
++ * 1. is there any text at all
++ * 2. Is the letter/non-letter ratio useful - this is a primitive attempt to exclude garbage such as " W Y œe "ii" ï§ _* ,"
++ */
  bool checkText( const QString& text )
  {
--    return !text.isEmpty();
++    if(!text.isEmpty()) {
++        int letterCnt = 0;
++        int spaceCnt = 0;
++        Q_FOREACH(const QChar& c, text) {
++            if(c.isLetterOrNumber())
++                ++letterCnt;
++            else if(c.isSpace())
++                ++spaceCnt;
++        }
++        double letterRatio = double(letterCnt)/double(text.length());
++        double spaceRatio = double(spaceCnt)/double(text.length());
++        kDebug() << "Letter ration of" << text << letterRatio << spaceRatio;
++        return letterRatio > 0.7 && spaceRatio < 0.3;
++    }
++    else {
++        return false;
++    }
  }
  }