[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: [nepomuk-web-extractor] e480a49: Merge branch 'console' of
From: Sebastian Trueg <trueg () kde ! org>
Date: 2010-11-21 13:03:51
Message-ID: 20101121130351.9B1E8A60B4 () git ! kde ! org
[Download RAW message or body]
commit e480a49cf90ad61c3f2c371e7f4551e4d1ae0ed4
branch console
Merge: 901f1fa 08f1765
Author: Sebastian Trueg <trueg@kde.org>
Date: Sun Nov 21 13:51:09 2010 +0100
Merge branch 'console' of git.kde.org:nepomuk-web-extractor into console
Conflicts:
runtime/service/plugins/olena/olenadatappreply.cpp
diff --cc runtime/service/plugins/olena/olenadatappreply.cpp
index d2ff062,216c72f..14b7e8b
--- a/runtime/service/plugins/olena/olenadatappreply.cpp
+++ b/runtime/service/plugins/olena/olenadatappreply.cpp
@@@ -22,20 -22,19 +22,43 @@@
#include "text_extraction.hh"
++#include <QtCore/QChar>
#include <QtGui/QImage>
--#include <webextractor/decisionfactory.h>
++#include "decisionfactory.h"
++#include "decisioncreator.h"
++
#include <Nepomuk/File>
#include <Nepomuk/Variant>
#include <Nepomuk/Vocabulary/NIE>
#include <KDebug>
+#include <KLocale>
namespace {
--// TODO: perform some useful checks
++/**
++ * We check two basic things:
++ * 1. is there any text at all
++ * 2. Is the letter/non-letter ratio useful - this is a primitive attempt to exclude garbage such as " W Y Å“e "ii" ï § _* ,"
++ */
// Heuristic plausibility check for a piece of text.
// Returns false for an empty string. Otherwise returns true only when more
// than 70% of the characters satisfy QChar::isLetterOrNumber() and fewer than
// 30% satisfy QChar::isSpace(); both ratios are relative to text.length().
bool checkText( const QString& text ) {
-- return !text.isEmpty();
++ if(!text.isEmpty()) {
++ int letterCnt = 0;
++ int spaceCnt = 0;
// Each character is counted in at most one tally: letters/numbers first,
// then whitespace; any other character counts toward neither.
++ Q_FOREACH(const QChar& c, text) {
++ if(c.isLetterOrNumber())
++ ++letterCnt;
++ else if(c.isSpace())
++ ++spaceCnt;
++ }
// text is non-empty in this branch, so text.length() is never zero here.
++ double letterRatio = double(letterCnt)/double(text.length());
++ double spaceRatio = double(spaceCnt)/double(text.length());
++ kDebug() << "Letter ration of" << text << letterRatio << spaceRatio;
// NOTE(review): because the two tallies are mutually exclusive, a letter
// ratio above 0.7 already appears to imply a space ratio below 0.3 - confirm
// whether the second condition is intentional.
++ return letterRatio > 0.7 && spaceRatio < 0.3;
++ }
++ else {
// Empty text is never accepted.
++ return false;
++ }
}
}
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic