[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    [nepomuk-web-extractor] e480a49: Merge branch 'console' of
From:       Sebastian Trueg <trueg () kde ! org>
Date:       2010-11-21 13:03:51
Message-ID: 20101121130351.9B1E8A60B4 () git ! kde ! org
[Download RAW message or body]

commit e480a49cf90ad61c3f2c371e7f4551e4d1ae0ed4
branch console
Merge: 901f1fa 08f1765
Author: Sebastian Trueg <trueg@kde.org>
Date:   Sun Nov 21 13:51:09 2010 +0100

    Merge branch 'console' of git.kde.org:nepomuk-web-extractor into console
    
    Conflicts:
    	runtime/service/plugins/olena/olenadatappreply.cpp

diff --cc runtime/service/plugins/olena/olenadatappreply.cpp
index d2ff062,216c72f..14b7e8b
--- a/runtime/service/plugins/olena/olenadatappreply.cpp
+++ b/runtime/service/plugins/olena/olenadatappreply.cpp
@@@ -22,20 -22,19 +22,43 @@@
  
  #include "text_extraction.hh"
  
++#include <QtCore/QChar>
  #include <QtGui/QImage>
  
--#include <webextractor/decisionfactory.h>
++#include "decisionfactory.h"
++#include "decisioncreator.h"
++
  #include <Nepomuk/File>
  #include <Nepomuk/Variant>
  #include <Nepomuk/Vocabulary/NIE>
  
  #include <KDebug>
 +#include <KLocale>
  
  namespace {
--// TODO: perform some useful checks
++/**
++ * We check two basic things:
++ * 1. is there any text at all
++ * 2. Is the letter/non-letter ratio useful - this is a primitive attempt to exclude garbage such as " W Y Å“e "ii" ï § _* ,"
++ */
  bool checkText( const QString& text ) {
--    return !text.isEmpty();
++    if(!text.isEmpty()) {
++        int letterCnt = 0;
++        int spaceCnt = 0;
++        Q_FOREACH(const QChar& c, text) {
++            if(c.isLetterOrNumber())
++                ++letterCnt;
++            else if(c.isSpace())
++                ++spaceCnt;
++        }
++        double letterRatio = double(letterCnt)/double(text.length());
++        double spaceRatio = double(spaceCnt)/double(text.length());
++        kDebug() << "Letter ration of" << text << letterRatio << spaceRatio;
++        return letterRatio > 0.7 && spaceRatio < 0.3;
++    }
++    else {
++        return false;
++    }
  }
  }
  


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic