[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: [okular] generators/txt: Drop detectEncoding method and just feed more data to KEncodingProber
From: Albert Astals Cid <aacid () kde ! org>
Date: 2013-02-28 22:58:20
Message-ID: 20130228225820.789DFA604F () git ! kde ! org
[Download RAW message or body]
Git commit b5abd493ab80f642388b1ae055e7f06cfa1c59d5 by Albert Astals Cid, on behalf of Azat Khuzhin.
Committed on 28/02/2013 at 23:57.
Pushed by aacid into branch 'master'.
Drop detectEncoding method and just feed more data to KEncodingProber
REVIEW: 109119
M +18 -15 generators/txt/document.cpp
M +0 -2 generators/txt/document.h
http://commits.kde.org/okular/b5abd493ab80f642388b1ae055e7f06cfa1c59d5
diff --git a/generators/txt/document.cpp b/generators/txt/document.cpp
index badb647..29aa2db 100644
--- a/generators/txt/document.cpp
+++ b/generators/txt/document.cpp
@@ -40,29 +40,32 @@ Document::~Document()
{
}
-QByteArray Document::detectEncoding( const QByteArray &array )
+QString Document::toUnicode( const QByteArray &array )
{
- // TODO: see to "katetextloader.h"
+ QByteArray encoding;
KEncodingProber prober(KEncodingProber::Universal);
- prober.feed(array);
- if (!prober.confidence() > 0.5)
+ int charsFeeded = 0;
+ int chunkSize = 3000; // ~= number of symbols in page.
+
+ // Try to detect encoding.
+ while ( encoding.isEmpty() && charsFeeded < array.size() )
{
- kDebug() << "Can't detect charset";
- return QByteArray();
- }
+ prober.feed( array.mid( charsFeeded, chunkSize ) );
+ charsFeeded += chunkSize;
-#ifdef TXT_DEBUG
- kDebug() << "Detected" << prober.encoding() << "encoding";
-#endif
- return prober.encoding();
-}
+ if (prober.confidence() >= 0.5)
+ {
+ encoding = prober.encoding();
+ break;
+ }
+ }
-QString Document::toUnicode( const QByteArray &array )
-{
- const QByteArray encoding = detectEncoding( array );
if ( encoding.isEmpty() )
{
return QString();
}
+
+ kDebug() << "Detected" << prober.encoding() << "encoding"
+ << "based on" << charsFeeded << "chars";
return QTextCodec::codecForName( encoding )->toUnicode( array );
}
diff --git a/generators/txt/document.h b/generators/txt/document.h
index 08babea..bbf2923 100644
--- a/generators/txt/document.h
+++ b/generators/txt/document.h
@@ -19,8 +19,6 @@ namespace Txt
~Document();
private:
- // TODO: write a better detecter, based on some number of chunks
- QByteArray detectEncoding( const QByteArray &array );
QString toUnicode( const QByteArray &array );
};
}
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic