[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    [kbibtex] src: Refactoring IConvLaTeX class to no longer use iconv
From:       Thomas Fischer <fischer () unix-ag ! uni-kl ! de>
Date:       2016-12-07 22:08:46
Message-ID: E1cEkOM-0002rd-N2 () code ! kde ! org
[Download RAW message or body]

Git commit 1c7aefc33c5c4798117ac1de1922ed0e74192542 by Thomas Fischer.
Committed on 07/12/2016 at 21:56.
Pushed by thomasfischer into branch 'master'.

Refactoring IConvLaTeX class to no longer use iconv

Refactoring class IConvLaTeX to no longer use iconv to encode
Unicode strings into various encodings (ISO-8859-1, EUC-KR, ...).
Various platforms do not easily provide an iconv implementation.

Instead, Qt's own QTextCodec class is used, although some quirks
of this class have to be handled, too (e.g. codecs that claim they
can encode a character but emit a question mark instead).

M  +1    -1    src/gui/widgets/filesettingswidget.cpp
M  +0    -12   src/io/CMakeLists.txt
M  +12   -15   src/io/fileexporterbibtex.cpp
M  +46   -123  src/io/iconvlatex.cpp
M  +6    -11   src/io/iconvlatex.h

https://commits.kde.org/kbibtex/1c7aefc33c5c4798117ac1de1922ed0e74192542

diff --git a/src/gui/widgets/filesettingswidget.cpp \
b/src/gui/widgets/filesettingswidget.cpp index 4fbb7c7c..4eec9129 100644
--- a/src/gui/widgets/filesettingswidget.cpp
+++ b/src/gui/widgets/filesettingswidget.cpp
@@ -131,7 +131,7 @@ void FileSettingsWidget::setupGUI()
     layout->addRow(i18n("Encoding:"), m_comboBoxEncodings);
     m_comboBoxEncodings->addItem(QStringLiteral("LaTeX"));
     m_comboBoxEncodings->insertSeparator(1);
-    m_comboBoxEncodings->addItems(IConvLaTeX::encodings());
+    m_comboBoxEncodings->addItems(IConvLaTeX::encodings);
     connect(m_comboBoxEncodings, \
static_cast<void(KComboBox::*)(int)>(&KComboBox::currentIndexChanged), this, \
&FileSettingsWidget::widgetsChanged);  
     m_comboBoxStringDelimiters = new KComboBox(false, this);
diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt
index a7a8af72..78a5beb0 100644
--- a/src/io/CMakeLists.txt
+++ b/src/io/CMakeLists.txt
@@ -98,18 +98,6 @@ target_link_libraries( kbibtexio
     kbibtexdata
 )
 
-# if(
-#     WIN32
-# )
-#     target_link_libraries(
-#         kbibtexio
-#         LINK_PRIVATE
-#         iconv
-#     )
-# endif(
-#     WIN32
-# )
-
 set_target_properties(
     kbibtexio
     PROPERTIES
diff --git a/src/io/fileexporterbibtex.cpp b/src/io/fileexporterbibtex.cpp
index d2a00ca1..5ae7ee54 100644
--- a/src/io/fileexporterbibtex.cpp
+++ b/src/io/fileexporterbibtex.cpp
@@ -22,6 +22,7 @@
 #include <QTextCodec>
 #include <QTextStream>
 #include <QStringList>
+#include <QTextCodec>
 
 #include <KSharedConfig>
 #include <KConfigGroup>
@@ -60,19 +61,15 @@ public:
     QString personNameFormatting;
     QString listSeparator;
     bool cancelFlag;
-    IConvLaTeX *iconvLaTeX;
+    QTextCodec *destinationCodec;
     KSharedConfigPtr config;
     const QString configGroupName, configGroupNameGeneral;
 
     FileExporterBibTeXPrivate(FileExporterBibTeX *parent)
-            : p(parent), keywordCasing(KBibTeX::cLowerCase), \
quoteComment(Preferences::qcNone), protectCasing(Qt::PartiallyChecked), \
cancelFlag(false), iconvLaTeX(NULL), \
config(KSharedConfig::openConfig(QStringLiteral("kbibtexrc"))), \
configGroupName(QStringLiteral("FileExporterBibTeX")), \
configGroupNameGeneral(QStringLiteral("General")) { +            : p(parent), \
keywordCasing(KBibTeX::cLowerCase), quoteComment(Preferences::qcNone), \
protectCasing(Qt::PartiallyChecked), cancelFlag(false), destinationCodec(NULL), \
config(KSharedConfig::openConfig(QStringLiteral("kbibtexrc"))), \
configGroupName(QStringLiteral("FileExporterBibTeX")), \
configGroupNameGeneral(QStringLiteral("General")) {  // nothing
     }
 
-    ~FileExporterBibTeXPrivate() {
-        delete iconvLaTeX;
-    }
-
     void loadState() {
         KConfigGroup configGroup(config, configGroupName);
         encoding = configGroup.readEntry(Preferences::keyEncoding, \
Preferences::defaultEncoding); @@ -164,7 +161,7 @@ public:
             iodevice->putChar(' ');
             iodevice->putChar('=');
             iodevice->putChar(' ');
-            iodevice->write(iconvLaTeX->encode(text));
+            iodevice->write(IConvLaTeX::encode(text, destinationCodec));
         }
         iodevice->putChar('\n');
         iodevice->putChar('}');
@@ -186,11 +183,11 @@ public:
         iodevice->putChar('@');
         iodevice->write(be->format(QStringLiteral("String"), \
keywordCasing).toLatin1().data());  iodevice->putChar('{');
-        iodevice->write(iconvLaTeX->encode(macro.key()));
+        iodevice->write(IConvLaTeX::encode(macro.key(), destinationCodec));
         iodevice->putChar(' ');
         iodevice->putChar('=');
         iodevice->putChar(' ');
-        iodevice->write(iconvLaTeX->encode(text));
+        iodevice->write(IConvLaTeX::encode(text, destinationCodec));
         iodevice->putChar('}');
         iodevice->putChar('\n');
         iodevice->putChar('\n');
@@ -207,14 +204,14 @@ public:
             iodevice->putChar('@');
             iodevice->write(be->format(QStringLiteral("Comment"), \
keywordCasing).toLatin1().data());  iodevice->putChar('{');
-            iodevice->write(iconvLaTeX->encode(text));
+            iodevice->write(IConvLaTeX::encode(text, destinationCodec));
             iodevice->putChar('}');
             iodevice->putChar('\n');
             iodevice->putChar('\n');
         } else if (quoteComment == Preferences::qcPercentSign) {
             QStringList commentLines = text.split('\n', QString::SkipEmptyParts);
             for (QStringList::Iterator it = commentLines.begin(); it != \
                commentLines.end(); ++it) {
-                const QByteArray line = iconvLaTeX->encode(*it);
+                const QByteArray line = IConvLaTeX::encode(*it, destinationCodec);
                 if (line.length() == 0 || line[0] != QLatin1Char('%')) {
                     /// Guarantee that every line starts with
                     /// a percent sign
@@ -225,7 +222,7 @@ public:
             }
             iodevice->putChar('\n');
         } else {
-            iodevice->write(iconvLaTeX->encode(text));
+            iodevice->write(IConvLaTeX::encode(text, destinationCodec));
             iodevice->putChar('\n');
             iodevice->putChar('\n');
         }
@@ -241,7 +238,7 @@ public:
         iodevice->putChar('{');
         /// Remember: strings from preamble do not get encoded,
         /// may contain raw LaTeX commands and code
-        iodevice->write(iconvLaTeX->encode(p->internalValueToBibTeX(preamble.value(), \
QString(), leRaw))); +        \
iodevice->write(IConvLaTeX::encode(p->internalValueToBibTeX(preamble.value(), \
QString(), leRaw), destinationCodec));  iodevice->putChar('}');
         iodevice->putChar('\n');
         iodevice->putChar('\n');
@@ -311,8 +308,8 @@ public:
 
     void applyEncoding(QString &encoding) {
         encoding = encoding.isEmpty() ? QStringLiteral("latex") : \
                encoding.toLower();
-        delete iconvLaTeX;
-        iconvLaTeX = new IConvLaTeX(encoding == QStringLiteral("latex") ? \
QStringLiteral("us-ascii") : encoding); +        destinationCodec = \
QTextCodec::codecForName(encoding == QStringLiteral("latex") ? "us-ascii" : \
encoding.toLatin1()); +        qCWarning(LOG_KBIBTEX_IO) << "destinationCodec=" << \
destinationCodec->name();  }
 
     bool requiresPersonQuoting(const QString &text, bool isLastName) {
diff --git a/src/io/iconvlatex.cpp b/src/io/iconvlatex.cpp
index 9a4562ca..77ccc270 100644
--- a/src/io/iconvlatex.cpp
+++ b/src/io/iconvlatex.cpp
@@ -1,5 +1,5 @@
 /***************************************************************************
- *   Copyright (C) 2004-2014 by Thomas Fischer <fischer@unix-ag.uni-kl.de> *
+ *   Copyright (C) 2004-2016 by Thomas Fischer <fischer@unix-ag.uni-kl.de> *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
@@ -17,144 +17,67 @@
 
 #include "iconvlatex.h"
 
-#include <iconv.h>
-
 #include <QStringList>
+#include <QTextCodec>
+#include <QDebug>
+#include "logging_io.h"
 
 #include "encoderlatex.h"
 
-QStringList IConvLaTeX::encodingList;
-
-class IConvLaTeX::IConvLaTeXPrivate
-{
-private:
-    // UNUSED IConvLaTeX *p;
-
-public:
-    const QString destEncoding;
-    iconv_t iconvHandle;
-
-    IConvLaTeXPrivate(IConvLaTeX */* UNUSED parent*/, const QString &_destEncoding)
-            : destEncoding(_destEncoding) // , UNUSED p(parent)
-    {
-        iconvHandle = iconv_open(destEncoding.toLatin1().data(), "utf-8");
-    }
-
-    ~IConvLaTeXPrivate()
-    {
-        iconv_close(iconvHandle);
-    }
-};
-
-IConvLaTeX::IConvLaTeX(const QString &destEncoding)
-        : d(new IConvLaTeXPrivate(this, destEncoding))
+IConvLaTeX::IConvLaTeX()
 {
     /// nothing
 }
 
-IConvLaTeX::IConvLaTeX(const IConvLaTeX &other)
-        : d(new IConvLaTeXPrivate(this, other.d->destEncoding))
+QByteArray IConvLaTeX::encode(const QString &input, const QString \
&destinationEncoding)  {
-    /// nothing
+    QTextCodec *destinationCodec(QTextCodec::codecForName(destinationEncoding.toLatin1()));
 +    return encode(input, destinationCodec);
 }
 
-IConvLaTeX::~IConvLaTeX()
+QByteArray IConvLaTeX::encode(const QString &input, const QTextCodec \
*destinationCodec)  {
-    delete d;
-}
+    /// Invalid codec? Cannot do anything
+    if (destinationCodec == NULL)
+        return QByteArray();
 
-IConvLaTeX &IConvLaTeX::operator=(const IConvLaTeX &other) {
-    delete d;
-    d = new IConvLaTeXPrivate(this, other.d->destEncoding);
-    return *this;
-}
-
-QByteArray IConvLaTeX::encode(const QString &ninput)
-{
     /// Perform Canonical Decomposition followed by Canonical Composition
-    const QString input = ninput.normalized(QString::NormalizationForm_C);
-    /// Get an UTF-8 representation of the input string
-    QByteArray inputByteArray = input.toUtf8();
-    /// Limit the size of the output buffer
-    /// by making an educated guess of its maximum size
-    size_t maxOutputByteArraySize = inputByteArray.size() * 4 + 1024;
-#ifdef Q_WS_WIN
-    /// iconv on Windows likes to have it as const char *
-    const char *inputBuffer = inputByteArray.data();
-#else
-    /// iconv on Linux likes to have it as char *
-    char *inputBuffer = inputByteArray.data();
-#endif
-    QByteArray outputByteArray(maxOutputByteArraySize, '\0');
-    char *outputBuffer = outputByteArray.data();
-    size_t inputBufferBytesLeft = inputByteArray.size();
-    size_t ouputBufferBytesLeft = maxOutputByteArraySize;
-    Encoder *laTeXEncoder = EncoderLaTeX::instance();
-
-    while (iconv(d->iconvHandle, &inputBuffer, &inputBufferBytesLeft, &outputBuffer, \
                &ouputBufferBytesLeft) == (size_t)(-1) && inputBufferBytesLeft > 0) {
-        /// split text into character where iconv stopped and remaining text
-        QString remainingString = QString::fromUtf8(inputBuffer);
-        QChar problematicChar = remainingString.at(0);
-        remainingString = remainingString.mid(1);
+    const QString ninput = input.normalized(QString::NormalizationForm_C);
 
-        /// setup input buffer to continue with remaining text
-        inputByteArray = remainingString.toUtf8();
-        inputBuffer = inputByteArray.data();
-        inputBufferBytesLeft = inputByteArray.size();
-
-        /// encode problematic character in LaTeX encoding and append to output \
                buffer
-        const QString encodedProblem = laTeXEncoder->encode(problematicChar);
-        QByteArray encodedProblemByteArray = encodedProblem.toUtf8();
-        qstrncpy(outputBuffer, encodedProblemByteArray.data(), \
                ouputBufferBytesLeft);
-        ouputBufferBytesLeft -= encodedProblemByteArray.size();
-        outputBuffer += encodedProblemByteArray.size();
-    }
-
-    /// cut away unused space
-    outputByteArray.resize(maxOutputByteArraySize - ouputBufferBytesLeft);
-
-    return outputByteArray;
-}
-
-const QStringList IConvLaTeX::encodings()
-{
-    if (encodingList.isEmpty()) {
-        /* FIXME this list will contain encodings that are irreversible!
-        QProcess iconvProgram;
-        QStringList iconvProgramArgs = QStringList() << "--list";
-        iconvProgram.start(QStringLiteral("iconv"), iconvProgramArgs);
-        iconvProgram.waitForStarted(10000);
-        if (iconvProgram.state() == QProcess::Running) {
-            iconvProgram.waitForReadyRead(10000);
-            encodingList.clear();
-            QString allText = "";
-            while (iconvProgram.canReadLine()) {
-                allText += iconvProgram.readAllStandardOutput();
-                iconvProgram.waitForReadyRead(10000);
-            }
-            iconvProgram.waitForFinished(10000);
-            iconvProgram.close();
-
-            encodingList = allText.replace("//", "").split('\n', \
QString::SkipEmptyParts); +    QByteArray result;
+    Encoder *laTeXEncoder = EncoderLaTeX::instance();
+    /// Build result, character by character
+    foreach (const QChar & c, ninput) {
+        /// Get byte sequence representing current character in chosen codec
+        const QByteArray cba = destinationCodec->fromUnicode(c);
+        if (destinationCodec->canEncode(c) && (c == QChar(0x003f /** question mark \
*/) || cba.size() != 1 || cba[0] != 0x3f /** question mark */)) { +            /// \
Codec claims that it can encode current character, but some codecs +            /// \
still cannot encode character and simply return a question mark, so +            /// \
only accept question marks as encoding result if original character +            /// \
was question mark (assuming all codecs can encode question marks). +            \
result.append(cba); +        } else {
+            /// Chosen codec can NOT encode current Unicode character, so try to use
+            /// 'LaTeX encoder', which may translate 0x00c5 (A with ring above) into
+            /// '\AA'. LaTeX encoder returns UTF-8 representation if given character
+            /// cannot be encoded
+            result.append(laTeXEncoder->encode(QString(c)).toUtf8());
         }
-        */
-
-        /// approved encodings manually added to list
-        int dosCodepages[] = {437, 720, 737, 775, 850, 852, 855, 857, 858, 860, 861, \
                862, 863, 864, 865, 866, 869, -1};
-        int windowsCodepages[] = {1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, \
                1258, -1};
-        for (int *cur = dosCodepages; *cur > 0; ++cur)
-            encodingList << QStringLiteral("CP") + QString::number(*cur);
-        for (int *cur = windowsCodepages; *cur > 0; ++cur)
-            encodingList << QStringLiteral("CP") + QString::number(*cur);
-        for (int i = 1; i <= 16; ++i)
-            encodingList << QStringLiteral("ISO-8859-") + QString::number(i);
-        encodingList << QStringLiteral("KOI8-R");
-        for (int i = 1; i <= 10; ++i)
-            encodingList << QStringLiteral("Latin") + QString::number(i);
-        encodingList << QStringLiteral("UTF-8");
-        for (int *cur = windowsCodepages; *cur > 0; ++cur)
-            encodingList << QStringLiteral("Windows-") + QString::number(*cur);
     }
 
-    return encodingList;
+    return result;
 }
+
+const QStringList IConvLaTeX::encodings = QStringList()
+        /// the classics
+        << QStringLiteral("US-ASCII") /** effectively like 'LaTeX' encoding */
+        /// ISO 8859 a.k.a. Latin codecs
+        << QStringLiteral("ISO-8859-1") << QStringLiteral("ISO-8859-2") << \
QStringLiteral("ISO-8859-3") << QStringLiteral("ISO-8859-4") << \
QStringLiteral("ISO-8859-5") << QStringLiteral("ISO-8859-6") << \
QStringLiteral("ISO-8859-7") << QStringLiteral("ISO-8859-8") << \
QStringLiteral("ISO-8859-9") << QStringLiteral("ISO-8859-10") << \
QStringLiteral("ISO-8859-13") << QStringLiteral("ISO-8859-14") << \
QStringLiteral("ISO-8859-15") << QStringLiteral("ISO-8859-16") +        /// various \
Unicode codecs +        << QStringLiteral("UTF-8") << QStringLiteral("UTF-16") << \
QStringLiteral("UTF-16BE") << QStringLiteral("UTF-16LE") << QStringLiteral("UTF-32") \
<< QStringLiteral("UTF-32BE") << QStringLiteral("UTF-32LE") +        /// various \
Cyrillic codecs +        << QStringLiteral("KOI8-R") << QStringLiteral("KOI8-U")
+        /// various CJK codecs
+        << QStringLiteral("Big5") << QStringLiteral("Big5-HKSCS") << \
QStringLiteral("GB18030") << QStringLiteral("EUC-JP") << QStringLiteral("EUC-KR") << \
QStringLiteral("ISO 2022-JP") << QStringLiteral("Shift-JIS") +        /// Windows \
codecs +        << QStringLiteral("Windows-1250") << QStringLiteral("Windows-1251") \
<< QStringLiteral("Windows-1252") << QStringLiteral("Windows-1253") << \
QStringLiteral("Windows-1254") << QStringLiteral("Windows-1255") << \
QStringLiteral("Windows-1256") << QStringLiteral("Windows-1257") << \
                QStringLiteral("Windows-1258");
diff --git a/src/io/iconvlatex.h b/src/io/iconvlatex.h
index 2b101b74..1706f88a 100644
--- a/src/io/iconvlatex.h
+++ b/src/io/iconvlatex.h
@@ -1,5 +1,5 @@
 /***************************************************************************
- *   Copyright (C) 2004-2014 by Thomas Fischer <fischer@unix-ag.uni-kl.de> *
+ *   Copyright (C) 2004-2016 by Thomas Fischer <fischer@unix-ag.uni-kl.de> *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
@@ -23,6 +23,7 @@
 class QString;
 class QByteArray;
 class QStringList;
+class QTextCodec;
 
 /**
  * This class is a specialized wrapper around iconv. It will try to encode
@@ -38,20 +39,14 @@ class QStringList;
 class KBIBTEXIO_EXPORT IConvLaTeX
 {
 public:
-    explicit IConvLaTeX(const QString &destEncoding);
-    explicit IConvLaTeX(const IConvLaTeX &other);
-    ~IConvLaTeX();
-    IConvLaTeX &operator= (const IConvLaTeX &other);
+    static QByteArray encode(const QString &input, const QString \
&destinationEncoding); +    static QByteArray encode(const QString &input, const \
QTextCodec *destinationCodec);  
-    QByteArray encode(const QString &input);
-
-    static const QStringList encodings();
+    static const QStringList encodings;
 
 private:
-    class IConvLaTeXPrivate;
-    IConvLaTeXPrivate *d;
+    explicit IConvLaTeX();
 
-    static QStringList encodingList;
 };
 
 #endif // KBIBTEX_ICONVLATEX_H


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic