[prev in list] [next in list] [prev in thread] [next in thread] 

List:       pykde
Subject:    Re: [PyQt] Strange utf-8 conversions in PyQt3
From:       Hans-Peter Jansen <hpj () urpla ! net>
Date:       2012-12-16 23:49:19
Message-ID: 3409235.CpbRnqDLAI () pitu4
[Download RAW message or body]

Dear Phil,

Am Dienstag, 20. November 2012, 09:56:06 schrieb Phil Thompson:
> On Wed, 14 Nov 2012 00:39:52 +0100, Hans-Peter Jansen <hpj@urpla.net>
> 
> wrote:
> > Dear Phil,
> > 
> > since one of the more recent versions of SIP (4.14 or 4.14.1), MySQL
> > database
> > table data shows strange characters with PyQt3. The tables hold utf-8
> > data,
> > and all other database tools handle the utf-8 data as expected. Only
> 
> PyQt3
> 
> > apps show this anomaly, PyQt4 is still fine.
> > 
> > I can provoke this effect by adding a file sitecustomize.py
> > to /usr/lib64/python2.7/site-packages/, containing:
> > 
> > import sys
> > sys.setdefaultencoding('utf-8')
> > 
> > 
> > Then, a simple query results in: (check 2nd char)
> > 
> > 	LÃBBENAU/SPREEWALD
> > 
> > while this was expected:
> > 	LÜBBENAU/SPREEWALD
> > 
> > Converted to hex:
> > 
> > 00000000  4c c3 83 c2 9c 42 42 45  4e 41 55 2f 53 50 52 45
> > 
> > |L....BBENAU/SPRE|
> > 
> > 00000010  45 57 41 4c 44 0a 4c c3  9c 42 42 45 4e 41 55 2f
> > 
> > |EWALD.L..BBENAU/|
> > 
> > 00000020  53 50 52 45 45 57 41 4c  44 0a                    |SPREEWALD.|
> > 0000002a
> > 
> > Obviously, bytes 1-5 are converted in some strange way (like encoding
> > utf-8
> > two times). The bytes at offset 23 and 24 are the expected ones (utf-8
> > capital
> > U umlaut). If I remove sitecustomize.py, simple queries work, but
> 
> feeding
> 
> > values as QStrings back into any UI object results in the same mess
> 
> (e.g.
> 
> > reimplemented QDataTable.paintField, that formats some value, and calls
> > painter.drawText).
> > 
> > Does all this ring a bell for you? I'm pretty busted ATM.
> > 
> > TIA,
> > Pete
> > 
> > python: 2.7.3
> > sip:    4.14
> > qt4:    4.8.3
> > pyqt4:  4.9.5
> > qt3:    3.3.8c
> > pyqt3:  3.18.1
> > 
> > It's not an x86_64 vs i586 issue at least.
> 
> Nothing springs to mind, but it's not something I'm going to put any time
> into. You might try...
> 
> http://www.riverbankcomputing.com/static/Docs/sip4/using.html#building-a-private-copy-of-the-sip-module
> 
> ...with a known working version of SIP.

After further investigation, it became clear that the sip version was a red
herring. Sorry for that claim, Phil.

The encoding problem is a result of the MySQL clients (>= 5.1), which use utf8
by default, while the Qt3 driver lacks utf8 support.

FWIW, I was able to fix this issue with the attached patch.

Pete
["mysql-utf8.patch" (mysql-utf8.patch)]

Index: b/src/sql/drivers/mysql/qsql_mysql.cpp
===================================================================
--- a/src/sql/drivers/mysql/qsql_mysql.cpp
+++ b/src/sql/drivers/mysql/qsql_mysql.cpp
@@ -44,6 +44,7 @@
 #include <qdatetime.h>
 #include <qvaluevector.h>
 #include <qsqlrecord.h>
+#include <qtextcodec.h>
 
 #define QMYSQL_DRIVER_NAME "QMYSQL3"
 
@@ -89,19 +90,72 @@ bool QMYSQLOpenExtension::open( const QS
 class QMYSQLDriverPrivate
 {
 public:
-    QMYSQLDriverPrivate() : mysql(0) {}
+    QMYSQLDriverPrivate() : mysql(0),
+#ifndef QT_NO_TEXTCODEC
+	tc(QTextCodec::codecForLocale())
+#else
+        tc(0),
+#endif
+	{}
     MYSQL*     mysql;
+    QTextCodec *tc;
 };
 
+static inline QString toUnicode(QTextCodec *tc, const char *str)
+{
+#ifdef QT_NO_TEXTCODEC
+    Q_UNUSED(tc);
+    return QString::fromLatin1(str);
+#else
+    return tc->toUnicode(str);
+#endif
+}
+
+static inline QString toUnicode(QTextCodec *tc, const char *str, int length)
+{
+#ifdef QT_NO_TEXTCODEC
+    Q_UNUSED(tc);
+    return QString::fromLatin1(str, length);
+#else
+    return tc->toUnicode(str, length);
+#endif
+}
+
+static inline QByteArray fromUnicode(QTextCodec *tc, const QString &str)
+{
+#ifdef QT_NO_TEXTCODEC
+    Q_UNUSED(tc);
+    return str.toLatin1();
+#else
+    return tc->fromUnicode(str);
+#endif
+}
+
+
 class QMYSQLResultPrivate : public QMYSQLDriverPrivate
 {
 public:
-    QMYSQLResultPrivate() : QMYSQLDriverPrivate(), result(0) {}
+    QMYSQLResultPrivate(const QMYSQLDriver* dp) : QMYSQLDriverPrivate(), driver(dp), result(0) {}
+    const QMYSQLDriver* driver;
     MYSQL_RES* result;
     MYSQL_ROW  row;
     QValueVector<QVariant::Type> fieldTypes;
 };
 
+
+#ifndef QT_NO_TEXTCODEC
+static QTextCodec* codec(MYSQL* mysql)
+{
+#if MYSQL_VERSION_ID >= 32321
+    QTextCodec* heuristicCodec = QTextCodec::codecForName(mysql_character_set_name(mysql));
+    if (heuristicCodec)
+        return heuristicCodec;
+#endif
+    return QTextCodec::codecForLocale();
+}
+#endif // QT_NO_TEXTCODEC
+
+
 QSqlError qMakeError( const QString& err, int type, const QMYSQLDriverPrivate* p )
 {
     return QSqlError(QMYSQL_DRIVER_NAME ": " + err, QString(mysql_error( p->mysql )), type, mysql_errno( p->mysql ));
@@ -124,6 +178,9 @@ QVariant::Type qDecodeMYSQLType( int mys
 	type = (flags & UNSIGNED_FLAG) ? QVariant::ULongLong : QVariant::LongLong;
 	break;
     case FIELD_TYPE_DECIMAL :
+#if defined(FIELD_TYPE_NEWDECIMAL)
+    case FIELD_TYPE_NEWDECIMAL:
+#endif
     case FIELD_TYPE_FLOAT :
     case FIELD_TYPE_DOUBLE :
 	type = QVariant::Double;
@@ -158,7 +215,7 @@ QVariant::Type qDecodeMYSQLType( int mys
 QMYSQLResult::QMYSQLResult( const QMYSQLDriver* db )
 : QSqlResult( db )
 {
-    d =   new QMYSQLResultPrivate();
+    d =   new QMYSQLResultPrivate(db);
     d->mysql = db->d->mysql;
 }
 
@@ -281,6 +338,7 @@ QVariant QMYSQLResult::data( int field )
     default:
     case QVariant::String:
     case QVariant::CString:
+        val = toUnicode(d->driver->d->tc, d->row[field]);
 	return QVariant( val );
     }
 #ifdef QT_CHECK_RANGE
@@ -393,6 +451,9 @@ QMYSQLDriver::QMYSQLDriver( MYSQL * con,
     init();
     if ( con ) {
 	d->mysql = (MYSQL *) con;
+#ifndef QT_NO_TEXTCODEC
+        d->tc = codec(con);
+#endif
 	setOpen( TRUE );
 	setOpenError( FALSE );
         if (qMySqlConnectionCount == 1)
@@ -440,7 +501,7 @@ bool QMYSQLDriver::hasFeature( DriverFea
     case BLOB:
 	return TRUE;
     case Unicode:
-	return FALSE;
+	return TRUE;
     default:
 	return FALSE;
     }
@@ -530,6 +591,15 @@ bool QMYSQLDriver::open( const QString&
 	    setOpenError( TRUE );
 	    return FALSE;
     }
+
+#if (MYSQL_VERSION_ID >= 40113 && MYSQL_VERSION_ID < 50000) || MYSQL_VERSION_ID >= 50007
+    // force the communication to be utf8
+    mysql_set_character_set(d->mysql, "utf8");
+#endif
+#ifndef QT_NO_TEXTCODEC
+    d->tc = codec(d->mysql);
+#endif
+
     setOpen( TRUE );
     setOpenError( FALSE );
     return TRUE;
@@ -539,6 +609,7 @@ void QMYSQLDriver::close()
 {
     if ( isOpen() ) {
 	mysql_close( d->mysql );
+        d->mysql = NULL;
 	setOpen( FALSE );
 	setOpenError( FALSE );
     }



_______________________________________________
PyQt mailing list    PyQt@riverbankcomputing.com
http://www.riverbankcomputing.com/mailman/listinfo/pyqt

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic