[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    [kopete/Applications/16.08] plugins/history: history: Fix storing messages with Unicode chars above 
From:       Pali_Rohár <pali.rohar () gmail ! com>
Date:       2016-08-15 22:18:26
Message-ID: E1bZQDC-0001Ju-Cr () code ! kde ! org
[Download RAW message or body]

Git commit ac6dde3775556ea92e4cdc4c5e1f682347da87c5 by Pali Rohár.
Committed on 14/08/2016 at 22:12.
Pushed by pali into branch 'Applications/16.08'.

history: Fix storing messages with Unicode chars above 0xFFFF

QtXML internally encodes text one 16-bit character at a time, so the two
halves of a surrogate pair are encoded separately. Therefore the UTF-16
codec must be used when serializing XML.

For portability convert UTF-16 surrogate pairs into XML entities.

The XML history file stored on disk is encoded in UTF-8; this patch sets
the codec for encoding/decoding it properly.

Before this patch, the low surrogate of a UTF-16 pair was converted to an
XML entity and the lone high surrogate was dropped (as it is invalid on its
own). This led to corrupted messages.

M  +8    -4    plugins/history/converter.cpp
M  +2    -0    plugins/history/historydialog.cpp
M  +25   -6    plugins/history/historylogger.cpp

http://commits.kde.org/kopete/ac6dde3775556ea92e4cdc4c5e1f682347da87c5

diff --git a/plugins/history/converter.cpp b/plugins/history/converter.cpp
index dd56014..bbf2c94 100644
--- a/plugins/history/converter.cpp
+++ b/plugins/history/converter.cpp
@@ -219,9 +219,11 @@ void HistoryPlugin::convertOldHistory()
 									                                             QString::fromLatin1( ".xml" ) )  );
 									if( file.open() )
 									{
-										QTextStream stream ( &file );
-										//stream.setEncoding( QTextStream::UnicodeUTF8 ); //???? oui ou non?
+										QString buf;
+										QTextStream stream( &buf, QIODevice::WriteOnly );
+										stream.setCodec( "UTF-16" ); // QtXML works only with UTF-16
 										doc.save( stream , 1 );
+										file.write( buf.toUtf8() );
 										file.finalize();
 									}
 								}
@@ -282,9 +284,11 @@ void HistoryPlugin::convertOldHistory()
 						                                            QString::fromLatin1( ".xml" ) )  );
 						if( file.open() )
 						{
-							QTextStream stream ( &file );
-							//stream.setEncoding( QTextStream::UnicodeUTF8 ); //???? oui ou non?
+							QString buf;
+							QTextStream stream( &buf, QIODevice::WriteOnly );
+							stream.setCodec( "UTF-16" ); // QtXML works only with UTF-16
 							doc.save( stream ,1 );
+							file.write( buf.toUtf8() );
 							file.finalize();
 						}
 					}
diff --git a/plugins/history/historydialog.cpp b/plugins/history/historydialog.cpp
index 0720cb7..63e639c 100644
--- a/plugins/history/historydialog.cpp
+++ b/plugins/history/historydialog.cpp
@@ -506,6 +506,7 @@ void HistoryDialog::slotSearch()
 				}
 
 				QTextStream stream(&file);
+				stream.setCodec("UTF-8");
 				QString textLine;
     			QString msgItem;
 				while(!stream.atEnd())
@@ -691,6 +692,7 @@ QString HistoryDialog::escapeXMLText(const QString& text) const
 
 	QString excapedText;
 	QTextStream stream(&excapedText, QIODevice::WriteOnly);
+	stream.setCodec("UTF-16"); // QtXML works only with UTF-16
 	tmpTextNode.save(stream, 0);
 	return excapedText;
 }
diff --git a/plugins/history/historylogger.cpp b/plugins/history/historylogger.cpp
index 8a34c85..191d7db 100644
--- a/plugins/history/historylogger.cpp
+++ b/plugins/history/historylogger.cpp
@@ -293,12 +293,28 @@ void HistoryLogger::appendMessage( const Kopete::Message &msg , const Kopete::Co
 	msgElem.setAttribute( "nick",  msg.from()->displayName() ); //do we have to set this?
 	msgElem.setAttribute( "time", msg.timestamp().toString("d h:m:s") );
 
-	QDomText msgNode;
-
+	QString body;
 	if ( msg.format() != Qt::PlainText )
-		msgNode = doc.createTextNode( msg.escapedBody() );
+		body = msg.escapedBody();
 	else
-		msgNode = doc.createTextNode( Qt::escape(msg.plainBody()).replace('\n', "<br />") );
+		body = Qt::escape(msg.plainBody()).replace('\n', "<br />");
+
+	// Convert UTF-16 surrogate pairs into XML entities
+	if ( body.contains(QRegExp("[\\xD800-\\xDBFF][\\xDC00-\\xDFFF]")) ) {
+		QString bodyEsc;
+		int bodySize = body.size();
+		for ( int i = 0; i < bodySize; ++i ) {
+			if ( i + 1 < bodySize && body.at(i).isHighSurrogate() && body.at(i+1).isLowSurrogate() ) {
+				bodyEsc += "&#x" + QString::number(QChar::surrogateToUcs4(body.at(i), body.at(i+1)), 16) + ';';
+				++i;
+				continue;
+			}
+			bodyEsc += body.at(i);
+		}
+		body = bodyEsc;
+	}
+
+	QDomText msgNode = doc.createTextNode( body );
 
 	docElem.appendChild( msgElem );
 	msgElem.appendChild( msgNode );
@@ -342,9 +358,11 @@ void HistoryLogger::saveToDisk()
 	KSaveFile file( m_toSaveFileName );
 	if( file.open() )
 	{
-		QTextStream stream ( &file );
-		//stream.setEncoding( QTextStream::UnicodeUTF8 ); //???? oui ou non?
+		QString buf;
+		QTextStream stream( &buf, QIODevice::WriteOnly );
+		stream.setCodec( "UTF-16" ); // QtXML works only with UTF-16
 		m_toSaveDocument.save( stream, 1 );
+		file.write( buf.toUtf8() );
 		file.finalize();
 
 		m_saveTimerTime=qMin(t.elapsed()*1000, 300000);
@@ -872,6 +890,7 @@ QList<int> HistoryLogger::getDaysForMonth(QDate date)
 			continue;
 		}
 		QTextStream stream(&file);
+		stream.setCodec("UTF-8");
 		QString fullText = stream.readAll();
 		file.close();
 

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic