[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: KDE/kdelibs
From: Germain Garand <germain () ebooksfrance ! org>
Date: 2009-06-07 0:13:38
Message-ID: 1244333618.461001.26420.nullmailer () svn ! kde ! org
[Download RAW message or body]
SVN commit 978411 by ggarand:
.abort XML parsing when invalid characters were found in the input stream
(acid3-70)
.add helper methods to KEncodingDetector for reporting such errors
.add unit test asserting some KEncodingDetector properties
kindly reviewed by Maksim Orlovich.
CCBUG: 156947
M +18 -0 kdecore/localization/kencodingdetector.cpp
M +25 -0 kdecore/localization/kencodingdetector.h
M +1 -0 kdecore/tests/CMakeLists.txt
M +2 -3 khtml/khtml_part.cpp
M +1 -0 khtml/xml/dom_docimpl.cpp
M +4 -0 khtml/xml/dom_docimpl.h
M +12 -2 khtml/xml/xml_tokenizer.cpp
M +2 -2 khtml/xml/xml_tokenizer.h
--- trunk/KDE/kdelibs/kdecore/localization/kencodingdetector.cpp #978410:978411
@@ -696,6 +696,19 @@
return d->m_decoder;
}
+void KEncodingDetector::resetDecoder()
+{
+ assert(d->m_defaultCodec);
+ d->m_bufferForDefferedEncDetection.clear();
+ d->m_writtingHappened = false;
+ d->m_analyzeCalled = false;
+ d->m_multiByte = 0;
+ delete d->m_decoder;
+ if (!d->m_codec)
+ d->m_codec = d->m_defaultCodec;
+ d->m_decoder = d->m_codec->makeDecoder();
+}
+
bool KEncodingDetector::setEncoding(const char *_encoding, EncodingChoiceSource type)
{
QTextCodec *codec;
@@ -840,6 +853,11 @@
return QString();
}
+bool KEncodingDetector::decodedInvalidCharacters() const
+{
+ return d->m_decoder ? d->m_decoder->hasFailure() : false;
+}
+
QString KEncodingDetector::flush()
{
if (d->m_bufferForDefferedEncDetection.isEmpty())
--- trunk/KDE/kdelibs/kdecore/localization/kencodingdetector.h #978410:978411
@@ -148,6 +148,31 @@
QString decodeWithBuffering(const char *data, int len);
/**
+ * This method checks whether invalid characters were found
+ * during a decoding operation.
+ *
+ * Note that this bit is never reset once invalid characters have been found.
+ * To force a reset, either change the encoding using setEncoding() or call
+ * resetDecoder()
+ *
+ * @returns a boolean reflecting said state.
+ * @since 4.3
+ * @see resetDecoder() setEncoding()
+ */
+ bool decodedInvalidCharacters() const;
+
+ /**
+ * Resets the decoder. Any stateful decoding information (such as resulting from previous calls
+ * to decodeWithBuffering()) will be lost.
+ * Will Reset the state of decodedInvalidCharacters() as a side effect.
+ *
+ * @since 4.3
+ * @see decodeWithBuffering() decodedInvalidCharacters()
+ *
+ */
+ void resetDecoder();
+
+ /**
* Convenience method to be used with decodeForHtml. Flushes buffer.
* @see decodeForHtml()
*/
--- trunk/KDE/kdelibs/kdecore/tests/CMakeLists.txt #978410:978411
@@ -57,6 +57,7 @@
ktcpsockettest
ksycocathreadtest
kdebug_unittest
+ kencodingdetectortest
)
if(UNIX)
--- trunk/KDE/kdelibs/khtml/khtml_part.cpp #978410:978411
@@ -6840,10 +6840,9 @@
? QByteArray( parentPart()->d->m_decoder->encoding() ) : settings()->encoding().toLatin1();
dec->setEncoding(defaultEncoding.constData(), KEncodingDetector::DefaultEncoding);
}
-#ifdef APPLE_CHANGES
+
if (d->m_doc)
- d->m_doc->setDecoder(d->m_decoder);
-#endif
+ d->m_doc->setDecoder(dec);
dec->setAutoDetectLanguage( d->m_autoDetectLanguage );
return dec;
}
--- trunk/KDE/kdelibs/khtml/xml/dom_docimpl.cpp #978410:978411
@@ -409,6 +409,7 @@
m_docChanged = false;
m_elemSheet = 0;
m_tokenizer = 0;
+ m_decoder = 0;
m_doctype = 0;
m_implementation = _implementation;
m_implementation->ref();
--- trunk/KDE/kdelibs/khtml/xml/dom_docimpl.h #978410:978411
@@ -50,6 +50,7 @@
class QTextCodec;
class KHTMLView;
class QEventLoop;
+class KEncodingDetector;
namespace khtml {
class Tokenizer;
@@ -370,6 +371,8 @@
CSSStyleSheetImpl* elementSheet();
virtual khtml::Tokenizer *createTokenizer();
khtml::Tokenizer *tokenizer() { return m_tokenizer; }
+ KEncodingDetector* decoder() { return m_decoder; }
+ void setDecoder(KEncodingDetector* enc) { m_decoder = enc; }
void setPaintDevice(QPaintDevice *dev){m_paintDevice = dev;}
QPaintDevice *paintDevice() const {return m_paintDevice;}
@@ -613,6 +616,7 @@
khtml::DocLoader *m_docLoader;
khtml::Tokenizer *m_tokenizer;
+ KEncodingDetector *m_decoder;
KUrl m_url;
KUrl m_baseURL;
QString m_baseTarget;
--- trunk/KDE/kdelibs/khtml/xml/xml_tokenizer.cpp #978410:978411
@@ -37,6 +37,7 @@
#include <QtCore/QVariant>
#include <kdebug.h>
#include <klocale.h>
+#include <kencodingdetector.h>
// SVG includes
#include "svg/SVGScriptElement.h"
@@ -96,7 +97,7 @@
}
XMLHandler::XMLHandler(DocumentImpl *_doc, KHTMLView *_view)
- : errorLine(0)
+ : errorLine(-1)
{
m_doc = _doc;
m_view = _view;
@@ -464,6 +465,7 @@
{
if ( !m_noErrors && appendData )
return;
+
// check if we try to re-enter inside write()
// if so buffer the data
if (m_insideWrite) {
@@ -480,6 +482,14 @@
}
m_noErrors = m_reader.parseContinue();
+ if (m_doc->decoder() && m_doc->decoder()->decodedInvalidCharacters()) {
+ // any invalid character spotted by the decoder is fatal, per XML 1.0 spec. Tested by Acid 3 - 70
+ m_handler.fatalError( QXmlParseException( m_handler.errorString() ) ); // ### FIXME: make that more informative after string freeze : i18n("input stream contains invalid characters")
+ m_noErrors = false;
+ finish();
+ return;
+ }
+
// check if while parsing we tried to re-enter write() method so now we have some buffered data we need to write to document
while (m_noErrors && !m_bufferedData.isEmpty()) {
m_source.appendXML(m_bufferedData);
@@ -520,7 +530,7 @@
static_cast<NodeImpl*>(m_doc)->removeChild(m_doc->firstChild(),exceptioncode);
QString line, errorLocPtr;
- if ( m_handler.errorLine ) {
+ if ( m_handler.errorLine != -1 ) {
QString xmlCode = m_source.data();
QTextStream stream(&xmlCode, QIODevice::ReadOnly);
for (unsigned long lineno = 0; lineno < m_handler.errorLine-1; lineno++)
--- trunk/KDE/kdelibs/khtml/xml/xml_tokenizer.h #978410:978411
@@ -93,8 +93,8 @@
bool fatalError( const QXmlParseException& exception );
- unsigned long errorLine;
- unsigned long errorCol;
+ int errorLine;
+ int errorCol;
private:
void pushNode( DOM::NodeImpl *node );
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic