[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    KDE/kdelibs
From:       Germain Garand <germain () ebooksfrance ! org>
Date:       2009-06-07 0:13:38
Message-ID: 1244333618.461001.26420.nullmailer () svn ! kde ! org
[Download RAW message or body]

SVN commit 978411 by ggarand:

.abort XML parsing when invalid characters were found in the input stream
 (acid3-70)
.add helper methods to KEncodingDetector for reporting such errors
.add unit test asserting some KEncodingDetector properties

kindly reviewed by Maksim Orlovich.

CCBUG: 156947


 M  +18 -0     kdecore/localization/kencodingdetector.cpp  
 M  +25 -0     kdecore/localization/kencodingdetector.h  
 M  +1 -0      kdecore/tests/CMakeLists.txt  
 M  +2 -3      khtml/khtml_part.cpp  
 M  +1 -0      khtml/xml/dom_docimpl.cpp  
 M  +4 -0      khtml/xml/dom_docimpl.h  
 M  +12 -2     khtml/xml/xml_tokenizer.cpp  
 M  +2 -2      khtml/xml/xml_tokenizer.h  


--- trunk/KDE/kdelibs/kdecore/localization/kencodingdetector.cpp #978410:978411
@@ -696,6 +696,19 @@
     return d->m_decoder;
 }
 
+void KEncodingDetector::resetDecoder()
+{
+    assert(d->m_defaultCodec);
+    d->m_bufferForDefferedEncDetection.clear();
+    d->m_writtingHappened = false;
+    d->m_analyzeCalled = false;
+    d->m_multiByte = 0;
+    delete d->m_decoder;
+    if (!d->m_codec)
+        d->m_codec = d->m_defaultCodec;
+    d->m_decoder = d->m_codec->makeDecoder();
+}
+
 bool KEncodingDetector::setEncoding(const char *_encoding, EncodingChoiceSource type)
 {
     QTextCodec *codec;
@@ -840,6 +853,11 @@
     return QString();
 }
 
+bool KEncodingDetector::decodedInvalidCharacters() const
+{
+    return d->m_decoder ? d->m_decoder->hasFailure() : false;
+}
+
 QString KEncodingDetector::flush()
 {
     if (d->m_bufferForDefferedEncDetection.isEmpty())
--- trunk/KDE/kdelibs/kdecore/localization/kencodingdetector.h #978410:978411
@@ -148,6 +148,31 @@
     QString decodeWithBuffering(const char *data, int len);
 
     /**
+     * This method checks whether invalid characters were found
+     * during a decoding operation.
+     *
+     * Note that this bit is never reset once invalid characters have been found.
+     * To force a reset, either change the encoding using setEncoding() or call
+     * resetDecoder()
+     * 
+     * @returns a boolean reflecting said state.
+     * @since 4.3
+     * @see resetDecoder() setEncoding()
+     */    
+    bool decodedInvalidCharacters() const;
+
+    /**
+     * Resets the decoder. Any stateful decoding information (such as resulting from previous calls
+     * to decodeWithBuffering()) will be lost.
+     * Will Reset the state of decodedInvalidCharacters() as a side effect.
+     *
+     * @since 4.3
+     * @see decodeWithBuffering() decodedInvalidCharacters()
+     *
+     */ 
+    void resetDecoder();
+
+    /**
     * Convenience method to be used with decodeForHtml. Flushes buffer.
     * @see decodeForHtml()
     */
--- trunk/KDE/kdelibs/kdecore/tests/CMakeLists.txt #978410:978411
@@ -57,6 +57,7 @@
  ktcpsockettest
  ksycocathreadtest
  kdebug_unittest
+ kencodingdetectortest
 )
 
 if(UNIX)
--- trunk/KDE/kdelibs/khtml/khtml_part.cpp #978410:978411
@@ -6840,10 +6840,9 @@
             ? QByteArray( parentPart()->d->m_decoder->encoding() ) : settings()->encoding().toLatin1();
         dec->setEncoding(defaultEncoding.constData(), KEncodingDetector::DefaultEncoding);
     }
-#ifdef APPLE_CHANGES
+
     if (d->m_doc)
-        d->m_doc->setDecoder(d->m_decoder);
-#endif
+        d->m_doc->setDecoder(dec);
     dec->setAutoDetectLanguage( d->m_autoDetectLanguage );
     return dec;
 }
--- trunk/KDE/kdelibs/khtml/xml/dom_docimpl.cpp #978410:978411
@@ -409,6 +409,7 @@
     m_docChanged = false;
     m_elemSheet = 0;
     m_tokenizer = 0;
+    m_decoder = 0;
     m_doctype = 0;
     m_implementation = _implementation;
     m_implementation->ref();
--- trunk/KDE/kdelibs/khtml/xml/dom_docimpl.h #978410:978411
@@ -50,6 +50,7 @@
 class QTextCodec;
 class KHTMLView;
 class QEventLoop;
+class KEncodingDetector;
 
 namespace khtml {
     class Tokenizer;
@@ -370,6 +371,8 @@
     CSSStyleSheetImpl* elementSheet();
     virtual khtml::Tokenizer *createTokenizer();
     khtml::Tokenizer *tokenizer() { return m_tokenizer; }
+    KEncodingDetector* decoder() { return m_decoder; }
+    void setDecoder(KEncodingDetector* enc) { m_decoder = enc; }
 
     void setPaintDevice(QPaintDevice *dev){m_paintDevice = dev;}
     QPaintDevice *paintDevice() const {return m_paintDevice;}
@@ -613,6 +616,7 @@
 
     khtml::DocLoader *m_docLoader;
     khtml::Tokenizer *m_tokenizer;
+    KEncodingDetector *m_decoder;
     KUrl m_url;
     KUrl m_baseURL;
     QString m_baseTarget;
--- trunk/KDE/kdelibs/khtml/xml/xml_tokenizer.cpp #978410:978411
@@ -37,6 +37,7 @@
 #include <QtCore/QVariant>
 #include <kdebug.h>
 #include <klocale.h>
+#include <kencodingdetector.h>
 
 // SVG includes
 #include "svg/SVGScriptElement.h"
@@ -96,7 +97,7 @@
 }
 
 XMLHandler::XMLHandler(DocumentImpl *_doc, KHTMLView *_view)
-    : errorLine(0)
+    : errorLine(-1)
 {
     m_doc = _doc;
     m_view = _view;
@@ -464,6 +465,7 @@
 {
     if ( !m_noErrors && appendData )
         return;
+ 
     // check if we try to re-enter inside write()
     // if so buffer the data
     if (m_insideWrite) {
@@ -480,6 +482,14 @@
     }
     m_noErrors = m_reader.parseContinue();
 
+    if (m_doc->decoder() && m_doc->decoder()->decodedInvalidCharacters()) {
+        // any invalid character spotted by the decoder is fatal, per XML 1.0 spec. Tested by Acid 3 - 70
+        m_handler.fatalError( QXmlParseException( m_handler.errorString() ) );  // ### FIXME: make that more informative after string freeze : i18n("input stream contains invalid characters")
+        m_noErrors = false;
+        finish();
+        return;
+    }
+
     // check if while parsing we tried to re-enter write() method so now we have some buffered data we need to write to document
     while (m_noErrors && !m_bufferedData.isEmpty()) {
         m_source.appendXML(m_bufferedData);
@@ -520,7 +530,7 @@
             static_cast<NodeImpl*>(m_doc)->removeChild(m_doc->firstChild(),exceptioncode);
  
         QString line, errorLocPtr;
-        if ( m_handler.errorLine ) {
+        if ( m_handler.errorLine != -1 ) {
             QString xmlCode = m_source.data();
             QTextStream stream(&xmlCode, QIODevice::ReadOnly);
             for (unsigned long lineno = 0; lineno < m_handler.errorLine-1; lineno++)
--- trunk/KDE/kdelibs/khtml/xml/xml_tokenizer.h #978410:978411
@@ -93,8 +93,8 @@
 
     bool fatalError( const QXmlParseException& exception );
 
-    unsigned long errorLine;
-    unsigned long errorCol;
+    int errorLine;
+    int errorCol;
 
 private:
     void pushNode( DOM::NodeImpl *node );


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic