From kde-commits  Wed Mar 31 21:44:19 2010
From: Shaheed Haque <srhaque () theiet ! org>
Date: Wed, 31 Mar 2010 21:44:19 +0000
To: kde-commits
Subject: branches/work/koffice-ko/filters/kword/msword-odf/wv2/src
Message-Id: <20100331214419.CCB87AC888 () svn ! kde ! org>
X-MARC-Message: https://marc.info/?l=kde-commits&m=127007178932725

SVN commit 1109726 by shaheed:

No functional changes:

1. Add comments to Parser to describe the overall structure of the filter.
2. Add comments to Parser9x to describe how this implementation works.
2a. Renamed some private methods to support the new description.


 M  +26 -0     parser.h  
 M  +8 -8      parser9x.cpp  
 M  +31 -6     parser9x.h
--- branches/work/koffice-ko/filters/kword/msword-odf/wv2/src/parser.h #1109725:1109726
@@ -52,6 +52,32 @@
 class AssociatedStrings;
 class Drawings;
 
+/**
+ * This class is the heart of the filter:
+ *<ul>
+ *  <li>
+ *    The layers above it consume the output, primarily using the handlers passed in
+ *    using the setXXXHandler() methods.
+ *  </li>
+ *  <li>
+ *    The layers below handle the specifics of different versions of Word.
+ *  </li>
+ *  <li>
+ *    This layer:
+ *  <ul>
+ *    <li>
+ *        Mostly hides the structures that are specific to any given
+ *        version of Word (a few things like the FIB from Word97 *are* generic enough
+ *        to be used here).
+ *    </li>
+ *    <li>
+ *        Does all the processing during the call to parse(), by calling the provided
+ *        handlers as needed.
+ *    </li>
+ *  </ul>
+ *</li>
+ *</ul>
+ */
 class WV2_EXPORT Parser : public Shared
 {
 public:
--- branches/work/koffice-ko/filters/kword/msword-odf/wv2/src/parser9x.cpp #1109725:1109726
@@ -771,7 +771,7 @@
             length -= disLen;
             index += disLen;
             m_customFootnote = chunk.m_text.substr(index, length);
-            processFootnote( m_customFootnote, disruption, chp, length );
+            emitFootnote( m_customFootnote, disruption, chp, length );
             index+=length;
             length=0;
             m_customFootnote = "";
@@ -792,7 +792,7 @@
     if ( chp->fSpec ) {
         U32 i = 0;
         while ( i < length ) {
-            processSpecialCharacter( chunk.m_text[ index + i ], currentStart + chunk.m_position.offset + index + i, chp );
+            emitSpecialCharacter( chunk.m_text[ index + i ], currentStart + chunk.m_position.offset + index + i, chp );
             ++i;
         }
     }
@@ -802,7 +802,7 @@
     }
 }
 
-void Parser9x::processSpecialCharacter( UChar character, U32 globalCP, SharedPtr<const Word97::CHP> chp )
+void Parser9x::emitSpecialCharacter( UChar character, U32 globalCP, SharedPtr<const Word97::CHP> chp )
 {
     switch( character.unicode() ) {
         // Is it one of the "simple" special characters?
@@ -844,7 +844,7 @@
         if ( m_subDocument == Footnote || m_subDocument == Endnote )
             m_textHandler->footnoteAutoNumber( chp );
         else
-            processFootnote( UString(character), globalCP, chp);
+            emitFootnote( UString(character), globalCP, chp);
         break;
     case TextHandler::FieldBegin:
         {
@@ -869,7 +869,7 @@
         }
     case TextHandler::AnnotationRef:
         {
-            processAnnotation(UString(character), globalCP, chp);
+            emitAnnotation(UString(character), globalCP, chp);
         }
     case TextHandler::FieldEscapeChar:
             wvlog << "Found an escape character ++++++++++++++++++++?" << endl;
@@ -881,7 +881,7 @@
 }
 }
 
-void Parser9x::processFootnote( UString characters, U32 globalCP, SharedPtr<const Word97::CHP> chp, U32 /* length */ )
+void Parser9x::emitFootnote( UString characters, U32 globalCP, SharedPtr<const Word97::CHP> chp, U32 /* length */ )
 {
     if ( !m_footnotes ) {
         wvlog << "Bug: Found a footnote, but m_footnotes == 0!" << endl;
@@ -896,7 +896,7 @@
         m_textHandler->footnoteFound( data.type, characters, chp, make_functor( *this, &Parser9x::parseFootnote, data ));
 }
 
-void Parser9x::processAnnotation( UString characters, U32 globalCP, SharedPtr<const Word97::CHP> chp, U32 /* length */ )
+void Parser9x::emitAnnotation( UString characters, U32 globalCP, SharedPtr<const Word97::CHP> chp, U32 /* length */ )
 {
     for (int i = 0; i < characters.length(); ++i) {
         wvlog << characters[i].unicode();
@@ -1151,8 +1151,8 @@
                     z.SetBreak(blip.compressedImageSize());
                     std::vector<U8> outBuffer;
                     int err = z.Decompress( *stream, &outBuffer );
+                    wvlog << "  err=" << err << endl;
 #ifdef WV2_DEBUG_PICTURES
-                    wvlog << "  err=" << err << endl;
                     wvlog << "  outBuffer size = " << outBuffer.size() << endl;
 #endif
                     z.EndCompression(&outBuffer);
--- branches/work/koffice-ko/filters/kword/msword-odf/wv2/src/parser9x.h #1109725:1109726
@@ -211,18 +211,43 @@
         // plain old overloading. It's just a matter of compressed vs. real unicode (1 vs. 2 bytes)
         UString processPieceStringHelper( XCHAR* string, unsigned int start, unsigned int index ) const;
         UString processPieceStringHelper( U8* string, unsigned int start, unsigned int index ) const;
-        // Processes the current contents of the Paragraph structure and clears it when it's done
+
+        /**
+         * The basic structure of a Word text document is a sequence of paragraphs comprising
+         * runs of text with a given set of properties (i.e. a CHP). This model is implemented
+         * by having parse() call processParagraph().
+         *
+         * The processXXX() methods deal with text in blocks:
+         *<ul>
+         *  <li>
+         *  processParagraph() processes the current contents of the Paragraph structure
+         *  and clears it when it's done. Generally calls processChunk().
+         *  </li>
+         *  <li>
+         *  processChunk() processes the section text with a given CHP value. This is what
+         *  processRun() does, except that processChunk() also handles points (such as for footnotes
+         *  and endnotes) which are marked by arrays of CPs (located via the FIB). Generally calls
+         *  processRun().
+         *  </li>
+         *  <li>
+         *  processRun() processes the section text with a given CHP value. If CHP.fSpec is set,
+         *  calls emitSpecialCharacter().
+         *  </li>
+         *</ul>
+         */
         void processParagraph( U32 fc );
         void processChunk( const Chunk& chunk, SharedPtr<const Word97::CHP> chp,
                            U32 length, U32 index, U32 currentStart );
         void processRun( const Chunk& chunk, SharedPtr<const Word97::CHP> chp,
                          U32 length, U32 index, U32 currentStart );
 
-        void processSpecialCharacter( UChar character, U32 globalCP, SharedPtr<const Word97::CHP> chp );
-        void processFootnote( UString characters, U32 globalCP, SharedPtr<const Word97::CHP> chp, U32 length=1 );
-        void processAnnotation( UString characters, U32 globalCP, SharedPtr<const Word97::CHP> chp, U32 length=1 );
-
-        // Helper methods to gather and emit the information needed for the functors
+        /**
+         * Generally, the emitXXX() methods gather and emit the information needed for the
+         * corresponding functors.
+         */
+        void emitSpecialCharacter( UChar character, U32 globalCP, SharedPtr<const Word97::CHP> chp );
+        void emitFootnote( UString characters, U32 globalCP, SharedPtr<const Word97::CHP> chp, U32 length=1 );
+        void emitAnnotation( UString characters, U32 globalCP, SharedPtr<const Word97::CHP> chp, U32 length=1 );
         void emitHeaderData( SharedPtr<const Word97::SEP> sep );
         void emitPictureData( SharedPtr<const Word97::CHP> chp );
         void emitDrawnObject( U32 globalCP );