From kde-commits Wed Mar 31 21:44:19 2010 From: Shaheed Haque Date: Wed, 31 Mar 2010 21:44:19 +0000 To: kde-commits Subject: branches/work/koffice-ko/filters/kword/msword-odf/wv2/src Message-Id: <20100331214419.CCB87AC888 () svn ! kde ! org> X-MARC-Message: https://marc.info/?l=kde-commits&m=127007178932725 SVN commit 1109726 by shaheed: No functional changes: 1. Add comments to Parser to describe the overall structure of the filter. 2. Add comments to Parser9x to describe how this implementation works. 2a. Renamed some private methods to support the new description. M +26 -0 parser.h M +8 -8 parser9x.cpp M +31 -6 parser9x.h --- branches/work/koffice-ko/filters/kword/msword-odf/wv2/src/parser.h #1109725:1109726 @@ -52,6 +52,32 @@ class AssociatedStrings; class Drawings; +/** + * This class is the heart of the filter: + * + */ class WV2_EXPORT Parser : public Shared { public: --- branches/work/koffice-ko/filters/kword/msword-odf/wv2/src/parser9x.cpp #1109725:1109726 @@ -771,7 +771,7 @@ length -= disLen; index += disLen; m_customFootnote = chunk.m_text.substr(index, length); - processFootnote( m_customFootnote, disruption, chp, length ); + emitFootnote( m_customFootnote, disruption, chp, length ); index+=length; length=0; m_customFootnote = ""; @@ -792,7 +792,7 @@ if ( chp->fSpec ) { U32 i = 0; while ( i < length ) { - processSpecialCharacter( chunk.m_text[ index + i ], currentStart + chunk.m_position.offset + index + i, chp ); + emitSpecialCharacter( chunk.m_text[ index + i ], currentStart + chunk.m_position.offset + index + i, chp ); ++i; } } @@ -802,7 +802,7 @@ } } -void Parser9x::processSpecialCharacter( UChar character, U32 globalCP, SharedPtr chp ) +void Parser9x::emitSpecialCharacter( UChar character, U32 globalCP, SharedPtr chp ) { switch( character.unicode() ) { // Is it one of the "simple" special characters? 
@@ -844,7 +844,7 @@ if ( m_subDocument == Footnote || m_subDocument == Endnote ) m_textHandler->footnoteAutoNumber( chp ); else - processFootnote( UString(character), globalCP, chp); + emitFootnote( UString(character), globalCP, chp); break; case TextHandler::FieldBegin: { @@ -869,7 +869,7 @@ } case TextHandler::AnnotationRef: { - processAnnotation(UString(character), globalCP, chp); + emitAnnotation(UString(character), globalCP, chp); } case TextHandler::FieldEscapeChar: wvlog << "Found an escape character ++++++++++++++++++++?" << endl; @@ -881,7 +881,7 @@ } } -void Parser9x::processFootnote( UString characters, U32 globalCP, SharedPtr chp, U32 /* length */ ) +void Parser9x::emitFootnote( UString characters, U32 globalCP, SharedPtr chp, U32 /* length */ ) { if ( !m_footnotes ) { wvlog << "Bug: Found a footnote, but m_footnotes == 0!" << endl; @@ -896,7 +896,7 @@ m_textHandler->footnoteFound( data.type, characters, chp, make_functor( *this, &Parser9x::parseFootnote, data )); } -void Parser9x::processAnnotation( UString characters, U32 globalCP, SharedPtr chp, U32 /* length */ ) +void Parser9x::emitAnnotation( UString characters, U32 globalCP, SharedPtr chp, U32 /* length */ ) { for (int i = 0; i < characters.length(); ++i) { wvlog << characters[i].unicode(); @@ -1151,8 +1151,8 @@ z.SetBreak(blip.compressedImageSize()); std::vector outBuffer; int err = z.Decompress( *stream, &outBuffer ); + wvlog << " err=" << err << endl; #ifdef WV2_DEBUG_PICTURES - wvlog << " err=" << err << endl; wvlog << " outBuffer size = " << outBuffer.size() << endl; #endif z.EndCompression(&outBuffer); --- branches/work/koffice-ko/filters/kword/msword-odf/wv2/src/parser9x.h #1109725:1109726 @@ -211,18 +211,43 @@ // plain old overloading. It's just a matter of compressed vs. real unicode (1 vs. 
2 bytes) UString processPieceStringHelper( XCHAR* string, unsigned int start, unsigned int index ) const; UString processPieceStringHelper( U8* string, unsigned int start, unsigned int index ) const; - // Processes the current contents of the Paragraph structure and clears it when it's done + + /** + * The basic structure of a Word text document is a sequence of paragraphs comprising + * runs of text with a given set of properties (i.e. a CHP). This model is implemented + * by having parse() call processParagraph(). + * + * The processXXX() methods deal with text in blocks: + *
    + *
  • + * processParagraph() processes the current contents of the Paragraph structure + * and clears it when it's done. Generally calls processChunk(). + *
  • + *
  • + * processChunk() processes the section text with a given CHP value. This is like + * processRun(), except that processChunk() also handles points (such as for footnotes + * and endnotes) which are marked by arrays of CPs (located via the FIB). Generally calls + * processRun(). + *
  • + *
  • + * processRun() processes the section text with a given CHP value. If CHP.fSpec is set, + * calls emitSpecialCharacter(). + *
  • + *
+ */ void processParagraph( U32 fc ); void processChunk( const Chunk& chunk, SharedPtr chp, U32 length, U32 index, U32 currentStart ); void processRun( const Chunk& chunk, SharedPtr chp, U32 length, U32 index, U32 currentStart ); - void processSpecialCharacter( UChar character, U32 globalCP, SharedPtr chp ); - void processFootnote( UString characters, U32 globalCP, SharedPtr chp, U32 length=1 ); - void processAnnotation( UString characters, U32 globalCP, SharedPtr chp, U32 length=1 ); - - // Helper methods to gather and emit the information needed for the functors + /** + * Generally, the emitXXX() methods gather and emit the information needed for the + * corresponding functors. + */ + void emitSpecialCharacter( UChar character, U32 globalCP, SharedPtr chp ); + void emitFootnote( UString characters, U32 globalCP, SharedPtr chp, U32 length=1 ); + void emitAnnotation( UString characters, U32 globalCP, SharedPtr chp, U32 length=1 ); void emitHeaderData( SharedPtr sep ); void emitPictureData( SharedPtr chp ); void emitDrawnObject( U32 globalCP );