SVN commit 763246 by orlovich: Simplify and robustify the TreeWalker implementation somewhat, fixing a couple of bugs along the way. It actually seems very close to right (unlike the utterly wrong impl WebCore has :-) ) M +171 -188 dom2_traversalimpl.cpp M +16 -10 dom2_traversalimpl.h --- branches/KDE/4.0/kdelibs/khtml/xml/dom2_traversalimpl.cpp #763245:763246 @@ -4,6 +4,7 @@ * (C) 1999 Lars Knoll (knoll@kde.org) * (C) 2000 Frederik Holljen (frederik.holljen@hig.no) * (C) 2001 Peter Kelly (pmk@post.com) + * (C) 2008 Maksim Orlovich (maksim@kde.org) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -24,6 +25,14 @@ #include "dom/dom_exception.h" #include "xml/dom_docimpl.h" +/** + Robustness notes: we protect the object we hold on to, to + prevent crashes. To ensure termination in JS used, we propagate exceptions, + and rely on the JS CPU guard to set one to force us to bail out. + + ### TODO/CHECKONE +*/ + using namespace DOM; NodeIteratorImpl::NodeIteratorImpl(NodeImpl *_root, unsigned long _whatToShow, @@ -316,7 +325,7 @@ NodeImpl *TreeWalkerImpl::getRoot() const { - return m_rootNode; + return m_rootNode.get(); } unsigned long TreeWalkerImpl::getWhatToShow() const @@ -336,7 +345,7 @@ NodeImpl *TreeWalkerImpl::getCurrentNode() const { - return m_currentNode; + return m_currentNode.get(); } void TreeWalkerImpl::setWhatToShow(long _whatToShow) @@ -371,81 +380,85 @@ NodeImpl *TreeWalkerImpl::parentNode( ) { - NodeImpl *n = getParentNode( m_currentNode ); + NodePtr n = getParentNode( m_currentNode ); if ( n ) m_currentNode = n; - return n; + return n.get(); } NodeImpl *TreeWalkerImpl::firstChild( ) { - NodeImpl *n = getFirstChild( m_currentNode ); + NodePtr n = getFirstChild( m_currentNode ); if ( n ) m_currentNode = n; - return n; + return n.get(); } NodeImpl *TreeWalkerImpl::lastChild( ) { - NodeImpl *n = getLastChild(m_currentNode); + NodePtr n = getLastChild(m_currentNode); if( n ) m_currentNode = n; - return n; + return n.get(); } NodeImpl *TreeWalkerImpl::previousSibling( ) { - NodeImpl *n = getPreviousSibling( m_currentNode ); + NodePtr n = getPreviousSibling( m_currentNode ); if( n ) m_currentNode = n; - return n; + return n.get(); } NodeImpl *TreeWalkerImpl::nextSibling( ) { - NodeImpl *n = getNextSibling( m_currentNode ); + NodePtr n = getNextSibling( m_currentNode ); if( n ) m_currentNode = n; - return n; + return n.get(); } +NodeImpl *TreeWalkerImpl::nextNode( ) +{ + NodePtr n = getNextNode(); + if( n ) + m_currentNode = n; + return n.get(); +} + NodeImpl *TreeWalkerImpl::previousNode( ) { -/* 1. my previous sibling.lastchild - * 2. my previous sibling + NodePtr n = getPreviousNode(); + if( n ) + m_currentNode = n; + return n.get(); +} + +TreeWalkerImpl::NodePtr TreeWalkerImpl::getPreviousNode( ) +{ +/* 1. last node in my previous sibling's subtree + * 2. my previous sibling (special case of the above) * 3. my parent */ - NodeImpl *n = getPreviousSibling( m_currentNode ); - if( !n ) - { - n = getParentNode( m_currentNode ); - if( n ) //parent - { - m_currentNode = n; - return m_currentNode; + NodePtr n = getPreviousSibling(m_currentNode); + if (n) { + // Find the last kid in the tree's preorder traversal, if any, + // by following the lastChild links. + NodePtr desc = getLastChild(n); + while (desc) { + n = desc; + desc = getLastChild(desc); } - else // parent failed.. no previous node - return 0; + return n; } - NodeImpl *child = getLastChild( n ); - if( child ) // previous siblings last child - { - m_currentNode = child; - return m_currentNode; - } - else // previous sibling - { - m_currentNode = n; - return m_currentNode; - } - return 0; // should never get here! + return getParentNode(n); } -NodeImpl *TreeWalkerImpl::nextNode( ) +TreeWalkerImpl::NodePtr TreeWalkerImpl::getNextNode() { /* 1. my first child * 2. my next sibling @@ -453,215 +466,185 @@ * 4. not found */ - NodeImpl *n = getFirstChild( m_currentNode ); - if( n ) // my first child - { - m_currentNode = n; + NodePtr n = getFirstChild(m_currentNode); + if (n) // my first child return n; - } - n = getNextSibling( m_currentNode ); // my next sibling - if( n ) + n = getNextSibling(m_currentNode); // my next sibling + if (n) + return n; + + NodePtr parent = getParentNode(m_currentNode); + while (parent) // parent's sibling { - m_currentNode = n; - return m_currentNode; + n = getNextSibling(parent); + if (n) + return n; + parent = getParentNode(parent); } - NodeImpl *parent = getParentNode( m_currentNode ); - while( parent ) // parents sibling - { - n = getNextSibling( parent ); - if( n ) - { - m_currentNode = n; - return m_currentNode; - } - else - parent = getParentNode( parent ); - } return 0; } -short TreeWalkerImpl::isAccepted(NodeImpl *n) +short TreeWalkerImpl::isAccepted(TreeWalkerImpl::NodePtr n) { // if XML is implemented we have to check expandEntityRerefences in this function if( ( ( 1 << ( n->nodeType()-1 ) ) & m_whatToShow) != 0 ) { if(m_filter) - return m_filter->acceptNode(n); + return m_filter->acceptNode(n.get()); else return NodeFilter::FILTER_ACCEPT; } return NodeFilter::FILTER_SKIP; } -NodeImpl *TreeWalkerImpl::getParentNode(NodeImpl *n) -{ - short _result = NodeFilter::FILTER_ACCEPT; +/** + This is quite a bit more complicated than it would at first seem, due to the following note: + "Note that the structure of a TreeWalker's filtered view of a document may differ significantly from that of the document itself. For example, a TreeWalker with only SHOW_TEXT specified in its whatToShow parameter would present all the Text nodes as if they were siblings of each other yet had no parent." - if( n == m_rootNode /*|| !n*/ ) - return 0; + My read of this is that e.g. that when a node is FILTER_SKIP'd, its children are handled + as the children of its parent. -M.O. +*/ - NodeImpl *_tempCurrent = n->parentNode(); - - if( !_tempCurrent ) - return 0; - - _result = isAccepted( _tempCurrent ); - if ( _result == NodeFilter::FILTER_ACCEPT ) - return _tempCurrent; // match found - - return getParentNode( _tempCurrent ); -} - -NodeImpl *TreeWalkerImpl::getFirstChild(NodeImpl *n) +TreeWalkerImpl::NodePtr TreeWalkerImpl::getParentNode(TreeWalkerImpl::NodePtr n) { - short _result; + NodePtr cursor = n; - if( !n || !n->firstChild() ) - return 0; - n = n->firstChild(); - - _result = isAccepted( n ); - - switch( _result ) - { - case NodeFilter::FILTER_ACCEPT: - return n; - break; - case NodeFilter::FILTER_SKIP: - if( n->hasChildNodes() ) - return getFirstChild( n ); + // Walk up, to find the first visible node != n, until we run out of + // document or into the root (which we don't have to be inside of!) + while (cursor && cursor != m_rootNode) { + if (cursor != n && isAccepted(cursor) == NodeFilter::FILTER_ACCEPT) + return cursor; else - return getNextSibling( n ); - break; - - case NodeFilter::FILTER_REJECT: - return getNextSibling( n ); - break; + cursor = cursor->parentNode(); } - return 0; // should never get here! + + return 0; } -NodeImpl *TreeWalkerImpl::getLastChild(NodeImpl *n) +TreeWalkerImpl::NodePtr TreeWalkerImpl::getFirstChild(TreeWalkerImpl::NodePtr n) { - short _result; + NodePtr cursor; + for (cursor = n->firstChild(); cursor; cursor = cursor->nextSibling()) { + switch (isAccepted(cursor)) { + case NodeFilter::FILTER_ACCEPT: + return cursor; + case NodeFilter::FILTER_SKIP: { + // We ignore this candidate proper, but perhaps it has a kid? + NodePtr cand = getFirstChild(cursor); + if (cand) + return cand; + break; + } + case NodeFilter::FILTER_REJECT: + // It effectively doesn't exist.. + break; + } + // ### this is unclear: what should happen if we "pass through" the root?? + } - if( !n || !n->lastChild() ) - return 0; - n = n->lastChild(); - _result = isAccepted( n ); + // Nothing found.. + return 0; +} - switch( _result ) - { - case NodeFilter::FILTER_ACCEPT: - return n; - break; - - case NodeFilter::FILTER_SKIP: - if( n->hasChildNodes() ) - return getLastChild( n ); - else - return getPreviousSibling( n ); - break; - - case NodeFilter::FILTER_REJECT: - return getPreviousSibling( n ); - break; +TreeWalkerImpl::NodePtr TreeWalkerImpl::getLastChild(TreeWalkerImpl::NodePtr n) +{ + TreeWalkerImpl::NodePtr cursor; + for (cursor = n->lastChild(); cursor; cursor = cursor->previousSibling()) { + switch (isAccepted(cursor)) { + case NodeFilter::FILTER_ACCEPT: + return cursor; + case NodeFilter::FILTER_SKIP: { + // We ignore this candidate proper, but perhaps it has a kid? + TreeWalkerImpl::NodePtr cand = getLastChild(cursor); + if (cand) + return cand; + break; + } + case NodeFilter::FILTER_REJECT: + // It effectively doesn't exist.. + break; + } + // ### this is unclear: what should happen if we "pass through" the root?? } + + // Nothing found.. return 0; } -NodeImpl *TreeWalkerImpl::getPreviousSibling(NodeImpl *n) +TreeWalkerImpl::NodePtr TreeWalkerImpl::getNextSibling(TreeWalkerImpl::NodePtr n) { - short _result; - NodeImpl *_tempCurrent; - - if( !n ) + // If we're the root node, clearly we don't have any siblings. + if (n == m_rootNode) return 0; - //first the cases if we have a previousSibling - _tempCurrent = n->previousSibling(); - if( _tempCurrent ) - { - _result = isAccepted( _tempCurrent ); - switch ( _result ) - { + + // What would can our siblings be? Well, they can be our actual siblings, + // or if those are 'skip' their first kid. We may also have to walk up + // through any skipped nodes. (The behavior of going through rejected nodes is unspecified, + // we'll keep backwards compat and stop). + NodePtr cursor; + for (cursor = n->nextSibling(); cursor; cursor = cursor->nextSibling()) { + switch (isAccepted(cursor)) { case NodeFilter::FILTER_ACCEPT: - return _tempCurrent; + return cursor; + case NodeFilter::FILTER_SKIP: { + NodePtr cand = getFirstChild(cursor); + if (cand) + return cand; break; - - case NodeFilter::FILTER_SKIP: - { - NodeImpl *nskip = getLastChild( _tempCurrent ); - if( nskip ) - return nskip; - return getPreviousSibling( _tempCurrent ); - break; } - case NodeFilter::FILTER_REJECT: - return getPreviousSibling( _tempCurrent ); break; } } - // now the case if we don't have previous sibling - else - { - _tempCurrent = n->parentNode(); - if( !_tempCurrent || _tempCurrent == m_rootNode) - return 0; - _result = isAccepted( _tempCurrent ); - if ( _result == NodeFilter::FILTER_SKIP ) - return getPreviousSibling( _tempCurrent ); + // Now, we have scanned through all of our parent's kids. Our siblings may also be + // above if we could have skipped the parent, and are not captured by it. + NodePtr parent = n->parentNode(); + if (!parent || parent == m_rootNode) return 0; - } - return 0; // should never get here! + /* "In particular: If the currentNode becomes part of a subtree that would otherwise have + been Rejected by the filter, that entire subtree may be added as transient members of the + logical view. You will be able to navigate within that subtree (subject to all the usual + filtering) until you move upward past the Rejected ancestor. The behavior is as if the Rejected + node had only been Skipped (since we somehow wound up inside its subtree) until we leave it; + thereafter, standard filtering applies.*/ + if (isAccepted(parent) == NodeFilter::FILTER_ACCEPT) + return 0; + + return getNextSibling(parent); } -NodeImpl *TreeWalkerImpl::getNextSibling(NodeImpl *n) +TreeWalkerImpl::NodePtr TreeWalkerImpl::getPreviousSibling(TreeWalkerImpl::NodePtr n) { - NodeImpl *_tempCurrent = 0; - short _result; - - if( !n ) + // See the above for all the comments :-) + if (n == m_rootNode) return 0; - _tempCurrent = n->nextSibling(); - if( _tempCurrent ) - { - _result = isAccepted( _tempCurrent ); - switch ( _result ) - { + NodePtr cursor; + for (cursor = n->previousSibling(); cursor; cursor = cursor->previousSibling()) { + switch (isAccepted(cursor)) { case NodeFilter::FILTER_ACCEPT: - return _tempCurrent; + return cursor; + case NodeFilter::FILTER_SKIP: { + NodePtr cand = getLastChild(cursor); + if (cand) + return cand; break; - - case NodeFilter::FILTER_SKIP: - { - NodeImpl *nskip = getFirstChild( _tempCurrent ); - if( nskip ) - return nskip; - return getNextSibling( _tempCurrent ); - break; } - case NodeFilter::FILTER_REJECT: - return getNextSibling( _tempCurrent ); break; } } - else - { - _tempCurrent = n->parentNode(); - if( !_tempCurrent || _tempCurrent == m_rootNode) - return 0; - _result = isAccepted( _tempCurrent ); - if( _result == NodeFilter::FILTER_SKIP ) - return getNextSibling( _tempCurrent ); + NodePtr parent = n->parentNode(); + if (!parent || parent == m_rootNode) return 0; - } - return 0; + + if (isAccepted(parent) == NodeFilter::FILTER_ACCEPT) + return 0; + + return getPreviousSibling(parent); } --- branches/KDE/4.0/kdelibs/khtml/xml/dom2_traversalimpl.h #763245:763246 @@ -137,14 +137,21 @@ void setFilter(NodeFilterImpl *_filter); void setExpandEntityReferences(bool value); - NodeImpl *getParentNode(NodeImpl *n); - NodeImpl *getFirstChild(NodeImpl *n); - NodeImpl *getLastChild(NodeImpl *n); - NodeImpl *getPreviousSibling(NodeImpl *n); - NodeImpl *getNextSibling(NodeImpl *n); + typedef SharedPtr NodePtr; // lazy Maks... + + // These methods attempt to find the next node in given direction from + // the given reference point, w/o affecting the current node. + NodePtr getParentNode(NodePtr n); + NodePtr getFirstChild(NodePtr n); + NodePtr getLastChild(NodePtr n); + NodePtr getPreviousSibling(NodePtr n); + NodePtr getNextSibling(NodePtr n); - short isAccepted(NodeImpl *n); + NodePtr getNextNode(); + NodePtr getPreviousNode(); + short isAccepted(NodePtr n); + protected: /** * This attribute determines which node types are presented via @@ -184,10 +191,9 @@ * type. * */ - NodeImpl *m_currentNode; - - NodeImpl *m_rootNode; - DocumentImpl *m_doc; + SharedPtr m_currentNode; + SharedPtr m_rootNode; + DocumentImpl *m_doc; // always alive as long as the root is... };