[prev in list] [next in list] [prev in thread] [next in thread] 

List:       xerces-c-dev
Subject:    [jira] Created: (XERCESC-1527) Problem (bug?) with the DOMBuilder and grammar loading/reusing
From:       "Dejan Miljkovic (JIRA)" <xerces-c-dev () xml ! apache ! org>
Date:       2005-11-24 1:58:55
Message-ID: 1754525894.1132797535751.JavaMail.jira () ajax ! apache ! org
[Download RAW message or body]

Problem (bug?) with the DOMBuilder and grammar loading/reusing 
---------------------------------------------------------------

         Key: XERCESC-1527
         URL: http://issues.apache.org/jira/browse/XERCESC-1527
     Project: Xerces-C++
        Type: Bug
  Components: DOM  
    Versions: 2.6.0    
 Environment: Linux knoppix
    Reporter: Dejan Miljkovic
 Attachments: test.zip

Hi there,

I am using DOMBuilder for XML parsing. Since the application that I am developing needs to \
process many XML messages, I would like to load the schema only once and then reuse it for \
every new message. 

I am using 
parser->setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true);
parser->loadGrammar("test.xsd", Grammar::SchemaGrammarType, true);

Unfortunately, it looks like this method does not work. If I use 
parser->setProperty(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, schLoc);

Things are OK, but performance suffers greatly since the schema is loaded every time the \
parser is called.

You can find a complete code example in the attachment. I used the DOMParser.c example as a \
starting point. Any advice is greatly appreciated.

Regards,
Dejan


test.cpp:

#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/parsers/AbstractDOMParser.hpp>
#include <xercesc/dom/DOMImplementation.hpp>
#include <xercesc/dom/DOMImplementationLS.hpp>
#include <xercesc/dom/DOMImplementationRegistry.hpp>
#include <xercesc/dom/DOMBuilder.hpp>
#include <xercesc/dom/DOMException.hpp>
#include <xercesc/dom/DOMDocument.hpp>
#include <xercesc/dom/DOMNodeList.hpp>
#include <xercesc/dom/DOMError.hpp>
#include <xercesc/dom/DOMLocator.hpp>
#include <xercesc/dom/DOMNamedNodeMap.hpp>
#include <xercesc/dom/DOMAttr.hpp>

#include <xercesc/validators/common/Grammar.hpp>

#include "xerces_misc.h"

#if defined(XERCES_NEW_IOSTREAMS)
#include <fstream>
#else
#include <fstream.h>
#endif

// Forward declaration; the definition follows main().
int initializeXML4C();

// Set to true by the catch blocks in main() when an exception is caught.
bool errorOccurred = false;
// Error handler instance installed on the parser in main().
DOMCountErrorHandler errorHandler;

int main(int argc, char* argv[])
{
//	
//Initialize XERCES for C++ before assigning any XERCES variable
//
  initializeXML4C();
  
//
// Instantiate the DOM parser.
//
  static const XMLCh gLS[] = {chLatin_L, chLatin_S, chNull};
  DOMImplementation *impl = DOMImplementationRegistry::getDOMImplementation(gLS);
  DOMBuilder *parser = \
((DOMImplementationLS*)impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS, \
0);

//
// Set parsing parameters.
//
  parser->setFeature(XMLUni::fgDOMNamespaces, true);
  parser->setFeature(XMLUni::fgXercesSchema, true);
  parser->setFeature(XMLUni::fgDOMValidation, true);//Always validate schema

  // enable datatype normalization - default is off
  parser->setFeature(XMLUni::fgDOMDatatypeNormalization, true);

  parser->setErrorHandler(&errorHandler); //Create error handler and install it

  //Schema loading.
  try
  {
    XMLCh *schLoc = XMLString::transcode("test.xsd");
    //Remove setProperty(XMLUni::fgXercesS... if figure out how to use grammar.
    //parser->setProperty(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, \
schLoc);

    //Checking with gramar loading doesn't work. Checking with schema location is
    //used instead. Down side of this method is that schema is loaded each time
    //when parser is invoked.
    parser->setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true);
    parser->loadGrammar("test.xsd", Grammar::SchemaGrammarType, true);

    parser->parseURI("test.xml");
  }
  catch (const XMLException& toCatch)
  {
    std::cerr << "\nXML Error during schema file reading : '" << "'\n"
              << "Exception message is:  \n"
              << StrX(toCatch.getMessage()) << "\n" << std::endl;
    errorOccurred = true;
  }
  catch (const DOMException& toCatch)
  {
    const unsigned int maxChars = 2047;
    XMLCh errText[maxChars + 1];

    std::cerr << "\nDOM Error during schema parsing: " << "\n"
              << "DOMException code is:  " << toCatch.code << std::endl;

    if (DOMImplementation::loadDOMExceptionMsg(toCatch.code, errText, maxChars))
    {
      std::cerr << "Message is: " << StrX(errText) << std::endl;
    }
    errorOccurred = true;
  }
  catch (...)
  {
    std::cerr << "\nUnexpected exception during schema parsing: '" << "'\n";
    errorOccurred = true;
  }
  
  parser->release();
  XMLPlatformUtils::Terminate();
  
  return 0;
}

int initializeXML4C()
{
  bool recognizeNEL = false;
  char localeStr[64];
  memset(localeStr, 0, sizeof localeStr);

  try
  {
    if (strlen(localeStr))
    {
      XMLPlatformUtils::Initialize(localeStr);
    }
    else
    {
      XMLPlatformUtils::Initialize();
    }

    if (recognizeNEL)
    {
      XMLPlatformUtils::recognizeNEL(recognizeNEL);
    }
  }
  catch (const XMLException& toCatch)
  {
    std::cerr << "Error during initialization! :\n"
              << StrX(toCatch.getMessage()) << std::endl;
    exit(-1);
  }
  
  return 0;
}

// A freshly constructed handler has not seen any error yet.
DOMCountErrorHandler::DOMCountErrorHandler()
  : fSawErrors(false)
{}

// Nothing to clean up: the handler's only data member is a bool.
DOMCountErrorHandler::~DOMCountErrorHandler() {}

// ---------------------------------------------------------------------------
//  DOMCountHandlers: Overrides of the DOM ErrorHandler interface
// ---------------------------------------------------------------------------
// Records that an error was reported and prints it to std::cerr.
//
// The output starts with a banner chosen from the severity (warning, error,
// or fatal error for anything else), followed by the URI, line and column
// taken from the error's location, and then the error message.
//
// Always returns true.
bool DOMCountErrorHandler::handleError(const DOMError& domError)
{
  fSawErrors = true;

  // Anything that is neither a warning nor an error is reported as fatal.
  const char* banner = "\nFatal Error at file ";
  switch (domError.getSeverity())
  {
    case DOMError::DOM_SEVERITY_WARNING:
      banner = "\nWarning at file ";
      break;

    case DOMError::DOM_SEVERITY_ERROR:
      banner = "\nError at file ";
      break;

    default:
      break;
  }
  std::cerr << banner;

  std::cerr << StrX(domError.getLocation()->getURI());
  std::cerr << ", line " << domError.getLocation()->getLineNumber();
  std::cerr << ", char " << domError.getLocation()->getColumnNumber();
  std::cerr << "\n  Message: " << StrX(domError.getMessage()) << std::endl;

  return true;
}

// Clears the "saw errors" flag so the handler can be reused.
void DOMCountErrorHandler::resetErrors() { fSawErrors = false; }

xerces_misc.hpp

/*
 * Copyright 1999-2000,2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Log$
 * Revision 1.11  2004/09/08 13:55:31  peiyongz
 * Apache License Version 2.0
 *
 * Revision 1.10  2003/05/30 09:36:35  gareth
 * Use new macros for iostream.h and std:: issues.
 *
 * Revision 1.9  2003/02/05 18:53:22  tng
 * [Bug 11915] Utility for freeing memory.
 *
 * Revision 1.8  2002/11/05 21:46:19  tng
 * Explicit code using namespace in application.
 *
 * Revision 1.7  2002/06/18 16:19:40  knoaman
 * Replace XercesDOMParser with DOMBuilder for parsing XML documents.
 *
 * Revision 1.6  2002/02/01 22:35:01  peiyongz
 * sane_include
 *
 * Revision 1.5  2000/10/20 22:00:35  andyh
 * DOMCount sample Minor cleanup - rename error handler class to say that it is an \
                error handler.
 *
 * Revision 1.4  2000/03/02 19:53:39  roddey
 * This checkin includes many changes done while waiting for the
 * 1.1.0 code to be finished. I can't list them all here, but a list is
 * available elsewhere.
 *
 * Revision 1.3  2000/02/11 02:43:55  abagchi
 * Removed StrX::transcode
 *
 * Revision 1.2  2000/02/06 07:47:17  rahulj
 * Year 2K copyright swat.
 *
 * Revision 1.1.1.1  1999/11/09 01:09:52  twl
 * Initial checkin
 *
 * Revision 1.5  1999/11/08 20:43:35  rahul
 * Swat for adding in Product name and CVS comment log variable.
 *
 */

// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/dom/DOMErrorHandler.hpp>
#include <xercesc/util/XMLString.hpp>
#if defined(XERCES_NEW_IOSTREAMS)
#include <iostream>
#else
#include <iostream.h>
#endif

XERCES_CPP_NAMESPACE_USE

// ---------------------------------------------------------------------------
//  Simple error handler deriviative to install on parser
// ---------------------------------------------------------------------------
class DOMCountErrorHandler : public DOMErrorHandler
{
public:
    // -----------------------------------------------------------------------
    //  Construction / destruction
    // -----------------------------------------------------------------------
    DOMCountErrorHandler();
    ~DOMCountErrorHandler();

    // -----------------------------------------------------------------------
    //  Query: has handleError() been called since construction or since the
    //  last resetErrors()?
    // -----------------------------------------------------------------------
    bool getSawErrors() const;

    // -----------------------------------------------------------------------
    //  Error reporting callback and the matching reset
    // -----------------------------------------------------------------------
    bool handleError(const DOMError& domError);
    void resetErrors();

private :
    // -----------------------------------------------------------------------
    //  Copying is disabled: declared here but not implemented
    // -----------------------------------------------------------------------
    DOMCountErrorHandler(const DOMCountErrorHandler&);
    void operator=(const DOMCountErrorHandler&);

    // -----------------------------------------------------------------------
    //  fSawErrors
    //      True once any error has been reported to this handler; exposed
    //      through getSawErrors() and cleared by resetErrors().
    // -----------------------------------------------------------------------
    bool    fSawErrors;
};


// ---------------------------------------------------------------------------
//  This is a simple class that lets us do easy (though not terribly efficient)
//  trancoding of XMLCh data to local code page for display.
// ---------------------------------------------------------------------------
class StrX
{
public :
    // -----------------------------------------------------------------------
    //  Constructors and Destructor
    // -----------------------------------------------------------------------

    // Transcodes the given XMLCh string and keeps the result in a buffer
    // owned by this object.
    StrX(const XMLCh* const toTranscode)
    {
        fLocalForm = XMLString::transcode(toTranscode);
    }

    // Deep copy. The compiler-generated copy would share the buffer between
    // two objects and release it twice.
    StrX(const StrX& other)
        : fLocalForm(XMLString::replicate(other.fLocalForm))
    {
    }

    // Deep-copy assignment. The new buffer is created before the old one is
    // released, so self-assignment and a throwing replicate() leave this
    // object intact.
    StrX& operator=(const StrX& other)
    {
        if (this != &other)
        {
            char* const copy = XMLString::replicate(other.fLocalForm);
            XMLString::release(&fLocalForm);
            fLocalForm = copy;
        }
        return *this;
    }

    // Releases the owned buffer.
    ~StrX()
    {
        XMLString::release(&fLocalForm);
    }


    // -----------------------------------------------------------------------
    //  Getter methods
    // -----------------------------------------------------------------------

    // Returns the transcoded string. The pointer is owned by this object and
    // is only valid while the object is alive and not assigned to.
    const char* localForm() const
    {
        return fLocalForm;
    }

private :
    // -----------------------------------------------------------------------
    //  Private data members
    //
    //  fLocalForm
    //      This is the local code page form of the string, owned by this
    //      object.
    // -----------------------------------------------------------------------
    char*   fLocalForm;
};

// Writes the local-code-page form of a StrX to the stream and returns the
// stream so that insertions can be chained.
//
// Nothing is written when localForm() is null, because inserting a null
// const char* into an ostream is undefined behaviour.
// NOTE(review): localForm() is presumably null when the StrX was built from
// a null XMLCh pointer - confirm against XMLString::transcode.
inline XERCES_STD_QUALIFIER ostream& operator<<(XERCES_STD_QUALIFIER ostream& target,
                                                const StrX& toDump)
{
    const char* const text = toDump.localForm();
    if (text != 0)
    {
        target << text;
    }
    return target;
}

// Reports whether handleError() has been called since construction or since
// the last resetErrors().
inline bool DOMCountErrorHandler::getSawErrors() const { return fSawErrors; }



test.xml

<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<!-- Sample instance document parsed by test.cpp: a single Person element
     with FirstName and LastName children. The document declares no
     namespace and carries no schema location hint. -->
<Person>
  <FirstName>Joe</FirstName>
  <LastName>XXX</LastName>
</Person>

test.xsd

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- Schema loaded by test.cpp. It has no targetNamespace attribute. -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" 
           elementFormDefault="qualified">

  <!-- Global string-valued elements referenced by Person below. -->
  <xs:element name="FirstName" type="xs:string"/>
  <xs:element name="LastName" type="xs:string"/>

  <!-- Person: a FirstName followed by a LastName, in that order. -->
  <xs:element name="Person">
    <xs:complexType>
      <xs:sequence>
        <xs:element ref="FirstName"/>
        <xs:element ref="LastName"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
</xs:schema>



-- 
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
   http://issues.apache.org/jira/secure/Administrators.jspa
-
For more information on JIRA, see:
   http://www.atlassian.com/software/jira


---------------------------------------------------------------------
To unsubscribe, e-mail: c-dev-unsubscribe@xerces.apache.org
For additional commands, e-mail: c-dev-help@xerces.apache.org


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic