[prev in list] [next in list] [prev in thread] [next in thread]
List: xerces-cvs
Subject: svn commit: r582785 - in
From: cargilld () apache ! org
Date: 2007-10-08 11:39:31
Message-ID: 20071008113932.4B3501A983A () eris ! apache ! org
[Download RAW message or body]
Author: cargilld
Date: Mon Oct 8 04:39:31 2007
New Revision: 582785
URL: http://svn.apache.org/viewvc?rev=582785&view=rev
Log:
Encode characters for anyuri using xlink 5.4 algorithm. Patch from Gilbert Chan.
Modified:
xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.cpp
xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.hpp
Modified: xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.cpp
URL: http://svn.apache.org/viewvc/xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.cpp?rev=582785&r1=582784&r2=582785&view=diff
==============================================================================
--- xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.cpp \
(original)
+++ xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.cpp \
Mon Oct 8 04:39:31 2007 @@ -22,10 +22,12 @@
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
+#include <stdio.h>
+#include <xercesc/util/OutOfMemoryException.hpp>
+#include <xercesc/util/XMLUTF8Transcoder.hpp>
#include <xercesc/validators/datatype/AnyURIDatatypeValidator.hpp>
#include <xercesc/validators/datatype/InvalidDatatypeFacetException.hpp>
#include <xercesc/validators/datatype/InvalidDatatypeValueException.hpp>
-#include <xercesc/util/OutOfMemoryException.hpp>
XERCES_CPP_NAMESPACE_BEGIN
@@ -67,6 +69,7 @@
void AnyURIDatatypeValidator::checkValueSpace(const XMLCh* const content
, MemoryManager* const manager)
{
+ bool validURI = true;
// check 3.2.17.c0 must: URI (rfc 2396/2723)
try
@@ -75,13 +78,14 @@
// According to Java 1.1: URLs may also be specified with a
// String and the URL object that it is related to.
//
- if (content && *content)
+ const unsigned int len = XMLString::stringLen(content);
+ if (len)
{
- if (!XMLUri::isValidURI(true, content))
- ThrowXMLwithMemMgr1(InvalidDatatypeValueException
- , XMLExcepts::VALUE_URI_Malformed
- , content
- , manager);
+ // Encode special characters using XLink 5.4 algorithm
+ XMLCh* encoded = (XMLCh*)manager->allocate((len*3+1) * sizeof(XMLCh));
+ ArrayJanitor<XMLCh> encodedJan(encoded);
+ encode(content, len, encoded, manager);
+ validURI = XMLUri::isValidURI(true, encoded);
}
}
catch(const OutOfMemoryException&)
@@ -95,7 +99,99 @@
, content
, manager);
}
+
+ if (!validURI) {
+ ThrowXMLwithMemMgr1(InvalidDatatypeValueException
+ , XMLExcepts::VALUE_URI_Malformed
+ , content
+ , manager);
+ }
+}
+
+/***
+ * To encode special characters in anyURI, by using %HH to represent
+ * special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc.
+ * and non-ASCII characters (whose value >= 128).
+ ***/
+void AnyURIDatatypeValidator::encode(const XMLCh* const content, const unsigned int \
len, XMLCh* encoded, MemoryManager* const manager) +{
+ static const bool needEscapeMap[] = {
+ true , true , true , true , true , true , true , true , true , true , true , \
true , true , true , true , true , /* 0x00 to 0x0F need escape */ + true , \
true , true , true , true , true , true , true , true , true , true , true , true , \
true , true , true , /* 0x10 to 0x1F need escape */ + true , false, true , \
false, false, false, false, false, false, false, false, false, false, false, false, \
false, /* 0x20:' ', 0x22:'"' */ + false, false, false, false, false, false, \
false, false, false, false, false, false, true , false, true , false, /* 0x3C:'<', \
0x3E:'>' */ + false, false, false, false, false, false, false, false, false, \
false, false, false, false, false, false, false, + false, false, false, false, \
false, false, false, false, false, false, false, false, true , false, true , false, \
/* 0x5C:'\\', 0x5E:'^' */ + true , false, false, false, false, false, false, \
false, false, false, false, false, false, false, false, false, /* 0x60:'`' */ + \
false, false, false, false, false, false, false, false, false, false, false, true , \
true , true , true , true /* 0x7B:'{', 0x7C:'|', 0x7D:'}', 0x7E:'~', 0x7F:DEL */ + \
}; +
+ int bufferIndex = 0;
+
+ // For each character in content
+ unsigned int i;
+ for (i = 0; i < len; i++)
+ {
+ int ch = (int)content[i];
+ // If it's not an ASCII character, break here, and use UTF-8 encoding
+ if (ch >= 128)
+ break;
+
+ if (needEscapeMap[ch])
+ {
+ char tempStr[2] = "\0";
+ sprintf(tempStr, "%02X", ch);
+ encoded[bufferIndex++] = '%';
+ encoded[bufferIndex++] = (XMLCh)tempStr[0];
+ encoded[bufferIndex++] = (XMLCh)tempStr[1];
+ }
+ else
+ {
+ encoded[bufferIndex++] = (XMLCh)ch;
+ }
+ }
+
+ // we saw some non-ascii character
+ if (i < len) {
+ // get UTF-8 bytes for the remaining sub-string
+ const XMLCh* remContent = (XMLCh*)&content[i];
+ const unsigned int remContentLen = len - i;
+ XMLByte* UTF8Byte = (XMLByte*)manager->allocate((remContentLen*4+1) * \
sizeof(XMLByte)); + unsigned int charsEaten;
+
+ XMLUTF8Transcoder transcoder(XMLUni::fgUTF8EncodingString, \
remContentLen*4+1, manager); + transcoder.transcodeTo(remContent, \
remContentLen, UTF8Byte, remContentLen*4, charsEaten, XMLTranscoder::UnRep_RepChar); \
+ assert(charsEaten == remContentLen); +
+ unsigned int j;
+ for (j = 0; j < remContentLen; j++) {
+ XMLByte b = UTF8Byte[j];
+ // for non-ascii character: make it positive, then escape
+ if (b < 0) {
+ int ch = b + 256;
+ char tempStr[2] = "\0";
+ sprintf(tempStr, "%02X", ch);
+ encoded[bufferIndex++] = '%';
+ encoded[bufferIndex++] = (XMLCh)tempStr[0];
+ encoded[bufferIndex++] = (XMLCh)tempStr[1];
+ }
+ else if (needEscapeMap[b])
+ {
+ char tempStr[2] = "\0";
+ sprintf(tempStr, "%02X", b);
+ encoded[bufferIndex++] = '%';
+ encoded[bufferIndex++] = (XMLCh)tempStr[0];
+ encoded[bufferIndex++] = (XMLCh)tempStr[1];
+ }
+ else
+ {
+ encoded[bufferIndex++] = (XMLCh)b;
+ }
+ }
+ manager->deallocate(UTF8Byte);
+ }
+ encoded[bufferIndex] = (XMLCh)0;
}
/***
Modified: xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.hpp
URL: http://svn.apache.org/viewvc/xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.hpp?rev=582785&r1=582784&r2=582785&view=diff
==============================================================================
--- xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.hpp \
(original)
+++ xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.hpp \
Mon Oct 8 04:39:31 2007 @@ -81,6 +81,7 @@
// -----------------------------------------------------------------------
AnyURIDatatypeValidator(const AnyURIDatatypeValidator&);
AnyURIDatatypeValidator& operator=(const AnyURIDatatypeValidator&);
+ void encode(const XMLCh* const content, const unsigned int len, XMLCh* encoded, \
MemoryManager* const manager); };
XERCES_CPP_NAMESPACE_END
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic