[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kupu-checkins
Subject:    [kupu-checkins] r36780 - kupu/trunk/kupu/plone
From:       duncan () codespeak ! net
Date:       2007-01-15 15:35:45
Message-ID: 20070115153545.54F55100C6 () code0 ! codespeak ! net
[Download RAW message or body]

Author: duncan
Date: Mon Jan 15 16:35:42 2007
New Revision: 36780

Modified:
   kupu/trunk/kupu/plone/html2captioned.py
Log:
Make entity decoding a bit more robust.

Modified: kupu/trunk/kupu/plone/html2captioned.py
==============================================================================
--- kupu/trunk/kupu/plone/html2captioned.py	(original)
+++ kupu/trunk/kupu/plone/html2captioned.py	Mon Jan 15 16:35:42 2007
@@ -17,6 +17,8 @@
 from urllib import unquote_plus, quote_plus
 from Acquisition import aq_base
 from htmlentitydefs import name2codepoint
+name2codepoint = name2codepoint.copy()
+name2codepoint['apos']=ord("'")
 
 __revision__ = '$Id$'
 
@@ -622,11 +624,14 @@
 EntityPattern = re.compile('&(?:#(\d+)|([a-zA-Z]+));')
 def decodeEntities(s, encoding='utf-8'):
     def unescape(match):
-	code = match.group(1)
+        code = match.group(1)
         if code:
             return unichr(int(code, 10))
         else:
             code = match.group(2)
-            return unichr(name2codepoint[code])
+            if code:
+                return unichr(int(code, 16))
+            else:
+                return unichr(name2codepoint[match.group(3)])
 
     return EntityPattern.sub(unescape, s)

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic