'[Mailman-checkins] [Branch ~mailman-coders/mailman/2.1] Rev 1536: Added character set recoding to ut'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       mailman-cvs
Subject:    [Mailman-checkins] [Branch ~mailman-coders/mailman/2.1] Rev 1536: Added character set recoding to ut
From:       noreply () launchpad ! net
Date:       2015-02-07 20:46:21
Message-ID: 20150207204621.26474.15521.launchpad () ackee ! canonical ! com
[Download RAW message or body]

------------------------------------------------------------
revno: 1536
tags: 2.1.19rc3
committer: Mark Sapiro <mark@msapiro.net>
branch nick: 2.1
timestamp: Sat 2015-02-07 12:44:08 -0800
message:
  Added character set recoding to utf-8 for list attributes for Romanian and
  Russian lists and bumped versions to 2.1.19rc3.
modified:
  Mailman/Version.py
  Mailman/versions.py
  NEWS


--
lp:mailman/2.1
https://code.launchpad.net/~mailman-coders/mailman/2.1

Your team Mailman Checkins is subscribed to branch lp:mailman/2.1.
To unsubscribe from this branch go to https://code.launchpad.net/~mailman-coders/mailman/2.1/+edit-subscription

["revision-diff.txt" (revision-diff.txt)]

=== modified file 'Mailman/Version.py'
--- Mailman/Version.py	2015-02-07 03:33:40 +0000
+++ Mailman/Version.py	2015-02-07 20:44:08 +0000
@@ -16,7 +16,7 @@
 # USA.
 
 # Mailman version
-VERSION = '2.1.19rc2'
+VERSION = '2.1.19rc3'
 
 # And as a hex number in the manner of PY_VERSION_HEX
 ALPHA = 0xa
@@ -31,13 +31,13 @@
 MICRO_REV = 19
 REL_LEVEL = GAMMA
 # at most 15 beta releases!
-REL_SERIAL = 2
+REL_SERIAL = 3
 
 HEX_VERSION = ((MAJOR_REV << 24) | (MINOR_REV << 16) | (MICRO_REV << 8) |
                (REL_LEVEL << 4)  | (REL_SERIAL << 0))
 
 # config.pck schema version number
-DATA_FILE_VERSION = 107
+DATA_FILE_VERSION = 108
 
 # qfile/*.db schema version number
 QFILE_SCHEMA_VERSION = 3

=== modified file 'Mailman/versions.py'
--- Mailman/versions.py	2015-01-24 01:35:45 +0000
+++ Mailman/versions.py	2015-02-07 20:44:08 +0000
@@ -93,6 +93,93 @@
         if not hasattr(l, newname) and newdefault is not uniqueval:
                 setattr(l, newname, newdefault)
 
+    def recode(mlist, f, t):
+        """If the character set for a list's preferred_language has changed,
+        attempt to recode old string values into the new character set.
+
+        mlist is the list, f is the old charset and t is the new charset.
+        """
+        for x in dir(mlist):
+            if x.startswith('_'):
+                continue
+            nv = doitem(getattr(mlist, x), f, t)
+            if nv:
+                setattr(mlist, x, nv)
+
+    def doitem(v, f, t):
+        """Recursively process lists, tuples and dictionary values and
+        convert strings as needed. Return either the updated item or None
+        if no change."""
+        if isinstance(v, str):
+            nv = convert(v, f, t)
+            if nv == v:
+                return None
+            else:
+                return nv
+        elif isinstance(v, list):
+            changed = False
+            nl = []
+            for i in range(len(v)):
+                nv = doitem(v[i], f, t)
+                if nv:
+                    changed = True
+                    nl += [nv]
+                else:
+                    nl += v[i]
+            if changed:
+                return nl
+            else:
+                return None
+        elif isinstance(v, tuple):
+            changed = False
+            nt = ()
+            for i in range(len(v)):
+                nv = doitem(v[i], f, t)
+                if nv:
+                    changed = True
+                    nt += (nv,)
+                else:
+                    nt += (v[i],)
+            if changed:
+                return nt
+            else:
+                return None
+        elif isinstance(v, dict):
+            changed = False
+            nd = {}
+            for k, ov in v.items():
+                nv = doitem(ov, f, t)
+                if nv:
+                    changed = True
+                    nd[k] = nv
+                else:
+                    nd[k] = ov
+            if changed:
+                return nd
+            else:
+                return None
+        else:
+            return None 
+
+    def convert(s, f, t):
+        """This does the actual character set conversion of the string s
+        from charset f to charset t."""
+
+        try:
+            u = unicode(s, f)
+            is_f = True
+        except ValueError:
+            is_f = False
+        try:
+            unicode(s, t)
+            is_t = True
+        except ValueError:
+            is_t = False
+        if is_f and not is_t:
+            return u.encode(t, 'replace')
+        else:
+            return s
+
     # Migrate to 2.1b3, baw 17-Aug-2001
     if hasattr(l, 'dont_respond_to_post_requests'):
         oldval = getattr(l, 'dont_respond_to_post_requests')
@@ -322,6 +409,19 @@
         for name, pattern, description, emptyflag in stored_state['topics']:
             pattern = Utils.strip_verbose_pattern(pattern)
             l.topics.append((name, pattern, description, emptyflag))
+    #
+    # Romanian and Russian had their character sets changed in 2.1.19
+    # to utf-8. If there are any strings in the old encoding, try to recode
+    # them.
+    #
+    if stored_state['data_version'] < 108:
+        if l.preferred_language == 'ro':
+            if Utils.GetCharSet('ro') == 'utf-8':
+                recode(l, 'iso-8859-2', 'utf-8')
+        if l.preferred_language == 'ru':
+            if Utils.GetCharSet('ru') == 'utf-8':
+                recode(l, 'koi8-r', 'utf-8')
+    #
     # from_is_list was called author_is_list in 2.1.16rc2 (only).
     PreferStored('author_is_list', 'from_is_list',
                  mm_cfg.DEFAULT_FROM_IS_LIST)

=== modified file 'NEWS'
--- NEWS	2015-02-07 03:52:43 +0000
+++ NEWS	2015-02-07 20:44:08 +0000
@@ -60,7 +60,7 @@
       archive to emphasize that even if you got to the message from a
       subject, date or author index, previous and next are still by thread.
 
-2.1.19rc2 (06-Feb-2015)
+2.1.19rc3 (07-Feb-2015)
 
   New Features
 
@@ -134,6 +134,15 @@
       templates/site/ru/*.  It may also require recoding any existing koi8-r
       text in list attributes.  (LP: #1418448)
 
+    - Mailman's versions.py has been augmented to help with the above two
+      character set changes.  The first time a list with preferred_language
+      of Romanian or Russian is accessed or upon upgrade to this release,
+      any list attributes which have string values such as description, info,
+      welcome_msg, etc. that appear to be in the old character set will be
+      converted to utf-8.  This is done recursively for the values (but not
+      the keys) of dictionary attributes and the elements of list and tuple
+      attributes.
+
     - The Russian message catalog has been updated by Danil Smirnov.
 
     - The Romanian message catalog has been updated.  (LP: #1415489)



_______________________________________________
Mailman-checkins mailing list
Mailman-checkins@python.org
Unsubscribe: https://mail.python.org/mailman/options/mailman-checkins/mailman-cvs%40progressive-comp.com



[prev in list] [next in list] [prev in thread] [next in thread]
Configure | About | News | Add a list | Sponsored by KoreLogic