'[Pywikipedia-l] SVN: [4787] trunk/pywikipedia'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       pywikipediabot-users
Subject:    [Pywikipedia-l] SVN:  [4787] trunk/pywikipedia
From:       rotem () mayflower ! knams ! wikimedia ! org
Date:       2007-12-31 20:25:05
Message-ID: E1J9RC5-0004Nl-3V () lily ! knams ! wikimedia ! org
[Download RAW message or body]

Revision: 4787
Author:   rotem
Date:     2007-12-31 20:25:04 +0000 (Mon, 31 Dec 2007)

Log Message:
-----------
In family.namespace, making the 'all' parameter include more than just the aliases:
also the default namespaces, the lowercase forms and the underscore forms. Using
it in more places. This should also fix the problem for which patch 1861136 was
suggested.

Modified Paths:
--------------
    trunk/pywikipedia/commonsdelinker/delinker.py
    trunk/pywikipedia/cosmetic_changes.py
    trunk/pywikipedia/family.py
    trunk/pywikipedia/image.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/commonsdelinker/delinker.py
===================================================================

--- trunk/pywikipedia/commonsdelinker/delinker.py	2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/commonsdelinker/delinker.py	2007-12-31 20:25:04 UTC (rev 4787)
@@ -186,7 +186,7 @@
 			def create_regex_i(s):
 				return ur'(?:%s)' % u''.join([u'[%s%s]' % (c.upper(), c.lower()) for c in s])
 			
-			namespaces = ('Image', 'Media') + site.namespace(6, all = True) + site.namespace(-2, all = True)
+			namespaces = site.namespace(6, all = True) + site.namespace(-2, all = True)
 			r_namespace = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(map(create_regex_i, namespaces))
 			# Note that this regex creates a group!
 			r_image = u'(%s)' % create_regex(image).replace(r'\_', '[ _]')

Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py	2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/cosmetic_changes.py	2007-12-31 20:25:04 UTC (rev 4787)
@@ -149,38 +149,10 @@
                 continue
             namespaces = list(family.namespace(self.site.lang, nsNumber, all = True))
             thisNs = namespaces.pop(0)
-            try:
-                default = list(family.namespace('_default', nsNumber, all = True))
-            except KeyError:
-                default = []
 
             # skip main (article) namespace
-            if thisNs:
-                wrongNamespaces = []
-
-                # Add aliases and default namespaces
-                wrongNamespaces.extend([ns for ns in namespaces if ns != thisNs])
-                wrongNamespaces.extend([ns for ns in default if ns != thisNs and ns not in wrongNamespaces])
-
-                # Lowercase versions of namespaces
-                if not self.site.nocapitalize:
-                    # Add lowercase version of the current wrong namespaces
-                    wrongNamespaces.extend([ns[0].lower() + ns[1:] for ns in wrongNamespaces if ns[0].lower() != ns[0].upper()])
-
-                    # Add lowercase version of the correct namespace
-                    uncapitalized = thisNs[0].lower() + thisNs[1:]
-                    if uncapitalized != thisNs:
-                        wrongNamespaces.append(uncapitalized)
-
-                # Underscore versions of namespaces
-                # Add underscore versions of all wrong namespaces
-                wrongNamespaces.extend([ns.replace(' ', '_') for ns in wrongNamespaces if ' ' in ns])
-                # Add underscore version of correct namespace
-                if ' ' in thisNs:
-                    wrongNamespaces.append(thisNs.replace(' ', '_'))
-
-                if wrongNamespaces:
-                    text = wikipedia.replaceExcept(text, r'\[\[\s*(' + '|'.join(wrongNamespaces) + ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs + ':\g<nameAndLabel>]]', exceptions)
+            if thisNs and namespaces:
+                text = wikipedia.replaceExcept(text, r'\[\[\s*(' + '|'.join(namespaces) + ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs + ':\g<nameAndLabel>]]', exceptions)
         return text
 
     def cleanUpLinks(self, text):

Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py	2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/family.py	2007-12-31 20:25:04 UTC (rev 4787)
@@ -2437,21 +2437,39 @@
             raise KeyError('ERROR: Unknown namespace %d for %s:%s' % (ns_number, code, self.name))
         elif self.isNsI18N(ns_number, code):
             v = self.namespaces[ns_number][code]
+            if type(v) is not list:
+                v = [v,]
+            if all and self.isNsI18N(ns_number, fallback):
+                v2 = self.namespaces[ns_number][fallback]
+                if type(v2) is list:
+                    v.extend(v2)
+                else:
+                    v.append(v2)
         elif fallback and self.isNsI18N(ns_number, fallback):
             v = self.namespaces[ns_number][fallback]
+            if type(v) is not list:
+                v = [v,]
         else:
             raise KeyError('ERROR: title for namespace %d in language %s unknown' % (ns_number, code))
 
         if all:
-            if type(v) is list:
-                return tuple(v)
-            else:
-                return (v, )
+            namespaces = []
+
+            # Unique list
+            for ns in v:
+                if ns not in namespaces:
+                    namespaces.append(ns)
+
+            # Lowercase versions of namespaces
+            if code not in self.nocapitalize:
+                namespaces.extend([ns[0].lower() + ns[1:] for ns in namespaces if ns and ns[0].lower() != ns[0].upper()])
+
+            # Underscore versions of namespaces
+            namespaces.extend([ns.replace(' ', '_') for ns in namespaces if ns and ' ' in ns])
+
+            return tuple(namespaces)
         else:
-            if type(v) is list:
-                return v[0]
-            else:
-                return v
+            return v[0]
 
     def isDefinedNS(self, ns_number):
         """Return True if the namespace has been defined in this family.
@@ -2561,17 +2579,7 @@
         return self.namespace(code, 14, fallback)
 
     def category_namespaces(self, code):
-        namespaces = []
-        namespace_title = self.namespace(code, 14)
-        namespaces.append(namespace_title)
-        if namespace_title != namespace_title.lower():
-            namespaces.append(namespace_title.lower())
-        default_namespace_title = self.namespace('_default', 14)
-        if namespace_title != default_namespace_title:
-            namespaces.append(default_namespace_title)
-            if default_namespace_title != default_namespace_title.lower():
-                namespaces.append(default_namespace_title.lower())
-        return namespaces
+        return self.namespace(code, 14, all = True)
 
     # Redirect code can be translated.
     # Note that redirect codes are case-insensitive, so it is enough

Modified: trunk/pywikipedia/image.py
===================================================================
--- trunk/pywikipedia/image.py	2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/image.py	2007-12-31 20:25:04 UTC (rev 4787)
@@ -109,15 +109,14 @@
             old = self.oldImage
 
         old = re.sub('[_ ]', '[_ ]', old)
-        #TODO: Add internationalization of Image namespace name.
         if not self.loose or not self.newImage:
-            ImageRegex = re.compile(r'\[\[ *[Ii]mage:' + old + ' *(?P<parameters>\|[^\n]+|) *\]\]')
+            ImageRegex = re.compile(r'\[\[ *(?:' + '|'.join(wikipedia.getSite().namespace(6, all = True)) + ')\s*:\s*' + old + ' *(?P<parameters>\|[^\n]+|) *\]\]')
         else:
             ImageRegex = re.compile(r'' + old)
 
         if self.newImage:
             if not self.loose:
-                replacements.append((ImageRegex, '[[Image:' + self.newImage + '\g<parameters>]]'))
+                replacements.append((ImageRegex, '[[(?:' + '|'.join(wikipedia.getSite().namespace(6, all = True)) + ')\s*:\s*' + self.newImage + '\g<parameters>]]'))
             else:
                 replacements.append((ImageRegex, self.newImage))
         else:

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/wikipedia.py	2007-12-31 20:25:04 UTC (rev 4787)
@@ -2182,14 +2182,6 @@
         text = self.get()
         new_text = text
 
-        def caseInsensitivePattern(s):
-            """
-            Creates a pattern that matches the string case-insensitively.
-            """
-            return ur'(?:%s)' % u''.join([u'[%s%s]'
-                                            % (re.escape(c.upper()), re.escape(c.lower()))
-                                          for c in s])
-
         def capitalizationPattern(s):
             """
             Given a string, creates a pattern that matches the string, with
@@ -2201,9 +2193,9 @@
             else:
                 return ur'(?:[%s%s]%s)' % (re.escape(s[0].upper()), re.escape(s[0].lower()), re.escape(s[1:]))
 
-        namespaces = set(('Image', 'Media') + site.namespace(6, all = True) + site.namespace(-2, all = True))
+        namespaces = set(site.namespace(6, all = True) + site.namespace(-2, all = True))
         # note that the colon is already included here
-        namespacePattern = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(map(caseInsensitivePattern, namespaces))
+        namespacePattern = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(namespaces)
 
         imagePattern = u'(%s)' % capitalizationPattern(image).replace(r'\_', '[ _]')
 



_______________________________________________
Pywikipedia-l mailing list
Pywikipedia-l@lists.wikimedia.org
http://lists.wikimedia.org/mailman/listinfo/pywikipedia-l


[prev in list] [next in list] [prev in thread] [next in thread]