[prev in list] [next in list] [prev in thread] [next in thread]
List: pywikipediabot-users
Subject: [Pywikipedia-l] SVN: [4787] trunk/pywikipedia
From: rotem () mayflower ! knams ! wikimedia ! org
Date: 2007-12-31 20:25:05
Message-ID: E1J9RC5-0004Nl-3V () lily ! knams ! wikimedia ! org
[Download RAW message or body]
Revision: 4787
Author: rotem
Date: 2007-12-31 20:25:04 +0000 (Mon, 31 Dec 2007)
Log Message:
-----------
In family.namespace, making the 'all' parameter include more than just the aliases: \
also the default namespaces, the lowercase forms and the underscore forms. Using \
it in more places. This should also fix the problem for which patch 1861136 was \
suggested.
Modified Paths:
--------------
trunk/pywikipedia/commonsdelinker/delinker.py
trunk/pywikipedia/cosmetic_changes.py
trunk/pywikipedia/family.py
trunk/pywikipedia/image.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/commonsdelinker/delinker.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/delinker.py 2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/commonsdelinker/delinker.py 2007-12-31 20:25:04 UTC (rev 4787)
@@ -186,7 +186,7 @@
def create_regex_i(s):
return ur'(?:%s)' % u''.join([u'[%s%s]' % (c.upper(), c.lower()) for c in s])
- namespaces = ('Image', 'Media') + site.namespace(6, all = True) + \
site.namespace(-2, all = True)
+ namespaces = site.namespace(6, all = True) + \
site.namespace(-2, all = True)
r_namespace = ur'\s*(?:%s)\s*\:\s*' % \
u'|'.join(map(create_regex_i, namespaces)) # Note that this regex creates a group!
r_image = u'(%s)' % create_regex(image).replace(r'\_', '[ _]')
Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py 2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/cosmetic_changes.py 2007-12-31 20:25:04 UTC (rev 4787)
@@ -149,38 +149,10 @@
continue
namespaces = list(family.namespace(self.site.lang, nsNumber, all = \
True))
thisNs = namespaces.pop(0)
- try:
- default = list(family.namespace('_default', nsNumber, all = True))
- except KeyError:
- default = []
# skip main (article) namespace
- if thisNs:
- wrongNamespaces = []
-
- # Add aliases and default namespaces
- wrongNamespaces.extend([ns for ns in namespaces if ns != thisNs])
- wrongNamespaces.extend([ns for ns in default if ns != thisNs and ns \
not in wrongNamespaces])
-
- # Lowercase versions of namespaces
- if not self.site.nocapitalize:
- # Add lowercase version of the current wrong namespaces
- wrongNamespaces.extend([ns[0].lower() + ns[1:] for ns in \
wrongNamespaces if ns[0].lower() != ns[0].upper()])
-
- # Add lowercase version of the correct namespace
- uncapitalized = thisNs[0].lower() + thisNs[1:]
- if uncapitalized != thisNs:
- wrongNamespaces.append(uncapitalized)
-
- # Underscore versions of namespaces
- # Add underscore versions of all wrong namespaces
- wrongNamespaces.extend([ns.replace(' ', '_') for ns in \
wrongNamespaces if ' ' in ns])
- # Add underscore version of correct namespace
- if ' ' in thisNs:
- wrongNamespaces.append(thisNs.replace(' ', '_'))
-
- if wrongNamespaces:
- text = wikipedia.replaceExcept(text, r'\[\[\s*(' + \
'|'.join(wrongNamespaces) + ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs + \
':\g<nameAndLabel>]]', exceptions)
+ if thisNs and namespaces:
+ text = wikipedia.replaceExcept(text, r'\[\[\s*(' + \
'|'.join(namespaces) + ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs + \
':\g<nameAndLabel>]]', exceptions)
return text
def cleanUpLinks(self, text):
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/family.py 2007-12-31 20:25:04 UTC (rev 4787)
@@ -2437,21 +2437,39 @@
raise KeyError('ERROR: Unknown namespace %d for %s:%s' % (ns_number, \
code, self.name))
elif self.isNsI18N(ns_number, code):
v = self.namespaces[ns_number][code]
+ if type(v) is not list:
+ v = [v,]
+ if all and self.isNsI18N(ns_number, fallback):
+ v2 = self.namespaces[ns_number][fallback]
+ if type(v2) is list:
+ v.extend(v2)
+ else:
+ v.append(v2)
elif fallback and self.isNsI18N(ns_number, fallback):
v = self.namespaces[ns_number][fallback]
+ if type(v) is not list:
+ v = [v,]
else:
raise KeyError('ERROR: title for namespace %d in language %s unknown' % \
(ns_number, code))
if all:
- if type(v) is list:
- return tuple(v)
- else:
- return (v, )
+ namespaces = []
+
+ # Unique list
+ for ns in v:
+ if ns not in namespaces:
+ namespaces.append(ns)
+
+ # Lowercase versions of namespaces
+ if code not in self.nocapitalize:
+ namespaces.extend([ns[0].lower() + ns[1:] for ns in namespaces if ns \
and ns[0].lower() != ns[0].upper()])
+
+ # Underscore versions of namespaces
+ namespaces.extend([ns.replace(' ', '_') for ns in namespaces if ns and ' \
' in ns])
+
+ return tuple(namespaces)
else:
- if type(v) is list:
- return v[0]
- else:
- return v
+ return v[0]
def isDefinedNS(self, ns_number):
"""Return True if the namespace has been defined in this family.
@@ -2561,17 +2579,7 @@
return self.namespace(code, 14, fallback)
def category_namespaces(self, code):
- namespaces = []
- namespace_title = self.namespace(code, 14)
- namespaces.append(namespace_title)
- if namespace_title != namespace_title.lower():
- namespaces.append(namespace_title.lower())
- default_namespace_title = self.namespace('_default', 14)
- if namespace_title != default_namespace_title:
- namespaces.append(default_namespace_title)
- if default_namespace_title != default_namespace_title.lower():
- namespaces.append(default_namespace_title.lower())
- return namespaces
+ return self.namespace(code, 14, all = True)
# Redirect code can be translated.
# Note that redirect codes are case-insensitive, so it is enough
Modified: trunk/pywikipedia/image.py
===================================================================
--- trunk/pywikipedia/image.py 2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/image.py 2007-12-31 20:25:04 UTC (rev 4787)
@@ -109,15 +109,14 @@
old = self.oldImage
old = re.sub('[_ ]', '[_ ]', old)
- #TODO: Add internationalization of Image namespace name.
if not self.loose or not self.newImage:
- ImageRegex = re.compile(r'\[\[ *[Ii]mage:' + old + ' \
*(?P<parameters>\|[^\n]+|) *\]\]')
+ ImageRegex = re.compile(r'\[\[ *(?:' \
+ '|'.join(wikipedia.getSite().namespace(6, all = True)) + ')\s*:\s*' + old + ' \
*(?P<parameters>\|[^\n]+|) *\]\]')
else:
ImageRegex = re.compile(r'' + old)
if self.newImage:
if not self.loose:
- replacements.append((ImageRegex, '[[Image:' + self.newImage + \
'\g<parameters>]]'))
+ replacements.append((ImageRegex, '[[(?:' + \
'|'.join(wikipedia.getSite().namespace(6, all = True)) + ')\s*:\s*' + self.newImage + \
'\g<parameters>]]'))
else:
replacements.append((ImageRegex, self.newImage))
else:
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/wikipedia.py 2007-12-31 20:25:04 UTC (rev 4787)
@@ -2182,14 +2182,6 @@
text = self.get()
new_text = text
- def caseInsensitivePattern(s):
- """
- Creates a pattern that matches the string case-insensitively.
- """
- return ur'(?:%s)' % u''.join([u'[%s%s]'
- % (re.escape(c.upper()), \
re.escape(c.lower()))
- for c in s])
-
def capitalizationPattern(s):
"""
Given a string, creates a pattern that matches the string, with
@@ -2201,9 +2193,9 @@
else:
return ur'(?:[%s%s]%s)' % (re.escape(s[0].upper()), \
re.escape(s[0].lower()), re.escape(s[1:]))
- namespaces = set(('Image', 'Media') + site.namespace(6, all = True) + \
site.namespace(-2, all = True))
+ namespaces = set(site.namespace(6, all = \
True) + site.namespace(-2, all = True))
# note that the colon is already included \
here
- namespacePattern = ur'\s*(?:%s)\s*\:\s*' % \
u'|'.join(map(caseInsensitivePattern, namespaces))
+ namespacePattern = \
ur'\s*(?:%s)\s*\:\s*' % u'|'.join(namespaces)
imagePattern = u'(%s)' % capitalizationPattern(image).replace(r'\_', '[ _]')
_______________________________________________
Pywikipedia-l mailing list
Pywikipedia-l@lists.wikimedia.org
http://lists.wikimedia.org/mailman/listinfo/pywikipedia-l
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic