[prev in list] [next in list] [prev in thread] [next in thread] 

List:       pywikipediabot-users
Subject:    [Pywikipedia-l] SVN:  [5268] branches/rewrite/pywikibot
From:       russblau () mayflower ! knams ! wikimedia ! org
Date:       2008-04-24 13:50:10
Message-ID: E1Jp1py-0000Cf-VW () lily ! knams ! wikimedia ! org
[Download RAW message or body]

Revision: 5268
Author:   russblau
Date:     2008-04-24 13:50:10 +0000 (Thu, 24 Apr 2008)

Log Message:
-----------
further implementation of getrevisions; documentation; refactoring.

Modified Paths:
--------------
    branches/rewrite/pywikibot/README-conversion.txt
    branches/rewrite/pywikibot/__init__.py
    branches/rewrite/pywikibot/data/api.py
    branches/rewrite/pywikibot/page.py
    branches/rewrite/pywikibot/site.py

Modified: branches/rewrite/pywikibot/README-conversion.txt
===================================================================
--- branches/rewrite/pywikibot/README-conversion.txt	2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/README-conversion.txt	2008-04-24 13:50:10 UTC (rev 5268)
@@ -12,15 +12,27 @@
 bot.
 
 With pywikipedia scripts were importing "wikipedia" or "pagegenerators"
-librairies; pywikibot is now written as a standard module. 
+libraries; pywikibot is now written as a standard package, and other modules
+are contained within it (e.g., pywikibot.site contains Site classes). However,
+most commonly-used names are imported into the pywikibot namespace, so that
+module names don't need to be used unless specified in the documentation.
+
 (To use it, just import "pywikibot", assuming that pywikibot/ is in sys.path)
 
 == Python libraries ==
 
-You will need, to run pywikibot, httplib2 and setuptools
-* httplib2 : http://code.google.com/p/httplib2/
+[Note: the goal will be to package pywikibot with setuptools easy_install,
+so that these dependencies will be loaded automatically when the package is
+installed, and users won't need to worry about this...]
+
+To run pywikibot, you will need the httplib2, simplejson, and setuptools packages--
+* httplib2   : http://code.google.com/p/httplib2/
 * setuptools : http://pypi.python.org/pypi/setuptools/
+* simplejson : https://svn.red-bean.com/bob/simplejson/tags/simplejson-1.7.1/docs/index.html
 
+or, if you already have setuptools installed, just execute 'easy_install httplib2'
+and 'easy_install simplejson'
+
 If you run into errors involving httplib2.urlnorm, update httplib2 to
 0.4.0 (Ubuntu package python-httplib2, for example, is outdated)
 

Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py	2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/__init__.py	2008-04-24 13:50:10 UTC (rev 5268)
@@ -60,7 +60,7 @@
 
 getSite = Site # alias for backwards-compability
 
-from page import Page, ImagePage, Category
+from page import Page, ImagePage, Category, Link
 
 # DEBUG
 

Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py	2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/data/api.py	2008-04-24 13:50:10 UTC (rev 5268)
@@ -329,22 +329,7 @@
         
         """
         p = pywikibot.Page(self.site, pagedata['title'], pagedata['ns'])
-        if "pageid" in pagedata:
-            self._pageid = int(pagedata['pageid'])
-        elif "missing" in pagedata:
-            self._pageid = 0    # Non-existent page
-        else:
-            raise AssertionError(
-                "Page %s has neither 'pageid' nor 'missing' attribute"
-                 % pagedata['title'])
-        if 'lastrevid' in pagedata:
-            p._revid = pagedata['lastrevid']
-        if 'touched' in pagedata:
-            p._timestamp = pagedata['touched']
-        if 'protection' in pagedata:
-            p._protection = {}
-            for item in pagedata['protection']:
-                p._protection[item['type']] = item['level']
+        update_page(p, pagedata)
         return p
 
 
@@ -366,8 +351,12 @@
 
 
 class PropertyGenerator(object):
-    """Generator for queries of type action=query&property=..."""
+    """Generator for queries of type action=query&property=...
 
+    Note that this generator yields one or more dict object(s) corresponding
+    to each "page" item(s) from the API response; the calling module has to
+    decide what to do with the contents of the dict."""
+
     def __init__(self, prop, **kwargs):
         """
         Required and optional parameters are as for C{Request}, except that
@@ -384,7 +373,7 @@
         if self.limits[prop] and kwargs.pop("getAll", False):
             self.request['g'+self.limits[generator]] = "max"
         self.site = self.request.site
-        self.resultkey = prop # element to look for in result
+        self.resultkey = prop
 
     # dict mapping property types to their limit parameter names
     limits = {'revisions': 'rvlimit',
@@ -410,21 +399,8 @@
             if not ("query" in self.data and "pages" in self.data["query"]):
                 raise StopIteration
             pagedata = self.data["query"]["pages"].values()
-            assert len(pagedata)==1
-            pagedata = pagedata[0]
-            if not self.resultkey in pagedata:
-                raise StopIteration
-            if isinstance(pagedata[self.resultkey], dict):
-                for v in pagedata[self.resultkey].itervalues():
-                    yield v 
-            elif isinstance(pagedata[self.resultkey], list):
-                for v in pagedata[self.resultkey]:
-                    yield v
-            else:
-                raise APIError("Unknown",
-                               "Unknown format in ['%s'] value."
-                                 % self.resultkey,
-                               data=pagedata[self.resultkey])
+            for item in pagedata:
+                yield item
             if not "query-continue" in self.data:
                 return
             if not self.resultkey in self.data["query-continue"]:
@@ -472,6 +448,32 @@
         pywikibot.cookie_jar.save()
 
 
+def update_page(page, pagedict):
+    """Update attributes of Page object page, based on query data in pagequery
+
+    @param page: object to be updated
+    @type page: Page
+    @param pagedict: the contents of a "page" element of a query response
+    @type pagedict: dict
+
+    """
+    if "pageid" in pagedict:
+        page._pageid = int(pagedict['pageid'])
+    elif "missing" in pagedict:
+        page._pageid = 0    # Non-existent page
+    else:
+        raise AssertionError(
+            "Page %s has neither 'pageid' nor 'missing' attribute"
+             % pagedict['title'])
+    if 'lastrevid' in pagedict:
+        page._revid = pagedict['lastrevid']
+    if 'touched' in pagedict:
+        page._timestamp = pagedict['touched']
+    if 'protection' in pagedict:
+        page._protection = {}
+        for item in pagedict['protection']:
+            page._protection[item['type']] = item['level'], item['expiry']
+
 if __name__ == "__main__":
     from pywikibot import Site
     logging.getLogger().setLevel(logging.DEBUG)

Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py	2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/page.py	2008-04-24 13:50:10 UTC (rev 5268)
@@ -678,8 +678,8 @@
             limit = None
         else:
             limit = revCount
-        return self.site().getrevisions(self, withText=False,
-                                        older=not reverseOrder, limit=limit)
+        return self.site().getrevisions(self, getText=False,
+                                        rvdir=not reverseOrder, limit=limit)
 
     def getVersionHistoryTable(self, forceReload=False, reverseOrder=False,
                                getAll=False, revCount=500):

Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py	2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/site.py	2008-04-24 13:50:10 UTC (rev 5268)
@@ -491,8 +491,12 @@
                         "info", inprop="protection|talkid|subjectid",
                         titles=page.title(withSection=False
                                           ).encode(self.encoding()))
-            for item in query():
-                pass #FIXME
+            for pageitem in query:
+                if pageitem['title'] != page.title(withSection=False):
+                    raise RuntimeError(
+                        "page_exists: Query on %s returned data on '%s'"
+                        % (page, pageitem['title']))
+                page._pageid = pageitem['pageid']
         return page._pageid > 0
 
     # following group of methods map more-or-less directly to API queries
@@ -621,30 +625,80 @@
         return cmgen
 
     def getrevisions(self, page=None, getText=False, revids=None,
-                     older=True, limit=None, sysop=False, user=None,
-                     excludeuser=None):
+                     limit=None, startid=None, endid=None, starttime=None,
+                     endtime=None, rvdir=None, user=None, excludeuser=None,
+                     section=None, sysop=False):
         """Retrieve and store revision information.
 
-        @param page: retrieve the history of this Page (required unless ids
+        By default, retrieves the last (current) revision of the page,
+        I{unless} any of the optional parameters revids, startid, endid,
+        starttime, endtime, rvdir, user, excludeuser, or limit are
+        specified. Unless noted below, all parameters not specified
+        default to False.
+
+        If rvdir is False or not specified, startid must be greater than
+        endid if both are specified; likewise, starttime must be greater
+        than endtime. If rvdir is True, these relationships are reversed.
+
+        @param page: retrieve revisions of this Page (required unless ids
             is specified)
-        @param getText: if True, retrieve the wiki-text of each revision as
-            well
+        @param getText: if True, retrieve the wiki-text of each revision;
+            otherwise, only retrieve the revision metadata (default)
+        @param section: if specified, retrieve only this section of the text
+            (getText must be True); section must be given by number (top of
+            the article is section 0), not name
+        @type section: int
         @param revids: retrieve only the specified revision ids (required
             unless page is specified)
-        @param older: if True, retrieve newest revisions first; otherwise,
-            retrieve oldest revisions first
-        @param limit: if specified, retrieve no more than this number of
-            revisions (defaults to latest revision only)
+        @type revids: list of ints
+        @param limit: Retrieve no more than this number of revisions
         @type limit: int
+        @param startid: retrieve revisions starting with this revid
+        @param endid: stop upon retrieving this revid
+        @param starttime: retrieve revisions starting at this timestamp
+        @param endtime: stop upon reaching this timestamp
+        @param rvdir: if false, retrieve newest revisions first (default);
+            if true, retrieve earliest first
         @param user: retrieve only revisions authored by this user
         @param excludeuser: retrieve all revisions not authored by this user
         @param sysop: if True, switch to sysop account (if available) to
             retrieve this page
 
         """
+        latest = (revids is None and
+                  startid is None and
+                  endid is None and
+                  starttime is None and
+                  endtime is None and
+                  rvdir is None and
+                  user is None and
+                  excludeuser is None and
+                  limit is None)  # if True, we are retrieving current revision
+
+        # check for invalid argument combinations
         if page is None and revids is None:
             raise ValueError(
-                "getrevisions needs either page or revids argument.")
+                "getrevisions:  either page or revids argument required")
+        if (startid is not None or endid is not None) and \
+                (starttime is not None or endtime is not None):
+            raise ValueError(
+                "getrevisions: startid/endid combined with starttime/endtime")
+        if starttime is not None and endtime is not None:
+            if rvdir and starttime >= endtime:
+                raise ValueError(
+                    "getrevisions: starttime > endtime with rvdir=True")
+            if (not rvdir) and endtime >= starttime:
+                raise ValueError(
+                    "getrevisions: endtime > starttime with rvdir=False")
+        if startid is not None and endid is not None:
+            if rvdir and startid >= endid:
+                raise ValueError(
+                    "getrevisions: startid > endid with rvdir=True")
+            if (not rvdir) and endid >= startid:
+                raise ValueError(
+                    "getrevisions: endid > startid with rvdir=False")
+
+        # assemble API request
         if revids is None:
             rvtitle = page.title(withSection=False).encode(self.encoding())
             rvgen = api.PropertyGenerator(u"revisions", titles=rvtitle)
@@ -654,28 +708,50 @@
         if getText:
             rvgen.request[u"rvprop"] = \
                     u"ids|flags|timestamp|user|comment|content"
-            if page.section():
-                rvgen.request[u"rvsection"] = unicode(page.section())
+            if section is not None:
+                rvgen.request[u"rvsection"] = unicode(section)
         if limit:
             rvgen.request[u"rvlimit"] = unicode(limit)
-        if not older:
+        if rvdir:
             rvgen.request[u"rvdir"] = u"newer"
+        elif rvdir is not None:
+            rvgen.request[u"rvdir"] = u"older"
+        if startid:
+            rvgen.request[u"rvstartid"] = startid
+        if endid:
+            rvgen.request[u"rvendid"] = endid
+        if starttime:
+            rvgen.request[u"rvstart"] = starttime
+        if endtime:
+            rvgen.request[u"rvend"] = endtime
         if user:
             rvgen.request[u"rvuser"] = user
         elif excludeuser:
             rvgen.request[u"rvexcludeuser"] = excludeuser
-        # TODO if sysop:
-        for rev in rvgen:
-            revision = pywikibot.page.Revision(revid=rev['revid'],
-                                               timestamp=rev['timestamp'],
-                                               user=rev['user'],
-                                               anon=rev.has_key('anon'),
-                                               comment=rev.get('comment', u''),
-                                               minor=rev.has_key('minor'),
-                                               text=rev.get('*', None))
-            page._revisions[revision.revid] = revision
-            if revids is None and limit is None and user is None and excludeuser is None:
-                page._revid = revision.revid
+        # TODO if sysop: something
+        for pagedata in rvgen:
+            if page is not None:
+                if pagedata['title'] != page.title(withSection=False):
+                    raise RuntimeError(
+                        "getrevisions: Query on %s returned data on '%s'"
+                        % (page, pagedata['title']))
+            else:
+                page = Page(self, pagedata['title'])
+            api.update_page(page, pagedata)
+
+            for rev in pagedata['revisions']:
+                revision = pywikibot.page.Revision(
+                                            revid=rev['revid'],
+                                            timestamp=rev['timestamp'],
+                                            user=rev['user'],
+                                            anon=rev.has_key('anon'),
+                                            comment=rev.get('comment',  u''),
+                                            minor=rev.has_key('minor'),
+                                            text=rev.get('*', None)
+                                          )
+                page._revisions[revision.revid] = revision
+                if latest:
+                    page._revid = revision.revid
                                 
 
 #### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####



_______________________________________________
Pywikipedia-l mailing list
Pywikipedia-l@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-l
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic