[prev in list] [next in list] [prev in thread] [next in thread]
List: opencsw-devel
Subject: Re: SF.net SVN: gar:[22910] csw/mgar/gar/v2/lib/python/compare_catalog.py
From: Maciej_(Matchek)_BliziĆski <maciej () opencsw ! org>
Date: 2014-01-28 16:36:26
Message-ID: CALtRa-4KeAyn05HExaewfcuTAcF4UHZotooxExOzwPf-aQMh4Q () mail ! gmail ! com
[Download RAW message or body]
Hi Carsten,
More comments! I hope you'll be able to reduce the size of this script.
2014-01-28 <cgrzemba@users.sourceforge.net>
> Revision: 22910
> http://sourceforge.net/p/gar/code/22910
> Author: cgrzemba
> Date: 2014-01-28 16:20:56 +0000 (Tue, 28 Jan 2014)
> Log Message:
> -----------
> use argparse, add out of order pkg compare
>
> Modified Paths:
> --------------
> csw/mgar/gar/v2/lib/python/compare_catalog.py
>
> Modified: csw/mgar/gar/v2/lib/python/compare_catalog.py
> ===================================================================
> --- csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28
> 12:36:44 UTC (rev 22909)
> +++ csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28
> 16:20:56 UTC (rev 22910)
> @@ -2,55 +2,108 @@
>
> import cjson
> import logging
> -import optparse
> +import argparse
> import urllib2
> import sys
> +import re
>
> logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s')
> logger = logging.getLogger(__name__)
>
> +remote_scheme = ['http','https']
> +local_scheme = ['file']
> +
> +def prepareCatListFromURI(uri):
> + catlst = []
> + if '://' in uri:
>
We can say that you have to have a valid URI that either starts with
http://or https://or file://
> + scheme = uri.split(':')[0]
> + if scheme in remote_scheme:
> + logger.info("fetch remote %s", uri)
> + data = urllib2.urlopen(uri).read()
>
Let's use the requests module. We have a package.
http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/rest.py#L250
> + catlst = cjson.decode(data)
> + for e in catlst:
> + del e[9]
> + return catlst
> + elif scheme in local_scheme:
> + uri = re.sub('.*://','',uri)
> + else:
> + logger.error('unsupported URI format')
> + sys.exit(4)
> + with open(uri) as lcat:
> + logger.info("fetch local %s", uri)
> + for line in lcat: # skip 4 lines header '# CREATIONDATE'
>
We already have a parser, please use it.
http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/catalog.py#L66
> + if line.startswith("# CREATIONDATE"):
> + break
> + for line in lcat:
> + if line.startswith("-----BEGIN PGP SIGNATURE"):
> + break
> + catlst.append(line.rstrip().split(' '))
> + return catlst
> +
> +def compareOutOfOrder(a_catlst, b_catlst, idx):
> + a_pkgName2Idx = {}
> + i = idx
> + for j in range(idx,len(a_catlst)):
> + a_pkgName2Idx[a_catlst[j][0]] = j
> + # import pdb; pdb.set_trace()
> + while i < len(b_catlst):
> + if b_catlst[i][0] in a_pkgName2Idx:
> + if b_catlst[i] != a_catlst[a_pkgName2Idx[b_catlst[i][0]]]:
> + logger.warning("pkgs different at {0},{1}: {2}
> {3}".format(i,a_pkgName2Idx[b_catlst[i][0]],a_catlst[a_pkgName2Idx[b_catlst[i][0]]],b_catlst[i]))
> + sys.exit(1)
> + else:
> + logger.warning("not in acat: %s", b_catlst[i])
> + sys.exit(1)
> + i += 1
> + b_pkgName2Idx = {}
> + for j in range(idx,len(b_catlst)):
> + b_pkgName2Idx[b_catlst[j][0]] = j
> + # import pdb; pdb.set_trace()
> + i = idx
> + while i < len(a_catlst):
> + if a_catlst[i][0] not in b_pkgName2Idx:
> + logger.warning("not in bcat: %s", a_catlst[i])
> + sys.exit(1)
> + i += 1
>
Why not convert both to a data structure consisting of basic types: nested
lists and dicts? Then you can just compare them using the == operator. If
you wanted some diagnostic output to display the difference, you can always
serialize them and display the textual diff - it will save you lots of
lines of code.
> def main():
> - parser = optparse.OptionParser()
> - parser.add_option("-v","--verbose", dest="verbose",
> action="store_true",default=False)
> - parser.add_option("-a","--existing-catalog", dest="oldcatalog",
> - help='set URI of existing catalog', metavar =
> 'catalog')
> - parser.add_option("-b","--new-catalog", dest="newcatalog",
> - help='set URI of catalog to generate', metavar =
> 'catalog')
> - options, args = parser.parse_args()
> + parser = argparse.ArgumentParser()
> + parser.add_argument("-v","--verbose", dest="verbose",
> action="store_true",default=False)
> + parser.add_argument("acat",help="catalog URI")
> + parser.add_argument("bcat",help="catalog URI")
> + args = parser.parse_args()
> opterror = False
> - if options.verbose:
> + if args.verbose:
> logger.setLevel(logging.INFO)
> - if options.debug:
> - logger.setLevel(logging.DEBUG)
> - if options.newcatalog is None or options.oldcatalog is None:
> - logger.error("mandatory option missing")
> + if args.acat is None or args.bcat is None:
> + logger.error("mandatory args 'acat' 'bcat' missing")
> sys.exit(2)
> - oldcat = options.oldcatalog
> - newcat = options.newcatalog
> - logger.info(" compare %s with %s", oldcat, newcat)
>
> - data = urllib2.urlopen(oldcat).read()
> - a_catlst = cjson.decode(data)
> - for e in a_catlst:
> - del e[9]
> - b_catlst = []
> - with open(newcat) as nc:
> - for i in range(4): # skip 4 lines header
> - nc.readline()
> - for cl in nc.readlines():
> - if "-----BEGIN" == cl.split(' ')[0]:
> - break
> - b_catlst.append(cl.rstrip().split(' '))
> + logger.info("fetch cat_a %s", args.acat)
> + a_catlst = prepareCatListFromURI(args.acat)
> +
> + logger.info("fetch cat_b %s", args.bcat)
> + b_catlst = prepareCatListFromURI(args.bcat)
> +
> + logger.info("compare ...")
> if len(a_catlst) != len(b_catlst):
> - logger.warning("a has %d, b has %d
> packges",len(a_catlst),len(b_catlst))
> - sys.exit(1)
> + logger.warning("a has %d, b has %d
> packages",len(a_catlst),len(b_catlst))
> + # sys.exit(1)
> for i in range(len(b_catlst)):
> - if b_catlst[i] != a_catlst[i] :
> - logger.warning("a is {0}, b is
> {1}".format(a_catlst[i],b_catlst[i]))
> - sys.exit(1)
> + try:
> + if b_catlst[i] != a_catlst[i] :
> + if b_catlst[i][0] != a_catlst[i][0]:
> + logger.warning("packages out of order: A: %s; B:
> %s",a_catlst[i][0], b_catlst[i][0])
>
Hm, what I meant is that out of order comparing:
1. should just work
2. should not be a special case
The code should use such data structures that the ordering doesn't matter.
For example, if you use a dict, then the ordering doesn't matter:
>>> a = dict([('a', 1), ('b', 2)])
>>> b = dict([('b', 2), ('a', 1)])
>>> a == b
True
> + compareOutOfOrder(a_catlst, b_catlst, i)
> + break
> + else:
> + logger.warning("pkgs different: {0}
> {1}".format(a_catlst[i],b_catlst[i]))
> + sys.exit(1)
> + except IndexError as e:
> + logger.info("package %s not in acat", b_catlst[i])
>
> # import pdb; pdb.set_trace()
> - logger.debug("catalogs are same")
> + logger.info("catalogs are same")
> sys.exit(0)
>
>
>
> This was sent by the SourceForge.net collaborative development platform,
> the world's largest Open Source development site.
>
>
[Attachment #3 (text/html)]
<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">Hi Carsten, \
</div><div class="gmail_quote"><br></div><div class="gmail_quote">More comments! I \
hope you'll be able to reduce the size of this script.</div>
<div class="gmail_quote"><br></div><div class="gmail_quote">2014-01-28 <span \
dir="ltr"><<a href="mailto:cgrzemba@users.sourceforge.net" \
target="_blank">cgrzemba@users.sourceforge.net</a>></span><br><blockquote \
class="gmail_quote" style="margin:0px 0px 0px \
0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
Revision: 22910<br>
<a href="http://sourceforge.net/p/gar/code/22910" \
target="_blank">http://sourceforge.net/p/gar/code/22910</a><br>
Author: cgrzemba<br>
Date: 2014-01-28 16:20:56 +0000 (Tue, 28 Jan 2014)<br>
Log Message:<br>
-----------<br>
use argparse, add out of order pkg compare<br>
<br>
Modified Paths:<br>
--------------<br>
csw/mgar/gar/v2/lib/python/compare_catalog.py<br>
<br>
Modified: csw/mgar/gar/v2/lib/python/compare_catalog.py<br>
===================================================================<br>
--- csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28 12:36:44 UTC \
(rev 22909)<br>
+++ csw/mgar/gar/v2/lib/python/compare_catalog.py 2014-01-28 16:20:56 UTC \
(rev 22910)<br> @@ -2,55 +2,108 @@<br>
<br>
import cjson<br>
import logging<br>
-import optparse<br>
+import argparse<br>
import urllib2<br>
import sys<br>
+import re<br>
<br>
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s')<br>
logger = logging.getLogger(__name__)<br>
<br>
+remote_scheme = ['http','https']<br>
+local_scheme = ['file']<br>
+<br>
+def prepareCatListFromURI(uri):<br>
+ catlst = []<br>
+ if '://' in uri:<br></blockquote><div><br></div><div>We can say that \
you have to have a valid URI that either starts with http:// or https:// or \
file://</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px \
0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ scheme = uri.split(':')[0]<br>
+ if scheme in remote_scheme:<br>
+ <a href="http://logger.info" \
target="_blank">logger.info</a>("fetch remote %s", uri)<br> + \
data = urllib2.urlopen(uri).read()<br></blockquote><div><br></div><div>Let's use \
the requests module. We have a package.</div><div><br></div><div><a \
href="http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/rest.py# \
L250">http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/rest.py#L250</a><br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px \
0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ catlst = cjson.decode(data)<br>
+ for e in catlst:<br>
+ del e[9]<br>
+ return catlst<br>
+ elif scheme in local_scheme:<br>
+ uri = re.sub('.*://','',uri)<br>
+ else:<br>
+ logger.error('unsupported URI format')<br>
+ sys.exit(4)<br>
+ with open(uri) as lcat:<br>
+ <a href="http://logger.info" target="_blank">logger.info</a>("fetch \
local %s", uri)<br> + for line in lcat: # skip 4 lines header '# \
CREATIONDATE'<br></blockquote><div><br></div><div>We already have a parser, \
please use it.</div><div><a \
href="http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/catalog. \
py#L66">http://sourceforge.net/apps/trac/gar/browser/csw/mgar/gar/v2/lib/python/catalog.py#L66</a><br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px \
0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ if line.startswith("# CREATIONDATE"):<br>
+ break<br>
+ for line in lcat:<br>
+ if line.startswith("-----BEGIN PGP SIGNATURE"):<br>
+ break<br>
+ catlst.append(line.rstrip().split(' '))<br>
+ return catlst<br>
+<br>
+def compareOutOfOrder(a_catlst, b_catlst, idx):<br>
+ a_pkgName2Idx = {}<br>
+ i = idx<br>
+ for j in range(idx,len(a_catlst)):<br>
+ a_pkgName2Idx[a_catlst[j][0]] = j<br>
+ # import pdb; pdb.set_trace()<br>
+ while i < len(b_catlst):<br>
+ if b_catlst[i][0] in a_pkgName2Idx:<br>
+ if b_catlst[i] != a_catlst[a_pkgName2Idx[b_catlst[i][0]]]:<br>
+ logger.warning("pkgs different at {0},{1}: {2} \
{3}".format(i,a_pkgName2Idx[b_catlst[i][0]],a_catlst[a_pkgName2Idx[b_catlst[i][0]]],b_catlst[i]))<br>
+ sys.exit(1)<br>
+ else:<br>
+ logger.warning("not in acat: %s", b_catlst[i])<br>
+ sys.exit(1)<br>
+ i += 1<br>
+ b_pkgName2Idx = {}<br>
+ for j in range(idx,len(b_catlst)):<br>
+ b_pkgName2Idx[b_catlst[j][0]] = j<br>
+ # import pdb; pdb.set_trace()<br>
+ i = idx<br>
+ while i < len(a_catlst):<br>
+ if a_catlst[i][0] not in b_pkgName2Idx:<br>
+ logger.warning("not in bcat: %s", a_catlst[i])<br>
+ sys.exit(1)<br>
+ i += 1<br>
</blockquote><div><br></div><div>Why not convert both to a data structure consisting \
of basic types: nested lists and dicts? Then you can just compare them using the == \
operator. If you wanted some diagnostic output to display the difference, you can \
always serialize them and display the textual diff - it will save you lots of lines \
of code.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px \
0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
def main():<br>
- parser = optparse.OptionParser()<br>
- parser.add_option("-v","--verbose", \
dest="verbose", \
action="store_true",default=False)<br>
- parser.add_option("-a","--existing-catalog", \
dest="oldcatalog",<br>
- help='set URI of existing catalog', metavar = \
'catalog')<br>
- parser.add_option("-b","--new-catalog", \
dest="newcatalog",<br>
- help='set URI of catalog to generate', metavar \
= 'catalog')<br>
- options, args = parser.parse_args()<br>
+ parser = argparse.ArgumentParser()<br>
+ parser.add_argument("-v","--verbose", \
dest="verbose", action="store_true",default=False)<br> + \
parser.add_argument("acat",help="catalog URI")<br> + \
parser.add_argument("bcat",help="catalog URI")<br> + args = \
parser.parse_args()<br> opterror = False<br>
- if options.verbose:<br>
+ if args.verbose:<br>
logger.setLevel(logging.INFO)<br>
- if options.debug:<br>
- logger.setLevel(logging.DEBUG)<br>
- if options.newcatalog is None or options.oldcatalog is None:<br>
- logger.error("mandatory option missing")<br>
+ if args.acat is None or args.bcat is None:<br>
+ logger.error("mandatory args 'acat' 'bcat' \
missing")<br> sys.exit(2)<br>
- oldcat = options.oldcatalog<br>
- newcat = options.newcatalog<br>
- <a href="http://logger.info" target="_blank">logger.info</a>(" compare %s \
with %s", oldcat, newcat)<br> <br>
- data = urllib2.urlopen(oldcat).read()<br>
- a_catlst = cjson.decode(data)<br>
- for e in a_catlst:<br>
- del e[9]<br>
- b_catlst = []<br>
- with open(newcat) as nc:<br>
- for i in range(4): # skip 4 lines header<br>
- nc.readline()<br>
- for cl in nc.readlines():<br>
- if "-----BEGIN" == cl.split(' ')[0]:<br>
- break<br>
- b_catlst.append(cl.rstrip().split(' '))<br>
+ <a href="http://logger.info" target="_blank">logger.info</a>("fetch cat_a \
%s", args.acat)<br> + a_catlst = prepareCatListFromURI(args.acat)<br>
+<br>
+ <a href="http://logger.info" target="_blank">logger.info</a>("fetch cat_b \
%s", args.bcat)<br> + b_catlst = prepareCatListFromURI(args.bcat)<br>
+<br>
+ <a href="http://logger.info" target="_blank">logger.info</a>("compare \
...")<br> if len(a_catlst) != len(b_catlst):<br>
- logger.warning("a has %d, b has %d \
packges",len(a_catlst),len(b_catlst))<br>
- sys.exit(1)<br>
+ logger.warning("a has %d, b has %d \
packages",len(a_catlst),len(b_catlst))<br> + # sys.exit(1)<br>
for i in range(len(b_catlst)):<br>
- if b_catlst[i] != a_catlst[i] :<br>
- logger.warning("a is {0}, b is \
{1}".format(a_catlst[i],b_catlst[i]))<br>
- sys.exit(1)<br>
+ try:<br>
+ if b_catlst[i] != a_catlst[i] :<br>
+ if b_catlst[i][0] != a_catlst[i][0]:<br>
+ logger.warning("packages out of order: A: %s; B: \
%s",a_catlst[i][0], b_catlst[i][0])<br></blockquote><div><br></div><div>Hm, what \
I meant is that out of order comparing:</div><div><br></div>
<div>1. should just work</div><div>2. should not be a special \
case</div><div><br></div><div>The code should use such data structures that the \
ordering doesn't matter. For example, if you use a dict, then the ordering \
doesn't matter:</div>
<div><br></div><div><div>>>> a = dict([('a', 1), ('b', \
2)])</div><div>>>> b = dict([('b', 2), ('a', \
1)])</div><div>>>> a == b</div><div>True</div></div><div><br> </div>
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px \
0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
+ compareOutOfOrder(a_catlst, b_catlst, i)<br>
+ break<br>
+ else:<br>
+ logger.warning("pkgs different: {0} \
{1}".format(a_catlst[i],b_catlst[i]))<br> + \
sys.exit(1)<br> + except IndexError as e:<br>
+ <a href="http://logger.info" \
target="_blank">logger.info</a>("package %s not in acat", b_catlst[i])<br> \
<br> # import pdb; pdb.set_trace()<br>
- logger.debug("catalogs are same")<br>
+ <a href="http://logger.info" target="_blank">logger.info</a>("catalogs \
are same")<br> sys.exit(0)<br>
<br>
<br>
<br>
This was sent by the SourceForge.net collaborative development platform, the \
world's largest Open Source development site.<br> <br>
</blockquote></div><br></div></div>
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic