Hi, Apologies if this is against etiquette. I've just got my first python app up and running. It is a podcast aggregator depending on feedparser. I've really only learnt enough to get this up and running. Any tips on the code quality and use of python would be appreciated. I've got a feeling the overall structure is up the creek. approx 220 LOC. file: GodCast.py Cheers, Lex. #!/usr/bin/python # GodCast: podcast aggregator! # depends on wget & lynx # * one of the main features of GodCast is it's use of bandwidth. # Many podcatchers # http://www.faqts.com/knowledge_base/view.phtml/aid/422/fid/17 # TODO: not found log # TODO: # config file # opml feed list? # pygtk/pyqt/qtkde gui? # possible flags: test, print but don't actual do anything import re, feedparser, os, sys, shutil, time, getopt import urllib2 import urllib import md5 boz = "" HOME = os.path.expanduser("~") # user configurable #maxChecksPerDay = 8 #maxChecksPerDay = 12 maxChecksPerDay = 24 myTemp = '/tmp' #podDir = os.path.join(HOME, 'Audio/Podcasts') podDir = os.path.join(HOME, 'Podcasts') # end user configurable downDir = os.path.join(myTemp, 'Podcasts') dotDir = os.path.join(HOME, '.aGodCast') logFile = os.path.join(dotDir, 'log') #list of downloaded urls cacheDir = os.path.join(dotDir, 'cache') ignoreNotFound = False # if true, add files not found to log # list of feeds, ignore lines not beginning ^http feedList = os.path.join(dotDir, 'feeds.txt') def exitFunc(): #f.close() #log.close() if boz: print boz def makeDirs(*dirs): for dir in dirs: if not os.path.exists(dir): os.makedirs(dir) # render is used because feeds use a lot of html, not just plain text. def render(html): if html: html = re.sub('"', '\\"', html.encode('utf8')) #command = 'echo "' + html + '" | w3m -dump -T text/html' #command = 'echo "' + html + '" | html2text' command = 'echo "' + html + '" | lynx -dump -stdin -force_html' os.system(command) def localMD5(url): hash = md5.new(url).hexdigest() + '.xml' #unique name from url return os.path.join(cacheDir, hash) def cache(url): max = 60 * 60 * 24 / maxChecksPerDay #seconds myfile = localMD5(url) if os.path.isfile(myfile): elapsed = int(time.time()) - os.path.getmtime(myfile) if elapsed <= max: return print "FETCHING:", url + ' ...' urllib.urlretrieve(url, myfile) # handle half finish? def updateCache(feeds): l = [] print "updating local xml cache..." for feed in file(feeds, "r").read().split('\n'): if not re.match('^http://', feed): # feedList ignores anything but urls continue # TODO: handle whitespace, strip trailing cache(feed) l.append([localMD5(feed), feed]) print "cache up to date" return l def geturl(url): try: redir = urllib2.urlopen(url).geturl() except urllib2.HTTPError, e: if e.code != 404: print url print "geturl HTTPError:", e.code return e.code except urllib2.URLError, e: # (110, 'Connection timed out') print e.reason #print "geturl URLError:", e.code else: return redir return 0 def htmlTitle(mainTitle, subTitle): s = '