"""Scrape the Nemi comic strip page on dn.se and print an RSS 2.0 feed.

The page is fetched once, the strip image URLs are extracted with a regular
expression, and one feed item is emitted per image.  The first image found is
dated today and every following image one day earlier.
"""

import datetime
import re
import time
from contextlib import closing
from email.utils import formatdate
from xml.sax.saxutils import escape

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

TITLE = "Nemi"
SITELINK = "http://www.dn.se/serier/nemi"

SECONDS_PER_DAY = 86400

# NOTE(review): the active pattern in this file was an empty string, which
# makes the URL formatting below raise TypeError.  This is the pattern that was
# left commented out next to it (the only one consistent with the two-group
# URL formatting), with the literal dots escaped -- confirm against the
# current page markup.
matchExp = re.compile(
    r'/polopoly_fs/(.*?)!image/(.*?)\.gif_gen/derivatives/teaser-small/(.*?)\.gif'
)

# NOTE(review): the element tags of both templates were missing from the file
# (only the placeholders and a dangling "]]>" were left).  They are rebuilt
# here following the RSS 2.0 layout implied by the placeholder order --
# confirm against the feed that consumers expect.
rssTemplate = """
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>{TITLE}</title>
<link>{SITELINK}</link>
<language>sv</language>
<pubDate>{PUBDATE}</pubDate>
<lastBuildDate>{PUBDATE}</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
{ITEMS}
</channel>
</rss>
"""

itemTemplate = """
<item>
<title>{TITLE}</title>
<link>{LINK}</link>
<description><![CDATA[<img src="{LINK}" />]]></description>
<pubDate>{DATE}</pubDate>
</item>
"""


def xml_text(value):
    """Return *value* escaped for use as XML text or a quoted attribute."""
    return escape(value, {'"': "&quot;"})


def fetch_page(url):
    """Download *url* and return its body as text.

    The response is always closed.  The body is decoded as UTF-8 with
    undecodable bytes replaced, since only URL fragments are extracted from it.
    """
    # assumes the page is UTF-8 encoded -- TODO confirm
    with closing(urlopen(url)) as response:
        return response.read().decode("utf-8", "replace")


def extract_image_urls(page):
    """Return the full-size image URL for every strip teaser found in *page*.

    URLs are returned in page order; duplicates are kept.
    """
    return [
        "http://www.dn.se/polopoly_fs/%s!image/%s.gif" % match[0:2]
        for match in matchExp.findall(page)
    ]


def build_items(urls, now):
    """Return the concatenated ``<item>`` elements for *urls*.

    The item at index ``i`` is stamped ``i`` days before *now* (a POSIX
    timestamp): its title is the local calendar date and its pubDate the
    RFC 2822 form of the same instant.
    """
    template = itemTemplate.strip()
    rendered = []
    for offset, url in enumerate(urls):
        stamp = now - offset * SECONDS_PER_DAY
        rendered.append(
            template.format(
                TITLE=xml_text(str(datetime.date.fromtimestamp(stamp))),
                # Escaped once: valid both in <link> and in the HTML attribute
                # inside the CDATA section.
                LINK=xml_text(url),
                DATE=formatdate(stamp),
            )
        )
    return "".join(item + "\n" for item in rendered)


def build_feed(urls, now):
    """Return the complete RSS document for *urls*, published at *now*."""
    return rssTemplate.strip().format(
        TITLE=xml_text(TITLE),
        SITELINK=xml_text(SITELINK),
        PUBDATE=formatdate(now),
        ITEMS=build_items(urls, now),
    )


def main():
    """Fetch the comic page and print the generated feed to stdout."""
    page = fetch_page(SITELINK)
    print(build_feed(extract_image_urls(page), time.time()))


if __name__ == "__main__":
    main()