"""Print the items of a SourceForge project RSS export, then a template file.

Command line: GROUP_ID INFO_TYPE TEMPLATE_FILE [HEADER WORD ...]

INFO_TYPE is 'releases' or 'news'.  This is a Python 2 script: it fetches the
feed with urllib2 and handles item text as UTF-8 encoded byte strings.
"""

BASE_URL = 'http://sourceforge.net/export/'

# Markup stripped from the front of news descriptions.
# NOTE(review): the literal was damaged in this copy of the file (only a line
# break was left between the quotes).  '<br />' is reconstructed from the
# original hard-coded 6-character slice -- confirm against the upstream file.
_LEADING_BREAK = '<br />'


def get_url(group_id, info_type):
    """Return the export URL of the feed *info_type* for project *group_id*.

    info_type must be 'releases' or 'news'; anything else raises RuntimeError.
    """
    if info_type == 'releases':
        filename = 'rss2_projfiles.php?'
    elif info_type == 'news':
        filename = 'rss2_projnews.php?rss_fulltext=1&'
    else:
        raise RuntimeError("bad info_type: %s" % info_type)
    # filename already ends with the '?' or '&' separator, so the parameter
    # is appended without another '&' (the old code produced '?&' and '&&').
    return BASE_URL + filename + ('group_id=%s' % group_id)


def __main__(group_id, info_type, list_htp, *list_header):
    """Fetch the feed, tidy its items and print them.

    group_id and info_type select the feed (see get_url), list_htp is the
    path of the template file handed to print_out_data, and any remaining
    arguments are joined with spaces to form the list header.
    """
    header_text = ' '.join(list_header)
    url = get_url(group_id, info_type)
    import urllib2
    rssdata = urllib2.urlopen(url)
    try:
        data = make_simple_structure(rssdata)
    finally:
        # The response was previously left open.
        rssdata.close()
    personalise_data(data, info_type)
    print_out_data(data, header_text, list_htp)


def personalise_data(data, info_type):
    """Clean up the 'data' text of every item in place.

    Only 'news' items are modified; other feed types are left untouched.
    Items lacking the trailing " (" part or a full stop are no longer an
    error: the corresponding trimming step is simply skipped.
    """
    if info_type != 'news':
        return
    for item in data:
        text = item['data']

        # Strip off the "x comments" bit: everything from the last ' ('.
        head, found, _ = text.rpartition(' (')
        if found:
            text = head

        # Remove the first sentence - this should just be a description
        # of what DOPAL is.
        _, found, tail = text.partition('.')
        if found:
            text = tail
        text = text.strip()

        # Drop any run of leading break tags (and the whitespace between).
        while text.startswith(_LEADING_BREAK):
            text = text[len(_LEADING_BREAK):].strip()

        item['data'] = text


def _child_text(element, tag):
    """Return the UTF-8 encoded text of child *tag*, or '' if absent/empty."""
    child = element.find(tag)
    if child is None or child.text is None:
        return ''
    return child.text.encode('utf-8')


def make_simple_structure(rssdata):
    """Parse the RSS document *rssdata* into a list of plain dictionaries.

    rssdata is whatever ElementTree's parse() accepts (here, the open URL
    response).  Each 'channel/item' element yields a dict with the keys
    'index' (position in the feed), 'title', 'data' (the description),
    'link' and 'date' (the pubDate).  Text values are UTF-8 encoded; a
    missing or empty element gives '' instead of raising AttributeError.
    """
    try:
        import cElementTree as ET
    except ImportError:
        try:
            from elementtree import ElementTree as ET
        except ImportError:
            # Last resort: the copy bundled with the standard library.
            from xml.etree import ElementTree as ET

    result = []
    for index, item in enumerate(ET.parse(rssdata).findall('channel/item')):
        result.append({
            'index': index,
            'title': _child_text(item, 'title'),
            'data': _child_text(item, 'description'),
            'link': _child_text(item, 'link'),
            'date': _child_text(item, 'pubDate'),
        })
    return result


def print_out_data(data, list_header, htp_template):
    """Print the header, every item and the contents of *htp_template*.

    data is the list built by make_simple_structure, list_header is a string
    (or None to omit the header line) and htp_template is the path of a file
    whose contents are copied to standard output.
    """
    # Emitting an include directive for the template did not work, for some
    # reason, so the template's contents are put into the output instead.

    # NOTE(review): every format string below except '%(data)s' is empty in
    # this copy of the file, presumably because markup was stripped from the
    # literals.  They are reproduced unchanged and must be restored from the
    # upstream file; as written, '' % list_header raises TypeError for any
    # string header.
    if list_header is not None:
        print('' % list_header)

    for item in data:
        print('' % item)
        print('' % item)
        print('%(data)s' % item)
        print('' % item)
        print('' % item)
        if item['date']:
            print('' % item)

    # NOTE(review): the flattened file does not show whether the template was
    # printed once here or once per item inside the loop -- confirm.
    with open(htp_template) as tmpl:
        print(tmpl.read())


if __name__ == '__main__':
    import sys
    __main__(*sys.argv[1:])