import os
import cPickle

import conf
from book import Book


class PGDB:
    """PGDB file contains the list of PG books.

    This class downloads the PGDB archive (``get``), turns the extracted
    tab-separated file into ``Book`` objects (``parse``) and stores the
    resulting list as a pickle (``save``).  Locations are taken from the
    ``conf`` module: ``pgdb_url``, ``pgdb_f`` and ``booklist``.
    """

    def get(self):
        """Download PGDB and unpack it into /tmp.

        The data read from ``conf.pgdb_url`` is written to ``conf.pgdb_f``
        and then extracted with the external ``unzip`` program; ``-j``
        discards the directory structure stored in the archive.
        """
        import subprocess
        import urllib

        remote = urllib.urlopen(conf.pgdb_url)
        try:
            data = remote.read()
        finally:
            # Release the connection even if the read fails.
            remote.close()

        # Binary mode: the payload is handed to unzip, so it must reach the
        # disk unmodified (text mode rewrites line endings on some systems).
        with open(conf.pgdb_f, 'wb') as fout:
            fout.write(data)

        # Argument list instead of a shell string, so a path containing
        # spaces or shell metacharacters is passed to unzip as-is.
        subprocess.call(["unzip", "-j", conf.pgdb_f, "-d", "/tmp"])

    def parse(self, file):
        """Parse PGDB file, return list of book objects.

        Args:
            file: path of the tab-separated PGDB file to read.

        Returns:
            A list of ``Book`` objects, one per usable row, in file order.

        Blank lines and lines starting with ``#`` are skipped, as are rows
        with fewer than 17 tab-separated fields.  Reading stops after the
        row whose first field is ``'1'`` has been added, or at end of file,
        whichever comes first.
        """
        books = []
        with open(file) as f:
            # Iterating the file ends cleanly at EOF.  A readline() loop
            # would keep receiving '' there, treat it as a blank line and
            # never terminate.
            for line in f:
                if not line.strip() or line.startswith('#'):
                    continue
                flds = line.split('\t')
                if len(flds) < 17:
                    # reserved for future use, don't add
                    continue
                book = Book(
                    flds[1],           # author
                    flds[4],           # title
                    flds[6],           # language
                    flds[12],          # fname_gen
                    flds[13],          # directory
                    flds[14],          # fname
                    flds[15],          # fsize
                    flds[16].strip(),  # f_unit, minus the trailing newline
                )
                books.append(book)
                if flds[0].strip() == '1':
                    # Entry number 1 is treated as the last row of the file.
                    break
        return books

    def save(self, books):
        """Save parsed list of books to local pickled file.

        Args:
            books: the list of book objects to store in ``conf.booklist``.
        """
        # Pickle protocol 1 is a binary format, so the file has to be
        # opened in binary mode.
        with open(conf.booklist, 'wb') as f:
            cPickle.dump(books, f, 1)
        print("Booklist saved.")

# vim: sts=4:ts=8:et:sw=4