# -*- coding: iso-8859-1 -*- # ----------------------------------------------------------------------- # helpers/fxdallocine.py - class and helpers for fxd/allocine generation # ----------------------------------------------------------------------- # $Id: fxdallocine.py,v 1.7 2004/07/10 12:33:42 dischi Exp $ # # Todo: # - add support making fxds without allocine (or documenting it) # - webradio support? # # ----------------------------------------------------------------------- # $Log: fxdallocine.py,v $ # # ----------------------------------------------------------------------- # Freevo - A Home Theater PC framework # Copyright (C) 2003 Krister Lagerstrom, et al. # Please see the file freevo/Docs/CREDITS for a complete list of authors. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of MER- # CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # ----------------------------------------------------------------------- */ # python has no data hiding, but this is the intended use... # subroutines completly in lowercase are regarded as more "private" functions # subRoutines are regarded as public # Modifications by Arthur Lutz (10/2004) #some data __author__ = "" __version__ = "Revision 1.0" __copyright__ = "Copyright (C) 2004" __license__ = "GPL" #Module Imports import re import urllib, urllib2, urlparse import sys import codecs import os freevo_version = '1.5' # headers for urllib2 txdata = None txheaders = { 'User-Agent': 'freevo %s (%s)' % (freevo_version, sys.platform), 'Accept-Language': 'fr-fr', } #Begin class class FxdAllocine: """Class for creating fxd files and fetching allocine information""" def __init__(self): self.allocine_id_list = [] self.allocine_id = None self.isdiscset = False self.title = '' self.info = {} self.url = '' self.image = None # full path image filename self.image_urls = [] # possible image url list self.image_url = None # final image url self.fxdfile = None # filename, full path, WITHOUT extension self.append = False self.device = None self.regexp = None self.mpl_global_opt = None self.media_id = None self.file_opts = [] self.video = [] self.variant = [] self.parts = [] self.var_mplopt = [] self.var_names = [] #image_url_handler stuff self.image_url_handler = {} def searchAllocine(self, name, type): """name (string), returns id list Search for name and returns an id list with tuples: (id , name, year, type)""" regexp_tag = re.compile('<[^>]+>', re.I) url = 'http://www.allocine.fr/recherche/?motcle=%s&rub=%s' % (urllib.quote(name), type) req = urllib2.Request(url, txdata, txheaders) searchstring = name log(LOG_DEBUG,'url= %s' % url) try: response = urllib2.urlopen(req) except urllib2.HTTPError, error: raise FxdAllocine_Net_Error("ALLOCINE unreachable : " + error) return None regexp_idmovie = re.compile('.*/film/fichefilm_gen_cfilm=([0-9]+)\.html" class="link1">(.*).*', re.I | re.S) regexp_iddate = re.compile('.*^

"): #print "[[%s]]" % block m = regexp_idmovie.match(block) if m: #print "Found film in block : %s" % block d = regexp_iddate.match(block) if d: date = d.group(1) else: date = 'Unknown' #print "Found film id : %s - %s - %s" % (m.group(1), re.sub(regexp_tag, '', m.group(2)), date) self.allocine_id_list += [ ( m.group(1), re.sub(regexp_tag, '', m.group(2)), date, 'Film' ) ] #print self.allocine_id_list return self.allocine_id_list def setAllocineId(self, id): """id (number) Set an allocine number for object, and fetch data""" self.allocine_id = id #print "Now trying to get %s" % self.allocine_id self.url = 'http://www.allocine.fr/film/fichefilm_gen_cfilm=%s.html' % id req = urllib2.Request(self.url, txdata, txheaders) try: idpage = urllib2.urlopen(req) except urllib2.HTTPError, error: raise FxdAllocine_Net_Error("ALLOCINE unreachable" + error) return None #print "Response : %s" % idpage.read() self.parsedata(idpage, id) idpage.close() def setAllocineListings(self, id, postcode): self.url = 'http://www.allocine.fr/seance/filmcp.html?cprojection=%s&codepostal=%s' % (id, postcode) req = urllib2.Request(self.url, txdata, txheaders) try: idpage = urllib2.urlopen(req) except urllib2.HTTPError, error: raise FxdAllocine_Net_Error("ALLOCINE unreachable" + error) return None #print "Response : %s" % idpage.read() self.listings = self.parsedatalistings(idpage) idpage.close() def setAllocineNearYou(self, postcode, day = 0, version = ''): """ Fetch data about films and listing at the proximity of a postcode for a given day or week day = 0 : current week day = [1-7] : 1 - wednesday to 7 - tuesday version = 0 : all version VO = 6002 version VF = 6001 """ version_id = {'' : 0, 'VF' : 6001, 'VO' : 6002} url = "http://www.allocine.fr/seance/salleproche.html?" \ "codepostal=%s&j=%s&version=0" % postcode, day, version_id[version.upper()] req = urllib2.Request(self.url, txdata, txheaders) try: idpage = urllib2.urlopen(req) except urllib2.HTTPError, error: raise FxdAllocine_Net_Error("ALLOCINE unreachable" + error) return None #print "Response : %s" % idpage.read() self.listings = self.parsedatanearview(idpage) idpage.close() def setFxdFile(self, fxdfilename = None, overwrite = False): """ setFxdFile (string, full path) Set fxd file to write to, may be omitted, may be an existing file (data will be added) unless overwrite = True """ if fxdfilename: if vfs.splitext(fxdfilename)[1] == '.fxd': self.fxdfile = vfs.splitext(fxdfilename)[0] else: self.fxdfile = fxdfilename else: if self.isdiscset == True: self.fxdfile = vfs.join(config.OVERLAY_DIR, 'disc-set', self.getmedia_id(self.device)) else: self.fxdfile = vfs.splitext(file)[0] if overwrite == False: try: vfs.open(self.fxdfile + '.fxd') self.append = True except: pass else: self.append = False # XXX: add this back in without using parseMovieFile # if self.append == True and \ # parseMovieFile(self.fxdfile + '.fxd', None, []) == []: # raise FxdAllocine_XML_Error("FXD file to be updated is invalid, please correct it.") if not vfs.isdir(vfs.dirname(self.fxdfile)): if vfs.dirname(self.fxdfile): os.makedirs(vfs.dirname(self.fxdfile)) def setVideo(self, *videos, **mplayer_opt): """ videos (tuple (type, id-ref, device, mplayer-opts, file/param) (multiple allowed), global_mplayer_opts Set media file(s) for fxd """ if self.isdiscset == True: raise FxdAllocine_XML_Error(" already used, can't use both "+ " and ") if videos: for video in videos: self.video += [ video ] if mplayer_opt and 'mplayer_opt' in mpl_global_opt: self.mpl_global_opt = mplayer_opt['mplayer_opt'] def setVariants(self, *parts, **mplayer_opt): """ variants/parts (tuple (name, ref, mpl_opts, sub, s_dev, audio, a_dev)), var_mplayer_opts Set Variants & parts """ if self.isdiscset == True: raise FxdAllocine_XML_Error(" already used, can't use both "+ " and ") if mplayer_opt and 'mplayer_opt' in mpl_global_opt: self.varmpl_opt = (mplayer_opt['mplayer_opt']) for part in parts: self.variant += [ part ] def writeFxd(self): """Write fxd file""" #if fxdfile is empty, set it yourself if not self.fxdfile: self.setFxdFile() try: #should we add to an existing file? if self.append == True : if self.isdiscset == True: self.update_discset() else: self.update_movie() else: #fetch images self.fetch_image() #should we write a disc-set ? if self.isdiscset == True: self.write_discset() else: self.write_movie() #check fxd # XXX: add this back in without using parseMovieFile # if parseMovieFile(self.fxdfile + '.fxd', None, []) == []: # raise FxdImdb_XML_Error("""FXD file generated is invalid, please "+ # "post bugreport, tracebacks and fxd file.""") except (IOError, FxdAllocine_IO_Error), error: raise FxdAllocine_IO_Error('error saving the file: %s' % str(error)) def setDiscset(self, device, regexp, *file_opts, **mpl_global_opt): """ device (string), regexp (string), file_opts (tuple (mplayer-opts,file)), mpl_global_opt (string) Set media is dvd/vcd, """ if len(self.video) != 0 or len(self.variant) != 0: raise FxdAllocine_XML_Error(" already used, can't use both "+ " and ") self.isdiscset = True if (not device and not regexp) or (device and regexp): raise FxdAllocine_XML_Error("Can't use both media-id and regexp") self.device = device self.regexp = regexp for opts in file_opts: self.file_opts += [ opts ] if mpl_global_opt and 'mplayer_opt' in mpl_global_opt: self.mpl_global_opt = (mpl_global_opt['mplayer_opt']) def isDiscset(self): """Check if fxd file describes a disc-set, returns 1 for true, 0 for false None for invalid file""" try: file = vfs.open(self.fxdfile + '.fxd') except IOError: return None content = file.read() file.close() if content.find('') != -1: return 1 return 0 #------ private functions below ..... def write_discset(self): """Write a to a fresh file""" try: i = vfs.codecs_open( (self.fxdfile + '.fxd') , 'wb', encoding='utf-8') except IOError, error: raise FxdAllocine_IO_Error("Writing FXD file failed : " + str(error)) return #header i.write("\n\n") i.write(" \n" + " The information in this file are from the Internet " + "Movie Database (Allocine).\n" + " Please visit http://www.allocine.fr for more informations.\n") i.write(" \n" % self.allocine_id + " \n") #disc-set i.write(" \n" % self.str2XML(self.title)) #disc i.write(" " % self.str2XML(self.mpl_global_opt)) else: i.write(">") #file-opts if self.file_opts: i.write("\n") for opts in self.file_opts: mplopts, fname = opts i.write(" " % self.str2XML(mplopts)) i.write("%s\n" % self.str2XML(fname)) i.write(" \n") else: i.write(" \n") #image if self.image: i.write(" " % self.str2XML(self.image_url)) i.write("%s\n" % self.str2XML(self.image)) #print info i.write(self.print_info()) #close tags i.write(" \n") i.write("\n") util.touch(os.path.join(config.FREEVO_CACHEDIR, 'freevo-rebuild-database')) def write_movie(self): """Write to fxd file""" try: i = vfs.codecs_open( (self.fxdfile + '.fxd') , 'w', encoding='utf-8') except IOError, error: raise FxdAllocine_IO_Error("Writing FXD file failed : " + str(error)) return #header i.write("\n\n") i.write(" \n" + " The information in this file are from the Internet " + "Movie Database (Allocine).\n" + " Please visit http://www.allocine.fr for more informations.\n") i.write(" \n" % self.allocine_id + " \n") # write movie i.write(" \n" % self.str2XML(self.title)) #image if self.image: i.write(" " % self.str2XML(self.image_url)) i.write("%s\n" % self.str2XML(self.image)) #video if self.mpl_global_opt: i.write(" \n') i.write('\n') util.touch(os.path.join(config.FREEVO_CACHEDIR, 'freevo-rebuild-database')) def update_movie(self): """Updates an existing file, adds exftra dvd|vcd|file and variant tags""" passedvid = False #read existing file in memory try: file = vfs.open(self.fxdfile + '.fxd') except IOError, error: raise FxdAllocine_IO_Error("Updating FXD file failed : " + str(error)) return content = file.read() file.close() if content.find('') == -1: raise FxdAllocine_XML_Error("FXD cannot be updated, doesn't contain ', re.I) file = vfs.open(self.fxdfile + '.fxd', 'w') for line in content.split('\n'): if passedvid == True and content.find('') == -1: #there is no variants tag if len(self.variant) != 0: file.write(' \n') file.write(self.print_variant()) file.write(' \n') file.write(line + '\n') passedvid = False elif regexp_video_end.match(line): if len(self.video) != 0: file.write(self.print_video()) file.write(line + '\n') passedvid = True elif regexp_variant_end.match(line): if len(self.variant) != 0: file.write(self.print_variant()) file.write(line + '\n') else: file.write(line + '\n') file.close() util.touch(os.path.join(config.FREEVO_CACHEDIR, 'freevo-rebuild-database')) def update_discset(self): """Updates an existing file, adds extra disc in discset""" #read existing file in memory try: file = vfs.open(self.fxdfile + '.fxd') except IOError, error: raise FxdAllocine_IO_Error("Updating FXD file failed : " + str(error)) return content = file.read() file.close() if content.find('') == -1: raise FxdAllocine_XML_Error("FXD file cannot be updated, doesn't contain ") regexp_discset_end = re.compile(' *', re.I) file = vfs.open(self.fxdfile + '.fxd', 'w') for line in content.split('\n'): if regexp_discset_end.match(line): file.write(" " % self.str2XML(self.mpl_global_opt)) else: file.write(">") #file-opts if self.file_opts: file.write("\n") for opts in self.file_opts: mplopts, fname = opts file.write(" " % \ self.str2XML(mplopts)) file.write("%s\n" % self.str2XML(fname)) file.write(" \n") else: file.write(" \n") file.write(line + '\n') else: file.write(line + '\n') file.close() util.touch(os.path.join(config.FREEVO_CACHEDIR, 'freevo-rebuild-database')) def parsedata(self, results, id=0): """results (allocine html page), allocine_id Returns tuple of (title, info(dict), image_urls)""" dvd = 0 inside_plot = None regexp_title = re.compile('.*(.*).*', re.I) regexp_year = re.compile('.*

Année de production : ([0-9]+)'+ '

.*', re.I) regexp_pic = re.compile('.*'+ 'Genre : (.*)

.*',re.I) regexp_runtime = re.compile('.*

Durée : (.*)min\.

.*', re.I) regexp_rating1 = re.compile('.*Presse.*/etoile_([0-4])\.gif.*', re.I) regexp_rating2 = re.compile('.*Spectateurs.*/etoile_([0-4])'+ '\.gif.*', re.I) regexp_plotfull = re.compile('^.*
'+ '

(.*)

.*$', re.I) regexp_plotstart = re.compile('^.*
'+ '

(.*)$', re.I) regexp_plotend = re.compile('(.*)

.*', re.I) regexp_projection = re.compile('.*.*', re.I) self.info['rating'] = '' plotfound = 0 plotfull = 0 plotcomplete = 0 for line in results.read().split("\n"): m = regexp_title.match(line) if m: self.title = m.group(1) #print "Title found : %s" % self.title m = regexp_pic.match(line) if m: self.image_urls += [ m.group(1) ] #print "Image URL 1 : %s" % m.group(1) m = regexp_year.match(line) if m: self.info['year'] = m.group(1) #print "Year found : %s" % self.info['year'] m = regexp_genre.match(line) if m: self.info['genre'] = m.group(1) #print "Genre found : %s" % self.info['genre'] m = regexp_runtime.match(line) if m: self.info['runtime'] = m.group(1) #print "Runtime found : %s" % self.info['runtime'] m = regexp_rating1.match(line) if m: self.info['rating'] += 'Presse: ' + m.group(1) + '/4 ' #print "Rating1 found : %s" % self.info['rating'] m = regexp_rating2.match(line) if m: self.info['rating'] += 'Public: ' + m.group(1) + '/4 ' #print "Rating2 found : %s" % self.info['rating'] m = regexp_projection.match(line) if m: self.info['cprojection'] = m.group(1) m = regexp_plotfull.match(line) if m: plotfull = 1 plotcomplete = 1 plotfound = 0 self.info['plot'] = m.group(1) #print "Synopsis full : %s" % self.info['plot'] else: m = regexp_plotstart.match(line) if m: plotfound = 1 self.info['plot'] = m.group(1) #print "Plot start : %s" % self.info['plot'] continue if plotfound == 1: m = regexp_plotend.match(line) if m: plotfound = 0 plotcomplete = 1 self.info['plot'] += m.group(1) #print "Synopsis End : %s" % self.info['plot'] else: self.info['plot'] += line continue # Clean the plot info removing all xml elements if plotcomplete: plot = re.sub('<[^>]+>', '', self.info['plot']) self.info['plot'] = plot if not id: return (self.title, self.info, self.image_urls) return (self.title, self.info, self.image_urls) def parsedatalistings(self, results): """results (allocine html page), Returns tuple of (title, info(dict), image_urls)""" dvd = 0 inside_plot = None listings = [] regexp_begin_cinema = re.compile('.*

(.*)

.*', re.I) #id_cinema, nom_cinema regexp_address_begin = re.compile('^.*

(.*)') # adresse_cinema regexp_address_end = re.compile('(.*)

.*') regexp_vo_times = re.compile('

en (.*) à (.*)

') # VO/VF, horaires* in_address = False in_times = False item = {} for line in results.read().split("\n"): m = regexp_begin_cinema.match(line) if m: if item != {}: listings.append(item) item = {} item['eid'] = m.group(1) item['url'] = 'http://www.allocine.fr/seance/salle_gen_csalle=C%s.html' % m.group(1) item['title'] = m.group(2) # FIXME XXX - not working m = regexp_address_begin.match(line) m_end = regexp_address_end.match(line) if m: in_address = True item['address'] = m.group(1) elif m_end and in_address: item['address'] += m.group(1) in_address = False elif in_address: item['address'] += line m = regexp_vo_times.match(line) if m: item['version'] = m.group(1) item['times'] = re.sub('<[^>]+>', '', m.group(2)).split(' | ') listings.append(item) return listings def parsedatanearview(self, results): # TODO dvd = 0 inside_plot = None listings = [] regexp_begin_cinema = re.compile('.*

(.*)

.*', re.I) #id_cinema, nom_cinema regexp_address_begin = re.compile('^.*

(.*)') # adresse_cinema regexp_address_end = re.compile('(.*)

.*') regexp_vo_times = re.compile('

en (.*) à (.*)

') # VO/VF, horaires* in_address = False in_times = False item = {} for line in results.read().split("\n"): m = regexp_begin_cinema.match(line) if m: if item != {}: listings.append(item) item = {} item['eid'] = m.group(1) item['url'] = 'http://www.allocine.fr/seance/salle_gen_csalle=C%s.html' % m.group(1) item['title'] = m.group(2) m = regexp_address_begin.match(line) m_end = regexp_address_end.match(line) if m: in_address = True item['address'] = m.group(1) elif m_end and in_address: item['address'] += m.group(1) in_address = False elif in_address: item['address'] += line m = regexp_vo_times.match(line) if m: item['version'] = m.group(1) item['times'] = re.sub('<[^>]+>', '', m.group(2)).split(' | ') listings.append(item) return listings def fetch_image(self): """Fetch the best image""" image_len = 0 if (len(self.image_urls) == 0): # No images return for image in self.image_urls: try: # get sizes of images req = urllib2.Request(image, txdata, txheaders) r = urllib2.urlopen(req) length = int(r.info()['Content-Length']) r.close() if length > image_len: image_len = length self.image_url = image except: pass if not self.image_url: print "Image dowloading failed" return self.image = (self.fxdfile + '.jpg') req = urllib2.Request(self.image_url, txdata, txheaders) r = urllib2.urlopen(req) i = vfs.open(self.image, 'w') i.write(r.read()) i.close() r.close() # try to crop the image to avoid borders by allocine try: import Image image = Image.open(filename) width, height = image.size image.crop((2,2,width-4, height-4)).save(filename) except: pass self.image = vfs.basename(self.image) print "Downloaded cover image from %s" % self.image_url print "Freevo knows nothing about the copyright of this image, please" print "go to %s to check for more informations about private." % self.image_url print "use of this image" def str2XML(self, line): """return a valid XML string""" try: s = Unicode(line) # First start to replace nbsp entity if found s = re.sub(' ', ' ', s) while s[-1] == u' ': s = s[:-1] if s[:4] == u'"': s = s[5:] if s[-4:] == u'#34;': s = s[:-5] # replace all & to & ... s = s.replace(u"&", u"&") # ... but this may be wrong for &# s = s.replace(u"&#", u"&#") return s except: return Unicode(line) def getmedia_id(self, drive): """drive (device string) return a unique identifier for the disc""" if not vfs.exists(drive): return drive return cdrom_disc_id(drive)[1] def print_info(self): """return info part for FXD writing""" ret = u'' if self.info: ret = u' \n' for k in self.info.keys(): ret += u' <%s>' % k + Unicode(self.str2XML(self.info[k])) + '\n' % k ret += u' \n' return ret def print_video(self): """return info part for FXD writing""" ret = '' for vid in self.video: type, idref, device, mpl_opts, fname = vid ret += ' <%s' % self.str2XML(type) ret += ' id=\"%s\"' % self.str2XML(idref) if device: ret += ' media-id=\"%s\"' % self.str2XML(self.getmedia_id(device)) if mpl_opts: ret += ' mplayer-options=\"%s\">' % self.str2XML(mpl_opts) else: ret += '>' ret += '%s' % self.str2XML(fname) ret += '\n' % self.str2XML(type) return ret def print_variant(self): """return info part for FXD writing""" ret = '' for x in range(len(self.variant)): name, idref, mpl_opts, sub, s_dev, audio, a_dev = self.variant[x] ret += '