# Copyright (c) 2004-5 Marek Hnilica. All rights reserved.
# Module that searches for information in Windows Media Audio files.
# Distributed under GPL version 2, or (at your option) later

# I don't have read any single document about wma file format and about the
# method how are tags stored in it. I probably have lowered ability of searching
# on Internet.. Result of night with hexa-editor analyzing few WMA files is
# presented here. Documents or (better...) patches would be greatly appreciated.
#
# Maintenance note: the offsets found with the hex editor correspond to two
# objects of the ASF container format -- the Content Description object
# (title, artist, copyright, comment) and the name/type/length/value records
# used for the "WM/..." attributes.  The file is still scanned for byte
# patterns instead of being parsed object by object, so this remains a
# heuristic reader.

# TODO: Get information about total time of the track

import os.path
import string
import struct

# Only this many leading bytes of the file are searched; tag data sits in the
# header, so 1 M bytes to analyze should be more than enough.
_SCAN_LIMIT = 1000000

# On-disk form of the GUID 75B22633-668E-11CF-A6D9-00AA0062CE6C that
# introduces the Content Description object.  The complete 16 bytes are
# matched: the trailing 62 CE 6C alone also ends the header and data object
# GUIDs and can turn up by chance inside audio data.
_CONTENT_DESCRIPTION_GUID = (
    b'\x33\x26\xb2\x75\x8e\x66\xcf\x11'
    b'\xa6\xd9\x00\xaa\x00\x62\xce\x6c'
)

# "kbps" as UTF-16LE text, the way it appears in the codec description.
_KBPS_PATTERN = b'k\x00b\x00p\x00s\x00'

# Value type stored in a "WM/..." record when the value is UTF-16LE text.
_TYPE_UNICODE = 0


class TagError(Exception):
    """Raised when a file cannot be opened or its tag data cannot be read.

    The message is kept in ``err`` and is what ``str()`` returns.
    """

    def __init__(self, err):
        Exception.__init__(self, err)
        self.err = err

    def __str__(self):
        return self.err


def null_rem(text):
    """Return ``text`` with every NUL character (or NUL byte) removed.

    Accepts either a byte string or a text string and returns the same kind.
    """
    if isinstance(text, bytes):
        return text.replace(b'\x00', b'')
    return text.replace(u'\x00', u'')


def _utf16_text(raw):
    """Decode UTF-16LE tag bytes into text and drop the NUL terminator."""
    # A dangling odd byte cannot be half of a character; ignore it.
    raw = raw[:len(raw) - len(raw) % 2]
    return null_rem(raw.decode('utf-16-le', 'replace'))


def _little_endian_int(raw):
    """Interpret ``raw`` as an unsigned little-endian integer."""
    return sum(byte << (8 * index)
               for index, byte in enumerate(bytearray(raw)))


def _find_descriptor(buff, name):
    """Locate the first "WM/..." record called ``name`` in ``buff``.

    Returns ``(value_type, value_bytes)``, or ``None`` when the name is not
    present or the record header is cut off by the end of the buffer.
    """
    # The terminator is part of the pattern so that e.g. "WM/Genre" does not
    # match the beginning of "WM/GenreID".
    pattern = name.encode('utf-16-le') + b'\x00\x00'
    name_at = buff.find(pattern)
    if name_at == -1:
        return None
    header_at = name_at + len(pattern)
    try:
        # Two little-endian 16-bit fields follow the name: type, then length.
        value_type, value_size = struct.unpack_from('<HH', buff, header_at)
    except struct.error:
        return None
    value_at = header_at + 4
    return value_type, buff[value_at:value_at + value_size]


def _descriptor_text(buff, name):
    """Return the text value of record ``name``, or '' when unavailable."""
    found = _find_descriptor(buff, name)
    if found is None:
        return ''
    return _utf16_text(found[1])


def _read_track(buff):
    """Return the track number as a string, or '' when it is not stored."""
    found = _find_descriptor(buff, 'WM/TrackNumber')
    if found is None:
        return ''
    value_type, value = found
    if value_type == _TYPE_UNICODE:
        return _utf16_text(value)
    if not value:
        return ''
    # Any other type is treated as a binary number; at most the low four
    # bytes of the declared value are used.
    return str(_little_endian_int(value[:4]))


def _is_digit_unit(buff, at):
    """Tell whether the UTF-16LE code unit at ``at`` is an ASCII digit."""
    return buff[at + 1:at + 2] == b'\x00' and buff[at:at + 1].isdigit()


def _read_bitrate(buff):
    """Return the number written in front of the first "kbps", or None."""
    kbps_at = buff.find(_KBPS_PATTERN)
    if kbps_at == -1:
        return None
    end = kbps_at
    # Step over the separator between the number and "kbps", if there is one.
    if end >= 2 and not _is_digit_unit(buff, end - 2):
        end -= 2
    while end >= 2 and buff[end - 2:end] == b' \x00':
        end -= 2
    # Walk back over the digits instead of assuming a fixed-width number.
    start = end
    while start >= 2 and _is_digit_unit(buff, start - 2):
        start -= 2
    try:
        # Every second byte is the low (ASCII) half of a code unit.
        return int(buff[start:end:2].decode('ascii'))
    except ValueError:
        return None


def _read_content_description(buff, filename):
    """Return ``(title, artist, comment)`` from the Content Description.

    All three are '' when ``buff`` holds no such object.  Raises TagError
    when the object is present but its header is incomplete or its declared
    size cannot hold the length table.
    """
    guid_at = buff.find(_CONTENT_DESCRIPTION_GUID)
    if guid_at == -1:
        return '', '', ''
    size_at = guid_at + len(_CONTENT_DESCRIPTION_GUID)
    lengths_at = size_at + 8
    strings_at = lengths_at + 10
    try:
        (object_size,) = struct.unpack_from('<Q', buff, size_at)
        # Five 16-bit lengths: 1) Title 2) Artist 3) Copyright 4) Comment
        # 5) a fifth field that is not used here.
        lengths = struct.unpack_from('<5H', buff, lengths_at)
    except struct.error:
        raise TagError('Exception during getting information from %s'
                       % filename)
    # The declared size counts from the first GUID byte.
    object_end = min(guid_at + object_size, len(buff))
    if object_end < strings_at:
        raise TagError('Exception during getting information from %s'
                       % filename)
    title_size, artist_size, copyright_size, comment_size = lengths[:4]
    body = buff[strings_at:object_end]
    comment_at = title_size + artist_size + copyright_size
    title = _utf16_text(body[:title_size])
    artist = _utf16_text(body[title_size:title_size + artist_size])
    comment = _utf16_text(body[comment_at:comment_at + comment_size])
    return title, artist, comment


def Tag(filename):
    """Read tag information from the Windows Media Audio file ``filename``.

    Returns the tuple
    ``(Title, Album, Artist, Year, Genre, Track, Comment, Bitrate)``.
    The first seven items are text strings, '' for anything that was not
    found; Bitrate is an int, or None when no bitrate could be read.  A file
    that contains none of the searched patterns yields all-empty results
    rather than an error.

    Raises TagError when the file does not exist, cannot be opened, or holds
    a Title/Artist/Comment object that is cut short.
    """
    if not os.path.exists(filename):
        raise TagError('File %s does not exist.' % filename)
    try:
        wma_file = open(filename, 'rb')
    except (IOError, OSError):
        raise TagError('Error opening file %s' % filename)
    # The with-block closes the file even if reading fails.
    with wma_file:
        buff = wma_file.read(_SCAN_LIMIT)

    album = _descriptor_text(buff, 'WM/AlbumTitle')
    year = _descriptor_text(buff, 'WM/Year')
    genre = _descriptor_text(buff, 'WM/Genre')
    track = _read_track(buff)
    bitrate = _read_bitrate(buff)
    title, artist, comment = _read_content_description(buff, filename)
    return (title, album, artist, year, genre, track, comment, bitrate)