# This file is part of pybliographer
#  
# Original author of Ovid reader: Travis Oliphant <Oliphant.Travis@mayo.edu>
#
# Copyright (C) 1998,1999,2000 Frederic GOBRY
# Email : gobry@idiap.ch
# 	   
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2 
# of the License, or (at your option) any later version.
#   
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details. 
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
# 
# $Id: OvidLike.py,v 1.2.2.4 2001/05/10 13:41:27 fredgo Exp $

""" Parser for files having an Ovid-like structure """

import sys, re, string

from Pyblio import Iterator, Base, Fields, Exceptions, Utils

# Kinds of field a mapping can associate with an Ovid field name.
SimpleField  = 0    # stored as a single stripped text
AuthorField  = 1    # split into a group of authors
SourceField  = 2    # journal description, split into several entry fields
KeywordField = 3    # text appended to any value already held by the entry

# a record separator: a number between angle brackets, alone on its line
separator_re = re.compile (r'<\d+>$')
# an optional word, a parenthesised part, a colon and a `digits-digits' range
source_re    = re.compile (r'(\w+)?\(([^\)]+)\):(\d+-\d+)')
# a dot followed by one or more dots, possibly separated by blanks
compact_dot  = re.compile (r'\.(\s*\.)+')

# month abbreviation -> month number (1 based)
long_month = {
    'Jan': 1,  'Feb': 2,  'Mar': 3,
    'Apr': 4,  'May': 5,  'Jun': 6,
    'Jul': 7,  'Aug': 8,  'Sep': 9,
    'Oct': 10, 'Nov': 11, 'Dec': 12
    }

# create list from month dict: month_name [n - 1] is the abbreviation of
# month n.  A real list is allocated first, as the result of range () is
# not guaranteed to support item assignment.
month_name = [None] * len (long_month)

for key in long_month.keys ():
    month_name [long_month [key] - 1] = key


class OvidLike (Iterator.Iterator):
    """ Iterator over the records of a file having an Ovid-like structure.

    A record starts after a separator line (a number between angle
    brackets).  Inside a record, a line that does not start with a
    blank names a field, and the lines starting with a blank hold the
    content of the current field.
    """

    def __init__ (self, file, mapping, deftype):
        """ Store the parsing parameters.

        file    -- file object to parse; it must provide seek () and
                   readline ()
        mapping -- dictionary whose keys are lower case field names and
                   whose values are (name, type) pairs, type being one
                   of SimpleField, AuthorField, SourceField or
                   KeywordField.  For a SourceField, name is indexed
                   and its first item receives the journal name.
        deftype -- type passed to every Base.Entry that is created
        """
        self.file    = file
        self.deftype = deftype
        self.mapping = mapping
        return

    def first (self):
        """ Rewind the file and return its first entry.

        Returns None when the file holds no separator line.  Raises
        Exceptions.ParserError when the first non blank line is not a
        separator.
        """
        # rewind the file
        self.file.seek (0)

        # skip blank and <\d+> line
        while 1:
            line = self.file.readline ()
            if line == '': return None

            line = string.strip (line)
            if line == '': continue

            if separator_re.match (line): break

            raise Exceptions.ParserError (["bad file format"])

        return self.next ()


    def next (self):
        """ Parse the record at the current position of the file.

        Returns a Base.Entry, or None when no field could be read.
        Field names missing from the mapping are reported by a warning
        and skipped.  Raises TypeError when the mapping holds an
        unknown field type.
        """
        fields = self._read_fields ()

        # did we parse a field ?
        if not fields: return None

        # create the entry content
        entry = Base.Entry (type = self.deftype)

        for key in fields.keys ():
            if not self.mapping.has_key (key):
                print ("warning: unused key `%s'" % key)
                continue

            (name, kind) = self.mapping [key]
            content = fields [key]

            if kind == SimpleField:
                # parse a simple text field
                entry [name] = Fields.Text (string.strip (content))

            elif kind == KeywordField:
                # several keys may feed the same entry field: append
                # to what is already there
                text = string.strip (content)
                if entry.has_key (name):
                    text = str (entry [name]) + '  ' + text

                entry [name] = Fields.Text (text)

            elif kind == AuthorField:
                entry [name] = self._parse_authors (content)

            elif kind == SourceField:
                self._parse_source (entry, name, content)

            else:
                raise TypeError ("unknown field type `%d'" % kind)

        return entry


    def _read_fields (self):
        """ Read lines up to the next separator (or the end of the
        file) and return a dictionary: lower case field name -> text. """
        fields = {}

        text  = ''
        field = ''
        while 1:
            line = self.file.readline ()
            if line == '': break

            line = string.rstrip (line)

            if line == '': continue

            # starting with a blank ?
            if line [0] == ' ':
                # ...then we continue the current text
                text = text + ' ' + string.lstrip (line)
                continue

            # new entry ?
            if separator_re.match (line): break

            # else, this is a new field
            if field:
                # save the previous one if needed
                fields [field] = text
                text = ''

            # store the name of this new field
            field = string.lower (line)

        # don't waste the last field content
        if field:
            fields [field] = text

        return fields


    def _parse_authors (self, text):
        """ Return the Fields.AuthorGroup described by text, where the
        authors are separated by two spaces and each one is written as
        a last name followed by a first name. """
        group = Fields.AuthorGroup ()

        for names in string.split (text, '  '):
            la = string.split (names)

            # an empty field or doubled separators produce chunks
            # without any word: there is no author to extract
            if not la: continue

            last = la [0]
            if len (la) > 1:
                first = la [1]
            else:
                first = None

            group.append (Fields.Author ((None, first, last, None)))

            # authors may be separated by just a single space if more
            # than one line of authors appears in ovid file: the two
            # last words are then taken as a second author
            if len (la) > 3:
                group.append (Fields.Author ((None, la [-1], la [-2], None)))

        return group


    def _parse_source (self, entry, name, text):
        """ Split a comma separated source description and store its
        parts in entry.

        The parts starting with `vol.', `no.' and `pp.' fill the
        `volume', `number' and `pages' fields.  The part preceding the
        pages is searched for a date, stored in `date'.  Everything
        else is kept, with the journal name, in entry [name [0]].
        The entry is left untouched when text holds no comma.
        """
        # separate fields by ,
        parts = string.split (text, ',')

        if len (parts) == 1:
            print ("warning: can't parse source")
            return

        journal = string.strip (parts [0])

        # extract volume, number, pages, ...
        for i in range (1, len (parts)):
            fs = string.strip (parts [i])

            if fs [0:4] == 'vol.':
                entry ['volume'] = Fields.Text (fs [4:])

            elif fs [0:3] == "no.":
                entry ['number'] = Fields.Text (fs [3:])

            elif fs [0:3] == "pp.":
                fss = string.split (fs, '.')
                entry ['pages'] = Fields.Text (fss [1])
                # what follows the pages goes with the journal name
                journal = journal + "," + string.join (fss [2:], '.')

                # the date field precedes pages
                fss = string.split (parts [i - 1])
                # we have to work from the end since there may be
                # characters unrelated to the date at the start of the
                # field
                try:
                    year = int (fss [-1])
                except (IndexError, ValueError):
                    year = None
                    print ("warning: cannot parse year")
                    print ("offending line: %s" % text)
                try:
                    month = long_month [fss [-2][:3]]
                except (IndexError, KeyError):
                    month = None
                try:
                    day = int (fss [-3])
                except (IndexError, ValueError):
                    day = None

                entry ['date'] = Fields.Date ((year, month, day))

            else:
                # additional information we do not want to lose
                journal = journal + ", " + fs

        # the journal name and additional information
        entry [name [0]] = Fields.Text (journal)
        return


def writer (iter, output, mapping):
    """ Write the entries of an iterator in an Ovid-like format.

    iter    -- object whose first () and next () methods return the
               successive entries, and a false value when exhausted
    output  -- object providing a write () method
    mapping -- dictionary: field label -> (name, type).  For a
               SourceField, name is indexed from 0 to 4: item 0 is
               written first, items 1 to 3 are written as
               `%s(%s):%s', and item 4 is read as a date.

    Each entry is preceded by a `<counter>' line, and the entries are
    separated by an empty line.  Only SimpleField, AuthorField and
    SourceField are written; other field types are ignored.
    """

    counter = 1
    entry   = iter.first ()

    while entry:
        output.write ('<%d>\n' % counter)
        counter = counter + 1

        for key in mapping.keys ():
            (name, kind) = mapping [key]
            label = string.capwords (key)

            if kind == SimpleField:
                if not entry.has_key (name): continue
                output.write (label + '\n')

                # NOTE(review): Utils.format presumably wraps and
                # indents the text -- confirm against Pyblio.Utils
                output.write (Utils.format (str (entry [name]),
                                            75, 2, 2) + '\n')

            elif kind == AuthorField:
                if not entry.has_key (name): continue
                output.write (label + '\n')

                auths = map (lambda auth: '%s %s' % (auth.last or '', auth.first or ''),
                             entry [name])

                output.write ('  ' + string.join (auths, '  ') + '\n')

            elif kind == SourceField:
                # do we have one of those fields ?
                if not (entry.has_key (name [0]) or
                        entry.has_key (name [1]) or
                        entry.has_key (name [2]) or
                        entry.has_key (name [3]) or
                        entry.has_key (name [4])): continue
                output.write (label + '\n')

                text = ''
                if entry.has_key (name [0]):
                    # put the title
                    text = text + str (entry [name [0]]) + '. '

                has_source = 0
                vals = ['', '', '']
                for i in range (0, 3):
                    if entry.has_key (name [i + 1]):
                        has_source = 1
                        vals [i] = str (entry [name [i + 1]])

                if has_source:
                    text = text + '%s(%s):%s' % tuple (vals)

                if entry.has_key (name [4]):
                    date = entry [name [4]]

                    # keep only the components that are set: a date
                    # may lack its year (the parser stores None when
                    # it cannot read it), and str (None) must not end
                    # up in the output
                    parts = []
                    if date.year:
                        parts.append (str (date.year))
                    if date.month:
                        parts.append (month_name [date.month - 1])
                    if date.day:
                        parts.append (str (date.day))

                    if parts:
                        if has_source:
                            text = text + ', '
                        text = text + string.join (parts, ' ')

                # final dot.
                if text: text = text + '.'

                # correct the number of dots...
                text = compact_dot.sub ('.', text)

                output.write (Utils.format (text,
                                            75, 2, 2) + '\n')

        entry = iter.next ()
        if entry: output.write ('\n')

    return