# This file is part of pybliographer
# 
# Copyright (C) 1998,1999,2000 Frederic GOBRY
# Email : gobry@idiap.ch
# 	   
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2 
# of the License, or (at your option) any later version.
#   
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details. 
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
# 
# $Id: Fields.py,v 1.8.2.3 2001/12/05 13:39:39 fredgo Exp $

from Pyblio import Key, Exceptions

import string, types, re, string, recode, urlparse, os, gettext, time

import copy, re
# Conventional short alias used to mark user-visible messages for
# translation.
_ = gettext.gettext

# Captures the first run of four consecutive digits found in a string.
# Written as a raw string so that the backslashes reach the regular
# expression engine untouched instead of relying on Python leaving the
# unknown escape sequence '\d' alone.
year_match = re.compile(r'(\d\d\d\d)')

# Converters already built by get_formatter, keyed by the lower-cased
# name of the target charset.
formatter_cache = {}

def get_formatter(format):
    ''' Return the recode converter from latin1 to the charset `format`.

    The charset name is matched case-insensitively.  A converter is
    built with recode.recode ('latin1..' + name) the first time a
    charset is requested and stored in formatter_cache, so that later
    requests for the same charset return the same object. '''

    format = format.lower()

    try:
        return formatter_cache[format]
    except KeyError:
        pass

    # Built outside of the except clause so that an error raised by
    # recode is not reported as a consequence of the cache miss.
    ft = recode.recode('latin1..' + format)
    formatter_cache[format] = ft
    return ft


class Author:
    ''' Fine description of an author

    An author is made of four optional fields, each holding None when
    absent: honorific, first, last and lineage.  The attribute `text`
    caches the result of __str__. '''

    def __init__(self, copy=None, strict=0):
        ''' Initialize an author from a string, a sequence or another Author

        copy   -- one of:
                  * a string, split on commas and read as 'Last',
                    'Last, First' or 'Last, Lineage, First'; with more
                    commas the whole string is kept as the last name
                  * a sequence (honorific, first, last, lineage) whose
                    items are strings or None
                  * another Author, whose four fields are copied
                  * a false value (the default) for an empty author
        strict -- only used for a string without comma: when true the
                  whole string is the last name; otherwise the last
                  name starts at the first word (the final one
                  excepted) that is equal to its own lower-cased form,
                  or is the final word when there is none.

        Fields that end up as empty strings are stored as None. '''

        self.honorific = None
        self.first     = None
        self.last      = None
        self.lineage   = None
        self.text      = None

        if isinstance(copy, Author):
            # Reuse the sequence branch below for a real copy.
            copy = (copy.honorific, copy.first, copy.last, copy.lineage)

        if isinstance(copy, str):
            blocs = copy.split(',')
            count = len(blocs)

            if count == 1:
                if strict:
                    # strict parsing, the whole block is the last name
                    self.last = blocs[0]
                else:
                    # lazy parsing, last name is after lowercase or is
                    # last word
                    words = blocs[0].split()
                    i = 0
                    while i < len(words) - 1 and words[i] != words[i].lower():
                        i = i + 1

                    self.first = ' '.join(words[:i])
                    self.last  = ' '.join(words[i:])

            elif count == 2:
                self.last  = blocs[0].strip()
                self.first = blocs[1].strip()

            elif count == 3:
                self.last    = blocs[0].strip()
                self.lineage = blocs[1].strip()
                self.first   = blocs[2].strip()

            else:
                # too many commas to guess: keep everything as last name
                self.last = copy

            # cleanup
            if self.last == '':    self.last    = None
            if self.first == '':   self.first   = None
            if self.lineage == '': self.lineage = None

        elif copy:
            def clean_entry(f):
                if f is not None:
                    f = f.strip()
                    if f == '': f = None
                return f

            fields = [clean_entry(f) for f in copy]

            self.honorific = fields[0]
            self.first     = fields[1]
            self.last      = fields[2]
            self.lineage   = fields[3]


    def format(self, fmt='latin1'):
        ''' Returns the fields in a given format

        The result is the tuple (honorific, first, last, lineage), each
        field having been passed (None included) through the converter
        returned by get_formatter (fmt). '''

        ft = get_formatter(fmt)

        return (ft(self.honorific), ft(self.first),
                ft(self.last), ft(self.lineage))


    def __str__(self):
        ''' Returns textual representation

        The fields that are set are laid out as
        'Honorific Last, Lineage, First'.  A non-empty result is kept
        in self.text and returned as is by later calls. '''

        if not self.text:
            # Every piece carries its own leading separator; the very
            # first character of the assembled text is dropped below.
            pieces = []
            if self.honorific: pieces.append(' ' + self.honorific)
            if self.last:      pieces.append(' ' + self.last)
            if self.lineage:   pieces.append(', ' + self.lineage)
            if self.first:     pieces.append(', ' + self.first)
            self.text = ''.join(pieces)[1:]

        return self.text


    def __repr__(self):
        ''' Returns the fields as 'Author ((honorific, first, last, lineage))' '''
        return 'Author ((%s, %s, %s, %s))' % (repr(self.honorific),
                                              repr(self.first),
                                              repr(self.last),
                                              repr(self.lineage))


    def match(self, regex):
        ''' Searches the textual representation with a compiled regex

        Returns whatever regex.search returns. '''
        return regex.search(str(self))


    def initials(self, format=None):
        ''' Extract initials from a first name

        Each space separated part of the first name gives one group of
        initials; inside a part, hyphenated words keep their hyphen
        ('Jean-Pierre' gives 'J.-P.').  Empty words, as produced by
        doubled spaces or dangling hyphens, are skipped.

        format -- optional charset name; when given, the result is
                  passed through the converter of get_formatter.

        Returns None if the author has no first name. '''

        if self.first is None: return None

        total = []

        for atom in self.first.split(' '):
            # `if word` protects word [0] against empty words
            letters = [word[0] + '.' for word in atom.split('-') if word]

            if letters:
                total.append('-'.join(letters))

        text = ' '.join(total)
        if format:
            ft = get_formatter(format)
            text = ft(text)

        return text

    def __cmp__(self, other):
        ''' field comparison

        Authors are ordered by last name, then first name, then lineage
        and finally honorific. '''

        return cmp((self.last, self.first, self.lineage, self.honorific),
                   (other.last, other.first, other.lineage, other.honorific))


class AuthorGroup:
    ''' A group of Authors

    Thin wrapper around the list self.authors, giving it indexing,
    len (), append, a textual form, regex matching and ordering. '''

    def __init__(self, text=''):
        # NOTE(review): `text` is accepted but not used by this
        # constructor; the group always starts empty.
        self.authors = []

    def __getitem__(self, pos):
        return self.authors[pos]

    def __setitem__(self, pos, val):
        ''' Replace the author stored at index pos '''
        self.authors[pos] = val

        # This class never sets self.val, so the former unconditional
        # self.val.touch () raised AttributeError on every assignment.
        # NOTE(review): the call is kept for objects on which a `val`
        # attribute has been attached from outside -- confirm whether
        # anything still relies on it.
        holder = getattr(self, 'val', None)
        if holder is not None:
            holder.touch()

    def __len__(self):
        return len(self.authors)

    def append(self, value):
        self.authors.append(value)

    def __str__(self):
        ''' Returns the authors separated by '; ' '''
        return '; '.join(map(str, self.authors))

    def __repr__(self):
        return repr(self.authors)

    def match(self, regex):
        ''' Searches the authors, joined by single spaces, with a
        compiled regex and returns the result of regex.search '''
        return regex.search(' '.join(map(str, self.authors)))

    def __cmp__(self, other):
        ''' Item by item comparison

        The first pair of differing items decides.  When one group is
        a prefix of the other, the shorter one comes first.  Anything
        that has no length compares as smaller than the group. '''

        try:
            mine, theirs = len(self), len(other)
        except (TypeError, AttributeError):
            # AttributeError is what instances lacking __len__ raise
            return 1

        for i in range(min(mine, theirs)):
            r = cmp(self[i], other[i])
            if r != 0: return r

        # common prefix is identical: the lengths decide
        return cmp(mine, theirs)
            

class Date:
    ''' Fine description of a date

    A date holds three optional fields, year, month and day, each
    being None when unknown.  The attribute `text` caches the result
    of __str__. '''

    def __init__(self, arg=(None, None, None)):
        ''' Initialize a date from a string or a (year, month, day) triple

        A string only provides the year: it is either converted as a
        whole with int (), or, when that fails, searched with the
        module level year_match pattern; the year is None if neither
        works.  Any other argument is unpacked as (year, month, day).

        Raises Exceptions.DateError for a negative year, a month
        outside 1..12 or a day outside 1..31.  False values (None, 0)
        are not checked. '''

        if isinstance(arg, str):
            month = None
            day   = None

            try:
                year = int(arg)
            except ValueError:
                found = year_match.search(arg)
                if found:
                    year = int(found.group(1))
                else:
                    year = None
        else:
            year, month, day = arg

        if year and year < 0:
            raise Exceptions.DateError(_("Illegal year value"))

        if month and not (1 <= month <= 12):
            raise Exceptions.DateError(_("Illegal month value"))

        if day and not (1 <= day <= 31):
            raise Exceptions.DateError(_("Illegal day value"))

        self.year  = year
        self.month = month
        self.day   = day
        self.text  = None


    def __cmp__(self, other):
        ''' Compares year, then month, then day

        A missing (false) field counts as -1.  The value returned is
        the difference of the first pair of fields that differ, or 0. '''

        pairs = ((self.year,  other.year),
                 (self.month, other.month),
                 (self.day,   other.day))

        for mine, theirs in pairs:
            diff = (mine or -1) - (theirs or -1)
            if diff: return diff

        return 0


    def __str__(self):
        ''' Returns textual representation

        'day/month/year', 'month/year' or 'year' depending on the
        fields that are set, and an empty string without a year.  A
        non-empty result is kept in self.text and returned as is by
        later calls. '''

        if self.text:
            return self.text

        if not self.year:
            self.text = ''
        elif not self.month:
            self.text = str(self.year)
        elif not self.day:
            self.text = '%d/%d' % (self.month, self.year)
        else:
            self.text = '%d/%d/%d' % (self.day, self.month, self.year)

        return self.text


    def format(self, fmt='latin1'):
        ''' Returns the fields in a given format

        The result is the tuple (year, month, day).  Every field that
        is set is turned into a string and passed through the
        converter of get_formatter (fmt); the others are None. '''
        ft = get_formatter(fmt)

        def convert(value):
            if value:
                return ft(str(value))
            return None

        return convert(self.year), convert(self.month), convert(self.day)


    def __repr__(self):
        return 'Date (\'%s\')' % str(self)


    def match(self, regex):
        ''' Searches the textual representation with a compiled regex

        Returns whatever regex.search returns. '''
        return regex.search(str(self))
        

class Text:
    ''' This class holds all the other fields (not an Author or a Date)

    The wrapped value is available as self.text. '''

    def __init__(self, text):
        self.text = text


    def __str__(self):
        return str(self.text)


    def __repr__(self):
        # repr () is the function form of the deprecated backquotes
        return 'Text (%s)' % repr(self.text)


    def match(self, regex):
        ''' Searches the text with a compiled regex

        Returns whatever regex.search returns. '''
        return regex.search(self.text)


    def __cmp__(self, other):
        ''' Compares the text with the string form of other '''
        return cmp(self.text, str(other))


    def format(self, fmt='latin1'):
        ''' Returns the fields in a given format

        The text is passed through the converter of
        get_formatter (fmt). '''
        ft = get_formatter(fmt)

        return ft(self.text)


class URL:
    ''' Holder for URL data (for example, the location of a database)

    The URL is kept in self.url as the tuple of components used by
    urlparse.urlparse and urlparse.urlunparse. '''

    def __init__(self, url):
        ''' Initialize from a string or from a sequence of components

        A string is stripped and split with urlparse.urlparse; any
        other argument is taken as the sequence of components itself.

        When the first component (the scheme) is empty, the URL is
        considered to be a local file: the scheme becomes 'file' and
        the third component (the path) has its leading ~ expanded and
        is made absolute relative to the current directory. '''

        if isinstance(url, str):
            url = urlparse.urlparse(url.strip())

        # Always work on a private list: a tuple cannot be modified in
        # place, and a list handed in by the caller must not be.
        url = list(url)

        if url[0] == '':
            # Consider we handle a local file
            path = os.path.expanduser(url[2])

            if not os.path.isabs(path):
                path = os.path.normpath(os.path.join(os.getcwd(), path))

            url[0] = 'file'
            url[2] = path

        self.url = tuple(url)

    def match(self, regex):
        ''' Searches the textual form of the URL with a compiled regex

        Returns whatever regex.search returns. '''
        return regex.search(str(self))

    def __cmp__(self, other):
        ''' Compares the component tuples '''
        return cmp(self.url, other.url)

    def __hash__(self):
        return hash(str(self))

    def __str__(self):
        ''' Returns the components reassembled by urlparse.urlunparse '''
        return urlparse.urlunparse(self.url)


    def __repr__(self):
        return 'URL (%s)' % repr(urlparse.urlunparse(self.url))
    

class Reference:
    ''' Holder for a reference to a bibliographic entry (which can be
    a crossref, a link to related entries, ...)

    The referenced keys are kept in self.list; each of them is
    expected to provide the attributes `base` and `key`. '''

    def __init__(self, keylist, database=None):
        ''' Initialize from a list of keys or from a string

        keylist  -- either a ready-made list of key objects, stored as
                    is, or a string of comma separated key names, each
                    of which is stripped and turned into
                    Key.Key (database, name)
        database -- only used to build the keys from a string. '''

        if isinstance(keylist, str):
            keylist = [Key.Key(database, name.strip())
                       for name in keylist.split(',')]

        self.list = keylist

    def __str__(self):
        ''' Returns the keys grouped by database

        The databases appear in the order in which they are first met
        in the list, and so do the keys inside each group. '''

        # Single pass: `order` remembers the databases as they show
        # up, `grouped` collects the keys belonging to each of them.
        order   = []
        grouped = {}

        for ref in self.list:
            keys = grouped.get(ref.base)
            if keys is None:
                keys = grouped[ref.base] = []
                order.append(ref.base)
            keys.append(ref.key)

        body = ['(' + ', '.join(grouped[db]) + ') ' + _("in %s") % db
                for db in order]

        return 'Reference on %s' % ', '.join(body)


    def __repr__(self):
        return 'Reference (%s)' % repr(self.list)


    def match(self, regex):
        ''' Searches the string form of each key with a compiled regex

        Returns the first successful result of regex.search, or None
        when no key matches. '''
        for key in self.list:
            ret = regex.search(str(key))
            if ret: return ret
        return None


    def __cmp__(self, other):
        ''' Compares the lists of keys '''
        return cmp(self.list, other.list)


    def format(self, fmt='latin1'):
        ''' Returns the fields in a given format

        The result is the list obtained by passing every item of
        self.list through the converter of get_formatter (fmt). '''
        ft = get_formatter(fmt)

        return [ft(key) for key in self.list]