package Search::OpenFTS::Index;
use strict;

=head1 NAME 

Search::OpenFTS::Index - Provides functions for indexing

=head1 SYNOPSIS

=head2 API

my $fts=Search::OpenFTS::Index->new( DBI ); 

my $fts=Search::OpenFTS::Index->new( DBI, prefix ); 

my $fts=Search::OpenFTS::Index->init(
    dbi=>DBI,
    txttid=>NAME_TXT_ID,
    dict=>[DICT1, DICT2, ...],
    parser=>PARSER,
    map=>'{IDTYPELEXEM1=>[IDDICT1, ...], ...}',
    tsvector_field=>FIELD_NAME, 
    ignore_id_index=>"IDTYPELEXEM1 [IDTYPELEXEM2 [...]]", 
    ignore_headline=>"IDTYPELEXEM1 [IDTYPELEXEM2 [...]]",
    prefix=>PREFIX
); 


This is the initialization function. It is called only once, at the creation of a new search index, to create the configuration and indexing tables.

=over 2

=item txttid

The table where the documents are stored together with its primary key 
(e.g. messages.msg_id)

=item dict

List of available dictionaries. Dictionaries should support three methods: 
lemms, is_stoplexem, drop and init. init is used for the initialization of 
the dictionary. lemms returns an array of lexems for a given word and 
is_stoplexem answers whether the given lexeme corresponds to a stop word or 
not. drop is used for clearing dictionaries tables (if any) 
while dropping OpenFTS instance. Methods is_stoplexem, drop and init are 
optional.

=item parser

The full name of the parser in use. Parser should have the same interface as 
Search::OpenFTS::Parser module.

=item map

A mapping from types of lexemes to dictionaries. This is helpful for optimizing
the search engine and it is also helpful for indexing multi-languages or 
exotic-text documents.

=item tsvector_field

The field name that holds the text index of integers for each document. 
This field must have tsvector type( from contrib/tsearch ) 

=item ignore_id_index

Type IDs of lexemes to ignore while indexing documents.

=item ignore_id_headline

Type IDs of lexemes to ignore while constructing headlines of the search 
results.

=item prefix

If more than one content tables require indexing and searching functionality 
the user can pass a special parameter named prefix which is a character 
value from a-z. The given prefix is used, as a naming convention, to create 
different instances of the configuration and indexing table.

To specify dictionary which requires parameters (snowball stemmer,
for example), use following syntax:

    dict=>[ 
# example how to use snowball stemmer
          { mod=>'Search::OpenFTS::Dict::Snowball', param=>'{lang=>"english"}' },
          'Search::OpenFTS::Dict::UnknownDict',
          ]

=back

=cut

use Search::OpenFTS::Base;
use vars qw(@ISA);
@ISA = ('Search::OpenFTS::Base');

sub new {
    my ( $class, $dbi, $prefix ) = @_;
    $class = ref($class) || $class;

    my $self =
      Search::OpenFTS::Base->new( $dbi, index => 1, prefix => $prefix );
    return undef unless defined $self;

    $self->{INIT} = 0;

    bless( $self, $class );
    return $self;
}

sub init {
    my ( $class, %opt ) = @_;
    $class = ref($class) || $class;
    local $_;
    my $dbi = $opt{dbi};
    die "There is no DBI object" if !ref $dbi;
    my $nameid = $opt{txttid};
    my $dict   = [];
    my $rv     = $opt{dict};
    my $parser = $opt{parser};
    $parser ||= 'Search::OpenFTS::Parser';
    my $prefix = $opt{prefix};
    $prefix ||= '';

    if ( length $prefix ) {
        $prefix =~ s/^([a-z]).*$/$1/ || die "Wrong prefix";
        $prefix .= '_';
    }

    if ( !length $opt{tsvector_field} ) {
        die "There is not tsvector_field option";
    }

    my $success = 1;

    #Creation of tables
    $success &&= $dbi->do(
        "create table ${prefix}fts_conf (
	name	varchar not null,
	did	int not null default -1,
	mod	varchar not null,
	param	varchar,
	primary key (name,did)
	);"
    );

    if ( defined $opt{map} && length $opt{map} )
    {    #Check syntax of map and save it
        my $map = eval $opt{map};
        die "Can't eval '$opt{map}': $@" if $@;
        $opt{map} =~ s/'/\\'/g;
        $success &&=
          $dbi->do(
"insert into ${prefix}fts_conf ( name, mod, param ) values ( 'map', '', '$opt{map}' );"
          );
    }

    {    #Check the syntax of mapping and insert it into the configuration table
        $parser = { mod => $parser, param => '' } if !ref $parser;
        my $prs_param = {};
        if ( exists $parser->{param} && length $parser->{param} ) {
            $prs_param = eval $parser->{param};
            die "Can't eval '$parser->{param}': $@" if $@;
        }
        eval( 'use ' . $parser->{mod} . ';' );
        die "Can't use parser: $@" if ($@);
        my $prs;
        eval { $prs = $parser->{mod}->new(%$prs_param); };
        die "Can't call new method in parser: $@" if ($@);
        $success &&=
          $dbi->do(
"insert into ${prefix}fts_conf ( name, mod, param ) values ( 'parser', '$parser->{mod}', '$parser->{param}' );"
          );
        undef $prs;
    }

    #Other configuration parameters used by Search::OpenFTS::Base
    $success &&=
      $dbi->do(
"insert into ${prefix}fts_conf ( name, mod ) values ( 'ignore_id_index', '"
          . join( ' ', @{ $opt{ignore_id_index} } )
          . "' );" )
      if ref $opt{ignore_id_index};
    $success &&=
      $dbi->do(
"insert into ${prefix}fts_conf ( name, mod ) values ( 'ignore_headline', '"
          . join( ' ', @{ $opt{ignore_headline} } )
          . "' );" )
      if ref $opt{ignore_headline};
    $success &&=
      $dbi->do(
"insert into ${prefix}fts_conf ( name, mod ) values ( 'tsvector_field', '$opt{tsvector_field}' );"
      );
    $success &&=
      $dbi->do(
"insert into ${prefix}fts_conf ( name, mod ) values ( 'txttid', '$nameid' );"
      );

    if ( !( ref $rv && $#{$rv} >= 0 ) ) {
        $rv = ['Search::OpenFTS::Dict::UnknownDict'];
    }

    @$rv = map { ( ref $_ ) ? $_ : { mod => $_ } } @$rv;

    foreach my $i ( 0 .. $#{$rv} ) {
        my $param = $rv->[$i]{param};
	$param ||= '';
        if ( length $param ) {
            $rv->[$i]{param} = eval $param;
            die "Can't eval '$param': $@" if $@;
        }
        $rv->[$i]{param}{DBI} = $dbi;

        eval( 'use ' . $rv->[$i]{mod} . ';' );
        die "Can't use $rv->[$i]{mod} module: $@" if ($@);
        eval { $dict->[$i] = $rv->[$i]{mod}->init( %{ $rv->[$i]{param} } ); };
        if ($@) {
            eval { $dict->[$i] = $rv->[$i]{mod}->new( %{ $rv->[$i]{param} } ); };
            die "Can' call new/init method in $rv->[$i]{mod}: $@" if ($@);
        }
        if ( !$dict->[$i]->can('lemms') ) {
            die "Dictionary $rv->[$i]{mod} has no method lemms";
        }
        $param =~ s/'/\\'/g;
        $success &&=
          $dbi->do(
"insert into ${prefix}fts_conf ( name, did, mod, param ) values ( 'dict', $i, '$rv->[$i]{mod}', '$param' );"
          );
    }
    undef $dict;    #destroy dictionaries

    die "Can't insert row(s) in ${prefix}fts_conf" if !$success;

    my $self = Search::OpenFTS::Index->new( $dbi, $prefix );
    die "Search::OpenFTS::Index->new() returns undef" unless defined $self;

    $self->{INIT} = 1;
    return $self;
}

sub DESTROY {
    my ($self) = @_;
    if ( $self->{INIT} ) {
        warn "Creating index...\n";
        if ( !$self->create_index ) {
            warn "Can't create index !!!\n";
        }
    }
}

=head2 Methods

=over 4

=item index( $txt_id, [ $FH | $text | $reftext ] );

=item index( $txt_id, [ $FH | $text | $reftext ], $title );

Used for indexing text.

=cut

sub index {
    my ( $self, $tid, $txt, $title ) = @_;
    local $_;

    # %struct = (
    #	lexem=>[ posisions ]
    # );
    #
    my ( $num, %struct );
    $num = 1;
    if ( defined $title && length $title ) {
        $num = $self->_addtxt( \%struct, \$title, $num, undef, 'A' );
    }
    $num += $self->_addtxt( \%struct, ( ref $txt ) ? $txt : \$txt, $num );

    return $self->_flush( \%struct, $tid, $num );
}

sub _flush {
    my ( $self, $rs, $tid, $num ) = @_;
    my $success = 1;
    local $_;
    my $orig;

    my $list = join(
        ' ',
        map {
            $orig = $_;
            $_ =~ s/([\r\t\n '\\:])/\\\\$1/g;
            "$_:" . join( ',', @{ $rs->{$orig} } )
          } keys %$rs
    );

    if ( defined $list && length $list ) {
        $success &&=
          $self->{DBI}->do(
"update $self->{TABLE} set $self->{TSVECTOR_FIELD} = '$list' where $self->{IDNAME} = $tid;"
          );
    } else {
        $success &&=
          $self->{DBI}->do(
"update $self->{TABLE} set $self->{TSVECTOR_FIELD} = null where $self->{IDNAME} = $tid;"
          );
    }

    return $success;
}

sub _addtxt {
    my ( $self, $rs, $rtxt, $num, $length, $wclass ) = @_;
    $num    ||= 1;
    $length ||= 0;
    $wclass ||= '';

    $self->{PRS}->start_parser( $rtxt, $length );

    my ( $type, $word, $lexems );
    while ( ( ( $type, $word ) = $self->{PRS}->get_word ) && $type ) {
        next if exists $self->{IGNOREID}{$type};
        $lexems = $self->_get_lexems( $type, $word );
        if ( defined $lexems && $#$lexems >= 0 ) {
            foreach my $l (@$lexems) {
                $rs->{$l} ||= [];
                push( @{ $rs->{$l} }, $num . $wclass );
            }
        }
        $num++;
    }

    $self->{PRS}->end_parser;

    return $num;
}

=pod

=item delete ( $txt_id ) 

Deletes all records of the given identifier.

=cut

sub delete {
    my ( $self, $tid ) = @_;
    my $success = 1;
    $success &&=
      $self->{DBI}->do(
"update $self->{TABLE} set $self->{TSVECTOR_FIELD} = null where $self->{IDNAME} = $tid;"
      );
    return $success;
}

=pod

=item create_index

=item create_index(1);

Creates indices for fast searching, non-zero option - verbose mode

=cut

sub create_index {
    my ( $self, $verbose ) = @_;
    my $success = 1;
    print "Creating index $self->{PREFIX}gist_key on $self->{TABLE}\n"
      if $verbose;
    $success &&=
      $self->{DBI}->do(
"CREATE INDEX $self->{PREFIX}gist_key on $self->{TABLE}  using gist ( $self->{TSVECTOR_FIELD} );"
      );
    $self->{INIT} = 0 if $success;
    return $success;
}

=pod

=item drop_index() 

Removes all indices on tables correspoding current instance of OpenFTS.
Any error are ignored, only warn. This method is opposite for create_index.
This is usefull for bulk uploading.

=cut

sub drop_index {
    my $self = shift;

    $self->{DBI}->do("drop index  $self->{PREFIX}gist_key;")
      || warn "Can't drop index $self->{PREFIX}gist_key";
}

=pod

=item drop() 

Removes all tables correspoding current instance of OpenFTS.
Any error are ignored, only warn.

=cut

sub drop {
    my $self = shift;

    foreach my $dict ( @{ $self->{DICT} } ) {
        $dict->drop if $dict->can('drop');
    }

    $self->{DBI}->do("drop table  $self->{PREFIX}fts_conf;")
      || warn "Can't drop table $self->{PREFIX}fts_conf";
}

=pod

=item start_index( $tid )

Opening a session for indexing

Use:

my $idx = Search::OpenFTS::Index->new( ... );

my $idx_chunk = $idx->start_index( ID );

foreach my $f ( glob <*.html> ) {

	$idx_chunk->index_chunk( IO::File->new( $f ) );

} 

$idx_chunk->flush;

=cut

sub start_index {
    my ( $self, $tid ) = @_;
    bless { index => $self, tid => $tid, struct => {}, nummax => 0 },
      'Search::OpenFTS::Index::Indexer';
}

=item B<fix_permissions>($user)

Grant r/o access on indexes and search table to user $user or to PUBLIC
if $user doesn't specified.

Return TRUE on success or error message if fails. Please, check return value
explicitly for '1' !

Calls fix_permissions for each dictionary if it can.

=cut

sub fix_permissions {
    my ( $self, $user ) = @_;
    my $dbh = $self->{DBI};
    return undef if !$dbh;
    my $autocommit = $dbh->{AutoCommit};
    $dbh->{AutoCommit} = 0;
    $user ||= 'PUBLIC';
    my $success = 1;
    $success &&= $dbh->do("grant select on $self->{PREFIX}fts_conf to $user;");

    foreach my $dict ( @{ $self->{DICT} } ) {
        $success &&= $dict->fix_permissions($user)
          if $dict->can('fix_permissions');
    }
    $dbh->commit if $success;
    $dbh->{AutoCommit} = $autocommit;
    return ($success) ? 1 : $dbh->errstr;
}

{

    package Search::OpenFTS::Index::Indexer;

=pod

=item index_chunk( [FH|REFTXT|TXT], direction=>[1|-1] )

=item index_chunk( [FH|REFTXT|TXT], wclass=>[A|B|C|D] )

=item index_chunk( FH, direction=>[1|-1], offset=>$offset, length=>$length  );

=item index_chunk( FH, wclass=>[A|B|C|D], offset=>$offset, length=>$length  );

Adds a part to an index. Option 'direction' is to store compatibility with
old version of OpenFTS. wclass option has defaults 'D'.

=cut

    sub index_chunk {
        my ( $self, $txt, %opt ) = @_;
        my $direct = $opt{direction};
        local $_;
        $direct ||= 1;
        my $wclass = $opt{wclass};
        if ( !defined $wclass ) {
            $wclass = ( $direct < 0 ) ? 'A' : '';
        } else {
            $wclass =~ /^\s*(\S).*$/;
        }

        my $length = undef;
        if ( ref $txt ne 'SCALAR' ) {
            seek( $txt, $opt{offset}, 0 ) if defined $opt{offset};
        }
        $length = $opt{length} if defined $opt{length};

        $self->{nummax} =
          $self->{index}->_addtxt( $self->{struct}, ( ref $txt ) ? $txt : \$txt,
            $self->{nummax}, $length, $wclass );

        return 1;
    }

=pod

=item flush 

Dump in base of an index

=back

=cut

    sub flush {
        my ($self) = @_;
        $self->{index}->_flush( $self->{struct}, $self->{tid},
            $self->{nummax} - $self->{nummin} + 1 );
    }

}    #package Indexer

=head1 DESCRIPTION

=head1 SEE ALSO

    The OpenFTS Primer          (  see doc/ subdirectory )

    The Crash-course to OpenFTS ( in examples/ subdirectory )

    perldoc Search::OpenFTS::Search

    perldoc Search::OpenFTS::Parser

    perldoc Search::OpenFTS::Dict::PorterEng

    perldoc Search::OpenFTS::Dict::Snowball

    perldoc Search::OpenFTS::Dict::UnknownDict

    perldoc Search::OpenFTS::Morph::ISpell

=cut

1;
__END__